Merge tag 'gcc-plugins-v4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git...
[cascardo/linux.git] / mm / page_alloc.c
index 901121a..2b3bf67 100644 (file)
@@ -612,6 +612,9 @@ static bool need_debug_guardpage(void)
        if (!debug_pagealloc_enabled())
                return false;
 
+       if (!debug_guardpage_minorder())
+               return false;
+
        return true;
 }
 
@@ -620,6 +623,9 @@ static void init_debug_guardpage(void)
        if (!debug_pagealloc_enabled())
                return;
 
+       if (!debug_guardpage_minorder())
+               return;
+
        _debug_guardpage_enabled = true;
 }
 
@@ -640,19 +646,22 @@ static int __init debug_guardpage_minorder_setup(char *buf)
        pr_info("Setting debug_guardpage_minorder to %lu\n", res);
        return 0;
 }
-__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
 
-static inline void set_page_guard(struct zone *zone, struct page *page,
+static inline bool set_page_guard(struct zone *zone, struct page *page,
                                unsigned int order, int migratetype)
 {
        struct page_ext *page_ext;
 
        if (!debug_guardpage_enabled())
-               return;
+               return false;
+
+       if (order >= debug_guardpage_minorder())
+               return false;
 
        page_ext = lookup_page_ext(page);
        if (unlikely(!page_ext))
-               return;
+               return false;
 
        __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
 
@@ -660,6 +669,8 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
        set_page_private(page, order);
        /* Guard pages are not available for any usage */
        __mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+       return true;
 }
 
 static inline void clear_page_guard(struct zone *zone, struct page *page,
@@ -681,9 +692,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
                __mod_zone_freepage_state(zone, (1 << order), migratetype);
 }
 #else
-struct page_ext_operations debug_guardpage_ops = { NULL, };
-static inline void set_page_guard(struct zone *zone, struct page *page,
-                               unsigned int order, int migratetype) {}
+struct page_ext_operations debug_guardpage_ops;
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+                       unsigned int order, int migratetype) { return false; }
 static inline void clear_page_guard(struct zone *zone, struct page *page,
                                unsigned int order, int migratetype) {}
 #endif
@@ -1398,15 +1409,18 @@ static void __init deferred_free_range(struct page *page,
                return;
 
        /* Free a large naturally-aligned chunk if possible */
-       if (nr_pages == MAX_ORDER_NR_PAGES &&
-           (pfn & (MAX_ORDER_NR_PAGES-1)) == 0) {
+       if (nr_pages == pageblock_nr_pages &&
+           (pfn & (pageblock_nr_pages - 1)) == 0) {
                set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-               __free_pages_boot_core(page, MAX_ORDER-1);
+               __free_pages_boot_core(page, pageblock_order);
                return;
        }
 
-       for (i = 0; i < nr_pages; i++, page++)
+       for (i = 0; i < nr_pages; i++, page++, pfn++) {
+               if ((pfn & (pageblock_nr_pages - 1)) == 0)
+                       set_pageblock_migratetype(page, MIGRATE_MOVABLE);
                __free_pages_boot_core(page, 0);
+       }
 }
 
 /* Completion tracking for deferred_init_memmap() threads */
@@ -1474,9 +1488,9 @@ static int __init deferred_init_memmap(void *data)
 
                        /*
                         * Ensure pfn_valid is checked every
-                        * MAX_ORDER_NR_PAGES for memory holes
+                        * pageblock_nr_pages for memory holes
                         */
-                       if ((pfn & (MAX_ORDER_NR_PAGES - 1)) == 0) {
+                       if ((pfn & (pageblock_nr_pages - 1)) == 0) {
                                if (!pfn_valid(pfn)) {
                                        page = NULL;
                                        goto free_range;
@@ -1489,7 +1503,7 @@ static int __init deferred_init_memmap(void *data)
                        }
 
                        /* Minimise pfn page lookups and scheduler checks */
-                       if (page && (pfn & (MAX_ORDER_NR_PAGES - 1)) != 0) {
+                       if (page && (pfn & (pageblock_nr_pages - 1)) != 0) {
                                page++;
                        } else {
                                nr_pages += nr_to_free;
@@ -1525,6 +1539,9 @@ free_range:
                        free_base_page = NULL;
                        free_base_pfn = nr_to_free = 0;
                }
+               /* Free the last block of pages to the allocator */
+               nr_pages += nr_to_free;
+               deferred_free_range(free_base_page, free_base_pfn, nr_to_free);
 
                first_init_pfn = max(end_pfn, first_init_pfn);
        }
@@ -1621,18 +1638,15 @@ static inline void expand(struct zone *zone, struct page *page,
                size >>= 1;
                VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
 
-               if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
-                       debug_guardpage_enabled() &&
-                       high < debug_guardpage_minorder()) {
-                       /*
-                        * Mark as guard pages (or page), that will allow to
-                        * merge back to allocator when buddy will be freed.
-                        * Corresponding page table entries will not be touched,
-                        * pages will stay not present in virtual address space
-                        */
-                       set_page_guard(zone, &page[size], high, migratetype);
+               /*
+                * Mark as guard pages (or page), which allows them to
+                * merge back into the allocator when the buddy is freed.
+                * Corresponding page table entries will not be touched;
+                * the pages will stay not present in virtual address space.
+                */
+               if (set_page_guard(zone, &page[size], high, migratetype))
                        continue;
-               }
+
                list_add(&page[size].lru, &area->free_list[migratetype]);
                area->nr_free++;
                set_page_order(&page[size], high);
@@ -2494,9 +2508,14 @@ int __isolate_free_page(struct page *page, unsigned int order)
        mt = get_pageblock_migratetype(page);
 
        if (!is_migrate_isolate(mt)) {
-               /* Obey watermarks as if the page was being allocated */
-               watermark = low_wmark_pages(zone) + (1 << order);
-               if (!zone_watermark_ok(zone, 0, watermark, 0, 0))
+               /*
+                * Obey watermarks as if the page was being allocated. We can
+                * emulate a high-order watermark check with a raised order-0
+                * watermark, because we already know our high-order page
+                * exists.
+                */
+               watermark = min_wmark_pages(zone) + (1UL << order);
+               if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA))
                        return 0;
 
                __mod_zone_freepage_state(zone, -(1UL << order), mt);
@@ -2965,9 +2984,11 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
                DEFAULT_RATELIMIT_INTERVAL,
                DEFAULT_RATELIMIT_BURST);
 
-void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
+void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
 {
        unsigned int filter = SHOW_MEM_FILTER_NODES;
+       struct va_format vaf;
+       va_list args;
 
        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
            debug_guardpage_minorder() > 0)
@@ -2985,22 +3006,16 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
        if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
                filter &= ~SHOW_MEM_FILTER_NODES;
 
-       if (fmt) {
-               struct va_format vaf;
-               va_list args;
+       pr_warn("%s: ", current->comm);
 
-               va_start(args, fmt);
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
+       pr_cont("%pV", &vaf);
+       va_end(args);
 
-               vaf.fmt = fmt;
-               vaf.va = &args;
+       pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask);
 
-               pr_warn("%pV", &vaf);
-
-               va_end(args);
-       }
-
-       pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
-               current->comm, order, gfp_mask, &gfp_mask);
        dump_stack();
        if (!should_suppress_show_mem())
                show_mem(filter);
@@ -3142,6 +3157,65 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
+static inline bool
+should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+                    enum compact_result compact_result,
+                    enum compact_priority *compact_priority,
+                    int *compaction_retries)
+{
+       int max_retries = MAX_COMPACT_RETRIES;
+       int min_priority;
+
+       if (!order)
+               return false;
+
+       if (compaction_made_progress(compact_result))
+               (*compaction_retries)++;
+
+       /*
+        * compaction considers all the zone as desperately out of memory
+        * so it doesn't really make much sense to retry except when the
+        * failure could be caused by insufficient priority
+        */
+       if (compaction_failed(compact_result))
+               goto check_priority;
+
+       /*
+        * Make sure the compaction wasn't deferred and didn't bail out early
+        * due to lock contention before we declare that we should give up.
+        * But do not retry if the given zonelist is not suitable for
+        * compaction.
+        */
+       if (compaction_withdrawn(compact_result))
+               return compaction_zonelist_suitable(ac, order, alloc_flags);
+
+       /*
+        * !costly requests are much more important than __GFP_REPEAT
+        * costly ones because they are de facto nofail and invoke OOM
+        * killer to move on while costly can fail and users are ready
+        * to cope with that. 1/4 retries is rather arbitrary but we
+        * would need much more detailed feedback from compaction to
+        * make a better decision.
+        */
+       if (order > PAGE_ALLOC_COSTLY_ORDER)
+               max_retries /= 4;
+       if (*compaction_retries <= max_retries)
+               return true;
+
+       /*
+        * Make sure there are attempts at the highest priority if we exhausted
+        * all retries or failed at the lower priorities.
+        */
+check_priority:
+       min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+                       MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+       if (*compact_priority > min_priority) {
+               (*compact_priority)--;
+               *compaction_retries = 0;
+               return true;
+       }
+       return false;
+}
 #else
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3152,13 +3226,11 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
-#endif /* CONFIG_COMPACTION */
-
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
                     enum compact_result compact_result,
                     enum compact_priority *compact_priority,
-                    int compaction_retries)
+                    int *compaction_retries)
 {
        struct zone *zone;
        struct zoneref *z;
@@ -3180,6 +3252,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
        }
        return false;
 }
+#endif /* CONFIG_COMPACTION */
 
 /* Perform direct synchronous page reclaim */
 static int
@@ -3330,16 +3403,26 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 static inline bool
 should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                     struct alloc_context *ac, int alloc_flags,
-                    bool did_some_progress, int no_progress_loops)
+                    bool did_some_progress, int *no_progress_loops)
 {
        struct zone *zone;
        struct zoneref *z;
 
+       /*
+        * Costly allocations might have made progress but this doesn't mean
+        * their order will become available, due to high fragmentation, so
+        * always increment the no-progress counter for them.
+        */
+       if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
+               *no_progress_loops = 0;
+       else
+               (*no_progress_loops)++;
+
        /*
         * Make sure we converge to OOM if we cannot make any progress
         * several times in the row.
         */
-       if (no_progress_loops > MAX_RECLAIM_RETRIES)
+       if (*no_progress_loops > MAX_RECLAIM_RETRIES)
                return false;
 
        /*
@@ -3354,7 +3437,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                unsigned long reclaimable;
 
                available = reclaimable = zone_reclaimable_pages(zone);
-               available -= DIV_ROUND_UP(no_progress_loops * available,
+               available -= DIV_ROUND_UP((*no_progress_loops) * available,
                                          MAX_RECLAIM_RETRIES);
                available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 
@@ -3415,6 +3498,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        enum compact_result compact_result;
        int compaction_retries = 0;
        int no_progress_loops = 0;
+       unsigned long alloc_start = jiffies;
+       unsigned int stall_timeout = 10 * HZ;
 
        /*
         * In the slowpath, we sanity check order to avoid ever trying to
@@ -3559,9 +3644,6 @@ retry:
        if (page)
                goto got_pg;
 
-       if (order && compaction_made_progress(compact_result))
-               compaction_retries++;
-
        /* Do not loop if specifically requested */
        if (gfp_mask & __GFP_NORETRY)
                goto nopage;
@@ -3573,18 +3655,16 @@ retry:
        if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
                goto nopage;
 
-       /*
-        * Costly allocations might have made a progress but this doesn't mean
-        * their order will become available due to high fragmentation so
-        * always increment the no progress counter for them
-        */
-       if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
-               no_progress_loops = 0;
-       else
-               no_progress_loops++;
+       /* Make sure we know about allocations which stall for too long */
+       if (time_after(jiffies, alloc_start + stall_timeout)) {
+               warn_alloc(gfp_mask,
+                       "page alloction stalls for %ums, order:%u\n",
+                       jiffies_to_msecs(jiffies-alloc_start), order);
+               stall_timeout += 10 * HZ;
+       }
 
        if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
-                                did_some_progress > 0, no_progress_loops))
+                                did_some_progress > 0, &no_progress_loops))
                goto retry;
 
        /*
@@ -3596,7 +3676,7 @@ retry:
        if (did_some_progress > 0 &&
                        should_compact_retry(ac, order, alloc_flags,
                                compact_result, &compact_priority,
-                               compaction_retries))
+                               &compaction_retries))
                goto retry;
 
        /* Reclaim has failed us, start killing things */
@@ -3611,7 +3691,8 @@ retry:
        }
 
 nopage:
-       warn_alloc_failed(gfp_mask, order, NULL);
+       warn_alloc(gfp_mask,
+                       "page allocation failure: order:%u", order);
 got_pg:
        return page;
 }
@@ -4560,7 +4641,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
        int j;
        struct zonelist *zonelist;
 
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
        for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                ;
        j = build_zonelists_node(NODE_DATA(node), zonelist, j);
@@ -4576,7 +4657,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
        int j;
        struct zonelist *zonelist;
 
-       zonelist = &pgdat->node_zonelists[1];
+       zonelist = &pgdat->node_zonelists[ZONELIST_NOFALLBACK];
        j = build_zonelists_node(pgdat, zonelist, 0);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
@@ -4597,7 +4678,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
        struct zone *z;
        struct zonelist *zonelist;
 
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
        pos = 0;
        for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
                for (j = 0; j < nr_nodes; j++) {
@@ -4732,7 +4813,7 @@ static void build_zonelists(pg_data_t *pgdat)
 
        local_node = pgdat->node_id;
 
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
        j = build_zonelists_node(pgdat, zonelist, 0);
 
        /*
@@ -5004,15 +5085,6 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                        break;
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
-               /*
-                * If not mirrored_kernelcore and ZONE_MOVABLE exists, range
-                * from zone_movable_pfn[nid] to end of each node should be
-                * ZONE_MOVABLE not ZONE_NORMAL. skip it.
-                */
-               if (!mirrored_kernelcore && zone_movable_pfn[nid])
-                       if (zone == ZONE_NORMAL && pfn >= zone_movable_pfn[nid])
-                               continue;
-
                /*
                 * Check given memblock attribute by firmware which can affect
                 * kernel memory layout.  If zone==ZONE_MOVABLE but memory is
@@ -5456,6 +5528,12 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
                        *zone_end_pfn = min(node_end_pfn,
                                arch_zone_highest_possible_pfn[movable_zone]);
 
+               /* Adjust for ZONE_MOVABLE starting within this range */
+               } else if (!mirrored_kernelcore &&
+                       *zone_start_pfn < zone_movable_pfn[nid] &&
+                       *zone_end_pfn > zone_movable_pfn[nid]) {
+                       *zone_end_pfn = zone_movable_pfn[nid];
+
                /* Check if this whole range is within ZONE_MOVABLE */
                } else if (*zone_start_pfn >= zone_movable_pfn[nid])
                        *zone_start_pfn = *zone_end_pfn;
@@ -5559,28 +5637,23 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
         * Treat pages to be ZONE_MOVABLE in ZONE_NORMAL as absent pages
         * and vice versa.
         */
-       if (zone_movable_pfn[nid]) {
-               if (mirrored_kernelcore) {
-                       unsigned long start_pfn, end_pfn;
-                       struct memblock_region *r;
-
-                       for_each_memblock(memory, r) {
-                               start_pfn = clamp(memblock_region_memory_base_pfn(r),
-                                                 zone_start_pfn, zone_end_pfn);
-                               end_pfn = clamp(memblock_region_memory_end_pfn(r),
-                                               zone_start_pfn, zone_end_pfn);
-
-                               if (zone_type == ZONE_MOVABLE &&
-                                   memblock_is_mirror(r))
-                                       nr_absent += end_pfn - start_pfn;
-
-                               if (zone_type == ZONE_NORMAL &&
-                                   !memblock_is_mirror(r))
-                                       nr_absent += end_pfn - start_pfn;
-                       }
-               } else {
-                       if (zone_type == ZONE_NORMAL)
-                               nr_absent += node_end_pfn - zone_movable_pfn[nid];
+       if (mirrored_kernelcore && zone_movable_pfn[nid]) {
+               unsigned long start_pfn, end_pfn;
+               struct memblock_region *r;
+
+               for_each_memblock(memory, r) {
+                       start_pfn = clamp(memblock_region_memory_base_pfn(r),
+                                         zone_start_pfn, zone_end_pfn);
+                       end_pfn = clamp(memblock_region_memory_end_pfn(r),
+                                       zone_start_pfn, zone_end_pfn);
+
+                       if (zone_type == ZONE_MOVABLE &&
+                           memblock_is_mirror(r))
+                               nr_absent += end_pfn - start_pfn;
+
+                       if (zone_type == ZONE_NORMAL &&
+                           !memblock_is_mirror(r))
+                               nr_absent += end_pfn - start_pfn;
                }
        }
 
@@ -6934,6 +7007,17 @@ static int __init set_hashdist(char *str)
 __setup("hashdist=", set_hashdist);
 #endif
 
+#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+/*
+ * Returns the number of pages that the arch has reserved but
+ * that are not known to alloc_large_system_hash().
+ */
+static unsigned long __init arch_reserved_kernel_pages(void)
+{
+       return 0;
+}
+#endif
+
 /*
  * allocate a large system hash table from bootmem
  * - it is assumed that the hash table must contain an exact power-of-2
@@ -6958,6 +7042,7 @@ void *__init alloc_large_system_hash(const char *tablename,
        if (!numentries) {
                /* round applicable memory size up to nearest megabyte */
                numentries = nr_kernel_pages;
+               numentries -= arch_reserved_kernel_pages();
 
                /* It isn't necessary when PAGE_SIZE >= 1MB */
                if (PAGE_SHIFT < 20)