Merge branch 'pcmcia' of git://git.armlinux.org.uk/~rmk/linux-arm

[cascardo/linux.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index 0c34633..ca423cc 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -607,6 +607,9 @@ static bool need_debug_guardpage(void)
         if (!debug_pagealloc_enabled())
                 return false;
  
+       if (!debug_guardpage_minorder())
+               return false;
+
         return true;
  }
  
@@ -615,6 +618,9 @@ static void init_debug_guardpage(void)
         if (!debug_pagealloc_enabled())
                 return;
  
+       if (!debug_guardpage_minorder())
+               return;
+
         _debug_guardpage_enabled = true;
  }
  
@@ -635,19 +641,22 @@ static int __init debug_guardpage_minorder_setup(char *buf)
         pr_info("Setting debug_guardpage_minorder to %lu\n", res);
         return 0;
  }
-__setup("debug_guardpage_minorder=", debug_guardpage_minorder_setup);
+early_param("debug_guardpage_minorder", debug_guardpage_minorder_setup);
  
-static inline void set_page_guard(struct zone *zone, struct page *page,
+static inline bool set_page_guard(struct zone *zone, struct page *page,
                                 unsigned int order, int migratetype)
  {
         struct page_ext *page_ext;
  
         if (!debug_guardpage_enabled())
-               return;
+               return false;
+
+       if (order >= debug_guardpage_minorder())
+               return false;
  
         page_ext = lookup_page_ext(page);
         if (unlikely(!page_ext))
-               return;
+               return false;
  
         __set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
  
@@ -655,6 +664,8 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
         set_page_private(page, order);
         /* Guard pages are not available for any usage */
         __mod_zone_freepage_state(zone, -(1 << order), migratetype);
+
+       return true;
  }
  
  static inline void clear_page_guard(struct zone *zone, struct page *page,
@@ -676,9 +687,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
                 __mod_zone_freepage_state(zone, (1 << order), migratetype);
  }
  #else
-struct page_ext_operations debug_guardpage_ops = { NULL, };
-static inline void set_page_guard(struct zone *zone, struct page *page,
-                               unsigned int order, int migratetype) {}
+struct page_ext_operations debug_guardpage_ops;
+static inline bool set_page_guard(struct zone *zone, struct page *page,
+                       unsigned int order, int migratetype) { return false; }
  static inline void clear_page_guard(struct zone *zone, struct page *page,
                                 unsigned int order, int migratetype) {}
  #endif
@@ -1622,18 +1633,15 @@ static inline void expand(struct zone *zone, struct page *page,
                 size >>= 1;
                 VM_BUG_ON_PAGE(bad_range(zone, &page[size]), &page[size]);
  
-               if (IS_ENABLED(CONFIG_DEBUG_PAGEALLOC) &&
-                       debug_guardpage_enabled() &&
-                       high < debug_guardpage_minorder()) {
-                       /*
-                        * Mark as guard pages (or page), that will allow to
-                        * merge back to allocator when buddy will be freed.
-                        * Corresponding page table entries will not be touched,
-                        * pages will stay not present in virtual address space
-                        */
-                       set_page_guard(zone, &page[size], high, migratetype);
+               /*
+                * Mark as guard pages (or page); this allows them to be
+                * merged back into the allocator when the buddy is freed.
+                * The corresponding page table entries are not touched,
+                * so the pages stay non-present in virtual address space.
+                */
+               if (set_page_guard(zone, &page[size], high, migratetype))
                         continue;
-               }
+
                 list_add(&page[size].lru, &area->free_list[migratetype]);
                 area->nr_free++;
                 set_page_order(&page[size], high);
@@ -2971,9 +2979,11 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
                 DEFAULT_RATELIMIT_INTERVAL,
                 DEFAULT_RATELIMIT_BURST);
  
-void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
+void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
  {
         unsigned int filter = SHOW_MEM_FILTER_NODES;
+       struct va_format vaf;
+       va_list args;
  
         if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
             debug_guardpage_minorder() > 0)
@@ -2991,22 +3001,16 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
         if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
                 filter &= ~SHOW_MEM_FILTER_NODES;
  
-       if (fmt) {
-               struct va_format vaf;
-               va_list args;
+       pr_warn("%s: ", current->comm);
  
-               va_start(args, fmt);
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
+       pr_cont("%pV", &vaf);
+       va_end(args);
  
-               vaf.fmt = fmt;
-               vaf.va = &args;
+       pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask);
  
-               pr_warn("%pV", &vaf);
-
-               va_end(args);
-       }
-
-       pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
-               current->comm, order, gfp_mask, &gfp_mask);
         dump_stack();
         if (!should_suppress_show_mem())
                 show_mem(filter);
@@ -3148,6 +3152,65 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         return NULL;
  }
  
+static inline bool
+should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+                    enum compact_result compact_result,
+                    enum compact_priority *compact_priority,
+                    int *compaction_retries)
+{
+       int max_retries = MAX_COMPACT_RETRIES;
+       int min_priority;
+
+       if (!order)
+               return false;
+
+       if (compaction_made_progress(compact_result))
+               (*compaction_retries)++;
+
+       /*
+        * Compaction failed outright, i.e. it treats every zone as
+        * desperately out of memory, so retrying only makes sense if the
+        * failure may have been caused by an insufficient priority.
+        */
+       if (compaction_failed(compact_result))
+               goto check_priority;
+
+       /*
+        * Make sure compaction wasn't deferred and didn't bail out early
+        * due to lock contention before declaring that we should give up.
+        * In that case retry only if the given zonelist is suitable for
+        * compaction.
+        */
+       if (compaction_withdrawn(compact_result))
+               return compaction_zonelist_suitable(ac, order, alloc_flags);
+
+       /*
+        * !costly requests are much more important than __GFP_REPEAT
+        * costly ones: they are de facto nofail and invoke the OOM killer
+        * to move on, while costly ones can fail and their users are
+        * ready to cope with that. Allowing 1/4 of the retries is rather
+        * arbitrary, but a better decision would need much more detailed
+        * feedback from compaction.
+        */
+       if (order > PAGE_ALLOC_COSTLY_ORDER)
+               max_retries /= 4;
+       if (*compaction_retries <= max_retries)
+               return true;
+
+       /*
+        * Retries are exhausted or compaction failed: move *compact_priority
+        * one step towards min_priority and restart the retry count, if possible.
+        */
+check_priority:
+       min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+                       MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+       if (*compact_priority > min_priority) {
+               (*compact_priority)--;
+               *compaction_retries = 0;
+               return true;
+       }
+       return false;
+}
  #else
  static inline struct page *
  __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3158,13 +3221,11 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         return NULL;
  }
  
-#endif /* CONFIG_COMPACTION */
-
  static inline bool
  should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
                      enum compact_result compact_result,
                      enum compact_priority *compact_priority,
-                    int compaction_retries)
+                    int *compaction_retries)
  {
         struct zone *zone;
         struct zoneref *z;
@@ -3186,6 +3247,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
         }
         return false;
  }
+#endif /* CONFIG_COMPACTION */
  
  /* Perform direct synchronous page reclaim */
  static int
@@ -3336,16 +3398,26 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
  static inline bool
  should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                      struct alloc_context *ac, int alloc_flags,
-                    bool did_some_progress, int no_progress_loops)
+                    bool did_some_progress, int *no_progress_loops)
  {
         struct zone *zone;
         struct zoneref *z;
  
+       /*
+        * Costly allocations might have made progress, but because of high
+        * fragmentation that doesn't mean their order will become available,
+        * so always increment the no-progress counter for them.
+        */
+       if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
+               *no_progress_loops = 0;
+       else
+               (*no_progress_loops)++;
+
         /*
          * Make sure we converge to OOM if we cannot make any progress
          * several times in the row.
          */
-       if (no_progress_loops > MAX_RECLAIM_RETRIES)
+       if (*no_progress_loops > MAX_RECLAIM_RETRIES)
                 return false;
  
         /*
@@ -3360,7 +3432,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                 unsigned long reclaimable;
  
                 available = reclaimable = zone_reclaimable_pages(zone);
-               available -= DIV_ROUND_UP(no_progress_loops * available,
+               available -= DIV_ROUND_UP((*no_progress_loops) * available,
                                           MAX_RECLAIM_RETRIES);
                 available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
  
@@ -3421,6 +3493,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
         enum compact_result compact_result;
         int compaction_retries = 0;
         int no_progress_loops = 0;
+       unsigned long alloc_start = jiffies;
+       unsigned int stall_timeout = 10 * HZ;
  
         /*
          * In the slowpath, we sanity check order to avoid ever trying to
@@ -3565,9 +3639,6 @@ retry:
         if (page)
                 goto got_pg;
  
-       if (order && compaction_made_progress(compact_result))
-               compaction_retries++;
-
         /* Do not loop if specifically requested */
         if (gfp_mask & __GFP_NORETRY)
                 goto nopage;
@@ -3579,18 +3650,16 @@ retry:
         if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
                 goto nopage;
  
-       /*
-        * Costly allocations might have made a progress but this doesn't mean
-        * their order will become available due to high fragmentation so
-        * always increment the no progress counter for them
-        */
-       if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
-               no_progress_loops = 0;
-       else
-               no_progress_loops++;
+       /* Warn (again every 10s) about allocations which stall for too long */
+       if (time_after(jiffies, alloc_start + stall_timeout)) {
+               warn_alloc(gfp_mask,
+                       "page allocation stalls for %ums, order:%u",
+                       jiffies_to_msecs(jiffies - alloc_start), order);
+               stall_timeout += 10 * HZ;
+       }
  
         if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
-                                did_some_progress > 0, no_progress_loops))
+                                did_some_progress > 0, &no_progress_loops))
                 goto retry;
  
         /*
@@ -3602,7 +3671,7 @@ retry:
         if (did_some_progress > 0 &&
                         should_compact_retry(ac, order, alloc_flags,
                                 compact_result, &compact_priority,
-                               compaction_retries))
+                               &compaction_retries))
                 goto retry;
  
         /* Reclaim has failed us, start killing things */
@@ -3617,7 +3686,8 @@ retry:
         }
  
  nopage:
-       warn_alloc_failed(gfp_mask, order, NULL);
+       warn_alloc(gfp_mask,
+                       "page allocation failure: order:%u", order);
  got_pg:
         return page;
  }
@@ -4566,7 +4636,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
         int j;
         struct zonelist *zonelist;
  
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
         for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                 ;
         j = build_zonelists_node(NODE_DATA(node), zonelist, j);
@@ -4582,7 +4652,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
         int j;
         struct zonelist *zonelist;
  
-       zonelist = &pgdat->node_zonelists[1];
+       zonelist = &pgdat->node_zonelists[ZONELIST_NOFALLBACK];
         j = build_zonelists_node(pgdat, zonelist, 0);
         zonelist->_zonerefs[j].zone = NULL;
         zonelist->_zonerefs[j].zone_idx = 0;
@@ -4603,7 +4673,7 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
         struct zone *z;
         struct zonelist *zonelist;
  
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
         pos = 0;
         for (zone_type = MAX_NR_ZONES - 1; zone_type >= 0; zone_type--) {
                 for (j = 0; j < nr_nodes; j++) {
@@ -4738,7 +4808,7 @@ static void build_zonelists(pg_data_t *pgdat)
  
         local_node = pgdat->node_id;
  
-       zonelist = &pgdat->node_zonelists[0];
+       zonelist = &pgdat->node_zonelists[ZONELIST_FALLBACK];
         j = build_zonelists_node(pgdat, zonelist, 0);
  
         /*
@@ -6932,6 +7002,17 @@ static int __init set_hashdist(char *str)
  __setup("hashdist=", set_hashdist);
  #endif
  
+#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
+/*
+ * Number of pages the arch has reserved without alloc_large_system_hash()
+ * knowing about them; this fallback (no arch override) reports none.
+ */
+static unsigned long __init arch_reserved_kernel_pages(void)
+{
+       return 0;
+}
+#endif
+
  /*
   * allocate a large system hash table from bootmem
   * - it is assumed that the hash table must contain an exact power-of-2
@@ -6956,6 +7037,7 @@ void *__init alloc_large_system_hash(const char *tablename,
         if (!numentries) {
                 /* round applicable memory size up to nearest megabyte */
                 numentries = nr_kernel_pages;
+               numentries -= arch_reserved_kernel_pages();
  
                 /* It isn't necessary when PAGE_SIZE >= 1MB */
                 if (PAGE_SHIFT < 20)