mm: warn about allocations which stall for too long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e00f545..ca423cc 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2979,9 +2979,11 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
                DEFAULT_RATELIMIT_INTERVAL,
                DEFAULT_RATELIMIT_BURST);
 
-void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
+void warn_alloc(gfp_t gfp_mask, const char *fmt, ...)
 {
        unsigned int filter = SHOW_MEM_FILTER_NODES;
+       struct va_format vaf;
+       va_list args;
 
        if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs) ||
            debug_guardpage_minorder() > 0)
@@ -2999,22 +3001,16 @@ void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
        if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
                filter &= ~SHOW_MEM_FILTER_NODES;
 
-       if (fmt) {
-               struct va_format vaf;
-               va_list args;
+       pr_warn("%s: ", current->comm);
 
-               va_start(args, fmt);
+       va_start(args, fmt);
+       vaf.fmt = fmt;
+       vaf.va = &args;
+       pr_cont("%pV", &vaf);
+       va_end(args);
 
-               vaf.fmt = fmt;
-               vaf.va = &args;
+       pr_cont(", mode:%#x(%pGg)\n", gfp_mask, &gfp_mask);
 
-               pr_warn("%pV", &vaf);
-
-               va_end(args);
-       }
-
-       pr_warn("%s: page allocation failure: order:%u, mode:%#x(%pGg)\n",
-               current->comm, order, gfp_mask, &gfp_mask);
        dump_stack();
        if (!should_suppress_show_mem())
                show_mem(filter);
@@ -3156,6 +3152,65 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
+static inline bool
+should_compact_retry(struct alloc_context *ac, int order, int alloc_flags,
+                    enum compact_result compact_result,
+                    enum compact_priority *compact_priority,
+                    int *compaction_retries)
+{
+       int max_retries = MAX_COMPACT_RETRIES;
+       int min_priority;
+
+       if (!order)
+               return false;
+
+       if (compaction_made_progress(compact_result))
+               (*compaction_retries)++;
+
+       /*
+        * compaction considers all the zones as desperately out of memory
+        * so it doesn't really make much sense to retry except when the
+        * failure could be caused by insufficient priority
+        */
+       if (compaction_failed(compact_result))
+               goto check_priority;
+
+       /*
+        * make sure the compaction wasn't deferred or didn't bail out early
+        * due to lock contention before we declare that we should give up.
+        * But do not retry if the given zonelist is not suitable for
+        * compaction.
+        */
+       if (compaction_withdrawn(compact_result))
+               return compaction_zonelist_suitable(ac, order, alloc_flags);
+
+       /*
+        * !costly requests are much more important than __GFP_REPEAT
+        * costly ones because they are de facto nofail and invoke the
+        * OOM killer to move on while costly requests can fail and
+        * users are ready to cope with that. 1/4 of the retries is
+        * rather arbitrary but we would need much more detailed
+        * feedback from compaction to make a better decision.
+        */
+       if (order > PAGE_ALLOC_COSTLY_ORDER)
+               max_retries /= 4;
+       if (*compaction_retries <= max_retries)
+               return true;
+
+       /*
+        * Make sure there are attempts at the highest priority if we exhausted
+        * all retries or failed at the lower priorities.
+        */
+check_priority:
+       min_priority = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+                       MIN_COMPACT_COSTLY_PRIORITY : MIN_COMPACT_PRIORITY;
+       if (*compact_priority > min_priority) {
+               (*compact_priority)--;
+               *compaction_retries = 0;
+               return true;
+       }
+       return false;
+}
 #else
 static inline struct page *
 __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
@@ -3166,13 +3221,11 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
        return NULL;
 }
 
-#endif /* CONFIG_COMPACTION */
-
 static inline bool
 should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_flags,
                     enum compact_result compact_result,
                     enum compact_priority *compact_priority,
-                    int compaction_retries)
+                    int *compaction_retries)
 {
        struct zone *zone;
        struct zoneref *z;
@@ -3194,6 +3247,7 @@ should_compact_retry(struct alloc_context *ac, unsigned int order, int alloc_fla
        }
        return false;
 }
+#endif /* CONFIG_COMPACTION */
 
 /* Perform direct synchronous page reclaim */
 static int
@@ -3344,16 +3398,26 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 static inline bool
 should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                     struct alloc_context *ac, int alloc_flags,
-                    bool did_some_progress, int no_progress_loops)
+                    bool did_some_progress, int *no_progress_loops)
 {
        struct zone *zone;
        struct zoneref *z;
 
+       /*
+        * Costly allocations might have made progress but this doesn't mean
+        * their order will become available due to high fragmentation, so
+        * always increment the no progress counter for them
+        */
+       if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
+               *no_progress_loops = 0;
+       else
+               (*no_progress_loops)++;
+
        /*
         * Make sure we converge to OOM if we cannot make any progress
         * several times in the row.
         */
-       if (no_progress_loops > MAX_RECLAIM_RETRIES)
+       if (*no_progress_loops > MAX_RECLAIM_RETRIES)
                return false;
 
        /*
@@ -3368,7 +3432,7 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
                unsigned long reclaimable;
 
                available = reclaimable = zone_reclaimable_pages(zone);
-               available -= DIV_ROUND_UP(no_progress_loops * available,
+               available -= DIV_ROUND_UP((*no_progress_loops) * available,
                                          MAX_RECLAIM_RETRIES);
                available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
 
@@ -3429,6 +3493,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
        enum compact_result compact_result;
        int compaction_retries = 0;
        int no_progress_loops = 0;
+       unsigned long alloc_start = jiffies;
+       unsigned int stall_timeout = 10 * HZ;
 
        /*
         * In the slowpath, we sanity check order to avoid ever trying to
@@ -3573,9 +3639,6 @@ retry:
        if (page)
                goto got_pg;
 
-       if (order && compaction_made_progress(compact_result))
-               compaction_retries++;
-
        /* Do not loop if specifically requested */
        if (gfp_mask & __GFP_NORETRY)
                goto nopage;
@@ -3587,18 +3650,16 @@ retry:
        if (order > PAGE_ALLOC_COSTLY_ORDER && !(gfp_mask & __GFP_REPEAT))
                goto nopage;
 
-       /*
-        * Costly allocations might have made a progress but this doesn't mean
-        * their order will become available due to high fragmentation so
-        * always increment the no progress counter for them
-        */
-       if (did_some_progress && order <= PAGE_ALLOC_COSTLY_ORDER)
-               no_progress_loops = 0;
-       else
-               no_progress_loops++;
+       /* Make sure we know about allocations which stall for too long */
+       if (time_after(jiffies, alloc_start + stall_timeout)) {
+               warn_alloc(gfp_mask,
+                       "page allocation stalls for %ums, order:%u",
+                       jiffies_to_msecs(jiffies - alloc_start), order);
+               stall_timeout += 10 * HZ;
+       }
 
        if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
-                                did_some_progress > 0, no_progress_loops))
+                                did_some_progress > 0, &no_progress_loops))
                goto retry;
 
        /*
@@ -3610,7 +3671,7 @@ retry:
        if (did_some_progress > 0 &&
                        should_compact_retry(ac, order, alloc_flags,
                                compact_result, &compact_priority,
-                               compaction_retries))
+                               &compaction_retries))
                goto retry;
 
        /* Reclaim has failed us, start killing things */
@@ -3625,7 +3686,8 @@ retry:
        }
 
 nopage:
-       warn_alloc_failed(gfp_mask, order, NULL);
+       warn_alloc(gfp_mask,
+                       "page allocation failure: order:%u", order);
 got_pg:
        return page;
 }
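
For reference, a rough sketch of the console output the reworked warn_alloc() produces, assuming a GFP_KERNEL request (the task name, timing and exact flag rendering below are illustrative, not taken from a real log). The new stall warning looks roughly like

    kworker/u8:7: page allocation stalls for 10048ms, order:0, mode:0x24000c0(GFP_KERNEL)

while the nopage path keeps the existing failure message in the same layout

    kworker/u8:7: page allocation failure: order:4, mode:0x24000c0(GFP_KERNEL)

and both are followed by dump_stack() and, unless suppressed, show_mem() output.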