mm, compaction: skip buddy pages by their order in the migrate scanner

author Vlastimil Babka <vbabka@suse.cz>

Thu, 9 Oct 2014 22:27:23 +0000 (15:27 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 10 Oct 2014 02:25:54 +0000 (22:25 -0400)
author Vlastimil Babka <vbabka@suse.cz>
Thu, 9 Oct 2014 22:27:23 +0000 (15:27 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 Oct 2014 02:25:54 +0000 (22:25 -0400)
diff --git a/mm/compaction.c b/mm/compaction.c

index b69b7da..b9cf751 100644 (file)
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -313,8 +313,15 @@ static inline bool compact_should_abort(struct compact_control *cc)
  static bool suitable_migration_target(struct page *page)
  {
         /* If the page is a large free page, then disallow migration */
-       if (PageBuddy(page) && page_order(page) >= pageblock_order)
-               return false;
+       if (PageBuddy(page)) {
+               /*
+                * We are checking page_order without zone->lock taken. But
+                * the only small danger is that we skip a potentially suitable
+                * pageblock, so it's not worth validating the order.
+                */
+               if (page_order_unsafe(page) >= pageblock_order)
+                       return false;
+       }
  
         /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
         if (migrate_async_suitable(get_pageblock_migratetype(page)))
@@ -608,11 +615,23 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                         valid_page = page;
  
                 /*
-                * Skip if free. page_order cannot be used without zone->lock
-                * as nothing prevents parallel allocations or buddy merging.
+                * Skip if free. We read page order here without zone lock
+                * which is generally unsafe, but the race window is small and
+                * the worst thing that can happen is that we skip some
+                * potential isolation targets.
                  */
-               if (PageBuddy(page))
+               if (PageBuddy(page)) {
+                       unsigned long freepage_order = page_order_unsafe(page);
+
+                       /*
+                        * Without lock, we cannot be sure that what we got is
+                        * a valid page order. Consider only values in the
+                        * valid order range to prevent low_pfn overflow.
+                        */
+                       if (freepage_order > 0 && freepage_order < MAX_ORDER)
+                               low_pfn += (1UL << freepage_order) - 1;
                         continue;
+               }
  
                 /*
                  * Check may be lockless but that's ok as we recheck later.
@@ -698,6 +717,13 @@ isolate_success:
                 }
         }
  
+       /*
+        * Skipping a free page by its order in the PageBuddy() check may
+        * have advanced low_pfn past end_pfn; clamp it to the scanned range.
+        */
+       if (unlikely(low_pfn > end_pfn))
+               low_pfn = end_pfn;
+
         if (locked)
                 spin_unlock_irqrestore(&zone->lru_lock, flags);
  
diff --git a/mm/internal.h b/mm/internal.h

index 4c1d604..86ae964 100644 (file)
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -164,7 +164,8 @@ isolate_migratepages_range(struct compact_control *cc,
   * general, page_zone(page)->lock must be held by the caller to prevent the
   * page from being allocated in parallel and returning garbage as the order.
   * If a caller does not hold page_zone(page)->lock, it must guarantee that the
- * page cannot be allocated or merged in parallel.
+ * page cannot be allocated or merged in parallel. Alternatively, it must
+ * handle invalid values gracefully, and use page_order_unsafe() below.
   */
  static inline unsigned long page_order(struct page *page)
  {
@@ -172,6 +173,19 @@ static inline unsigned long page_order(struct page *page)
         return page_private(page);
  }
  
+/*
+ * Like page_order(), but for callers who cannot afford to hold the zone lock.
+ * PageBuddy() should be checked first by the caller to minimize race window,
+ * and invalid values must be handled gracefully.
+ *
+ * ACCESS_ONCE is used so that if the caller assigns the result into a local
+ * variable and e.g. tests it for valid range before using, the compiler cannot
+ * decide to remove the variable and inline the page_private(page) multiple
+ * times, potentially observing different values in the tests and the actual
+ * use of the result.
+ */
+#define page_order_unsafe(page)                ACCESS_ONCE(page_private(page))
+
  static inline bool is_cow_mapping(vm_flags_t flags)
  {
         return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
author	Vlastimil Babka <vbabka@suse.cz>
	Thu, 9 Oct 2014 22:27:23 +0000 (15:27 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 10 Oct 2014 02:25:54 +0000 (22:25 -0400)
mm/compaction.c		patch \| blob \| history
mm/internal.h		patch \| blob \| history