shmem: fix tmpfs to handle the huge= option properly
[cascardo/linux.git] / mm / migrate.c
index bd3fdc2..f7ee04a 100644
@@ -31,6 +31,7 @@
 #include <linux/vmalloc.h>
 #include <linux/security.h>
 #include <linux/backing-dev.h>
+#include <linux/compaction.h>
 #include <linux/syscalls.h>
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
@@ -73,6 +74,81 @@ int migrate_prep_local(void)
        return 0;
 }
 
+bool isolate_movable_page(struct page *page, isolate_mode_t mode)
+{
+       struct address_space *mapping;
+
+       /*
+        * Avoid burning cycles with pages that are still under __free_pages(),
+        * or that just got freed under us.
+        *
+        * In case we 'win' a race for a movable page being freed under us
+        * and raise its refcount, preventing __free_pages() from doing its
+        * job, the put_page() at the end of this block will take care of
+        * releasing the page, thus avoiding a nasty leak.
+        */
+       if (unlikely(!get_page_unless_zero(page)))
+               goto out;
+
+       /*
+        * Check PageMovable before taking PG_locked, because the page's owner
+        * assumes that nobody touches the PG_locked bit of a newly allocated
+        * page, so unconditionally grabbing the lock breaks that assumption.
+        */
+       if (unlikely(!__PageMovable(page)))
+               goto out_putpage;
+       /*
+        * As movable pages are not isolated from LRU lists, concurrent
+        * compaction threads can race against page migration functions
+        * as well as race against the release of a page.
+        *
+        * In order to avoid having an already isolated movable page
+        * being (wrongly) re-isolated while it is under migration,
+        * or to avoid attempting to isolate pages being released,
+        * let's be sure we have the page lock
+        * before proceeding with the movable page isolation steps.
+        */
+       if (unlikely(!trylock_page(page)))
+               goto out_putpage;
+
+       if (!PageMovable(page) || PageIsolated(page))
+               goto out_no_isolated;
+
+       mapping = page_mapping(page);
+       VM_BUG_ON_PAGE(!mapping, page);
+
+       if (!mapping->a_ops->isolate_page(page, mode))
+               goto out_no_isolated;
+
+       /* Drivers shouldn't use the PG_isolated bit of page->flags */
+       WARN_ON_ONCE(PageIsolated(page));
+       __SetPageIsolated(page);
+       unlock_page(page);
+
+       return true;
+
+out_no_isolated:
+       unlock_page(page);
+out_putpage:
+       put_page(page);
+out:
+       return false;
+}
+
+/* It should be called on a page which is PG_movable */
+void putback_movable_page(struct page *page)
+{
+       struct address_space *mapping;
+
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(!PageMovable(page), page);
+       VM_BUG_ON_PAGE(!PageIsolated(page), page);
+
+       mapping = page_mapping(page);
+       mapping->a_ops->putback_page(page);
+       __ClearPageIsolated(page);
+}
+
 /*
  * Put previously isolated pages back onto the appropriate lists
  * from where they were once taken off for compaction/migration.
@@ -92,12 +168,25 @@ void putback_movable_pages(struct list_head *l)
                        continue;
                }
                list_del(&page->lru);
-               dec_zone_page_state(page, NR_ISOLATED_ANON +
+               dec_node_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               if (unlikely(isolated_balloon_page(page)))
-                       balloon_page_putback(page);
-               else
+               /*
+                * We isolated a non-lru movable page, so here we can use
+                * __PageMovable because an LRU page's mapping cannot have
+                * PAGE_MAPPING_MOVABLE set.
+                */
+               if (unlikely(__PageMovable(page))) {
+                       VM_BUG_ON_PAGE(!PageIsolated(page), page);
+                       lock_page(page);
+                       if (PageMovable(page))
+                               putback_movable_page(page);
+                       else
+                               __ClearPageIsolated(page);
+                       unlock_page(page);
+                       put_page(page);
+               } else {
                        putback_lru_page(page);
+               }
        }
 }
 
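
The __PageMovable()/PageMovable() pair relied on above is introduced by other patches in this series (in include/linux/page-flags.h and mm/compaction.c), not by this diff. As a sketch of the assumed semantics: __PageMovable() only tests the tag bits encoded in the low bits of page->mapping and may be used without the page lock, while PageMovable() must run under lock_page() and re-checks that the owner still provides an isolate_page callback.

/* Sketch of the helpers assumed above; the real definitions live outside this file. */
#define PAGE_MAPPING_ANON	0x1
#define PAGE_MAPPING_MOVABLE	0x2
#define PAGE_MAPPING_KSM	(PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)

/* Lockless check: only inspects the mapping tag bits, so it may be stale. */
static __always_inline int __PageMovable(struct page *page)
{
	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
				PAGE_MAPPING_MOVABLE;
}

/* Must be called with the page locked; re-checks the owner's callbacks. */
int PageMovable(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	mapping = page_mapping(page);
	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
		return 1;

	return 0;
}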
@@ -170,7 +259,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
        } else if (PageAnon(new))
                page_add_anon_rmap(new, vma, addr, false);
        else
-               page_add_file_rmap(new);
+               page_add_file_rmap(new, false);
 
        if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
                mlock_vma_page(new);
@@ -412,19 +501,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
         * new page and drop references to the old page.
         *
         * Note that anonymous pages are accounted for
-        * via NR_FILE_PAGES and NR_ANON_PAGES if they
+        * via NR_FILE_PAGES and NR_ANON_MAPPED if they
         * are mapped to swap space.
         */
        if (newzone != oldzone) {
-               __dec_zone_state(oldzone, NR_FILE_PAGES);
-               __inc_zone_state(newzone, NR_FILE_PAGES);
+               __dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
+               __inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
                if (PageSwapBacked(page) && !PageSwapCache(page)) {
-                       __dec_zone_state(oldzone, NR_SHMEM);
-                       __inc_zone_state(newzone, NR_SHMEM);
+                       __dec_node_state(oldzone->zone_pgdat, NR_SHMEM);
+                       __inc_node_state(newzone->zone_pgdat, NR_SHMEM);
                }
                if (dirty && mapping_cap_account_dirty(mapping)) {
-                       __dec_zone_state(oldzone, NR_FILE_DIRTY);
-                       __inc_zone_state(newzone, NR_FILE_DIRTY);
+                       __dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
+                       __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
+                       __inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
+                       __inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
                }
        }
        local_irq_enable();
@@ -594,7 +685,7 @@ EXPORT_SYMBOL(migrate_page_copy);
  ***********************************************************/
 
 /*
- * Common logic to directly migrate a single page suitable for
+ * Common logic to directly migrate a single LRU page suitable for
  * pages that do not use PagePrivate/PagePrivate2.
  *
  * Pages are locked upon entry and exit.
@@ -757,33 +848,72 @@ static int move_to_new_page(struct page *newpage, struct page *page,
                                enum migrate_mode mode)
 {
        struct address_space *mapping;
-       int rc;
+       int rc = -EAGAIN;
+       bool is_lru = !__PageMovable(page);
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
 
        mapping = page_mapping(page);
-       if (!mapping)
-               rc = migrate_page(mapping, newpage, page, mode);
-       else if (mapping->a_ops->migratepage)
+
+       if (likely(is_lru)) {
+               if (!mapping)
+                       rc = migrate_page(mapping, newpage, page, mode);
+               else if (mapping->a_ops->migratepage)
+                       /*
+                        * Most pages have a mapping and most filesystems
+                        * provide a migratepage callback. Anonymous pages
+                        * are part of swap space which also has its own
+                        * migratepage callback. This is the most common path
+                        * for page migration.
+                        */
+                       rc = mapping->a_ops->migratepage(mapping, newpage,
+                                                       page, mode);
+               else
+                       rc = fallback_migrate_page(mapping, newpage,
+                                                       page, mode);
+       } else {
                /*
-                * Most pages have a mapping and most filesystems provide a
-                * migratepage callback. Anonymous pages are part of swap
-                * space which also has its own migratepage callback. This
-                * is the most common path for page migration.
+                * In case of a non-lru page, it could have been released after
+                * the isolation step. In that case, we shouldn't try migration.
                 */
-               rc = mapping->a_ops->migratepage(mapping, newpage, page, mode);
-       else
-               rc = fallback_migrate_page(mapping, newpage, page, mode);
+               VM_BUG_ON_PAGE(!PageIsolated(page), page);
+               if (!PageMovable(page)) {
+                       rc = MIGRATEPAGE_SUCCESS;
+                       __ClearPageIsolated(page);
+                       goto out;
+               }
+
+               rc = mapping->a_ops->migratepage(mapping, newpage,
+                                               page, mode);
+               WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
+                       !PageIsolated(page));
+       }
 
        /*
         * When successful, old pagecache page->mapping must be cleared before
         * page is freed; but stats require that PageAnon be left as PageAnon.
         */
        if (rc == MIGRATEPAGE_SUCCESS) {
-               if (!PageAnon(page))
+               if (__PageMovable(page)) {
+                       VM_BUG_ON_PAGE(!PageIsolated(page), page);
+
+                       /*
+                        * We clear PG_movable under page_lock so that no
+                        * compactor can try to migrate this page.
+                        */
+                       __ClearPageIsolated(page);
+               }
+
+               /*
+                * page->mapping of anonymous and movable pages will be cleared
+                * by free_pages_prepare(), so don't reset it here; this keeps
+                * type checks such as PageAnon() working until the page is freed.
+                */
+               if (!PageMappingFlags(page))
                        page->mapping = NULL;
        }
+out:
        return rc;
 }
 
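
For non-lru pages, move_to_new_page() above dispatches to mapping->a_ops->migratepage() and expects PG_isolated to stay set when the callback reports MIGRATEPAGE_SUCCESS. A minimal driver-side sketch of that contract follows; the 'foo' names are hypothetical, and __SetPageMovable()/__ClearPageMovable() are helpers added elsewhere in this series.

/* Hypothetical driver sketch of the callbacks the core code above invokes. */
static bool foo_isolate_page(struct page *page, isolate_mode_t mode)
{
	/* Called with the page locked. A real driver detaches the page from
	 * its internal lists here and returns true if it may be migrated. */
	return true;
}

static int foo_migratepage(struct address_space *mapping,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	/* A real driver copies contents and metadata to newpage here, then
	 * hands the movable role over. PG_isolated is left alone; the core
	 * clears it after a successful migration. */
	__SetPageMovable(newpage, mapping);
	__ClearPageMovable(page);
	return MIGRATEPAGE_SUCCESS;
}

static void foo_putback_page(struct page *page)
{
	/* Migration failed: re-insert the page into driver data structures. */
}

static const struct address_space_operations foo_aops = {
	.isolate_page	= foo_isolate_page,
	.migratepage	= foo_migratepage,
	.putback_page	= foo_putback_page,
};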
@@ -793,6 +923,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
        int rc = -EAGAIN;
        int page_was_mapped = 0;
        struct anon_vma *anon_vma = NULL;
+       bool is_lru = !__PageMovable(page);
 
        if (!trylock_page(page)) {
                if (!force || mode == MIGRATE_ASYNC)
@@ -861,15 +992,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
        if (unlikely(!trylock_page(newpage)))
                goto out_unlock;
 
-       if (unlikely(isolated_balloon_page(page))) {
-               /*
-                * A ballooned page does not need any special attention from
-                * physical to virtual reverse mapping procedures.
-                * Skip any attempt to unmap PTEs or to remap swap cache,
-                * in order to avoid burning cycles at rmap level, and perform
-                * the page migration right away (proteced by page lock).
-                */
-               rc = balloon_page_migrate(newpage, page, mode);
+       if (unlikely(!is_lru)) {
+               rc = move_to_new_page(newpage, page, mode);
                goto out_unlock_both;
        }
 
@@ -915,6 +1039,19 @@ out_unlock:
                put_anon_vma(anon_vma);
        unlock_page(page);
 out:
+       /*
+        * If migration is successful, drop the reference we took on the
+        * newpage; it will not free the page because the new page owner
+        * holds its own reference. If the newpage is an LRU page, also
+        * put it back on the LRU list here.
+        */
+       if (rc == MIGRATEPAGE_SUCCESS) {
+               if (unlikely(__PageMovable(newpage)))
+                       put_page(newpage);
+               else
+                       putback_lru_page(newpage);
+       }
+
        return rc;
 }
 
@@ -948,6 +1085,18 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 
        if (page_count(page) == 1) {
                /* page was freed from under us. So we are done. */
+               ClearPageActive(page);
+               ClearPageUnevictable(page);
+               if (unlikely(__PageMovable(page))) {
+                       lock_page(page);
+                       if (!PageMovable(page))
+                               __ClearPageIsolated(page);
+                       unlock_page(page);
+               }
+               if (put_new_page)
+                       put_new_page(newpage, private);
+               else
+                       put_page(newpage);
                goto out;
        }
 
@@ -960,10 +1109,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
        }
 
        rc = __unmap_and_move(page, newpage, force, mode);
-       if (rc == MIGRATEPAGE_SUCCESS) {
-               put_new_page = NULL;
+       if (rc == MIGRATEPAGE_SUCCESS)
                set_page_owner_migrate_reason(newpage, reason);
-       }
 
 out:
        if (rc != -EAGAIN) {
@@ -974,35 +1121,47 @@ out:
                 * restored.
                 */
                list_del(&page->lru);
-               dec_zone_page_state(page, NR_ISOLATED_ANON +
+               dec_node_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               /* Soft-offlined page shouldn't go through lru cache list */
-               if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+       }
+
+       /*
+        * If migration is successful, release the reference grabbed during
+        * isolation. Otherwise, restore the page to the right list unless
+        * we want to retry.
+        */
+       if (rc == MIGRATEPAGE_SUCCESS) {
+               put_page(page);
+               if (reason == MR_MEMORY_FAILURE) {
                        /*
-                        * With this release, we free successfully migrated
-                        * page and set PG_HWPoison on just freed page
-                        * intentionally. Although it's rather weird, it's how
-                        * HWPoison flag works at the moment.
+                        * Set PG_HWPoison on the just-freed page
+                        * intentionally. Although it's rather weird,
+                        * it's how the HWPoison flag works at the moment.
                         */
-                       put_page(page);
                        if (!test_set_page_hwpoison(page))
                                num_poisoned_pages_inc();
-               } else
-                       putback_lru_page(page);
-       }
+               }
+       } else {
+               if (rc != -EAGAIN) {
+                       if (likely(!__PageMovable(page))) {
+                               putback_lru_page(page);
+                               goto put_new;
+                       }
 
-       /*
-        * If migration was not successful and there's a freeing callback, use
-        * it.  Otherwise, putback_lru_page() will drop the reference grabbed
-        * during isolation.
-        */
-       if (put_new_page)
-               put_new_page(newpage, private);
-       else if (unlikely(__is_movable_balloon_page(newpage))) {
-               /* drop our reference, page already in the balloon */
-               put_page(newpage);
-       } else
-               putback_lru_page(newpage);
+                       lock_page(page);
+                       if (PageMovable(page))
+                               putback_movable_page(page);
+                       else
+                               __ClearPageIsolated(page);
+                       unlock_page(page);
+                       put_page(page);
+               }
+put_new:
+               if (put_new_page)
+                       put_new_page(newpage, private);
+               else
+                       put_page(newpage);
+       }
 
        if (result) {
                if (rc)
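
For orientation, the caller side of these paths is unchanged: migrate_pages() drives unmap_and_move() above, and on failure the leftover pages, whether LRU or non-lru movable, are handed back via putback_movable_pages(). A minimal caller sketch with hypothetical names:

/* Hypothetical caller sketch: how the paths above are normally driven. */
static struct page *foo_new_page(struct page *page, unsigned long private,
				 int **result)
{
	/* A real caller picks a target node/zone; keep it simple here. */
	return alloc_page(GFP_HIGHUSER_MOVABLE);
}

static int foo_migrate_list(struct list_head *pagelist)
{
	int err;

	err = migrate_pages(pagelist, foo_new_page, NULL, 0,
			    MIGRATE_SYNC, MR_COMPACTION);
	if (err)
		/* Both LRU and non-lru movable pages go back this way. */
		putback_movable_pages(pagelist);

	return err;
}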
@@ -1303,7 +1462,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
                err = isolate_lru_page(page);
                if (!err) {
                        list_add_tail(&page->lru, &pagelist);
-                       inc_zone_page_state(page, NR_ISOLATED_ANON +
+                       inc_node_page_state(page, NR_ISOLATED_ANON +
                                            page_is_file_cache(page));
                }
 put_and_set:
@@ -1569,15 +1728,16 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
                                   unsigned long nr_migrate_pages)
 {
        int z;
+
+       if (!pgdat_reclaimable(pgdat))
+               return false;
+
        for (z = pgdat->nr_zones - 1; z >= 0; z--) {
                struct zone *zone = pgdat->node_zones + z;
 
                if (!populated_zone(zone))
                        continue;
 
-               if (!zone_reclaimable(zone))
-                       continue;
-
                /* Avoid waking kswapd by allocating pages_to_migrate pages. */
                if (!zone_watermark_ok(zone, 0,
                                       high_wmark_pages(zone) +
@@ -1671,7 +1831,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
        }
 
        page_lru = page_is_file_cache(page);
-       mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
+       mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
                                hpage_nr_pages(page));
 
        /*
@@ -1729,7 +1889,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        if (nr_remaining) {
                if (!list_empty(&migratepages)) {
                        list_del(&page->lru);
-                       dec_zone_page_state(page, NR_ISOLATED_ANON +
+                       dec_node_page_state(page, NR_ISOLATED_ANON +
                                        page_is_file_cache(page));
                        putback_lru_page(page);
                }
@@ -1774,7 +1934,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
                goto out_dropref;
 
        new_page = alloc_pages_node(node,
-               (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+               (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
                HPAGE_PMD_ORDER);
        if (!new_page)
                goto out_fail;
@@ -1822,15 +1982,14 @@ fail_putback:
                /* Retake the callers reference and putback on LRU */
                get_page(page);
                putback_lru_page(page);
-               mod_zone_page_state(page_zone(page),
+               mod_node_page_state(page_pgdat(page),
                         NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
 
                goto out_unlock;
        }
 
        orig_entry = *pmd;
-       entry = mk_pmd(new_page, vma->vm_page_prot);
-       entry = pmd_mkhuge(entry);
+       entry = mk_huge_pmd(new_page, vma->vm_page_prot);
        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
        /*
@@ -1874,7 +2033,7 @@ fail_putback:
        count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
        count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-       mod_zone_page_state(page_zone(page),
+       mod_node_page_state(page_pgdat(page),
                        NR_ISOLATED_ANON + page_lru,
                        -HPAGE_PMD_NR);
        return isolated;