shmem: fix tmpfs to handle the huge= option properly
[cascardo/linux.git] / mm / migrate.c
index bd3fdc2..f7ee04a 100644
@@ -31,6 +31,7 @@
 #include <linux/vmalloc.h>
 #include <linux/security.h>
 #include <linux/backing-dev.h>
+#include <linux/compaction.h>
 #include <linux/syscalls.h>
 #include <linux/hugetlb.h>
 #include <linux/hugetlb_cgroup.h>
@@ -73,6 +74,81 @@ int migrate_prep_local(void)
        return 0;
 }
 
+bool isolate_movable_page(struct page *page, isolate_mode_t mode)
+{
+       struct address_space *mapping;
+
+       /*
+        * Avoid burning cycles with pages that are still under __free_pages(),
+        * or that just got freed under us.
+        *
+        * In case we 'win' a race for a movable page being freed under us
+        * and raise its refcount, preventing __free_pages() from doing its
+        * job, the put_page() at the end of this block will take care of
+        * releasing the page, thus avoiding a nasty leak.
+        */
+       if (unlikely(!get_page_unless_zero(page)))
+               goto out;
+
+       /*
+        * Check PageMovable before taking PG_locked, because the page's owner
+        * assumes that nobody touches the PG_locked bit of a newly allocated
+        * page, so unconditionally grabbing the lock breaks that assumption.
+        */
+       if (unlikely(!__PageMovable(page)))
+               goto out_putpage;
+       /*
+        * As movable pages are not isolated from LRU lists, concurrent
+        * compaction threads can race against page migration functions
+        * as well as race against the release of a page.
+        *
+        * In order to avoid having an already isolated movable page
+        * being (wrongly) re-isolated while it is under migration,
+        * or to avoid attempting to isolate pages being released,
+        * let's be sure we have the page lock
+        * before proceeding with the movable page isolation steps.
+        */
+       if (unlikely(!trylock_page(page)))
+               goto out_putpage;
+
+       if (!PageMovable(page) || PageIsolated(page))
+               goto out_no_isolated;
+
+       mapping = page_mapping(page);
+       VM_BUG_ON_PAGE(!mapping, page);
+
+       if (!mapping->a_ops->isolate_page(page, mode))
+               goto out_no_isolated;
+
+       /* Drivers shouldn't use the PG_isolated bit of page->flags */
+       WARN_ON_ONCE(PageIsolated(page));
+       __SetPageIsolated(page);
+       unlock_page(page);
+
+       return true;
+
+out_no_isolated:
+       unlock_page(page);
+out_putpage:
+       put_page(page);
+out:
+       return false;
+}
+
+/* It should be called on a page which is PG_movable */
+void putback_movable_page(struct page *page)
+{
+       struct address_space *mapping;
+
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+       VM_BUG_ON_PAGE(!PageMovable(page), page);
+       VM_BUG_ON_PAGE(!PageIsolated(page), page);
+
+       mapping = page_mapping(page);
+       mapping->a_ops->putback_page(page);
+       __ClearPageIsolated(page);
+}
+
 /*
  * Put previously isolated pages back onto the appropriate lists
  * from where they were once taken off for compaction/migration.
@@ -92,12 +168,25 @@ void putback_movable_pages(struct list_head *l)
                        continue;
                }
                list_del(&page->lru);
-               dec_zone_page_state(page, NR_ISOLATED_ANON +
+               dec_node_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               if (unlikely(isolated_balloon_page(page)))
-                       balloon_page_putback(page);
-               else
+               /*
+                * We isolated a non-lru movable page, so here we can use
+                * __PageMovable because an LRU page's mapping cannot have
+                * PAGE_MAPPING_MOVABLE set.
+                */
+               if (unlikely(__PageMovable(page))) {
+                       VM_BUG_ON_PAGE(!PageIsolated(page), page);
+                       lock_page(page);
+                       if (PageMovable(page))
+                               putback_movable_page(page);
+                       else
+                               __ClearPageIsolated(page);
+                       unlock_page(page);
+                       put_page(page);
+               } else {
                        putback_lru_page(page);
+               }
        }
 }
 
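
The __PageMovable()/PageMovable() pair relied on above is introduced by other patches in this series (in include/linux/page-flags.h and mm/compaction.c), not by this diff. As a sketch of the assumed semantics: __PageMovable() only tests the tag bits encoded in the low bits of page->mapping and may be used without the page lock, while PageMovable() must run under lock_page() and re-checks that the owner still provides an isolate_page callback.

/* Sketch of the helpers assumed above; the real definitions live outside this file. */
#define PAGE_MAPPING_ANON	0x1
#define PAGE_MAPPING_MOVABLE	0x2
#define PAGE_MAPPING_KSM	(PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)
#define PAGE_MAPPING_FLAGS	(PAGE_MAPPING_ANON | PAGE_MAPPING_MOVABLE)

/* Lockless check: only inspects the mapping tag bits, so it may be stale. */
static __always_inline int __PageMovable(struct page *page)
{
	return ((unsigned long)page->mapping & PAGE_MAPPING_FLAGS) ==
				PAGE_MAPPING_MOVABLE;
}

/* Must be called with the page locked; re-checks the owner's callbacks. */
int PageMovable(struct page *page)
{
	struct address_space *mapping;

	VM_BUG_ON_PAGE(!PageLocked(page), page);
	mapping = page_mapping(page);
	if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
		return 1;

	return 0;
}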
@@ -170,7 +259,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma,
        } else if (PageAnon(new))
                page_add_anon_rmap(new, vma, addr, false);
        else
-               page_add_file_rmap(new);
+               page_add_file_rmap(new, false);
 
        if (vma->vm_flags & VM_LOCKED && !PageTransCompound(new))
                mlock_vma_page(new);
@@ -412,19 +501,21 @@ int migrate_page_move_mapping(struct address_space *mapping,
         * new page and drop references to the old page.
         *
         * Note that anonymous pages are accounted for
-        * via NR_FILE_PAGES and NR_ANON_PAGES if they
+        * via NR_FILE_PAGES and NR_ANON_MAPPED if they
         * are mapped to swap space.
         */
        if (newzone != oldzone) {
-               __dec_zone_state(oldzone, NR_FILE_PAGES);
-               __inc_zone_state(newzone, NR_FILE_PAGES);
+               __dec_node_state(oldzone->zone_pgdat, NR_FILE_PAGES);
+               __inc_node_state(newzone->zone_pgdat, NR_FILE_PAGES);
                if (PageSwapBacked(page) && !PageSwapCache(page)) {
-                       __dec_zone_state(oldzone, NR_SHMEM);
-                       __inc_zone_state(newzone, NR_SHMEM);
+                       __dec_node_state(oldzone->zone_pgdat, NR_SHMEM);
+                       __inc_node_state(newzone->zone_pgdat, NR_SHMEM);
                }
                if (dirty && mapping_cap_account_dirty(mapping)) {
-                       __dec_zone_state(oldzone, NR_FILE_DIRTY);
-                       __inc_zone_state(newzone, NR_FILE_DIRTY);
+                       __dec_node_state(oldzone->zone_pgdat, NR_FILE_DIRTY);
+                       __dec_zone_state(oldzone, NR_ZONE_WRITE_PENDING);
+                       __inc_node_state(newzone->zone_pgdat, NR_FILE_DIRTY);
+                       __inc_zone_state(newzone, NR_ZONE_WRITE_PENDING);
                }
        }
        local_irq_enable();
@@ -594,7 +685,7 @@ EXPORT_SYMBOL(migrate_page_copy);
  ***********************************************************/
 
 /*
- * Common logic to directly migrate a single page suitable for
+ * Common logic to directly migrate a single LRU page suitable for
  * pages that do not use PagePrivate/PagePrivate2.
  *
  * Pages are locked upon entry and exit.
@@ -757,33 +848,72 @@ static int move_to_new_page(struct page *newpage, struct page *page,
                                enum migrate_mode mode)
 {
        struct address_space *mapping;
-       int rc;
+       int rc = -EAGAIN;
+       bool is_lru = !__PageMovable(page);
 
        VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
 
        mapping = page_mapping(page);
-       if (!mapping)
-               rc = migrate_page(mapping, newpage, page, mode);
-       else if (mapping->a_ops->migratepage)
+
+       if (likely(is_lru)) {
+               if (!mapping)
+                       rc = migrate_page(mapping, newpage, page, mode);
+               else if (mapping->a_ops->migratepage)
+                       /*
+                        * Most pages have a mapping and most filesystems
+                        * provide a migratepage callback. Anonymous pages
+                        * are part of swap space which also has its own
+                        * migratepage callback. This is the most common path
+                        * for page migration.
+                        */
+                       rc = mapping->a_ops->migratepage(mapping, newpage,
+                                                       page, mode);
+               else
+                       rc = fallback_migrate_page(mapping, newpage,
+                                                       page, mode);
+       } else {
                /*
-                * Most pages have a mapping and most filesystems provide a
-                * migratepage callback. Anonymous pages are part of swap
-                * space which also has its own migratepage callback. This
-                * is the most common path for page migration.
+                * In case of a non-lru page, it could have been released after
+                * the isolation step. In that case, we shouldn't try migration.
                 */
-               rc = mapping->a_ops->migratepage(mapping, newpage, page, mode);
-       else
-               rc = fallback_migrate_page(mapping, newpage, page, mode);
+               VM_BUG_ON_PAGE(!PageIsolated(page), page);
+               if (!PageMovable(page)) {
+                       rc = MIGRATEPAGE_SUCCESS;
+                       __ClearPageIsolated(page);
+                       goto out;
+               }
+
+               rc = mapping->a_ops->migratepage(mapping, newpage,
+                                               page, mode);
+               WARN_ON_ONCE(rc == MIGRATEPAGE_SUCCESS &&
+                       !PageIsolated(page));
+       }
 
        /*
         * When successful, old pagecache page->mapping must be cleared before
         * page is freed; but stats require that PageAnon be left as PageAnon.
         */
        if (rc == MIGRATEPAGE_SUCCESS) {
-               if (!PageAnon(page))
+               if (__PageMovable(page)) {
+                       VM_BUG_ON_PAGE(!PageIsolated(page), page);
+
+                       /*
+                        * We clear PG_movable under page_lock so that no
+                        * compactor can try to migrate this page.
+                        */
+                       __ClearPageIsolated(page);
+               }
+
+               /*
+                * page->mapping of anonymous and movable pages will be cleared
+                * by free_pages_prepare(), so don't reset it here; this keeps
+                * type checks such as PageAnon() working until the page is freed.
+                */
+               if (!PageMappingFlags(page))
                        page->mapping = NULL;
        }
+out:
        return rc;
 }
 
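
For non-lru pages, move_to_new_page() above dispatches to mapping->a_ops->migratepage() and expects PG_isolated to stay set when the callback reports MIGRATEPAGE_SUCCESS. A minimal driver-side sketch of that contract follows; the 'foo' names are hypothetical, and __SetPageMovable()/__ClearPageMovable() are helpers added elsewhere in this series.

/* Hypothetical driver sketch of the callbacks the core code above invokes. */
static bool foo_isolate_page(struct page *page, isolate_mode_t mode)
{
	/* Called with the page locked. A real driver detaches the page from
	 * its internal lists here and returns true if it may be migrated. */
	return true;
}

static int foo_migratepage(struct address_space *mapping,
			   struct page *newpage, struct page *page,
			   enum migrate_mode mode)
{
	/* A real driver copies contents and metadata to newpage here, then
	 * hands the movable role over. PG_isolated is left alone; the core
	 * clears it after a successful migration. */
	__SetPageMovable(newpage, mapping);
	__ClearPageMovable(page);
	return MIGRATEPAGE_SUCCESS;
}

static void foo_putback_page(struct page *page)
{
	/* Migration failed: re-insert the page into driver data structures. */
}

static const struct address_space_operations foo_aops = {
	.isolate_page	= foo_isolate_page,
	.migratepage	= foo_migratepage,
	.putback_page	= foo_putback_page,
};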
@@ -793,6 +923,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
        int rc = -EAGAIN;
        int page_was_mapped = 0;
        struct anon_vma *anon_vma = NULL;
+       bool is_lru = !__PageMovable(page);
 
        if (!trylock_page(page)) {
                if (!force || mode == MIGRATE_ASYNC)
@@ -861,15 +992,8 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
        if (unlikely(!trylock_page(newpage)))
                goto out_unlock;
 
-       if (unlikely(isolated_balloon_page(page))) {
-               /*
-                * A ballooned page does not need any special attention from
-                * physical to virtual reverse mapping procedures.
-                * Skip any attempt to unmap PTEs or to remap swap cache,
-                * in order to avoid burning cycles at rmap level, and perform
-                * the page migration right away (proteced by page lock).
-                */
-               rc = balloon_page_migrate(newpage, page, mode);
+       if (unlikely(!is_lru)) {
+               rc = move_to_new_page(newpage, page, mode);
                goto out_unlock_both;
        }
 
@@ -915,6 +1039,19 @@ out_unlock:
                put_anon_vma(anon_vma);
        unlock_page(page);
 out:
+       /*
+        * If migration is successful, drop the reference we took on the
+        * newpage; it will not free the page because the new page owner
+        * holds its own reference. If the newpage is an LRU page, also
+        * put it back on the LRU list here.
+        */
+       if (rc == MIGRATEPAGE_SUCCESS) {
+               if (unlikely(__PageMovable(newpage)))
+                       put_page(newpage);
+               else
+                       putback_lru_page(newpage);
+       }
+
        return rc;
 }
 
@@ -948,6 +1085,18 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
 
        if (page_count(page) == 1) {
                /* page was freed from under us. So we are done. */
+               ClearPageActive(page);
+               ClearPageUnevictable(page);
+               if (unlikely(__PageMovable(page))) {
+                       lock_page(page);
+                       if (!PageMovable(page))
+                               __ClearPageIsolated(page);
+                       unlock_page(page);
+               }
+               if (put_new_page)
+                       put_new_page(newpage, private);
+               else
+                       put_page(newpage);
                goto out;
        }
 
@@ -960,10 +1109,8 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
        }
 
        rc = __unmap_and_move(page, newpage, force, mode);
-       if (rc == MIGRATEPAGE_SUCCESS) {
-               put_new_page = NULL;
+       if (rc == MIGRATEPAGE_SUCCESS)
                set_page_owner_migrate_reason(newpage, reason);
-       }
 
 out:
        if (rc != -EAGAIN) {
@@ -974,35 +1121,47 @@ out:
                 * restored.
                 */
                list_del(&page->lru);
-               dec_zone_page_state(page, NR_ISOLATED_ANON +
+               dec_node_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
-               /* Soft-offlined page shouldn't go through lru cache list */
-               if (reason == MR_MEMORY_FAILURE && rc == MIGRATEPAGE_SUCCESS) {
+       }
+
+       /*
+        * If migration is successful, release the reference grabbed during
+        * isolation. Otherwise, restore the page to the right list unless
+        * we want to retry.
+        */
+       if (rc == MIGRATEPAGE_SUCCESS) {
+               put_page(page);
+               if (reason == MR_MEMORY_FAILURE) {
                        /*
-                        * With this release, we free successfully migrated
-                        * page and set PG_HWPoison on just freed page
-                        * intentionally. Although it's rather weird, it's how
-                        * HWPoison flag works at the moment.
+                        * Set PG_HWPoison on the just-freed page
+                        * intentionally. Although it's rather weird,
+                        * it's how the HWPoison flag works at the moment.
                         */
-                       put_page(page);
                        if (!test_set_page_hwpoison(page))
                                num_poisoned_pages_inc();
-               } else
-                       putback_lru_page(page);
-       }
+               }
+       } else {
+               if (rc != -EAGAIN) {
+                       if (likely(!__PageMovable(page))) {
+                               putback_lru_page(page);
+                               goto put_new;
+                       }
 
-       /*
-        * If migration was not successful and there's a freeing callback, use
-        * it.  Otherwise, putback_lru_page() will drop the reference grabbed
-        * during isolation.
-        */
-       if (put_new_page)
-               put_new_page(newpage, private);
-       else if (unlikely(__is_movable_balloon_page(newpage))) {
-               /* drop our reference, page already in the balloon */
-               put_page(newpage);
-       } else
-               putback_lru_page(newpage);
+                       lock_page(page);
+                       if (PageMovable(page))
+                               putback_movable_page(page);
+                       else
+                               __ClearPageIsolated(page);
+                       unlock_page(page);
+                       put_page(page);
+               }
+put_new:
+               if (put_new_page)
+                       put_new_page(newpage, private);
+               else
+                       put_page(newpage);
+       }
 
        if (result) {
                if (rc)
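
For orientation, the caller side of these paths is unchanged: migrate_pages() drives unmap_and_move() above, and on failure the leftover pages, whether LRU or non-lru movable, are handed back via putback_movable_pages(). A minimal caller sketch with hypothetical names:

/* Hypothetical caller sketch: how the paths above are normally driven. */
static struct page *foo_new_page(struct page *page, unsigned long private,
				 int **result)
{
	/* A real caller picks a target node/zone; keep it simple here. */
	return alloc_page(GFP_HIGHUSER_MOVABLE);
}

static int foo_migrate_list(struct list_head *pagelist)
{
	int err;

	err = migrate_pages(pagelist, foo_new_page, NULL, 0,
			    MIGRATE_SYNC, MR_COMPACTION);
	if (err)
		/* Both LRU and non-lru movable pages go back this way. */
		putback_movable_pages(pagelist);

	return err;
}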
@@ -1303,7 +1462,7 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
                err = isolate_lru_page(page);
                if (!err) {
                        list_add_tail(&page->lru, &pagelist);
-                       inc_zone_page_state(page, NR_ISOLATED_ANON +
+                       inc_node_page_state(page, NR_ISOLATED_ANON +
                                            page_is_file_cache(page));
                }
 put_and_set:
@@ -1569,15 +1728,16 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
                                   unsigned long nr_migrate_pages)
 {
        int z;
+
+       if (!pgdat_reclaimable(pgdat))
+               return false;
+
        for (z = pgdat->nr_zones - 1; z >= 0; z--) {
                struct zone *zone = pgdat->node_zones + z;
 
                if (!populated_zone(zone))
                        continue;
 
-               if (!zone_reclaimable(zone))
-                       continue;
-
                /* Avoid waking kswapd by allocating pages_to_migrate pages. */
                if (!zone_watermark_ok(zone, 0,
                                       high_wmark_pages(zone) +
@@ -1671,7 +1831,7 @@ static int numamigrate_isolate_page(pg_data_t *pgdat, struct page *page)
        }
 
        page_lru = page_is_file_cache(page);
-       mod_zone_page_state(page_zone(page), NR_ISOLATED_ANON + page_lru,
+       mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON + page_lru,
                                hpage_nr_pages(page));
 
        /*
@@ -1729,7 +1889,7 @@ int migrate_misplaced_page(struct page *page, struct vm_area_struct *vma,
        if (nr_remaining) {
                if (!list_empty(&migratepages)) {
                        list_del(&page->lru);
-                       dec_zone_page_state(page, NR_ISOLATED_ANON +
+                       dec_node_page_state(page, NR_ISOLATED_ANON +
                                        page_is_file_cache(page));
                        putback_lru_page(page);
                }
@@ -1774,7 +1934,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
                goto out_dropref;
 
        new_page = alloc_pages_node(node,
-               (GFP_TRANSHUGE | __GFP_THISNODE) & ~__GFP_RECLAIM,
+               (GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
                HPAGE_PMD_ORDER);
        if (!new_page)
                goto out_fail;
@@ -1822,15 +1982,14 @@ fail_putback:
                /* Retake the callers reference and putback on LRU */
                get_page(page);
                putback_lru_page(page);
-               mod_zone_page_state(page_zone(page),
+               mod_node_page_state(page_pgdat(page),
                         NR_ISOLATED_ANON + page_lru, -HPAGE_PMD_NR);
 
                goto out_unlock;
        }
 
        orig_entry = *pmd;
-       entry = mk_pmd(new_page, vma->vm_page_prot);
-       entry = pmd_mkhuge(entry);
+       entry = mk_huge_pmd(new_page, vma->vm_page_prot);
        entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
        /*
@@ -1874,7 +2033,7 @@ fail_putback:
        count_vm_events(PGMIGRATE_SUCCESS, HPAGE_PMD_NR);
        count_vm_numa_events(NUMA_PAGE_MIGRATE, HPAGE_PMD_NR);
 
-       mod_zone_page_state(page_zone(page),
+       mod_node_page_state(page_pgdat(page),
                        NR_ISOLATED_ANON + page_lru,
                        -HPAGE_PMD_NR);
        return isolated;