mm: memcontrol: zap oom_info_lock
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index e10a4fe..021db17 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -78,7 +78,7 @@ unsigned long transparent_hugepage_flags __read_mostly =
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE_MADVISE
        (1<<TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG)|
 #endif
-       (1<<TRANSPARENT_HUGEPAGE_DEFRAG_FLAG)|
+       (1<<TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG)|
        (1<<TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG)|
        (1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG);
 
@@ -168,8 +168,7 @@ static void set_recommended_min_free_kbytes(void)
 
        if (recommended_min > min_free_kbytes) {
                if (user_min_free_kbytes >= 0)
-                       pr_info("raising min_free_kbytes from %d to %lu "
-                               "to help transparent hugepage allocations\n",
+                       pr_info("raising min_free_kbytes from %d to %lu to help transparent hugepage allocations\n",
                                min_free_kbytes, recommended_min);
 
                min_free_kbytes = recommended_min;
@@ -270,37 +269,35 @@ static struct shrinker huge_zero_page_shrinker = {
 
 #ifdef CONFIG_SYSFS
 
-static ssize_t double_flag_show(struct kobject *kobj,
-                               struct kobj_attribute *attr, char *buf,
-                               enum transparent_hugepage_flag enabled,
-                               enum transparent_hugepage_flag req_madv)
-{
-       if (test_bit(enabled, &transparent_hugepage_flags)) {
-               VM_BUG_ON(test_bit(req_madv, &transparent_hugepage_flags));
-               return sprintf(buf, "[always] madvise never\n");
-       } else if (test_bit(req_madv, &transparent_hugepage_flags))
-               return sprintf(buf, "always [madvise] never\n");
-       else
-               return sprintf(buf, "always madvise [never]\n");
-}
-static ssize_t double_flag_store(struct kobject *kobj,
+static ssize_t triple_flag_store(struct kobject *kobj,
                                 struct kobj_attribute *attr,
                                 const char *buf, size_t count,
                                 enum transparent_hugepage_flag enabled,
+                                enum transparent_hugepage_flag deferred,
                                 enum transparent_hugepage_flag req_madv)
 {
-       if (!memcmp("always", buf,
+       if (!memcmp("defer", buf,
+                   min(sizeof("defer")-1, count))) {
+               if (enabled == deferred)
+                       return -EINVAL;
+               clear_bit(enabled, &transparent_hugepage_flags);
+               clear_bit(req_madv, &transparent_hugepage_flags);
+               set_bit(deferred, &transparent_hugepage_flags);
+       } else if (!memcmp("always", buf,
                    min(sizeof("always")-1, count))) {
-               set_bit(enabled, &transparent_hugepage_flags);
+               clear_bit(deferred, &transparent_hugepage_flags);
                clear_bit(req_madv, &transparent_hugepage_flags);
+               set_bit(enabled, &transparent_hugepage_flags);
        } else if (!memcmp("madvise", buf,
                           min(sizeof("madvise")-1, count))) {
                clear_bit(enabled, &transparent_hugepage_flags);
+               clear_bit(deferred, &transparent_hugepage_flags);
                set_bit(req_madv, &transparent_hugepage_flags);
        } else if (!memcmp("never", buf,
                           min(sizeof("never")-1, count))) {
                clear_bit(enabled, &transparent_hugepage_flags);
                clear_bit(req_madv, &transparent_hugepage_flags);
+               clear_bit(deferred, &transparent_hugepage_flags);
        } else
                return -EINVAL;
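
For reference, a minimal user-space sketch of how this store routine is reached (the files are the usual /sys/kernel/mm/transparent_hugepage/enabled and .../defrag attributes; the helper name thp_set() is invented for the example). Note that "defer" only makes sense for defrag: enabled_store() below passes TRANSPARENT_HUGEPAGE_FLAG as both enabled and deferred, so triple_flag_store() returns -EINVAL for it.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Hypothetical helper: write a policy token to one of the THP sysfs files. */
static int thp_set(const char *file, const char *val)
{
        char path[128];
        int fd;
        ssize_t n;

        snprintf(path, sizeof(path),
                 "/sys/kernel/mm/transparent_hugepage/%s", file);
        fd = open(path, O_WRONLY);
        if (fd < 0)
                return -1;
        n = write(fd, val, strlen(val));
        close(fd);
        return n == (ssize_t)strlen(val) ? 0 : -1;
}

int main(void)
{
        if (thp_set("defrag", "defer"))         /* kswapd-deferred compaction */
                perror("defrag=defer");
        if (thp_set("enabled", "defer"))        /* expected to fail: -EINVAL */
                perror("enabled=defer");
        return 0;
}
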
 
@@ -310,17 +307,22 @@ static ssize_t double_flag_store(struct kobject *kobj,
 static ssize_t enabled_show(struct kobject *kobj,
                            struct kobj_attribute *attr, char *buf)
 {
-       return double_flag_show(kobj, attr, buf,
-                               TRANSPARENT_HUGEPAGE_FLAG,
-                               TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG);
+       if (test_bit(TRANSPARENT_HUGEPAGE_FLAG, &transparent_hugepage_flags))
+               return sprintf(buf, "[always] madvise never\n");
+       else if (test_bit(TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, &transparent_hugepage_flags))
+               return sprintf(buf, "always [madvise] never\n");
+       else
+               return sprintf(buf, "always madvise [never]\n");
 }
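
The show side lists every choice and brackets the active one, e.g. "always [madvise] never". A minimal user-space sketch that extracts the bracketed token (path as in the mainline sysfs ABI; the helper name thp_get() is made up for the example):

#include <stdio.h>
#include <string.h>

/* Hypothetical helper: return the bracketed (active) token from a THP
 * sysfs file, e.g. "madvise" out of "always [madvise] never".
 */
static int thp_get(const char *file, char *out, size_t outlen)
{
        char path[128], line[128], *l, *r;
        FILE *f;

        snprintf(path, sizeof(path),
                 "/sys/kernel/mm/transparent_hugepage/%s", file);
        f = fopen(path, "r");
        if (!f)
                return -1;
        if (!fgets(line, sizeof(line), f)) {
                fclose(f);
                return -1;
        }
        fclose(f);
        l = strchr(line, '[');
        r = l ? strchr(l, ']') : NULL;
        if (!r)
                return -1;
        *r = '\0';
        snprintf(out, outlen, "%s", l + 1);
        return 0;
}

int main(void)
{
        char mode[32];

        if (!thp_get("enabled", mode, sizeof(mode)))
                printf("THP enabled policy: %s\n", mode);
        return 0;
}
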
+
 static ssize_t enabled_store(struct kobject *kobj,
                             struct kobj_attribute *attr,
                             const char *buf, size_t count)
 {
        ssize_t ret;
 
-       ret = double_flag_store(kobj, attr, buf, count,
+       ret = triple_flag_store(kobj, attr, buf, count,
+                               TRANSPARENT_HUGEPAGE_FLAG,
                                TRANSPARENT_HUGEPAGE_FLAG,
                                TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG);
 
@@ -378,16 +380,23 @@ static ssize_t single_flag_store(struct kobject *kobj,
 static ssize_t defrag_show(struct kobject *kobj,
                           struct kobj_attribute *attr, char *buf)
 {
-       return double_flag_show(kobj, attr, buf,
-                               TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
-                               TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG);
+       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
+               return sprintf(buf, "[always] defer madvise never\n");
+       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
+               return sprintf(buf, "always [defer] madvise never\n");
+       else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags))
+               return sprintf(buf, "always defer [madvise] never\n");
+       else
+               return sprintf(buf, "always defer madvise [never]\n");
+
 }
 static ssize_t defrag_store(struct kobject *kobj,
                            struct kobj_attribute *attr,
                            const char *buf, size_t count)
 {
-       return double_flag_store(kobj, attr, buf, count,
-                                TRANSPARENT_HUGEPAGE_DEFRAG_FLAG,
+       return triple_flag_store(kobj, attr, buf, count,
+                                TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
+                                TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG,
                                 TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG);
 }
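
The defrag policy therefore has four states now. A data-table sketch of the mapping these two handlers implement (flag names are the kernel's; the behaviour column anticipates alloc_hugepage_direct_gfpmask() further down in this patch; the struct itself is purely illustrative):

/* Illustrative only: which bit each sysfs token selects and what it means
 * for page-fault-time allocation.
 */
struct thp_defrag_mode {
        const char *token;      /* accepted by defrag_store() */
        const char *flag;       /* bit reported by defrag_show() */
        const char *fault_time_behaviour;
};

static const struct thp_defrag_mode thp_defrag_modes[] = {
        { "always",  "TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG",
          "__GFP_DIRECT_RECLAIM: stall for reclaim/compaction" },
        { "defer",   "TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG",
          "__GFP_KSWAPD_RECLAIM: wake kswapd, defer to khugepaged, no stall" },
        { "madvise", "TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG",
          "__GFP_DIRECT_RECLAIM, but only for MADV_HUGEPAGE (VM_HUGEPAGE) vmas" },
        { "never",   "(none of the above set)",
          "no reclaim flags added" },
};
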
 static struct kobj_attribute defrag_attr =
@@ -843,9 +852,30 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
        return 0;
 }
 
-static inline gfp_t alloc_hugepage_gfpmask(int defrag, gfp_t extra_gfp)
+/*
+ * If THP is set to always then directly reclaim/compact as necessary
+ * If set to defer then do no reclaim and defer to khugepaged
+ * If set to madvise and the VMA is flagged then directly reclaim/compact
+ */
+static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+{
+       gfp_t reclaim_flags = 0;
+
+       if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG, &transparent_hugepage_flags) &&
+           (vma->vm_flags & VM_HUGEPAGE))
+               reclaim_flags = __GFP_DIRECT_RECLAIM;
+       else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_KSWAPD_FLAG, &transparent_hugepage_flags))
+               reclaim_flags = __GFP_KSWAPD_RECLAIM;
+       else if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
+               reclaim_flags = __GFP_DIRECT_RECLAIM;
+
+       return GFP_TRANSHUGE | reclaim_flags;
+}
+
+/* Defrag for khugepaged will enter direct reclaim/compaction if necessary */
+static inline gfp_t alloc_hugepage_khugepaged_gfpmask(void)
 {
-       return (GFP_TRANSHUGE & ~(defrag ? 0 : __GFP_RECLAIM)) | extra_gfp;
+       return GFP_TRANSHUGE | (khugepaged_defrag() ? __GFP_DIRECT_RECLAIM : 0);
 }
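
A compilable user-space model of the fault-path decision in alloc_hugepage_direct_gfpmask() above, with mocked flag bits (the names here are illustrative stand-ins, not the kernel's). It makes the ordering explicit: an madvise()d (VM_HUGEPAGE) vma gets direct reclaim under both the "madvise" and "always" policies, "defer" only wakes kswapd, and "never" adds no reclaim flags at all.

#include <stdbool.h>
#include <stdio.h>

/* Mocked stand-ins for the kernel flags used by the helper above. */
enum thp_defrag_flag {
        DEFRAG_DIRECT_FLAG   = 1 << 0,  /* "always"  */
        DEFRAG_KSWAPD_FLAG   = 1 << 1,  /* "defer"   */
        DEFRAG_REQ_MADV_FLAG = 1 << 2,  /* "madvise" */
};

enum reclaim_mode { RECLAIM_NONE, RECLAIM_KSWAPD, RECLAIM_DIRECT };

/* Same decision order as alloc_hugepage_direct_gfpmask(). */
static enum reclaim_mode fault_reclaim(unsigned int defrag_flags,
                                       bool vma_is_madv_hugepage)
{
        if ((defrag_flags & DEFRAG_REQ_MADV_FLAG) && vma_is_madv_hugepage)
                return RECLAIM_DIRECT;
        if (defrag_flags & DEFRAG_KSWAPD_FLAG)
                return RECLAIM_KSWAPD;
        if (defrag_flags & DEFRAG_DIRECT_FLAG)
                return RECLAIM_DIRECT;
        return RECLAIM_NONE;
}

int main(void)
{
        /* defrag=madvise, fault in a MADV_HUGEPAGE region: direct reclaim. */
        printf("%d\n", fault_reclaim(DEFRAG_REQ_MADV_FLAG, true));
        /* defrag=defer, any region: kswapd only, no stall at fault time. */
        printf("%d\n", fault_reclaim(DEFRAG_KSWAPD_FLAG, false));
        return 0;
}
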
 
 /* Caller must hold page table lock. */
@@ -919,7 +949,7 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
                }
                return ret;
        }
-       gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
+       gfp = alloc_hugepage_direct_gfpmask(vma);
        page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
        if (unlikely(!page)) {
                count_vm_event(THP_FAULT_FALLBACK);
@@ -1279,7 +1309,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 alloc:
        if (transparent_hugepage_enabled(vma) &&
            !transparent_hugepage_debug_cow()) {
-               huge_gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0);
+               huge_gfp = alloc_hugepage_direct_gfpmask(vma);
                new_page = alloc_hugepage_vma(huge_gfp, vma, haddr, HPAGE_PMD_ORDER);
        } else
                new_page = NULL;
@@ -2249,11 +2279,12 @@ static int khugepaged_find_target_node(void)
        return 0;
 }
 
-static inline struct page *alloc_hugepage(int defrag)
+static inline struct page *alloc_khugepaged_hugepage(void)
 {
        struct page *page;
 
-       page = alloc_pages(alloc_hugepage_gfpmask(defrag, 0), HPAGE_PMD_ORDER);
+       page = alloc_pages(alloc_hugepage_khugepaged_gfpmask(),
+                          HPAGE_PMD_ORDER);
        if (page)
                prep_transhuge_page(page);
        return page;
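
khugepaged's willingness to enter direct reclaim is governed by its own boolean knob (TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG, read via khugepaged_defrag() and exposed as /sys/kernel/mm/transparent_hugepage/khugepaged/defrag, accepting 0 or 1), independent of the fault-time "defrag" policy. Reusing the hypothetical thp_set() helper from the first sketch:

        /* Let khugepaged stall for reclaim/compaction when collapsing,
         * regardless of the fault-time policy chosen above.
         */
        if (thp_set("khugepaged/defrag", "1"))
                perror("khugepaged/defrag");
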
@@ -2264,7 +2295,7 @@ static struct page *khugepaged_alloc_hugepage(bool *wait)
        struct page *hpage;
 
        do {
-               hpage = alloc_hugepage(khugepaged_defrag());
+               hpage = alloc_khugepaged_hugepage();
                if (!hpage) {
                        count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
                        if (!*wait)
@@ -2335,8 +2366,7 @@ static void collapse_huge_page(struct mm_struct *mm,
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
 
        /* Only allocate from the target node */
-       gfp = alloc_hugepage_gfpmask(khugepaged_defrag(), __GFP_OTHER_NODE) |
-               __GFP_THISNODE;
+       gfp = alloc_hugepage_khugepaged_gfpmask() | __GFP_OTHER_NODE | __GFP_THISNODE;
 
        /* release the mmap_sem read lock. */
        new_page = khugepaged_alloc_page(hpage, gfp, mm, address, node);
@@ -2857,7 +2887,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
        page = pmd_page(*pmd);
        VM_BUG_ON_PAGE(!page_count(page), page);
-       atomic_add(HPAGE_PMD_NR - 1, &page->_count);
+       page_ref_add(page, HPAGE_PMD_NR - 1);
        write = pmd_write(*pmd);
        young = pmd_young(*pmd);
        dirty = pmd_dirty(*pmd);
@@ -2947,44 +2977,33 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 }
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-               unsigned long address)
+               unsigned long address, bool freeze)
 {
        spinlock_t *ptl;
        struct mm_struct *mm = vma->vm_mm;
-       struct page *page = NULL;
        unsigned long haddr = address & HPAGE_PMD_MASK;
 
        mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PMD_SIZE);
        ptl = pmd_lock(mm, pmd);
        if (pmd_trans_huge(*pmd)) {
-               page = pmd_page(*pmd);
+               struct page *page = pmd_page(*pmd);
                if (PageMlocked(page))
-                       get_page(page);
-               else
-                       page = NULL;
+                       clear_page_mlock(page);
        } else if (!pmd_devmap(*pmd))
                goto out;
-       __split_huge_pmd_locked(vma, pmd, haddr, false);
+       __split_huge_pmd_locked(vma, pmd, haddr, freeze);
 out:
        spin_unlock(ptl);
        mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PMD_SIZE);
-       if (page) {
-               lock_page(page);
-               munlock_vma_page(page);
-               unlock_page(page);
-               put_page(page);
-       }
 }
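
Callers outside this file keep using the split_huge_pmd() wrapper from include/linux/huge_mm.h, which is not shown in this diff; presumably it now just forwards freeze=false, roughly along these lines (a sketch under that assumption, not taken from the patch):

#define split_huge_pmd(__vma, __pmd, __address)                         \
        do {                                                            \
                pmd_t *____pmd = (__pmd);                               \
                if (pmd_trans_huge(*____pmd) || pmd_devmap(*____pmd))   \
                        __split_huge_pmd(__vma, __pmd, __address,       \
                                         false);                        \
        } while (0)
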
 
-static void split_huge_pmd_address(struct vm_area_struct *vma,
-                                   unsigned long address)
+void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address,
+               bool freeze, struct page *page)
 {
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
 
-       VM_BUG_ON(!(address & ~HPAGE_PMD_MASK));
-
        pgd = pgd_offset(vma->vm_mm, address);
        if (!pgd_present(*pgd))
                return;
@@ -2996,11 +3015,20 @@ static void split_huge_pmd_address(struct vm_area_struct *vma,
        pmd = pmd_offset(pud, address);
        if (!pmd_present(*pmd) || (!pmd_trans_huge(*pmd) && !pmd_devmap(*pmd)))
                return;
+
+       /*
+        * If caller asks to setup a migration entries, we need a page to check
+        * pmd against. Otherwise we can end up replacing wrong page.
+        */
+       VM_BUG_ON(freeze && !page);
+       if (page && page != pmd_page(*pmd))
+               return;
+
        /*
         * Caller holds the mmap_sem write mode, so a huge pmd cannot
         * materialize from under us.
         */
-       split_huge_pmd(vma, pmd, address);
+       __split_huge_pmd(vma, pmd, address, freeze);
 }
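
The new page/freeze arguments exist for the rmap side: freeze_page() later in this patch unmaps the THP with try_to_unmap(..., TTU_SPLIT_HUGE_PMD | TTU_MIGRATION ...), so try_to_unmap_one() in mm/rmap.c is expected to call in roughly this way (a sketch of the caller, not part of this hunk):

        if (flags & TTU_SPLIT_HUGE_PMD) {
                /* Freeze (install migration entries) only for migration-style
                 * unmapping; pass the page so a racing THP that landed in the
                 * same pmd slot is not split by mistake.
                 */
                split_huge_pmd_address(vma, address,
                                       flags & TTU_MIGRATION, page);
        }
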
 
 void vma_adjust_trans_huge(struct vm_area_struct *vma,
@@ -3016,7 +3044,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
        if (start & ~HPAGE_PMD_MASK &&
            (start & HPAGE_PMD_MASK) >= vma->vm_start &&
            (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-               split_huge_pmd_address(vma, start);
+               split_huge_pmd_address(vma, start, false, NULL);
 
        /*
         * If the new end address isn't hpage aligned and it could
@@ -3026,7 +3054,7 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
        if (end & ~HPAGE_PMD_MASK &&
            (end & HPAGE_PMD_MASK) >= vma->vm_start &&
            (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
-               split_huge_pmd_address(vma, end);
+               split_huge_pmd_address(vma, end, false, NULL);
 
        /*
         * If we're also updating the vma->vm_next->vm_start, if the new
@@ -3040,208 +3068,58 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma,
                if (nstart & ~HPAGE_PMD_MASK &&
                    (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
                    (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
-                       split_huge_pmd_address(next, nstart);
-       }
-}
-
-static void freeze_page_vma(struct vm_area_struct *vma, struct page *page,
-               unsigned long address)
-{
-       unsigned long haddr = address & HPAGE_PMD_MASK;
-       spinlock_t *ptl;
-       pgd_t *pgd;
-       pud_t *pud;
-       pmd_t *pmd;
-       pte_t *pte;
-       int i, nr = HPAGE_PMD_NR;
-
-       /* Skip pages which doesn't belong to the VMA */
-       if (address < vma->vm_start) {
-               int off = (vma->vm_start - address) >> PAGE_SHIFT;
-               page += off;
-               nr -= off;
-               address = vma->vm_start;
-       }
-
-       pgd = pgd_offset(vma->vm_mm, address);
-       if (!pgd_present(*pgd))
-               return;
-       pud = pud_offset(pgd, address);
-       if (!pud_present(*pud))
-               return;
-       pmd = pmd_offset(pud, address);
-       ptl = pmd_lock(vma->vm_mm, pmd);
-       if (!pmd_present(*pmd)) {
-               spin_unlock(ptl);
-               return;
+                       split_huge_pmd_address(next, nstart, false, NULL);
        }
-       if (pmd_trans_huge(*pmd)) {
-               if (page == pmd_page(*pmd))
-                       __split_huge_pmd_locked(vma, pmd, haddr, true);
-               spin_unlock(ptl);
-               return;
-       }
-       spin_unlock(ptl);
-
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
-       for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) {
-               pte_t entry, swp_pte;
-               swp_entry_t swp_entry;
-
-               /*
-                * We've just crossed page table boundary: need to map next one.
-                * It can happen if THP was mremaped to non PMD-aligned address.
-                */
-               if (unlikely(address == haddr + HPAGE_PMD_SIZE)) {
-                       pte_unmap_unlock(pte - 1, ptl);
-                       pmd = mm_find_pmd(vma->vm_mm, address);
-                       if (!pmd)
-                               return;
-                       pte = pte_offset_map_lock(vma->vm_mm, pmd,
-                                       address, &ptl);
-               }
-
-               if (!pte_present(*pte))
-                       continue;
-               if (page_to_pfn(page) != pte_pfn(*pte))
-                       continue;
-               flush_cache_page(vma, address, page_to_pfn(page));
-               entry = ptep_clear_flush(vma, address, pte);
-               if (pte_dirty(entry))
-                       SetPageDirty(page);
-               swp_entry = make_migration_entry(page, pte_write(entry));
-               swp_pte = swp_entry_to_pte(swp_entry);
-               if (pte_soft_dirty(entry))
-                       swp_pte = pte_swp_mksoft_dirty(swp_pte);
-               set_pte_at(vma->vm_mm, address, pte, swp_pte);
-               page_remove_rmap(page, false);
-               put_page(page);
-       }
-       pte_unmap_unlock(pte - 1, ptl);
 }
 
-static void freeze_page(struct anon_vma *anon_vma, struct page *page)
+static void freeze_page(struct page *page)
 {
-       struct anon_vma_chain *avc;
-       pgoff_t pgoff = page_to_pgoff(page);
+       enum ttu_flags ttu_flags = TTU_MIGRATION | TTU_IGNORE_MLOCK |
+               TTU_IGNORE_ACCESS | TTU_RMAP_LOCKED;
+       int i, ret;
 
        VM_BUG_ON_PAGE(!PageHead(page), page);
 
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root, pgoff,
-                       pgoff + HPAGE_PMD_NR - 1) {
-               unsigned long address = __vma_address(page, avc->vma);
-
-               mmu_notifier_invalidate_range_start(avc->vma->vm_mm,
-                               address, address + HPAGE_PMD_SIZE);
-               freeze_page_vma(avc->vma, page, address);
-               mmu_notifier_invalidate_range_end(avc->vma->vm_mm,
-                               address, address + HPAGE_PMD_SIZE);
-       }
-}
-
-static void unfreeze_page_vma(struct vm_area_struct *vma, struct page *page,
-               unsigned long address)
-{
-       spinlock_t *ptl;
-       pmd_t *pmd;
-       pte_t *pte, entry;
-       swp_entry_t swp_entry;
-       unsigned long haddr = address & HPAGE_PMD_MASK;
-       int i, nr = HPAGE_PMD_NR;
-
-       /* Skip pages which doesn't belong to the VMA */
-       if (address < vma->vm_start) {
-               int off = (vma->vm_start - address) >> PAGE_SHIFT;
-               page += off;
-               nr -= off;
-               address = vma->vm_start;
-       }
-
-       pmd = mm_find_pmd(vma->vm_mm, address);
-       if (!pmd)
-               return;
-
-       pte = pte_offset_map_lock(vma->vm_mm, pmd, address, &ptl);
-       for (i = 0; i < nr; i++, address += PAGE_SIZE, page++, pte++) {
-               /*
-                * We've just crossed page table boundary: need to map next one.
-                * It can happen if THP was mremaped to non-PMD aligned address.
-                */
-               if (unlikely(address == haddr + HPAGE_PMD_SIZE)) {
-                       pte_unmap_unlock(pte - 1, ptl);
-                       pmd = mm_find_pmd(vma->vm_mm, address);
-                       if (!pmd)
-                               return;
-                       pte = pte_offset_map_lock(vma->vm_mm, pmd,
-                                       address, &ptl);
-               }
-
-               if (!is_swap_pte(*pte))
-                       continue;
-
-               swp_entry = pte_to_swp_entry(*pte);
-               if (!is_migration_entry(swp_entry))
-                       continue;
-               if (migration_entry_to_page(swp_entry) != page)
-                       continue;
-
-               get_page(page);
-               page_add_anon_rmap(page, vma, address, false);
-
-               entry = pte_mkold(mk_pte(page, vma->vm_page_prot));
-               if (PageDirty(page))
-                       entry = pte_mkdirty(entry);
-               if (is_write_migration_entry(swp_entry))
-                       entry = maybe_mkwrite(entry, vma);
-
-               flush_dcache_page(page);
-               set_pte_at(vma->vm_mm, address, pte, entry);
+       /* We only need TTU_SPLIT_HUGE_PMD once */
+       ret = try_to_unmap(page, ttu_flags | TTU_SPLIT_HUGE_PMD);
+       for (i = 1; !ret && i < HPAGE_PMD_NR; i++) {
+               /* Cut short if the page is unmapped */
+               if (page_count(page) == 1)
+                       return;
 
-               /* No need to invalidate - it was non-present before */
-               update_mmu_cache(vma, address, pte);
+               ret = try_to_unmap(page + i, ttu_flags);
        }
-       pte_unmap_unlock(pte - 1, ptl);
+       VM_BUG_ON(ret);
 }
 
-static void unfreeze_page(struct anon_vma *anon_vma, struct page *page)
+static void unfreeze_page(struct page *page)
 {
-       struct anon_vma_chain *avc;
-       pgoff_t pgoff = page_to_pgoff(page);
-
-       anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,
-                       pgoff, pgoff + HPAGE_PMD_NR - 1) {
-               unsigned long address = __vma_address(page, avc->vma);
+       int i;
 
-               mmu_notifier_invalidate_range_start(avc->vma->vm_mm,
-                               address, address + HPAGE_PMD_SIZE);
-               unfreeze_page_vma(avc->vma, page, address);
-               mmu_notifier_invalidate_range_end(avc->vma->vm_mm,
-                               address, address + HPAGE_PMD_SIZE);
-       }
+       for (i = 0; i < HPAGE_PMD_NR; i++)
+               remove_migration_ptes(page + i, page + i, true);
 }
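
Taken together, the split path is now freeze_page() -> __split_huge_page() -> unfreeze_page(): every mapping of the THP is replaced by a migration entry, the compound page is torn down, and the generic remove_migration_ptes() re-maps each subpage individually. For readability, what the TTU flag combination in freeze_page() asks for (flag semantics as defined in include/linux/rmap.h; this comment block is an annotation, not part of the patch):

        /*
         * TTU_MIGRATION      - install migration entries rather than dropping
         *                      the mappings outright
         * TTU_IGNORE_MLOCK   - unmap even from mlocked VMAs
         * TTU_IGNORE_ACCESS  - ignore the accessed/young bits; we must unmap
         * TTU_RMAP_LOCKED    - the caller (split_huge_page_to_list) already
         *                      holds the anon_vma lock
         * TTU_SPLIT_HUGE_PMD - the first call also splits any huge pmd
         *                      mapping the page, via split_huge_pmd_address()
         */
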
 
-static int __split_huge_page_tail(struct page *head, int tail,
+static void __split_huge_page_tail(struct page *head, int tail,
                struct lruvec *lruvec, struct list_head *list)
 {
-       int mapcount;
        struct page *page_tail = head + tail;
 
-       mapcount = atomic_read(&page_tail->_mapcount) + 1;
-       VM_BUG_ON_PAGE(atomic_read(&page_tail->_count) != 0, page_tail);
+       VM_BUG_ON_PAGE(atomic_read(&page_tail->_mapcount) != -1, page_tail);
+       VM_BUG_ON_PAGE(page_ref_count(page_tail) != 0, page_tail);
 
        /*
         * tail_page->_count is zero and not changing from under us. But
         * get_page_unless_zero() may be running from under us on the
-        * tail_page. If we used atomic_set() below instead of atomic_add(), we
+        * tail_page. If we used atomic_set() below instead of atomic_inc(), we
         * would then run atomic_set() concurrently with
         * get_page_unless_zero(), and atomic_set() is implemented in C not
         * using locked ops. spin_unlock on x86 sometime uses locked ops
         * because of PPro errata 66, 92, so unless somebody can guarantee
         * atomic_set() here would be safe on all archs (and not only on x86),
-        * it's safer to use atomic_add().
+        * it's safer to use atomic_inc().
         */
-       atomic_add(mapcount + 1, &page_tail->_count);
-
+       page_ref_inc(page_tail);
 
        page_tail->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
        page_tail->flags |= (head->flags &
@@ -3275,8 +3153,6 @@ static int __split_huge_page_tail(struct page *head, int tail,
        page_tail->index = head->index + tail;
        page_cpupid_xchg_last(page_tail, page_cpupid_last(head));
        lru_add_page_tail(head, page_tail, lruvec, list);
-
-       return mapcount;
 }
 
 static void __split_huge_page(struct page *page, struct list_head *list)
@@ -3284,7 +3160,7 @@ static void __split_huge_page(struct page *page, struct list_head *list)
        struct page *head = compound_head(page);
        struct zone *zone = page_zone(head);
        struct lruvec *lruvec;
-       int i, tail_mapcount;
+       int i;
 
        /* prevent PageLRU to go away from under us, and freeze lru stats */
        spin_lock_irq(&zone->lru_lock);
@@ -3293,15 +3169,13 @@ static void __split_huge_page(struct page *page, struct list_head *list)
        /* complete memcg works before add pages to LRU */
        mem_cgroup_split_huge_fixup(head);
 
-       tail_mapcount = 0;
        for (i = HPAGE_PMD_NR - 1; i >= 1; i--)
-               tail_mapcount += __split_huge_page_tail(head, i, lruvec, list);
-       atomic_sub(tail_mapcount, &head->_count);
+               __split_huge_page_tail(head, i, lruvec, list);
 
        ClearPageCompound(head);
        spin_unlock_irq(&zone->lru_lock);
 
-       unfreeze_page(page_anon_vma(head), head);
+       unfreeze_page(head);
 
        for (i = 0; i < HPAGE_PMD_NR; i++) {
                struct page *subpage = head + i;
@@ -3397,7 +3271,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
        }
 
        mlocked = PageMlocked(page);
-       freeze_page(anon_vma, head);
+       freeze_page(head);
        VM_BUG_ON_PAGE(compound_mapcount(head), head);
 
        /* Make sure the page is not on per-CPU pagevec as it takes pin */
@@ -3426,7 +3300,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                BUG();
        } else {
                spin_unlock_irqrestore(&pgdata->split_queue_lock, flags);
-               unfreeze_page(anon_vma, head);
+               unfreeze_page(head);
                ret = -EBUSY;
        }
 
@@ -3461,6 +3335,7 @@ void deferred_split_huge_page(struct page *page)
 
        spin_lock_irqsave(&pgdata->split_queue_lock, flags);
        if (list_empty(page_deferred_list(page))) {
+               count_vm_event(THP_DEFERRED_SPLIT_PAGE);
                list_add_tail(page_deferred_list(page), &pgdata->split_queue);
                pgdata->split_queue_len++;
        }
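
The new THP_DEFERRED_SPLIT_PAGE event should surface in /proc/vmstat under the usual lower-cased name, presumably thp_deferred_split_page. A quick user-space check (counter name assumed from the enum; /proc/vmstat uses the standard "name value" per-line format):

#include <stdio.h>
#include <string.h>

int main(void)
{
        char name[64];
        unsigned long long val;
        FILE *f = fopen("/proc/vmstat", "r");

        if (!f)
                return 1;
        while (fscanf(f, "%63s %llu", name, &val) == 2) {
                if (!strcmp(name, "thp_deferred_split_page"))
                        printf("deferred THP splits so far: %llu\n", val);
        }
        fclose(f);
        return 0;
}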