thp: reduce usage of huge zero page's atomic counter

author Aaron Lu <aaron.lu@intel.com>

Sat, 8 Oct 2016 00:00:08 +0000 (17:00 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sat, 8 Oct 2016 01:46:28 +0000 (18:46 -0700)
author Aaron Lu <aaron.lu@intel.com>
Sat, 8 Oct 2016 00:00:08 +0000 (17:00 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 8 Oct 2016 01:46:28 +0000 (18:46 -0700)
diff --git a/fs/dax.c b/fs/dax.c

index cc025f8..014defd 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1036,7 +1036,7 @@ int dax_pmd_fault(struct vm_area_struct *vma, unsigned long address,
         if (!write && !buffer_mapped(&bh)) {
                 spinlock_t *ptl;
                 pmd_t entry;
-               struct page *zero_page = get_huge_zero_page();
+               struct page *zero_page = mm_get_huge_zero_page(vma->vm_mm);
  
                 if (unlikely(!zero_page)) {
                         dax_pmd_dbg(&bh, address, "no zero page");
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h

index 4fca526..9b9f65d 100644 (file)
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -156,8 +156,8 @@ static inline bool is_huge_zero_pmd(pmd_t pmd)
         return is_huge_zero_page(pmd_page(pmd));
  }
  
-struct page *get_huge_zero_page(void);
-void put_huge_zero_page(void);
+struct page *mm_get_huge_zero_page(struct mm_struct *mm);
+void mm_put_huge_zero_page(struct mm_struct *mm);
  
  #define mk_huge_pmd(page, prot) pmd_mkhuge(mk_pmd(page, prot))
  
@@ -220,9 +220,9 @@ static inline bool is_huge_zero_page(struct page *page)
         return false;
  }
  
-static inline void put_huge_zero_page(void)
+static inline void mm_put_huge_zero_page(struct mm_struct *mm)
  {
-       BUILD_BUG();
+       return;
  }
  
  static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma,
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 6bee6f9..348f51b 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -526,6 +526,7 @@ static inline int get_dumpable(struct mm_struct *mm)
  #define MMF_RECALC_UPROBES     20      /* MMF_HAS_UPROBES can be wrong */
  #define MMF_OOM_SKIP           21      /* mm is of no interest for the OOM killer */
  #define MMF_UNSTABLE           22      /* mm is unstable for copy_from_user */
+#define MMF_HUGE_ZERO_PAGE     23      /* mm has ever used the global huge zero page */
  
  #define MMF_INIT_MASK          (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
  
diff --git a/kernel/fork.c b/kernel/fork.c

index 9a8ec66..6d42242 100644 (file)
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -854,6 +854,7 @@ static inline void __mmput(struct mm_struct *mm)
         ksm_exit(mm);
         khugepaged_exit(mm); /* must run before exit_mmap */
         exit_mmap(mm);
+       mm_put_huge_zero_page(mm);
         set_mm_exe_file(mm, NULL);
         if (!list_empty(&mm->mmlist)) {
                 spin_lock(&mmlist_lock);
diff --git a/mm/huge_memory.c b/mm/huge_memory.c

index a0b0e56..12b9f1a 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -59,7 +59,7 @@ static struct shrinker deferred_split_shrinker;
  static atomic_t huge_zero_refcount;
  struct page *huge_zero_page __read_mostly;
  
-struct page *get_huge_zero_page(void)
+static struct page *get_huge_zero_page(void)
  {
         struct page *zero_page;
  retry:
@@ -86,7 +86,7 @@ retry:
         return READ_ONCE(huge_zero_page);
  }
  
-void put_huge_zero_page(void)
+static void put_huge_zero_page(void)
  {
         /*
          * Counter should never go to zero here. Only shrinker can put
@@ -95,6 +95,26 @@ void put_huge_zero_page(void)
         BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
  }
  
+struct page *mm_get_huge_zero_page(struct mm_struct *mm)
+{
+       if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+               return READ_ONCE(huge_zero_page);
+
+       if (!get_huge_zero_page())
+               return NULL;
+
+       if (test_and_set_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+               put_huge_zero_page();
+
+       return READ_ONCE(huge_zero_page);
+}
+
+void mm_put_huge_zero_page(struct mm_struct *mm)
+{
+       if (test_bit(MMF_HUGE_ZERO_PAGE, &mm->flags))
+               put_huge_zero_page();
+}
+
  static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
                                         struct shrink_control *sc)
  {
@@ -644,7 +664,7 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
                 pgtable = pte_alloc_one(vma->vm_mm, haddr);
                 if (unlikely(!pgtable))
                         return VM_FAULT_OOM;
-               zero_page = get_huge_zero_page();
+               zero_page = mm_get_huge_zero_page(vma->vm_mm);
                 if (unlikely(!zero_page)) {
                         pte_free(vma->vm_mm, pgtable);
                         count_vm_event(THP_FAULT_FALLBACK);
@@ -666,10 +686,8 @@ int do_huge_pmd_anonymous_page(struct fault_env *fe)
                         }
                 } else
                         spin_unlock(fe->ptl);
-               if (!set) {
+               if (!set)
                         pte_free(vma->vm_mm, pgtable);
-                       put_huge_zero_page();
-               }
                 return ret;
         }
         gfp = alloc_hugepage_direct_gfpmask(vma);
@@ -823,7 +841,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                  * since we already have a zero page to copy. It just takes a
                  * reference.
                  */
-               zero_page = get_huge_zero_page();
+               zero_page = mm_get_huge_zero_page(dst_mm);
                 set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd,
                                 zero_page);
                 ret = 0;
@@ -1081,7 +1099,6 @@ alloc:
                 update_mmu_cache_pmd(vma, fe->address, fe->pmd);
                 if (!page) {
                         add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
-                       put_huge_zero_page();
                 } else {
                         VM_BUG_ON_PAGE(!PageHead(page), page);
                         page_remove_rmap(page, true);
@@ -1542,7 +1559,6 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma,
         }
         smp_wmb(); /* make pte visible before pmd */
         pmd_populate(mm, pmd, pgtable);
-       put_huge_zero_page();
  }
  
  static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
@@ -1565,8 +1581,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
  
         if (!vma_is_anonymous(vma)) {
                 _pmd = pmdp_huge_clear_flush_notify(vma, haddr, pmd);
-               if (is_huge_zero_pmd(_pmd))
-                       put_huge_zero_page();
                 if (vma_is_dax(vma))
                         return;
                 page = pmd_page(_pmd);
diff --git a/mm/swap.c b/mm/swap.c

index 75c63bb..4dcf852 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -748,10 +748,8 @@ void release_pages(struct page **pages, int nr, bool cold)
                         locked_pgdat = NULL;
                 }
  
-               if (is_huge_zero_page(page)) {
-                       put_huge_zero_page();
+               if (is_huge_zero_page(page))
                         continue;
-               }
  
                 page = compound_head(page);
                 if (!put_page_testzero(page))
diff --git a/mm/swap_state.c b/mm/swap_state.c

index 268b819..8679c99 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -254,9 +254,7 @@ static inline void free_swap_cache(struct page *page)
  void free_page_and_swap_cache(struct page *page)
  {
         free_swap_cache(page);
-       if (is_huge_zero_page(page))
-               put_huge_zero_page();
-       else
+       if (!is_huge_zero_page(page))
                 put_page(page);
  }
author	Aaron Lu <aaron.lu@intel.com>
	Sat, 8 Oct 2016 00:00:08 +0000 (17:00 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sat, 8 Oct 2016 01:46:28 +0000 (18:46 -0700)
fs/dax.c		patch | blob | history
include/linux/huge_mm.h		patch | blob | history
include/linux/sched.h		patch | blob | history
kernel/fork.c		patch | blob | history
mm/huge_memory.c		patch | blob | history
mm/swap.c		patch | blob | history
mm/swap_state.c		patch | blob | history