Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
diff --git a/mm/memory.c b/mm/memory.c
index e2bbba4..33a3dbe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -69,8 +69,8 @@
 
 #include "internal.h"
 
-#ifdef LAST_NID_NOT_IN_PAGE_FLAGS
-#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_nid.
+#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS
+#warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid.
 #endif
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -837,6 +837,8 @@ copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                                         */
                                        make_migration_entry_read(&entry);
                                        pte = swp_entry_to_pte(entry);
+                                       if (pte_swp_soft_dirty(*src_pte))
+                                               pte = pte_swp_mksoft_dirty(pte);
                                        set_pte_at(src_mm, addr, src_pte, pte);
                                }
                        }
@@ -2719,6 +2721,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                get_page(dirty_page);
 
 reuse:
+               /*
+                * Clear the page's cpupid information as the existing
+                * information potentially belongs to a now completely
+                * unrelated process.
+                */
+               if (old_page)
+                       page_cpupid_xchg_last(old_page, (1 << LAST_CPUPID_SHIFT) - 1);
+
                flush_cache_page(vma, address, pte_pfn(orig_pte));
                entry = pte_mkyoung(orig_pte);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -3519,13 +3529,16 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,
-                               unsigned long addr, int current_nid)
+                               unsigned long addr, int page_nid,
+                               int *flags)
 {
        get_page(page);
 
        count_vm_numa_event(NUMA_HINT_FAULTS);
-       if (current_nid == numa_node_id())
+       if (page_nid == numa_node_id()) {
                count_vm_numa_event(NUMA_HINT_FAULTS_LOCAL);
+               *flags |= TNF_FAULT_LOCAL;
+       }
 
        return mpol_misplaced(page, vma, addr);
 }
@@ -3535,9 +3548,11 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct page *page = NULL;
        spinlock_t *ptl;
-       int current_nid = -1;
+       int page_nid = -1;
+       int last_cpupid;
        int target_nid;
        bool migrated = false;
+       int flags = 0;
 
        /*
        * The "pte" at this point cannot be used safely without
@@ -3564,123 +3579,44 @@ int do_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
                pte_unmap_unlock(ptep, ptl);
                return 0;
        }
+       BUG_ON(is_zero_pfn(page_to_pfn(page)));
+
+       /*
+        * Avoid grouping on DSO/COW pages specifically, and on RO
+        * pages in general; RO pages shouldn't hurt as much anyway since
+        * they can be in shared cache state.
+        */
+       if (!pte_write(pte))
+               flags |= TNF_NO_GROUP;
 
-       current_nid = page_to_nid(page);
-       target_nid = numa_migrate_prep(page, vma, addr, current_nid);
+       /*
+        * Flag if the page is shared between multiple address spaces. This
+        * is later used when determining whether to group tasks together.
+        */
+       if (page_mapcount(page) > 1 && (vma->vm_flags & VM_SHARED))
+               flags |= TNF_SHARED;
+
+       last_cpupid = page_cpupid_last(page);
+       page_nid = page_to_nid(page);
+       target_nid = numa_migrate_prep(page, vma, addr, page_nid, &flags);
        pte_unmap_unlock(ptep, ptl);
        if (target_nid == -1) {
-               /*
-                * Account for the fault against the current node if it not
-                * being replaced regardless of where the page is located.
-                */
-               current_nid = numa_node_id();
                put_page(page);
                goto out;
        }
 
        /* Migrate to the requested node */
-       migrated = migrate_misplaced_page(page, target_nid);
-       if (migrated)
-               current_nid = target_nid;
-
-out:
-       if (current_nid != -1)
-               task_numa_fault(current_nid, 1, migrated);
-       return 0;
-}
-
-/* NUMA hinting page fault entry point for regular pmds */
-#ifdef CONFIG_NUMA_BALANCING
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-                    unsigned long addr, pmd_t *pmdp)
-{
-       pmd_t pmd;
-       pte_t *pte, *orig_pte;
-       unsigned long _addr = addr & PMD_MASK;
-       unsigned long offset;
-       spinlock_t *ptl;
-       bool numa = false;
-       int local_nid = numa_node_id();
-
-       spin_lock(&mm->page_table_lock);
-       pmd = *pmdp;
-       if (pmd_numa(pmd)) {
-               set_pmd_at(mm, _addr, pmdp, pmd_mknonnuma(pmd));
-               numa = true;
+       migrated = migrate_misplaced_page(page, vma, target_nid);
+       if (migrated) {
+               page_nid = target_nid;
+               flags |= TNF_MIGRATED;
        }
-       spin_unlock(&mm->page_table_lock);
 
-       if (!numa)
-               return 0;
-
-       /* we're in a page fault so some vma must be in the range */
-       BUG_ON(!vma);
-       BUG_ON(vma->vm_start >= _addr + PMD_SIZE);
-       offset = max(_addr, vma->vm_start) & ~PMD_MASK;
-       VM_BUG_ON(offset >= PMD_SIZE);
-       orig_pte = pte = pte_offset_map_lock(mm, pmdp, _addr, &ptl);
-       pte += offset >> PAGE_SHIFT;
-       for (addr = _addr + offset; addr < _addr + PMD_SIZE; pte++, addr += PAGE_SIZE) {
-               pte_t pteval = *pte;
-               struct page *page;
-               int curr_nid = local_nid;
-               int target_nid;
-               bool migrated;
-               if (!pte_present(pteval))
-                       continue;
-               if (!pte_numa(pteval))
-                       continue;
-               if (addr >= vma->vm_end) {
-                       vma = find_vma(mm, addr);
-                       /* there's a pte present so there must be a vma */
-                       BUG_ON(!vma);
-                       BUG_ON(addr < vma->vm_start);
-               }
-               if (pte_numa(pteval)) {
-                       pteval = pte_mknonnuma(pteval);
-                       set_pte_at(mm, addr, pte, pteval);
-               }
-               page = vm_normal_page(vma, addr, pteval);
-               if (unlikely(!page))
-                       continue;
-               /* only check non-shared pages */
-               if (unlikely(page_mapcount(page) != 1))
-                       continue;
-
-               /*
-                * Note that the NUMA fault is later accounted to either
-                * the node that is currently running or where the page is
-                * migrated to.
-                */
-               curr_nid = local_nid;
-               target_nid = numa_migrate_prep(page, vma, addr,
-                                              page_to_nid(page));
-               if (target_nid == -1) {
-                       put_page(page);
-                       continue;
-               }
-
-               /* Migrate to the requested node */
-               pte_unmap_unlock(pte, ptl);
-               migrated = migrate_misplaced_page(page, target_nid);
-               if (migrated)
-                       curr_nid = target_nid;
-               task_numa_fault(curr_nid, 1, migrated);
-
-               pte = pte_offset_map_lock(mm, pmdp, addr, &ptl);
-       }
-       pte_unmap_unlock(orig_pte, ptl);
-
-       return 0;
-}
-#else
-static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
-                    unsigned long addr, pmd_t *pmdp)
-{
-       BUG();
+out:
+       if (page_nid != -1)
+               task_numa_fault(last_cpupid, page_nid, 1, flags);
        return 0;
 }
-#endif /* CONFIG_NUMA_BALANCING */
 
 /*
  * These routines also need to handle stuff like marking pages dirty
@@ -3820,8 +3756,8 @@ retry:
                }
        }
 
-       if (pmd_numa(*pmd))
-               return do_pmd_numa_page(mm, vma, address, pmd);
+       /* THP should already have been handled */
+       BUG_ON(pmd_numa(*pmd));
 
        /*
         * Use __pte_alloc instead of pte_alloc_map, because we can't
@@ -3863,15 +3799,21 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         * space.  Kernel faults are handled more gracefully.
         */
        if (flags & FAULT_FLAG_USER)
-               mem_cgroup_enable_oom();
+               mem_cgroup_oom_enable();
 
        ret = __handle_mm_fault(mm, vma, address, flags);
 
-       if (flags & FAULT_FLAG_USER)
-               mem_cgroup_disable_oom();
-
-       if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
-               mem_cgroup_oom_synchronize();
+       if (flags & FAULT_FLAG_USER) {
+               mem_cgroup_oom_disable();
+               /*
+                * The task may have entered a memcg OOM situation but
+                * if the allocation error was handled gracefully (no
+                * VM_FAULT_OOM), there is no need to kill anything.
+                * Just clean up the OOM state peacefully.
+                */
+               if (task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM))
+                       mem_cgroup_oom_synchronize(false);
+       }
 
        return ret;
 }