mm: change anon_vma linking to fix multi-process server scalability issue

[cascardo/linux.git] / include / linux / mm.h
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 8b2fa85..8e2841a 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -12,6 +12,7 @@
  #include <linux/prio_tree.h>
  #include <linux/debug_locks.h>
  #include <linux/mm_types.h>
+#include <linux/range.h>
  
  struct mempolicy;
  struct anon_vma;
@@ -96,7 +97,11 @@ extern unsigned int kobjsize(const void *objp);
  #define VM_NORESERVE   0x00200000      /* should the VM suppress accounting */
  #define VM_HUGETLB     0x00400000      /* Huge TLB Page VM */
  #define VM_NONLINEAR   0x00800000      /* Is non-linear (remap_file_pages) */
+#ifdef CONFIG_MMU
+#define VM_LOCK_RMAP   0x01000000      /* Do not follow this rmap (mmu mmap) */
+#else
  #define VM_MAPPED_COPY 0x01000000      /* T if mapped copy of data (nommu mmap) */
+#endif
  #define VM_INSERTPAGE  0x02000000      /* The vma has had "vm_insert_page()" done on it */
  #define VM_ALWAYSDUMP  0x04000000      /* Always include in core dumps */
  
@@ -869,6 +874,108 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
   */
  int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
                           struct page **pages);
+/*
+ * Per-process (per-mm_struct) statistics.
+ */
+#if defined(SPLIT_RSS_COUNTING)
+/*
+ * The mm counters are not protected by its page_table_lock,
+ * so must be incremented atomically.
+ */
+static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
+{
+       atomic_long_set(&mm->rss_stat.count[member], value);
+}
+
+unsigned long get_mm_counter(struct mm_struct *mm, int member);
+
+static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
+{
+       atomic_long_add(value, &mm->rss_stat.count[member]);
+}
+
+static inline void inc_mm_counter(struct mm_struct *mm, int member)
+{
+       atomic_long_inc(&mm->rss_stat.count[member]);
+}
+
+static inline void dec_mm_counter(struct mm_struct *mm, int member)
+{
+       atomic_long_dec(&mm->rss_stat.count[member]);
+}
+
+#else  /* !SPLIT_RSS_COUNTING */
+/*
+ * The mm counters are protected by its page_table_lock,
+ * so can be incremented directly.
+ */
+static inline void set_mm_counter(struct mm_struct *mm, int member, long value)
+{
+       mm->rss_stat.count[member] = value;
+}
+
+static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
+{
+       return mm->rss_stat.count[member];
+}
+
+static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
+{
+       mm->rss_stat.count[member] += value;
+}
+
+static inline void inc_mm_counter(struct mm_struct *mm, int member)
+{
+       mm->rss_stat.count[member]++;
+}
+
+static inline void dec_mm_counter(struct mm_struct *mm, int member)
+{
+       mm->rss_stat.count[member]--;
+}
+
+#endif /* SPLIT_RSS_COUNTING */
+
+static inline unsigned long get_mm_rss(struct mm_struct *mm)
+{
+       return get_mm_counter(mm, MM_FILEPAGES) +
+               get_mm_counter(mm, MM_ANONPAGES);
+}
+
+static inline unsigned long get_mm_hiwater_rss(struct mm_struct *mm)
+{
+       return max(mm->hiwater_rss, get_mm_rss(mm));
+}
+
+static inline unsigned long get_mm_hiwater_vm(struct mm_struct *mm)
+{
+       return max(mm->hiwater_vm, mm->total_vm);
+}
+
+static inline void update_hiwater_rss(struct mm_struct *mm)
+{
+       unsigned long cur_rss = get_mm_rss(mm);
+
+       if (cur_rss > mm->hiwater_rss)  /* new peak: record it */
+               mm->hiwater_rss = cur_rss;
+}
+
+static inline void update_hiwater_vm(struct mm_struct *mm)
+{
+       if (mm->hiwater_vm < mm->total_vm)
+               mm->hiwater_vm = mm->total_vm;
+}
+
+static inline void setmax_mm_hiwater_rss(unsigned long *maxrss,
+                                        struct mm_struct *mm)
+{
+       unsigned long hiwater_rss = get_mm_hiwater_rss(mm);
+
+       if (*maxrss < hiwater_rss)
+               *maxrss = hiwater_rss;
+}
+
+void sync_mm_rss(struct task_struct *task, struct mm_struct *mm);
  
  /*
   * A callback you can register to apply pressure to ageable caches.
@@ -1049,6 +1156,10 @@ extern void get_pfn_range_for_nid(unsigned int nid,
  extern unsigned long find_min_pfn_with_active_regions(void);
  extern void free_bootmem_with_active_regions(int nid,
                                                 unsigned long max_low_pfn);
+int add_from_early_node_map(struct range *range, int az,
+                                  int nr_range, int nid);
+void *__alloc_memory_core_early(int nodeid, u64 size, u64 align,
+                                u64 goal, u64 limit);
  typedef int (*work_fn_t)(unsigned long, unsigned long, void *);
  extern void work_with_active_regions(int nid, work_fn_t work_fn, void *data);
  extern void sparse_memory_present_with_active_regions(int nid);
@@ -1081,11 +1192,7 @@ extern void si_meminfo(struct sysinfo * val);
  extern void si_meminfo_node(struct sysinfo *val, int nid);
  extern int after_bootmem;
  
-#ifdef CONFIG_NUMA
  extern void setup_per_cpu_pageset(void);
-#else
-static inline void setup_per_cpu_pageset(void) {}
-#endif
  
  extern void zone_pcp_update(struct zone *zone);
  
@@ -1113,7 +1220,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
  
  /* mmap.c */
  extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
         unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
  extern struct vm_area_struct *vma_merge(struct mm_struct *,
         struct vm_area_struct *prev, unsigned long addr, unsigned long end,
@@ -1321,12 +1428,19 @@ extern int randomize_va_space;
  const char * arch_vma_name(struct vm_area_struct *vma);
  void print_vma_addr(char *prefix, unsigned long rip);
  
+void sparse_mem_maps_populate_node(struct page **map_map,
+                                  unsigned long pnum_begin,
+                                  unsigned long pnum_end,
+                                  unsigned long map_count,
+                                  int nodeid);
+
  struct page *sparse_mem_map_populate(unsigned long pnum, int nid);
  pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
  pud_t *vmemmap_pud_populate(pgd_t *pgd, unsigned long addr, int node);
  pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
  pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
  void *vmemmap_alloc_block(unsigned long size, int node);
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
  void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
  int vmemmap_populate_basepages(struct page *start_page,
                                                 unsigned long pages, int node);