[PATCH] overcommit: add calculate_totalreserve_pages()

[cascardo/linux.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index b7f14a4..97d6827 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -49,9 +49,9 @@ nodemask_t node_online_map __read_mostly = { { [0] = 1UL } };
  EXPORT_SYMBOL(node_online_map);
  nodemask_t node_possible_map __read_mostly = NODE_MASK_ALL;
  EXPORT_SYMBOL(node_possible_map);
-struct pglist_data *pgdat_list __read_mostly;
  unsigned long totalram_pages __read_mostly;
  unsigned long totalhigh_pages __read_mostly;
+unsigned long totalreserve_pages __read_mostly;
  long nr_swap_pages;
  int percpu_pagelist_fraction;
  
@@ -152,7 +152,8 @@ static void bad_page(struct page *page)
                         1 << PG_reclaim |
                         1 << PG_slab    |
                         1 << PG_swapcache |
-                       1 << PG_writeback );
+                       1 << PG_writeback |
+                       1 << PG_buddy );
         set_page_count(page, 0);
         reset_page_mapcount(page);
         page->mapping = NULL;
@@ -237,12 +238,12 @@ static inline unsigned long page_order(struct page *page) {
  
  static inline void set_page_order(struct page *page, int order) {
         set_page_private(page, order);
-       __SetPagePrivate(page);
+       __SetPageBuddy(page);
  }
  
  static inline void rmv_page_order(struct page *page)
  {
-       __ClearPagePrivate(page);
+       __ClearPageBuddy(page);
         set_page_private(page, 0);
  }
  
@@ -281,11 +282,13 @@ __find_combined_index(unsigned long page_idx, unsigned int order)
   * This function checks whether a page is free && is the buddy
   * we can do coalesce a page and its buddy if
   * (a) the buddy is not in a hole &&
- * (b) the buddy is free &&
- * (c) the buddy is on the buddy system &&
- * (d) a page and its buddy have the same order.
- * for recording page's order, we use page_private(page) and PG_private.
+ * (b) the buddy is in the buddy system &&
+ * (c) a page and its buddy have the same order.
+ *
+ * For recording whether a page is in the buddy system, we use PG_buddy.
+ * Setting, clearing, and testing PG_buddy is serialized by zone->lock.
   *
+ * For recording page's order, we use page_private(page).
   */
  static inline int page_is_buddy(struct page *page, int order)
  {
@@ -294,10 +297,10 @@ static inline int page_is_buddy(struct page *page, int order)
                 return 0;
  #endif
  
-       if (PagePrivate(page)           &&
-           (page_order(page) == order) &&
-            page_count(page) == 0)
+       if (PageBuddy(page) && page_order(page) == order) {
+               BUG_ON(page_count(page) != 0);
                 return 1;
+       }
         return 0;
  }
  
@@ -314,7 +317,7 @@ static inline int page_is_buddy(struct page *page, int order)
   * as necessary, plus some accounting needed to play nicely with other
   * parts of the VM system.
   * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with PG_Private.Page's
+ * free pages of length of (1 << order) and marked with PG_buddy. Page's
   * order is recorded in page_private(page) field.
   * So when we are allocating or freeing one, we can derive the state of the
   * other.  That is, if we allocate a small block, and both were   
@@ -377,7 +380,8 @@ static inline int free_pages_check(struct page *page)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
         if (PageDirty(page))
                 __ClearPageDirty(page);
@@ -525,7 +529,8 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
                         1 << PG_slab    |
                         1 << PG_swapcache |
                         1 << PG_writeback |
-                       1 << PG_reserved ))))
+                       1 << PG_reserved |
+                       1 << PG_buddy ))))
                 bad_page(page);
  
         /*
@@ -943,7 +948,8 @@ restart:
                 goto got_pg;
  
         do {
-               wakeup_kswapd(*z, order);
+               if (cpuset_zone_allowed(*z, gfp_mask))
+                       wakeup_kswapd(*z, order);
         } while (*(++z));
  
         /*
@@ -1200,7 +1206,7 @@ unsigned int nr_free_highpages (void)
         pg_data_t *pgdat;
         unsigned int pages = 0;
  
-       for_each_pgdat(pgdat)
+       for_each_online_pgdat(pgdat)
                 pages += pgdat->node_zones[ZONE_HIGHMEM].free_pages;
  
         return pages;
@@ -1342,7 +1348,7 @@ void get_zone_counts(unsigned long *active,
         *active = 0;
         *inactive = 0;
         *free = 0;
-       for_each_pgdat(pgdat) {
+       for_each_online_pgdat(pgdat) {
                 unsigned long l, m, n;
                 __get_zone_counts(&l, &m, &n, pgdat);
                 *active += l;
@@ -2028,8 +2034,9 @@ static __meminit void zone_pcp_init(struct zone *zone)
                 setup_pageset(zone_pcp(zone,cpu), batch);
  #endif
         }
-       printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-               zone->name, zone->present_pages, batch);
+       if (zone->present_pages)
+               printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
+                       zone->name, zone->present_pages, batch);
  }
  
  static __meminit void init_currently_empty_zone(struct zone *zone,
@@ -2040,7 +2047,6 @@ static __meminit void init_currently_empty_zone(struct zone *zone,
         zone_wait_table_init(zone, size);
         pgdat->nr_zones = zone_idx(zone) + 1;
  
-       zone->zone_mem_map = pfn_to_page(zone_start_pfn);
         zone->zone_start_pfn = zone_start_pfn;
  
         memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
@@ -2168,8 +2174,9 @@ static void *frag_start(struct seq_file *m, loff_t *pos)
  {
         pg_data_t *pgdat;
         loff_t node = *pos;
-
-       for (pgdat = pgdat_list; pgdat && node; pgdat = pgdat->pgdat_next)
+       for (pgdat = first_online_pgdat();
+            pgdat && node;
+            pgdat = next_online_pgdat(pgdat))
                 --node;
  
         return pgdat;
@@ -2180,7 +2187,7 @@ static void *frag_next(struct seq_file *m, void *arg, loff_t *pos)
         pg_data_t *pgdat = (pg_data_t *)arg;
  
         (*pos)++;
-       return pgdat->pgdat_next;
+       return next_online_pgdat(pgdat);
  }
  
  static void frag_stop(struct seq_file *m, void *arg)
@@ -2470,6 +2477,38 @@ void __init page_alloc_init(void)
         hotcpu_notifier(page_alloc_cpu_notify, 0);
  }
  
+/*
+ * calculate_totalreserve_pages - sum the per-zone reserve of every online node
+ *     into totalreserve_pages; rerun when lowmem_reserve or pages_high change.
+ */
+static void calculate_totalreserve_pages(void)
+{
+       struct pglist_data *pgdat;
+       unsigned long reserve_pages = 0;
+       int i, j;
+
+       for_each_online_pgdat(pgdat) {
+               for (i = 0; i < MAX_NR_ZONES; i++) {
+                       struct zone *zone = pgdat->node_zones + i;
+                       unsigned long max = 0;
+
+                       /* Largest lowmem_reserve[] entry at index i or above */
+                       for (j = i; j < MAX_NR_ZONES; j++) {
+                               if (zone->lowmem_reserve[j] > max)
+                                       max = zone->lowmem_reserve[j];
+                       }
+
+                       /* pages_high is counted as reserved on top of that */
+                       max += zone->pages_high;
+
+                       if (max > zone->present_pages)
+                               max = zone->present_pages; /* cap at present_pages */
+                       reserve_pages += max;
+               }
+       }
+       totalreserve_pages = reserve_pages;
+}
+
  /*
   * setup_per_zone_lowmem_reserve - called whenever
   *     sysctl_lower_zone_reserve_ratio changes.  Ensures that each zone
@@ -2481,7 +2520,7 @@ static void setup_per_zone_lowmem_reserve(void)
         struct pglist_data *pgdat;
         int j, idx;
  
-       for_each_pgdat(pgdat) {
+       for_each_online_pgdat(pgdat) {
                 for (j = 0; j < MAX_NR_ZONES; j++) {
                         struct zone *zone = pgdat->node_zones + j;
                         unsigned long present_pages = zone->present_pages;
@@ -2501,6 +2540,9 @@ static void setup_per_zone_lowmem_reserve(void)
                         }
                 }
         }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
  }
  
  /*
@@ -2555,6 +2597,9 @@ void setup_per_zone_pages_min(void)
                 zone->pages_high  = zone->pages_min + tmp / 2;
                 spin_unlock_irqrestore(&zone->lru_lock, flags);
         }
+
+       /* update totalreserve_pages */
+       calculate_totalreserve_pages();
  }
  
  /*
@@ -2700,8 +2745,7 @@ void *__init alloc_large_system_hash(const char *tablename,
                 else
                         numentries <<= (PAGE_SHIFT - scale);
         }
-       /* rounded up to nearest power of 2 in size */
-       numentries = 1UL << (long_log2(numentries) + 1);
+       numentries = roundup_pow_of_two(numentries);
  
         /* limit allocation size to 1/16 total memory by default */
         if (max == 0) {
@@ -2744,3 +2788,44 @@ void *__init alloc_large_system_hash(const char *tablename,
  
         return table;
  }
+
+#ifdef CONFIG_OUT_OF_LINE_PFN_TO_PAGE
+/*
+ * Out-of-line pfn <-> page translation; one pfn_to_page()/page_to_pfn() pair
+ * is built per memory model (see asm-generic/memory_model.h).
+ */
+#if defined(CONFIG_FLATMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+       return mem_map + (pfn - ARCH_PFN_OFFSET);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+       return (page - mem_map) + ARCH_PFN_OFFSET;
+}
+#elif defined(CONFIG_DISCONTIGMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+       int nid = arch_pfn_to_nid(pfn);
+       return NODE_DATA(nid)->node_mem_map + arch_local_page_offset(pfn,nid);
+}
+unsigned long page_to_pfn(struct page *page)
+{
+       struct pglist_data *pgdat = NODE_DATA(page_to_nid(page));
+       return (page - pgdat->node_mem_map) + pgdat->node_start_pfn;
+}
+#elif defined(CONFIG_SPARSEMEM)
+struct page *pfn_to_page(unsigned long pfn)
+{
+       return __section_mem_map_addr(__pfn_to_section(pfn)) + pfn;
+}
+
+unsigned long page_to_pfn(struct page *page)
+{
+       long section_id = page_to_section(page);
+       return page - __section_mem_map_addr(__nr_to_section(section_id));
+}
+#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+EXPORT_SYMBOL(pfn_to_page);
+EXPORT_SYMBOL(page_to_pfn);
+#endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */