mm/page_alloc.c: remove unlikely() from the current_order test
index d4bcc20..b5855e5 100644
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 #include "internal.h"
 
+/* prevent >1 _updater_ of zone percpu pageset ->high and ->batch fields */
+static DEFINE_MUTEX(pcp_batch_high_lock);
+
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -100,6 +104,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 };
 EXPORT_SYMBOL(node_states);
 
+/* Protect totalram_pages and zone->managed_pages */
+static DEFINE_SPINLOCK(managed_page_count_lock);
+
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 /*
@@ -739,14 +746,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        local_irq_restore(flags);
 }
 
-/*
- * Read access to zone->managed_pages is safe because it's unsigned long,
- * but we still need to serialize writers. Currently all callers of
- * __free_pages_bootmem() except put_page_bootmem() should only be used
- * at boot time. So for shorter boot time, we shift the burden to
- * put_page_bootmem() to serialize writers.
- */
-void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
+void __init __free_pages_bootmem(struct page *page, unsigned int order)
 {
        unsigned int nr_pages = 1 << order;
        unsigned int loop;
@@ -781,11 +781,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
        set_page_refcounted(page);
        set_pageblock_migratetype(page, MIGRATE_CMA);
        __free_pages(page, pageblock_order);
-       totalram_pages += pageblock_nr_pages;
-#ifdef CONFIG_HIGHMEM
-       if (PageHighMem(page))
-               totalhigh_pages += pageblock_nr_pages;
-#endif
+       adjust_managed_page_count(page, pageblock_nr_pages);
 }
 #endif
 
@@ -1050,7 +1046,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                         * MIGRATE_CMA areas.
                         */
                        if (!is_migrate_cma(migratetype) &&
-                           (unlikely(current_order >= pageblock_order / 2) ||
+                           (current_order >= pageblock_order / 2 ||
                             start_migratetype == MIGRATE_RECLAIMABLE ||
                             page_group_by_mobility_disabled)) {
                                int pages;
@@ -1179,10 +1175,12 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 {
        unsigned long flags;
        int to_drain;
+       unsigned long batch;
 
        local_irq_save(flags);
-       if (pcp->count >= pcp->batch)
-               to_drain = pcp->batch;
+       batch = ACCESS_ONCE(pcp->batch);
+       if (pcp->count >= batch)
+               to_drain = batch;
        else
                to_drain = pcp->count;
        if (to_drain > 0) {
@@ -1350,8 +1348,9 @@ void free_hot_cold_page(struct page *page, int cold)
                list_add(&page->lru, &pcp->lists[migratetype]);
        pcp->count++;
        if (pcp->count >= pcp->high) {
-               free_pcppages_bulk(zone, pcp->batch, pcp);
-               pcp->count -= pcp->batch;
+               unsigned long batch = ACCESS_ONCE(pcp->batch);
+               free_pcppages_bulk(zone, batch, pcp);
+               pcp->count -= batch;
        }
 
 out:
@@ -2839,7 +2838,7 @@ EXPORT_SYMBOL(free_pages_exact);
  * nr_free_zone_pages() counts the number of pages which are beyond the
  * high watermark within all zones at or below a given zone index.  For each
  * zone, the number of pages is calculated as:
- *     present_pages - high_pages
+ *     managed_pages - high_pages
  */
 static unsigned long nr_free_zone_pages(int offset)
 {
@@ -2906,9 +2905,13 @@ EXPORT_SYMBOL(si_meminfo);
 #ifdef CONFIG_NUMA
 void si_meminfo_node(struct sysinfo *val, int nid)
 {
+       int zone_type;          /* needs to be signed */
+       unsigned long managed_pages = 0;
        pg_data_t *pgdat = NODE_DATA(nid);
 
-       val->totalram = pgdat->node_present_pages;
+       for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+               managed_pages += pgdat->node_zones[zone_type].managed_pages;
+       val->totalram = managed_pages;
        val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
        val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
@@ -3150,12 +3153,10 @@ static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
  * Add all populated zones of a node to the zonelist.
  */
 static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
-                               int nr_zones, enum zone_type zone_type)
+                               int nr_zones)
 {
        struct zone *zone;
-
-       BUG_ON(zone_type >= MAX_NR_ZONES);
-       zone_type++;
+       enum zone_type zone_type = MAX_NR_ZONES;
 
        do {
                zone_type--;
@@ -3165,8 +3166,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
                                &zonelist->_zonerefs[nr_zones++]);
                        check_highest_zone(zone_type);
                }
-
        } while (zone_type);
+
        return nr_zones;
 }
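
For intuition only (an editorial sketch with a made-up three-zone layout, not kernel code): the rewritten loop starts at MAX_NR_ZONES and decrements, so populated zones are appended to the zonelist from highest to lowest.

	#include <stdio.h>

	enum zone_type { ZONE_DMA, ZONE_NORMAL, ZONE_HIGHMEM, MAX_NR_ZONES };

	int main(void)
	{
		enum zone_type zone_type = MAX_NR_ZONES;

		do {
			zone_type--;
			/* a populated zone would be appended to the zonelist here */
			printf("visit zone %d\n", zone_type);
		} while (zone_type);	/* stops once ZONE_DMA (0) has been visited */
		return 0;
	}
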
 
@@ -3250,18 +3251,25 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
        static DEFINE_MUTEX(zl_order_mutex);
 
        mutex_lock(&zl_order_mutex);
-       if (write)
-               strcpy(saved_string, (char*)table->data);
+       if (write) {
+               if (strlen((char *)table->data) >= NUMA_ZONELIST_ORDER_LEN) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               strcpy(saved_string, (char *)table->data);
+       }
        ret = proc_dostring(table, write, buffer, length, ppos);
        if (ret)
                goto out;
        if (write) {
                int oldval = user_zonelist_order;
-               if (__parse_numa_zonelist_order((char*)table->data)) {
+
+               ret = __parse_numa_zonelist_order((char *)table->data);
+               if (ret) {
                        /*
                         * bogus value.  restore saved string
                         */
-                       strncpy((char*)table->data, saved_string,
+                       strncpy((char *)table->data, saved_string,
                                NUMA_ZONELIST_ORDER_LEN);
                        user_zonelist_order = oldval;
                } else if (oldval != user_zonelist_order) {
@@ -3353,8 +3361,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
        zonelist = &pgdat->node_zonelists[0];
        for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                ;
-       j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+       j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
 }
@@ -3368,7 +3375,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
        struct zonelist *zonelist;
 
        zonelist = &pgdat->node_zonelists[1];
-       j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+       j = build_zonelists_node(pgdat, zonelist, 0);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
 }
@@ -3425,8 +3432,8 @@ static int default_zonelist_order(void)
                        z = &NODE_DATA(nid)->node_zones[zone_type];
                        if (populated_zone(z)) {
                                if (zone_type < ZONE_NORMAL)
-                                       low_kmem_size += z->present_pages;
-                               total_size += z->present_pages;
+                                       low_kmem_size += z->managed_pages;
+                               total_size += z->managed_pages;
                        } else if (zone_type == ZONE_NORMAL) {
                                /*
                                 * If any node has only lowmem, then node order
@@ -3576,7 +3583,7 @@ static void build_zonelists(pg_data_t *pgdat)
        local_node = pgdat->node_id;
 
        zonelist = &pgdat->node_zonelists[0];
-       j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+       j = build_zonelists_node(pgdat, zonelist, 0);
 
        /*
         * Now we build the zonelist so that it contains the zones
@@ -3589,14 +3596,12 @@ static void build_zonelists(pg_data_t *pgdat)
        for (node = local_node + 1; node < MAX_NUMNODES; node++) {
                if (!node_online(node))
                        continue;
-               j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+               j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        }
        for (node = 0; node < local_node; node++) {
                if (!node_online(node))
                        continue;
-               j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+               j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        }
 
        zonelist->_zonerefs[j].zone = NULL;
@@ -3705,12 +3710,12 @@ void __ref build_all_zonelists(pg_data_t *pgdat, struct zone *zone)
                mminit_verify_zonelist();
                cpuset_init_current_mems_allowed();
        } else {
-               /* we have to stop all cpus to guarantee there is no user
-                  of zonelist */
 #ifdef CONFIG_MEMORY_HOTPLUG
                if (zone)
                        setup_zone_pageset(zone);
 #endif
+               /* we have to stop all cpus to guarantee there is no user
+                  of zonelist */
                stop_machine(__build_all_zonelists, pgdat, NULL);
                /* cpuset refresh routine should be here */
        }
@@ -4032,15 +4037,40 @@ static int __meminit zone_batchsize(struct zone *zone)
 #endif
 }
 
-/* a companion to setup_pagelist_highmark() */
+/*
+ * pcp->high and pcp->batch values are related and dependent on one another:
+ * ->batch must never be higher then ->high.
+ * The following function updates them in a safe manner without read side
+ * locking.
+ *
+ * Any new users of pcp->batch and pcp->high should ensure they can cope with
+ * those fields changing asynchronously (according to the above rule).
+ *
+ * mutex_is_locked(&pcp_batch_high_lock) is required when calling this function
+ * outside of boot time (or some other assurance that no concurrent updaters
+ * exist).
+ */
+static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
+               unsigned long batch)
+{
+       /* start with a fail safe value for batch */
+       pcp->batch = 1;
+       smp_wmb();
+
+       /* Update high, then batch, in order */
+       pcp->high = high;
+       smp_wmb();
+
+       pcp->batch = batch;
+}
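
As an editorial aside for intuition (not part of the patch): a minimal single-threaded model, with made-up numbers and ignoring the smp_wmb() barriers, of why the fail-safe store keeps batch <= high at every point that a reader snapshotting the fields (as drain_zone_pages() above does with ACCESS_ONCE) could observe:

	#include <assert.h>

	struct pcp_model { unsigned long high, batch; };

	/* mirrors the store order of pageset_update() */
	static void model_update(struct pcp_model *p, unsigned long high,
				 unsigned long batch)
	{
		p->batch = 1;		/* fail-safe: observable state (old high, 1) */
		assert(p->batch <= p->high);
		p->high = high;		/* observable state (new high, 1) */
		assert(p->batch <= p->high);
		p->batch = batch;	/* observable state (new high, new batch) */
		assert(p->batch <= p->high);
	}

	int main(void)
	{
		struct pcp_model p = { .high = 384, .batch = 63 };

		/*
		 * Shrinking high below the old batch is the interesting case:
		 * storing the new high first, without the fail-safe, would
		 * briefly expose (32, 63), i.e. batch > high.
		 */
		model_update(&p, 32, 8);
		return 0;
	}
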
+
+/* a companion to pageset_set_high() */
 static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
 {
-       struct per_cpu_pages *pcp = &p->pcp;
-       pcp->high = 6 * batch;
-       pcp->batch = max(1UL, 1 * batch);
+       pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
 }
 
-static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+static void pageset_init(struct per_cpu_pageset *p)
 {
        struct per_cpu_pages *pcp;
        int migratetype;
@@ -4049,43 +4079,55 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
        pcp = &p->pcp;
        pcp->count = 0;
-       pageset_set_batch(p, batch);
        for (migratetype = 0; migratetype < MIGRATE_PCPTYPES; migratetype++)
                INIT_LIST_HEAD(&pcp->lists[migratetype]);
 }
 
+static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+{
+       pageset_init(p);
+       pageset_set_batch(p, batch);
+}
+
 /*
- * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * pageset_set_high() sets the high water mark for hot per_cpu_pagelist
  * to the value high for the pageset p.
  */
-static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+static void pageset_set_high(struct per_cpu_pageset *p,
                                unsigned long high)
 {
-       struct per_cpu_pages *pcp;
+       unsigned long batch = max(1UL, high / 4);
+       if ((high / 4) > (PAGE_SHIFT * 8))
+               batch = PAGE_SHIFT * 8;
 
-       pcp = &p->pcp;
-       pcp->high = high;
-       pcp->batch = max(1UL, high/4);
-       if ((high/4) > (PAGE_SHIFT * 8))
-               pcp->batch = PAGE_SHIFT * 8;
+       pageset_update(&p->pcp, high, batch);
 }
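
Worked example with illustrative numbers: for PAGE_SHIFT = 12 the cap is PAGE_SHIFT * 8 = 96 pages, so high = 1000 gives batch = 96 (since 1000 / 4 = 250 exceeds the cap), high = 200 gives batch = 50, and any high below 4 falls back to the max(1UL, ...) floor of 1.
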
 
-static void __meminit setup_zone_pageset(struct zone *zone)
+static void __meminit pageset_set_high_and_batch(struct zone *zone,
+               struct per_cpu_pageset *pcp)
 {
-       int cpu;
-
-       zone->pageset = alloc_percpu(struct per_cpu_pageset);
+       if (percpu_pagelist_fraction)
+               pageset_set_high(pcp,
+                       (zone->managed_pages /
+                               percpu_pagelist_fraction));
+       else
+               pageset_set_batch(pcp, zone_batchsize(zone));
+}
 
-       for_each_possible_cpu(cpu) {
-               struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
+static void __meminit zone_pageset_init(struct zone *zone, int cpu)
+{
+       struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
 
-               setup_pageset(pcp, zone_batchsize(zone));
+       pageset_init(pcp);
+       pageset_set_high_and_batch(zone, pcp);
+}
 
-               if (percpu_pagelist_fraction)
-                       setup_pagelist_highmark(pcp,
-                               (zone->managed_pages /
-                                       percpu_pagelist_fraction));
-       }
+static void __meminit setup_zone_pageset(struct zone *zone)
+{
+       int cpu;
+       zone->pageset = alloc_percpu(struct per_cpu_pageset);
+       for_each_possible_cpu(cpu)
+               zone_pageset_init(zone, cpu);
 }
 
 /*
@@ -4374,13 +4416,13 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
  */
 static unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *ignored)
 {
-       unsigned long node_start_pfn, node_end_pfn;
        unsigned long zone_start_pfn, zone_end_pfn;
 
-       /* Get the start and end of the node and zone */
-       get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+       /* Get the start and end of the zone */
        zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
        zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
        adjust_zone_range_for_zone_movable(nid, zone_type,
@@ -4435,14 +4477,14 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn,
 /* Return the number of page frames in holes in a zone on a node */
 static unsigned long __meminit zone_absent_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *ignored)
 {
        unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
        unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
-       unsigned long node_start_pfn, node_end_pfn;
        unsigned long zone_start_pfn, zone_end_pfn;
 
-       get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
        zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
        zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
 
@@ -4455,6 +4497,8 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *zones_size)
 {
        return zones_size[zone_type];
@@ -4462,6 +4506,8 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 
 static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
                                                unsigned long zone_type,
+                                               unsigned long node_start_pfn,
+                                               unsigned long node_end_pfn,
                                                unsigned long *zholes_size)
 {
        if (!zholes_size)
@@ -4473,21 +4519,27 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
-               unsigned long *zones_size, unsigned long *zholes_size)
+                                               unsigned long node_start_pfn,
+                                               unsigned long node_end_pfn,
+                                               unsigned long *zones_size,
+                                               unsigned long *zholes_size)
 {
        unsigned long realtotalpages, totalpages = 0;
        enum zone_type i;
 
        for (i = 0; i < MAX_NR_ZONES; i++)
                totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
-                                                               zones_size);
+                                                        node_start_pfn,
+                                                        node_end_pfn,
+                                                        zones_size);
        pgdat->node_spanned_pages = totalpages;
 
        realtotalpages = totalpages;
        for (i = 0; i < MAX_NR_ZONES; i++)
                realtotalpages -=
                        zone_absent_pages_in_node(pgdat->node_id, i,
-                                                               zholes_size);
+                                                 node_start_pfn, node_end_pfn,
+                                                 zholes_size);
        pgdat->node_present_pages = realtotalpages;
        printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
                                                        realtotalpages);
@@ -4596,6 +4648,7 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
  * NOTE: pgdat should get zeroed by caller.
  */
 static void __paginginit free_area_init_core(struct pglist_data *pgdat,
+               unsigned long node_start_pfn, unsigned long node_end_pfn,
                unsigned long *zones_size, unsigned long *zholes_size)
 {
        enum zone_type j;
@@ -4617,8 +4670,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, freesize, memmap_pages;
 
-               size = zone_spanned_pages_in_node(nid, j, zones_size);
+               size = zone_spanned_pages_in_node(nid, j, node_start_pfn,
+                                                 node_end_pfn, zones_size);
                realsize = freesize = size - zone_absent_pages_in_node(nid, j,
+                                                               node_start_pfn,
+                                                               node_end_pfn,
                                                                zholes_size);
 
                /*
@@ -4732,6 +4788,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                unsigned long node_start_pfn, unsigned long *zholes_size)
 {
        pg_data_t *pgdat = NODE_DATA(nid);
+       unsigned long start_pfn = 0;
+       unsigned long end_pfn = 0;
 
        /* pg_data_t should be reset to zero when it's allocated */
        WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
@@ -4739,7 +4797,11 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        pgdat->node_id = nid;
        pgdat->node_start_pfn = node_start_pfn;
        init_zone_allows_reclaim(nid);
-       calculate_node_totalpages(pgdat, zones_size, zholes_size);
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+       get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+#endif
+       calculate_node_totalpages(pgdat, start_pfn, end_pfn,
+                                 zones_size, zholes_size);
 
        alloc_node_mem_map(pgdat);
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
@@ -4748,7 +4810,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                (unsigned long)pgdat->node_mem_map);
 #endif
 
-       free_area_init_core(pgdat, zones_size, zholes_size);
+       free_area_init_core(pgdat, start_pfn, end_pfn,
+                           zones_size, zholes_size);
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
@@ -5156,35 +5219,101 @@ early_param("movablecore", cmdline_parse_movablecore);
 
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-unsigned long free_reserved_area(unsigned long start, unsigned long end,
-                                int poison, char *s)
+void adjust_managed_page_count(struct page *page, long count)
+{
+       spin_lock(&managed_page_count_lock);
+       page_zone(page)->managed_pages += count;
+       totalram_pages += count;
+#ifdef CONFIG_HIGHMEM
+       if (PageHighMem(page))
+               totalhigh_pages += count;
+#endif
+       spin_unlock(&managed_page_count_lock);
+}
+EXPORT_SYMBOL(adjust_managed_page_count);
+
+unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
 {
-       unsigned long pages, pos;
+       void *pos;
+       unsigned long pages = 0;
 
-       pos = start = PAGE_ALIGN(start);
-       end &= PAGE_MASK;
-       for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
-               if (poison)
-                       memset((void *)pos, poison, PAGE_SIZE);
-               free_reserved_page(virt_to_page((void *)pos));
+       start = (void *)PAGE_ALIGN((unsigned long)start);
+       end = (void *)((unsigned long)end & PAGE_MASK);
+       for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
+               if ((unsigned int)poison <= 0xFF)
+                       memset(pos, poison, PAGE_SIZE);
+               free_reserved_page(virt_to_page(pos));
        }
 
        if (pages && s)
-               pr_info("Freeing %s memory: %ldK (%lx - %lx)\n",
+               pr_info("Freeing %s memory: %ldK (%p - %p)\n",
                        s, pages << (PAGE_SHIFT - 10), start, end);
 
        return pages;
 }
+EXPORT_SYMBOL(free_reserved_area);
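
As a usage illustration only, not part of this diff: with the pointer-based signature a caller such as an architecture's free_initmem() can pass the init-section boundaries directly. The exact call site below is an assumption, though __init_begin, __init_end and POISON_FREE_INITMEM are the usual kernel symbols:

	/* hypothetical caller: release .init memory, poisoning the pages first */
	void free_initmem(void)
	{
		free_reserved_area(&__init_begin, &__init_end,
				   POISON_FREE_INITMEM, "unused kernel");
	}
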
 
 #ifdef CONFIG_HIGHMEM
 void free_highmem_page(struct page *page)
 {
        __free_reserved_page(page);
        totalram_pages++;
+       page_zone(page)->managed_pages++;
        totalhigh_pages++;
 }
 #endif
 
+
+void __init mem_init_print_info(const char *str)
+{
+       unsigned long physpages, codesize, datasize, rosize, bss_size;
+       unsigned long init_code_size, init_data_size;
+
+       physpages = get_num_physpages();
+       codesize = _etext - _stext;
+       datasize = _edata - _sdata;
+       rosize = __end_rodata - __start_rodata;
+       bss_size = __bss_stop - __bss_start;
+       init_data_size = __init_end - __init_begin;
+       init_code_size = _einittext - _sinittext;
+
+       /*
+        * Detect special cases and adjust section sizes accordingly:
+        * 1) .init.* may be embedded into .data sections
+        * 2) .init.text.* may be out of [__init_begin, __init_end],
+        *    please refer to arch/tile/kernel/vmlinux.lds.S.
+        * 3) .rodata.* may be embedded into .text or .data sections.
+        */
+#define adj_init_size(start, end, size, pos, adj) \
+       if (start <= pos && pos < end && size > adj) \
+               size -= adj;
+
+       adj_init_size(__init_begin, __init_end, init_data_size,
+                    _sinittext, init_code_size);
+       adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size);
+       adj_init_size(_sdata, _edata, datasize, __init_begin, init_data_size);
+       adj_init_size(_stext, _etext, codesize, __start_rodata, rosize);
+       adj_init_size(_sdata, _edata, datasize, __start_rodata, rosize);
+
+#undef adj_init_size
+
+       printk("Memory: %luK/%luK available "
+              "(%luK kernel code, %luK rwdata, %luK rodata, "
+              "%luK init, %luK bss, %luK reserved"
+#ifdef CONFIG_HIGHMEM
+              ", %luK highmem"
+#endif
+              "%s%s)\n",
+              nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
+              codesize >> 10, datasize >> 10, rosize >> 10,
+              (init_data_size + init_code_size) >> 10, bss_size >> 10,
+              (physpages - totalram_pages) << (PAGE_SHIFT-10),
+#ifdef CONFIG_HIGHMEM
+              totalhigh_pages << (PAGE_SHIFT-10),
+#endif
+              str ? ", " : "", str ? str : "");
+}
+
 /**
  * set_dma_reserve - set the specified number of pages reserved in the first zone
  * @new_dma_reserve: The number of pages to mark reserved
@@ -5546,7 +5675,6 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
  * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
  * can have before it gets flushed back to buddy allocator.
  */
-
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
@@ -5557,14 +5685,16 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
        ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
        if (!write || (ret < 0))
                return ret;
+
+       mutex_lock(&pcp_batch_high_lock);
        for_each_populated_zone(zone) {
-               for_each_possible_cpu(cpu) {
-                       unsigned long  high;
-                       high = zone->managed_pages / percpu_pagelist_fraction;
-                       setup_pagelist_highmark(
-                               per_cpu_ptr(zone->pageset, cpu), high);
-               }
+               unsigned long  high;
+               high = zone->managed_pages / percpu_pagelist_fraction;
+               for_each_possible_cpu(cpu)
+                       pageset_set_high(per_cpu_ptr(zone->pageset, cpu),
+                                        high);
        }
+       mutex_unlock(&pcp_batch_high_lock);
        return 0;
 }
 
@@ -6053,32 +6183,18 @@ void free_contig_range(unsigned long pfn, unsigned nr_pages)
 #endif
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-static int __meminit __zone_pcp_update(void *data)
-{
-       struct zone *zone = data;
-       int cpu;
-       unsigned long batch = zone_batchsize(zone), flags;
-
-       for_each_possible_cpu(cpu) {
-               struct per_cpu_pageset *pset;
-               struct per_cpu_pages *pcp;
-
-               pset = per_cpu_ptr(zone->pageset, cpu);
-               pcp = &pset->pcp;
-
-               local_irq_save(flags);
-               if (pcp->count > 0)
-                       free_pcppages_bulk(zone, pcp->count, pcp);
-               drain_zonestat(zone, pset);
-               setup_pageset(pset, batch);
-               local_irq_restore(flags);
-       }
-       return 0;
-}
-
+/*
+ * The zone indicated has a new number of managed_pages; batch sizes and percpu
+ * page high values need to be recalculated.
+ */
 void __meminit zone_pcp_update(struct zone *zone)
 {
-       stop_machine(__zone_pcp_update, zone, NULL);
+       unsigned cpu;
+       mutex_lock(&pcp_batch_high_lock);
+       for_each_possible_cpu(cpu)
+               pageset_set_high_and_batch(zone,
+                               per_cpu_ptr(zone->pageset, cpu));
+       mutex_unlock(&pcp_batch_high_lock);
 }
 #endif
 
@@ -6148,6 +6264,10 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
                list_del(&page->lru);
                rmv_page_order(page);
                zone->free_area[order].nr_free--;
+#ifdef CONFIG_HIGHMEM
+               if (PageHighMem(page))
+                       totalhigh_pages -= 1 << order;
+#endif
                for (i = 0; i < (1 << order); i++)
                        SetPageReserved((page+i));
                pfn += (1 << order);