mm/page_alloc.c: remove unlikely() from the current_order test
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c7344d1..b5855e5 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -61,6 +61,7 @@
 #include <linux/hugetlb.h>
 #include <linux/sched/rt.h>
 
+#include <asm/sections.h>
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
 #include "internal.h"
@@ -103,6 +104,9 @@ nodemask_t node_states[NR_NODE_STATES] __read_mostly = {
 };
 EXPORT_SYMBOL(node_states);
 
+/* Protect totalram_pages and zone->managed_pages */
+static DEFINE_SPINLOCK(managed_page_count_lock);
+
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 /*
@@ -742,14 +746,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
        local_irq_restore(flags);
 }
 
-/*
- * Read access to zone->managed_pages is safe because it's unsigned long,
- * but we still need to serialize writers. Currently all callers of
- * __free_pages_bootmem() except put_page_bootmem() should only be used
- * at boot time. So for shorter boot time, we shift the burden to
- * put_page_bootmem() to serialize writers.
- */
-void __meminit __free_pages_bootmem(struct page *page, unsigned int order)
+void __init __free_pages_bootmem(struct page *page, unsigned int order)
 {
        unsigned int nr_pages = 1 << order;
        unsigned int loop;
@@ -784,11 +781,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
        set_page_refcounted(page);
        set_pageblock_migratetype(page, MIGRATE_CMA);
        __free_pages(page, pageblock_order);
-       totalram_pages += pageblock_nr_pages;
-#ifdef CONFIG_HIGHMEM
-       if (PageHighMem(page))
-               totalhigh_pages += pageblock_nr_pages;
-#endif
+       adjust_managed_page_count(page, pageblock_nr_pages);
 }
 #endif
 
@@ -1053,7 +1046,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                         * MIGRATE_CMA areas.
                         */
                        if (!is_migrate_cma(migratetype) &&
-                           (unlikely(current_order >= pageblock_order / 2) ||
+                           (current_order >= pageblock_order / 2 ||
                             start_migratetype == MIGRATE_RECLAIMABLE ||
                             page_group_by_mobility_disabled)) {
                                int pages;
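
The hint is dropped because this branch is taken routinely during fallback allocation, so steering the compiler's block layout against it costs more than it saves. For reference, a sketch of what the annotation expands to (as defined in include/linux/compiler.h):

    /* Branch-prediction hints; both expand to GCC's __builtin_expect(). */
    #define likely(x)	__builtin_expect(!!(x), 1)
    #define unlikely(x)	__builtin_expect(!!(x), 0)
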
@@ -2845,7 +2838,7 @@ EXPORT_SYMBOL(free_pages_exact);
  * nr_free_zone_pages() counts the number of pages which are beyond the
  * high watermark within all zones at or below a given zone index.  For each
  * zone, the number of pages is calculated as:
- *     present_pages - high_pages
+ *     managed_pages - high_pages
  */
 static unsigned long nr_free_zone_pages(int offset)
 {
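
For context, an abridged sketch of the loop this comment documents (the body, unchanged by this hunk, already sums managed_pages at this point in the series):

    for_each_zone_zonelist(zone, z, zonelist, offset) {
	    unsigned long size = zone->managed_pages;
	    unsigned long high = high_wmark_pages(zone);

	    if (size > high)
		    sum += size - high;
    }
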
@@ -2912,9 +2905,13 @@ EXPORT_SYMBOL(si_meminfo);
 #ifdef CONFIG_NUMA
 void si_meminfo_node(struct sysinfo *val, int nid)
 {
+       int zone_type;          /* needs to be signed */
+       unsigned long managed_pages = 0;
        pg_data_t *pgdat = NODE_DATA(nid);
 
-       val->totalram = pgdat->node_present_pages;
+       for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
+               managed_pages += pgdat->node_zones[zone_type].managed_pages;
+       val->totalram = managed_pages;
        val->freeram = node_page_state(nid, NR_FREE_PAGES);
 #ifdef CONFIG_HIGHMEM
        val->totalhigh = pgdat->node_zones[ZONE_HIGHMEM].managed_pages;
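
Summing managed_pages instead of reporting node_present_pages matters because the zone counters nest; paraphrasing the include/linux/mmzone.h documentation:

    /*
     * spanned_pages = zone_end_pfn - zone_start_pfn    (may include holes)
     * present_pages = spanned_pages - absent_pages     (pages in holes)
     * managed_pages = present_pages - reserved_pages   (what buddy manages)
     */
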
@@ -3156,12 +3153,10 @@ static void zoneref_set_zone(struct zone *zone, struct zoneref *zoneref)
  * Add all populated zones of a node to the zonelist.
  */
 static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
-                               int nr_zones, enum zone_type zone_type)
+                               int nr_zones)
 {
        struct zone *zone;
-
-       BUG_ON(zone_type >= MAX_NR_ZONES);
-       zone_type++;
+       enum zone_type zone_type = MAX_NR_ZONES;
 
        do {
                zone_type--;
@@ -3171,8 +3166,8 @@ static int build_zonelists_node(pg_data_t *pgdat, struct zonelist *zonelist,
                                &zonelist->_zonerefs[nr_zones++]);
                        check_highest_zone(zone_type);
                }
-
        } while (zone_type);
+
        return nr_zones;
 }
 
@@ -3256,18 +3251,25 @@ int numa_zonelist_order_handler(ctl_table *table, int write,
        static DEFINE_MUTEX(zl_order_mutex);
 
        mutex_lock(&zl_order_mutex);
-       if (write)
-               strcpy(saved_string, (char*)table->data);
+       if (write) {
+               if (strlen((char *)table->data) >= NUMA_ZONELIST_ORDER_LEN) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               strcpy(saved_string, (char *)table->data);
+       }
        ret = proc_dostring(table, write, buffer, length, ppos);
        if (ret)
                goto out;
        if (write) {
                int oldval = user_zonelist_order;
-               if (__parse_numa_zonelist_order((char*)table->data)) {
+
+               ret = __parse_numa_zonelist_order((char *)table->data);
+               if (ret) {
                        /*
                         * bogus value.  restore saved string
                         */
-                       strncpy((char*)table->data, saved_string,
+                       strncpy((char *)table->data, saved_string,
                                NUMA_ZONELIST_ORDER_LEN);
                        user_zonelist_order = oldval;
                } else if (oldval != user_zonelist_order) {
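
The added length check guards the strcpy() into saved_string, a fixed-size buffer declared earlier in the handler; overlong user-controlled input now fails with -EINVAL instead of overflowing it. The shapes involved (a sketch; these declarations sit outside this hunk):

    #define NUMA_ZONELIST_ORDER_LEN 16	/* from include/linux/mmzone.h */

    char saved_string[NUMA_ZONELIST_ORDER_LEN];	/* the strcpy() target */
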
@@ -3359,8 +3361,7 @@ static void build_zonelists_in_node_order(pg_data_t *pgdat, int node)
        zonelist = &pgdat->node_zonelists[0];
        for (j = 0; zonelist->_zonerefs[j].zone != NULL; j++)
                ;
-       j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+       j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
 }
@@ -3374,7 +3375,7 @@ static void build_thisnode_zonelists(pg_data_t *pgdat)
        struct zonelist *zonelist;
 
        zonelist = &pgdat->node_zonelists[1];
-       j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+       j = build_zonelists_node(pgdat, zonelist, 0);
        zonelist->_zonerefs[j].zone = NULL;
        zonelist->_zonerefs[j].zone_idx = 0;
 }
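
For orientation, the zonelist indices used by this and the surrounding hunks (a sketch of the NUMA convention, not new code):

    /*
     * node_zonelists[0]: general zonelist, may fall back to other nodes.
     * node_zonelists[1]: built here; this node's zones only, used to
     *                    satisfy __GFP_THISNODE allocations.
     */
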
@@ -3431,8 +3432,8 @@ static int default_zonelist_order(void)
                        z = &NODE_DATA(nid)->node_zones[zone_type];
                        if (populated_zone(z)) {
                                if (zone_type < ZONE_NORMAL)
-                                       low_kmem_size += z->present_pages;
-                               total_size += z->present_pages;
+                                       low_kmem_size += z->managed_pages;
+                               total_size += z->managed_pages;
                        } else if (zone_type == ZONE_NORMAL) {
                                /*
                                 * If any node has only lowmem, then node order
@@ -3582,7 +3583,7 @@ static void build_zonelists(pg_data_t *pgdat)
        local_node = pgdat->node_id;
 
        zonelist = &pgdat->node_zonelists[0];
-       j = build_zonelists_node(pgdat, zonelist, 0, MAX_NR_ZONES - 1);
+       j = build_zonelists_node(pgdat, zonelist, 0);
 
        /*
         * Now we build the zonelist so that it contains the zones
@@ -3595,14 +3596,12 @@ static void build_zonelists(pg_data_t *pgdat)
        for (node = local_node + 1; node < MAX_NUMNODES; node++) {
                if (!node_online(node))
                        continue;
-               j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+               j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        }
        for (node = 0; node < local_node; node++) {
                if (!node_online(node))
                        continue;
-               j = build_zonelists_node(NODE_DATA(node), zonelist, j,
-                                                       MAX_NR_ZONES - 1);
+               j = build_zonelists_node(NODE_DATA(node), zonelist, j);
        }
 
        zonelist->_zonerefs[j].zone = NULL;
@@ -4065,7 +4064,7 @@ static void pageset_update(struct per_cpu_pages *pcp, unsigned long high,
        pcp->batch = batch;
 }
 
-/* a companion to setup_pagelist_highmark() */
+/* a companion to pageset_set_high() */
 static void pageset_set_batch(struct per_cpu_pageset *p, unsigned long batch)
 {
        pageset_update(&p->pcp, 6 * batch, max(1UL, 1 * batch));
@@ -4091,10 +4090,10 @@ static void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 }
 
 /*
- * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * pageset_set_high() sets the high water mark for hot per_cpu_pagelist
  * to the value high for the pageset p.
  */
-static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+static void pageset_set_high(struct per_cpu_pageset *p,
                                unsigned long high)
 {
        unsigned long batch = max(1UL, high / 4);
@@ -4104,19 +4103,25 @@ static void setup_pagelist_highmark(struct per_cpu_pageset *p,
        pageset_update(&p->pcp, high, batch);
 }
 
-static void __meminit zone_pageset_init(struct zone *zone, int cpu)
+static void __meminit pageset_set_high_and_batch(struct zone *zone,
+               struct per_cpu_pageset *pcp)
 {
-       struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
-
-       pageset_init(pcp);
        if (percpu_pagelist_fraction)
-               setup_pagelist_highmark(pcp,
+               pageset_set_high(pcp,
                        (zone->managed_pages /
                                percpu_pagelist_fraction));
        else
                pageset_set_batch(pcp, zone_batchsize(zone));
 }
 
+static void __meminit zone_pageset_init(struct zone *zone, int cpu)
+{
+       struct per_cpu_pageset *pcp = per_cpu_ptr(zone->pageset, cpu);
+
+       pageset_init(pcp);
+       pageset_set_high_and_batch(zone, pcp);
+}
+
 static void __meminit setup_zone_pageset(struct zone *zone)
 {
        int cpu;
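
Worked numbers for pageset_set_high_and_batch(), assuming 4 KiB pages and the era's clamp of batch to PAGE_SHIFT * 8 in pageset_set_high() (a sketch, not part of the patch):

    /*
     * percpu_pagelist_fraction == 8, zone with 262144 managed pages:
     *     high  = 262144 / 8 = 32768
     *     batch = min(high / 4, PAGE_SHIFT * 8) = 96
     *
     * fraction unset: zone_batchsize() yields batch = 31 for any large
     * zone, so pageset_set_batch() sets high = 6 * 31 = 186.
     */
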
@@ -4411,13 +4416,13 @@ static void __meminit adjust_zone_range_for_zone_movable(int nid,
  */
 static unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *ignored)
 {
-       unsigned long node_start_pfn, node_end_pfn;
        unsigned long zone_start_pfn, zone_end_pfn;
 
-       /* Get the start and end of the node and zone */
-       get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+       /* Get the start and end of the zone */
        zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
        zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
        adjust_zone_range_for_zone_movable(nid, zone_type,
@@ -4472,14 +4477,14 @@ unsigned long __init absent_pages_in_range(unsigned long start_pfn,
 /* Return the number of page frames in holes in a zone on a node */
 static unsigned long __meminit zone_absent_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *ignored)
 {
        unsigned long zone_low = arch_zone_lowest_possible_pfn[zone_type];
        unsigned long zone_high = arch_zone_highest_possible_pfn[zone_type];
-       unsigned long node_start_pfn, node_end_pfn;
        unsigned long zone_start_pfn, zone_end_pfn;
 
-       get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
        zone_start_pfn = clamp(node_start_pfn, zone_low, zone_high);
        zone_end_pfn = clamp(node_end_pfn, zone_low, zone_high);
 
@@ -4492,6 +4497,8 @@ static unsigned long __meminit zone_absent_pages_in_node(int nid,
 #else /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
                                        unsigned long zone_type,
+                                       unsigned long node_start_pfn,
+                                       unsigned long node_end_pfn,
                                        unsigned long *zones_size)
 {
        return zones_size[zone_type];
@@ -4499,6 +4506,8 @@ static inline unsigned long __meminit zone_spanned_pages_in_node(int nid,
 
 static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
                                                unsigned long zone_type,
+                                               unsigned long node_start_pfn,
+                                               unsigned long node_end_pfn,
                                                unsigned long *zholes_size)
 {
        if (!zholes_size)
@@ -4510,21 +4519,27 @@ static inline unsigned long __meminit zone_absent_pages_in_node(int nid,
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
 static void __meminit calculate_node_totalpages(struct pglist_data *pgdat,
-               unsigned long *zones_size, unsigned long *zholes_size)
+                                               unsigned long node_start_pfn,
+                                               unsigned long node_end_pfn,
+                                               unsigned long *zones_size,
+                                               unsigned long *zholes_size)
 {
        unsigned long realtotalpages, totalpages = 0;
        enum zone_type i;
 
        for (i = 0; i < MAX_NR_ZONES; i++)
                totalpages += zone_spanned_pages_in_node(pgdat->node_id, i,
-                                                               zones_size);
+                                                        node_start_pfn,
+                                                        node_end_pfn,
+                                                        zones_size);
        pgdat->node_spanned_pages = totalpages;
 
        realtotalpages = totalpages;
        for (i = 0; i < MAX_NR_ZONES; i++)
                realtotalpages -=
                        zone_absent_pages_in_node(pgdat->node_id, i,
-                                                               zholes_size);
+                                                 node_start_pfn, node_end_pfn,
+                                                 zholes_size);
        pgdat->node_present_pages = realtotalpages;
        printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
                                                        realtotalpages);
@@ -4633,6 +4648,7 @@ static unsigned long __paginginit calc_memmap_size(unsigned long spanned_pages,
  * NOTE: pgdat should get zeroed by caller.
  */
 static void __paginginit free_area_init_core(struct pglist_data *pgdat,
+               unsigned long node_start_pfn, unsigned long node_end_pfn,
                unsigned long *zones_size, unsigned long *zholes_size)
 {
        enum zone_type j;
@@ -4654,8 +4670,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, realsize, freesize, memmap_pages;
 
-               size = zone_spanned_pages_in_node(nid, j, zones_size);
+               size = zone_spanned_pages_in_node(nid, j, node_start_pfn,
+                                                 node_end_pfn, zones_size);
                realsize = freesize = size - zone_absent_pages_in_node(nid, j,
+                                                               node_start_pfn,
+                                                               node_end_pfn,
                                                                zholes_size);
 
                /*
@@ -4769,6 +4788,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                unsigned long node_start_pfn, unsigned long *zholes_size)
 {
        pg_data_t *pgdat = NODE_DATA(nid);
+       unsigned long start_pfn = 0;
+       unsigned long end_pfn = 0;
 
        /* pg_data_t should be reset to zero when it's allocated */
        WARN_ON(pgdat->nr_zones || pgdat->classzone_idx);
@@ -4776,7 +4797,11 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        pgdat->node_id = nid;
        pgdat->node_start_pfn = node_start_pfn;
        init_zone_allows_reclaim(nid);
-       calculate_node_totalpages(pgdat, zones_size, zholes_size);
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+       get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+#endif
+       calculate_node_totalpages(pgdat, start_pfn, end_pfn,
+                                 zones_size, zholes_size);
 
        alloc_node_mem_map(pgdat);
 #ifdef CONFIG_FLAT_NODE_MEM_MAP
@@ -4785,7 +4810,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
                (unsigned long)pgdat->node_mem_map);
 #endif
 
-       free_area_init_core(pgdat, zones_size, zholes_size);
+       free_area_init_core(pgdat, start_pfn, end_pfn,
+                           zones_size, zholes_size);
 }
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
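
Taken together, these hunks hoist the pfn-range lookup out of the per-zone helpers so it runs once per node; the resulting call flow (a sketch assembled from the hunks above):

    /*
     * free_area_init_node(nid, ...)
     *     get_pfn_range_for_nid(nid, &start_pfn, &end_pfn)  // MEMBLOCK_NODE_MAP only
     *     calculate_node_totalpages(pgdat, start_pfn, end_pfn, ...)
     *         zone_spanned_pages_in_node(nid, i, start_pfn, end_pfn, ...)
     *         zone_absent_pages_in_node(nid, i, start_pfn, end_pfn, ...)
     *     free_area_init_core(pgdat, start_pfn, end_pfn, ...)
     */
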
@@ -5193,35 +5219,101 @@ early_param("movablecore", cmdline_parse_movablecore);
 
 #endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
 
-unsigned long free_reserved_area(unsigned long start, unsigned long end,
-                                int poison, char *s)
+void adjust_managed_page_count(struct page *page, long count)
+{
+       spin_lock(&managed_page_count_lock);
+       page_zone(page)->managed_pages += count;
+       totalram_pages += count;
+#ifdef CONFIG_HIGHMEM
+       if (PageHighMem(page))
+               totalhigh_pages += count;
+#endif
+       spin_unlock(&managed_page_count_lock);
+}
+EXPORT_SYMBOL(adjust_managed_page_count);
+
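
The export is there because code outside the core allocator also takes pages out of and returns them to service, balloon drivers being the canonical case; a hypothetical sketch of such a caller (hypothetical_deflate() is illustrative only, not part of this patch):

    /* Hypothetical: a balloon driver returning a previously-inflated page. */
    static void hypothetical_deflate(struct page *page)
    {
	    adjust_managed_page_count(page, 1);	/* page becomes usable again */
	    __free_page(page);
    }
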
+unsigned long free_reserved_area(void *start, void *end, int poison, char *s)
 {
-       unsigned long pages, pos;
+       void *pos;
+       unsigned long pages = 0;
 
-       pos = start = PAGE_ALIGN(start);
-       end &= PAGE_MASK;
-       for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
-               if (poison)
-                       memset((void *)pos, poison, PAGE_SIZE);
-               free_reserved_page(virt_to_page((void *)pos));
+       start = (void *)PAGE_ALIGN((unsigned long)start);
+       end = (void *)((unsigned long)end & PAGE_MASK);
+       for (pos = start; pos < end; pos += PAGE_SIZE, pages++) {
+               if ((unsigned int)poison <= 0xFF)
+                       memset(pos, poison, PAGE_SIZE);
+               free_reserved_page(virt_to_page(pos));
        }
 
        if (pages && s)
-               pr_info("Freeing %s memory: %ldK (%lx - %lx)\n",
+               pr_info("Freeing %s memory: %ldK (%p - %p)\n",
                        s, pages << (PAGE_SHIFT - 10), start, end);
 
        return pages;
 }
+EXPORT_SYMBOL(free_reserved_area);
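
With void * parameters, callers can hand in linker-section symbols without casts; a sketch modeled on the kernel's free_initmem_default() helper in include/linux/mm.h (return type and exact body assumed from the same era):

    static inline unsigned long free_initmem_default(int poison)
    {
	    extern char __init_begin[], __init_end[];

	    return free_reserved_area(&__init_begin, &__init_end,
				      poison, "unused kernel");
    }
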
 
 #ifdef CONFIG_HIGHMEM
 void free_highmem_page(struct page *page)
 {
        __free_reserved_page(page);
        totalram_pages++;
+       page_zone(page)->managed_pages++;
        totalhigh_pages++;
 }
 #endif
 
+
+void __init mem_init_print_info(const char *str)
+{
+       unsigned long physpages, codesize, datasize, rosize, bss_size;
+       unsigned long init_code_size, init_data_size;
+
+       physpages = get_num_physpages();
+       codesize = _etext - _stext;
+       datasize = _edata - _sdata;
+       rosize = __end_rodata - __start_rodata;
+       bss_size = __bss_stop - __bss_start;
+       init_data_size = __init_end - __init_begin;
+       init_code_size = _einittext - _sinittext;
+
+       /*
+        * Detect special cases and adjust section sizes accordingly:
+        * 1) .init.* may be embedded into .data sections
+        * 2) .init.text.* may be out of [__init_begin, __init_end],
+        *    please refer to arch/tile/kernel/vmlinux.lds.S.
+        * 3) .rodata.* may be embedded into .text or .data sections.
+        */
+#define adj_init_size(start, end, size, pos, adj) \
+       if (start <= pos && pos < end && size > adj) \
+               size -= adj;
+
+       adj_init_size(__init_begin, __init_end, init_data_size,
+                    _sinittext, init_code_size);
+       adj_init_size(_stext, _etext, codesize, _sinittext, init_code_size);
+       adj_init_size(_sdata, _edata, datasize, __init_begin, init_data_size);
+       adj_init_size(_stext, _etext, codesize, __start_rodata, rosize);
+       adj_init_size(_sdata, _edata, datasize, __start_rodata, rosize);
+
+#undef adj_init_size
+
+       printk("Memory: %luK/%luK available "
+              "(%luK kernel code, %luK rwdata, %luK rodata, "
+              "%luK init, %luK bss, %luK reserved"
+#ifdef CONFIG_HIGHMEM
+              ", %luK highmem"
+#endif
+              "%s%s)\n",
+              nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
+              codesize >> 10, datasize >> 10, rosize >> 10,
+              (init_data_size + init_code_size) >> 10, bss_size >> 10,
+              (physpages - totalram_pages) << (PAGE_SHIFT-10),
+#ifdef CONFIG_HIGHMEM
+              totalhigh_pages << (PAGE_SHIFT-10),
+#endif
+              str ? ", " : "", str ? str : "");
+}
+
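
Each architecture is then expected to replace its hand-rolled banner printing with one call from mem_init(); a minimal sketch (arch-specific setup elided):

    void __init mem_init(void)
    {
	    /* ... arch-specific max_mapnr / highmem setup elided ... */
	    free_all_bootmem();
	    mem_init_print_info(NULL);	/* or an arch-specific suffix string */
    }
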
 /**
  * set_dma_reserve - set the specified number of pages reserved in the first zone
  * @new_dma_reserve: The number of pages to mark reserved
@@ -5599,8 +5691,8 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
                unsigned long  high;
                high = zone->managed_pages / percpu_pagelist_fraction;
                for_each_possible_cpu(cpu)
-                       setup_pagelist_highmark(
-                                       per_cpu_ptr(zone->pageset, cpu), high);
+                       pageset_set_high(per_cpu_ptr(zone->pageset, cpu),
+                                        high);
        }
        mutex_unlock(&pcp_batch_high_lock);
        return 0;
@@ -6100,7 +6192,8 @@ void __meminit zone_pcp_update(struct zone *zone)
        unsigned cpu;
        mutex_lock(&pcp_batch_high_lock);
        for_each_possible_cpu(cpu)
-               zone_pageset_init(zone, cpu);
+               pageset_set_high_and_batch(zone,
+                               per_cpu_ptr(zone->pageset, cpu));
        mutex_unlock(&pcp_batch_high_lock);
 }
 #endif
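
The point of the split above: zone_pageset_init() is pageset_init() plus pageset_set_high_and_batch(), and only the latter is safe on a live pageset.

    /*
     * Re-running pageset_init() here would zero pcp->count and the list
     * heads of a pageset that may still hold pages; recomputing only
     * ->high and ->batch leaves those pages intact.
     */
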
@@ -6171,6 +6264,10 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
                list_del(&page->lru);
                rmv_page_order(page);
                zone->free_area[order].nr_free--;
+#ifdef CONFIG_HIGHMEM
+               if (PageHighMem(page))
+                       totalhigh_pages -= 1 << order;
+#endif
                for (i = 0; i < (1 << order); i++)
                        SetPageReserved((page+i));
                pfn += (1 << order);