Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ae2f847..736d8e1 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -53,8 +53,6 @@
 #include <linux/kmemleak.h>
 #include <linux/compaction.h>
 #include <trace/events/kmem.h>
-#include <linux/ftrace_event.h>
-#include <linux/memcontrol.h>
 #include <linux/prefetch.h>
 #include <linux/mm_inline.h>
 #include <linux/migrate.h>
@@ -1015,7 +1013,7 @@ int move_freepages(struct zone *zone,
         * Remove at a later date when no bug reports exist related to
         * grouping pages by mobility
         */
-       BUG_ON(page_zone(start_page) != page_zone(end_page));
+       VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
 #endif
 
        for (page = start_page; page <= end_page;) {
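
This hunk downgrades the cross-zone sanity check from an unconditional BUG_ON() to VM_BUG_ON(), which only generates a runtime check when CONFIG_DEBUG_VM is set. A minimal sketch of the distinction, following the include/linux/mmdebug.h definition of this era (an approximation; check the exact tree):

/* Sketch: what VM_BUG_ON() expands to, per mmdebug.h of this period. */
#ifdef CONFIG_DEBUG_VM
#define VM_BUG_ON(cond)	BUG_ON(cond)			/* debug builds: panic on violation */
#else
#define VM_BUG_ON(cond)	BUILD_BUG_ON_INVALID(cond)	/* production: type-check only, emits no code */
#endif

The effect is that production kernels stop paying for (and panicking on) a check that, per the comment above, only guards against a suspected mobility-grouping bug.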
@@ -1614,8 +1612,8 @@ again:
 
        __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
        if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
-           !zone_is_fair_depleted(zone))
-               zone_set_flag(zone, ZONE_FAIR_DEPLETED);
+           !test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
+               set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
 
        __count_zone_vm_events(PGALLOC, zone, 1 << order);
        zone_statistics(preferred_zone, zone, gfp_flags);
@@ -1935,7 +1933,7 @@ static void reset_alloc_batches(struct zone *preferred_zone)
                mod_zone_page_state(zone, NR_ALLOC_BATCH,
                        high_wmark_pages(zone) - low_wmark_pages(zone) -
                        atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
-               zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+               clear_bit(ZONE_FAIR_DEPLETED, &zone->flags);
        } while (zone++ != preferred_zone);
 }
 
@@ -1986,7 +1984,7 @@ zonelist_scan:
                if (alloc_flags & ALLOC_FAIR) {
                        if (!zone_local(preferred_zone, zone))
                                break;
-                       if (zone_is_fair_depleted(zone)) {
+                       if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
                                nr_fair_skipped++;
                                continue;
                        }
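
The three hunks above convert ZONE_FAIR_DEPLETED handling from the zone_set_flag()/zone_clear_flag()/zone_is_fair_depleted() helpers to open-coded set_bit()/clear_bit()/test_bit() on zone->flags. The removed helpers were thin wrappers over the same atomic bitops; roughly, as a reconstruction of the pre-cleanup include/linux/mmzone.h (exact signatures are best-effort):

static inline void zone_set_flag(struct zone *zone, zone_flags_t flag)
{
	set_bit(flag, &zone->flags);
}

static inline void zone_clear_flag(struct zone *zone, zone_flags_t flag)
{
	clear_bit(flag, &zone->flags);
}

static inline int zone_is_fair_depleted(const struct zone *zone)
{
	return test_bit(ZONE_FAIR_DEPLETED, &zone->flags);
}

Dropping the wrappers makes the fairness-batch accounting read like every other flags word in the kernel, with no behavioural change.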
@@ -3614,68 +3612,30 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
        zonelist->_zonerefs[pos].zone_idx = 0;
 }
 
+#if defined(CONFIG_64BIT)
+/*
+ * Devices that require DMA32/DMA are relatively rare and do not justify
+ * penalising every machine for the sake of that specialised case. Default
+ * to node ordering on 64-bit NUMA machines.
+ */
+static int default_zonelist_order(void)
+{
+       return ZONELIST_ORDER_NODE;
+}
+#else
+/*
+ * On 32-bit, the Normal zone needs to be preserved for allocations accessible
+ * by the kernel. If processes running on node 0 deplete the low memory zone
+ * then reclaim will occur more frequently, increasing stalls and making it
+ * easier to OOM if a large percentage of the zone is under writeback or
+ * dirty. The problem is significantly worse if CONFIG_HIGHPTE is not set.
+ * Hence, default to zone ordering on 32-bit.
+ */
 static int default_zonelist_order(void)
 {
-       int nid, zone_type;
-       unsigned long low_kmem_size, total_size;
-       struct zone *z;
-       int average_size;
-       /*
-        * ZONE_DMA and ZONE_DMA32 can be very small area in the system.
-        * If they are really small and used heavily, the system can fall
-        * into OOM very easily.
-        * This function detect ZONE_DMA/DMA32 size and configures zone order.
-        */
-       /* Is there ZONE_NORMAL ? (ex. ppc has only DMA zone..) */
-       low_kmem_size = 0;
-       total_size = 0;
-       for_each_online_node(nid) {
-               for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
-                       z = &NODE_DATA(nid)->node_zones[zone_type];
-                       if (populated_zone(z)) {
-                               if (zone_type < ZONE_NORMAL)
-                                       low_kmem_size += z->managed_pages;
-                               total_size += z->managed_pages;
-                       } else if (zone_type == ZONE_NORMAL) {
-                               /*
-                                * If any node has only lowmem, then node order
-                                * is preferred to allow kernel allocations
-                                * locally; otherwise, they can easily infringe
-                                * on other nodes when there is an abundance of
-                                * lowmem available to allocate from.
-                                */
-                               return ZONELIST_ORDER_NODE;
-                       }
-               }
-       }
-       if (!low_kmem_size ||  /* there are no DMA area. */
-           low_kmem_size > total_size/2) /* DMA/DMA32 is big. */
-               return ZONELIST_ORDER_NODE;
-       /*
-        * look into each node's config.
-        * If there is a node whose DMA/DMA32 memory is very big area on
-        * local memory, NODE_ORDER may be suitable.
-        */
-       average_size = total_size /
-                               (nodes_weight(node_states[N_MEMORY]) + 1);
-       for_each_online_node(nid) {
-               low_kmem_size = 0;
-               total_size = 0;
-               for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
-                       z = &NODE_DATA(nid)->node_zones[zone_type];
-                       if (populated_zone(z)) {
-                               if (zone_type < ZONE_NORMAL)
-                                       low_kmem_size += z->present_pages;
-                               total_size += z->present_pages;
-                       }
-               }
-               if (low_kmem_size &&
-                   total_size > average_size && /* ignore small node */
-                   low_kmem_size > total_size * 70/100)
-                       return ZONELIST_ORDER_NODE;
-       }
        return ZONELIST_ORDER_ZONE;
 }
+#endif /* CONFIG_64BIT */
 
 static void set_zonelist_order(void)
 {
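
The replacement above collapses a ~60-line boot-time heuristic into a compile-time choice. The practical difference between the two orderings, for a hypothetical two-node machine with DMA32 and Normal zones on each node (illustrative shorthand, not kernel API):

/*
 * Fallback order for an allocation preferring node 0:
 *
 * ZONELIST_ORDER_NODE (the new 64-bit default) keeps allocations local,
 * spending the low zones of the local node before going off-node:
 *	node0/Normal -> node0/DMA32 -> node1/Normal -> node1/DMA32
 *
 * ZONELIST_ORDER_ZONE (the new 32-bit default) protects low zones,
 * spending every node's Normal zone before touching any DMA32 zone:
 *	node0/Normal -> node1/Normal -> node0/DMA32 -> node1/DMA32
 */

The deleted heuristic tried to pick between these at boot by weighing DMA/DMA32 size against total memory; the rewrite observes that the answer is effectively determined by word size.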
@@ -5011,6 +4971,8 @@ void __paginginit free_area_init_node(int nid, unsigned long *zones_size,
        pgdat->node_start_pfn = node_start_pfn;
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
        get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+       printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
+                       (u64)start_pfn << PAGE_SHIFT, ((u64)end_pfn << PAGE_SHIFT) - 1);
 #endif
        calculate_node_totalpages(pgdat, start_pfn, end_pfn,
                                  zones_size, zholes_size);
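
One subtlety in the printk added above: the end address must be computed as ((u64)end_pfn << PAGE_SHIFT) - 1, widening before the shift. Shifting first would perform the shift in unsigned long, which on 32-bit kernels overflows for PFNs at or above the 4 GiB boundary (e.g. under PAE). A self-contained user-space sketch of the failure mode, with hypothetical values:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12

int main(void)
{
	/* PFN of the first page past 4 GiB; uint32_t stands in for a
	 * 32-bit kernel's unsigned long. */
	uint32_t end_pfn = 0x100000;

	/* Wrong: the shift wraps to 0 in 32 bits before the widening cast. */
	uint64_t bad  = (uint64_t)(end_pfn << PAGE_SHIFT) - 1;

	/* Right: widen first, then shift, then subtract. */
	uint64_t good = ((uint64_t)end_pfn << PAGE_SHIFT) - 1;

	printf("bad:  %#llx\n", (unsigned long long)bad);  /* 0xffffffffffffffff */
	printf("good: %#llx\n", (unsigned long long)good); /* 0xffffffff */
	return 0;
}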
@@ -6588,161 +6550,3 @@ bool is_free_buddy_page(struct page *page)
        return order < MAX_ORDER;
 }
 #endif
-
-static const struct trace_print_flags pageflag_names[] = {
-       {1UL << PG_locked,              "locked"        },
-       {1UL << PG_error,               "error"         },
-       {1UL << PG_referenced,          "referenced"    },
-       {1UL << PG_uptodate,            "uptodate"      },
-       {1UL << PG_dirty,               "dirty"         },
-       {1UL << PG_lru,                 "lru"           },
-       {1UL << PG_active,              "active"        },
-       {1UL << PG_slab,                "slab"          },
-       {1UL << PG_owner_priv_1,        "owner_priv_1"  },
-       {1UL << PG_arch_1,              "arch_1"        },
-       {1UL << PG_reserved,            "reserved"      },
-       {1UL << PG_private,             "private"       },
-       {1UL << PG_private_2,           "private_2"     },
-       {1UL << PG_writeback,           "writeback"     },
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
-       {1UL << PG_head,                "head"          },
-       {1UL << PG_tail,                "tail"          },
-#else
-       {1UL << PG_compound,            "compound"      },
-#endif
-       {1UL << PG_swapcache,           "swapcache"     },
-       {1UL << PG_mappedtodisk,        "mappedtodisk"  },
-       {1UL << PG_reclaim,             "reclaim"       },
-       {1UL << PG_swapbacked,          "swapbacked"    },
-       {1UL << PG_unevictable,         "unevictable"   },
-#ifdef CONFIG_MMU
-       {1UL << PG_mlocked,             "mlocked"       },
-#endif
-#ifdef CONFIG_ARCH_USES_PG_UNCACHED
-       {1UL << PG_uncached,            "uncached"      },
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
-       {1UL << PG_hwpoison,            "hwpoison"      },
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       {1UL << PG_compound_lock,       "compound_lock" },
-#endif
-};
-
-static void dump_flags(unsigned long flags,
-                       const struct trace_print_flags *names, int count)
-{
-       const char *delim = "";
-       unsigned long mask;
-       int i;
-
-       printk(KERN_ALERT "flags: %#lx(", flags);
-
-       /* remove zone id */
-       flags &= (1UL << NR_PAGEFLAGS) - 1;
-
-       for (i = 0; i < count && flags; i++) {
-
-               mask = names[i].mask;
-               if ((flags & mask) != mask)
-                       continue;
-
-               flags &= ~mask;
-               printk("%s%s", delim, names[i].name);
-               delim = "|";
-       }
-
-       /* check for left over flags */
-       if (flags)
-               printk("%s%#lx", delim, flags);
-
-       printk(")\n");
-}
-
-void dump_page_badflags(struct page *page, const char *reason,
-               unsigned long badflags)
-{
-       printk(KERN_ALERT
-              "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
-               page, atomic_read(&page->_count), page_mapcount(page),
-               page->mapping, page->index);
-       BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS);
-       dump_flags(page->flags, pageflag_names, ARRAY_SIZE(pageflag_names));
-       if (reason)
-               pr_alert("page dumped because: %s\n", reason);
-       if (page->flags & badflags) {
-               pr_alert("bad because of flags:\n");
-               dump_flags(page->flags & badflags,
-                               pageflag_names, ARRAY_SIZE(pageflag_names));
-       }
-       mem_cgroup_print_bad_page(page);
-}
-
-void dump_page(struct page *page, const char *reason)
-{
-       dump_page_badflags(page, reason, 0);
-}
-EXPORT_SYMBOL(dump_page);
-
-#ifdef CONFIG_DEBUG_VM
-
-static const struct trace_print_flags vmaflags_names[] = {
-       {VM_READ,                       "read"          },
-       {VM_WRITE,                      "write"         },
-       {VM_EXEC,                       "exec"          },
-       {VM_SHARED,                     "shared"        },
-       {VM_MAYREAD,                    "mayread"       },
-       {VM_MAYWRITE,                   "maywrite"      },
-       {VM_MAYEXEC,                    "mayexec"       },
-       {VM_MAYSHARE,                   "mayshare"      },
-       {VM_GROWSDOWN,                  "growsdown"     },
-       {VM_PFNMAP,                     "pfnmap"        },
-       {VM_DENYWRITE,                  "denywrite"     },
-       {VM_LOCKED,                     "locked"        },
-       {VM_IO,                         "io"            },
-       {VM_SEQ_READ,                   "seqread"       },
-       {VM_RAND_READ,                  "randread"      },
-       {VM_DONTCOPY,                   "dontcopy"      },
-       {VM_DONTEXPAND,                 "dontexpand"    },
-       {VM_ACCOUNT,                    "account"       },
-       {VM_NORESERVE,                  "noreserve"     },
-       {VM_HUGETLB,                    "hugetlb"       },
-       {VM_NONLINEAR,                  "nonlinear"     },
-#if defined(CONFIG_X86)
-       {VM_PAT,                        "pat"           },
-#elif defined(CONFIG_PPC)
-       {VM_SAO,                        "sao"           },
-#elif defined(CONFIG_PARISC) || defined(CONFIG_METAG) || defined(CONFIG_IA64)
-       {VM_GROWSUP,                    "growsup"       },
-#elif !defined(CONFIG_MMU)
-       {VM_MAPPED_COPY,                "mappedcopy"    },
-#else
-       {VM_ARCH_1,                     "arch_1"        },
-#endif
-       {VM_DONTDUMP,                   "dontdump"      },
-#ifdef CONFIG_MEM_SOFT_DIRTY
-       {VM_SOFTDIRTY,                  "softdirty"     },
-#endif
-       {VM_MIXEDMAP,                   "mixedmap"      },
-       {VM_HUGEPAGE,                   "hugepage"      },
-       {VM_NOHUGEPAGE,                 "nohugepage"    },
-       {VM_MERGEABLE,                  "mergeable"     },
-};
-
-void dump_vma(const struct vm_area_struct *vma)
-{
-       printk(KERN_ALERT
-               "vma %p start %p end %p\n"
-               "next %p prev %p mm %p\n"
-               "prot %lx anon_vma %p vm_ops %p\n"
-               "pgoff %lx file %p private_data %p\n",
-               vma, (void *)vma->vm_start, (void *)vma->vm_end, vma->vm_next,
-               vma->vm_prev, vma->vm_mm,
-               (unsigned long)pgprot_val(vma->vm_page_prot),
-               vma->anon_vma, vma->vm_ops, vma->vm_pgoff,
-               vma->vm_file, vma->vm_private_data);
-       dump_flags(vma->vm_flags, vmaflags_names, ARRAY_SIZE(vmaflags_names));
-}
-EXPORT_SYMBOL(dump_vma);
-
-#endif         /* CONFIG_DEBUG_VM */
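
The dump_page()/dump_vma() machinery removed above was relocated rather than deleted outright (upstream moved it into mm/debug.c), which is also why the ftrace_event.h and memcontrol.h includes disappear at the top of this diff. Its core decoding loop, dump_flags(), greedily clears each named flag it matches and joins the names with '|'. A self-contained user-space sketch of the same loop, with an illustrative table rather than the kernel's:

#include <stdio.h>

struct flag_name {
	unsigned long mask;
	const char *name;
};

/* Illustrative subset; the kernel's pageflag_names covers every flag. */
static const struct flag_name names[] = {
	{ 1UL << 0, "locked" },
	{ 1UL << 4, "dirty"  },
	{ 1UL << 5, "lru"    },
	{ 1UL << 6, "active" },
};

static void dump_flags(unsigned long flags)
{
	const char *delim = "";
	size_t i;

	printf("flags: %#lx(", flags);
	for (i = 0; i < sizeof(names) / sizeof(names[0]) && flags; i++) {
		unsigned long mask = names[i].mask;

		if ((flags & mask) != mask)
			continue;
		flags &= ~mask;		/* consume the bits just printed */
		printf("%s%s", delim, names[i].name);
		delim = "|";
	}
	if (flags)	/* leftovers with no name in the table */
		printf("%s%#lx", delim, flags);
	printf(")\n");
}

int main(void)
{
	dump_flags((1UL << 4) | (1UL << 6) | (1UL << 20));
	/* prints: flags: 0x100050(dirty|active|0x100000) */
	return 0;
}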