#include <linux/kmemleak.h>
#include <linux/compaction.h>
#include <trace/events/kmem.h>
-#include <linux/ftrace_event.h>
-#include <linux/memcontrol.h>
#include <linux/prefetch.h>
#include <linux/mm_inline.h>
#include <linux/migrate.h>
* Remove at a later date when no bug reports exist related to
* grouping pages by mobility
*/
- BUG_ON(page_zone(start_page) != page_zone(end_page));
+ VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
#endif
for (page = start_page; page <= end_page;) {
__mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
if (atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]) <= 0 &&
- !zone_is_fair_depleted(zone))
- zone_set_flag(zone, ZONE_FAIR_DEPLETED);
+ !test_bit(ZONE_FAIR_DEPLETED, &zone->flags))
+ set_bit(ZONE_FAIR_DEPLETED, &zone->flags);
__count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(preferred_zone, zone, gfp_flags);
mod_zone_page_state(zone, NR_ALLOC_BATCH,
high_wmark_pages(zone) - low_wmark_pages(zone) -
atomic_long_read(&zone->vm_stat[NR_ALLOC_BATCH]));
- zone_clear_flag(zone, ZONE_FAIR_DEPLETED);
+ clear_bit(ZONE_FAIR_DEPLETED, &zone->flags);
} while (zone++ != preferred_zone);
}
if (alloc_flags & ALLOC_FAIR) {
if (!zone_local(preferred_zone, zone))
break;
- if (zone_is_fair_depleted(zone)) {
+ if (test_bit(ZONE_FAIR_DEPLETED, &zone->flags)) {
nr_fair_skipped++;
continue;
}
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int classzone_idx, int migratetype, enum migrate_mode mode,
- bool *contended_compaction, bool *deferred_compaction)
+ int *contended_compaction, bool *deferred_compaction)
{
struct zone *last_compact_zone = NULL;
unsigned long compact_result;
struct zonelist *zonelist, enum zone_type high_zoneidx,
nodemask_t *nodemask, int alloc_flags, struct zone *preferred_zone,
int classzone_idx, int migratetype, enum migrate_mode mode,
- bool *contended_compaction, bool *deferred_compaction)
+ int *contended_compaction, bool *deferred_compaction)
{
return NULL;
}
static void wake_all_kswapds(unsigned int order,
struct zonelist *zonelist,
enum zone_type high_zoneidx,
- struct zone *preferred_zone)
+ struct zone *preferred_zone,
+ nodemask_t *nodemask)
{
struct zoneref *z;
struct zone *zone;
- for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
+ for_each_zone_zonelist_nodemask(zone, z, zonelist,
+ high_zoneidx, nodemask)
wakeup_kswapd(zone, order, zone_idx(preferred_zone));
}
alloc_flags |= ALLOC_NO_WATERMARKS;
}
#ifdef CONFIG_CMA
- if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ if (gfpflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
#endif
return alloc_flags;
unsigned long did_some_progress;
enum migrate_mode migration_mode = MIGRATE_ASYNC;
bool deferred_compaction = false;
- bool contended_compaction = false;
+ int contended_compaction = COMPACT_CONTENDED_NONE;
/*
* In the slowpath, we sanity check order to avoid ever trying to
restart:
if (!(gfp_mask & __GFP_NO_KSWAPD))
- wake_all_kswapds(order, zonelist, high_zoneidx, preferred_zone);
+ wake_all_kswapds(order, zonelist, high_zoneidx,
+ preferred_zone, nodemask);
/*
* OK, we're below the kswapd watermark and have kicked background
if (page)
goto got_pg;
- /*
- * If compaction is deferred for high-order allocations, it is because
- * sync compaction recently failed. In this is the case and the caller
- * requested a movable allocation that does not heavily disrupt the
- * system then fail the allocation instead of entering direct reclaim.
- */
- if ((deferred_compaction || contended_compaction) &&
- (gfp_mask & __GFP_NO_KSWAPD))
- goto nopage;
+ /* Checks for THP-specific high-order allocations */
+ if ((gfp_mask & GFP_TRANSHUGE) == GFP_TRANSHUGE) {
+ /*
+ * If compaction is deferred for high-order allocations, it is
+ * because sync compaction recently failed. If this is the case
+ * and the caller requested a THP allocation, we do not want
+ * to heavily disrupt the system, so we fail the allocation
+ * instead of entering direct reclaim.
+ */
+ if (deferred_compaction)
+ goto nopage;
+
+ /*
+ * In all zones where compaction was attempted (and not
+ * deferred or skipped), lock contention has been detected.
+ * For THP allocation we do not want to disrupt the others
+ * so we fallback to base pages instead.
+ */
+ if (contended_compaction == COMPACT_CONTENDED_LOCK)
+ goto nopage;
+
+ /*
+ * If compaction was aborted due to need_resched(), we do not
+ * want to further increase allocation latency, unless it is
+ * khugepaged trying to collapse.
+ */
+ if (contended_compaction == COMPACT_CONTENDED_SCHED
+ && !(current->flags & PF_KTHREAD))
+ goto nopage;
+ }
/*
* It can become very expensive to allocate transparent hugepages at
struct zone *preferred_zone;
struct zoneref *preferred_zoneref;
struct page *page = NULL;
- int migratetype = allocflags_to_migratetype(gfp_mask);
+ int migratetype = gfpflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET|ALLOC_FAIR;
int classzone_idx;
zonelist->_zonerefs[pos].zone_idx = 0;
}
+#if defined(CONFIG_64BIT)
+/*
+ * Devices that require DMA32/DMA are relatively rare and do not justify a
+ * penalty to every machine in case the specialised case applies. Default
+ * to Node-ordering on 64-bit NUMA machines
+ */
+static int default_zonelist_order(void)
+{
+ return ZONELIST_ORDER_NODE;
+}
+#else
+/*
+ * On 32-bit, the Normal zone needs to be preserved for allocations accessible
+ * by the kernel. If processes running on node 0 deplete the low memory zone
+ * then reclaim will occur more frequency increasing stalls and potentially
+ * be easier to OOM if a large percentage of the zone is under writeback or
+ * dirty. The problem is significantly worse if CONFIG_HIGHPTE is not set.
+ * Hence, default to zone ordering on 32-bit.
+ */
static int default_zonelist_order(void)
{
- int nid, zone_type;
- unsigned long low_kmem_size, total_size;
- struct zone *z;
- int average_size;
- /*
- * ZONE_DMA and ZONE_DMA32 can be very small area in the system.
- * If they are really small and used heavily, the system can fall
- * into OOM very easily.
- * This function detect ZONE_DMA/DMA32 size and configures zone order.
- */
- /* Is there ZONE_NORMAL ? (ex. ppc has only DMA zone..) */
- low_kmem_size = 0;
- total_size = 0;
- for_each_online_node(nid) {
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
- z = &NODE_DATA(nid)->node_zones[zone_type];
- if (populated_zone(z)) {
- if (zone_type < ZONE_NORMAL)
- low_kmem_size += z->managed_pages;
- total_size += z->managed_pages;
- } else if (zone_type == ZONE_NORMAL) {
- /*
- * If any node has only lowmem, then node order
- * is preferred to allow kernel allocations
- * locally; otherwise, they can easily infringe
- * on other nodes when there is an abundance of
- * lowmem available to allocate from.
- */
- return ZONELIST_ORDER_NODE;
- }
- }
- }
- if (!low_kmem_size || /* there are no DMA area. */
- low_kmem_size > total_size/2) /* DMA/DMA32 is big. */
- return ZONELIST_ORDER_NODE;
- /*
- * look into each node's config.
- * If there is a node whose DMA/DMA32 memory is very big area on
- * local memory, NODE_ORDER may be suitable.
- */
- average_size = total_size /
- (nodes_weight(node_states[N_MEMORY]) + 1);
- for_each_online_node(nid) {
- low_kmem_size = 0;
- total_size = 0;
- for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) {
- z = &NODE_DATA(nid)->node_zones[zone_type];
- if (populated_zone(z)) {
- if (zone_type < ZONE_NORMAL)
- low_kmem_size += z->present_pages;
- total_size += z->present_pages;
- }
- }
- if (low_kmem_size &&
- total_size > average_size && /* ignore small node */
- low_kmem_size > total_size * 70/100)
- return ZONELIST_ORDER_NODE;
- }
return ZONELIST_ORDER_ZONE;
}
+#endif /* CONFIG_64BIT */
static void set_zonelist_order(void)
{
pgdat->node_start_pfn = node_start_pfn;
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
+ printk(KERN_INFO "Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid,
+ (u64) start_pfn << PAGE_SHIFT, (u64) (end_pfn << PAGE_SHIFT) - 1);
#endif
calculate_node_totalpages(pgdat, start_pfn, end_pfn,
zones_size, zholes_size);
if (list_empty(&cc->migratepages)) {
cc->nr_migratepages = 0;
- pfn = isolate_migratepages_range(cc->zone, cc,
- pfn, end, true);
+ pfn = isolate_migratepages_range(cc, pfn, end);
if (!pfn) {
ret = -EINTR;
break;
return order < MAX_ORDER;
}
#endif
-
-static const struct trace_print_flags pageflag_names[] = {
- {1UL << PG_locked, "locked" },
- {1UL << PG_error, "error" },
- {1UL << PG_referenced, "referenced" },
- {1UL << PG_uptodate, "uptodate" },
- {1UL << PG_dirty, "dirty" },
- {1UL << PG_lru, "lru" },
- {1UL << PG_active, "active" },
- {1UL << PG_slab, "slab" },
- {1UL << PG_owner_priv_1, "owner_priv_1" },
- {1UL << PG_arch_1, "arch_1" },
- {1UL << PG_reserved, "reserved" },
- {1UL << PG_private, "private" },
- {1UL << PG_private_2, "private_2" },
- {1UL << PG_writeback, "writeback" },
-#ifdef CONFIG_PAGEFLAGS_EXTENDED
- {1UL << PG_head, "head" },
- {1UL << PG_tail, "tail" },
-#else
- {1UL << PG_compound, "compound" },
-#endif
- {1UL << PG_swapcache, "swapcache" },
- {1UL << PG_mappedtodisk, "mappedtodisk" },
- {1UL << PG_reclaim, "reclaim" },
- {1UL << PG_swapbacked, "swapbacked" },
- {1UL << PG_unevictable, "unevictable" },
-#ifdef CONFIG_MMU
- {1UL << PG_mlocked, "mlocked" },
-#endif
-#ifdef CONFIG_ARCH_USES_PG_UNCACHED
- {1UL << PG_uncached, "uncached" },
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
- {1UL << PG_hwpoison, "hwpoison" },
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- {1UL << PG_compound_lock, "compound_lock" },
-#endif
-};
-
-static void dump_page_flags(unsigned long flags)
-{
- const char *delim = "";
- unsigned long mask;
- int i;
-
- BUILD_BUG_ON(ARRAY_SIZE(pageflag_names) != __NR_PAGEFLAGS);
-
- printk(KERN_ALERT "page flags: %#lx(", flags);
-
- /* remove zone id */
- flags &= (1UL << NR_PAGEFLAGS) - 1;
-
- for (i = 0; i < ARRAY_SIZE(pageflag_names) && flags; i++) {
-
- mask = pageflag_names[i].mask;
- if ((flags & mask) != mask)
- continue;
-
- flags &= ~mask;
- printk("%s%s", delim, pageflag_names[i].name);
- delim = "|";
- }
-
- /* check for left over flags */
- if (flags)
- printk("%s%#lx", delim, flags);
-
- printk(")\n");
-}
-
-void dump_page_badflags(struct page *page, const char *reason,
- unsigned long badflags)
-{
- printk(KERN_ALERT
- "page:%p count:%d mapcount:%d mapping:%p index:%#lx\n",
- page, atomic_read(&page->_count), page_mapcount(page),
- page->mapping, page->index);
- dump_page_flags(page->flags);
- if (reason)
- pr_alert("page dumped because: %s\n", reason);
- if (page->flags & badflags) {
- pr_alert("bad because of flags:\n");
- dump_page_flags(page->flags & badflags);
- }
- mem_cgroup_print_bad_page(page);
-}
-
-void dump_page(struct page *page, const char *reason)
-{
- dump_page_badflags(page, reason, 0);
-}
-EXPORT_SYMBOL(dump_page);