X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=mm%2Fcompaction.c;h=0409a4ad6ea1363611d49269ecbb5ef88afe5c87;hb=68ba0326b4e14988f9e0c24a6e12a85cf2acd1ca;hp=cd93ea24c565c17206ff56817a25b6929a94d632;hpb=bca6759258dbef378bcf5b872177bcd2259ceb68;p=cascardo%2Flinux.git diff --git a/mm/compaction.c b/mm/compaction.c index cd93ea24c565..0409a4ad6ea1 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -331,7 +331,7 @@ static bool compact_trylock_irqsave(spinlock_t *lock, unsigned long *flags, { if (cc->mode == MIGRATE_ASYNC) { if (!spin_trylock_irqsave(lock, *flags)) { - cc->contended = COMPACT_CONTENDED_LOCK; + cc->contended = true; return false; } } else { @@ -365,13 +365,13 @@ static bool compact_unlock_should_abort(spinlock_t *lock, } if (fatal_signal_pending(current)) { - cc->contended = COMPACT_CONTENDED_SCHED; + cc->contended = true; return true; } if (need_resched()) { if (cc->mode == MIGRATE_ASYNC) { - cc->contended = COMPACT_CONTENDED_SCHED; + cc->contended = true; return true; } cond_resched(); @@ -394,7 +394,7 @@ static inline bool compact_should_abort(struct compact_control *cc) /* async compaction aborts if contended */ if (need_resched()) { if (cc->mode == MIGRATE_ASYNC) { - cc->contended = COMPACT_CONTENDED_SCHED; + cc->contended = true; return true; } @@ -997,8 +997,12 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn, #ifdef CONFIG_COMPACTION /* Returns true if the page is within a block suitable for migration to */ -static bool suitable_migration_target(struct page *page) +static bool suitable_migration_target(struct compact_control *cc, + struct page *page) { + if (cc->ignore_block_suitable) + return true; + /* If the page is a large free page, then disallow migration */ if (PageBuddy(page)) { /* @@ -1083,7 +1087,7 @@ static void isolate_freepages(struct compact_control *cc) continue; /* Check the block is suitable for migration */ - if (!suitable_migration_target(page)) + if (!suitable_migration_target(cc, page)) continue; /* If isolation recently failed, do not retry */ @@ -1200,7 +1204,7 @@ static isolate_migrate_t isolate_migratepages(struct zone *zone, struct page *page; const isolate_mode_t isolate_mode = (sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) | - (cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0); + (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0); /* * Start at where we last stopped, or beginning of the zone as @@ -1316,7 +1320,7 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_ return COMPACT_CONTINUE; /* Compaction run is not finished if the watermark is not met */ - watermark = low_wmark_pages(zone); + watermark = zone->watermark[cc->alloc_flags & ALLOC_WMARK_MASK]; if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx, cc->alloc_flags)) @@ -1329,13 +1333,13 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_ /* Job done if page is free of the right migratetype */ if (!list_empty(&area->free_list[migratetype])) - return COMPACT_PARTIAL; + return COMPACT_SUCCESS; #ifdef CONFIG_CMA /* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */ if (migratetype == MIGRATE_MOVABLE && !list_empty(&area->free_list[MIGRATE_CMA])) - return COMPACT_PARTIAL; + return COMPACT_SUCCESS; #endif /* * Job done if allocation would steal freepages from @@ -1343,7 +1347,7 @@ static enum compact_result __compact_finished(struct zone *zone, struct compact_ */ if (find_suitable_fallback(area, order, migratetype, true, &can_steal) != -1) - return COMPACT_PARTIAL; + return COMPACT_SUCCESS; } return COMPACT_NO_SUITABLE_PAGE; @@ -1367,7 +1371,7 @@ static enum compact_result compact_finished(struct zone *zone, * compaction_suitable: Is this suitable to run compaction on this zone now? * Returns * COMPACT_SKIPPED - If there are too few free pages for compaction - * COMPACT_PARTIAL - If the allocation would succeed without compaction + * COMPACT_SUCCESS - If the allocation would succeed without compaction * COMPACT_CONTINUE - If compaction should run now */ static enum compact_result __compaction_suitable(struct zone *zone, int order, @@ -1375,46 +1379,41 @@ static enum compact_result __compaction_suitable(struct zone *zone, int order, int classzone_idx, unsigned long wmark_target) { - int fragindex; unsigned long watermark; if (is_via_compact_memory(order)) return COMPACT_CONTINUE; - watermark = low_wmark_pages(zone); + watermark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK]; /* * If watermarks for high-order allocation are already met, there * should be no need for compaction at all. */ if (zone_watermark_ok(zone, order, watermark, classzone_idx, alloc_flags)) - return COMPACT_PARTIAL; + return COMPACT_SUCCESS; /* - * Watermarks for order-0 must be met for compaction. Note the 2UL. - * This is because during migration, copies of pages need to be - * allocated and for a short time, the footprint is higher + * Watermarks for order-0 must be met for compaction to be able to + * isolate free pages for migration targets. This means that the + * watermark and alloc_flags have to match, or be more pessimistic than + * the check in __isolate_free_page(). We don't use the direct + * compactor's alloc_flags, as they are not relevant for freepage + * isolation. We however do use the direct compactor's classzone_idx to + * skip over zones where lowmem reserves would prevent allocation even + * if compaction succeeds. + * For costly orders, we require low watermark instead of min for + * compaction to proceed to increase its chances. + * ALLOC_CMA is used, as pages in CMA pageblocks are considered + * suitable migration targets */ - watermark += (2UL << order); + watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ? + low_wmark_pages(zone) : min_wmark_pages(zone); + watermark += compact_gap(order); if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx, - alloc_flags, wmark_target)) + ALLOC_CMA, wmark_target)) return COMPACT_SKIPPED; - /* - * fragmentation index determines if allocation failures are due to - * low memory or external fragmentation - * - * index of -1000 would imply allocations might succeed depending on - * watermarks, but we already failed the high-order watermark check - * index towards 0 implies failure is due to lack of memory - * index towards 1000 implies failure is due to fragmentation - * - * Only compact if a failure would be due to fragmentation. - */ - fragindex = fragmentation_index(zone, order); - if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) - return COMPACT_NOT_SUITABLE_ZONE; - return COMPACT_CONTINUE; } @@ -1423,9 +1422,32 @@ enum compact_result compaction_suitable(struct zone *zone, int order, int classzone_idx) { enum compact_result ret; + int fragindex; ret = __compaction_suitable(zone, order, alloc_flags, classzone_idx, zone_page_state(zone, NR_FREE_PAGES)); + /* + * fragmentation index determines if allocation failures are due to + * low memory or external fragmentation + * + * index of -1000 would imply allocations might succeed depending on + * watermarks, but we already failed the high-order watermark check + * index towards 0 implies failure is due to lack of memory + * index towards 1000 implies failure is due to fragmentation + * + * Only compact if a failure would be due to fragmentation. Also + * ignore fragindex for non-costly orders where the alternative to + * a successful reclaim/compaction is OOM. Fragindex and the + * vm.extfrag_threshold sysctl is meant as a heuristic to prevent + * excessive compaction for costly orders, but it should not be at the + * expense of system stability. + */ + if (ret == COMPACT_CONTINUE && (order > PAGE_ALLOC_COSTLY_ORDER)) { + fragindex = fragmentation_index(zone, order); + if (fragindex >= 0 && fragindex <= sysctl_extfrag_threshold) + ret = COMPACT_NOT_SUITABLE_ZONE; + } + trace_mm_compaction_suitable(zone, order, ret); if (ret == COMPACT_NOT_SUITABLE_ZONE) ret = COMPACT_SKIPPED; @@ -1438,11 +1460,6 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, { struct zone *zone; struct zoneref *z; - pg_data_t *last_pgdat = NULL; - - /* Do not retry compaction for zone-constrained allocations */ - if (ac->high_zoneidx < ZONE_NORMAL) - return false; /* * Make sure at least one zone would pass __compaction_suitable if we continue @@ -1453,31 +1470,17 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order, unsigned long available; enum compact_result compact_result; - if (last_pgdat == zone->zone_pgdat) - continue; - - /* - * This over-estimates the number of pages available for - * reclaim/compaction but walking the LRU would take too - * long. The consequences are that compaction may retry - * longer than it should for a zone-constrained allocation - * request. - */ - last_pgdat = zone->zone_pgdat; - available = pgdat_reclaimable_pages(zone->zone_pgdat) / order; - /* * Do not consider all the reclaimable memory because we do not * want to trash just for a single high order allocation which * is even not guaranteed to appear even if __compaction_suitable * is happy about the watermark check. */ + available = zone_reclaimable_pages(zone) / order; available += zone_page_state_snapshot(zone, NR_FREE_PAGES); - available = min(zone->managed_pages, available); compact_result = __compaction_suitable(zone, order, alloc_flags, ac_classzone_idx(ac), available); - if (compact_result != COMPACT_SKIPPED && - compact_result != COMPACT_NOT_SUITABLE_ZONE) + if (compact_result != COMPACT_SKIPPED) return true; } @@ -1495,7 +1498,7 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro ret = compaction_suitable(zone, cc->order, cc->alloc_flags, cc->classzone_idx); /* Compaction is likely to fail */ - if (ret == COMPACT_PARTIAL || ret == COMPACT_SKIPPED) + if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED) return ret; /* huh, compaction_suitable is returning something unexpected */ @@ -1510,23 +1513,29 @@ static enum compact_result compact_zone(struct zone *zone, struct compact_contro /* * Setup to move all movable pages to the end of the zone. Used cached - * information on where the scanners should start but check that it - * is initialised by ensuring the values are within zone boundaries. + * information on where the scanners should start (unless we explicitly + * want to compact the whole zone), but check that it is initialised + * by ensuring the values are within zone boundaries. */ - cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; - cc->free_pfn = zone->compact_cached_free_pfn; - if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { - cc->free_pfn = pageblock_start_pfn(end_pfn - 1); - zone->compact_cached_free_pfn = cc->free_pfn; - } - if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { + if (cc->whole_zone) { cc->migrate_pfn = start_pfn; - zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; - zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; - } + cc->free_pfn = pageblock_start_pfn(end_pfn - 1); + } else { + cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync]; + cc->free_pfn = zone->compact_cached_free_pfn; + if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) { + cc->free_pfn = pageblock_start_pfn(end_pfn - 1); + zone->compact_cached_free_pfn = cc->free_pfn; + } + if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) { + cc->migrate_pfn = start_pfn; + zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn; + zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn; + } - if (cc->migrate_pfn == start_pfn) - cc->whole_zone = true; + if (cc->migrate_pfn == start_pfn) + cc->whole_zone = true; + } cc->last_migrated_pfn = 0; @@ -1637,14 +1646,11 @@ out: trace_mm_compaction_end(start_pfn, cc->migrate_pfn, cc->free_pfn, end_pfn, sync, ret); - if (ret == COMPACT_CONTENDED) - ret = COMPACT_PARTIAL; - return ret; } static enum compact_result compact_zone_order(struct zone *zone, int order, - gfp_t gfp_mask, enum migrate_mode mode, int *contended, + gfp_t gfp_mask, enum compact_priority prio, unsigned int alloc_flags, int classzone_idx) { enum compact_result ret; @@ -1654,10 +1660,14 @@ static enum compact_result compact_zone_order(struct zone *zone, int order, .order = order, .gfp_mask = gfp_mask, .zone = zone, - .mode = mode, + .mode = (prio == COMPACT_PRIO_ASYNC) ? + MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT, .alloc_flags = alloc_flags, .classzone_idx = classzone_idx, .direct_compaction = true, + .whole_zone = (prio == MIN_COMPACT_PRIORITY), + .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY), + .ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY) }; INIT_LIST_HEAD(&cc.freepages); INIT_LIST_HEAD(&cc.migratepages); @@ -1667,7 +1677,6 @@ static enum compact_result compact_zone_order(struct zone *zone, int order, VM_BUG_ON(!list_empty(&cc.freepages)); VM_BUG_ON(!list_empty(&cc.migratepages)); - *contended = cc.contended; return ret; } @@ -1680,54 +1689,42 @@ int sysctl_extfrag_threshold = 500; * @alloc_flags: The allocation flags of the current allocation * @ac: The context of current allocation * @mode: The migration mode for async, sync light, or sync migration - * @contended: Return value that determines if compaction was aborted due to - * need_resched() or lock contention * * This is the main entry point for direct page compaction. */ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, - enum migrate_mode mode, int *contended) + enum compact_priority prio) { int may_enter_fs = gfp_mask & __GFP_FS; int may_perform_io = gfp_mask & __GFP_IO; struct zoneref *z; struct zone *zone; enum compact_result rc = COMPACT_SKIPPED; - int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */ - - *contended = COMPACT_CONTENDED_NONE; /* Check if the GFP flags allow compaction */ if (!may_enter_fs || !may_perform_io) return COMPACT_SKIPPED; - trace_mm_compaction_try_to_compact_pages(order, gfp_mask, mode); + trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); /* Compact each zone in the list */ for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx, ac->nodemask) { enum compact_result status; - int zone_contended; - if (compaction_deferred(zone, order)) { + if (prio > MIN_COMPACT_PRIORITY + && compaction_deferred(zone, order)) { rc = max_t(enum compact_result, COMPACT_DEFERRED, rc); continue; } - status = compact_zone_order(zone, order, gfp_mask, mode, - &zone_contended, alloc_flags, - ac_classzone_idx(ac)); + status = compact_zone_order(zone, order, gfp_mask, prio, + alloc_flags, ac_classzone_idx(ac)); rc = max(status, rc); - /* - * It takes at least one zone that wasn't lock contended - * to clear all_zones_contended. - */ - all_zones_contended &= zone_contended; - /* If a normal allocation would succeed, stop compacting */ - if (zone_watermark_ok(zone, order, low_wmark_pages(zone), - ac_classzone_idx(ac), alloc_flags)) { + /* The allocation should succeed, stop compacting */ + if (status == COMPACT_SUCCESS) { /* * We think the allocation will succeed in this zone, * but it is not certain, hence the false. The caller @@ -1735,68 +1732,46 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, * succeeds in this zone. */ compaction_defer_reset(zone, order, false); - /* - * It is possible that async compaction aborted due to - * need_resched() and the watermarks were ok thanks to - * somebody else freeing memory. The allocation can - * however still fail so we better signal the - * need_resched() contention anyway (this will not - * prevent the allocation attempt). - */ - if (zone_contended == COMPACT_CONTENDED_SCHED) - *contended = COMPACT_CONTENDED_SCHED; - goto break_loop; + break; } - if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE || - status == COMPACT_PARTIAL_SKIPPED)) { + if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE || + status == COMPACT_PARTIAL_SKIPPED)) /* * We think that allocation won't succeed in this zone * so we defer compaction there. If it ends up * succeeding after all, it will be reset. */ defer_compaction(zone, order); - } /* * We might have stopped compacting due to need_resched() in * async compaction, or due to a fatal signal detected. In that - * case do not try further zones and signal need_resched() - * contention. + * case do not try further zones */ - if ((zone_contended == COMPACT_CONTENDED_SCHED) - || fatal_signal_pending(current)) { - *contended = COMPACT_CONTENDED_SCHED; - goto break_loop; - } - - continue; -break_loop: - /* - * We might not have tried all the zones, so be conservative - * and assume they are not all lock contended. - */ - all_zones_contended = 0; - break; + if ((prio == COMPACT_PRIO_ASYNC && need_resched()) + || fatal_signal_pending(current)) + break; } - /* - * If at least one zone wasn't deferred or skipped, we report if all - * zones that were tried were lock contended. - */ - if (rc > COMPACT_INACTIVE && all_zones_contended) - *contended = COMPACT_CONTENDED_LOCK; - return rc; } /* Compact all zones within a node */ -static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) +static void compact_node(int nid) { + pg_data_t *pgdat = NODE_DATA(nid); int zoneid; struct zone *zone; + struct compact_control cc = { + .order = -1, + .mode = MIGRATE_SYNC, + .ignore_skip_hint = true, + .whole_zone = true, + }; + for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) { @@ -1804,60 +1779,19 @@ static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc) if (!populated_zone(zone)) continue; - cc->nr_freepages = 0; - cc->nr_migratepages = 0; - cc->zone = zone; - INIT_LIST_HEAD(&cc->freepages); - INIT_LIST_HEAD(&cc->migratepages); - - /* - * When called via /proc/sys/vm/compact_memory - * this makes sure we compact the whole zone regardless of - * cached scanner positions. - */ - if (is_via_compact_memory(cc->order)) - __reset_isolation_suitable(zone); - - if (is_via_compact_memory(cc->order) || - !compaction_deferred(zone, cc->order)) - compact_zone(zone, cc); - - VM_BUG_ON(!list_empty(&cc->freepages)); - VM_BUG_ON(!list_empty(&cc->migratepages)); + cc.nr_freepages = 0; + cc.nr_migratepages = 0; + cc.zone = zone; + INIT_LIST_HEAD(&cc.freepages); + INIT_LIST_HEAD(&cc.migratepages); - if (is_via_compact_memory(cc->order)) - continue; + compact_zone(zone, &cc); - if (zone_watermark_ok(zone, cc->order, - low_wmark_pages(zone), 0, 0)) - compaction_defer_reset(zone, cc->order, false); + VM_BUG_ON(!list_empty(&cc.freepages)); + VM_BUG_ON(!list_empty(&cc.migratepages)); } } -void compact_pgdat(pg_data_t *pgdat, int order) -{ - struct compact_control cc = { - .order = order, - .mode = MIGRATE_ASYNC, - }; - - if (!order) - return; - - __compact_pgdat(pgdat, &cc); -} - -static void compact_node(int nid) -{ - struct compact_control cc = { - .order = -1, - .mode = MIGRATE_SYNC, - .ignore_skip_hint = true, - }; - - __compact_pgdat(NODE_DATA(nid), &cc); -} - /* Compact all nodes in the system */ static void compact_nodes(void) { @@ -1963,8 +1897,6 @@ static void kcompactd_do_work(pg_data_t *pgdat) .ignore_skip_hint = true, }; - bool success = false; - trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order, cc.classzone_idx); count_vm_event(KCOMPACTD_WAKE); @@ -1993,9 +1925,7 @@ static void kcompactd_do_work(pg_data_t *pgdat) return; status = compact_zone(zone, &cc); - if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone), - cc.classzone_idx, 0)) { - success = true; + if (status == COMPACT_SUCCESS) { compaction_defer_reset(zone, cc.order, false); } else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) { /*