#include <linux/backing-dev.h>
#include <linux/sysctl.h>
#include <linux/sysfs.h>
-#include <linux/balloon_compaction.h>
#include <linux/page-isolation.h>
#include <linux/kasan.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
+#include <linux/page_owner.h>
#include "internal.h"
#ifdef CONFIG_COMPACTION
static void map_pages(struct list_head *list)
{
- struct page *page;
+ unsigned int i, order, nr_pages;
+ struct page *page, *next;
+ LIST_HEAD(tmp_list);
+
+ list_for_each_entry_safe(page, next, list, lru) {
+ list_del(&page->lru);
- list_for_each_entry(page, list, lru) {
- arch_alloc_page(page, 0);
- kernel_map_pages(page, 1, 1);
- kasan_alloc_pages(page, 0);
+ order = page_private(page);
+ nr_pages = 1 << order;
+
+ post_alloc_hook(page, order, __GFP_MOVABLE);
+ if (order)
+ split_page(page, order);
+
+ for (i = 0; i < nr_pages; i++) {
+ list_add(&page->lru, &tmp_list);
+ page++;
+ }
}
+
+ list_splice(&tmp_list, list);
}
static inline bool migrate_async_suitable(int migratetype)
#ifdef CONFIG_COMPACTION
+int PageMovable(struct page *page)
+{
+ struct address_space *mapping;
+
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ if (!__PageMovable(page))
+ return 0;
+
+ mapping = page_mapping(page);
+ if (mapping && mapping->a_ops && mapping->a_ops->isolate_page)
+ return 1;
+
+ return 0;
+}
+EXPORT_SYMBOL(PageMovable);
+
+void __SetPageMovable(struct page *page, struct address_space *mapping)
+{
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE((unsigned long)mapping & PAGE_MAPPING_MOVABLE, page);
+ page->mapping = (void *)((unsigned long)mapping | PAGE_MAPPING_MOVABLE);
+}
+EXPORT_SYMBOL(__SetPageMovable);
+
+void __ClearPageMovable(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageLocked(page), page);
+ VM_BUG_ON_PAGE(!PageMovable(page), page);
+ /*
+ * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
+ * flag so that VM can catch up released page by driver after isolation.
+ * With it, VM migration doesn't try to put it back.
+ */
+ page->mapping = (void *)((unsigned long)page->mapping &
+ PAGE_MAPPING_MOVABLE);
+}
+EXPORT_SYMBOL(__ClearPageMovable);
+
/* Do not skip compaction more than 64 times */
#define COMPACT_MAX_DEFER_SHIFT 6
{
if (cc->mode == MIGRATE_ASYNC) {
if (!spin_trylock_irqsave(lock, *flags)) {
- cc->contended = COMPACT_CONTENDED_LOCK;
+ cc->contended = true;
return false;
}
} else {
}
if (fatal_signal_pending(current)) {
- cc->contended = COMPACT_CONTENDED_SCHED;
+ cc->contended = true;
return true;
}
if (need_resched()) {
if (cc->mode == MIGRATE_ASYNC) {
- cc->contended = COMPACT_CONTENDED_SCHED;
+ cc->contended = true;
return true;
}
cond_resched();
/* async compaction aborts if contended */
if (need_resched()) {
if (cc->mode == MIGRATE_ASYNC) {
- cc->contended = COMPACT_CONTENDED_SCHED;
+ cc->contended = true;
return true;
}
unsigned long flags = 0;
bool locked = false;
unsigned long blockpfn = *start_pfn;
+ unsigned int order;
cursor = pfn_to_page(blockpfn);
/* Isolate free pages. */
for (; blockpfn < end_pfn; blockpfn++, cursor++) {
- int isolated, i;
+ int isolated;
struct page *page = cursor;
/*
goto isolate_fail;
}
- /* Found a free page, break it into order-0 pages */
- isolated = split_free_page(page);
+ /* Found a free page, will break it into order-0 pages */
+ order = page_order(page);
+ isolated = __isolate_free_page(page, order);
if (!isolated)
break;
+ set_page_private(page, order);
total_isolated += isolated;
cc->nr_freepages += isolated;
- for (i = 0; i < isolated; i++) {
- list_add(&page->lru, freelist);
- page++;
- }
+ list_add_tail(&page->lru, freelist);
+
if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
blockpfn += isolated;
break;
*/
}
- /* split_free_page does not map the pages */
+ /* __isolate_free_page() does not map the pages */
map_pages(&freelist);
if (pfn < end_pfn) {
list_for_each_entry(page, &cc->migratepages, lru)
count[!!page_is_file_cache(page)]++;
- mod_zone_page_state(zone, NR_ISOLATED_ANON, count[0]);
- mod_zone_page_state(zone, NR_ISOLATED_FILE, count[1]);
+ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON, count[0]);
+ mod_node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE, count[1]);
}
/* Similar to reclaim, but different enough that they don't share logic */
{
unsigned long active, inactive, isolated;
- inactive = zone_page_state(zone, NR_INACTIVE_FILE) +
- zone_page_state(zone, NR_INACTIVE_ANON);
- active = zone_page_state(zone, NR_ACTIVE_FILE) +
- zone_page_state(zone, NR_ACTIVE_ANON);
- isolated = zone_page_state(zone, NR_ISOLATED_FILE) +
- zone_page_state(zone, NR_ISOLATED_ANON);
+ inactive = node_page_state(zone->zone_pgdat, NR_INACTIVE_FILE) +
+ node_page_state(zone->zone_pgdat, NR_INACTIVE_ANON);
+ active = node_page_state(zone->zone_pgdat, NR_ACTIVE_FILE) +
+ node_page_state(zone->zone_pgdat, NR_ACTIVE_ANON);
+ isolated = node_page_state(zone->zone_pgdat, NR_ISOLATED_FILE) +
+ node_page_state(zone->zone_pgdat, NR_ISOLATED_ANON);
return isolated > (inactive + active) / 2;
}
/* Time to isolate some pages for migration */
for (; low_pfn < end_pfn; low_pfn++) {
- bool is_lru;
if (skip_on_failure && low_pfn >= next_skip_pfn) {
/*
* if contended.
*/
if (!(low_pfn % SWAP_CLUSTER_MAX)
- && compact_unlock_should_abort(&zone->lru_lock, flags,
+ && compact_unlock_should_abort(zone_lru_lock(zone), flags,
&locked, cc))
break;
continue;
}
- /*
- * Check may be lockless but that's ok as we recheck later.
- * It's possible to migrate LRU pages and balloon pages
- * Skip any other type of page
- */
- is_lru = PageLRU(page);
- if (!is_lru) {
- if (unlikely(balloon_page_movable(page))) {
- if (balloon_page_isolate(page)) {
- /* Successfully isolated */
- goto isolate_success;
- }
- }
- }
-
/*
* Regardless of being on LRU, compound pages such as THP and
* hugetlbfs are not to be compacted. We can potentially save
goto isolate_fail;
}
- if (!is_lru)
+ /*
+ * Check may be lockless but that's ok as we recheck later.
+ * It's possible to migrate LRU and non-lru movable pages.
+ * Skip any other type of page
+ */
+ if (!PageLRU(page)) {
+ /*
+ * __PageMovable can return false positive so we need
+ * to verify it under page_lock.
+ */
+ if (unlikely(__PageMovable(page)) &&
+ !PageIsolated(page)) {
+ if (locked) {
+ spin_unlock_irqrestore(zone_lru_lock(zone),
+ flags);
+ locked = false;
+ }
+
+ if (isolate_movable_page(page, isolate_mode))
+ goto isolate_success;
+ }
+
goto isolate_fail;
+ }
/*
* Migration will fail if an anonymous page is pinned in memory,
/* If we already hold the lock, we can skip some rechecking */
if (!locked) {
- locked = compact_trylock_irqsave(&zone->lru_lock,
+ locked = compact_trylock_irqsave(zone_lru_lock(zone),
&flags, cc);
if (!locked)
break;
}
}
- lruvec = mem_cgroup_page_lruvec(page, zone);
+ lruvec = mem_cgroup_page_lruvec(page, zone->zone_pgdat);
/* Try isolate the page */
if (__isolate_lru_page(page, isolate_mode) != 0)
*/
if (nr_isolated) {
if (locked) {
- spin_unlock_irqrestore(&zone->lru_lock, flags);
+ spin_unlock_irqrestore(zone_lru_lock(zone), flags);
locked = false;
}
acct_isolated(zone, cc);
low_pfn = end_pfn;
if (locked)
- spin_unlock_irqrestore(&zone->lru_lock, flags);
+ spin_unlock_irqrestore(zone_lru_lock(zone), flags);
/*
* Update the pageblock-skip information and cached scanner pfn,
}
}
- /* split_free_page does not map the pages */
+ /* __isolate_free_page() does not map the pages */
map_pages(freelist);
/*
struct page *page;
const isolate_mode_t isolate_mode =
(sysctl_compact_unevictable_allowed ? ISOLATE_UNEVICTABLE : 0) |
- (cc->mode == MIGRATE_ASYNC ? ISOLATE_ASYNC_MIGRATE : 0);
+ (cc->mode != MIGRATE_SYNC ? ISOLATE_ASYNC_MIGRATE : 0);
/*
* Start at where we last stopped, or beginning of the zone as
return COMPACT_CONTINUE;
/* Compaction run is not finished if the watermark is not met */
- watermark = low_wmark_pages(zone);
+ watermark = zone->watermark[cc->alloc_flags & ALLOC_WMARK_MASK];
if (!zone_watermark_ok(zone, cc->order, watermark, cc->classzone_idx,
cc->alloc_flags))
/* Job done if page is free of the right migratetype */
if (!list_empty(&area->free_list[migratetype]))
- return COMPACT_PARTIAL;
+ return COMPACT_SUCCESS;
#ifdef CONFIG_CMA
/* MIGRATE_MOVABLE can fallback on MIGRATE_CMA */
if (migratetype == MIGRATE_MOVABLE &&
!list_empty(&area->free_list[MIGRATE_CMA]))
- return COMPACT_PARTIAL;
+ return COMPACT_SUCCESS;
#endif
/*
* Job done if allocation would steal freepages from
*/
if (find_suitable_fallback(area, order, migratetype,
true, &can_steal) != -1)
- return COMPACT_PARTIAL;
+ return COMPACT_SUCCESS;
}
return COMPACT_NO_SUITABLE_PAGE;
* compaction_suitable: Is this suitable to run compaction on this zone now?
* Returns
* COMPACT_SKIPPED - If there are too few free pages for compaction
- * COMPACT_PARTIAL - If the allocation would succeed without compaction
+ * COMPACT_SUCCESS - If the allocation would succeed without compaction
* COMPACT_CONTINUE - If compaction should run now
*/
static enum compact_result __compaction_suitable(struct zone *zone, int order,
if (is_via_compact_memory(order))
return COMPACT_CONTINUE;
- watermark = low_wmark_pages(zone);
+ watermark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
/*
* If watermarks for high-order allocation are already met, there
* should be no need for compaction at all.
*/
if (zone_watermark_ok(zone, order, watermark, classzone_idx,
alloc_flags))
- return COMPACT_PARTIAL;
+ return COMPACT_SUCCESS;
/*
- * Watermarks for order-0 must be met for compaction. Note the 2UL.
- * This is because during migration, copies of pages need to be
- * allocated and for a short time, the footprint is higher
+ * Watermarks for order-0 must be met for compaction to be able to
+ * isolate free pages for migration targets. This means that the
+ * watermark and alloc_flags have to match, or be more pessimistic than
+ * the check in __isolate_free_page(). We don't use the direct
+ * compactor's alloc_flags, as they are not relevant for freepage
+ * isolation. We however do use the direct compactor's classzone_idx to
+ * skip over zones where lowmem reserves would prevent allocation even
+ * if compaction succeeds.
+ * For costly orders, we require low watermark instead of min for
+ * compaction to proceed to increase its chances.
+ * ALLOC_CMA is used, as pages in CMA pageblocks are considered
+ * suitable migration targets
*/
- watermark += (2UL << order);
+ watermark = (order > PAGE_ALLOC_COSTLY_ORDER) ?
+ low_wmark_pages(zone) : min_wmark_pages(zone);
+ watermark += compact_gap(order);
if (!__zone_watermark_ok(zone, 0, watermark, classzone_idx,
- alloc_flags, wmark_target))
+ ALLOC_CMA, wmark_target))
return COMPACT_SKIPPED;
/*
ret = compaction_suitable(zone, cc->order, cc->alloc_flags,
cc->classzone_idx);
/* Compaction is likely to fail */
- if (ret == COMPACT_PARTIAL || ret == COMPACT_SKIPPED)
+ if (ret == COMPACT_SUCCESS || ret == COMPACT_SKIPPED)
return ret;
/* huh, compaction_suitable is returning something unexpected */
/*
* Setup to move all movable pages to the end of the zone. Used cached
- * information on where the scanners should start but check that it
- * is initialised by ensuring the values are within zone boundaries.
+ * information on where the scanners should start (unless we explicitly
+ * want to compact the whole zone), but check that it is initialised
+ * by ensuring the values are within zone boundaries.
*/
- cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
- cc->free_pfn = zone->compact_cached_free_pfn;
- if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
- cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
- zone->compact_cached_free_pfn = cc->free_pfn;
- }
- if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
+ if (cc->whole_zone) {
cc->migrate_pfn = start_pfn;
- zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
- zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
- }
+ cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
+ } else {
+ cc->migrate_pfn = zone->compact_cached_migrate_pfn[sync];
+ cc->free_pfn = zone->compact_cached_free_pfn;
+ if (cc->free_pfn < start_pfn || cc->free_pfn >= end_pfn) {
+ cc->free_pfn = pageblock_start_pfn(end_pfn - 1);
+ zone->compact_cached_free_pfn = cc->free_pfn;
+ }
+ if (cc->migrate_pfn < start_pfn || cc->migrate_pfn >= end_pfn) {
+ cc->migrate_pfn = start_pfn;
+ zone->compact_cached_migrate_pfn[0] = cc->migrate_pfn;
+ zone->compact_cached_migrate_pfn[1] = cc->migrate_pfn;
+ }
- if (cc->migrate_pfn == start_pfn)
- cc->whole_zone = true;
+ if (cc->migrate_pfn == start_pfn)
+ cc->whole_zone = true;
+ }
cc->last_migrated_pfn = 0;
trace_mm_compaction_end(start_pfn, cc->migrate_pfn,
cc->free_pfn, end_pfn, sync, ret);
- if (ret == COMPACT_CONTENDED)
- ret = COMPACT_PARTIAL;
-
return ret;
}
static enum compact_result compact_zone_order(struct zone *zone, int order,
- gfp_t gfp_mask, enum migrate_mode mode, int *contended,
+ gfp_t gfp_mask, enum compact_priority prio,
unsigned int alloc_flags, int classzone_idx)
{
enum compact_result ret;
.order = order,
.gfp_mask = gfp_mask,
.zone = zone,
- .mode = mode,
+ .mode = (prio == COMPACT_PRIO_ASYNC) ?
+ MIGRATE_ASYNC : MIGRATE_SYNC_LIGHT,
.alloc_flags = alloc_flags,
.classzone_idx = classzone_idx,
.direct_compaction = true,
+ .whole_zone = (prio == MIN_COMPACT_PRIORITY),
+ .ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY)
};
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
- *contended = cc.contended;
return ret;
}
* @alloc_flags: The allocation flags of the current allocation
* @ac: The context of current allocation
* @mode: The migration mode for async, sync light, or sync migration
- * @contended: Return value that determines if compaction was aborted due to
- * need_resched() or lock contention
*
* This is the main entry point for direct page compaction.
*/
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
- enum migrate_mode mode, int *contended)
+ enum compact_priority prio)
{
int may_enter_fs = gfp_mask & __GFP_FS;
int may_perform_io = gfp_mask & __GFP_IO;
struct zoneref *z;
struct zone *zone;
enum compact_result rc = COMPACT_SKIPPED;
- int all_zones_contended = COMPACT_CONTENDED_LOCK; /* init for &= op */
-
- *contended = COMPACT_CONTENDED_NONE;
/* Check if the GFP flags allow compaction */
- if (!order || !may_enter_fs || !may_perform_io)
+ if (!may_enter_fs || !may_perform_io)
return COMPACT_SKIPPED;
- trace_mm_compaction_try_to_compact_pages(order, gfp_mask, mode);
+ trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
/* Compact each zone in the list */
for_each_zone_zonelist_nodemask(zone, z, ac->zonelist, ac->high_zoneidx,
ac->nodemask) {
enum compact_result status;
- int zone_contended;
- if (compaction_deferred(zone, order)) {
+ if (prio > MIN_COMPACT_PRIORITY
+ && compaction_deferred(zone, order)) {
rc = max_t(enum compact_result, COMPACT_DEFERRED, rc);
continue;
}
- status = compact_zone_order(zone, order, gfp_mask, mode,
- &zone_contended, alloc_flags,
- ac_classzone_idx(ac));
+ status = compact_zone_order(zone, order, gfp_mask, prio,
+ alloc_flags, ac_classzone_idx(ac));
rc = max(status, rc);
- /*
- * It takes at least one zone that wasn't lock contended
- * to clear all_zones_contended.
- */
- all_zones_contended &= zone_contended;
- /* If a normal allocation would succeed, stop compacting */
- if (zone_watermark_ok(zone, order, low_wmark_pages(zone),
- ac_classzone_idx(ac), alloc_flags)) {
+ /* The allocation should succeed, stop compacting */
+ if (status == COMPACT_SUCCESS) {
/*
* We think the allocation will succeed in this zone,
* but it is not certain, hence the false. The caller
* succeeds in this zone.
*/
compaction_defer_reset(zone, order, false);
- /*
- * It is possible that async compaction aborted due to
- * need_resched() and the watermarks were ok thanks to
- * somebody else freeing memory. The allocation can
- * however still fail so we better signal the
- * need_resched() contention anyway (this will not
- * prevent the allocation attempt).
- */
- if (zone_contended == COMPACT_CONTENDED_SCHED)
- *contended = COMPACT_CONTENDED_SCHED;
- goto break_loop;
+ break;
}
- if (mode != MIGRATE_ASYNC && (status == COMPACT_COMPLETE ||
- status == COMPACT_PARTIAL_SKIPPED)) {
+ if (prio != COMPACT_PRIO_ASYNC && (status == COMPACT_COMPLETE ||
+ status == COMPACT_PARTIAL_SKIPPED))
/*
* We think that allocation won't succeed in this zone
* so we defer compaction there. If it ends up
* succeeding after all, it will be reset.
*/
defer_compaction(zone, order);
- }
/*
* We might have stopped compacting due to need_resched() in
* async compaction, or due to a fatal signal detected. In that
- * case do not try further zones and signal need_resched()
- * contention.
+ * case do not try further zones
*/
- if ((zone_contended == COMPACT_CONTENDED_SCHED)
- || fatal_signal_pending(current)) {
- *contended = COMPACT_CONTENDED_SCHED;
- goto break_loop;
- }
-
- continue;
-break_loop:
- /*
- * We might not have tried all the zones, so be conservative
- * and assume they are not all lock contended.
- */
- all_zones_contended = 0;
- break;
+ if ((prio == COMPACT_PRIO_ASYNC && need_resched())
+ || fatal_signal_pending(current))
+ break;
}
- /*
- * If at least one zone wasn't deferred or skipped, we report if all
- * zones that were tried were lock contended.
- */
- if (rc > COMPACT_INACTIVE && all_zones_contended)
- *contended = COMPACT_CONTENDED_LOCK;
-
return rc;
}
/* Compact all zones within a node */
-static void __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
+static void compact_node(int nid)
{
+ pg_data_t *pgdat = NODE_DATA(nid);
int zoneid;
struct zone *zone;
+ struct compact_control cc = {
+ .order = -1,
+ .mode = MIGRATE_SYNC,
+ .ignore_skip_hint = true,
+ .whole_zone = true,
+ };
+
for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
if (!populated_zone(zone))
continue;
- cc->nr_freepages = 0;
- cc->nr_migratepages = 0;
- cc->zone = zone;
- INIT_LIST_HEAD(&cc->freepages);
- INIT_LIST_HEAD(&cc->migratepages);
-
- /*
- * When called via /proc/sys/vm/compact_memory
- * this makes sure we compact the whole zone regardless of
- * cached scanner positions.
- */
- if (is_via_compact_memory(cc->order))
- __reset_isolation_suitable(zone);
-
- if (is_via_compact_memory(cc->order) ||
- !compaction_deferred(zone, cc->order))
- compact_zone(zone, cc);
-
- VM_BUG_ON(!list_empty(&cc->freepages));
- VM_BUG_ON(!list_empty(&cc->migratepages));
+ cc.nr_freepages = 0;
+ cc.nr_migratepages = 0;
+ cc.zone = zone;
+ INIT_LIST_HEAD(&cc.freepages);
+ INIT_LIST_HEAD(&cc.migratepages);
- if (is_via_compact_memory(cc->order))
- continue;
+ compact_zone(zone, &cc);
- if (zone_watermark_ok(zone, cc->order,
- low_wmark_pages(zone), 0, 0))
- compaction_defer_reset(zone, cc->order, false);
+ VM_BUG_ON(!list_empty(&cc.freepages));
+ VM_BUG_ON(!list_empty(&cc.migratepages));
}
}
-void compact_pgdat(pg_data_t *pgdat, int order)
-{
- struct compact_control cc = {
- .order = order,
- .mode = MIGRATE_ASYNC,
- };
-
- if (!order)
- return;
-
- __compact_pgdat(pgdat, &cc);
-}
-
-static void compact_node(int nid)
-{
- struct compact_control cc = {
- .order = -1,
- .mode = MIGRATE_SYNC,
- .ignore_skip_hint = true,
- };
-
- __compact_pgdat(NODE_DATA(nid), &cc);
-}
-
/* Compact all nodes in the system */
static void compact_nodes(void)
{
.ignore_skip_hint = true,
};
- bool success = false;
-
trace_mm_compaction_kcompactd_wake(pgdat->node_id, cc.order,
cc.classzone_idx);
count_vm_event(KCOMPACTD_WAKE);
return;
status = compact_zone(zone, &cc);
- if (zone_watermark_ok(zone, cc.order, low_wmark_pages(zone),
- cc.classzone_idx, 0)) {
- success = true;
+ if (status == COMPACT_SUCCESS) {
compaction_defer_reset(zone, cc.order, false);
} else if (status == COMPACT_PARTIAL_SKIPPED || status == COMPACT_COMPLETE) {
/*