}
#endif
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+ int nr;
+
+ nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+ zone_page_state(zone, NR_INACTIVE_FILE);
+
+ if (get_nr_swap_pages() > 0)
+ nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+ zone_page_state(zone, NR_INACTIVE_ANON);
+
+ return nr;
+}
+
+bool zone_reclaimable(struct zone *zone)
+{
+ return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+}
+
static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
{
if (!mem_cgroup_disabled())
}
/*
- * Add a shrinker callback to be called from the vm
+ * Add a shrinker callback to be called from the vm.
*/
-void register_shrinker(struct shrinker *shrinker)
+int register_shrinker(struct shrinker *shrinker)
{
- atomic_long_set(&shrinker->nr_in_batch, 0);
+ size_t size = sizeof(*shrinker->nr_deferred);
+
+ /*
+ * If we only have one possible node in the system anyway, save
+ * ourselves the trouble and disable NUMA aware behavior. This way we
+ * will save memory and some small loop time later.
+ */
+ if (nr_node_ids == 1)
+ shrinker->flags &= ~SHRINKER_NUMA_AWARE;
+
+ if (shrinker->flags & SHRINKER_NUMA_AWARE)
+ size *= nr_node_ids;
+
+ shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
+ if (!shrinker->nr_deferred)
+ return -ENOMEM;
+
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
up_write(&shrinker_rwsem);
+ return 0;
}
EXPORT_SYMBOL(register_shrinker);
}
EXPORT_SYMBOL(unregister_shrinker);
-static inline int do_shrinker_shrink(struct shrinker *shrinker,
- struct shrink_control *sc,
- unsigned long nr_to_scan)
-{
- sc->nr_to_scan = nr_to_scan;
- return (*shrinker->shrink)(shrinker, sc);
+#define SHRINK_BATCH 128
+
+static unsigned long
+shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
+ unsigned long nr_pages_scanned, unsigned long lru_pages)
+{
+ unsigned long freed = 0;
+ unsigned long long delta;
+ long total_scan;
+ long max_pass;
+ long nr;
+ long new_nr;
+ int nid = shrinkctl->nid;
+ long batch_size = shrinker->batch ? shrinker->batch
+ : SHRINK_BATCH;
+
+ max_pass = shrinker->count_objects(shrinker, shrinkctl);
+ if (max_pass == 0)
+ return 0;
+
+ /*
+ * copy the current shrinker scan count into a local variable
+ * and zero it so that other concurrent shrinker invocations
+ * don't also do this scanning work.
+ */
+ nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+
+ total_scan = nr;
+ delta = (4 * nr_pages_scanned) / shrinker->seeks;
+ delta *= max_pass;
+ do_div(delta, lru_pages + 1);
+ total_scan += delta;
+ if (total_scan < 0) {
+ printk(KERN_ERR
+ "shrink_slab: %pF negative objects to delete nr=%ld\n",
+ shrinker->scan_objects, total_scan);
+ total_scan = max_pass;
+ }
+
+ /*
+ * We need to avoid excessive windup on filesystem shrinkers
+ * due to large numbers of GFP_NOFS allocations causing the
+ * shrinkers to return -1 all the time. This results in a large
+ * nr being built up so when a shrink that can do some work
+ * comes along it empties the entire cache due to nr >>>
+ * max_pass. This is bad for sustaining a working set in
+ * memory.
+ *
+ * Hence only allow the shrinker to scan the entire cache when
+ * a large delta change is calculated directly.
+ */
+ if (delta < max_pass / 4)
+ total_scan = min(total_scan, max_pass / 2);
+
+ /*
+ * Avoid risking looping forever due to too large nr value:
+ * never try to free more than twice the estimate number of
+ * freeable entries.
+ */
+ if (total_scan > max_pass * 2)
+ total_scan = max_pass * 2;
+
+ trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
+ nr_pages_scanned, lru_pages,
+ max_pass, delta, total_scan);
+
+ while (total_scan >= batch_size) {
+ unsigned long ret;
+
+ shrinkctl->nr_to_scan = batch_size;
+ ret = shrinker->scan_objects(shrinker, shrinkctl);
+ if (ret == SHRINK_STOP)
+ break;
+ freed += ret;
+
+ count_vm_events(SLABS_SCANNED, batch_size);
+ total_scan -= batch_size;
+
+ cond_resched();
+ }
+
+ /*
+ * move the unused scan count back into the shrinker in a
+ * manner that handles concurrent updates. If we exhausted the
+ * scan, there is no need to do an update.
+ */
+ if (total_scan > 0)
+ new_nr = atomic_long_add_return(total_scan,
+ &shrinker->nr_deferred[nid]);
+ else
+ new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+
+ trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
+ return freed;
}
-#define SHRINK_BATCH 128
/*
* Call the shrink functions to age shrinkable caches
*
*
* Returns the number of slab objects which we shrunk.
*/
-unsigned long shrink_slab(struct shrink_control *shrink,
+unsigned long shrink_slab(struct shrink_control *shrinkctl,
unsigned long nr_pages_scanned,
unsigned long lru_pages)
{
struct shrinker *shrinker;
- unsigned long ret = 0;
+ unsigned long freed = 0;
if (nr_pages_scanned == 0)
nr_pages_scanned = SWAP_CLUSTER_MAX;
if (!down_read_trylock(&shrinker_rwsem)) {
- /* Assume we'll be able to shrink next time */
- ret = 1;
+ /*
+ * If we would return 0, our callers would understand that we
+ * have nothing else to shrink and give up trying. By returning
+ * 1 we keep it going and assume we'll be able to shrink next
+ * time.
+ */
+ freed = 1;
goto out;
}
list_for_each_entry(shrinker, &shrinker_list, list) {
- unsigned long long delta;
- long total_scan;
- long max_pass;
- int shrink_ret = 0;
- long nr;
- long new_nr;
- long batch_size = shrinker->batch ? shrinker->batch
- : SHRINK_BATCH;
-
- max_pass = do_shrinker_shrink(shrinker, shrink, 0);
- if (max_pass <= 0)
- continue;
-
- /*
- * copy the current shrinker scan count into a local variable
- * and zero it so that other concurrent shrinker invocations
- * don't also do this scanning work.
- */
- nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
-
- total_scan = nr;
- delta = (4 * nr_pages_scanned) / shrinker->seeks;
- delta *= max_pass;
- do_div(delta, lru_pages + 1);
- total_scan += delta;
- if (total_scan < 0) {
- printk(KERN_ERR "shrink_slab: %pF negative objects to "
- "delete nr=%ld\n",
- shrinker->shrink, total_scan);
- total_scan = max_pass;
- }
-
- /*
- * We need to avoid excessive windup on filesystem shrinkers
- * due to large numbers of GFP_NOFS allocations causing the
- * shrinkers to return -1 all the time. This results in a large
- * nr being built up so when a shrink that can do some work
- * comes along it empties the entire cache due to nr >>>
- * max_pass. This is bad for sustaining a working set in
- * memory.
- *
- * Hence only allow the shrinker to scan the entire cache when
- * a large delta change is calculated directly.
- */
- if (delta < max_pass / 4)
- total_scan = min(total_scan, max_pass / 2);
-
- /*
- * Avoid risking looping forever due to too large nr value:
- * never try to free more than twice the estimate number of
- * freeable entries.
- */
- if (total_scan > max_pass * 2)
- total_scan = max_pass * 2;
-
- trace_mm_shrink_slab_start(shrinker, shrink, nr,
- nr_pages_scanned, lru_pages,
- max_pass, delta, total_scan);
-
- while (total_scan >= batch_size) {
- int nr_before;
+ for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+ if (!node_online(shrinkctl->nid))
+ continue;
- nr_before = do_shrinker_shrink(shrinker, shrink, 0);
- shrink_ret = do_shrinker_shrink(shrinker, shrink,
- batch_size);
- if (shrink_ret == -1)
+ if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
+ (shrinkctl->nid != 0))
break;
- if (shrink_ret < nr_before)
- ret += nr_before - shrink_ret;
- count_vm_events(SLABS_SCANNED, batch_size);
- total_scan -= batch_size;
- cond_resched();
- }
+ freed += shrink_slab_node(shrinkctl, shrinker,
+ nr_pages_scanned, lru_pages);
- /*
- * move the unused scan count back into the shrinker in a
- * manner that handles concurrent updates. If we exhausted the
- * scan, there is no need to do an update.
- */
- if (total_scan > 0)
- new_nr = atomic_long_add_return(total_scan,
- &shrinker->nr_in_batch);
- else
- new_nr = atomic_long_read(&shrinker->nr_in_batch);
-
- trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
+ }
}
up_read(&shrinker_rwsem);
out:
cond_resched();
- return ret;
+ return freed;
}
static inline int is_page_cache_freeable(struct page *page)
*/
void putback_lru_page(struct page *page)
{
- int lru;
+ bool is_unevictable;
int was_unevictable = PageUnevictable(page);
VM_BUG_ON(PageLRU(page));
* unevictable page on [in]active list.
* We know how to handle that.
*/
- lru = page_lru_base_type(page);
+ is_unevictable = false;
lru_cache_add(page);
} else {
/*
* Put unevictable pages directly on zone's unevictable
* list.
*/
- lru = LRU_UNEVICTABLE;
+ is_unevictable = true;
add_page_to_unevictable_list(page);
/*
* When racing with an mlock or AS_UNEVICTABLE clearing
* page is on unevictable list, it never be freed. To avoid that,
* check after we added it to the list, again.
*/
- if (lru == LRU_UNEVICTABLE && page_evictable(page)) {
+ if (is_unevictable && page_evictable(page)) {
if (!isolate_lru_page(page)) {
put_page(page);
goto redo;
*/
}
- if (was_unevictable && lru != LRU_UNEVICTABLE)
+ if (was_unevictable && !is_unevictable)
count_vm_event(UNEVICTABLE_PGRESCUED);
- else if (!was_unevictable && lru == LRU_UNEVICTABLE)
+ else if (!was_unevictable && is_unevictable)
count_vm_event(UNEVICTABLE_PGCULLED);
put_page(page); /* drop ref from isolate */
* latencies, so it's better to scan a minimum amount there as
* well.
*/
- if (current_is_kswapd() && zone->all_unreclaimable)
+ if (current_is_kswapd() && !zone_reclaimable(zone))
force_scan = true;
if (!global_reclaim(sc))
force_scan = true;
if (global_reclaim(sc)) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- if (zone->all_unreclaimable &&
- sc->priority != DEF_PRIORITY)
+ if (sc->priority != DEF_PRIORITY &&
+ !zone_reclaimable(zone))
continue; /* Let kswapd poll it */
if (IS_ENABLED(CONFIG_COMPACTION)) {
/*
return aborted_reclaim;
}
-static bool zone_reclaimable(struct zone *zone)
-{
- return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
-}
-
/* All zones in zonelist are unreclaimable? */
static bool all_unreclaimable(struct zonelist *zonelist,
struct scan_control *sc)
continue;
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
- if (!zone->all_unreclaimable)
+ if (zone_reclaimable(zone))
return false;
}
*/
if (global_reclaim(sc)) {
unsigned long lru_pages = 0;
+
+ nodes_clear(shrink->nodes_to_scan);
for_each_zone_zonelist(zone, z, zonelist,
gfp_zone(sc->gfp_mask)) {
if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
continue;
lru_pages += zone_reclaimable_pages(zone);
+ node_set(zone_to_nid(zone),
+ shrink->nodes_to_scan);
}
shrink_slab(shrink, sc->nr_scanned, lru_pages);
* DEF_PRIORITY. Effectively, it considers them balanced so
* they must be considered balanced here as well!
*/
- if (zone->all_unreclaimable) {
+ if (!zone_reclaimable(zone)) {
balanced_pages += zone->managed_pages;
continue;
}
unsigned long lru_pages,
unsigned long *nr_attempted)
{
- unsigned long nr_slab;
int testorder = sc->order;
unsigned long balance_gap;
struct reclaim_state *reclaim_state = current->reclaim_state;
return true;
shrink_zone(zone, sc);
+ nodes_clear(shrink.nodes_to_scan);
+ node_set(zone_to_nid(zone), shrink.nodes_to_scan);
reclaim_state->reclaimed_slab = 0;
- nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages);
+ shrink_slab(&shrink, sc->nr_scanned, lru_pages);
sc->nr_reclaimed += reclaim_state->reclaimed_slab;
/* Account for the number of pages attempted to reclaim */
*nr_attempted += sc->nr_to_reclaim;
- if (nr_slab == 0 && !zone_reclaimable(zone))
- zone->all_unreclaimable = 1;
-
zone_clear_flag(zone, ZONE_WRITEBACK);
/*
* BDIs but as pressure is relieved, speculatively avoid congestion
* waits.
*/
- if (!zone->all_unreclaimable &&
+ if (zone_reclaimable(zone) &&
zone_balanced(zone, testorder, 0, classzone_idx)) {
zone_clear_flag(zone, ZONE_CONGESTED);
zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable &&
- sc.priority != DEF_PRIORITY)
+ if (sc.priority != DEF_PRIORITY &&
+ !zone_reclaimable(zone))
continue;
/*
if (!populated_zone(zone))
continue;
- if (zone->all_unreclaimable &&
- sc.priority != DEF_PRIORITY)
+ if (sc.priority != DEF_PRIORITY &&
+ !zone_reclaimable(zone))
continue;
sc.nr_scanned = 0;
}
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
- if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
+ if (zone_balanced(zone, order, 0, 0))
return;
trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
return nr;
}
-unsigned long zone_reclaimable_pages(struct zone *zone)
-{
- int nr;
-
- nr = zone_page_state(zone, NR_ACTIVE_FILE) +
- zone_page_state(zone, NR_INACTIVE_FILE);
-
- if (get_nr_swap_pages() > 0)
- nr += zone_page_state(zone, NR_ACTIVE_ANON) +
- zone_page_state(zone, NR_INACTIVE_ANON);
-
- return nr;
-}
-
#ifdef CONFIG_HIBERNATION
/*
* Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
* number of slab pages and shake the slab until it is reduced
* by the same nr_pages that we used for reclaiming unmapped
* pages.
- *
- * Note that shrink_slab will free memory on all zones and may
- * take a long time.
*/
+ nodes_clear(shrink.nodes_to_scan);
+ node_set(zone_to_nid(zone), shrink.nodes_to_scan);
for (;;) {
unsigned long lru_pages = zone_reclaimable_pages(zone);
zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
return ZONE_RECLAIM_FULL;
- if (zone->all_unreclaimable)
+ if (!zone_reclaimable(zone))
return ZONE_RECLAIM_FULL;
/*