X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=mm%2Fvmscan.c;h=5d4c4d02254dc977d84951845dada98a80bd0627;hb=f0ee3404cce2c45f8b95b341dd6311cd92e5cee0;hp=72babac71deaba28f9c75a1b6d2ccacb8ba537fa;hpb=bd1e22b8e0a90f9a91e4c27db14ca15773659bf7;p=cascardo%2Flinux.git diff --git a/mm/vmscan.c b/mm/vmscan.c index 72babac71dea..5d4c4d02254d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -34,6 +34,7 @@ #include #include #include +#include <linux/kthread.h> #include #include @@ -46,8 +47,6 @@ struct scan_control { /* Incremented by the number of inactive pages that were scanned */ unsigned long nr_scanned; - unsigned long nr_mapped; /* From page_state */ - /* This context's GFP mask */ gfp_t gfp_mask; @@ -216,7 +215,7 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, break; if (shrink_ret < nr_before) ret += nr_before - shrink_ret; - mod_page_state(slabs_scanned, this_scan); + count_vm_events(SLABS_SCANNED, this_scan); total_scan -= this_scan; cond_resched(); @@ -570,7 +569,7 @@ keep: list_splice(&ret_pages, page_list); if (pagevec_count(&freed_pvec)) __pagevec_release_nonlru(&freed_pvec); - mod_page_state(pgactivate, pgactivate); + count_vm_events(PGACTIVATE, pgactivate); return nr_reclaimed; } @@ -660,11 +659,11 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, nr_reclaimed += nr_freed; local_irq_disable(); if (current_is_kswapd()) { - __mod_page_state_zone(zone, pgscan_kswapd, nr_scan); - __mod_page_state(kswapd_steal, nr_freed); + __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan); + __count_vm_events(KSWAPD_STEAL, nr_freed); } else - __mod_page_state_zone(zone, pgscan_direct, nr_scan); - __mod_page_state_zone(zone, pgsteal, nr_freed); + __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan); + __count_zone_vm_events(PGSTEAL, zone, nr_freed); if (nr_taken == 0) goto done; @@ -743,7 +742,9 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, * how much memory * is mapped. 
*/ - mapped_ratio = (sc->nr_mapped * 100) / vm_total_pages; + mapped_ratio = ((global_page_state(NR_FILE_MAPPED) + + global_page_state(NR_ANON_PAGES)) * 100) / + vm_total_pages; /* * Now decide how much we really want to unmap some pages. The @@ -840,11 +841,10 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, } } zone->nr_active += pgmoved; - spin_unlock(&zone->lru_lock); - __mod_page_state_zone(zone, pgrefill, pgscanned); - __mod_page_state(pgdeactivate, pgdeactivate); - local_irq_enable(); + __count_zone_vm_events(PGREFILL, zone, pgscanned); + __count_vm_events(PGDEACTIVATE, pgdeactivate); + spin_unlock_irq(&zone->lru_lock); pagevec_release(&pvec); } @@ -976,7 +976,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) .swappiness = vm_swappiness, }; - inc_page_state(allocstall); + count_vm_event(ALLOCSTALL); for (i = 0; zones[i] != NULL; i++) { struct zone *zone = zones[i]; @@ -989,7 +989,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) } for (priority = DEF_PRIORITY; priority >= 0; priority--) { - sc.nr_mapped = read_page_state(nr_mapped); sc.nr_scanned = 0; if (!priority) disable_swap_token(); @@ -1074,9 +1073,7 @@ loop_again: total_scanned = 0; nr_reclaimed = 0; sc.may_writepage = !laptop_mode; - sc.nr_mapped = read_page_state(nr_mapped); - - inc_page_state(pageoutrun); + count_vm_event(PAGEOUTRUN); for (i = 0; i < pgdat->nr_zones; i++) { struct zone *zone = pgdat->node_zones + i; @@ -1223,7 +1220,6 @@ static int kswapd(void *p) }; cpumask_t cpumask; - daemonize("kswapd%d", pgdat->node_id); cpumask = node_to_cpumask(pgdat->node_id); if (!cpus_empty(cpumask)) set_cpus_allowed(tsk, cpumask); @@ -1365,7 +1361,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) for_each_zone(zone) lru_pages += zone->nr_active + zone->nr_inactive; - nr_slab = read_page_state(nr_slab); + nr_slab = global_page_state(NR_SLAB); /* If slab caches are huge, it's better to hit them first */ while (nr_slab >= 
lru_pages) { reclaim_state.reclaimed_slab = 0; @@ -1407,9 +1403,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) for (prio = DEF_PRIORITY; prio >= 0; prio--) { unsigned long nr_to_scan = nr_pages - ret; - sc.nr_mapped = read_page_state(nr_mapped); sc.nr_scanned = 0; - ret += shrink_all_zones(nr_to_scan, prio, pass, &sc); if (ret >= nr_pages) goto out; @@ -1450,7 +1444,7 @@ out: not required for correctness. So if the last cpu in a node goes away, we get changed to run anywhere: as the first one comes back, restore their cpu bindings. */ -static int cpu_callback(struct notifier_block *nfb, +static int __devinit cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { pg_data_t *pgdat; @@ -1468,20 +1462,35 @@ static int cpu_callback(struct notifier_block *nfb, } #endif /* CONFIG_HOTPLUG_CPU */ +/* + * This kswapd start function will be called by init and node-hot-add. + * On node-hot-add, kswapd will be moved to the proper cpus if cpus are hot-added. + */ +int kswapd_run(int nid) +{ + pg_data_t *pgdat = NODE_DATA(nid); + int ret = 0; + + if (pgdat->kswapd) + return 0; + + pgdat->kswapd = kthread_run(kswapd, pgdat, "kswapd%d", nid); + if (IS_ERR(pgdat->kswapd)) { + /* failure at boot is fatal */ + BUG_ON(system_state == SYSTEM_BOOTING); + printk("Failed to start kswapd on node %d\n",nid); + ret = -1; + } + return ret; +} + static int __init kswapd_init(void) { - pg_data_t *pgdat; + int nid; swap_setup(); - for_each_online_pgdat(pgdat) { - pid_t pid; - - pid = kernel_thread(kswapd, pgdat, CLONE_KERNEL); - BUG_ON(pid < 0); - read_lock(&tasklist_lock); - pgdat->kswapd = find_task_by_pid(pid); - read_unlock(&tasklist_lock); - } + for_each_online_node(nid) + kswapd_run(nid); hotcpu_notifier(cpu_callback, 0); return 0; } @@ -1494,10 +1503,6 @@ module_init(kswapd_init) * * If non-zero call zone_reclaim when the number of free pages falls below * the watermarks. - * - * In the future we may add flags to the mode. 
However, the page allocator - * should only have to check that zone_reclaim_mode != 0 before calling - * zone_reclaim(). */ int zone_reclaim_mode __read_mostly; @@ -1507,11 +1512,6 @@ int zone_reclaim_mode __read_mostly; #define RECLAIM_SWAP (1<<2) /* Swap pages out during reclaim */ #define RECLAIM_SLAB (1<<3) /* Do a global slab shrink if the zone is out of memory */ -/* - * Mininum time between zone reclaim scans - */ -int zone_reclaim_interval __read_mostly = 30*HZ; - /* * Priority for ZONE_RECLAIM. This determines the fraction of pages * of a node considered for each zone_reclaim. 4 scans 1/16th of @@ -1519,6 +1519,12 @@ int zone_reclaim_interval __read_mostly = 30*HZ; */ #define ZONE_RECLAIM_PRIORITY 4 +/* + * Percentage of pages in a zone that must be unmapped for zone_reclaim to + * occur. + */ +int sysctl_min_unmapped_ratio = 1; + /* * Try to free up some pages from this zone through reclaim. */ @@ -1533,7 +1539,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) struct scan_control sc = { .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP), - .nr_mapped = read_page_state(nr_mapped), .swap_cluster_max = max_t(unsigned long, nr_pages, SWAP_CLUSTER_MAX), .gfp_mask = gfp_mask, @@ -1578,16 +1583,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) p->reclaim_state = NULL; current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE); - - if (nr_reclaimed == 0) { - /* - * We were unable to reclaim enough pages to stay on node. We - * now allow off node accesses for a certain time period before - * trying again to reclaim pages from the local zone. - */ - zone->last_unsuccessful_zone_reclaim = jiffies; - } - return nr_reclaimed >= nr_pages; } @@ -1597,14 +1592,17 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) int node_id; /* - * Do not reclaim if there was a recent unsuccessful attempt at zone - * reclaim. 
In that case we let allocations go off node for the - * zone_reclaim_interval. Otherwise we would scan for each off-node - * page allocation. + * Zone reclaim reclaims unmapped file backed pages. + * + * A small portion of unmapped file backed pages is needed for + * file I/O otherwise pages read by file I/O will be immediately + * thrown out if the zone is overallocated. So we do not reclaim + * if less than a specified percentage of the zone is used by + * unmapped file backed pages. */ - if (time_before(jiffies, - zone->last_unsuccessful_zone_reclaim + zone_reclaim_interval)) - return 0; + if (zone_page_state(zone, NR_FILE_PAGES) - + zone_page_state(zone, NR_FILE_MAPPED) <= zone->min_unmapped_ratio) + return 0; /* * Avoid concurrent zone reclaims, do not reclaim in a zone that does