diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4636d9e..bd9a72b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -229,9 +229,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long
-shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
-                unsigned long nr_pages_scanned, unsigned long lru_pages)
+static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
+                                 struct shrinker *shrinker,
+                                 unsigned long nr_scanned,
+                                 unsigned long nr_eligible)
 {
        unsigned long freed = 0;
        unsigned long long delta;
@@ -255,9 +256,9 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
        nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
 
        total_scan = nr;
-       delta = (4 * nr_pages_scanned) / shrinker->seeks;
+       delta = (4 * nr_scanned) / shrinker->seeks;
        delta *= freeable;
-       do_div(delta, lru_pages + 1);
+       do_div(delta, nr_eligible + 1);
        total_scan += delta;
        if (total_scan < 0) {
                pr_err("shrink_slab: %pF negative objects to delete nr=%ld\n",
@@ -289,8 +290,8 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
                total_scan = freeable * 2;
 
        trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
-                               nr_pages_scanned, lru_pages,
-                               freeable, delta, total_scan);
+                                  nr_scanned, nr_eligible,
+                                  freeable, delta, total_scan);
 
        /*
         * Normally, we should not scan less than batch_size objects in one
@@ -339,34 +340,37 @@ shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
        return freed;
 }
 
-/*
- * Call the shrink functions to age shrinkable caches
- *
- * Here we assume it costs one seek to replace a lru page and that it also
- * takes a seek to recreate a cache object.  With this in mind we age equal
- * percentages of the lru and ageable caches.  This should balance the seeks
- * generated by these structures.
+/**
+ * shrink_node_slabs - shrink slab caches of a given node
+ * @gfp_mask: allocation context
+ * @nid: node whose slab caches to target
+ * @nr_scanned: pressure numerator
+ * @nr_eligible: pressure denominator
  *
- * If the vm encountered mapped pages on the LRU it increase the pressure on
- * slab to avoid swapping.
+ * Call the shrink functions to age shrinkable caches.
  *
- * We do weird things to avoid (scanned*seeks*entries) overflowing 32 bits.
+ * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set;
+ * unaware shrinkers will receive a node id of 0 instead.
  *
- * `lru_pages' represents the number of on-LRU pages in all the zones which
- * are eligible for the caller's allocation attempt.  It is used for balancing
- * slab reclaim versus page reclaim.
+ * @nr_scanned and @nr_eligible form a ratio that indicates how much of
+ * the available objects should be scanned.  Page reclaim for example
+ * passes the number of pages scanned and the number of pages on the
+ * LRU lists that it considered on @nid, plus a bias in @nr_scanned
+ * when it encountered mapped pages.  The ratio is further biased by
+ * the ->seeks setting of the shrink function, which indicates the
+ * cost to recreate an object relative to that of an LRU page.
  *
- * Returns the number of slab objects which we shrunk.
+ * Returns the number of reclaimed slab objects.
  */
-unsigned long shrink_slab(struct shrink_control *shrinkctl,
-                         unsigned long nr_pages_scanned,
-                         unsigned long lru_pages)
+unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
+                               unsigned long nr_scanned,
+                               unsigned long nr_eligible)
 {
        struct shrinker *shrinker;
        unsigned long freed = 0;
 
-       if (nr_pages_scanned == 0)
-               nr_pages_scanned = SWAP_CLUSTER_MAX;
+       if (nr_scanned == 0)
+               nr_scanned = SWAP_CLUSTER_MAX;
 
        if (!down_read_trylock(&shrinker_rwsem)) {
                /*
@@ -380,20 +384,17 @@ unsigned long shrink_slab(struct shrink_control *shrinkctl,
        }
 
        list_for_each_entry(shrinker, &shrinker_list, list) {
-               if (!(shrinker->flags & SHRINKER_NUMA_AWARE)) {
-                       shrinkctl->nid = 0;
-                       freed += shrink_slab_node(shrinkctl, shrinker,
-                                       nr_pages_scanned, lru_pages);
-                       continue;
-               }
+               struct shrink_control sc = {
+                       .gfp_mask = gfp_mask,
+                       .nid = nid,
+               };
 
-               for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
-                       if (node_online(shrinkctl->nid))
-                               freed += shrink_slab_node(shrinkctl, shrinker,
-                                               nr_pages_scanned, lru_pages);
+               if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
+                       sc.nid = 0;
 
-               }
+               freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
        }
+
        up_read(&shrinker_rwsem);
 out:
        cond_resched();
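
A minimal standalone sketch of the scan-pressure arithmetic documented above for shrink_node_slabs(); the helper name, the userspace framing, and the sample numbers are illustrative assumptions, not kernel code:

/*
 * Mirrors delta = (4 * nr_scanned / seeks) * freeable / (nr_eligible + 1)
 * from shrink_slabs() above: how many objects a shrinker is asked to
 * consider, given how hard the LRU lists were pressed this cycle.
 */
#include <stdio.h>

static unsigned long long scan_target(unsigned long long freeable,
				      unsigned long long nr_scanned,
				      unsigned long long nr_eligible,
				      unsigned int seeks)
{
	unsigned long long delta;

	delta = (4 * nr_scanned) / seeks;	/* pressure, biased by object cost */
	delta *= freeable;			/* scale to this cache's size */
	return delta / (nr_eligible + 1);	/* +1 guards against division by zero */
}

int main(void)
{
	/* reclaim scanned 512 of ~1M eligible LRU pages; cache holds 10000 objects */
	printf("objects to consider: %llu\n",
	       scan_target(10000, 512, 1UL << 20, 2));
	return 0;
}
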
@@ -1876,7 +1877,8 @@ enum scan_balance {
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
 static void get_scan_count(struct lruvec *lruvec, int swappiness,
-                          struct scan_control *sc, unsigned long *nr)
+                          struct scan_control *sc, unsigned long *nr,
+                          unsigned long *lru_pages)
 {
        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
        u64 fraction[2];
@@ -2022,6 +2024,7 @@ out:
        some_scanned = false;
        /* Only use force_scan on second pass. */
        for (pass = 0; !some_scanned && pass < 2; pass++) {
+               *lru_pages = 0;
                for_each_evictable_lru(lru) {
                        int file = is_file_lru(lru);
                        unsigned long size;
@@ -2048,14 +2051,19 @@ out:
                        case SCAN_FILE:
                        case SCAN_ANON:
                                /* Scan one type exclusively */
-                               if ((scan_balance == SCAN_FILE) != file)
+                               if ((scan_balance == SCAN_FILE) != file) {
+                                       size = 0;
                                        scan = 0;
+                               }
                                break;
                        default:
                                /* Look ma, no brain */
                                BUG();
                        }
+
+                       *lru_pages += size;
                        nr[lru] = scan;
+
                        /*
                         * Skip the second pass and don't force_scan,
                         * if we found something to scan.
@@ -2069,7 +2077,7 @@ out:
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
 static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
-                         struct scan_control *sc)
+                         struct scan_control *sc, unsigned long *lru_pages)
 {
        unsigned long nr[NR_LRU_LISTS];
        unsigned long targets[NR_LRU_LISTS];
@@ -2080,7 +2088,7 @@ static void shrink_lruvec(struct lruvec *lruvec, int swappiness,
        struct blk_plug plug;
        bool scan_adjusted;
 
-       get_scan_count(lruvec, swappiness, sc, nr);
+       get_scan_count(lruvec, swappiness, sc, nr, lru_pages);
 
        /* Record the original scan target for proportional adjustments later */
        memcpy(targets, nr, sizeof(nr));
@@ -2258,7 +2266,8 @@ static inline bool should_continue_reclaim(struct zone *zone,
        }
 }
 
-static bool shrink_zone(struct zone *zone, struct scan_control *sc)
+static bool shrink_zone(struct zone *zone, struct scan_control *sc,
+                       bool is_classzone)
 {
        unsigned long nr_reclaimed, nr_scanned;
        bool reclaimable = false;
@@ -2269,6 +2278,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
                        .zone = zone,
                        .priority = sc->priority,
                };
+               unsigned long zone_lru_pages = 0;
                struct mem_cgroup *memcg;
 
                nr_reclaimed = sc->nr_reclaimed;
@@ -2276,13 +2286,15 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
 
                memcg = mem_cgroup_iter(root, NULL, &reclaim);
                do {
+                       unsigned long lru_pages;
                        struct lruvec *lruvec;
                        int swappiness;
 
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
                        swappiness = mem_cgroup_swappiness(memcg);
 
-                       shrink_lruvec(lruvec, swappiness, sc);
+                       shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
+                       zone_lru_pages += lru_pages;
 
                        /*
                         * Direct reclaim and kswapd have to scan all memory
@@ -2302,6 +2314,25 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc)
                        memcg = mem_cgroup_iter(root, memcg, &reclaim);
                } while (memcg);
 
+               /*
+                * Shrink the slab caches in the same proportion that
+                * the eligible LRU pages were scanned.
+                */
+               if (global_reclaim(sc) && is_classzone) {
+                       struct reclaim_state *reclaim_state;
+
+                       shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
+                                         sc->nr_scanned - nr_scanned,
+                                         zone_lru_pages);
+
+                       reclaim_state = current->reclaim_state;
+                       if (reclaim_state) {
+                               sc->nr_reclaimed +=
+                                       reclaim_state->reclaimed_slab;
+                               reclaim_state->reclaimed_slab = 0;
+                       }
+               }
+
                vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
                           sc->nr_scanned - nr_scanned,
                           sc->nr_reclaimed - nr_reclaimed);
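
A worked example of this proportion, with illustrative numbers: if a reclaim cycle scanned 10,000 of a node's 1,000,000 eligible LRU pages (1%), then a cache holding 100,000 freeable objects with the default ->seeks of 2 is asked to consider (4 * 10000 / 2) * 100000 / (1000000 + 1), roughly 2,000 objects, or about 2% of the cache. A shrinker whose objects are costlier to recreate declares a larger ->seeks and is scanned proportionally less.
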
@@ -2376,12 +2407,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        struct zone *zone;
        unsigned long nr_soft_reclaimed;
        unsigned long nr_soft_scanned;
-       unsigned long lru_pages = 0;
-       struct reclaim_state *reclaim_state = current->reclaim_state;
        gfp_t orig_mask;
-       struct shrink_control shrink = {
-               .gfp_mask = sc->gfp_mask,
-       };
        enum zone_type requested_highidx = gfp_zone(sc->gfp_mask);
        bool reclaimable = false;
 
@@ -2394,23 +2420,27 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        if (buffer_heads_over_limit)
                sc->gfp_mask |= __GFP_HIGHMEM;
 
-       nodes_clear(shrink.nodes_to_scan);
-
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
-                                       gfp_zone(sc->gfp_mask), sc->nodemask) {
+                                       requested_highidx, sc->nodemask) {
+               enum zone_type classzone_idx;
+
                if (!populated_zone(zone))
                        continue;
+
+               classzone_idx = requested_highidx;
+               while (!populated_zone(zone->zone_pgdat->node_zones +
+                                                       classzone_idx))
+                       classzone_idx--;
+
                /*
                 * Take care memory controller reclaiming has small influence
                 * to global LRU.
                 */
                if (global_reclaim(sc)) {
-                       if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+                       if (!cpuset_zone_allowed(zone,
+                                                GFP_KERNEL | __GFP_HARDWALL))
                                continue;
 
-                       lru_pages += zone_reclaimable_pages(zone);
-                       node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-
                        if (sc->priority != DEF_PRIORITY &&
                            !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
@@ -2449,7 +2479,7 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                        /* need some check for avoid more shrink_zone() */
                }
 
-               if (shrink_zone(zone, sc))
+               if (shrink_zone(zone, sc, zone_idx(zone) == classzone_idx))
                        reclaimable = true;
 
                if (global_reclaim(sc) &&
@@ -2457,20 +2487,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                        reclaimable = true;
        }
 
-       /*
-        * Don't shrink slabs when reclaiming memory from over limit cgroups
-        * but do shrink slab at least once when aborting reclaim for
-        * compaction to avoid unevenly scanning file/anon LRU pages over slab
-        * pages.
-        */
-       if (global_reclaim(sc)) {
-               shrink_slab(&shrink, sc->nr_scanned, lru_pages);
-               if (reclaim_state) {
-                       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
-                       reclaim_state->reclaimed_slab = 0;
-               }
-       }
-
        /*
         * Restore to original mask to avoid the impact on the caller if we
         * promoted it to __GFP_HIGHMEM.
@@ -2735,6 +2751,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
        };
        struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
        int swappiness = mem_cgroup_swappiness(memcg);
+       unsigned long lru_pages;
 
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@ -2750,7 +2767,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
         * will pick up pages from other mem cgroup's as well. We hack
         * the priority and make it zero.
         */
-       shrink_lruvec(lruvec, swappiness, &sc);
+       shrink_lruvec(lruvec, swappiness, &sc, &lru_pages);
 
        trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2931,15 +2948,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
 static bool kswapd_shrink_zone(struct zone *zone,
                               int classzone_idx,
                               struct scan_control *sc,
-                              unsigned long lru_pages,
                               unsigned long *nr_attempted)
 {
        int testorder = sc->order;
        unsigned long balance_gap;
-       struct reclaim_state *reclaim_state = current->reclaim_state;
-       struct shrink_control shrink = {
-               .gfp_mask = sc->gfp_mask,
-       };
        bool lowmem_pressure;
 
        /* Reclaim above the high watermark. */
@@ -2974,13 +2986,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
                                                balance_gap, classzone_idx))
                return true;
 
-       shrink_zone(zone, sc);
-       nodes_clear(shrink.nodes_to_scan);
-       node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-
-       reclaim_state->reclaimed_slab = 0;
-       shrink_slab(&shrink, sc->nr_scanned, lru_pages);
-       sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+       shrink_zone(zone, sc, zone_idx(zone) == classzone_idx);
 
        /* Account for the number of pages attempted to reclaim */
        *nr_attempted += sc->nr_to_reclaim;
@@ -3041,7 +3047,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
        count_vm_event(PAGEOUTRUN);
 
        do {
-               unsigned long lru_pages = 0;
                unsigned long nr_attempted = 0;
                bool raise_priority = true;
                bool pgdat_needs_compaction = (order > 0);
@@ -3101,8 +3106,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                        if (!populated_zone(zone))
                                continue;
 
-                       lru_pages += zone_reclaimable_pages(zone);
-
                        /*
                         * If any zone is currently balanced then kswapd will
                         * not call compaction as it is expected that the
@@ -3158,8 +3161,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                         * that that high watermark would be met at 100%
                         * efficiency.
                         */
-                       if (kswapd_shrink_zone(zone, end_zone, &sc,
-                                       lru_pages, &nr_attempted))
+                       if (kswapd_shrink_zone(zone, end_zone,
+                                              &sc, &nr_attempted))
                                raise_priority = false;
                }
 
@@ -3388,7 +3391,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
        if (!populated_zone(zone))
                return;
 
-       if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
+       if (!cpuset_zone_allowed(zone, GFP_KERNEL | __GFP_HARDWALL))
                return;
        pgdat = zone->zone_pgdat;
        if (pgdat->kswapd_max_order < order) {
@@ -3611,10 +3614,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
                .may_swap = 1,
        };
-       struct shrink_control shrink = {
-               .gfp_mask = sc.gfp_mask,
-       };
-       unsigned long nr_slab_pages0, nr_slab_pages1;
 
        cond_resched();
        /*
@@ -3633,44 +3632,10 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                 * priorities until we have enough memory freed.
                 */
                do {
-                       shrink_zone(zone, &sc);
+                       shrink_zone(zone, &sc, true);
                } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
        }
 
-       nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-       if (nr_slab_pages0 > zone->min_slab_pages) {
-               /*
-                * shrink_slab() does not currently allow us to determine how
-                * many pages were freed in this zone. So we take the current
-                * number of slab pages and shake the slab until it is reduced
-                * by the same nr_pages that we used for reclaiming unmapped
-                * pages.
-                */
-               nodes_clear(shrink.nodes_to_scan);
-               node_set(zone_to_nid(zone), shrink.nodes_to_scan);
-               for (;;) {
-                       unsigned long lru_pages = zone_reclaimable_pages(zone);
-
-                       /* No reclaimable slab or very low memory pressure */
-                       if (!shrink_slab(&shrink, sc.nr_scanned, lru_pages))
-                               break;
-
-                       /* Freed enough memory */
-                       nr_slab_pages1 = zone_page_state(zone,
-                                                       NR_SLAB_RECLAIMABLE);
-                       if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
-                               break;
-               }
-
-               /*
-                * Update nr_reclaimed by the number of slab pages we
-                * reclaimed from this zone.
-                */
-               nr_slab_pages1 = zone_page_state(zone, NR_SLAB_RECLAIMABLE);
-               if (nr_slab_pages1 < nr_slab_pages0)
-                       sc.nr_reclaimed += nr_slab_pages0 - nr_slab_pages1;
-       }
-
        p->reclaim_state = NULL;
        current->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE);
        lockdep_clear_current_reclaim_state();