memcg: get rid of soft-limit tree infrastructure
author Michal Hocko <mhocko@suse.cz>
Thu, 12 Sep 2013 22:13:23 +0000 (15:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 12 Sep 2013 22:38:00 +0000 (15:38 -0700)
Now that the soft limit is integrated directly into reclaim, the whole
soft-limit tree infrastructure is no longer needed.  Rip it out.
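
For reference, the code being removed kept one RB-tree per node/zone
holding the mem_cgroup_per_zone records of groups over their soft
limit, keyed by usage_in_excess, so that global reclaim could pick the
worst offender from the rightmost node.  A condensed sketch of that
pattern follows (the excess_node type and insert_exceeded() helper are
hypothetical stand-ins for the deleted structures; the rbtree calls are
the same kernel API the old code used):

        #include <linux/rbtree.h>

        /* Hypothetical stand-in for the removed mem_cgroup_per_zone fields. */
        struct excess_node {
                struct rb_node tree_node;
                unsigned long long usage_in_excess; /* usage above the soft limit */
        };

        /* Same shape as the removed __mem_cgroup_insert_exceeded(). */
        static void insert_exceeded(struct rb_root *root, struct excess_node *en)
        {
                struct rb_node **p = &root->rb_node;
                struct rb_node *parent = NULL;
                struct excess_node *e;

                while (*p) {
                        parent = *p;
                        e = rb_entry(parent, struct excess_node, tree_node);
                        if (en->usage_in_excess < e->usage_in_excess)
                                p = &(*p)->rb_left;
                        else    /* equal keys go right so no group is hidden */
                                p = &(*p)->rb_right;
                }
                rb_link_node(&en->tree_node, parent, p);
                rb_insert_color(&en->tree_node, root);
        }

        /*
         * Reclaim then took the biggest offender from the rightmost node:
         *      rb_entry(rb_last(root), struct excess_node, tree_node)
         */

Keeping the tree sorted made victim selection O(log n), but it also
meant tree maintenance from the ratelimited event path
(MEM_CGROUP_TARGET_SOFTLIMIT), which the direct integration into
reclaim no longer needs.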

Signed-off-by: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Glauber Costa <glommer@openvz.org>
Reviewed-by: Tejun Heo <tj@kernel.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michel Lespinasse <walken@google.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6c32271..87a448d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -39,7 +39,6 @@
 #include <linux/limits.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
@@ -139,7 +138,6 @@ static const char * const mem_cgroup_lru_names[] = {
  */
 enum mem_cgroup_events_target {
        MEM_CGROUP_TARGET_THRESH,
-       MEM_CGROUP_TARGET_SOFTLIMIT,
        MEM_CGROUP_TARGET_NUMAINFO,
        MEM_CGROUP_NTARGETS,
 };
@@ -175,10 +173,6 @@ struct mem_cgroup_per_zone {
 
        struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
 
-       struct rb_node          tree_node;      /* RB tree node */
-       unsigned long long      usage_in_excess;/* Set to the value by which */
-                                               /* the soft limit is exceeded*/
-       bool                    on_tree;
        struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
                                                /* use container_of        */
 };
@@ -187,26 +181,6 @@ struct mem_cgroup_per_node {
        struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
 };
 
-/*
- * Cgroups above their limits are maintained in a RB-Tree, independent of
- * their hierarchy representation
- */
-
-struct mem_cgroup_tree_per_zone {
-       struct rb_root rb_root;
-       spinlock_t lock;
-};
-
-struct mem_cgroup_tree_per_node {
-       struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
-};
-
-struct mem_cgroup_tree {
-       struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
-};
-
-static struct mem_cgroup_tree soft_limit_tree __read_mostly;
-
 struct mem_cgroup_threshold {
        struct eventfd_ctx *eventfd;
        u64 threshold;
@@ -444,7 +418,6 @@ static bool move_file(void)
  * limit reclaim to prevent infinite loops, if they ever occur.
  */
 #define        MEM_CGROUP_MAX_RECLAIM_LOOPS            100
-#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
 
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -671,164 +644,6 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
        return mem_cgroup_zoneinfo(memcg, nid, zid);
 }
 
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_node_zone(int nid, int zid)
-{
-       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
-}
-
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_from_page(struct page *page)
-{
-       int nid = page_to_nid(page);
-       int zid = page_zonenum(page);
-
-       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
-}
-
-static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
-                               struct mem_cgroup_per_zone *mz,
-                               struct mem_cgroup_tree_per_zone *mctz,
-                               unsigned long long new_usage_in_excess)
-{
-       struct rb_node **p = &mctz->rb_root.rb_node;
-       struct rb_node *parent = NULL;
-       struct mem_cgroup_per_zone *mz_node;
-
-       if (mz->on_tree)
-               return;
-
-       mz->usage_in_excess = new_usage_in_excess;
-       if (!mz->usage_in_excess)
-               return;
-       while (*p) {
-               parent = *p;
-               mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
-                                       tree_node);
-               if (mz->usage_in_excess < mz_node->usage_in_excess)
-                       p = &(*p)->rb_left;
-               /*
-                * We can't avoid mem cgroups that are over their soft
-                * limit by the same amount
-                */
-               else if (mz->usage_in_excess >= mz_node->usage_in_excess)
-                       p = &(*p)->rb_right;
-       }
-       rb_link_node(&mz->tree_node, parent, p);
-       rb_insert_color(&mz->tree_node, &mctz->rb_root);
-       mz->on_tree = true;
-}
-
-static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
-                               struct mem_cgroup_per_zone *mz,
-                               struct mem_cgroup_tree_per_zone *mctz)
-{
-       if (!mz->on_tree)
-               return;
-       rb_erase(&mz->tree_node, &mctz->rb_root);
-       mz->on_tree = false;
-}
-
-static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
-                               struct mem_cgroup_per_zone *mz,
-                               struct mem_cgroup_tree_per_zone *mctz)
-{
-       spin_lock(&mctz->lock);
-       __mem_cgroup_remove_exceeded(memcg, mz, mctz);
-       spin_unlock(&mctz->lock);
-}
-
-
-static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
-{
-       unsigned long long excess;
-       struct mem_cgroup_per_zone *mz;
-       struct mem_cgroup_tree_per_zone *mctz;
-       int nid = page_to_nid(page);
-       int zid = page_zonenum(page);
-       mctz = soft_limit_tree_from_page(page);
-
-       /*
-        * Necessary to update all ancestors when hierarchy is used,
-        * because their event counter is not touched.
-        */
-       for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-               mz = mem_cgroup_zoneinfo(memcg, nid, zid);
-               excess = res_counter_soft_limit_excess(&memcg->res);
-               /*
-                * We have to update the tree if mz is on RB-tree or
-                * mem is over its softlimit.
-                */
-               if (excess || mz->on_tree) {
-                       spin_lock(&mctz->lock);
-                       /* if on-tree, remove it */
-                       if (mz->on_tree)
-                               __mem_cgroup_remove_exceeded(memcg, mz, mctz);
-                       /*
-                        * Insert again. mz->usage_in_excess will be updated.
-                        * If excess is 0, no tree ops.
-                        */
-                       __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
-                       spin_unlock(&mctz->lock);
-               }
-       }
-}
-
-static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
-{
-       int node, zone;
-       struct mem_cgroup_per_zone *mz;
-       struct mem_cgroup_tree_per_zone *mctz;
-
-       for_each_node(node) {
-               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                       mz = mem_cgroup_zoneinfo(memcg, node, zone);
-                       mctz = soft_limit_tree_node_zone(node, zone);
-                       mem_cgroup_remove_exceeded(memcg, mz, mctz);
-               }
-       }
-}
-
-static struct mem_cgroup_per_zone *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
-{
-       struct rb_node *rightmost = NULL;
-       struct mem_cgroup_per_zone *mz;
-
-retry:
-       mz = NULL;
-       rightmost = rb_last(&mctz->rb_root);
-       if (!rightmost)
-               goto done;              /* Nothing to reclaim from */
-
-       mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
-       /*
-        * Remove the node now but someone else can add it back;
-        * we will add it back at the end of reclaim to its correct
-        * position in the tree.
-        */
-       __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
-       if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
-               !css_tryget(&mz->memcg->css))
-               goto retry;
-done:
-       return mz;
-}
-
-static struct mem_cgroup_per_zone *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
-{
-       struct mem_cgroup_per_zone *mz;
-
-       spin_lock(&mctz->lock);
-       mz = __mem_cgroup_largest_soft_limit_node(mctz);
-       spin_unlock(&mctz->lock);
-       return mz;
-}
-
 /*
  * Implementation Note: reading percpu statistics for memcg.
  *
@@ -987,9 +802,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
                case MEM_CGROUP_TARGET_THRESH:
                        next = val + THRESHOLDS_EVENTS_TARGET;
                        break;
-               case MEM_CGROUP_TARGET_SOFTLIMIT:
-                       next = val + SOFTLIMIT_EVENTS_TARGET;
-                       break;
                case MEM_CGROUP_TARGET_NUMAINFO:
                        next = val + NUMAINFO_EVENTS_TARGET;
                        break;
@@ -1012,11 +824,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
        /* threshold event is triggered in finer grain than soft limit */
        if (unlikely(mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_THRESH))) {
-               bool do_softlimit;
                bool do_numainfo __maybe_unused;
 
-               do_softlimit = mem_cgroup_event_ratelimit(memcg,
-                                               MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
                do_numainfo = mem_cgroup_event_ratelimit(memcg,
                                                MEM_CGROUP_TARGET_NUMAINFO);
@@ -1024,8 +833,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
                preempt_enable();
 
                mem_cgroup_threshold(memcg);
-               if (unlikely(do_softlimit))
-                       mem_cgroup_update_tree(memcg, page);
 #if MAX_NUMNODES > 1
                if (unlikely(do_numainfo))
                        atomic_inc(&memcg->numainfo_events);
@@ -1867,6 +1674,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
        return total;
 }
 
+#if MAX_NUMNODES > 1
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -1889,7 +1697,6 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
        return false;
 
 }
-#if MAX_NUMNODES > 1
 
 /*
  * Always updating the nodemask is not very good - even if we have an empty
@@ -1957,51 +1764,12 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
        return node;
 }
 
-/*
- * Check all nodes whether they contain reclaimable pages or not.
- * For quick scan, we make use of scan_nodes. This will allow us to skip
- * unused nodes. But scan_nodes is lazily updated and may not contain
- * enough new information. We need to do a double check.
- */
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-       int nid;
-
-       /*
-        * quick check...making use of scan_node.
-        * We can skip unused nodes.
-        */
-       if (!nodes_empty(memcg->scan_nodes)) {
-               for (nid = first_node(memcg->scan_nodes);
-                    nid < MAX_NUMNODES;
-                    nid = next_node(nid, memcg->scan_nodes)) {
-
-                       if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-                               return true;
-               }
-       }
-       /*
-        * Check rest of nodes.
-        */
-       for_each_node_state(nid, N_MEMORY) {
-               if (node_isset(nid, memcg->scan_nodes))
-                       continue;
-               if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-                       return true;
-       }
-       return false;
-}
-
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 {
        return 0;
 }
 
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-       return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
-}
 #endif
 
 /*
@@ -2876,9 +2644,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        unlock_page_cgroup(pc);
 
        /*
-        * "charge_statistics" updated event counter. Then, check it.
-        * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-        * if they exceeds softlimit.
+        * "charge_statistics" updated event counter.
         */
        memcg_check_events(memcg, page);
 }
@@ -5962,8 +5728,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                mz = &pn->zoneinfo[zone];
                lruvec_init(&mz->lruvec);
-               mz->usage_in_excess = 0;
-               mz->on_tree = false;
                mz->memcg = memcg;
        }
        memcg->nodeinfo[node] = pn;
@@ -6019,7 +5783,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
        int node;
        size_t size = memcg_size();
 
-       mem_cgroup_remove_from_trees(memcg);
        free_css_id(&mem_cgroup_subsys, &memcg->css);
 
        for_each_node(node)
@@ -6056,29 +5819,6 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(parent_mem_cgroup);
 
-static void __init mem_cgroup_soft_limit_tree_init(void)
-{
-       struct mem_cgroup_tree_per_node *rtpn;
-       struct mem_cgroup_tree_per_zone *rtpz;
-       int tmp, node, zone;
-
-       for_each_node(node) {
-               tmp = node;
-               if (!node_state(node, N_NORMAL_MEMORY))
-                       tmp = -1;
-               rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
-               BUG_ON(!rtpn);
-
-               soft_limit_tree.rb_tree_per_node[node] = rtpn;
-
-               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                       rtpz = &rtpn->rb_tree_per_zone[zone];
-                       rtpz->rb_root = RB_ROOT;
-                       spin_lock_init(&rtpz->lock);
-               }
-       }
-}
-
 static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -6859,7 +6599,6 @@ static int __init mem_cgroup_init(void)
 {
        hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
        enable_swap_cgroup();
-       mem_cgroup_soft_limit_tree_init();
        memcg_stock_init();
        return 0;
 }