mm: memcontrol: reclaim and OOM kill when shrinking memory.max below usage

author Johannes Weiner <hannes@cmpxchg.org>

Thu, 17 Mar 2016 21:20:28 +0000 (14:20 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Thu, 17 Mar 2016 22:09:34 +0000 (15:09 -0700)
author Johannes Weiner <hannes@cmpxchg.org>
Thu, 17 Mar 2016 21:20:28 +0000 (14:20 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 17 Mar 2016 22:09:34 +0000 (15:09 -0700)
diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt

index e2f4e79..8f1329a 100644 (file)
--- a/Documentation/cgroup-v2.txt
+++ b/Documentation/cgroup-v2.txt
@@ -1387,6 +1387,12 @@ system than killing the group.  Otherwise, memory.max is there to
  limit this type of spillover and ultimately contain buggy or even
  malicious applications.
  
+Setting the original memory.limit_in_bytes below the current usage was
+subject to a race condition, where concurrent charges could cause the
+limit setting to fail. memory.max, on the other hand, will first set the
+limit to prevent new charges, and then reclaim and OOM kill until the
+new limit is met - or until the task writing to memory.max is killed.
+
  The combined memory+swap accounting and limiting is replaced by real
  control over swap space.
  
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index f7c9b4c..8614e0d 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1236,7 +1236,7 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
         return limit;
  }
  
-static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
+static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                                      int order)
  {
         struct oom_control oc = {
@@ -1314,6 +1314,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
         }
  unlock:
         mutex_unlock(&oom_lock);
+       return chosen;
  }
  
  #if MAX_NUMNODES > 1
@@ -5029,6 +5030,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
                                 char *buf, size_t nbytes, loff_t off)
  {
         struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+       unsigned int nr_reclaims = MEM_CGROUP_RECLAIM_RETRIES;
+       bool drained = false;
         unsigned long max;
         int err;
  
@@ -5037,9 +5040,36 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
         if (err)
                 return err;
  
-       err = mem_cgroup_resize_limit(memcg, max);
-       if (err)
-               return err;
+       xchg(&memcg->memory.limit, max);
+
+       for (;;) {
+               unsigned long nr_pages = page_counter_read(&memcg->memory);
+
+               if (nr_pages <= max)
+                       break;
+
+               if (signal_pending(current)) {
+                       err = -EINTR;
+                       break;
+               }
+
+               if (!drained) {
+                       drain_all_stock(memcg);
+                       drained = true;
+                       continue;
+               }
+
+               if (nr_reclaims) {
+                       if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
+                                                         GFP_KERNEL, true))
+                               nr_reclaims--;
+                       continue;
+               }
+
+               mem_cgroup_events(memcg, MEMCG_OOM, 1);
+               if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
+                       break;
+       }
  
         memcg_wb_domain_size_changed(memcg);
         return nbytes;
author	Johannes Weiner <hannes@cmpxchg.org>
	Thu, 17 Mar 2016 21:20:28 +0000 (14:20 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Thu, 17 Mar 2016 22:09:34 +0000 (15:09 -0700)
Documentation/cgroup-v2.txt		patch \| blob \| history
mm/memcontrol.c		patch \| blob \| history