oom, oom_reaper: try to reap tasks which skip regular OOM killer path
author Michal Hocko <mhocko@suse.com>
Fri, 20 May 2016 00:13:12 +0000 (17:13 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 20 May 2016 02:12:14 +0000 (19:12 -0700)
If either the current task is already killed or PF_EXITING, or a selected
task is PF_EXITING, then the oom killer is suppressed and so is the oom
reaper.  This patch adds try_oom_reaper(), which checks the given task and
queues it for the oom reaper if it is safe to do so, meaning that the
task doesn't share the mm with an alive process.

This might help to relieve memory pressure while the task tries to
exit.

[akpm@linux-foundation.org: fix nommu build]
Signed-off-by: Michal Hocko <mhocko@suse.com>
Cc: Raushaniya Maksudova <rmaksudova@parallels.com>
Cc: Michael S. Tsirkin <mst@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Daniel Vetter <daniel.vetter@intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/oom.h
mm/memcontrol.c
mm/oom_kill.c

index 628a432..83b9c39 100644 (file)
@@ -72,6 +72,14 @@ static inline bool oom_task_origin(const struct task_struct *p)
 
 extern void mark_oom_victim(struct task_struct *tsk);
 
+#ifdef CONFIG_MMU
+extern void try_oom_reaper(struct task_struct *tsk);
+#else /* !CONFIG_MMU: no-op stub so callers need no #ifdef */
+static inline void try_oom_reaper(struct task_struct *tsk)
+{
+}
+#endif /* CONFIG_MMU */
+
 extern unsigned long oom_badness(struct task_struct *p,
                struct mem_cgroup *memcg, const nodemask_t *nodemask,
                unsigned long totalpages);
index 1b40dca..d71d387 100644 (file)
@@ -1275,6 +1275,7 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
         */
        if (fatal_signal_pending(current) || task_will_free_mem(current)) {
                mark_oom_victim(current);
+               try_oom_reaper(current);
                goto unlock;
        }
 
index 32d8210..850b6ff 100644 (file)
@@ -412,6 +412,25 @@ bool oom_killer_disabled __read_mostly;
 
 #define K(x) ((x) << (PAGE_SHIFT-10))
 
+/*
+ * Does process @p use @mm?  An exited group leader has a NULL ->mm, so the
+ * first thread of @p that still has an mm decides; none left means "no".
+ */
+static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
+{
+       struct task_struct *thread;
+
+       for_each_thread(p, thread) {
+               struct mm_struct *thread_mm = READ_ONCE(thread->mm);
+
+               if (!thread_mm)
+                       continue;
+               return thread_mm == mm;
+       }
+       return false;
+}
+
+
 #ifdef CONFIG_MMU
 /*
  * OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -563,6 +582,53 @@ static void wake_oom_reaper(struct task_struct *tsk)
        wake_up(&oom_reaper_wait);
 }
 
+/*
+ * Queue @tsk for the oom reaper unless a process which is neither killed
+ * nor exiting still uses its mm.  Has to be called with stable tsk->mm.
+ */
+void try_oom_reaper(struct task_struct *tsk)
+{
+       struct mm_struct *mm = tsk->mm;
+       struct task_struct *p;
+
+       if (!mm)
+               return;
+
+       /*
+        * There might be other threads/processes which are either not
+        * dying or even not killable.
+        */
+       if (atomic_read(&mm->mm_users) > 1) {
+               rcu_read_lock();
+               for_each_process(p) {
+                       if (!process_shares_mm(p, mm))
+                               continue;
+                       if (same_thread_group(p, tsk))
+                               continue;
+                       if (fatal_signal_pending(p))
+                               continue;
+
+                       /*
+                        * If the task is exiting make sure the whole thread group
+                        * is exiting and cannot access mm anymore.
+                        *
+                        * Check this locklessly: p->sighand is cleared when
+                        * the task is released, so taking ->siglock through
+                        * it here could dereference a NULL pointer.
+                        */
+                       if (signal_group_exit(p->signal))
+                               continue;
+
+                       /* Give up */
+                       rcu_read_unlock();
+                       return;
+               }
+               rcu_read_unlock();
+       }
+
+       wake_oom_reaper(tsk);
+}
+
 static int __init oom_init(void)
 {
        oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -652,24 +718,6 @@ void oom_killer_enable(void)
        oom_killer_disabled = false;
 }
 
-/*
- * task->mm can be NULL if the task is the exited group leader.  So to
- * determine whether the task is using a particular mm, we examine all the
- * task's threads: if one of those is using this mm then this task was also
- * using it.
- */
-static bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
-{
-       struct task_struct *t;
-
-       for_each_thread(p, t) {
-               struct mm_struct *t_mm = READ_ONCE(t->mm);
-               if (t_mm)
-                       return t_mm == mm;
-       }
-       return false;
-}
-
 /*
  * Must be called while holding a reference to p, which will be released upon
  * returning.
@@ -694,6 +742,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
        task_lock(p);
        if (p->mm && task_will_free_mem(p)) {
                mark_oom_victim(p);
+               try_oom_reaper(p);
                task_unlock(p);
                put_task_struct(p);
                return;
@@ -873,6 +922,7 @@ bool out_of_memory(struct oom_control *oc)
        if (current->mm &&
            (fatal_signal_pending(current) || task_will_free_mem(current))) {
                mark_oom_victim(current);
+               try_oom_reaper(current);
                return true;
        }