/*
* This task already has access to memory reserves and is being killed.
- * Don't allow any other task to have access to the reserves.
+ * Don't allow any other task to have access to the reserves unless
+ * the task's mm has MMF_OOM_REAPED set, because the chances that it
+ * would release any more memory are quite low.
*/
- if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims))
- return OOM_SCAN_ABORT;
+ if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) {
+ struct task_struct *p = find_lock_task_mm(task);
+ enum oom_scan_t ret = OOM_SCAN_ABORT;
+
+ if (p) {
+ if (test_bit(MMF_OOM_REAPED, &p->mm->flags))
+ ret = OOM_SCAN_CONTINUE;
+ task_unlock(p);
+ }
+
+ return ret;
+ }
/*
* If task is allocating a lot of memory and has been marked to be
schedule_timeout_idle(HZ/10);
if (attempts > MAX_OOM_REAP_RETRIES) {
+ struct task_struct *p;
+
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
+
+ /*
+ * If we've already tried to reap this task in the past and
+ * failed, it probably doesn't make much sense to try yet again,
+ * so hide the mm from the oom killer so that it can move on
+ * to another task with a different mm struct.
+ */
+ p = find_lock_task_mm(tsk);
+ if (p) {
+ if (test_and_set_bit(MMF_OOM_NOT_REAPABLE, &p->mm->flags)) {
+ pr_info("oom_reaper: giving up pid:%d (%s)\n",
+ task_pid_nr(tsk), tsk->comm);
+ set_bit(MMF_OOM_REAPED, &p->mm->flags);
+ }
+ task_unlock(p);
+ }
+
debug_show_all_locks();
}
* Checks whether the given task is dying or exiting and likely to
* release its address space. This means that all threads and processes
* sharing the same mm have to be killed or exiting.
+ * Caller has to make sure that task->mm is stable (hold task_lock or
+ * operate on current).
*/
bool task_will_free_mem(struct task_struct *task)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = task->mm;
struct task_struct *p;
bool ret;
- if (!__task_will_free_mem(task))
- return false;
-
/*
- * If the process has passed exit_mm we have to skip it because
- * we have lost a link to other tasks sharing this mm, we do not
- * have anything to reap and the task might then get stuck waiting
- * for parent as zombie and we do not want it to hold TIF_MEMDIE
+ * Skip a task without mm because it might have passed its exit_mm and
+ * exit_oom_victim. oom_reaper could have rescued it but do not rely
+ * on that for now. We can consider find_lock_task_mm in the future.
*/
- p = find_lock_task_mm(task);
- if (!p)
+ if (!mm)
return false;
- mm = p->mm;
+ if (!__task_will_free_mem(task))
+ return false;
/*
* This task has already been drained by the oom reaper so there are
* only small chances it will free some more
*/
- if (test_bit(MMF_OOM_REAPED, &mm->flags)) {
- task_unlock(p);
+ if (test_bit(MMF_OOM_REAPED, &mm->flags))
return false;
- }
- if (atomic_read(&mm->mm_users) <= 1) {
- task_unlock(p);
+ if (atomic_read(&mm->mm_users) <= 1)
return true;
- }
-
- /* pin the mm to not get freed and reused */
- atomic_inc(&mm->mm_count);
- task_unlock(p);
/*
* This is really pessimistic but we do not have any reliable way
break;
}
rcu_read_unlock();
- mmdrop(mm);
return ret;
}
* If the task is already exiting, don't alarm the sysadmin or kill
* its children or threads, just set TIF_MEMDIE so it can die quickly
*/
+ task_lock(p);
if (task_will_free_mem(p)) {
mark_oom_victim(p);
wake_oom_reaper(p);
+ task_unlock(p);
put_task_struct(p);
return;
}
+ task_unlock(p);
if (__ratelimit(&oom_rs))
dump_header(oc, p);
/*
* We cannot use oom_reaper for the mm shared by this
* process because it wouldn't get killed and so the
- * memory might be still used.
+ * memory might be still used. Hide the mm from the oom
+ * killer to guarantee OOM forward progress.
*/
can_oom_reap = false;
+ set_bit(MMF_OOM_REAPED, &mm->flags);
+ pr_info("oom killer %d (%s) has mm pinned by %d (%s)\n",
+ task_pid_nr(victim), victim->comm,
+ task_pid_nr(p), p->comm);
continue;
}
do_send_sig_info(SIGKILL, SEND_SIG_FORCED, p, true);
* If current has a pending SIGKILL or is exiting, then automatically
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
- *
- * But don't select if current has already released its mm and cleared
- * TIF_MEMDIE flag at exit_mm(), otherwise an OOM livelock may occur.
*/
- if (current->mm && task_will_free_mem(current)) {
+ if (task_will_free_mem(current)) {
mark_oom_victim(current);
wake_oom_reaper(current);
return true;