X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=mm%2Foom_kill.c;h=463cdd22d4e0df8e1834330e058ab37c69bb2273;hb=7ceb009a22517297ae0e32863eb86ec766782263;hp=d53a9aa00977cbd0f81970e9e8a30b011cc73f31;hpb=9274139f4e8bb0835a5a6224957e15b8e63693e4;p=cascardo%2Flinux.git diff --git a/mm/oom_kill.c b/mm/oom_kill.c index d53a9aa00977..463cdd22d4e0 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -132,6 +132,11 @@ static inline bool is_sysrq_oom(struct oom_control *oc) return oc->order == -1; } +static inline bool is_memcg_oom(struct oom_control *oc) +{ + return oc->memcg != NULL; +} + /* return true if the task is not adequate as candidate victim task. */ static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask) @@ -213,12 +218,17 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, return points > 0 ? points : 1; } +enum oom_constraint { + CONSTRAINT_NONE, + CONSTRAINT_CPUSET, + CONSTRAINT_MEMORY_POLICY, + CONSTRAINT_MEMCG, +}; + /* * Determine the type of allocation constraint. */ -#ifdef CONFIG_NUMA -static enum oom_constraint constrained_alloc(struct oom_control *oc, - unsigned long *totalpages) +static enum oom_constraint constrained_alloc(struct oom_control *oc) { struct zone *zone; struct zoneref *z; @@ -226,8 +236,16 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc, bool cpuset_limited = false; int nid; + if (is_memcg_oom(oc)) { + oc->totalpages = mem_cgroup_get_limit(oc->memcg) ?: 1; + return CONSTRAINT_MEMCG; + } + /* Default to all available memory */ - *totalpages = totalram_pages + total_swap_pages; + oc->totalpages = totalram_pages + total_swap_pages; + + if (!IS_ENABLED(CONFIG_NUMA)) + return CONSTRAINT_NONE; if (!oc->zonelist) return CONSTRAINT_NONE; @@ -246,9 +264,9 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc, */ if (oc->nodemask && !nodes_subset(node_states[N_MEMORY], *oc->nodemask)) { - *totalpages = total_swap_pages; + oc->totalpages = total_swap_pages; for_each_node_mask(nid, *oc->nodemask) - *totalpages += node_spanned_pages(nid); + oc->totalpages += node_spanned_pages(nid); return CONSTRAINT_MEMORY_POLICY; } @@ -259,27 +277,21 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc, cpuset_limited = true; if (cpuset_limited) { - *totalpages = total_swap_pages; + oc->totalpages = total_swap_pages; for_each_node_mask(nid, cpuset_current_mems_allowed) - *totalpages += node_spanned_pages(nid); + oc->totalpages += node_spanned_pages(nid); return CONSTRAINT_CPUSET; } return CONSTRAINT_NONE; } -#else -static enum oom_constraint constrained_alloc(struct oom_control *oc, - unsigned long *totalpages) -{ - *totalpages = totalram_pages + total_swap_pages; - return CONSTRAINT_NONE; -} -#endif -enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, - struct task_struct *task) +static int oom_evaluate_task(struct task_struct *task, void *arg) { + struct oom_control *oc = arg; + unsigned long points; + if (oom_unkillable_task(task, NULL, oc->nodemask)) - return OOM_SCAN_CONTINUE; + goto next; /* * This task already has access to memory reserves and is being killed. @@ -289,68 +301,67 @@ enum oom_scan_t oom_scan_process_thread(struct oom_control *oc, */ if (!is_sysrq_oom(oc) && atomic_read(&task->signal->oom_victims)) { struct task_struct *p = find_lock_task_mm(task); - enum oom_scan_t ret = OOM_SCAN_ABORT; + bool reaped = false; if (p) { - if (test_bit(MMF_OOM_REAPED, &p->mm->flags)) - ret = OOM_SCAN_CONTINUE; + reaped = test_bit(MMF_OOM_REAPED, &p->mm->flags); task_unlock(p); } - - return ret; + if (reaped) + goto next; + goto abort; } /* * If task is allocating a lot of memory and has been marked to be * killed first if it triggers an oom, then select it. */ - if (oom_task_origin(task)) - return OOM_SCAN_SELECT; + if (oom_task_origin(task)) { + points = ULONG_MAX; + goto select; + } - return OOM_SCAN_OK; + points = oom_badness(task, NULL, oc->nodemask, oc->totalpages); + if (!points || points < oc->chosen_points) + goto next; + + /* Prefer thread group leaders for display purposes */ + if (points == oc->chosen_points && thread_group_leader(oc->chosen)) + goto next; +select: + if (oc->chosen) + put_task_struct(oc->chosen); + get_task_struct(task); + oc->chosen = task; + oc->chosen_points = points; +next: + return 0; +abort: + if (oc->chosen) + put_task_struct(oc->chosen); + oc->chosen = (void *)-1UL; + return 1; } /* - * Simple selection loop. We chose the process with the highest - * number of 'points'. Returns -1 on scan abort. + * Simple selection loop. We choose the process with the highest number of + * 'points'. In case scan was aborted, oc->chosen is set to -1. */ -static struct task_struct *select_bad_process(struct oom_control *oc, - unsigned int *ppoints, unsigned long totalpages) +static void select_bad_process(struct oom_control *oc) { - struct task_struct *p; - struct task_struct *chosen = NULL; - unsigned long chosen_points = 0; - - rcu_read_lock(); - for_each_process(p) { - unsigned int points; - - switch (oom_scan_process_thread(oc, p)) { - case OOM_SCAN_SELECT: - chosen = p; - chosen_points = ULONG_MAX; - /* fall through */ - case OOM_SCAN_CONTINUE: - continue; - case OOM_SCAN_ABORT: - rcu_read_unlock(); - return (struct task_struct *)(-1UL); - case OOM_SCAN_OK: - break; - }; - points = oom_badness(p, NULL, oc->nodemask, totalpages); - if (!points || points < chosen_points) - continue; + if (is_memcg_oom(oc)) + mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc); + else { + struct task_struct *p; - chosen = p; - chosen_points = points; + rcu_read_lock(); + for_each_process(p) + if (oom_evaluate_task(p, oc)) + break; + rcu_read_unlock(); } - if (chosen) - get_task_struct(chosen); - rcu_read_unlock(); - *ppoints = chosen_points * 1000 / totalpages; - return chosen; + oc->chosen_points = oc->chosen_points * 1000 / oc->totalpages; } /** @@ -419,7 +430,7 @@ static void dump_header(struct oom_control *oc, struct task_struct *p) static atomic_t oom_victims = ATOMIC_INIT(0); static DECLARE_WAIT_QUEUE_HEAD(oom_victims_wait); -bool oom_killer_disabled __read_mostly; +static bool oom_killer_disabled __read_mostly; #define K(x) ((x) << (PAGE_SHIFT-10)) @@ -627,7 +638,7 @@ static int oom_reaper(void *unused) return 0; } -void wake_oom_reaper(struct task_struct *tsk) +static void wake_oom_reaper(struct task_struct *tsk) { if (!oom_reaper_th) return; @@ -656,7 +667,11 @@ static int __init oom_init(void) return 0; } subsys_initcall(oom_init) -#endif +#else +static inline void wake_oom_reaper(struct task_struct *tsk) +{ +} +#endif /* CONFIG_MMU */ /** * mark_oom_victim - mark the given task as OOM victim @@ -665,7 +680,7 @@ subsys_initcall(oom_init) * Has to be called with oom_lock held and never after * oom has been disabled already. */ -void mark_oom_victim(struct task_struct *tsk) +static void mark_oom_victim(struct task_struct *tsk) { WARN_ON(oom_killer_disabled); /* OOM killer might race with memcg OOM */ @@ -760,7 +775,7 @@ static inline bool __task_will_free_mem(struct task_struct *task) * Caller has to make sure that task->mm is stable (hold task_lock or * it operates on the current). */ -bool task_will_free_mem(struct task_struct *task) +static bool task_will_free_mem(struct task_struct *task) { struct mm_struct *mm = task->mm; struct task_struct *p; @@ -788,8 +803,9 @@ bool task_will_free_mem(struct task_struct *task) return true; /* - * This is really pessimistic but we do not have any reliable way - * to check that external processes share with our mm + * Make sure that all tasks which share the mm with the given tasks + * are dying as well to make sure that a) nobody pins its mm and + * b) the task is also reapable by the oom reaper. */ rcu_read_lock(); for_each_process(p) { @@ -806,14 +822,10 @@ bool task_will_free_mem(struct task_struct *task) return ret; } -/* - * Must be called while holding a reference to p, which will be released upon - * returning. - */ -void oom_kill_process(struct oom_control *oc, struct task_struct *p, - unsigned int points, unsigned long totalpages, - const char *message) +static void oom_kill_process(struct oom_control *oc, const char *message) { + struct task_struct *p = oc->chosen; + unsigned int points = oc->chosen_points; struct task_struct *victim = p; struct task_struct *child; struct task_struct *t; @@ -860,7 +872,7 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, * oom_badness() returns 0 if the thread is unkillable */ child_points = oom_badness(child, - oc->memcg, oc->nodemask, totalpages); + oc->memcg, oc->nodemask, oc->totalpages); if (child_points > victim_points) { put_task_struct(victim); victim = child; @@ -942,7 +954,8 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p, /* * Determines whether the kernel must panic because of the panic_on_oom sysctl. */ -void check_panic_on_oom(struct oom_control *oc, enum oom_constraint constraint) +static void check_panic_on_oom(struct oom_control *oc, + enum oom_constraint constraint) { if (likely(!sysctl_panic_on_oom)) return; @@ -988,19 +1001,18 @@ EXPORT_SYMBOL_GPL(unregister_oom_notifier); */ bool out_of_memory(struct oom_control *oc) { - struct task_struct *p; - unsigned long totalpages; unsigned long freed = 0; - unsigned int uninitialized_var(points); enum oom_constraint constraint = CONSTRAINT_NONE; if (oom_killer_disabled) return false; - blocking_notifier_call_chain(&oom_notify_list, 0, &freed); - if (freed > 0) - /* Got some memory back in the last second. */ - return true; + if (!is_memcg_oom(oc)) { + blocking_notifier_call_chain(&oom_notify_list, 0, &freed); + if (freed > 0) + /* Got some memory back in the last second. */ + return true; + } /* * If current has a pending SIGKILL or is exiting, then automatically @@ -1024,37 +1036,38 @@ bool out_of_memory(struct oom_control *oc) /* * Check if there were limitations on the allocation (only relevant for - * NUMA) that may require different handling. + * NUMA and memcg) that may require different handling. */ - constraint = constrained_alloc(oc, &totalpages); + constraint = constrained_alloc(oc); if (constraint != CONSTRAINT_MEMORY_POLICY) oc->nodemask = NULL; check_panic_on_oom(oc, constraint); - if (sysctl_oom_kill_allocating_task && current->mm && - !oom_unkillable_task(current, NULL, oc->nodemask) && + if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task && + current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) && current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) { get_task_struct(current); - oom_kill_process(oc, current, 0, totalpages, - "Out of memory (oom_kill_allocating_task)"); + oc->chosen = current; + oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)"); return true; } - p = select_bad_process(oc, &points, totalpages); + select_bad_process(oc); /* Found nothing?!?! Either we hang forever, or we panic. */ - if (!p && !is_sysrq_oom(oc)) { + if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) { dump_header(oc, NULL); panic("Out of memory and no killable processes...\n"); } - if (p && p != (void *)-1UL) { - oom_kill_process(oc, p, points, totalpages, "Out of memory"); + if (oc->chosen && oc->chosen != (void *)-1UL) { + oom_kill_process(oc, !is_memcg_oom(oc) ? "Out of memory" : + "Memory cgroup out of memory"); /* * Give the killed process a good chance to exit before trying * to allocate memory again. */ schedule_timeout_killable(1); } - return true; + return !!oc->chosen; } /*