mm, compaction: ignore fragindex from compaction_zonelist_suitable()
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2ff0289..60bb830 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -920,6 +920,43 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
             iter != NULL;                              \
             iter = mem_cgroup_iter(NULL, iter, NULL))
 
+/**
+ * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
+ * @memcg: hierarchy root
+ * @fn: function to call for each task
+ * @arg: argument passed to @fn
+ *
+ * This function iterates over tasks attached to @memcg or to any of its
+ * descendants and calls @fn for each task. If @fn returns a non-zero
+ * value, the function breaks the iteration loop and returns the value.
+ * Otherwise, it will iterate over all tasks and return 0.
+ *
+ * This function must not be called for the root memory cgroup.
+ */
+int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
+                         int (*fn)(struct task_struct *, void *), void *arg)
+{
+       struct mem_cgroup *iter;
+       int ret = 0;
+
+       BUG_ON(memcg == root_mem_cgroup);
+
+       for_each_mem_cgroup_tree(iter, memcg) {
+               struct css_task_iter it;
+               struct task_struct *task;
+
+               css_task_iter_start(&iter->css, &it);
+               while (!ret && (task = css_task_iter_next(&it)))
+                       ret = fn(task, arg);
+               css_task_iter_end(&it);
+               if (ret) {
+                       mem_cgroup_iter_break(memcg, iter);
+                       break;
+               }
+       }
+       return ret;
+}
+
 /**
  * mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
  * @page: the page
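
For context, here is a usage sketch of the new iterator (not part of this patch): a hypothetical caller that counts every task attached to a memcg hierarchy. The names count_task() and count_memcg_tasks() are made up for illustration; only mem_cgroup_scan_tasks() itself comes from the hunk above.

        /* Hypothetical callback: count the task and return 0 to keep iterating. */
        static int count_task(struct task_struct *task, void *arg)
        {
                unsigned int *nr = arg;

                (*nr)++;
                return 0;
        }

        /* Hypothetical caller: walks @memcg and all of its descendants. */
        static unsigned int count_memcg_tasks(struct mem_cgroup *memcg)
        {
                unsigned int nr = 0;

                mem_cgroup_scan_tasks(memcg, count_task, &nr);
                return nr;
        }
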
@@ -1178,7 +1215,7 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 /*
  * Return the memory (and swap, if configured) limit for a memcg.
  */
-static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
+unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
        unsigned long limit;
 
@@ -1205,79 +1242,12 @@ static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
                .gfp_mask = gfp_mask,
                .order = order,
        };
-       struct mem_cgroup *iter;
-       unsigned long chosen_points = 0;
-       unsigned long totalpages;
-       unsigned int points = 0;
-       struct task_struct *chosen = NULL;
+       bool ret;
 
        mutex_lock(&oom_lock);
-
-       /*
-        * If current has a pending SIGKILL or is exiting, then automatically
-        * select it.  The goal is to allow it to allocate so that it may
-        * quickly exit and free its memory.
-        */
-       if (task_will_free_mem(current)) {
-               mark_oom_victim(current);
-               wake_oom_reaper(current);
-               goto unlock;
-       }
-
-       check_panic_on_oom(&oc, CONSTRAINT_MEMCG);
-       totalpages = mem_cgroup_get_limit(memcg) ? : 1;
-       for_each_mem_cgroup_tree(iter, memcg) {
-               struct css_task_iter it;
-               struct task_struct *task;
-
-               css_task_iter_start(&iter->css, &it);
-               while ((task = css_task_iter_next(&it))) {
-                       switch (oom_scan_process_thread(&oc, task)) {
-                       case OOM_SCAN_SELECT:
-                               if (chosen)
-                                       put_task_struct(chosen);
-                               chosen = task;
-                               chosen_points = ULONG_MAX;
-                               get_task_struct(chosen);
-                               /* fall through */
-                       case OOM_SCAN_CONTINUE:
-                               continue;
-                       case OOM_SCAN_ABORT:
-                               css_task_iter_end(&it);
-                               mem_cgroup_iter_break(memcg, iter);
-                               if (chosen)
-                                       put_task_struct(chosen);
-                               /* Set a dummy value to return "true". */
-                               chosen = (void *) 1;
-                               goto unlock;
-                       case OOM_SCAN_OK:
-                               break;
-                       };
-                       points = oom_badness(task, memcg, NULL, totalpages);
-                       if (!points || points < chosen_points)
-                               continue;
-                       /* Prefer thread group leaders for display purposes */
-                       if (points == chosen_points &&
-                           thread_group_leader(chosen))
-                               continue;
-
-                       if (chosen)
-                               put_task_struct(chosen);
-                       chosen = task;
-                       chosen_points = points;
-                       get_task_struct(chosen);
-               }
-               css_task_iter_end(&it);
-       }
-
-       if (chosen) {
-               points = chosen_points * 1000 / totalpages;
-               oom_kill_process(&oc, chosen, points, totalpages,
-                                "Memory cgroup out of memory");
-       }
-unlock:
+       ret = out_of_memory(&oc);
        mutex_unlock(&oom_lock);
-       return chosen;
+       return ret;
 }
 
 #if MAX_NUMNODES > 1
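
The hunk above drops the memcg-private victim-selection loop and simply calls the generic out_of_memory(); the matching oom_kill.c changes are not part of this file. A plausible sketch of how the generic killer can reuse mem_cgroup_scan_tasks() for the memcg case is shown below; is_memcg_oom(), oom_evaluate_task() and the oc->memcg field are assumptions about that code, not taken from this diff.

        static void select_bad_process(struct oom_control *oc)
        {
                if (is_memcg_oom(oc)) {
                        /* memcg OOM: only tasks in the hierarchy are eligible */
                        mem_cgroup_scan_tasks(oc->memcg, oom_evaluate_task, oc);
                } else {
                        struct task_struct *p;

                        /* global OOM: walk every process in the system */
                        rcu_read_lock();
                        for_each_process(p)
                                if (oom_evaluate_task(p, oc))
                                        break;
                        rcu_read_unlock();
                }
        }
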
@@ -1600,7 +1570,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
        if (!memcg)
                return false;
 
-       if (!handle || oom_killer_disabled)
+       if (!handle)
                goto cleanup;
 
        owait.memcg = memcg;
@@ -1740,17 +1710,22 @@ static DEFINE_MUTEX(percpu_charge_mutex);
 static bool consume_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        struct memcg_stock_pcp *stock;
+       unsigned long flags;
        bool ret = false;
 
        if (nr_pages > CHARGE_BATCH)
                return ret;
 
-       stock = &get_cpu_var(memcg_stock);
+       local_irq_save(flags);
+
+       stock = this_cpu_ptr(&memcg_stock);
        if (memcg == stock->cached && stock->nr_pages >= nr_pages) {
                stock->nr_pages -= nr_pages;
                ret = true;
        }
-       put_cpu_var(memcg_stock);
+
+       local_irq_restore(flags);
+
        return ret;
 }
 
@@ -1771,15 +1746,18 @@ static void drain_stock(struct memcg_stock_pcp *stock)
        stock->cached = NULL;
 }
 
-/*
- * This must be called under preempt disabled or must be called by
- * a thread which is pinned to local cpu.
- */
 static void drain_local_stock(struct work_struct *dummy)
 {
-       struct memcg_stock_pcp *stock = this_cpu_ptr(&memcg_stock);
+       struct memcg_stock_pcp *stock;
+       unsigned long flags;
+
+       local_irq_save(flags);
+
+       stock = this_cpu_ptr(&memcg_stock);
        drain_stock(stock);
        clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
+
+       local_irq_restore(flags);
 }
 
 /*
@@ -1788,14 +1766,19 @@ static void drain_local_stock(struct work_struct *dummy)
  */
 static void refill_stock(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
-       struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock);
+       struct memcg_stock_pcp *stock;
+       unsigned long flags;
 
+       local_irq_save(flags);
+
+       stock = this_cpu_ptr(&memcg_stock);
        if (stock->cached != memcg) { /* reset if necessary */
                drain_stock(stock);
                stock->cached = memcg;
        }
        stock->nr_pages += nr_pages;
-       put_cpu_var(memcg_stock);
+
+       local_irq_restore(flags);
 }
 
 /*
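
The three stock hunks above replace get_cpu_var()/put_cpu_var(), which only disable preemption, with this_cpu_ptr() under local_irq_save(), so the per-CPU charge stock is also safe against users running in interrupt context. A minimal sketch of the resulting access pattern, using a made-up pcp_cache structure instead of memcg_stock_pcp:

        #include <linux/percpu.h>
        #include <linux/irqflags.h>

        struct pcp_cache {
                unsigned long nr;               /* units cached on this CPU */
        };

        static DEFINE_PER_CPU(struct pcp_cache, pcp_cache);

        static void pcp_cache_add(unsigned long n)
        {
                struct pcp_cache *cache;
                unsigned long flags;

                local_irq_save(flags);          /* excludes preemption and IRQs */
                cache = this_cpu_ptr(&pcp_cache);
                cache->nr += n;
                local_irq_restore(flags);
        }
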
@@ -4079,29 +4062,13 @@ static DEFINE_IDR(mem_cgroup_idr);
 
 static void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n)
 {
+       VM_BUG_ON(atomic_read(&memcg->id.ref) <= 0);
        atomic_add(n, &memcg->id.ref);
 }
 
-static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
-{
-       while (!atomic_inc_not_zero(&memcg->id.ref)) {
-               /*
-                * The root cgroup cannot be destroyed, so its refcount must
-                * always be >= 1.
-                */
-               if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
-                       VM_BUG_ON(1);
-                       break;
-               }
-               memcg = parent_mem_cgroup(memcg);
-               if (!memcg)
-                       memcg = root_mem_cgroup;
-       }
-       return memcg;
-}
-
 static void mem_cgroup_id_put_many(struct mem_cgroup *memcg, unsigned int n)
 {
+       VM_BUG_ON(atomic_read(&memcg->id.ref) < n);
        if (atomic_sub_and_test(n, &memcg->id.ref)) {
                idr_remove(&mem_cgroup_idr, memcg->id.id);
                memcg->id.id = 0;
@@ -4290,8 +4257,10 @@ fail:
 
 static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
+       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
        /* Online state pins memcg ID, memcg ID pins CSS */
-       mem_cgroup_id_get(mem_cgroup_from_css(css));
+       atomic_set(&memcg->id.ref, 1);
        css_get(css);
        return 0;
 }
@@ -4439,7 +4408,7 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
         * Because lookup_swap_cache() updates some statistics counter,
         * we call find_get_page() with swapper_space directly.
         */
-       page = find_get_page(swap_address_space(ent), ent.val);
+       page = find_get_page(swap_address_space(ent), swp_offset(ent));
        if (do_memsw_account())
                entry->val = ent.val;
 
@@ -4477,7 +4446,8 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
                        swp_entry_t swp = radix_to_swp_entry(page);
                        if (do_memsw_account())
                                *entry = swp;
-                       page = find_get_page(swap_address_space(swp), swp.val);
+                       page = find_get_page(swap_address_space(swp),
+                                            swp_offset(swp));
                }
        } else
                page = find_get_page(mapping, pgoff);
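
Both lookup hunks above change the swap-cache key from the raw entry value to swp_offset(): the swap type already selects the address_space via swap_address_space(), so the offset alone is an unambiguous index within it. A small sketch of the entry encoding and the resulting lookup; lookup_swap_page() is a hypothetical wrapper, not a function from this patch:

        #include <linux/swap.h>
        #include <linux/swapops.h>
        #include <linux/pagemap.h>

        static struct page *lookup_swap_page(unsigned int type, pgoff_t offset)
        {
                swp_entry_t ent = swp_entry(type, offset);   /* pack (type, offset) */

                /*
                 * swap_address_space(ent) picks the address_space for
                 * swp_type(ent); within it the page is indexed by
                 * swp_offset(ent) alone.
                 */
                return find_get_page(swap_address_space(ent), swp_offset(ent));
        }
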
@@ -4712,7 +4682,8 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
                .mm = mm,
        };
        down_read(&mm->mmap_sem);
-       walk_page_range(0, ~0UL, &mem_cgroup_count_precharge_walk);
+       walk_page_range(0, mm->highest_vm_end,
+                       &mem_cgroup_count_precharge_walk);
        up_read(&mm->mmap_sem);
 
        precharge = mc.precharge;
@@ -5000,7 +4971,8 @@ retry:
         * When we have consumed all precharges and failed in doing
         * additional charge, the page walk just aborts.
         */
-       walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
+       walk_page_range(0, mc.mm->highest_vm_end, &mem_cgroup_move_charge_walk);
+
        up_read(&mc.mm->mmap_sem);
        atomic_dec(&mc.from->moving_account);
 }
@@ -5821,6 +5793,24 @@ static int __init mem_cgroup_init(void)
 subsys_initcall(mem_cgroup_init);
 
 #ifdef CONFIG_MEMCG_SWAP
+static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
+{
+       while (!atomic_inc_not_zero(&memcg->id.ref)) {
+               /*
+                * The root cgroup cannot be destroyed, so its refcount must
+                * always be >= 1.
+                */
+               if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+                       VM_BUG_ON(1);
+                       break;
+               }
+               memcg = parent_mem_cgroup(memcg);
+               if (!memcg)
+                       memcg = root_mem_cgroup;
+       }
+       return memcg;
+}
+
 /**
  * mem_cgroup_swapout - transfer a memsw charge to swap
  * @page: page whose memsw charge to transfer