b43: move under broadcom vendor directory

[cascardo/linux.git] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index c57c442..9acfb16 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -62,6 +62,7 @@
  #include <linux/oom.h>
  #include <linux/lockdep.h>
  #include <linux/file.h>
+#include <linux/tracehook.h>
  #include "internal.h"
  #include <net/sock.h>
  #include <net/ip.h>
@@ -434,7 +435,7 @@ struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
  
         memcg = page->mem_cgroup;
  
-       if (!memcg || !cgroup_on_dfl(memcg->css.cgroup))
+       if (!memcg || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
                 memcg = root_mem_cgroup;
  
         rcu_read_unlock();
@@ -1661,7 +1662,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
  
  static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
  {
-       if (!current->memcg_oom.may_oom)
+       if (!current->memcg_may_oom)
                 return;
         /*
          * We are in the middle of the charge context here, so we
@@ -1678,9 +1679,9 @@ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
          * and when we know whether the fault was overall successful.
          */
         css_get(&memcg->css);
-       current->memcg_oom.memcg = memcg;
-       current->memcg_oom.gfp_mask = mask;
-       current->memcg_oom.order = order;
+       current->memcg_in_oom = memcg;
+       current->memcg_oom_gfp_mask = mask;
+       current->memcg_oom_order = order;
  }
  
  /**
@@ -1702,7 +1703,7 @@ static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
   */
  bool mem_cgroup_oom_synchronize(bool handle)
  {
-       struct mem_cgroup *memcg = current->memcg_oom.memcg;
+       struct mem_cgroup *memcg = current->memcg_in_oom;
         struct oom_wait_info owait;
         bool locked;
  
@@ -1730,8 +1731,8 @@ bool mem_cgroup_oom_synchronize(bool handle)
         if (locked && !memcg->oom_kill_disable) {
                 mem_cgroup_unmark_under_oom(memcg);
                 finish_wait(&memcg_oom_waitq, &owait.wait);
-               mem_cgroup_out_of_memory(memcg, current->memcg_oom.gfp_mask,
-                                        current->memcg_oom.order);
+               mem_cgroup_out_of_memory(memcg, current->memcg_oom_gfp_mask,
+                                        current->memcg_oom_order);
         } else {
                 schedule();
                 mem_cgroup_unmark_under_oom(memcg);
@@ -1748,7 +1749,7 @@ bool mem_cgroup_oom_synchronize(bool handle)
                 memcg_oom_recover(memcg);
         }
  cleanup:
-       current->memcg_oom.memcg = NULL;
+       current->memcg_in_oom = NULL;
         css_put(&memcg->css);
         return true;
  }
@@ -1972,6 +1973,31 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
         return NOTIFY_OK;
  }
  
+/*
+ * Scheduled by try_charge() to run on the return path to userland.  Walks up
+ * from the memcg of current->mm, reclaiming at each level over its high limit.
+ */
+void mem_cgroup_handle_over_high(void)
+{
+       unsigned int nr_pages = current->memcg_nr_pages_over_high;
+       struct mem_cgroup *memcg, *pos;
+
+       if (likely(!nr_pages))
+               return;
+
+       pos = memcg = get_mem_cgroup_from_mm(current->mm);
+
+       do {
+               if (page_counter_read(&pos->memory) <= pos->high)
+                       continue;
+               mem_cgroup_events(pos, MEMCG_HIGH, 1);
+               try_to_free_mem_cgroup_pages(pos, nr_pages, GFP_KERNEL, true);
+       } while ((pos = parent_mem_cgroup(pos)));
+
+       css_put(&memcg->css);
+       current->memcg_nr_pages_over_high = 0;
+}
+
  static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
                       unsigned int nr_pages)
  {
@@ -1982,17 +2008,16 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
         unsigned long nr_reclaimed;
         bool may_swap = true;
         bool drained = false;
-       int ret = 0;
  
         if (mem_cgroup_is_root(memcg))
-               goto done;
+               return 0;
  retry:
         if (consume_stock(memcg, nr_pages))
-               goto done;
+               return 0;
  
         if (!do_swap_account ||
-           !page_counter_try_charge(&memcg->memsw, batch, &counter)) {
-               if (!page_counter_try_charge(&memcg->memory, batch, &counter))
+           page_counter_try_charge(&memcg->memsw, batch, &counter)) {
+               if (page_counter_try_charge(&memcg->memory, batch, &counter))
                         goto done_restock;
                 if (do_swap_account)
                         page_counter_uncharge(&memcg->memsw, batch);
@@ -2016,12 +2041,12 @@ retry:
         if (unlikely(test_thread_flag(TIF_MEMDIE) ||
                      fatal_signal_pending(current) ||
                      current->flags & PF_EXITING))
-               goto bypass;
+               goto force;
  
         if (unlikely(task_in_memcg_oom(current)))
                 goto nomem;
  
-       if (!(gfp_mask & __GFP_WAIT))
+       if (!gfpflags_allow_blocking(gfp_mask))
                 goto nomem;
  
         mem_cgroup_events(mem_over_limit, MEMCG_MAX, 1);
@@ -2062,38 +2087,54 @@ retry:
                 goto retry;
  
         if (gfp_mask & __GFP_NOFAIL)
-               goto bypass;
+               goto force;
  
         if (fatal_signal_pending(current))
-               goto bypass;
+               goto force;
  
         mem_cgroup_events(mem_over_limit, MEMCG_OOM, 1);
  
-       mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(nr_pages));
+       mem_cgroup_oom(mem_over_limit, gfp_mask,
+                      get_order(nr_pages * PAGE_SIZE));
  nomem:
         if (!(gfp_mask & __GFP_NOFAIL))
                 return -ENOMEM;
-bypass:
-       return -EINTR;
+force:
+       /*
+        * The allocation either can't fail or will lead to more memory
+        * being freed very soon.  Allow memory usage to go over the limit
+        * temporarily by force charging it.
+        */
+       page_counter_charge(&memcg->memory, nr_pages);
+       if (do_swap_account)
+               page_counter_charge(&memcg->memsw, nr_pages);
+       css_get_many(&memcg->css, nr_pages);
+
+       return 0;
  
  done_restock:
         css_get_many(&memcg->css, batch);
         if (batch > nr_pages)
                 refill_stock(memcg, batch - nr_pages);
-       if (!(gfp_mask & __GFP_WAIT))
-               goto done;
+
         /*
-        * If the hierarchy is above the normal consumption range,
-        * make the charging task trim their excess contribution.
+        * If the hierarchy is above the normal consumption range, schedule
+        * reclaim on returning to userland.  We can perform reclaim here
+        * if __GFP_RECLAIM but let's always punt for simplicity and so that
+        * GFP_KERNEL can consistently be used during reclaim.  @memcg is
+        * not recorded as it most likely matches current's and won't
+        * change in the meantime.  As high limit is checked again before
+        * reclaim, the cost of mismatch is negligible.
          */
         do {
-               if (page_counter_read(&memcg->memory) <= memcg->high)
-                       continue;
-               mem_cgroup_events(memcg, MEMCG_HIGH, 1);
-               try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
+               if (page_counter_read(&memcg->memory) > memcg->high) {
+                       current->memcg_nr_pages_over_high += nr_pages;
+                       set_notify_resume(current);
+                       break;
+               }
         } while ((memcg = parent_mem_cgroup(memcg)));
-done:
-       return ret;
+
+       return 0;
  }
  
  static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
@@ -2174,55 +2215,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
  }
  
  #ifdef CONFIG_MEMCG_KMEM
-int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
-                     unsigned long nr_pages)
-{
-       struct page_counter *counter;
-       int ret = 0;
-
-       ret = page_counter_try_charge(&memcg->kmem, nr_pages, &counter);
-       if (ret < 0)
-               return ret;
-
-       ret = try_charge(memcg, gfp, nr_pages);
-       if (ret == -EINTR)  {
-               /*
-                * try_charge() chose to bypass to root due to OOM kill or
-                * fatal signal.  Since our only options are to either fail
-                * the allocation or charge it to this cgroup, do it as a
-                * temporary condition. But we can't fail. From a kmem/slab
-                * perspective, the cache has already been selected, by
-                * mem_cgroup_kmem_get_cache(), so it is too late to change
-                * our minds.
-                *
-                * This condition will only trigger if the task entered
-                * memcg_charge_kmem in a sane state, but was OOM-killed
-                * during try_charge() above. Tasks that were already dying
-                * when the allocation triggers should have been already
-                * directed to the root cgroup in memcontrol.h
-                */
-               page_counter_charge(&memcg->memory, nr_pages);
-               if (do_swap_account)
-                       page_counter_charge(&memcg->memsw, nr_pages);
-               css_get_many(&memcg->css, nr_pages);
-               ret = 0;
-       } else if (ret)
-               page_counter_uncharge(&memcg->kmem, nr_pages);
-
-       return ret;
-}
-
-void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages)
-{
-       page_counter_uncharge(&memcg->memory, nr_pages);
-       if (do_swap_account)
-               page_counter_uncharge(&memcg->memsw, nr_pages);
-
-       page_counter_uncharge(&memcg->kmem, nr_pages);
-
-       css_put_many(&memcg->css, nr_pages);
-}
-
  static int memcg_alloc_cache_id(void)
  {
         int id, size;
@@ -2384,85 +2376,58 @@ void __memcg_kmem_put_cache(struct kmem_cache *cachep)
                 css_put(&cachep->memcg_params.memcg->css);
  }
  
-/*
- * We need to verify if the allocation against current->mm->owner's memcg is
- * possible for the given order. But the page is not allocated yet, so we'll
- * need a further commit step to do the final arrangements.
- *
- * It is possible for the task to switch cgroups in this mean time, so at
- * commit time, we can't rely on task conversion any longer.  We'll then use
- * the handle argument to return to the caller which cgroup we should commit
- * against. We could also return the memcg directly and avoid the pointer
- * passing, but a boolean return value gives better semantics considering
- * the compiled-out case as well.
- *
- * Returning true means the allocation is possible.
- */
-bool
-__memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
+int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order,
+                             struct mem_cgroup *memcg)
  {
-       struct mem_cgroup *memcg;
+       unsigned int nr_pages = 1 << order;
+       struct page_counter *counter;
         int ret;
  
-       *_memcg = NULL;
+       if (!memcg_kmem_is_active(memcg))
+               return 0;
  
-       memcg = get_mem_cgroup_from_mm(current->mm);
+       if (!page_counter_try_charge(&memcg->kmem, nr_pages, &counter))
+               return -ENOMEM;
  
-       if (!memcg_kmem_is_active(memcg)) {
-               css_put(&memcg->css);
-               return true;
+       ret = try_charge(memcg, gfp, nr_pages);
+       if (ret) {
+               page_counter_uncharge(&memcg->kmem, nr_pages);
+               return ret;
         }
  
-       ret = memcg_charge_kmem(memcg, gfp, 1 << order);
-       if (!ret)
-               *_memcg = memcg;
+       page->mem_cgroup = memcg;
  
-       css_put(&memcg->css);
-       return (ret == 0);
+       return 0;
  }
  
-void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
-                             int order)
+int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
  {
-       VM_BUG_ON(mem_cgroup_is_root(memcg));
+       struct mem_cgroup *memcg;
+       int ret;
  
-       /* The page allocation failed. Revert */
-       if (!page) {
-               memcg_uncharge_kmem(memcg, 1 << order);
-               return;
-       }
-       page->mem_cgroup = memcg;
+       memcg = get_mem_cgroup_from_mm(current->mm);
+       ret = __memcg_kmem_charge_memcg(page, gfp, order, memcg);
+       css_put(&memcg->css);
+       return ret;
  }
  
-void __memcg_kmem_uncharge_pages(struct page *page, int order)
+void __memcg_kmem_uncharge(struct page *page, int order)
  {
         struct mem_cgroup *memcg = page->mem_cgroup;
+       unsigned int nr_pages = 1 << order;
  
         if (!memcg)
                 return;
  
         VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
  
-       memcg_uncharge_kmem(memcg, 1 << order);
-       page->mem_cgroup = NULL;
-}
-
-struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr)
-{
-       struct mem_cgroup *memcg = NULL;
-       struct kmem_cache *cachep;
-       struct page *page;
-
-       page = virt_to_head_page(ptr);
-       if (PageSlab(page)) {
-               cachep = page->slab_cache;
-               if (!is_root_cache(cachep))
-                       memcg = cachep->memcg_params.memcg;
-       } else
-               /* page allocated by alloc_kmem_pages */
-               memcg = page->mem_cgroup;
+       page_counter_uncharge(&memcg->kmem, nr_pages);
+       page_counter_uncharge(&memcg->memory, nr_pages);
+       if (do_swap_account)
+               page_counter_uncharge(&memcg->memsw, nr_pages);
  
-       return memcg;
+       page->mem_cgroup = NULL;
+       css_put_many(&memcg->css, nr_pages);
  }
  #endif /* CONFIG_MEMCG_KMEM */
  
@@ -2836,9 +2801,9 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
         return val;
  }
  
-static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
  {
-       u64 val;
+       unsigned long val;
  
         if (mem_cgroup_is_root(memcg)) {
                 val = tree_stat(memcg, MEM_CGROUP_STAT_CACHE);
@@ -2851,7 +2816,7 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
                 else
                         val = page_counter_read(&memcg->memsw);
         }
-       return val << PAGE_SHIFT;
+       return val;
  }
  
  enum {
@@ -2885,9 +2850,9 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
         switch (MEMFILE_ATTR(cft->private)) {
         case RES_USAGE:
                 if (counter == &memcg->memory)
-                       return mem_cgroup_usage(memcg, false);
+                       return (u64)mem_cgroup_usage(memcg, false) * PAGE_SIZE;
                 if (counter == &memcg->memsw)
-                       return mem_cgroup_usage(memcg, true);
+                       return (u64)mem_cgroup_usage(memcg, true) * PAGE_SIZE;
                 return (u64)page_counter_read(counter) * PAGE_SIZE;
         case RES_LIMIT:
                 return (u64)counter->limit * PAGE_SIZE;
@@ -2926,7 +2891,7 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
          * of course permitted.
          */
         mutex_lock(&memcg_create_mutex);
-       if (cgroup_has_tasks(memcg->css.cgroup) ||
+       if (cgroup_is_populated(memcg->css.cgroup) ||
             (memcg->use_hierarchy && memcg_has_children(memcg)))
                 err = -EBUSY;
         mutex_unlock(&memcg_create_mutex);
@@ -3387,7 +3352,6 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
         ret = page_counter_memparse(args, "-1", &threshold);
         if (ret)
                 return ret;
-       threshold <<= PAGE_SHIFT;
  
         mutex_lock(&memcg->thresholds_lock);
  
@@ -4066,8 +4030,7 @@ static struct cftype mem_cgroup_legacy_files[] = {
         {
                 .name = "cgroup.event_control",         /* XXX: for compat */
                 .write = memcg_write_event_control,
-               .flags = CFTYPE_NO_PREFIX,
-               .mode = S_IWUGO,
+               .flags = CFTYPE_NO_PREFIX | CFTYPE_WORLD_WRITABLE,
         },
         {
                 .name = "swappiness",
@@ -4401,28 +4364,16 @@ static int mem_cgroup_do_precharge(unsigned long count)
  {
         int ret;
  
-       /* Try a single bulk charge without reclaim first */
-       ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+       /* Try a single bulk charge without reclaim first, kswapd may wake */
+       ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_DIRECT_RECLAIM, count);
         if (!ret) {
                 mc.precharge += count;
                 return ret;
         }
-       if (ret == -EINTR) {
-               cancel_charge(root_mem_cgroup, count);
-               return ret;
-       }
  
         /* Try charges one by one with reclaim */
         while (count--) {
                 ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
-               /*
-                * In case of failure, any residual charges against
-                * mc.to will be dropped by mem_cgroup_clear_mc()
-                * later on.  However, cancel any charges that are
-                * bypassed to root right away or they'll be lost.
-                */
-               if (ret == -EINTR)
-                       cancel_charge(root_mem_cgroup, 1);
                 if (ret)
                         return ret;
                 mc.precharge++;
@@ -4577,9 +4528,8 @@ static int mem_cgroup_move_account(struct page *page,
                 goto out;
  
         /*
-        * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup
-        * of its source page while we change it: page migration takes
-        * both pages off the LRU, but page cache replacement doesn't.
+        * Prevent mem_cgroup_replace_page() from looking at
+        * page->mem_cgroup of its source page while we change it.
          */
         if (!trylock_page(page))
                 goto out;
@@ -4834,7 +4784,7 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
  {
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
         struct mem_cgroup *from;
-       struct task_struct *p;
+       struct task_struct *leader, *p;
         struct mm_struct *mm;
         unsigned long move_flags;
         int ret = 0;
@@ -4848,7 +4798,20 @@ static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
         if (!move_flags)
                 return 0;
  
-       p = cgroup_taskset_first(tset);
+       /*
+        * Multi-process migrations only happen on the default hierarchy
+        * where charge immigration is not used.  Perform charge
+        * immigration if @tset contains a leader and whine if there are
+        * multiple.
+        */
+       p = NULL;
+       cgroup_taskset_for_each_leader(leader, tset) {
+               WARN_ON_ONCE(p);
+               p = leader;
+       }
+       if (!p)
+               return 0;
+
         from = mem_cgroup_from_task(p);
  
         VM_BUG_ON(from == memcg);
@@ -5064,7 +5027,7 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
          * guarantees that @root doesn't have any children, so turning it
          * on for the root memcg is enough.
          */
-       if (cgroup_on_dfl(root_css->cgroup))
+       if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
                 root_mem_cgroup->use_hierarchy = true;
         else
                 root_mem_cgroup->use_hierarchy = false;
@@ -5073,7 +5036,9 @@ static void mem_cgroup_bind(struct cgroup_subsys_state *root_css)
  static u64 memory_current_read(struct cgroup_subsys_state *css,
                                struct cftype *cft)
  {
-       return mem_cgroup_usage(mem_cgroup_from_css(css), false);
+       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+       return (u64)page_counter_read(&memcg->memory) * PAGE_SIZE;
  }
  
  static int memory_low_show(struct seq_file *m, void *v)
@@ -5185,6 +5150,7 @@ static int memory_events_show(struct seq_file *m, void *v)
  static struct cftype memory_files[] = {
         {
                 .name = "current",
+               .flags = CFTYPE_NOT_ON_ROOT,
                 .read_u64 = memory_current_read,
         },
         {
@@ -5208,6 +5174,7 @@ static struct cftype memory_files[] = {
         {
                 .name = "events",
                 .flags = CFTYPE_NOT_ON_ROOT,
+               .file_offset = offsetof(struct mem_cgroup, events_file),
                 .seq_show = memory_events_show,
         },
         { }     /* terminate */
@@ -5327,11 +5294,6 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
         ret = try_charge(memcg, gfp_mask, nr_pages);
  
         css_put(&memcg->css);
-
-       if (ret == -EINTR) {
-               memcg = root_mem_cgroup;
-               ret = 0;
-       }
  out:
         *memcgp = memcg;
         return ret;
@@ -5546,7 +5508,7 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
  }
  
  /**
- * mem_cgroup_migrate - migrate a charge to another page
+ * mem_cgroup_replace_page - migrate a charge to another page
   * @oldpage: currently charged page
   * @newpage: page to transfer the charge to
   * @lrucare: either or both pages might be on the LRU already
@@ -5555,16 +5517,13 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
   *
   * Both pages must be locked, @newpage->mapping must be set up.
   */
-void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
-                       bool lrucare)
+void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
  {
         struct mem_cgroup *memcg;
         int isolated;
  
         VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
         VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
-       VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage);
-       VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage);
         VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
         VM_BUG_ON_PAGE(PageTransHuge(oldpage) != PageTransHuge(newpage),
                        newpage);
@@ -5576,25 +5535,16 @@ void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
         if (newpage->mem_cgroup)
                 return;
  
-       /*
-        * Swapcache readahead pages can get migrated before being
-        * charged, and migration from compaction can happen to an
-        * uncharged page when the PFN walker finds a page that
-        * reclaim just put back on the LRU but has not released yet.
-        */
+       /* Swapcache readahead pages can get replaced before being charged */
         memcg = oldpage->mem_cgroup;
         if (!memcg)
                 return;
  
-       if (lrucare)
-               lock_page_lru(oldpage, &isolated);
-
+       lock_page_lru(oldpage, &isolated);
         oldpage->mem_cgroup = NULL;
+       unlock_page_lru(oldpage, isolated);
  
-       if (lrucare)
-               unlock_page_lru(oldpage, isolated);
-
-       commit_charge(newpage, memcg, lrucare);
+       commit_charge(newpage, memcg, true);
  }
  
  /*