Merge branch 'akpm' (patches from Andrew Morton)

[cascardo/linux.git] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 13b9d0f..f20a57b 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -59,6 +59,7 @@
  #include <net/sock.h>
  #include <net/ip.h>
  #include <net/tcp_memcontrol.h>
+#include "slab.h"
  
  #include <asm/uaccess.h>
  
@@ -499,6 +500,29 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
         return (memcg == root_mem_cgroup);
  }
  
+/*
+ * Valid memcg IDs are restricted to the range [1, 65535] so that an
+ * ID fits into an unsigned short; 0 is treated as the invalid ID.
+ */
+#define MEM_CGROUP_ID_MAX      USHRT_MAX
+
+static inline unsigned short mem_cgroup_id(struct mem_cgroup *memcg)
+{
+       /*
+        * The root cgroup's ID is 0, but memcg treats 0 as an invalid
+        * ID, so return cgroup id + 1. NOTE(review): 65535 wraps to 0.
+        */
+       return memcg->css.cgroup->id + 1;
+}
+
+static inline struct mem_cgroup *mem_cgroup_from_id(unsigned short id)
+{
+       struct cgroup_subsys_state *css;
+
+       css = css_from_id(id - 1, &mem_cgroup_subsys); /* undo mem_cgroup_id()'s +1 */
+       return mem_cgroup_from_css(css); /* NOTE(review): css may be NULL? confirm */
+}
+
  /* Writing them here to avoid exposing memcg's inner layout */
  #if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
  
@@ -570,16 +594,11 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
  #ifdef CONFIG_MEMCG_KMEM
  /*
   * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
- * There are two main reasons for not using the css_id for this:
- *  1) this works better in sparse environments, where we have a lot of memcgs,
- *     but only a few kmem-limited. Or also, if we have, for instance, 200
- *     memcgs, and none but the 200th is kmem-limited, we'd have to have a
- *     200 entry array for that.
- *
- *  2) In order not to violate the cgroup API, we would like to do all memory
- *     allocation in ->create(). At that point, we haven't yet allocated the
- *     css_id. Having a separate index prevents us from messing with the cgroup
- *     core for this
+ * The main reason for not using cgroup id for this:
+ *  this works better in sparse environments, where we have a lot of memcgs,
+ *  but only a few kmem-limited. Or also, if we have, for instance, 200
+ *  memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *  200 entry array for that.
   *
   * The current size of the caches array is stored in
   * memcg_limited_groups_array_size.  It will double each time we have to
@@ -594,14 +613,14 @@ int memcg_limited_groups_array_size;
   * cgroups is a reasonable guess. In the future, it could be a parameter or
   * tunable, but that is strictly not necessary.
   *
- * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * MAX_SIZE should be as large as the number of cgrp_ids. Ideally, we could get
   * this constant directly from cgroup, but it is understandable that this is
   * better kept as an internal representation in cgroup.c. In any case, the
- * css_id space is not getting any smaller, and we don't have to necessarily
+ * cgrp_id space is not getting any smaller, and we don't have to necessarily
   * increase ours as well if it increases.
   */
  #define MEMCG_CACHES_MIN_SIZE 4
-#define MEMCG_CACHES_MAX_SIZE 65535
+#define MEMCG_CACHES_MAX_SIZE MEM_CGROUP_ID_MAX
  
  /*
   * A lot of the calls to the cache allocation functions are expected to be
@@ -1408,7 +1427,7 @@ bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
                 return true;
         if (!root_memcg->use_hierarchy || !memcg)
                 return false;
-       return css_is_ancestor(&memcg->css, &root_memcg->css);
+       return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
  }
  
  static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
@@ -2826,15 +2845,10 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
   */
  static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
  {
-       struct cgroup_subsys_state *css;
-
         /* ID 0 is unused ID */
         if (!id)
                 return NULL;
-       css = css_lookup(&mem_cgroup_subsys, id);
-       if (!css)
-               return NULL;
-       return mem_cgroup_from_css(css);
+       return mem_cgroup_from_id(id); /* NOTE(review): explicit !css check dropped; confirm NULL css is handled */
  }
  
  struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
@@ -2955,7 +2969,7 @@ static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
  
         VM_BUG_ON(p->is_root_cache);
         cachep = p->root_cache;
-       return cachep->memcg_params->memcg_caches[memcg_cache_id(p->memcg)];
+       return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
  }
  
  #ifdef CONFIG_SLABINFO
@@ -2984,21 +2998,14 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
         struct res_counter *fail_res;
         struct mem_cgroup *_memcg;
         int ret = 0;
-       bool may_oom;
  
         ret = res_counter_charge(&memcg->kmem, size, &fail_res);
         if (ret)
                 return ret;
  
-       /*
-        * Conditions under which we can wait for the oom_killer. Those are
-        * the same conditions tested by the core page allocator
-        */
-       may_oom = (gfp & __GFP_FS) && !(gfp & __GFP_NORETRY);
-
         _memcg = memcg;
         ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
-                                     &_memcg, may_oom);
+                                     &_memcg, oom_gfp_allowed(gfp));
  
         if (ret == -EINTR)  {
                 /*
@@ -3138,7 +3145,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
  {
         struct memcg_cache_params *cur_params = s->memcg_params;
  
-       VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
+       VM_BUG_ON(!is_root_cache(s));
  
         if (num_groups > memcg_limited_groups_array_size) {
                 int i;
@@ -3399,7 +3406,7 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
         idx = memcg_cache_id(memcg);
  
         mutex_lock(&memcg_cache_mutex);
-       new_cachep = cachep->memcg_params->memcg_caches[idx];
+       new_cachep = cache_from_memcg_idx(cachep, idx);
         if (new_cachep) {
                 css_put(&memcg->css);
                 goto out;
@@ -3445,8 +3452,8 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
          * we'll take the set_limit_mutex to protect ourselves against this.
          */
         mutex_lock(&set_limit_mutex);
-       for (i = 0; i < memcg_limited_groups_array_size; i++) {
-               c = s->memcg_params->memcg_caches[i];
+       for_each_memcg_cache_index(i) {
+               c = cache_from_memcg_idx(s, i);
                 if (!c)
                         continue;
  
@@ -3579,8 +3586,8 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
          * code updating memcg_caches will issue a write barrier to match this.
          */
         read_barrier_depends();
-       if (likely(cachep->memcg_params->memcg_caches[idx])) {
-               cachep = cachep->memcg_params->memcg_caches[idx];
+       if (likely(cache_from_memcg_idx(cachep, idx))) {
+               cachep = cache_from_memcg_idx(cachep, idx);
                 goto out;
         }
  
@@ -4350,7 +4357,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
          * css_get() was called in uncharge().
          */
         if (do_swap_account && swapout && memcg)
-               swap_cgroup_record(ent, css_id(&memcg->css));
+               swap_cgroup_record(ent, mem_cgroup_id(memcg));
  }
  #endif
  
@@ -4402,8 +4409,8 @@ static int mem_cgroup_move_swap_account(swp_entry_t entry,
  {
         unsigned short old_id, new_id;
  
-       old_id = css_id(&from->css);
-       new_id = css_id(&to->css);
+       old_id = mem_cgroup_id(from);
+       new_id = mem_cgroup_id(to);
  
         if (swap_cgroup_cmpxchg(entry, old_id, new_id) == old_id) {
                 mem_cgroup_swap_statistics(from, false);
@@ -5376,45 +5383,50 @@ static int mem_cgroup_move_charge_write(struct cgroup_subsys_state *css,
  static int memcg_numa_stat_show(struct cgroup_subsys_state *css,
                                 struct cftype *cft, struct seq_file *m)
  {
+       struct numa_stat { /* label and LRU mask for one output line */
+               const char *name;
+               unsigned int lru_mask;
+       };
+
+       static const struct numa_stat stats[] = { /* printed in this order */
+               { "total", LRU_ALL },
+               { "file", LRU_ALL_FILE },
+               { "anon", LRU_ALL_ANON },
+               { "unevictable", BIT(LRU_UNEVICTABLE) },
+       };
+       const struct numa_stat *stat;
         int nid;
-       unsigned long total_nr, file_nr, anon_nr, unevictable_nr;
-       unsigned long node_nr;
+       unsigned long nr; /* reused for both the total and per-node counts */
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
  
-       total_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL);
-       seq_printf(m, "total=%lu", total_nr);
-       for_each_node_state(nid, N_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid, LRU_ALL);
-               seq_printf(m, " N%d=%lu", nid, node_nr);
-       }
-       seq_putc(m, '\n');
-
-       file_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_FILE);
-       seq_printf(m, "file=%lu", file_nr);
-       for_each_node_state(nid, N_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-                               LRU_ALL_FILE);
-               seq_printf(m, " N%d=%lu", nid, node_nr);
-       }
-       seq_putc(m, '\n');
-
-       anon_nr = mem_cgroup_nr_lru_pages(memcg, LRU_ALL_ANON);
-       seq_printf(m, "anon=%lu", anon_nr);
-       for_each_node_state(nid, N_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-                               LRU_ALL_ANON);
-               seq_printf(m, " N%d=%lu", nid, node_nr);
+       for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
+               nr = mem_cgroup_nr_lru_pages(memcg, stat->lru_mask);
+               seq_printf(m, "%s=%lu", stat->name, nr);
+               for_each_node_state(nid, N_MEMORY) { /* per-node breakdown */
+                       nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
+                                                         stat->lru_mask);
+                       seq_printf(m, " N%d=%lu", nid, nr);
+               }
+               seq_putc(m, '\n'); /* one line per stat */
+       }
+
+       for (stat = stats; stat < stats + ARRAY_SIZE(stats); stat++) {
+               struct mem_cgroup *iter;
+
+               nr = 0; /* sum over for_each_mem_cgroup_tree(iter, memcg) */
+               for_each_mem_cgroup_tree(iter, memcg)
+                       nr += mem_cgroup_nr_lru_pages(iter, stat->lru_mask);
+               seq_printf(m, "hierarchical_%s=%lu", stat->name, nr);
+               for_each_node_state(nid, N_MEMORY) {
+                       nr = 0; /* restart the sum for each node */
+                       for_each_mem_cgroup_tree(iter, memcg)
+                               nr += mem_cgroup_node_nr_lru_pages(
+                                       iter, nid, stat->lru_mask);
+                       seq_printf(m, " N%d=%lu", nid, nr);
+               }
+               seq_putc(m, '\n');
         }
-       seq_putc(m, '\n');
  
-       unevictable_nr = mem_cgroup_nr_lru_pages(memcg, BIT(LRU_UNEVICTABLE));
-       seq_printf(m, "unevictable=%lu", unevictable_nr);
-       for_each_node_state(nid, N_MEMORY) {
-               node_nr = mem_cgroup_node_nr_lru_pages(memcg, nid,
-                               BIT(LRU_UNEVICTABLE));
-               seq_printf(m, " N%d=%lu", nid, node_nr);
-       }
-       seq_putc(m, '\n');
         return 0;
  }
  #endif /* CONFIG_NUMA */
@@ -6166,7 +6178,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
         size_t size = memcg_size();
  
         mem_cgroup_remove_from_trees(memcg);
-       free_css_id(&mem_cgroup_subsys, &memcg->css);
  
         for_each_node(node)
                 free_mem_cgroup_per_zone_info(memcg, node);
@@ -6269,6 +6280,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
         struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
         int error = 0;
  
+       if (css->cgroup->id > MEM_CGROUP_ID_MAX)
+               return -ENOSPC;
+
         if (!parent)
                 return 0;
  
@@ -6540,7 +6554,7 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
         }
         /* There is a swap entry and a page doesn't exist or isn't charged */
         if (ent.val && !ret &&
-                       css_id(&mc.from->css) == lookup_swap_cgroup_id(ent)) {
+           mem_cgroup_id(mc.from) == lookup_swap_cgroup_id(ent)) {
                 ret = MC_TARGET_SWAP;
                 if (target)
                         target->ent = ent;
@@ -6960,7 +6974,6 @@ struct cgroup_subsys mem_cgroup_subsys = {
         .bind = mem_cgroup_bind,
         .base_cftypes = mem_cgroup_files,
         .early_init = 0,
-       .use_id = 1,
  };
  
  #ifdef CONFIG_MEMCG_SWAP