Merge branch 'for-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

[cascardo/linux.git] / kernel / cpuset.c
diff --git a/kernel/cpuset.c b/kernel/cpuset.c

index e6b1b66..e2dbb60 100644 (file)
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -119,7 +119,7 @@ static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
  /* Retrieve the cpuset for a task */
  static inline struct cpuset *task_cs(struct task_struct *task)
  {
-       return css_cs(task_css(task, cpuset_subsys_id));
+       return css_cs(task_css(task, cpuset_cgrp_id));
  }
  
  static inline struct cpuset *parent_cs(struct cpuset *cs)
@@ -467,7 +467,7 @@ static int validate_change(struct cpuset *cur, struct cpuset *trial)
          * be changed to have empty cpus_allowed or mems_allowed.
          */
         ret = -ENOSPC;
-       if ((cgroup_task_count(cur->css.cgroup) || cur->attach_in_progress)) {
+       if ((cgroup_has_tasks(cur->css.cgroup) || cur->attach_in_progress)) {
                 if (!cpumask_empty(cur->cpus_allowed) &&
                     cpumask_empty(trial->cpus_allowed))
                         goto out;
@@ -828,56 +828,37 @@ static struct cpuset *effective_nodemask_cpuset(struct cpuset *cs)
         return cs;
  }
  
-/**
- * cpuset_change_cpumask - make a task's cpus_allowed the same as its cpuset's
- * @tsk: task to test
- * @data: cpuset to @tsk belongs to
- *
- * Called by css_scan_tasks() for each task in a cgroup whose cpus_allowed
- * mask needs to be changed.
- *
- * We don't need to re-check for the cgroup/cpuset membership, since we're
- * holding cpuset_mutex at this point.
- */
-static void cpuset_change_cpumask(struct task_struct *tsk, void *data)
-{
-       struct cpuset *cs = data;
-       struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
-
-       set_cpus_allowed_ptr(tsk, cpus_cs->cpus_allowed);
-}
-
  /**
   * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
   * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
- * @heap: if NULL, defer allocating heap memory to css_scan_tasks()
- *
- * Called with cpuset_mutex held
   *
- * The css_scan_tasks() function will scan all the tasks in a cgroup,
- * calling callback functions for each.
- *
- * No return value. It's guaranteed that css_scan_tasks() always returns 0
- * if @heap != NULL.
+ * Iterate through each task of @cs, setting its cpus_allowed to that of
+ * the effective cpuset of @cs.  As this function is called with
+ * cpuset_mutex held, cpuset membership stays stable.
   */
-static void update_tasks_cpumask(struct cpuset *cs, struct ptr_heap *heap)
+static void update_tasks_cpumask(struct cpuset *cs)
  {
-       css_scan_tasks(&cs->css, NULL, cpuset_change_cpumask, cs, heap);
+       struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&cs->css, &it);
+       while ((task = css_task_iter_next(&it)))
+               set_cpus_allowed_ptr(task, cpus_cs->cpus_allowed);
+       css_task_iter_end(&it);
  }
  
  /*
   * update_tasks_cpumask_hier - Update the cpumasks of tasks in the hierarchy.
   * @root_cs: the root cpuset of the hierarchy
   * @update_root: update root cpuset or not?
- * @heap: the heap used by css_scan_tasks()
   *
   * This will update cpumasks of tasks in @root_cs and all other empty cpusets
   * which take on cpumask of @root_cs.
   *
   * Called with cpuset_mutex held
   */
-static void update_tasks_cpumask_hier(struct cpuset *root_cs,
-                                     bool update_root, struct ptr_heap *heap)
+static void update_tasks_cpumask_hier(struct cpuset *root_cs, bool update_root)
  {
         struct cpuset *cp;
         struct cgroup_subsys_state *pos_css;
@@ -898,7 +879,7 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
                         continue;
                 rcu_read_unlock();
  
-               update_tasks_cpumask(cp, heap);
+               update_tasks_cpumask(cp);
  
                 rcu_read_lock();
                 css_put(&cp->css);
@@ -914,7 +895,6 @@ static void update_tasks_cpumask_hier(struct cpuset *root_cs,
  static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
                           const char *buf)
  {
-       struct ptr_heap heap;
         int retval;
         int is_load_balanced;
  
@@ -947,19 +927,13 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
         if (retval < 0)
                 return retval;
  
-       retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
-       if (retval)
-               return retval;
-
         is_load_balanced = is_sched_load_balance(trialcs);
  
         mutex_lock(&callback_mutex);
         cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
         mutex_unlock(&callback_mutex);
  
-       update_tasks_cpumask_hier(cs, true, &heap);
-
-       heap_free(&heap);
+       update_tasks_cpumask_hier(cs, true);
  
         if (is_load_balanced)
                 rebuild_sched_domains_locked();
@@ -1048,53 +1022,22 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
         task_unlock(tsk);
  }
  
-struct cpuset_change_nodemask_arg {
-       struct cpuset           *cs;
-       nodemask_t              *newmems;
-};
-
-/*
- * Update task's mems_allowed and rebind its mempolicy and vmas' mempolicy
- * of it to cpuset's new mems_allowed, and migrate pages to new nodes if
- * memory_migrate flag is set. Called with cpuset_mutex held.
- */
-static void cpuset_change_nodemask(struct task_struct *p, void *data)
-{
-       struct cpuset_change_nodemask_arg *arg = data;
-       struct cpuset *cs = arg->cs;
-       struct mm_struct *mm;
-       int migrate;
-
-       cpuset_change_task_nodemask(p, arg->newmems);
-
-       mm = get_task_mm(p);
-       if (!mm)
-               return;
-
-       migrate = is_memory_migrate(cs);
-
-       mpol_rebind_mm(mm, &cs->mems_allowed);
-       if (migrate)
-               cpuset_migrate_mm(mm, &cs->old_mems_allowed, arg->newmems);
-       mmput(mm);
-}
-
  static void *cpuset_being_rebound;
  
  /**
   * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
   * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
- * @heap: if NULL, defer allocating heap memory to css_scan_tasks()
   *
- * Called with cpuset_mutex held.  No return value. It's guaranteed that
- * css_scan_tasks() always returns 0 if @heap != NULL.
+ * Iterate through each task of @cs, setting its mems_allowed to that of
+ * the effective cpuset of @cs.  As this function is called with
+ * cpuset_mutex held, cpuset membership stays stable.
   */
-static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
+static void update_tasks_nodemask(struct cpuset *cs)
  {
         static nodemask_t newmems;      /* protected by cpuset_mutex */
         struct cpuset *mems_cs = effective_nodemask_cpuset(cs);
-       struct cpuset_change_nodemask_arg arg = { .cs = cs,
-                                                 .newmems = &newmems };
+       struct css_task_iter it;
+       struct task_struct *task;
  
         cpuset_being_rebound = cs;              /* causes mpol_dup() rebind */
  
@@ -1110,7 +1053,25 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
          * It's ok if we rebind the same mm twice; mpol_rebind_mm()
          * is idempotent.  Also migrate pages in each mm to new nodes.
          */
-       css_scan_tasks(&cs->css, NULL, cpuset_change_nodemask, &arg, heap);
+       css_task_iter_start(&cs->css, &it);
+       while ((task = css_task_iter_next(&it))) {
+               struct mm_struct *mm;
+               bool migrate;
+
+               cpuset_change_task_nodemask(task, &newmems);
+
+               mm = get_task_mm(task);
+               if (!mm)
+                       continue;
+
+               migrate = is_memory_migrate(cs);
+
+               mpol_rebind_mm(mm, &cs->mems_allowed);
+               if (migrate)
+                       cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
+               mmput(mm);
+       }
+       css_task_iter_end(&it);
  
         /*
          * All the tasks' nodemasks have been updated, update
@@ -1126,15 +1087,13 @@ static void update_tasks_nodemask(struct cpuset *cs, struct ptr_heap *heap)
   * update_tasks_nodemask_hier - Update the nodemasks of tasks in the hierarchy.
   * @cs: the root cpuset of the hierarchy
   * @update_root: update the root cpuset or not?
- * @heap: the heap used by css_scan_tasks()
   *
   * This will update nodemasks of tasks in @root_cs and all other empty cpusets
   * which take on nodemask of @root_cs.
   *
   * Called with cpuset_mutex held
   */
-static void update_tasks_nodemask_hier(struct cpuset *root_cs,
-                                      bool update_root, struct ptr_heap *heap)
+static void update_tasks_nodemask_hier(struct cpuset *root_cs, bool update_root)
  {
         struct cpuset *cp;
         struct cgroup_subsys_state *pos_css;
@@ -1155,7 +1114,7 @@ static void update_tasks_nodemask_hier(struct cpuset *root_cs,
                         continue;
                 rcu_read_unlock();
  
-               update_tasks_nodemask(cp, heap);
+               update_tasks_nodemask(cp);
  
                 rcu_read_lock();
                 css_put(&cp->css);
@@ -1180,7 +1139,6 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
                            const char *buf)
  {
         int retval;
-       struct ptr_heap heap;
  
         /*
          * top_cpuset.mems_allowed tracks node_stats[N_MEMORY];
@@ -1219,17 +1177,11 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
         if (retval < 0)
                 goto done;
  
-       retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
-       if (retval < 0)
-               goto done;
-
         mutex_lock(&callback_mutex);
         cs->mems_allowed = trialcs->mems_allowed;
         mutex_unlock(&callback_mutex);
  
-       update_tasks_nodemask_hier(cs, true, &heap);
-
-       heap_free(&heap);
+       update_tasks_nodemask_hier(cs, true);
  done:
         return retval;
  }
@@ -1256,39 +1208,23 @@ static int update_relax_domain_level(struct cpuset *cs, s64 val)
         return 0;
  }
  
-/**
- * cpuset_change_flag - make a task's spread flags the same as its cpuset's
- * @tsk: task to be updated
- * @data: cpuset to @tsk belongs to
- *
- * Called by css_scan_tasks() for each task in a cgroup.
- *
- * We don't need to re-check for the cgroup/cpuset membership, since we're
- * holding cpuset_mutex at this point.
- */
-static void cpuset_change_flag(struct task_struct *tsk, void *data)
-{
-       struct cpuset *cs = data;
-
-       cpuset_update_task_spread_flag(cs, tsk);
-}
-
  /**
   * update_tasks_flags - update the spread flags of tasks in the cpuset.
   * @cs: the cpuset in which each task's spread flags needs to be changed
- * @heap: if NULL, defer allocating heap memory to css_scan_tasks()
- *
- * Called with cpuset_mutex held
   *
- * The css_scan_tasks() function will scan all the tasks in a cgroup,
- * calling callback functions for each.
- *
- * No return value. It's guaranteed that css_scan_tasks() always returns 0
- * if @heap != NULL.
+ * Iterate through each task of @cs updating its spread flags.  As this
+ * function is called with cpuset_mutex held, cpuset membership stays
+ * stable.
   */
-static void update_tasks_flags(struct cpuset *cs, struct ptr_heap *heap)
+static void update_tasks_flags(struct cpuset *cs)
  {
-       css_scan_tasks(&cs->css, NULL, cpuset_change_flag, cs, heap);
+       struct css_task_iter it;
+       struct task_struct *task;
+
+       css_task_iter_start(&cs->css, &it);
+       while ((task = css_task_iter_next(&it)))
+               cpuset_update_task_spread_flag(cs, task);
+       css_task_iter_end(&it);
  }
  
  /*
@@ -1306,7 +1242,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
         struct cpuset *trialcs;
         int balance_flag_changed;
         int spread_flag_changed;
-       struct ptr_heap heap;
         int err;
  
         trialcs = alloc_trial_cpuset(cs);
@@ -1322,10 +1257,6 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
         if (err < 0)
                 goto out;
  
-       err = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
-       if (err < 0)
-               goto out;
-
         balance_flag_changed = (is_sched_load_balance(cs) !=
                                 is_sched_load_balance(trialcs));
  
@@ -1340,8 +1271,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
                 rebuild_sched_domains_locked();
  
         if (spread_flag_changed)
-               update_tasks_flags(cs, &heap);
-       heap_free(&heap);
+               update_tasks_flags(cs);
  out:
         free_trial_cpuset(trialcs);
         return err;
@@ -1445,6 +1375,8 @@ static int fmeter_getrate(struct fmeter *fmp)
         return val;
  }
  
+static struct cpuset *cpuset_attach_old_cs;
+
  /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
  static int cpuset_can_attach(struct cgroup_subsys_state *css,
                              struct cgroup_taskset *tset)
@@ -1453,6 +1385,9 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
         struct task_struct *task;
         int ret;
  
+       /* used later by cpuset_attach() */
+       cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset));
+
         mutex_lock(&cpuset_mutex);
  
         /*
@@ -1464,7 +1399,7 @@ static int cpuset_can_attach(struct cgroup_subsys_state *css,
             (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
                 goto out_unlock;
  
-       cgroup_taskset_for_each(task, css, tset) {
+       cgroup_taskset_for_each(task, tset) {
                 /*
                  * Kthreads which disallow setaffinity shouldn't be moved
                  * to a new cpuset; we don't want to change their cpu
@@ -1516,10 +1451,8 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
         struct mm_struct *mm;
         struct task_struct *task;
         struct task_struct *leader = cgroup_taskset_first(tset);
-       struct cgroup_subsys_state *oldcss = cgroup_taskset_cur_css(tset,
-                                                       cpuset_subsys_id);
         struct cpuset *cs = css_cs(css);
-       struct cpuset *oldcs = css_cs(oldcss);
+       struct cpuset *oldcs = cpuset_attach_old_cs;
         struct cpuset *cpus_cs = effective_cpumask_cpuset(cs);
         struct cpuset *mems_cs = effective_nodemask_cpuset(cs);
  
@@ -1533,7 +1466,7 @@ static void cpuset_attach(struct cgroup_subsys_state *css,
  
         guarantee_online_mems(mems_cs, &cpuset_attach_nodemask_to);
  
-       cgroup_taskset_for_each(task, css, tset) {
+       cgroup_taskset_for_each(task, tset) {
                 /*
                  * can_attach beforehand should guarantee that this doesn't
                  * fail.  TODO: have a better way to handle failure here
@@ -1673,7 +1606,7 @@ out_unlock:
   * Common handling for a write to a "cpus" or "mems" file.
   */
  static int cpuset_write_resmask(struct cgroup_subsys_state *css,
-                               struct cftype *cft, const char *buf)
+                               struct cftype *cft, char *buf)
  {
         struct cpuset *cs = css_cs(css);
         struct cpuset *trialcs;
@@ -2020,8 +1953,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
         kfree(cs);
  }
  
-struct cgroup_subsys cpuset_subsys = {
-       .name = "cpuset",
+struct cgroup_subsys cpuset_cgrp_subsys = {
         .css_alloc = cpuset_css_alloc,
         .css_online = cpuset_css_online,
         .css_offline = cpuset_css_offline,
@@ -2029,7 +1961,6 @@ struct cgroup_subsys cpuset_subsys = {
         .can_attach = cpuset_can_attach,
         .cancel_attach = cpuset_cancel_attach,
         .attach = cpuset_attach,
-       .subsys_id = cpuset_subsys_id,
         .base_cftypes = files,
         .early_init = 1,
  };
@@ -2086,10 +2017,9 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
                 parent = parent_cs(parent);
  
         if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
-               rcu_read_lock();
-               printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset %s\n",
-                      cgroup_name(cs->css.cgroup));
-               rcu_read_unlock();
+               printk(KERN_ERR "cpuset: failed to transfer tasks out of empty cpuset ");
+               pr_cont_cgroup_name(cs->css.cgroup);
+               pr_cont("\n");
         }
  }
  
@@ -2137,7 +2067,7 @@ retry:
          */
         if ((sane && cpumask_empty(cs->cpus_allowed)) ||
             (!cpumask_empty(&off_cpus) && !cpumask_empty(cs->cpus_allowed)))
-               update_tasks_cpumask(cs, NULL);
+               update_tasks_cpumask(cs);
  
         mutex_lock(&callback_mutex);
         nodes_andnot(cs->mems_allowed, cs->mems_allowed, off_mems);
@@ -2151,7 +2081,7 @@ retry:
          */
         if ((sane && nodes_empty(cs->mems_allowed)) ||
             (!nodes_empty(off_mems) && !nodes_empty(cs->mems_allowed)))
-               update_tasks_nodemask(cs, NULL);
+               update_tasks_nodemask(cs);
  
         is_empty = cpumask_empty(cs->cpus_allowed) ||
                 nodes_empty(cs->mems_allowed);
@@ -2213,7 +2143,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
                 mutex_lock(&callback_mutex);
                 top_cpuset.mems_allowed = new_mems;
                 mutex_unlock(&callback_mutex);
-               update_tasks_nodemask(&top_cpuset, NULL);
+               update_tasks_nodemask(&top_cpuset);
         }
  
         mutex_unlock(&cpuset_mutex);
@@ -2305,10 +2235,10 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
         struct cpuset *cpus_cs;
  
         mutex_lock(&callback_mutex);
-       task_lock(tsk);
+       rcu_read_lock();
         cpus_cs = effective_cpumask_cpuset(task_cs(tsk));
         guarantee_online_cpus(cpus_cs, pmask);
-       task_unlock(tsk);
+       rcu_read_unlock();
         mutex_unlock(&callback_mutex);
  }
  
@@ -2361,10 +2291,10 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
         nodemask_t mask;
  
         mutex_lock(&callback_mutex);
-       task_lock(tsk);
+       rcu_read_lock();
         mems_cs = effective_nodemask_cpuset(task_cs(tsk));
         guarantee_online_mems(mems_cs, &mask);
-       task_unlock(tsk);
+       rcu_read_unlock();
         mutex_unlock(&callback_mutex);
  
         return mask;
@@ -2480,10 +2410,10 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)
         /* Not hardwall and node outside mems_allowed: scan up cpusets */
         mutex_lock(&callback_mutex);
  
-       task_lock(current);
+       rcu_read_lock();
         cs = nearest_hardwall_ancestor(task_cs(current));
         allowed = node_isset(node, cs->mems_allowed);
-       task_unlock(current);
+       rcu_read_unlock();
  
         mutex_unlock(&callback_mutex);
         return allowed;
@@ -2609,27 +2539,27 @@ int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
   * @task: pointer to task_struct of some task.
   *
   * Description: Prints @task's name, cpuset name, and cached copy of its
- * mems_allowed to the kernel log.  Must hold task_lock(task) to allow
- * dereferencing task_cs(task).
+ * mems_allowed to the kernel log.
   */
  void cpuset_print_task_mems_allowed(struct task_struct *tsk)
  {
          /* Statically allocated to prevent using excess stack. */
         static char cpuset_nodelist[CPUSET_NODELIST_LEN];
         static DEFINE_SPINLOCK(cpuset_buffer_lock);
+       struct cgroup *cgrp;
  
-       struct cgroup *cgrp = task_cs(tsk)->css.cgroup;
-
-       rcu_read_lock();
         spin_lock(&cpuset_buffer_lock);
+       rcu_read_lock();
  
+       cgrp = task_cs(tsk)->css.cgroup;
         nodelist_scnprintf(cpuset_nodelist, CPUSET_NODELIST_LEN,
                            tsk->mems_allowed);
-       printk(KERN_INFO "%s cpuset=%s mems_allowed=%s\n",
-              tsk->comm, cgroup_name(cgrp), cpuset_nodelist);
+       printk(KERN_INFO "%s cpuset=", tsk->comm);
+       pr_cont_cgroup_name(cgrp);
+       pr_cont(" mems_allowed=%s\n", cpuset_nodelist);
  
-       spin_unlock(&cpuset_buffer_lock);
         rcu_read_unlock();
+       spin_unlock(&cpuset_buffer_lock);
  }
  
  /*
@@ -2660,9 +2590,9 @@ int cpuset_memory_pressure_enabled __read_mostly;
  
  void __cpuset_memory_pressure_bump(void)
  {
-       task_lock(current);
+       rcu_read_lock();
         fmeter_markevent(&task_cs(current)->fmeter);
-       task_unlock(current);
+       rcu_read_unlock();
  }
  
  #ifdef CONFIG_PROC_PID_CPUSET
@@ -2679,12 +2609,12 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
  {
         struct pid *pid;
         struct task_struct *tsk;
-       char *buf;
+       char *buf, *p;
         struct cgroup_subsys_state *css;
         int retval;
  
         retval = -ENOMEM;
-       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       buf = kmalloc(PATH_MAX, GFP_KERNEL);
         if (!buf)
                 goto out;
  
@@ -2694,14 +2624,16 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v)
         if (!tsk)
                 goto out_free;
  
+       retval = -ENAMETOOLONG;
         rcu_read_lock();
-       css = task_css(tsk, cpuset_subsys_id);
-       retval = cgroup_path(css->cgroup, buf, PAGE_SIZE);
+       css = task_css(tsk, cpuset_cgrp_id);
+       p = cgroup_path(css->cgroup, buf, PATH_MAX);
         rcu_read_unlock();
-       if (retval < 0)
+       if (!p)
                 goto out_put_task;
-       seq_puts(m, buf);
+       seq_puts(m, p);
         seq_putc(m, '\n');
+       retval = 0;
  out_put_task:
         put_task_struct(tsk);
  out_free: