Merge branch 'for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

[cascardo/linux.git] / kernel / sched / core.c
diff --git a/kernel/sched/core.c b/kernel/sched/core.c

index 34cb9f7..44253ad 100644 (file)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -731,7 +731,7 @@ bool sched_can_stop_tick(void)
         if (current->policy == SCHED_RR) {
                 struct sched_rt_entity *rt_se = &current->rt;
  
-               return rt_se->run_list.prev == rt_se->run_list.next;
+               return list_is_singular(&rt_se->run_list);
         }
  
         /*
@@ -823,8 +823,8 @@ static void set_load_weight(struct task_struct *p)
                 return;
         }
  
-       load->weight = scale_load(prio_to_weight[prio]);
-       load->inv_weight = prio_to_wmult[prio];
+       load->weight = scale_load(sched_prio_to_weight[prio]);
+       load->inv_weight = sched_prio_to_wmult[prio];
  }
  
  static inline void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
@@ -1071,8 +1071,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
  {
         lockdep_assert_held(&rq->lock);
  
-       dequeue_task(rq, p, 0);
         p->on_rq = TASK_ON_RQ_MIGRATING;
+       dequeue_task(rq, p, 0);
         set_task_cpu(p, new_cpu);
         raw_spin_unlock(&rq->lock);
  
@@ -1080,8 +1080,8 @@ static struct rq *move_queued_task(struct rq *rq, struct task_struct *p, int new
  
         raw_spin_lock(&rq->lock);
         BUG_ON(task_cpu(p) != new_cpu);
-       p->on_rq = TASK_ON_RQ_QUEUED;
         enqueue_task(rq, p, 0);
+       p->on_rq = TASK_ON_RQ_QUEUED;
         check_preempt_curr(rq, p, 0);
  
         return rq;
@@ -1274,6 +1274,15 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
         WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
                         !p->on_rq);
  
+       /*
+        * Migrating fair class task must have p->on_rq = TASK_ON_RQ_MIGRATING,
+        * because schedstat_wait_{start,end} rebase migrating task's wait_start
+        * time relying on p->on_rq.
+        */
+       WARN_ON_ONCE(p->state == TASK_RUNNING &&
+                    p->sched_class == &fair_sched_class &&
+                    (p->on_rq && !task_on_rq_migrating(p)));
+
  #ifdef CONFIG_LOCKDEP
         /*
          * The caller should hold either p->pi_lock or rq->lock, when changing
@@ -1310,9 +1319,11 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
                 src_rq = task_rq(p);
                 dst_rq = cpu_rq(cpu);
  
+               p->on_rq = TASK_ON_RQ_MIGRATING;
                 deactivate_task(src_rq, p, 0);
                 set_task_cpu(p, cpu);
                 activate_task(dst_rq, p, 0);
+               p->on_rq = TASK_ON_RQ_QUEUED;
                 check_preempt_curr(dst_rq, p, 0);
         } else {
                 /*
@@ -2194,6 +2205,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
         p->se.vruntime                  = 0;
         INIT_LIST_HEAD(&p->se.group_node);
  
+#ifdef CONFIG_FAIR_GROUP_SCHED
+       p->se.cfs_rq                    = NULL;
+#endif
+
  #ifdef CONFIG_SCHEDSTATS
         memset(&p->se.statistics, 0, sizeof(p->se.statistics));
  #endif
@@ -7442,6 +7457,9 @@ int in_sched_functions(unsigned long addr)
   */
  struct task_group root_task_group;
  LIST_HEAD(task_groups);
+
+/* Cacheline aligned slab cache for task_group */
+static struct kmem_cache *task_group_cache __read_mostly;
  #endif
  
  DECLARE_PER_CPU(cpumask_var_t, load_balance_mask);
@@ -7499,11 +7517,12 @@ void __init sched_init(void)
  #endif /* CONFIG_RT_GROUP_SCHED */
  
  #ifdef CONFIG_CGROUP_SCHED
+       task_group_cache = KMEM_CACHE(task_group, 0);
+
         list_add(&root_task_group.list, &task_groups);
         INIT_LIST_HEAD(&root_task_group.children);
         INIT_LIST_HEAD(&root_task_group.siblings);
         autogroup_init(&init_task);
-
  #endif /* CONFIG_CGROUP_SCHED */
  
         for_each_possible_cpu(i) {
@@ -7784,7 +7803,7 @@ static void free_sched_group(struct task_group *tg)
         free_fair_sched_group(tg);
         free_rt_sched_group(tg);
         autogroup_free(tg);
-       kfree(tg);
+       kmem_cache_free(task_group_cache, tg);
  }
  
  /* allocate runqueue etc for a new task group */
@@ -7792,7 +7811,7 @@ struct task_group *sched_create_group(struct task_group *parent)
  {
         struct task_group *tg;
  
-       tg = kzalloc(sizeof(*tg), GFP_KERNEL);
+       tg = kmem_cache_alloc(task_group_cache, GFP_KERNEL | __GFP_ZERO);
         if (!tg)
                 return ERR_PTR(-ENOMEM);
  
@@ -8323,7 +8342,7 @@ static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
         sched_offline_group(tg);
  }
  
-static void cpu_cgroup_fork(struct task_struct *task, void *private)
+static void cpu_cgroup_fork(struct task_struct *task)
  {
         sched_move_task(task);
  }
@@ -8697,3 +8716,44 @@ void dump_cpu_task(int cpu)
         pr_info("Task dump for CPU %d:\n", cpu);
         sched_show_task(cpu_curr(cpu));
  }
+
+/*
+ * Nice levels are multiplicative, with a gentle 10% change for every
+ * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
+ * nice 1, it will get ~10% less CPU time than another CPU-bound task
+ * that remained on nice 0.
+ *
+ * The "10% effect" is relative and cumulative: from _any_ nice level,
+ * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
+ * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
+ * If a task goes up by ~10% and another task goes down by ~10% then
+ * the relative distance between them is ~25%.)
+ */
+const int sched_prio_to_weight[40] = {
+ /* -20 */     88761,     71755,     56483,     46273,     36291,
+ /* -15 */     29154,     23254,     18705,     14949,     11916,
+ /* -10 */      9548,      7620,      6100,      4904,      3906,
+ /*  -5 */      3121,      2501,      1991,      1586,      1277,
+ /*   0 */      1024,       820,       655,       526,       423,
+ /*   5 */       335,       272,       215,       172,       137,
+ /*  10 */       110,        87,        70,        56,        45,
+ /*  15 */        36,        29,        23,        18,        15,
+};
+
+/*
+ * Inverse (2^32/x) values of the sched_prio_to_weight[] array, precalculated.
+ *
+ * In cases where the weight does not change often, we can use the
+ * precalculated inverse to speed up arithmetics by turning divisions
+ * into multiplications:
+ */
+const u32 sched_prio_to_wmult[40] = {
+ /* -20 */     48388,     59856,     76040,     92818,    118348,
+ /* -15 */    147320,    184698,    229616,    287308,    360437,
+ /* -10 */    449829,    563644,    704093,    875809,   1099582,
+ /*  -5 */   1376151,   1717300,   2157191,   2708050,   3363326,
+ /*   0 */   4194304,   5237765,   6557202,   8165337,  10153587,
+ /*   5 */  12820798,  15790321,  19976592,  24970740,  31350126,
+ /*  10 */  39045157,  49367440,  61356676,  76695844,  95443717,
+ /*  15 */ 119304647, 148102320, 186737708, 238609294, 286331153,
+};