Merge branch 'for-4.6' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0f5abc6..4ee3ce7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -26,6 +26,7 @@
  *              Thomas Gleixner, Mike Kravetz
  */
 
+#include <linux/kasan.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/nmi.h>
 #include <linux/pagemap.h>
 #include <linux/hrtimer.h>
 #include <linux/tick.h>
-#include <linux/debugfs.h>
 #include <linux/ctype.h>
 #include <linux/ftrace.h>
 #include <linux/slab.h>
 #include <linux/init_task.h>
-#include <linux/binfmts.h>
 #include <linux/context_tracking.h>
 #include <linux/compiler.h>
 
@@ -124,138 +123,6 @@ const_debug unsigned int sysctl_sched_features =
 
 #undef SCHED_FEAT
 
-#ifdef CONFIG_SCHED_DEBUG
-#define SCHED_FEAT(name, enabled)      \
-       #name ,
-
-static const char * const sched_feat_names[] = {
-#include "features.h"
-};
-
-#undef SCHED_FEAT
-
-static int sched_feat_show(struct seq_file *m, void *v)
-{
-       int i;
-
-       for (i = 0; i < __SCHED_FEAT_NR; i++) {
-               if (!(sysctl_sched_features & (1UL << i)))
-                       seq_puts(m, "NO_");
-               seq_printf(m, "%s ", sched_feat_names[i]);
-       }
-       seq_puts(m, "\n");
-
-       return 0;
-}
-
-#ifdef HAVE_JUMP_LABEL
-
-#define jump_label_key__true  STATIC_KEY_INIT_TRUE
-#define jump_label_key__false STATIC_KEY_INIT_FALSE
-
-#define SCHED_FEAT(name, enabled)      \
-       jump_label_key__##enabled ,
-
-struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
-#include "features.h"
-};
-
-#undef SCHED_FEAT
-
-static void sched_feat_disable(int i)
-{
-       static_key_disable(&sched_feat_keys[i]);
-}
-
-static void sched_feat_enable(int i)
-{
-       static_key_enable(&sched_feat_keys[i]);
-}
-#else
-static void sched_feat_disable(int i) { };
-static void sched_feat_enable(int i) { };
-#endif /* HAVE_JUMP_LABEL */
-
-static int sched_feat_set(char *cmp)
-{
-       int i;
-       int neg = 0;
-
-       if (strncmp(cmp, "NO_", 3) == 0) {
-               neg = 1;
-               cmp += 3;
-       }
-
-       for (i = 0; i < __SCHED_FEAT_NR; i++) {
-               if (strcmp(cmp, sched_feat_names[i]) == 0) {
-                       if (neg) {
-                               sysctl_sched_features &= ~(1UL << i);
-                               sched_feat_disable(i);
-                       } else {
-                               sysctl_sched_features |= (1UL << i);
-                               sched_feat_enable(i);
-                       }
-                       break;
-               }
-       }
-
-       return i;
-}
-
-static ssize_t
-sched_feat_write(struct file *filp, const char __user *ubuf,
-               size_t cnt, loff_t *ppos)
-{
-       char buf[64];
-       char *cmp;
-       int i;
-       struct inode *inode;
-
-       if (cnt > 63)
-               cnt = 63;
-
-       if (copy_from_user(&buf, ubuf, cnt))
-               return -EFAULT;
-
-       buf[cnt] = 0;
-       cmp = strstrip(buf);
-
-       /* Ensure the static_key remains in a consistent state */
-       inode = file_inode(filp);
-       mutex_lock(&inode->i_mutex);
-       i = sched_feat_set(cmp);
-       mutex_unlock(&inode->i_mutex);
-       if (i == __SCHED_FEAT_NR)
-               return -EINVAL;
-
-       *ppos += cnt;
-
-       return cnt;
-}
-
-static int sched_feat_open(struct inode *inode, struct file *filp)
-{
-       return single_open(filp, sched_feat_show, NULL);
-}
-
-static const struct file_operations sched_feat_fops = {
-       .open           = sched_feat_open,
-       .write          = sched_feat_write,
-       .read           = seq_read,
-       .llseek         = seq_lseek,
-       .release        = single_release,
-};
-
-static __init int sched_init_debug(void)
-{
-       debugfs_create_file("sched_features", 0644, NULL, NULL,
-                       &sched_feat_fops);
-
-       return 0;
-}
-late_initcall(sched_init_debug);
-#endif /* CONFIG_SCHED_DEBUG */
-
 /*
  * Number of tasks to iterate in a single balance run.
  * Limited because this is done with IRQs disabled.
@@ -453,20 +320,6 @@ static inline void init_hrtick(void)
 }
 #endif /* CONFIG_SCHED_HRTICK */
 
-/*
- * cmpxchg based fetch_or, macro so it works for different integer types
- */
-#define fetch_or(ptr, val)                                             \
-({     typeof(*(ptr)) __old, __val = *(ptr);                           \
-       for (;;) {                                                      \
-               __old = cmpxchg((ptr), __val, __val | (val));           \
-               if (__old == __val)                                     \
-                       break;                                          \
-               __val = __old;                                          \
-       }                                                               \
-       __old;                                                          \
-})
-
 #if defined(CONFIG_SMP) && defined(TIF_POLLING_NRFLAG)
 /*
  * Atomically set TIF_NEED_RESCHED and test for TIF_POLLING_NRFLAG,
@@ -715,31 +568,36 @@ static inline bool got_nohz_idle_kick(void)
 #endif /* CONFIG_NO_HZ_COMMON */
 
 #ifdef CONFIG_NO_HZ_FULL
-bool sched_can_stop_tick(void)
+bool sched_can_stop_tick(struct rq *rq)
 {
+       int fifo_nr_running;
+
+       /* Deadline tasks, even if single, need the tick */
+       if (rq->dl.dl_nr_running)
+               return false;
+
        /*
-        * FIFO realtime policy runs the highest priority task. Other runnable
-        * tasks are of a lower priority. The scheduler tick does nothing.
+        * FIFO realtime policy runs the highest priority task (after DEADLINE).
+        * Other runnable tasks are of a lower priority. The scheduler tick
+        * isn't needed.
         */
-       if (current->policy == SCHED_FIFO)
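+       /*
+        * rt_nr_running counts all runnable RT tasks and rr_nr_running the
+        * SCHED_RR subset, so the difference is the number of FIFO tasks.
+        */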
+       fifo_nr_running = rq->rt.rt_nr_running - rq->rt.rr_nr_running;
+       if (fifo_nr_running)
                return true;
 
        /*
         * Round-robin realtime tasks time slice with other tasks at the same
-        * realtime priority. Is this task the only one at this priority?
+        * realtime priority.
         */
-       if (current->policy == SCHED_RR) {
-               struct sched_rt_entity *rt_se = &current->rt;
-
-               return list_is_singular(&rt_se->run_list);
-       }
+       if (rq->rt.rr_nr_running)
+               return rq->rt.rr_nr_running == 1;
 
-       /*
-        * More than one running task need preemption.
-        * nr_running update is assumed to be visible
-        * after IPI is sent from wakers.
-        */
-       if (this_rq()->nr_running > 1)
+       /* Normal multitasking needs periodic preemption checks */
+       if (rq->cfs.nr_running > 1)
                return false;
 
        return true;
@@ -2093,7 +1951,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
        ttwu_queue(p, cpu);
 stat:
-       ttwu_stat(p, cpu, wake_flags);
+       if (schedstat_enabled())
+               ttwu_stat(p, cpu, wake_flags);
 out:
        raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
@@ -2141,7 +2000,8 @@ static void try_to_wake_up_local(struct task_struct *p)
                ttwu_activate(rq, p, ENQUEUE_WAKEUP);
 
        ttwu_do_wakeup(rq, p, 0);
-       ttwu_stat(p, smp_processor_id(), 0);
+       if (schedstat_enabled())
+               ttwu_stat(p, smp_processor_id(), 0);
 out:
        raw_spin_unlock(&p->pi_lock);
 }
@@ -2183,7 +2043,6 @@ void __dl_clear_params(struct task_struct *p)
        dl_se->dl_bw = 0;
 
        dl_se->dl_throttled = 0;
-       dl_se->dl_new = 1;
        dl_se->dl_yielded = 0;
 }
 
@@ -2210,6 +2069,7 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
 #endif
 
 #ifdef CONFIG_SCHEDSTATS
+       /* Even if schedstats is disabled, the statistics should not contain garbage */
        memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
@@ -2218,6 +2078,10 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
        __dl_clear_params(p);
 
        INIT_LIST_HEAD(&p->rt.run_list);
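+       /* Reset RT state copied from the parent so the child starts clean */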
+       p->rt.timeout           = 0;
+       p->rt.time_slice        = sched_rr_timeslice;
+       p->rt.on_rq             = 0;
+       p->rt.on_list           = 0;
 
 #ifdef CONFIG_PREEMPT_NOTIFIERS
        INIT_HLIST_HEAD(&p->preempt_notifiers);
@@ -2281,6 +2145,69 @@ int sysctl_numa_balancing(struct ctl_table *table, int write,
 #endif
 #endif
 
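+/*
+ * Static key gating schedstat accounting; defaults to off and is flipped
+ * via the schedstats= boot option or the kernel.sched_schedstats sysctl.
+ */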
+DEFINE_STATIC_KEY_FALSE(sched_schedstats);
+
+#ifdef CONFIG_SCHEDSTATS
+static void set_schedstats(bool enabled)
+{
+       if (enabled)
+               static_branch_enable(&sched_schedstats);
+       else
+               static_branch_disable(&sched_schedstats);
+}
+
+void force_schedstat_enabled(void)
+{
+       if (!schedstat_enabled()) {
+               pr_info("kernel profiling enabled schedstats, disable via kernel.sched_schedstats.\n");
+               static_branch_enable(&sched_schedstats);
+       }
+}
+
+static int __init setup_schedstats(char *str)
+{
+       int ret = 0;
+
+       if (!str)
+               goto out;
+
+       if (!strcmp(str, "enable")) {
+               set_schedstats(true);
+               ret = 1;
+       } else if (!strcmp(str, "disable")) {
+               set_schedstats(false);
+               ret = 1;
+       }
+out:
+       if (!ret)
+               pr_warn("Unable to parse schedstats=\n");
+
+       return ret;
+}
+__setup("schedstats=", setup_schedstats);
+
+#ifdef CONFIG_PROC_SYSCTL
+int sysctl_schedstats(struct ctl_table *table, int write,
+                        void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       struct ctl_table t;
+       int err;
+       int state = static_branch_likely(&sched_schedstats);
+
+       if (write && !capable(CAP_SYS_ADMIN))
+               return -EPERM;
+
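+       /*
+        * Operate on a local table whose ->data points at a plain int
+        * snapshot of the static branch, so the proc handler never writes
+        * the static key directly; set_schedstats() applies the result.
+        */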
+       t = *table;
+       t.data = &state;
+       err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos);
+       if (err < 0)
+               return err;
+       if (write)
+               set_schedstats(state);
+       return err;
+}
+#endif
+#endif
+
 /*
  * fork()/clone()-time setup:
  */
@@ -3010,16 +2937,6 @@ u64 scheduler_tick_max_deferment(void)
 }
 #endif
 
-notrace unsigned long get_parent_ip(unsigned long addr)
-{
-       if (in_lock_functions(addr)) {
-               addr = CALLER_ADDR2;
-               if (in_lock_functions(addr))
-                       addr = CALLER_ADDR3;
-       }
-       return addr;
-}
-
 #if defined(CONFIG_PREEMPT) && (defined(CONFIG_DEBUG_PREEMPT) || \
                                defined(CONFIG_PREEMPT_TRACER))
 
@@ -3041,7 +2958,7 @@ void preempt_count_add(int val)
                                PREEMPT_MASK - 10);
 #endif
        if (preempt_count() == val) {
-               unsigned long ip = get_parent_ip(CALLER_ADDR1);
+               unsigned long ip = get_lock_parent_ip();
 #ifdef CONFIG_DEBUG_PREEMPT
                current->preempt_disable_ip = ip;
 #endif
@@ -3068,7 +2985,7 @@ void preempt_count_sub(int val)
 #endif
 
        if (preempt_count() == val)
-               trace_preempt_on(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
+               trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
        __preempt_count_sub(val);
 }
 EXPORT_SYMBOL(preempt_count_sub);
@@ -3257,7 +3174,7 @@ static void __sched notrace __schedule(bool preempt)
                        if (prev->flags & PF_WQ_WORKER) {
                                struct task_struct *to_wakeup;
 
-                               to_wakeup = wq_worker_sleeping(prev, cpu);
+                               to_wakeup = wq_worker_sleeping(prev);
                                if (to_wakeup)
                                        try_to_wake_up_local(to_wakeup);
                        }
@@ -3280,7 +3197,6 @@ static void __sched notrace __schedule(bool preempt)
 
                trace_sched_switch(preempt, prev, next);
                rq = context_switch(rq, prev, next); /* unlocks the rq */
-               cpu = cpu_of(rq);
        } else {
                lockdep_unpin_lock(&rq->lock);
                raw_spin_unlock_irq(&rq->lock);
@@ -3466,7 +3382,7 @@ EXPORT_SYMBOL(default_wake_function);
  */
 void rt_mutex_setprio(struct task_struct *p, int prio)
 {
-       int oldprio, queued, running, enqueue_flag = ENQUEUE_RESTORE;
+       int oldprio, queued, running, queue_flag = DEQUEUE_SAVE | DEQUEUE_MOVE;
        struct rq *rq;
        const struct sched_class *prev_class;
 
@@ -3494,11 +3410,15 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
        trace_sched_pi_setprio(p, prio);
        oldprio = p->prio;
+
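+       /*
+        * An unchanged priority means the task keeps its queue position,
+        * so clear DEQUEUE_MOVE and merely save/restore its queued state.
+        */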
+       if (oldprio == prio)
+               queue_flag &= ~DEQUEUE_MOVE;
+
        prev_class = p->sched_class;
        queued = task_on_rq_queued(p);
        running = task_current(rq, p);
        if (queued)
-               dequeue_task(rq, p, DEQUEUE_SAVE);
+               dequeue_task(rq, p, queue_flag);
        if (running)
                put_prev_task(rq, p);
 
@@ -3516,7 +3436,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
                if (!dl_prio(p->normal_prio) ||
                    (pi_task && dl_entity_preempt(&pi_task->dl, &p->dl))) {
                        p->dl.dl_boosted = 1;
-                       enqueue_flag |= ENQUEUE_REPLENISH;
+                       queue_flag |= ENQUEUE_REPLENISH;
                } else
                        p->dl.dl_boosted = 0;
                p->sched_class = &dl_sched_class;
@@ -3524,7 +3444,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
                if (dl_prio(oldprio))
                        p->dl.dl_boosted = 0;
                if (oldprio < prio)
-                       enqueue_flag |= ENQUEUE_HEAD;
+                       queue_flag |= ENQUEUE_HEAD;
                p->sched_class = &rt_sched_class;
        } else {
                if (dl_prio(oldprio))
@@ -3539,7 +3459,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
        if (running)
                p->sched_class->set_curr_task(rq);
        if (queued)
-               enqueue_task(rq, p, enqueue_flag);
+               enqueue_task(rq, p, queue_flag);
 
        check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
@@ -3895,6 +3815,7 @@ static int __sched_setscheduler(struct task_struct *p,
        const struct sched_class *prev_class;
        struct rq *rq;
        int reset_on_fork;
+       int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE;
 
        /* may grab non-irq protected spin_locks */
        BUG_ON(in_interrupt());
@@ -4077,17 +3998,14 @@ change:
                 * itself.
                 */
                new_effective_prio = rt_mutex_get_effective_prio(p, newprio);
-               if (new_effective_prio == oldprio) {
-                       __setscheduler_params(p, attr);
-                       task_rq_unlock(rq, p, &flags);
-                       return 0;
-               }
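+               /*
+                * Same effective priority: keep the task's position in its
+                * queue and only save/restore state around the change.
+                */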
+               if (new_effective_prio == oldprio)
+                       queue_flags &= ~DEQUEUE_MOVE;
        }
 
        queued = task_on_rq_queued(p);
        running = task_current(rq, p);
        if (queued)
-               dequeue_task(rq, p, DEQUEUE_SAVE);
+               dequeue_task(rq, p, queue_flags);
        if (running)
                put_prev_task(rq, p);
 
@@ -4097,15 +4015,14 @@ change:
        if (running)
                p->sched_class->set_curr_task(rq);
        if (queued) {
-               int enqueue_flags = ENQUEUE_RESTORE;
                /*
                 * We enqueue to tail when the priority of a task is
                 * increased (user space view).
                 */
-               if (oldprio <= p->prio)
-                       enqueue_flags |= ENQUEUE_HEAD;
+               if (oldprio < p->prio)
+                       queue_flags |= ENQUEUE_HEAD;
 
-               enqueue_task(rq, p, enqueue_flags);
+               enqueue_task(rq, p, queue_flags);
        }
 
        check_class_changed(rq, p, prev_class, oldprio);
@@ -5096,6 +5013,8 @@ void init_idle(struct task_struct *idle, int cpu)
        idle->state = TASK_RUNNING;
        idle->se.exec_start = sched_clock();
 
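+       /*
+        * Idle task stacks can be reused across CPU hotplug; clear any
+        * stale KASAN poison left behind by the previous user.
+        */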
+       kasan_unpoison_task_stack(idle);
+
 #ifdef CONFIG_SMP
        /*
         * It's possible that init_idle() gets called multiple times on a task,
@@ -5405,183 +5324,6 @@ static void migrate_tasks(struct rq *dead_rq)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
-
-static struct ctl_table sd_ctl_dir[] = {
-       {
-               .procname       = "sched_domain",
-               .mode           = 0555,
-       },
-       {}
-};
-
-static struct ctl_table sd_ctl_root[] = {
-       {
-               .procname       = "kernel",
-               .mode           = 0555,
-               .child          = sd_ctl_dir,
-       },
-       {}
-};
-
-static struct ctl_table *sd_alloc_ctl_entry(int n)
-{
-       struct ctl_table *entry =
-               kcalloc(n, sizeof(struct ctl_table), GFP_KERNEL);
-
-       return entry;
-}
-
-static void sd_free_ctl_entry(struct ctl_table **tablep)
-{
-       struct ctl_table *entry;
-
-       /*
-        * In the intermediate directories, both the child directory and
-        * procname are dynamically allocated and could fail but the mode
-        * will always be set. In the lowest directory the names are
-        * static strings and all have proc handlers.
-        */
-       for (entry = *tablep; entry->mode; entry++) {
-               if (entry->child)
-                       sd_free_ctl_entry(&entry->child);
-               if (entry->proc_handler == NULL)
-                       kfree(entry->procname);
-       }
-
-       kfree(*tablep);
-       *tablep = NULL;
-}
-
-static int min_load_idx = 0;
-static int max_load_idx = CPU_LOAD_IDX_MAX-1;
-
-static void
-set_table_entry(struct ctl_table *entry,
-               const char *procname, void *data, int maxlen,
-               umode_t mode, proc_handler *proc_handler,
-               bool load_idx)
-{
-       entry->procname = procname;
-       entry->data = data;
-       entry->maxlen = maxlen;
-       entry->mode = mode;
-       entry->proc_handler = proc_handler;
-
-       if (load_idx) {
-               entry->extra1 = &min_load_idx;
-               entry->extra2 = &max_load_idx;
-       }
-}
-
-static struct ctl_table *
-sd_alloc_ctl_domain_table(struct sched_domain *sd)
-{
-       struct ctl_table *table = sd_alloc_ctl_entry(14);
-
-       if (table == NULL)
-               return NULL;
-
-       set_table_entry(&table[0], "min_interval", &sd->min_interval,
-               sizeof(long), 0644, proc_doulongvec_minmax, false);
-       set_table_entry(&table[1], "max_interval", &sd->max_interval,
-               sizeof(long), 0644, proc_doulongvec_minmax, false);
-       set_table_entry(&table[2], "busy_idx", &sd->busy_idx,
-               sizeof(int), 0644, proc_dointvec_minmax, true);
-       set_table_entry(&table[3], "idle_idx", &sd->idle_idx,
-               sizeof(int), 0644, proc_dointvec_minmax, true);
-       set_table_entry(&table[4], "newidle_idx", &sd->newidle_idx,
-               sizeof(int), 0644, proc_dointvec_minmax, true);
-       set_table_entry(&table[5], "wake_idx", &sd->wake_idx,
-               sizeof(int), 0644, proc_dointvec_minmax, true);
-       set_table_entry(&table[6], "forkexec_idx", &sd->forkexec_idx,
-               sizeof(int), 0644, proc_dointvec_minmax, true);
-       set_table_entry(&table[7], "busy_factor", &sd->busy_factor,
-               sizeof(int), 0644, proc_dointvec_minmax, false);
-       set_table_entry(&table[8], "imbalance_pct", &sd->imbalance_pct,
-               sizeof(int), 0644, proc_dointvec_minmax, false);
-       set_table_entry(&table[9], "cache_nice_tries",
-               &sd->cache_nice_tries,
-               sizeof(int), 0644, proc_dointvec_minmax, false);
-       set_table_entry(&table[10], "flags", &sd->flags,
-               sizeof(int), 0644, proc_dointvec_minmax, false);
-       set_table_entry(&table[11], "max_newidle_lb_cost",
-               &sd->max_newidle_lb_cost,
-               sizeof(long), 0644, proc_doulongvec_minmax, false);
-       set_table_entry(&table[12], "name", sd->name,
-               CORENAME_MAX_SIZE, 0444, proc_dostring, false);
-       /* &table[13] is terminator */
-
-       return table;
-}
-
-static struct ctl_table *sd_alloc_ctl_cpu_table(int cpu)
-{
-       struct ctl_table *entry, *table;
-       struct sched_domain *sd;
-       int domain_num = 0, i;
-       char buf[32];
-
-       for_each_domain(cpu, sd)
-               domain_num++;
-       entry = table = sd_alloc_ctl_entry(domain_num + 1);
-       if (table == NULL)
-               return NULL;
-
-       i = 0;
-       for_each_domain(cpu, sd) {
-               snprintf(buf, 32, "domain%d", i);
-               entry->procname = kstrdup(buf, GFP_KERNEL);
-               entry->mode = 0555;
-               entry->child = sd_alloc_ctl_domain_table(sd);
-               entry++;
-               i++;
-       }
-       return table;
-}
-
-static struct ctl_table_header *sd_sysctl_header;
-static void register_sched_domain_sysctl(void)
-{
-       int i, cpu_num = num_possible_cpus();
-       struct ctl_table *entry = sd_alloc_ctl_entry(cpu_num + 1);
-       char buf[32];
-
-       WARN_ON(sd_ctl_dir[0].child);
-       sd_ctl_dir[0].child = entry;
-
-       if (entry == NULL)
-               return;
-
-       for_each_possible_cpu(i) {
-               snprintf(buf, 32, "cpu%d", i);
-               entry->procname = kstrdup(buf, GFP_KERNEL);
-               entry->mode = 0555;
-               entry->child = sd_alloc_ctl_cpu_table(i);
-               entry++;
-       }
-
-       WARN_ON(sd_sysctl_header);
-       sd_sysctl_header = register_sysctl_table(sd_ctl_root);
-}
-
-/* may be called multiple times per register */
-static void unregister_sched_domain_sysctl(void)
-{
-       unregister_sysctl_table(sd_sysctl_header);
-       sd_sysctl_header = NULL;
-       if (sd_ctl_dir[0].child)
-               sd_free_ctl_entry(&sd_ctl_dir[0].child);
-}
-#else
-static void register_sched_domain_sysctl(void)
-{
-}
-static void unregister_sched_domain_sysctl(void)
-{
-}
-#endif /* CONFIG_SCHED_DEBUG && CONFIG_SYSCTL */
-
 static void set_rq_online(struct rq *rq)
 {
        if (!rq->online) {
@@ -5692,16 +5434,6 @@ static int sched_cpu_active(struct notifier_block *nfb,
                set_cpu_rq_start_time();
                return NOTIFY_OK;
 
-       case CPU_ONLINE:
-               /*
-                * At this point a starting CPU has marked itself as online via
-                * set_cpu_online(). But it might not yet have marked itself
-                * as active, which is essential from here on.
-                */
-               set_cpu_active(cpu, true);
-               stop_machine_unpark(cpu);
-               return NOTIFY_OK;
-
        case CPU_DOWN_FAILED:
                set_cpu_active(cpu, true);
                return NOTIFY_OK;
@@ -6173,11 +5905,16 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 /* Setup the mask of cpus configured for isolated domains */
 static int __init isolated_cpu_setup(char *str)
 {
+       int ret;
+
        alloc_bootmem_cpumask_var(&cpu_isolated_map);
-       cpulist_parse(str, cpu_isolated_map);
+       ret = cpulist_parse(str, cpu_isolated_map);
+       if (ret) {
+               pr_err("sched: Error, all isolcpus= values must be between 0 and %d\n",
+                      nr_cpu_ids - 1);
+               return 0;
+       }
        return 1;
 }
-
 __setup("isolcpus=", isolated_cpu_setup);
 
 struct s_data {
@@ -6840,7 +6577,7 @@ static void sched_init_numa(void)
 
                        sched_domains_numa_masks[i][j] = mask;
 
-                       for (k = 0; k < nr_node_ids; k++) {
+                       for_each_node(k) {
                                if (node_distance(j, k) > sched_domains_numa_distance[i])
                                        continue;
 
@@ -7860,11 +7597,9 @@ void sched_destroy_group(struct task_group *tg)
 void sched_offline_group(struct task_group *tg)
 {
        unsigned long flags;
-       int i;
 
        /* end participation in shares distribution */
-       for_each_possible_cpu(i)
-               unregister_fair_sched_group(tg, i);
+       unregister_fair_sched_group(tg);
 
        spin_lock_irqsave(&task_group_lock, flags);
        list_del_rcu(&tg->list);
@@ -7890,7 +7625,7 @@ void sched_move_task(struct task_struct *tsk)
        queued = task_on_rq_queued(tsk);
 
        if (queued)
-               dequeue_task(rq, tsk, DEQUEUE_SAVE);
+               dequeue_task(rq, tsk, DEQUEUE_SAVE | DEQUEUE_MOVE);
        if (unlikely(running))
                put_prev_task(rq, tsk);
 
@@ -7914,7 +7649,7 @@ void sched_move_task(struct task_struct *tsk)
        if (unlikely(running))
                tsk->sched_class->set_curr_task(rq);
        if (queued)
-               enqueue_task(rq, tsk, ENQUEUE_RESTORE);
+               enqueue_task(rq, tsk, ENQUEUE_RESTORE | ENQUEUE_MOVE);
 
        task_rq_unlock(rq, tsk, &flags);
 }