diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index df77c60..fd773ad 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1000,7 +1000,7 @@ struct numa_stats {
  */
 static void update_numa_stats(struct numa_stats *ns, int nid)
 {
-       int cpu;
+       int cpu, cpus = 0;
 
        memset(ns, 0, sizeof(*ns));
        for_each_cpu(cpu, cpumask_of_node(nid)) {
@@ -1009,8 +1009,21 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
                ns->nr_running += rq->nr_running;
                ns->load += weighted_cpuload(cpu);
                ns->power += power_of(cpu);
+
+               cpus++;
        }
 
+       /*
+        * If we raced with hotplug and there are no CPUs left in our mask
+        * the @ns structure is left zeroed and task_numa_compare() will
+        * not find this node attractive.
+        *
+        * We'll either bail at !has_capacity, or we'll detect a huge imbalance
+        * and bail there.
+        */
+       if (!cpus)
+               return;
+
        ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
        ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
        ns->has_capacity = (ns->nr_running < ns->capacity);
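
The guard above closes a division-by-zero window: if CPU hotplug empties
the node's cpumask between the caller picking the node and this loop
running, ns->power stays 0 and the scaling below would trap. A minimal
userspace sketch of the same pattern, assuming hypothetical *_sketch
names, with plain arrays standing in for the per-runqueue fields:

#include <string.h>

struct numa_stats_sketch {
	unsigned long nr_running;
	unsigned long load;
	unsigned long power;
	unsigned long capacity;
	int has_capacity;
};

#define SCHED_POWER_SCALE_SKETCH 1024UL

static void update_numa_stats_sketch(struct numa_stats_sketch *ns,
				     const unsigned long *cpu_load,
				     const unsigned long *cpu_power,
				     int ncpus)
{
	int cpu, cpus = 0;

	memset(ns, 0, sizeof(*ns));
	for (cpu = 0; cpu < ncpus; cpu++) {
		ns->load += cpu_load[cpu];
		ns->power += cpu_power[cpu];
		cpus++;
	}

	/*
	 * ncpus == 0 models the hotplug race: ns->power is still 0, so
	 * return here and leave *ns zeroed rather than divide by zero.
	 */
	if (!cpus)
		return;

	ns->load = (ns->load * SCHED_POWER_SCALE_SKETCH) / ns->power;
	/* open-coded DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE) */
	ns->capacity = (ns->power + SCHED_POWER_SCALE_SKETCH / 2) /
			SCHED_POWER_SCALE_SKETCH;
	ns->has_capacity = (ns->nr_running < ns->capacity);
}
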
@@ -1201,9 +1214,21 @@ static int task_numa_migrate(struct task_struct *p)
         */
        rcu_read_lock();
        sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
-       env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
+       if (sd)
+               env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
        rcu_read_unlock();
 
+       /*
+        * Cpusets can break the scheduler domain tree into smaller
+        * balance domains, some of which do not cross NUMA boundaries.
+        * Tasks that are "trapped" in such domains cannot be migrated
+        * elsewhere, so there is no point in (re)trying.
+        */
+       if (unlikely(!sd)) {
+               p->numa_preferred_nid = cpu_to_node(task_cpu(p));
+               return -EINVAL;
+       }
+
        taskweight = task_weight(p, env.src_nid);
        groupweight = group_weight(p, env.src_nid);
        update_numa_stats(&env.src_stats, env.src_nid);
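
With cpusets carving the domain tree into pieces, a CPU may have no
sched domain that spans a NUMA boundary, so sd can come back NULL here.
Note the ordering the hunk preserves: imbalance_pct is read only behind
the NULL check and inside the RCU section, and the bail-out pins
numa_preferred_nid to the current node so the task stops retrying. A
compressed sketch of that control flow, with the RCU primitives reduced
to comments and the *_sketch names as assumptions, not kernel API:

struct sched_domain_sketch {
	int imbalance_pct;
};

/*
 * Returns the halved imbalance percentage, or -1 when the task is
 * trapped in a non-NUMA-spanning domain (sd == NULL); the caller then
 * pins its preferred node and gives up on migrating.
 */
static int numa_imbalance_pct_sketch(const struct sched_domain_sketch *sd)
{
	int pct = 0;

	/* rcu_read_lock(); */
	if (sd)		/* dereference only behind the NULL check */
		pct = 100 + (sd->imbalance_pct - 100) / 2;
	/* rcu_read_unlock(); */

	if (!sd)
		return -1;	/* models the -EINVAL bail-out above */

	return pct;
}
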
@@ -2153,7 +2178,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
        long contrib;
 
        /* The fraction of a cpu used by this cfs_rq */
-       contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
+       contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
                          sa->runnable_avg_period + 1);
        contrib -= cfs_rq->tg_runnable_contrib;
 
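
The cast matters because runnable_avg_sum is a 32-bit quantity: without
widening first, the left shift is done in 32-bit arithmetic and wraps
before div_u64() ever sees the value. A standalone demonstration (a
shift of 20 corresponds to NICE_0_SHIFT with high-resolution load
enabled, and ~47742 is the order of a saturated runnable sum; both
numbers are illustrative):

#include <stdint.h>
#include <stdio.h>

#define NICE_0_SHIFT 20

int main(void)
{
	uint32_t runnable_avg_sum = 47742;
	uint32_t runnable_avg_period = 47742;

	/* Before the fix: the shift wraps in 32 bits, then is widened. */
	uint64_t buggy = (uint64_t)(runnable_avg_sum << NICE_0_SHIFT) /
			 (runnable_avg_period + 1);

	/* After the fix: widen to 64 bits first, then shift. */
	uint64_t fixed = ((uint64_t)runnable_avg_sum << NICE_0_SHIFT) /
			 (runnable_avg_period + 1);

	/* Prints two very different values: ~59000 vs ~2^20. */
	printf("buggy=%llu fixed=%llu\n",
	       (unsigned long long)buggy, (unsigned long long)fixed);
	return 0;
}
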
@@ -5354,10 +5379,31 @@ void update_group_power(struct sched_domain *sd, int cpu)
                 */
 
                for_each_cpu(cpu, sched_group_cpus(sdg)) {
-                       struct sched_group *sg = cpu_rq(cpu)->sd->groups;
+                       struct sched_group_power *sgp;
+                       struct rq *rq = cpu_rq(cpu);
+
+                       /*
+                        * build_sched_domains() -> init_sched_groups_power()
+                        * gets here before we've attached the domains to the
+                        * runqueues.
+                        *
+                        * Use power_of(), which is set irrespective of domains
+                        * in update_cpu_power().
+                        *
+                        * This keeps power/power_orig from being 0 and
+                        * causing divide-by-zero issues on boot.
+                        *
+                        * Runtime updates will correct power_orig.
+                        */
+                       if (unlikely(!rq->sd)) {
+                               power_orig += power_of(cpu);
+                               power += power_of(cpu);
+                               continue;
+                       }
 
-                       power_orig += sg->sgp->power_orig;
-                       power += sg->sgp->power;
+                       sgp = rq->sd->groups->sgp;
+                       power_orig += sgp->power_orig;
+                       power += sgp->power;
                }
        } else  {
                /*
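
The rewritten loop dodges a boot-time NULL dereference:
init_sched_groups_power() reaches update_group_power() before the
domains are attached, so rq->sd (and hence rq->sd->groups->sgp) may not
exist yet. A sketch of the fallback shape, with hypothetical *_sketch
types standing in for rq->sd->groups->sgp and power_of():

struct group_power_sketch {
	unsigned long power;
	unsigned long power_orig;
};

struct rq_sketch {
	struct group_power_sketch *sgp;	/* NULL until domains attach */
	unsigned long cpu_power;	/* what power_of(cpu) returns */
};

static void sum_group_power_sketch(struct rq_sketch **rqs, int ncpus,
				   unsigned long *power,
				   unsigned long *power_orig)
{
	int cpu;

	*power = *power_orig = 0;
	for (cpu = 0; cpu < ncpus; cpu++) {
		struct rq_sketch *rq = rqs[cpu];

		/*
		 * Early in boot the domain linkage is still NULL; fall
		 * back to the per-CPU value so neither sum stays 0 and
		 * later divisions by group power cannot trap.
		 */
		if (!rq->sgp) {
			*power_orig += rq->cpu_power;
			*power += rq->cpu_power;
			continue;
		}

		*power_orig += rq->sgp->power_orig;
		*power += rq->sgp->power;
	}
}
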