Merge branch 'drm-nouveau-next' of git://anongit.freedesktop.org/git/nouveau/linux...

[cascardo/linux.git] / kernel / sched / fair.c
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index df77c60..fd773ad 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1000,7 +1000,7 @@ struct numa_stats {
   */
  static void update_numa_stats(struct numa_stats *ns, int nid)
  {
-       int cpu;
+       int cpu, cpus = 0;
  
         memset(ns, 0, sizeof(*ns));
         for_each_cpu(cpu, cpumask_of_node(nid)) {
@@ -1009,8 +1009,21 @@ static void update_numa_stats(struct numa_stats *ns, int nid)
                 ns->nr_running += rq->nr_running;
                 ns->load += weighted_cpuload(cpu);
                 ns->power += power_of(cpu);
+
+               cpus++;
         }
  
+       /*
+        * If we raced with hotplug and there are no CPUs left in our mask,
+        * the @ns structure stays zeroed by the memset() above and
+        * task_numa_compare() will not find this node attractive.
+        *
+        * We'll either bail at !has_capacity, or we'll detect a huge imbalance
+        * and bail there.
+        */
+       if (!cpus)
+               return;
+
         ns->load = (ns->load * SCHED_POWER_SCALE) / ns->power;
         ns->capacity = DIV_ROUND_CLOSEST(ns->power, SCHED_POWER_SCALE);
         ns->has_capacity = (ns->nr_running < ns->capacity);
@@ -1201,9 +1214,21 @@ static int task_numa_migrate(struct task_struct *p)
          */
         rcu_read_lock();
         sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
-       env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
+       if (sd)
+               env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
         rcu_read_unlock();
  
+       /*
+        * Cpusets can break the scheduler domain tree into smaller
+        * balance domains, some of which do not cross NUMA boundaries.
+        * Tasks that are "trapped" in such domains cannot be migrated
+        * elsewhere, so there is no point in (re)trying.
+        */
+       if (unlikely(!sd)) {
+               p->numa_preferred_nid = cpu_to_node(task_cpu(p));
+               return -EINVAL;
+       }
+
         taskweight = task_weight(p, env.src_nid);
         groupweight = group_weight(p, env.src_nid);
         update_numa_stats(&env.src_stats, env.src_nid);
@@ -2153,7 +2178,7 @@ static inline void __update_tg_runnable_avg(struct sched_avg *sa,
         long contrib;
  
         /* The fraction of a cpu used by this cfs_rq */
-       contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
+       contrib = div_u64((u64)sa->runnable_avg_sum << NICE_0_SHIFT,
                           sa->runnable_avg_period + 1);
         contrib -= cfs_rq->tg_runnable_contrib;
  
@@ -5354,10 +5379,31 @@ void update_group_power(struct sched_domain *sd, int cpu)
                  */
  
                 for_each_cpu(cpu, sched_group_cpus(sdg)) {
-                       struct sched_group *sg = cpu_rq(cpu)->sd->groups;
+                       struct sched_group_power *sgp;
+                       struct rq *rq = cpu_rq(cpu);
+
+                       /*
+                        * build_sched_domains() -> init_sched_groups_power()
+                        * gets here before we've attached the domains to the
+                        * runqueues.
+                        *
+                        * Use power_of(), which is set irrespective of domains
+                        * in update_cpu_power().
+                        *
+                        * This prevents power/power_orig from being 0 and
+                        * causing divide-by-zero issues on boot.
+                        *
+                        * Runtime updates will correct power_orig.
+                        */
+                       if (unlikely(!rq->sd)) {
+                               power_orig += power_of(cpu);
+                               power += power_of(cpu);
+                               continue;
+                       }
  
-                       power_orig += sg->sgp->power_orig;
-                       power += sg->sgp->power;
+                       sgp = rq->sd->groups->sgp;
+                       power_orig += sgp->power_orig;
+                       power += sgp->power;
                 }
         } else  {
                 /*