sched/deadline: Reschedule from switched_from_dl() after a successful pull
[cascardo/linux.git] / kernel/sched/deadline.c
index abfaf3d..362ab1f 100644
@@ -518,12 +518,20 @@ again:
        }
 
        /*
-        * We need to take care of a possible races here. In fact, the
-        * task might have changed its scheduling policy to something
-        * different from SCHED_DEADLINE or changed its reservation
-        * parameters (through sched_setattr()).
+        * We need to take care of several possible races here:
+        *
+        *   - the task might have changed its scheduling policy
+        *     to something different from SCHED_DEADLINE
+        *   - the task might have changed its reservation parameters
+        *     (through sched_setattr())
+        *   - the task might have been boosted by someone else and
+        *     might be in the boosting/deboosting path
+        *
+        * In all these cases we bail out, as the task is already
+        * in the runqueue or is going to be enqueued back anyway.
         */
-       if (!dl_task(p) || dl_se->dl_new)
+       if (!dl_task(p) || dl_se->dl_new ||
+           dl_se->dl_boosted || !dl_se->dl_throttled)
                goto unlock;
 
        sched_clock_tick();
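
The "reservation parameters" the new comment refers to are the runtime/deadline/period values a task sets through sched_setattr(). For context, a minimal userspace sketch of entering SCHED_DEADLINE, along the lines of the example in Documentation/scheduler/sched-deadline.txt (the sched_attr layout follows the uapi ABI; SYS_sched_setattr is assumed to be provided by the installed kernel headers):

#define _GNU_SOURCE
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6
#endif

struct sched_attr {
        uint32_t size;
        uint32_t sched_policy;
        uint64_t sched_flags;
        int32_t  sched_nice;
        uint32_t sched_priority;
        uint64_t sched_runtime;         /* all three in nanoseconds */
        uint64_t sched_deadline;
        uint64_t sched_period;
};

int main(void)
{
        struct sched_attr attr;

        memset(&attr, 0, sizeof(attr));
        attr.size           = sizeof(attr);
        attr.sched_policy   = SCHED_DEADLINE;
        attr.sched_runtime  =  10 * 1000 * 1000;        /* 10 ms of budget...  */
        attr.sched_deadline =  30 * 1000 * 1000;        /* ...due within 30 ms */
        attr.sched_period   = 100 * 1000 * 1000;        /* ...every 100 ms     */

        /* pid 0 means the calling thread; needs CAP_SYS_NICE or root */
        if (syscall(SYS_sched_setattr, 0, &attr, 0) < 0) {
                perror("sched_setattr");
                return 1;
        }
        printf("now running as SCHED_DEADLINE\n");
        return 0;
}
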
@@ -532,7 +540,7 @@ again:
        dl_se->dl_yielded = 0;
        if (task_on_rq_queued(p)) {
                enqueue_task_dl(rq, p, ENQUEUE_REPLENISH);
-               if (task_has_dl_policy(rq->curr))
+               if (dl_task(rq->curr))
                        check_preempt_curr_dl(rq, p, 0);
                else
                        resched_curr(rq);
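
The switch from task_has_dl_policy() to dl_task() here is not cosmetic: the former looks only at p->policy, while the latter looks at the effective priority, so it is also true for a task that is currently boosted into the deadline range by priority inheritance. Roughly, paraphrasing the helpers from include/linux/sched/deadline.h and kernel/sched/sched.h (a sketch, not the verbatim definitions):

/* effective priority: also true while a task is PI-boosted to deadline */
static inline int dl_prio(int prio)
{
        return prio < MAX_DL_PRIO;              /* MAX_DL_PRIO is 0 */
}

static inline int dl_task(struct task_struct *p)
{
        return dl_prio(p->prio);
}

/* policy only: false for a boosted SCHED_OTHER/SCHED_FIFO task */
static inline int task_has_dl_policy(struct task_struct *p)
{
        return dl_policy(p->policy);            /* policy == SCHED_DEADLINE */
}

With dl_task(), a boosted (but not SCHED_DEADLINE) current task gets a proper deadline comparison in check_preempt_curr_dl() instead of an unconditional reschedule.
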
@@ -555,11 +563,6 @@ void init_dl_task_timer(struct sched_dl_entity *dl_se)
 {
        struct hrtimer *timer = &dl_se->dl_timer;
 
-       if (hrtimer_active(timer)) {
-               hrtimer_try_to_cancel(timer);
-               return;
-       }
-
        hrtimer_init(timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
        timer->function = dl_task_timer;
 }
@@ -625,7 +628,7 @@ static void update_curr_dl(struct rq *rq)
 
        sched_rt_avg_update(rq, delta_exec);
 
-       dl_se->runtime -= delta_exec;
+       dl_se->runtime -= dl_se->dl_yielded ? 0 : delta_exec;
        if (dl_runtime_exceeded(rq, dl_se)) {
                __dequeue_task_dl(rq, curr, 0);
                if (likely(start_dl_timer(dl_se, curr->dl.dl_boosted)))
@@ -847,8 +850,19 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)
         * smaller than our one... OTW we keep our runtime and
         * deadline.
         */
-       if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio))
+       if (pi_task && p->dl.dl_boosted && dl_prio(pi_task->normal_prio)) {
                pi_se = &pi_task->dl;
+       } else if (!dl_prio(p->normal_prio)) {
+               /*
+                * Special case in which we have a !SCHED_DEADLINE task
+                * that is going to be deboosted, but exceeds its
+                * runtime while doing so. No point in replenishing
+                * it, as it's going to return to its original
+                * scheduling class after this.
+                */
+               BUG_ON(!p->dl.dl_boosted || flags != ENQUEUE_REPLENISH);
+               return;
+       }
 
        /*
         * If p is throttled, we do nothing. In fact, if it exhausted
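
The dl_boosted case handled above comes from the rt_mutex priority-inheritance machinery, which also backs PRIO_INHERIT futexes: while a SCHED_DEADLINE waiter is blocked on such a lock, the owner temporarily runs as a deadline entity. A userspace sketch of the setup that can lead onto this path (the boosting itself happens entirely inside the kernel; this only creates the PI mutex):

#define _GNU_SOURCE
#include <pthread.h>

static pthread_mutex_t lock;

int main(void)
{
        pthread_mutexattr_t ma;

        pthread_mutexattr_init(&ma);
        /* waiters lend their scheduling parameters to the lock owner */
        pthread_mutexattr_setprotocol(&ma, PTHREAD_PRIO_INHERIT);
        pthread_mutex_init(&lock, &ma);
        pthread_mutexattr_destroy(&ma);

        /*
         * If a SCHED_DEADLINE thread now blocks on `lock` while a
         * SCHED_OTHER thread owns it, the owner runs with
         * p->dl.dl_boosted set until it releases the mutex.
         */
        pthread_mutex_lock(&lock);
        pthread_mutex_unlock(&lock);
        pthread_mutex_destroy(&lock);
        return 0;
}
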
@@ -914,7 +928,10 @@ select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
        struct task_struct *curr;
        struct rq *rq;
 
-       if (sd_flag != SD_BALANCE_WAKE && sd_flag != SD_BALANCE_FORK)
+       if (p->nr_cpus_allowed == 1)
+               goto out;
+
+       if (sd_flag != SD_BALANCE_WAKE)
                goto out;
 
        rq = cpu_rq(cpu);
@@ -1153,7 +1170,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
 static int find_later_rq(struct task_struct *task)
 {
        struct sched_domain *sd;
-       struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl);
+       struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl);
        int this_cpu = smp_processor_id();
        int best_cpu, cpu = task_cpu(task);
 
@@ -1489,7 +1506,7 @@ static void task_woken_dl(struct rq *rq, struct task_struct *p)
            p->nr_cpus_allowed > 1 &&
            dl_task(rq->curr) &&
            (rq->curr->nr_cpus_allowed < 2 ||
-            dl_entity_preempt(&rq->curr->dl, &p->dl))) {
+            !dl_entity_preempt(&p->dl, &rq->curr->dl))) {
                push_dl_tasks(rq);
        }
 }
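
dl_entity_preempt(a, b) is true when a's absolute deadline is earlier than b's (wrap-safe u64 comparison). The old test therefore pushed when *curr* would preempt the woken task; the rewritten test pushes whenever the woken task would *not* preempt curr, and the two only disagree when the deadlines are equal. A self-contained model of the comparison (a standalone sketch; the kernel helpers take sched_dl_entity pointers rather than bare deadlines):

#include <stdint.h>
#include <stdio.h>

/* wrap-safe "a is before b" on 64-bit absolute deadlines */
static int dl_time_before(uint64_t a, uint64_t b)
{
        return (int64_t)(a - b) < 0;
}

/* "an entity with deadline a preempts an entity with deadline b" */
static int dl_entity_preempt(uint64_t a, uint64_t b)
{
        return dl_time_before(a, b);
}

int main(void)
{
        uint64_t curr = 1000, woken = 1000;     /* equal deadlines */

        /* old condition: push if curr preempts the woken task -> 0 */
        printf("old: push = %d\n", dl_entity_preempt(curr, woken));
        /* new condition: push unless the woken task preempts curr -> 1 */
        printf("new: push = %d\n", !dl_entity_preempt(woken, curr));
        return 0;
}
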
@@ -1498,10 +1515,33 @@ static void set_cpus_allowed_dl(struct task_struct *p,
                                const struct cpumask *new_mask)
 {
        struct rq *rq;
+       struct root_domain *src_rd;
        int weight;
 
        BUG_ON(!dl_task(p));
 
+       rq = task_rq(p);
+       src_rd = rq->rd;
+       /*
+        * Migrating a SCHED_DEADLINE task between exclusive
+        * cpusets (different root_domains) entails a bandwidth
+        * update. We already made space for it in the destination
+        * domain (see cpuset_can_attach()).
+        */
+       if (!cpumask_intersects(src_rd->span, new_mask)) {
+               struct dl_bw *src_dl_b;
+
+               src_dl_b = dl_bw_of(cpu_of(rq));
+               /*
+                * We now free resources of the root_domain we are migrating
+                * off. In the worst case, sched_setattr() may temporarily fail
+                * until we complete the update.
+                */
+               raw_spin_lock(&src_dl_b->lock);
+               __dl_clear(src_dl_b, p->dl.dl_bw);
+               raw_spin_unlock(&src_dl_b->lock);
+       }
+
        /*
         * Update only if the task is actually running (i.e.,
         * it is on the rq AND it is not throttled).
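
__dl_clear() gives the departing task's bandwidth back to the root domain it is leaving; the matching reservation in the destination domain was already taken in cpuset_can_attach(). A toy model of that bookkeeping (dl_bw_add()/dl_bw_clear() are illustrative names; the kernel's are __dl_add()/__dl_clear(), with total_bw tracking the admitted utilization against the per-domain cap):

#include <stdint.h>
#include <stdio.h>

struct dl_bw {
        int64_t bw;             /* cap: allowed utilization, fixed-point  */
        int64_t total_bw;       /* sum of admitted tasks' runtime/period  */
};

static int dl_bw_add(struct dl_bw *b, int64_t tsk_bw)
{
        if (b->total_bw + tsk_bw > b->bw)
                return -1;      /* admission control refuses the move */
        b->total_bw += tsk_bw;
        return 0;
}

static void dl_bw_clear(struct dl_bw *b, int64_t tsk_bw)
{
        b->total_bw -= tsk_bw;
}

int main(void)
{
        struct dl_bw src = { .bw = 1 << 20, .total_bw = 1 << 18 };
        struct dl_bw dst = { .bw = 1 << 20, .total_bw = 0 };
        int64_t tsk_bw = 1 << 18;       /* e.g. runtime/period == 1/4 */

        if (dl_bw_add(&dst, tsk_bw))    /* done in cpuset_can_attach()      */
                return 1;
        dl_bw_clear(&src, tsk_bw);      /* done here, when the mask changes */

        printf("src=%lld dst=%lld\n",
               (long long)src.total_bw, (long long)dst.total_bw);
        return 0;
}
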
@@ -1518,8 +1558,6 @@ static void set_cpus_allowed_dl(struct task_struct *p,
        if ((p->nr_cpus_allowed > 1) == (weight > 1))
                return;
 
-       rq = task_rq(p);
-
        /*
         * The process used to be able to migrate OR it can now migrate
         */
@@ -1567,10 +1605,35 @@ void init_sched_dl_class(void)
 
 #endif /* CONFIG_SMP */
 
+/*
+ *  Ensure p's dl_timer is cancelled. May drop rq->lock for a while.
+ */
+static void cancel_dl_timer(struct rq *rq, struct task_struct *p)
+{
+       struct hrtimer *dl_timer = &p->dl.dl_timer;
+
+       /* Nobody will change the task's class while pi_lock is held */
+       lockdep_assert_held(&p->pi_lock);
+
+       if (hrtimer_active(dl_timer)) {
+               int ret = hrtimer_try_to_cancel(dl_timer);
+
+               if (unlikely(ret == -1)) {
+                       /*
+                        * Note that p may migrate OR new deadline tasks
+                        * may appear in rq when we are unlocking it.
+                        * Our callers must be fine with that.
+                        */
+                       raw_spin_unlock(&rq->lock);
+                       hrtimer_cancel(dl_timer);
+                       raw_spin_lock(&rq->lock);
+               }
+       }
+}
+
 static void switched_from_dl(struct rq *rq, struct task_struct *p)
 {
-       if (hrtimer_active(&p->dl.dl_timer) && !dl_policy(p->policy))
-               hrtimer_try_to_cancel(&p->dl.dl_timer);
+       cancel_dl_timer(rq, p);
 
        __dl_clear_params(p);
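
On the cancel_dl_timer() helper added above: hrtimer_try_to_cancel() returns 0 when the timer was not queued, 1 when it was queued and has now been removed, and -1 when its callback is currently executing. In the -1 case only hrtimer_cancel() guarantees the callback has finished, but dl_task_timer() itself takes rq->lock, so waiting for it with rq->lock held would deadlock; hence the unlock/cancel/relock dance, which pi_lock makes safe against class changes. The same pattern in isolation ("cancel_timer_relock" is a hypothetical name):

#include <linux/hrtimer.h>
#include <linux/spinlock.h>

/* cancel a timer whose callback takes `lock`, which the caller holds */
static void cancel_timer_relock(struct hrtimer *timer, raw_spinlock_t *lock)
{
        switch (hrtimer_try_to_cancel(timer)) {
        case 0:                 /* timer was not queued                 */
        case 1:                 /* timer was queued and got removed     */
                break;
        case -1:                /* callback is running right now        */
                raw_spin_unlock(lock);
                hrtimer_cancel(timer);  /* waits for the callback       */
                raw_spin_lock(lock);
                break;
        }
}
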
 
@@ -1580,8 +1643,11 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
         * this is the right place to try to pull some other one
         * from an overloaded cpu, if any.
         */
-       if (!rq->dl.dl_nr_running)
-               pull_dl_task(rq);
+       if (!task_on_rq_queued(p) || rq->dl.dl_nr_running)
+               return;
+
+       if (pull_dl_task(rq))
+               resched_curr(rq);
 #endif
 }
 
@@ -1603,12 +1669,17 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 
        if (task_on_rq_queued(p) && rq->curr != p) {
 #ifdef CONFIG_SMP
-               if (rq->dl.overloaded && push_dl_task(rq) && rq != task_rq(p))
+               if (p->nr_cpus_allowed > 1 && rq->dl.overloaded &&
+                       push_dl_task(rq) && rq != task_rq(p))
                        /* Only reschedule if pushing failed */
                        check_resched = 0;
 #endif /* CONFIG_SMP */
-               if (check_resched && task_has_dl_policy(rq->curr))
-                       check_preempt_curr_dl(rq, p, 0);
+               if (check_resched) {
+                       if (dl_task(rq->curr))
+                               check_preempt_curr_dl(rq, p, 0);
+                       else
+                               resched_curr(rq);
+               }
        }
 }
 
@@ -1679,3 +1750,12 @@ const struct sched_class dl_sched_class = {
        .switched_from          = switched_from_dl,
        .switched_to            = switched_to_dl,
 };
+
+#ifdef CONFIG_SCHED_DEBUG
+extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
+
+void print_dl_stats(struct seq_file *m, int cpu)
+{
+       print_dl_rq(m, cpu, &cpu_rq(cpu)->dl);
+}
+#endif /* CONFIG_SCHED_DEBUG */