cascardo/linux.git: kernel/rcu/tree.c

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 44245ae..9f75f25 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -71,6 +71,7 @@ MODULE_ALIAS("rcutree");
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 static struct lock_class_key rcu_exp_class[RCU_NUM_LVLS];
+static struct lock_class_key rcu_exp_sched_class[RCU_NUM_LVLS];
 
 /*
  * In order to export the rcu_state name to the tracing tools, it
@@ -645,12 +646,12 @@ static void rcu_eqs_enter_common(long long oldval, bool user)
         * It is illegal to enter an extended quiescent state while
         * in an RCU read-side critical section.
         */
-       rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
-                          "Illegal idle entry in RCU read-side critical section.");
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
-                          "Illegal idle entry in RCU-bh read-side critical section.");
-       rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
-                          "Illegal idle entry in RCU-sched read-side critical section.");
+       RCU_LOCKDEP_WARN(lock_is_held(&rcu_lock_map),
+                        "Illegal idle entry in RCU read-side critical section.");
+       RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map),
+                        "Illegal idle entry in RCU-bh read-side critical section.");
+       RCU_LOCKDEP_WARN(lock_is_held(&rcu_sched_lock_map),
+                        "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
 /*
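
The conversion from rcu_lockdep_assert() to RCU_LOCKDEP_WARN() inverts the
sense of the condition: the old macro complained when its argument was false,
the new one complains when its argument is true, which is why the negations
above disappear. A rough sketch of the two macros (hedged; the real
definitions also throttle each site to a single report):

        #define rcu_lockdep_assert(c, s)                                      \
                do {                                                          \
                        if (debug_lockdep_rcu_enabled() && !(c))              \
                                lockdep_rcu_suspicious(__FILE__, __LINE__, s);\
                } while (0)

        #define RCU_LOCKDEP_WARN(c, s)                                        \
                do {                                                          \
                        if (debug_lockdep_rcu_enabled() && (c))               \
                                lockdep_rcu_suspicious(__FILE__, __LINE__, s);\
                } while (0)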
@@ -697,7 +698,7 @@ void rcu_idle_enter(void)
 }
 EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_NO_HZ_FULL
 /**
  * rcu_user_enter - inform RCU that we are resuming userspace.
  *
@@ -710,7 +711,7 @@ void rcu_user_enter(void)
 {
        rcu_eqs_enter(1);
 }
-#endif /* CONFIG_RCU_USER_QS */
+#endif /* CONFIG_NO_HZ_FULL */
 
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -824,7 +825,7 @@ void rcu_idle_exit(void)
 }
 EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
-#ifdef CONFIG_RCU_USER_QS
+#ifdef CONFIG_NO_HZ_FULL
 /**
  * rcu_user_exit - inform RCU that we are exiting userspace.
  *
@@ -835,7 +836,7 @@ void rcu_user_exit(void)
 {
        rcu_eqs_exit(1);
 }
-#endif /* CONFIG_RCU_USER_QS */
+#endif /* CONFIG_NO_HZ_FULL */
 
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -974,9 +975,9 @@ bool notrace rcu_is_watching(void)
 {
        bool ret;
 
-       preempt_disable();
+       preempt_disable_notrace();
        ret = __rcu_is_watching();
-       preempt_enable();
+       preempt_enable_notrace();
        return ret;
 }
 EXPORT_SYMBOL_GPL(rcu_is_watching);
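
The switch to the _notrace variants lets the function tracer call
rcu_is_watching() from its own entry hooks without recursing: if
preempt_disable() itself were visible to the tracer, the hook would re-enter.
Roughly (the motivation is an assumption, and the definition is paraphrased
from include/linux/preempt.h):

        /*
         * Assumed recursion being avoided:
         *   ftrace entry handler
         *     -> rcu_is_watching()
         *       -> preempt_disable()        (instrumented)
         *         -> ftrace entry handler   (recursion)
         */
        #define preempt_disable_notrace() \
        do { \
                __preempt_count_inc();  /* raw increment, never traced */ \
                barrier(); \
        } while (0)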
@@ -1903,6 +1904,26 @@ static int rcu_gp_init(struct rcu_state *rsp)
        return 1;
 }
 
+/*
+ * Helper function for wait_event_interruptible_timeout() wakeup
+ * at force-quiescent-state time.
+ */
+static bool rcu_gp_fqs_check_wake(struct rcu_state *rsp, int *gfp)
+{
+       struct rcu_node *rnp = rcu_get_root(rsp);
+
+       /* Someone like call_rcu() requested a force-quiescent-state scan. */
+       *gfp = READ_ONCE(rsp->gp_flags);
+       if (*gfp & RCU_GP_FLAG_FQS)
+               return true;
+
+       /* The current grace period has completed. */
+       if (!READ_ONCE(rnp->qsmask) && !rcu_preempt_blocked_readers_cgp(rnp))
+               return true;
+
+       return false;
+}
+
 /*
  * Do one round of quiescent-state forcing.
  */
@@ -2067,12 +2088,8 @@ static int __noreturn rcu_gp_kthread(void *arg)
                                               TPS("fqswait"));
                        rsp->gp_state = RCU_GP_WAIT_FQS;
                        ret = wait_event_interruptible_timeout(rsp->gp_wq,
-                                       ((gf = READ_ONCE(rsp->gp_flags)) &
-                                        RCU_GP_FLAG_FQS) ||
-                                       (!READ_ONCE(rnp->qsmask) &&
-                                        !rcu_preempt_blocked_readers_cgp(rnp)),
-                                       j);
-                       rsp->gp_state = RCU_GP_DONE_FQS;
+                                       rcu_gp_fqs_check_wake(rsp, &gf), j);
+                       rsp->gp_state = RCU_GP_DOING_FQS;
                        /* Locking provides needed memory barriers. */
                        /* If grace period done, leave loop. */
                        if (!READ_ONCE(rnp->qsmask) &&
@@ -3163,10 +3180,10 @@ static inline int rcu_blocking_is_gp(void)
  */
 void synchronize_sched(void)
 {
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_sched() in RCU-sched read-side critical section");
+       RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+                        lock_is_held(&rcu_lock_map) ||
+                        lock_is_held(&rcu_sched_lock_map),
+                        "Illegal synchronize_sched() in RCU-sched read-side critical section");
        if (rcu_blocking_is_gp())
                return;
        if (rcu_gp_is_expedited())
@@ -3190,10 +3207,10 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
  */
 void synchronize_rcu_bh(void)
 {
-       rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
-                          !lock_is_held(&rcu_lock_map) &&
-                          !lock_is_held(&rcu_sched_lock_map),
-                          "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
+       RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
+                        lock_is_held(&rcu_lock_map) ||
+                        lock_is_held(&rcu_sched_lock_map),
+                        "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
        if (rcu_blocking_is_gp())
                return;
        if (rcu_gp_is_expedited())
@@ -3255,6 +3272,58 @@ void cond_synchronize_rcu(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
 
+/**
+ * get_state_synchronize_sched - Snapshot current RCU-sched state
+ *
+ * Returns a cookie that is used by a later call to cond_synchronize_sched()
+ * to determine whether or not a full grace period has elapsed in the
+ * meantime.
+ */
+unsigned long get_state_synchronize_sched(void)
+{
+       /*
+        * Any prior manipulation of RCU-protected data must happen
+        * before the load from ->gpnum.
+        */
+       smp_mb();  /* ^^^ */
+
+       /*
+        * Make sure this load happens before the purportedly
+        * time-consuming work between get_state_synchronize_sched()
+        * and cond_synchronize_sched().
+        */
+       return smp_load_acquire(&rcu_sched_state.gpnum);
+}
+EXPORT_SYMBOL_GPL(get_state_synchronize_sched);
+
+/**
+ * cond_synchronize_sched - Conditionally wait for an RCU-sched grace period
+ *
+ * @oldstate: return value from earlier call to get_state_synchronize_sched()
+ *
+ * If a full RCU-sched grace period has elapsed since the earlier call to
+ * get_state_synchronize_sched(), just return.  Otherwise, invoke
+ * synchronize_sched() to wait for a full grace period.
+ *
+ * Yes, this function does not take counter wrap into account.  But
+ * counter wrap is harmless.  If the counter wraps, we have waited for
+ * more than 2 billion grace periods (and way more on a 64-bit system!),
+ * so waiting for one additional grace period should be just fine.
+ */
+void cond_synchronize_sched(unsigned long oldstate)
+{
+       unsigned long newstate;
+
+       /*
+        * Ensure that this load happens before any RCU-destructive
+        * actions the caller might carry out after we return.
+        */
+       newstate = smp_load_acquire(&rcu_sched_state.completed);
+       if (ULONG_CMP_GE(oldstate, newstate))
+               synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(cond_synchronize_sched);
+
 /* Adjust sequence number for start of update-side operation. */
 static void rcu_seq_start(unsigned long *sp)
 {
@@ -3299,6 +3368,7 @@ static void rcu_exp_gp_seq_start(struct rcu_state *rsp)
 static void rcu_exp_gp_seq_end(struct rcu_state *rsp)
 {
        rcu_seq_end(&rsp->expedited_sequence);
+       smp_mb(); /* Ensure that consecutive grace periods serialize. */
 }
 static unsigned long rcu_exp_gp_seq_snap(struct rcu_state *rsp)
 {
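
The smp_mb() added to rcu_exp_gp_seq_end() slots into the rcu_seq_*()
even/odd scheme: ->expedited_sequence is odd while an expedited grace period
is in flight and even otherwise. A self-contained sketch of that scheme
(the demo_* names are hypothetical):

        static unsigned long demo_seq;  /* even: idle, odd: GP in progress */

        static void demo_gp(void)
        {
                unsigned long s;

                s = rcu_seq_snap(&demo_seq);  /* first value meaning "done" */
                rcu_seq_start(&demo_seq);     /* counter becomes odd */
                /* ... drive all CPUs through a quiescent state ... */
                rcu_seq_end(&demo_seq);       /* counter becomes even */
                WARN_ON_ONCE(!rcu_seq_done(&demo_seq, s));
        }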
@@ -3311,11 +3381,14 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
 
 /* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
 static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
+                              struct rcu_data *rdp,
                               atomic_long_t *stat, unsigned long s)
 {
        if (rcu_exp_gp_seq_done(rsp, s)) {
                if (rnp)
                        mutex_unlock(&rnp->exp_funnel_mutex);
+               else if (rdp)
+                       mutex_unlock(&rdp->exp_funnel_mutex);
                /* Ensure test happens before caller kfree(). */
                smp_mb__before_atomic(); /* ^^^ */
                atomic_long_inc(stat);
@@ -3331,9 +3404,26 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
  */
 static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 {
+       struct rcu_data *rdp;
        struct rcu_node *rnp0;
        struct rcu_node *rnp1 = NULL;
 
+       /*
+        * First try directly acquiring the root lock in order to reduce
+        * latency in the common case where expedited grace periods are
+        * rare.  We check mutex_is_locked() to avoid pathological levels of
+        * memory contention on ->exp_funnel_mutex in the heavy-load case.
+        */
+       rnp0 = rcu_get_root(rsp);
+       if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
+               if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
+                       if (sync_exp_work_done(rsp, rnp0, NULL,
+                                              &rsp->expedited_workdone0, s))
+                               return NULL;
+                       return rnp0;
+               }
+       }
+
        /*
         * Each pass through the following loop works its way
         * up the rcu_node tree, returning if others have done the
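
The per-CPU mutex added in the next hunk extends the funnel below the leaf
rcu_node level, so contending tasks serialize on their rcu_data structures
first. The climb itself, simplified (exp_funnel_sketch() and its leaf
argument are illustrative only, not kernel code):

        static struct rcu_node *exp_funnel_sketch(struct rcu_state *rsp,
                                                  struct rcu_node *leaf,
                                                  unsigned long s)
        {
                struct rcu_node *rnp, *held = NULL;

                for (rnp = leaf; rnp != NULL; rnp = rnp->parent) {
                        if (rcu_exp_gp_seq_done(rsp, s)) {
                                if (held)
                                        mutex_unlock(&held->exp_funnel_mutex);
                                return NULL;  /* someone did our work */
                        }
                        mutex_lock(&rnp->exp_funnel_mutex);
                        if (held)
                                mutex_unlock(&held->exp_funnel_mutex);
                        held = rnp;  /* hold at most one mutex at a time */
                }
                return held;         /* caller now holds the root's mutex */
        }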
@@ -3342,30 +3432,87 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
         * can be inexact, as it is just promoting locality and is not
         * strictly needed for correctness.
         */
-       rnp0 = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+       rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
+       if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
+               return NULL;
+       mutex_lock(&rdp->exp_funnel_mutex);
+       rnp0 = rdp->mynode;
        for (; rnp0 != NULL; rnp0 = rnp0->parent) {
-               if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone1, s))
+               if (sync_exp_work_done(rsp, rnp1, rdp,
+                                      &rsp->expedited_workdone2, s))
                        return NULL;
                mutex_lock(&rnp0->exp_funnel_mutex);
                if (rnp1)
                        mutex_unlock(&rnp1->exp_funnel_mutex);
+               else
+                       mutex_unlock(&rdp->exp_funnel_mutex);
                rnp1 = rnp0;
        }
-       if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone2, s))
+       if (sync_exp_work_done(rsp, rnp1, rdp,
+                              &rsp->expedited_workdone3, s))
                return NULL;
        return rnp1;
 }
 
+/* Invoked on each online non-idle CPU for expedited quiescent state. */
 static int synchronize_sched_expedited_cpu_stop(void *data)
 {
-       struct rcu_state *rsp = data;
+       struct rcu_data *rdp = data;
+       struct rcu_state *rsp = rdp->rsp;
 
        /* We are here: If we are last, do the wakeup. */
+       rdp->exp_done = true;
        if (atomic_dec_and_test(&rsp->expedited_need_qs))
                wake_up(&rsp->expedited_wq);
        return 0;
 }
 
+static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
+{
+       int cpu;
+       unsigned long jiffies_stall;
+       unsigned long jiffies_start;
+       struct rcu_data *rdp;
+       int ret;
+
+       jiffies_stall = rcu_jiffies_till_stall_check();
+       jiffies_start = jiffies;
+
+       for (;;) {
+               ret = wait_event_interruptible_timeout(
+                               rsp->expedited_wq,
+                               !atomic_read(&rsp->expedited_need_qs),
+                               jiffies_stall);
+               if (ret > 0)
+                       return;
+               if (ret < 0) {
+                       /* Hit a signal, disable CPU stall warnings. */
+                       wait_event(rsp->expedited_wq,
+                                  !atomic_read(&rsp->expedited_need_qs));
+                       return;
+               }
+               pr_err("INFO: %s detected expedited stalls on CPUs: {",
+                      rsp->name);
+               for_each_online_cpu(cpu) {
+                       rdp = per_cpu_ptr(rsp->rda, cpu);
+
+                       if (rdp->exp_done)
+                               continue;
+                       pr_cont(" %d", cpu);
+               }
+               pr_cont(" } %lu jiffies s: %lu\n",
+                       jiffies - jiffies_start, rsp->expedited_sequence);
+               for_each_online_cpu(cpu) {
+                       rdp = per_cpu_ptr(rsp->rda, cpu);
+
+                       if (rdp->exp_done)
+                               continue;
+                       dump_cpu_task(cpu);
+               }
+               jiffies_stall = 3 * rcu_jiffies_till_stall_check() + 3;
+       }
+}
+
 /**
  * synchronize_sched_expedited - Brute-force RCU-sched grace period
  *
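
The stall-warning loop above leans on wait_event_interruptible_timeout()
return-value semantics: positive means the condition came true, zero means
the timeout expired with the condition still false, negative means a signal
arrived. Condensed, with wq/cond/timeout abbreviating the arguments used
above:

        ret = wait_event_interruptible_timeout(wq, cond, timeout);
        if (ret > 0)            /* condition true: all CPUs reported QS */
                return;
        if (ret < 0) {          /* signal: fall back to uninterruptible wait */
                wait_event(wq, cond);
                return;
        }
        /* ret == 0: timed out, print an expedited stall warning and retry */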
@@ -3415,23 +3562,23 @@ void synchronize_sched_expedited(void)
                struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
                struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
+               rdp->exp_done = false;
+
                /* Skip our CPU and any idle CPUs. */
                if (raw_smp_processor_id() == cpu ||
                    !(atomic_add_return(0, &rdtp->dynticks) & 0x1))
                        continue;
                atomic_inc(&rsp->expedited_need_qs);
                stop_one_cpu_nowait(cpu, synchronize_sched_expedited_cpu_stop,
-                                   rsp, &rdp->exp_stop_work);
+                                   rdp, &rdp->exp_stop_work);
        }
 
        /* Remove extra count and, if necessary, wait for CPUs to stop. */
        if (!atomic_dec_and_test(&rsp->expedited_need_qs))
-               wait_event(rsp->expedited_wq,
-                          !atomic_read(&rsp->expedited_need_qs));
+               synchronize_sched_expedited_wait(rsp);
 
        rcu_exp_gp_seq_end(rsp);
        mutex_unlock(&rnp->exp_funnel_mutex);
-       smp_mb(); /* ensure subsequent action seen after grace period. */
 
        put_online_cpus();
 }
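
The wakeup handshake depends on the initiator holding one extra count in
->expedited_need_qs, taken before this loop and outside the hunk, so the
counter cannot reach zero until every stop-work callback has been queued. In
outline, with cpu_in_eqs() and queue_stop() as hypothetical stand-ins for the
dynticks test and the stop_one_cpu_nowait() call above:

        atomic_set(&rsp->expedited_need_qs, 1); /* initiator's extra count */
        for_each_online_cpu(cpu) {
                if (cpu == raw_smp_processor_id() || cpu_in_eqs(cpu))
                        continue;               /* already quiescent */
                atomic_inc(&rsp->expedited_need_qs);
                queue_stop(cpu);        /* stopper decrements and may wake */
        }
        /* Dropping the extra count last prevents a premature zero/wakeup. */
        if (!atomic_dec_and_test(&rsp->expedited_need_qs))
                synchronize_sched_expedited_wait(rsp);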
@@ -3733,6 +3880,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
        WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
        rdp->cpu = cpu;
        rdp->rsp = rsp;
+       mutex_init(&rdp->exp_funnel_mutex);
        rcu_boot_init_nocb_percpu_data(rdp);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
@@ -3954,6 +4102,7 @@ static void __init rcu_init_one(struct rcu_state *rsp,
        static const char * const buf[] = RCU_NODE_NAME_INIT;
        static const char * const fqs[] = RCU_FQS_NAME_INIT;
        static const char * const exp[] = RCU_EXP_NAME_INIT;
+       static const char * const exp_sched[] = RCU_EXP_SCHED_NAME_INIT;
        static u8 fl_mask = 0x1;
 
        int levelcnt[RCU_NUM_LVLS];             /* # nodes in each level. */
@@ -4013,8 +4162,14 @@ static void __init rcu_init_one(struct rcu_state *rsp,
                        INIT_LIST_HEAD(&rnp->blkd_tasks);
                        rcu_init_one_nocb(rnp);
                        mutex_init(&rnp->exp_funnel_mutex);
-                       lockdep_set_class_and_name(&rnp->exp_funnel_mutex,
-                                                  &rcu_exp_class[i], exp[i]);
+                       if (rsp == &rcu_sched_state)
+                               lockdep_set_class_and_name(
+                                       &rnp->exp_funnel_mutex,
+                                       &rcu_exp_sched_class[i], exp_sched[i]);
+                       else
+                               lockdep_set_class_and_name(
+                                       &rnp->exp_funnel_mutex,
+                                       &rcu_exp_class[i], exp[i]);
                }
        }
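
One likely reason these mutexes need per-level lock classes at all: the
funnel climb takes a parent's ->exp_funnel_mutex while still holding the
child's, which lockdep would flag as recursive locking if every level shared
one class. The new rcu_exp_sched_class[] array extends that separation to the
rcu_sched tree (an inference from this hunk, not quoted from the changelog):

        mutex_lock(&child->exp_funnel_mutex);   /* class rcu_exp_class[i + 1] */
        mutex_lock(&parent->exp_funnel_mutex);  /* class rcu_exp_class[i] */
        mutex_unlock(&child->exp_funnel_mutex);
        /* With one shared class, the second lock would splat. */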