locking/rwsem: Enable lockless waiter wakeup(s)

author Davidlohr Bueso <dave@stgolabs.net>

Fri, 13 May 2016 18:56:26 +0000 (11:56 -0700)

committer Ingo Molnar <mingo@kernel.org>

Fri, 3 Jun 2016 07:47:10 +0000 (09:47 +0200)
author Davidlohr Bueso <dave@stgolabs.net>
Fri, 13 May 2016 18:56:26 +0000 (11:56 -0700)
committer Ingo Molnar <mingo@kernel.org>
Fri, 3 Jun 2016 07:47:10 +0000 (09:47 +0200)
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c

index 09e30c6..80b05ac 100644 (file)
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -114,12 +114,16 @@ enum rwsem_wake_type {
   *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
   *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
   * - there must be someone on the queue
- * - the spinlock must be held by the caller
+ * - the wait_lock must be held by the caller
+ * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
+ *   to actually wake up the blocked task(s) and drop the reference count,
+ *   preferably when the wait_lock is released
   * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only woken if downgrading is false
+ * - writers are only marked woken if downgrading is false
   */
  static struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
+__rwsem_mark_wake(struct rw_semaphore *sem,
+                 enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
  {
         struct rwsem_waiter *waiter;
         struct task_struct *tsk;
@@ -128,13 +132,16 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
  
         waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
         if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
-               if (wake_type == RWSEM_WAKE_ANY)
-                       /* Wake writer at the front of the queue, but do not
-                        * grant it the lock yet as we want other writers
-                        * to be able to steal it.  Readers, on the other hand,
-                        * will block as they will notice the queued writer.
+               if (wake_type == RWSEM_WAKE_ANY) {
+                       /*
+                        * Mark writer at the front of the queue for wakeup.
+                        * Until the task is actually awoken later by
+                        * the caller, other writers are able to steal it.
+                        * Readers, on the other hand, will block as they
+                        * will notice the queued writer.
                          */
-                       wake_up_process(waiter->task);
+                       wake_q_add(wake_q, waiter->task);
+               }
                 goto out;
         }
  
@@ -196,7 +203,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
                  */
                 smp_mb();
                 waiter->task = NULL;
-               wake_up_process(tsk);
+               wake_q_add(wake_q, tsk);
                 put_task_struct(tsk);
         } while (--loop);
  
@@ -216,6 +223,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
         long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
         struct rwsem_waiter waiter;
         struct task_struct *tsk = current;
+       WAKE_Q(wake_q);
  
         /* set up my own style of waitqueue */
         waiter.task = tsk;
@@ -238,9 +246,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
         if (count == RWSEM_WAITING_BIAS ||
             (count > RWSEM_WAITING_BIAS &&
              adjustment != -RWSEM_ACTIVE_READ_BIAS))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
  
         raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
  
         /* wait to be given the lock */
         while (true) {
@@ -440,6 +449,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
         bool waiting = true; /* any queued threads before us */
         struct rwsem_waiter waiter;
         struct rw_semaphore *ret = sem;
+       WAKE_Q(wake_q);
  
         /* undo write bias from down_write operation, stop active locking */
         count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
@@ -472,8 +482,19 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
                  * no active writers, the lock must be read owned; so we try to
                  * wake any read locks that were queued ahead of us.
                  */
-               if (count > RWSEM_WAITING_BIAS)
-                       sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+               if (count > RWSEM_WAITING_BIAS) {
+                       WAKE_Q(wake_q);
+
+                       sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+                       /*
+                        * The wakeup is normally called _after_ the wait_lock
+                        * is released, but given that we are proactively waking
+                        * readers we can deal with the wake_q overhead as it is
+                        * similar to releasing and taking the wait_lock again
+                        * for attempting rwsem_try_write_lock().
+                        */
+                       wake_up_q(&wake_q);
+               }
  
         } else
                 count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
@@ -509,8 +530,9 @@ out_nolock:
         if (list_empty(&sem->wait_list))
                 rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
         else
-               __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
         raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
  
         return ERR_PTR(-EINTR);
  }
@@ -537,6 +559,7 @@ __visible
  struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
  {
         unsigned long flags;
+       WAKE_Q(wake_q);
  
         /*
          * If a spinner is present, it is not necessary to do the wakeup.
@@ -573,9 +596,10 @@ locked:
  
         /* do nothing if list empty */
         if (!list_empty(&sem->wait_list))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
  
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
  
         return sem;
  }
@@ -590,14 +614,16 @@ __visible
  struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
  {
         unsigned long flags;
+       WAKE_Q(wake_q);
  
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
  
         /* do nothing if list empty */
         if (!list_empty(&sem->wait_list))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
  
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
  
         return sem;
  }
author	Davidlohr Bueso <dave@stgolabs.net>
	Fri, 13 May 2016 18:56:26 +0000 (11:56 -0700)
committer	Ingo Molnar <mingo@kernel.org>
	Fri, 3 Jun 2016 07:47:10 +0000 (09:47 +0200)