Merge branch 'locking/urgent' into locking/core, to pick up dependency

author Ingo Molnar <mingo@kernel.org>

Wed, 8 Jun 2016 12:35:29 +0000 (14:35 +0200)

committer Ingo Molnar <mingo@kernel.org>

Wed, 8 Jun 2016 12:35:29 +0000 (14:35 +0200)
author Ingo Molnar <mingo@kernel.org>
Wed, 8 Jun 2016 12:35:29 +0000 (14:35 +0200)
committer Ingo Molnar <mingo@kernel.org>
Wed, 8 Jun 2016 12:35:29 +0000 (14:35 +0200)
diff --git a/include/linux/compiler.h b/include/linux/compiler.h

index 793c082..06f27fd 100644 (file)
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -545,10 +545,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
   * Similar to rcu_dereference(), but for situations where the pointed-to
   * object's lifetime is managed by something other than RCU.  That
   * "something other" might be reference counting or simple immortality.
+ *
+ * The seemingly unused void * variable is to validate @p is indeed a pointer
+ * type. All pointer types silently cast to void *.
   */
  #define lockless_dereference(p) \
  ({ \
         typeof(p) _________p1 = READ_ONCE(p); \
+       __maybe_unused const void * const _________p2 = _________p1; \
         smp_read_barrier_depends(); /* Dependency order vs. p above. */ \
         (_________p1); \
  })
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h

index 84f542d..1c7eec0 100644 (file)
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -136,14 +136,12 @@ static inline bool __ref_is_percpu(struct percpu_ref *ref,
          * used as a pointer.  If the compiler generates a separate fetch
          * when using it as a pointer, __PERCPU_REF_ATOMIC may be set in
          * between contaminating the pointer value, meaning that
-        * ACCESS_ONCE() is required when fetching it.
-        *
-        * Also, we need a data dependency barrier to be paired with
-        * smp_store_release() in __percpu_ref_switch_to_percpu().
-        *
-        * Use lockless deref which contains both.
+        * READ_ONCE() is required when fetching it.
          */
-       percpu_ptr = lockless_dereference(ref->percpu_count_ptr);
+       percpu_ptr = READ_ONCE(ref->percpu_count_ptr);
+
+       /* paired with smp_store_release() in __percpu_ref_switch_to_percpu() */
+       smp_read_barrier_depends();
  
         /*
          * Theoretically, the following could test just ATOMIC; however,
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c

index 81f1a71..589d763 100644 (file)
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -46,6 +46,7 @@
  #include <linux/gfp.h>
  #include <linux/kmemcheck.h>
  #include <linux/random.h>
+#include <linux/jhash.h>
  
  #include <asm/sections.h>
  
@@ -309,10 +310,14 @@ static struct hlist_head chainhash_table[CHAINHASH_SIZE];
   * It's a 64-bit hash, because it's important for the keys to be
   * unique.
   */
-#define iterate_chain_key(key1, key2) \
-       (((key1) << MAX_LOCKDEP_KEYS_BITS) ^ \
-       ((key1) >> (64-MAX_LOCKDEP_KEYS_BITS)) ^ \
-       (key2))
+static inline u64 iterate_chain_key(u64 key, u32 idx)
+{
+       u32 k0 = key, k1 = key >> 32;
+
+       __jhash_mix(idx, k0, k1); /* Macro that modifies arguments! */
+
+       return k0 | (u64)k1 << 32;
+}
  
  void lockdep_off(void)
  {
diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h

index 0799fd3..372e653 100644 (file)
--- a/kernel/locking/mutex-debug.h
+++ b/kernel/locking/mutex-debug.h
@@ -29,12 +29,12 @@ extern void debug_mutex_init(struct mutex *lock, const char *name,
  
  static inline void mutex_set_owner(struct mutex *lock)
  {
-       lock->owner = current;
+       WRITE_ONCE(lock->owner, current);
  }
  
  static inline void mutex_clear_owner(struct mutex *lock)
  {
-       lock->owner = NULL;
+       WRITE_ONCE(lock->owner, NULL);
  }
  
  #define spin_lock_mutex(lock, flags)                   \
diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h

index 5cda397..12f9619 100644 (file)
--- a/kernel/locking/mutex.h
+++ b/kernel/locking/mutex.h
@@ -17,14 +17,20 @@
                 __list_del((waiter)->list.prev, (waiter)->list.next)
  
  #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+/*
+ * The mutex owner can get read and written to locklessly.
+ * We should use WRITE_ONCE when writing the owner value to
+ * avoid store tearing, otherwise, a thread could potentially
+ * read a partially written and incomplete owner value.
+ */
  static inline void mutex_set_owner(struct mutex *lock)
  {
-       lock->owner = current;
+       WRITE_ONCE(lock->owner, current);
  }
  
  static inline void mutex_clear_owner(struct mutex *lock)
  {
-       lock->owner = NULL;
+       WRITE_ONCE(lock->owner, NULL);
  }
  #else
  static inline void mutex_set_owner(struct mutex *lock)
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c

index 3e74660..1ec0f48 100644 (file)
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1478,7 +1478,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_timed_lock);
   */
  int __sched rt_mutex_trylock(struct rt_mutex *lock)
  {
-       if (WARN_ON(in_irq() || in_nmi() || in_serving_softirq()))
+       if (WARN_ON_ONCE(in_irq() || in_nmi() || in_serving_softirq()))
                 return 0;
  
         return rt_mutex_fasttrylock(lock, rt_mutex_slowtrylock);
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c

index 09e30c6..b957da7 100644 (file)
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -114,12 +114,16 @@ enum rwsem_wake_type {
   *   - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
   *   - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
   * - there must be someone on the queue
- * - the spinlock must be held by the caller
+ * - the wait_lock must be held by the caller
+ * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
+ *   to actually wake up the blocked task(s) and drop the reference count,
+ *   preferably when the wait_lock is released
   * - woken process blocks are discarded from the list after having task zeroed
- * - writers are only woken if downgrading is false
+ * - writers are only marked woken if downgrading is false
   */
  static struct rw_semaphore *
-__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
+__rwsem_mark_wake(struct rw_semaphore *sem,
+                 enum rwsem_wake_type wake_type, struct wake_q_head *wake_q)
  {
         struct rwsem_waiter *waiter;
         struct task_struct *tsk;
@@ -128,13 +132,16 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
  
         waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
         if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
-               if (wake_type == RWSEM_WAKE_ANY)
-                       /* Wake writer at the front of the queue, but do not
-                        * grant it the lock yet as we want other writers
-                        * to be able to steal it.  Readers, on the other hand,
-                        * will block as they will notice the queued writer.
+               if (wake_type == RWSEM_WAKE_ANY) {
+                       /*
+                        * Mark writer at the front of the queue for wakeup.
+                        * Until the task is actually awoken later by
+                        * the caller, other writers are able to steal it.
+                        * Readers, on the other hand, will block as they
+                        * will notice the queued writer.
                          */
-                       wake_up_process(waiter->task);
+                       wake_q_add(wake_q, waiter->task);
+               }
                 goto out;
         }
  
@@ -187,17 +194,15 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
                 waiter = list_entry(next, struct rwsem_waiter, list);
                 next = waiter->list.next;
                 tsk = waiter->task;
+
+               wake_q_add(wake_q, tsk);
                 /*
-                * Make sure we do not wakeup the next reader before
-                * setting the nil condition to grant the next reader;
-                * otherwise we could miss the wakeup on the other
-                * side and end up sleeping again. See the pairing
-                * in rwsem_down_read_failed().
+                * Ensure that the last operation is setting the reader
+                * waiter to nil such that rwsem_down_read_failed() cannot
+                * race with do_exit() by always holding a reference count
+                * to the task to wakeup.
                  */
-               smp_mb();
-               waiter->task = NULL;
-               wake_up_process(tsk);
-               put_task_struct(tsk);
+               smp_store_release(&waiter->task, NULL);
         } while (--loop);
  
         sem->wait_list.next = next;
@@ -216,11 +221,11 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
         long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
         struct rwsem_waiter waiter;
         struct task_struct *tsk = current;
+       WAKE_Q(wake_q);
  
         /* set up my own style of waitqueue */
         waiter.task = tsk;
         waiter.type = RWSEM_WAITING_FOR_READ;
-       get_task_struct(tsk);
  
         raw_spin_lock_irq(&sem->wait_lock);
         if (list_empty(&sem->wait_list))
@@ -238,9 +243,10 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
         if (count == RWSEM_WAITING_BIAS ||
             (count > RWSEM_WAITING_BIAS &&
              adjustment != -RWSEM_ACTIVE_READ_BIAS))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
  
         raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
  
         /* wait to be given the lock */
         while (true) {
@@ -255,17 +261,28 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
  }
  EXPORT_SYMBOL(rwsem_down_read_failed);
  
+/*
+ * This function must be called with the sem->wait_lock held to prevent
+ * race conditions between checking the rwsem wait list and setting the
+ * sem->count accordingly.
+ */
  static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
  {
         /*
-        * Try acquiring the write lock. Check count first in order
-        * to reduce unnecessary expensive cmpxchg() operations.
+        * Avoid trying to acquire write lock if count isn't RWSEM_WAITING_BIAS.
          */
-       if (count == RWSEM_WAITING_BIAS &&
-           cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
-                   RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
-               if (!list_is_singular(&sem->wait_list))
-                       rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+       if (count != RWSEM_WAITING_BIAS)
+               return false;
+
+       /*
+        * Acquire the lock by trying to set it to ACTIVE_WRITE_BIAS. If there
+        * are other tasks on the wait list, we need to add on WAITING_BIAS.
+        */
+       count = list_is_singular(&sem->wait_list) ?
+                       RWSEM_ACTIVE_WRITE_BIAS :
+                       RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS;
+
+       if (cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS, count) == RWSEM_WAITING_BIAS) {
                 rwsem_set_owner(sem);
                 return true;
         }
@@ -440,6 +457,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
         bool waiting = true; /* any queued threads before us */
         struct rwsem_waiter waiter;
         struct rw_semaphore *ret = sem;
+       WAKE_Q(wake_q);
  
         /* undo write bias from down_write operation, stop active locking */
         count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
@@ -472,8 +490,19 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
                  * no active writers, the lock must be read owned; so we try to
                  * wake any read locks that were queued ahead of us.
                  */
-               if (count > RWSEM_WAITING_BIAS)
-                       sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+               if (count > RWSEM_WAITING_BIAS) {
+                       WAKE_Q(wake_q);
+
+                       sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READERS, &wake_q);
+                       /*
+                        * The wakeup is normally called _after_ the wait_lock
+                        * is released, but given that we are proactively waking
+                        * readers we can deal with the wake_q overhead as it is
+                        * similar to releasing and taking the wait_lock again
+                        * for attempting rwsem_try_write_lock().
+                        */
+                       wake_up_q(&wake_q);
+               }
  
         } else
                 count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
@@ -509,8 +538,9 @@ out_nolock:
         if (list_empty(&sem->wait_list))
                 rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem);
         else
-               __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
         raw_spin_unlock_irq(&sem->wait_lock);
+       wake_up_q(&wake_q);
  
         return ERR_PTR(-EINTR);
  }
@@ -537,6 +567,7 @@ __visible
  struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
  {
         unsigned long flags;
+       WAKE_Q(wake_q);
  
         /*
          * If a spinner is present, it is not necessary to do the wakeup.
@@ -573,9 +604,10 @@ locked:
  
         /* do nothing if list empty */
         if (!list_empty(&sem->wait_list))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
  
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
  
         return sem;
  }
@@ -590,14 +622,16 @@ __visible
  struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
  {
         unsigned long flags;
+       WAKE_Q(wake_q);
  
         raw_spin_lock_irqsave(&sem->wait_lock, flags);
  
         /* do nothing if list empty */
         if (!list_empty(&sem->wait_list))
-               sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
+               sem = __rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
  
         raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
+       wake_up_q(&wake_q);
  
         return sem;
  }
author	Ingo Molnar <mingo@kernel.org>
	Wed, 8 Jun 2016 12:35:29 +0000 (14:35 +0200)
committer	Ingo Molnar <mingo@kernel.org>
	Wed, 8 Jun 2016 12:35:29 +0000 (14:35 +0200)
include/linux/compiler.h		patch \| blob \| history
include/linux/percpu-refcount.h		patch \| blob \| history
kernel/locking/lockdep.c		patch \| blob \| history
kernel/locking/mutex-debug.h		patch \| blob \| history
kernel/locking/mutex.h		patch \| blob \| history
kernel/locking/rtmutex.c		patch \| blob \| history
kernel/locking/rwsem-xadd.c		patch \| blob \| history