#include <linux/atomic.h>
#include <linux/rwsem.h>
#include <linux/percpu.h>
#include <linux/wait.h>
#include <linux/lockdep.h>
#include <linux/percpu-rwsem.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/errno.h>

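/*
 * Implementation of the slow paths of percpu_rw_semaphore: a reader/writer
 * semaphore that is very cheap for readers and comparatively expensive for
 * writers.
 *
 * In the writer-less common case, percpu_down_read()/percpu_up_read() in
 * include/linux/percpu-rwsem.h only update a per-CPU counter; rcu_sync is
 * used to tell readers that a writer is (or may be) around, at which point
 * they fall back to the functions below and serialize on the embedded
 * rw_sem.
 *
 * A minimal usage sketch (illustrative only; "my_sem" is a made-up example
 * and error handling is elided):
 *
 *      static struct percpu_rw_semaphore my_sem;
 *
 *      percpu_init_rwsem(&my_sem);
 *
 *      percpu_down_read(&my_sem);      percpu_down_write(&my_sem);
 *      ...read-side section...         ...write-side section...
 *      percpu_up_read(&my_sem);        percpu_up_write(&my_sem);
 */
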
int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
                        const char *name, struct lock_class_key *rwsem_key)
{
        sem->read_count = alloc_percpu(int);
        if (unlikely(!sem->read_count))
                return -ENOMEM;

        /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
        rcu_sync_init(&sem->rss, RCU_SCHED_SYNC);
        __init_rwsem(&sem->rw_sem, name, rwsem_key);
        init_waitqueue_head(&sem->writer);
        sem->readers_block = 0;
        return 0;
}
EXPORT_SYMBOL_GPL(__percpu_init_rwsem);

void percpu_free_rwsem(struct percpu_rw_semaphore *sem)
{
        /*
         * XXX: temporary kludge. The error path in alloc_super()
         * assumes that percpu_free_rwsem() is safe after kzalloc().
         */
        if (!sem->read_count)
                return;

        rcu_sync_dtor(&sem->rss);
        free_percpu(sem->read_count);
        sem->read_count = NULL; /* catch use after free bugs */
}
EXPORT_SYMBOL_GPL(percpu_free_rwsem);

int __percpu_down_read(struct percpu_rw_semaphore *sem, int try)
{
        /*
         * Due to having preemption disabled the decrement happens on
         * the same CPU as the increment, avoiding the
         * increment-on-one-CPU-and-decrement-on-another problem.
         *
         * If the reader misses the writer's assignment of readers_block, then
         * the writer is guaranteed to see the reader's increment.
         *
         * Conversely, any readers that increment their sem->read_count after
         * the writer looks are guaranteed to see the readers_block value,
         * which in turn means that they are guaranteed to immediately
         * decrement their sem->read_count, so that it doesn't matter that the
         * writer missed them.
         */
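        /*
         * Sketch of the key interleaving (a slow-path reader vs. a writer;
         * the reader's increment was already done in percpu_down_read()):
         *
         *      reader                          writer
         *      __this_cpu_inc(*read_count)     WRITE_ONCE(readers_block, 1)
         *      smp_mb()  -- A --               smp_mb()  -- D --
         *      load readers_block              per_cpu_sum(*read_count)
         *
         * Because A pairs with D, at least one side observes the other's
         * store: either this reader sees readers_block and backs out below,
         * or the writer sees a non-zero sum and waits for us.
         */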

        smp_mb(); /* A matches D */

        /*
         * If !readers_block the critical section starts here, matched by the
         * release in percpu_up_write().
         */
        if (likely(!smp_load_acquire(&sem->readers_block)))
                return 1;

        /*
         * Per the above comment; we still have preemption disabled and
         * will thus decrement on the same CPU as we incremented.
         */
        __percpu_up_read(sem);

        if (try)
                return 0;

        /*
         * We either call schedule() in the wait, or we'll fall through
         * and reschedule on the preempt_enable() in percpu_down_read().
         */
        preempt_enable_no_resched();

        /*
         * Avoid lockdep for the down/up_read() below; we already have the
         * lockdep annotation for this lock.
         */
        __down_read(&sem->rw_sem);
        this_cpu_inc(*sem->read_count);
        __up_read(&sem->rw_sem);

        preempt_disable();
        return 1;
}
EXPORT_SYMBOL_GPL(__percpu_down_read);

void __percpu_up_read(struct percpu_rw_semaphore *sem)
{
        smp_mb(); /* B matches C */
        /*
         * In other words, if they see our decrement (presumably to aggregate
         * zero, as that is the only time it matters) they will also see our
         * critical section.
         */
        __this_cpu_dec(*sem->read_count);

        /* Prod writer to recheck readers_active */
        wake_up(&sem->writer);
}
EXPORT_SYMBOL_GPL(__percpu_up_read);

#define per_cpu_sum(var)                                                \
({                                                                      \
        typeof(var) __sum = 0;                                          \
        int cpu;                                                        \
        compiletime_assert_atomic_type(__sum);                          \
        for_each_possible_cpu(cpu)                                      \
                __sum += per_cpu(var, cpu);                             \
        __sum;                                                          \
})
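
/*
 * Note that a single per-CPU read_count can legitimately go negative: a
 * reader may, for instance, do percpu_down_read() on CPU0 and, after being
 * migrated, percpu_up_read() on CPU1, leaving CPU0 at +1 and CPU1 at -1.
 * Only the sum over all possible CPUs is meaningful, which is what
 * per_cpu_sum() computes and readers_active_check() below tests for zero.
 */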

/*
 * Return true if the modular sum of the sem->read_count per-CPU variable is
 * zero.  If this sum is zero, then it is stable due to the fact that if any
 * newly arriving readers increment a given counter, they will immediately
 * decrement that same counter.
 */
static bool readers_active_check(struct percpu_rw_semaphore *sem)
{
        if (per_cpu_sum(*sem->read_count) != 0)
                return false;

        /*
         * If we observed the decrement; ensure we see the entire critical
         * section.
         */

        smp_mb(); /* C matches B */

        return true;
}
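
/*
 * Likewise, the B/C pairing: once readers_active_check() above observes the
 * decrement that brings the sum to zero, it is also guaranteed to observe
 * everything the exiting reader did beforehand, e.g.:
 *
 *      reader (__percpu_up_read)       writer (readers_active_check)
 *      ...reader critical section...
 *      smp_mb()  -- B --               per_cpu_sum(*read_count) == 0
 *      __this_cpu_dec(*read_count)     smp_mb()  -- C --
 *                                      ...writer critical section...
 */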

void percpu_down_write(struct percpu_rw_semaphore *sem)
{
        /* Notify readers to take the slow path. */
        rcu_sync_enter(&sem->rss);

        down_write(&sem->rw_sem);

        /*
         * Notify new readers to block; up until now, and thus throughout the
         * longish rcu_sync_enter() above, new readers could still come in.
         */
        WRITE_ONCE(sem->readers_block, 1);

        smp_mb(); /* D matches A */

        /*
         * If they don't see our write to readers_block, then we are
         * guaranteed to see their sem->read_count increment, and therefore
         * will wait for them.
         */

        /* Wait for all now active readers to complete. */
        wait_event(sem->writer, readers_active_check(sem));
}
EXPORT_SYMBOL_GPL(percpu_down_write);

void percpu_up_write(struct percpu_rw_semaphore *sem)
{
        /*
         * Signal the writer is done, no fast path yet.
         *
         * One reason that we cannot just immediately flip to readers_fast is
         * that new readers might fail to see the results of this writer's
         * critical section.
         *
         * Therefore we force it through the slow path which guarantees an
         * acquire and thereby guarantees the critical section's consistency.
         */
        smp_store_release(&sem->readers_block, 0);

        /*
         * Release the write lock, this will allow readers back in the game.
         */
        up_write(&sem->rw_sem);

        /*
         * Once this completes (at least one RCU-sched grace period hence) the
         * reader fast path will be available again. Safe to use outside the
         * exclusive write lock because it is counting.
         */
        rcu_sync_exit(&sem->rss);
}
EXPORT_SYMBOL_GPL(percpu_up_write);