lib/percpu-refcount.c

   1 #define pr_fmt(fmt) "%s: " fmt "\n", __func__
   2
   3 #include <linux/kernel.h>
   4 #include <linux/percpu-refcount.h>
   5
   6 /*
   7  * Initially, a percpu refcount is just a set of percpu counters. Initially, we
   8  * don't try to detect the ref hitting 0 - which means that get/put can just
   9  * increment or decrement the local counter. Note that the counter on a
  10  * particular cpu can (and will) wrap - this is fine, when we go to shutdown the
  11  * percpu counters will all sum to the correct value
  12  *
   13  * (More precisely: because modular arithmetic is commutative the sum of all the
  14  * pcpu_count vars will be equal to what it would have been if all the gets and
  15  * puts were done to a single integer, even if some of the percpu integers
  16  * overflow or underflow).
  17  *
  18  * The real trick to implementing percpu refcounts is shutdown. We can't detect
  19  * the ref hitting 0 on every put - this would require global synchronization
  20  * and defeat the whole purpose of using percpu refs.
  21  *
  22  * What we do is require the user to keep track of the initial refcount; we know
  23  * the ref can't hit 0 before the user drops the initial ref, so as long as we
  24  * convert to non percpu mode before the initial ref is dropped everything
  25  * works.
  26  *
  27  * Converting to non percpu mode is done with some RCUish stuff in
  28  * percpu_ref_kill. Additionally, we need a bias value so that the
  29  * atomic_long_t can't hit 0 before we've added up all the percpu refs.
  30  */
  31
  32 #define PCPU_COUNT_BIAS         (1LU << (BITS_PER_LONG - 1))
  33
  34 static unsigned long __percpu *pcpu_count_ptr(struct percpu_ref *ref)
  35 {
  36         return (unsigned long __percpu *)(ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
  37 }
  38
  39 /**
  40  * percpu_ref_init - initialize a percpu refcount
  41  * @ref: percpu_ref to initialize
  42  * @release: function which will be called when refcount hits 0
  43  * @gfp: allocation mask to use
  44  *
  45  * Initializes the refcount in single atomic counter mode with a refcount of 1;
  46  * analagous to atomic_long_set(ref, 1).
  47  *
  48  * Note that @release must not sleep - it may potentially be called from RCU
  49  * callback context by percpu_ref_kill().
  50  */
  51 int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
  52                     gfp_t gfp)
  53 {
  54         atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
  55
  56         ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned long, gfp);
  57         if (!ref->pcpu_count_ptr)
  58                 return -ENOMEM;
  59
  60         ref->release = release;
  61         return 0;
  62 }
  63 EXPORT_SYMBOL_GPL(percpu_ref_init);
  64
  65 /**
  66  * percpu_ref_exit - undo percpu_ref_init()
  67  * @ref: percpu_ref to exit
  68  *
  69  * This function exits @ref.  The caller is responsible for ensuring that
  70  * @ref is no longer in active use.  The usual places to invoke this
  71  * function from are the @ref->release() callback or in init failure path
  72  * where percpu_ref_init() succeeded but other parts of the initialization
  73  * of the embedding object failed.
  74  */
  75 void percpu_ref_exit(struct percpu_ref *ref)
  76 {
  77         unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
  78
  79         if (pcpu_count) {
  80                 free_percpu(pcpu_count);
  81                 ref->pcpu_count_ptr = PCPU_REF_DEAD;
  82         }
  83 }
  84 EXPORT_SYMBOL_GPL(percpu_ref_exit);
  85
  86 static void percpu_ref_kill_rcu(struct rcu_head *rcu)
  87 {
  88         struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
  89         unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
  90         unsigned long count = 0;
  91         int cpu;
  92
  93         for_each_possible_cpu(cpu)
  94                 count += *per_cpu_ptr(pcpu_count, cpu);
  95
  96         pr_debug("global %ld pcpu %ld",
  97                  atomic_long_read(&ref->count), (long)count);
  98
  99         /*
 100          * It's crucial that we sum the percpu counters _before_ adding the sum
 101          * to &ref->count; since gets could be happening on one cpu while puts
 102          * happen on another, adding a single cpu's count could cause
 103          * @ref->count to hit 0 before we've got a consistent value - but the
 104          * sum of all the counts will be consistent and correct.
 105          *
 106          * Subtracting the bias value then has to happen _after_ adding count to
 107          * &ref->count; we need the bias value to prevent &ref->count from
 108          * reaching 0 before we add the percpu counts. But doing it at the same
 109          * time is equivalent and saves us atomic operations:
 110          */
 111         atomic_long_add((long)count - PCPU_COUNT_BIAS, &ref->count);
 112
 113         WARN_ONCE(atomic_long_read(&ref->count) <= 0,
 114                   "percpu ref (%pf) <= 0 (%ld) after killed",
 115                   ref->release, atomic_long_read(&ref->count));
 116
 117         /* @ref is viewed as dead on all CPUs, send out kill confirmation */
 118         if (ref->confirm_kill)
 119                 ref->confirm_kill(ref);
 120
 121         /*
 122          * Now we're in single atomic_long_t mode with a consistent
 123          * refcount, so it's safe to drop our initial ref:
 124          */
 125         percpu_ref_put(ref);
 126 }
 127
 128 /**
 129  * percpu_ref_kill_and_confirm - drop the initial ref and schedule confirmation
 130  * @ref: percpu_ref to kill
 131  * @confirm_kill: optional confirmation callback
 132  *
 133  * Equivalent to percpu_ref_kill() but also schedules kill confirmation if
 134  * @confirm_kill is not NULL.  @confirm_kill, which may not block, will be
 135  * called after @ref is seen as dead from all CPUs - all further
 136  * invocations of percpu_ref_tryget_live() will fail.  See
 137  * percpu_ref_tryget_live() for more details.
 138  *
 139  * Due to the way percpu_ref is implemented, @confirm_kill will be called
 140  * after at least one full RCU grace period has passed but this is an
 141  * implementation detail and callers must not depend on it.
 142  */
 143 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 144                                  percpu_ref_func_t *confirm_kill)
 145 {
 146         WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
 147                   "%s called more than once on %pf!", __func__, ref->release);
 148
 149         ref->pcpu_count_ptr |= PCPU_REF_DEAD;
 150         ref->confirm_kill = confirm_kill;
 151
 152         call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
 153 }
 154 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 155
 156 /**
 157  * percpu_ref_reinit - re-initialize a percpu refcount
 158  * @ref: perpcu_ref to re-initialize
 159  *
 160  * Re-initialize @ref so that it's in the same state as when it finished
 161  * percpu_ref_init().  @ref must have been initialized successfully, killed
 162  * and reached 0 but not exited.
 163  *
 164  * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
 165  * this function is in progress.
 166  */
 167 void percpu_ref_reinit(struct percpu_ref *ref)
 168 {
 169         unsigned long __percpu *pcpu_count = pcpu_count_ptr(ref);
 170         int cpu;
 171
 172         BUG_ON(!pcpu_count);
 173         WARN_ON_ONCE(!percpu_ref_is_zero(ref));
 174
 175         atomic_long_set(&ref->count, 1 + PCPU_COUNT_BIAS);
 176
 177         /*
 178          * Restore per-cpu operation.  smp_store_release() is paired with
 179          * smp_read_barrier_depends() in __pcpu_ref_alive() and guarantees
 180          * that the zeroing is visible to all percpu accesses which can see
 181          * the following PCPU_REF_DEAD clearing.
 182          */
 183         for_each_possible_cpu(cpu)
 184                 *per_cpu_ptr(pcpu_count, cpu) = 0;
 185
 186         smp_store_release(&ref->pcpu_count_ptr,
 187                           ref->pcpu_count_ptr & ~PCPU_REF_DEAD);
 188 }
 189 EXPORT_SYMBOL_GPL(percpu_ref_reinit);