include/linux/rhashtable.h

   1 /*
   2  * Resizable, Scalable, Concurrent Hash Table
   3  *
   4  * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au>
   5  * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch>
   6  * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
   7  *
   8  * Code partially derived from nft_hash
   9  * Rewritten with rehash code from br_multicast plus single list
  10  * pointer as suggested by Josh Triplett
  11  *
  12  * This program is free software; you can redistribute it and/or modify
  13  * it under the terms of the GNU General Public License version 2 as
  14  * published by the Free Software Foundation.
  15  */
  16
  17 #ifndef _LINUX_RHASHTABLE_H
  18 #define _LINUX_RHASHTABLE_H
  19
  20 #include <linux/atomic.h>
  21 #include <linux/compiler.h>
  22 #include <linux/err.h>
  23 #include <linux/errno.h>
  24 #include <linux/jhash.h>
  25 #include <linux/list_nulls.h>
  26 #include <linux/workqueue.h>
  27 #include <linux/mutex.h>
  28 #include <linux/rcupdate.h>
  29
  30 /*
  31  * The end of the chain is marked with a special nulls marks which has
  32  * the following format:
  33  *
  34  * +-------+-----------------------------------------------------+-+
  35  * | Base  |                      Hash                           |1|
  36  * +-------+-----------------------------------------------------+-+
  37  *
  38  * Base (4 bits) : Reserved to distinguish between multiple tables.
  39  *                 Specified via &struct rhashtable_params.nulls_base.
  40  * Hash (27 bits): Full hash (unmasked) of first element added to bucket
  41  * 1 (1 bit)     : Nulls marker (always set)
  42  *
  43  * The remaining bits of the next pointer remain unused for now.
  44  */
  45 #define RHT_BASE_BITS           4
  46 #define RHT_HASH_BITS           27
  47 #define RHT_BASE_SHIFT          RHT_HASH_BITS
  48
  49 /* Base bits plus 1 bit for nulls marker */
  50 #define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1)
  51
  52 struct rhash_head {
  53         struct rhash_head __rcu         *next;
  54 };
  55
  56 /**
  57  * struct bucket_table - Table of hash buckets
  58  * @size: Number of hash buckets
  59  * @rehash: Current bucket being rehashed
  60  * @hash_rnd: Random seed to fold into hash
  61  * @locks_mask: Mask to apply before accessing locks[]
  62  * @locks: Array of spinlocks protecting individual buckets
  63  * @walkers: List of active walkers
  64  * @rcu: RCU structure for freeing the table
  65  * @future_tbl: Table under construction during rehashing
  66  * @buckets: size * hash buckets
  67  */
  68 struct bucket_table {
  69         unsigned int            size;
  70         unsigned int            rehash;
  71         u32                     hash_rnd;
  72         unsigned int            locks_mask;
  73         spinlock_t              *locks;
  74         struct list_head        walkers;
  75         struct rcu_head         rcu;
  76
  77         struct bucket_table __rcu *future_tbl;
  78
  79         struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp;
  80 };
  81
  82 /**
  83  * struct rhashtable_compare_arg - Key for the function rhashtable_compare
  84  * @ht: Hash table
  85  * @key: Key to compare against
  86  */
  87 struct rhashtable_compare_arg {
  88         struct rhashtable *ht;
  89         const void *key;
  90 };
  91
  92 typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
  93 typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed);
  94 typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg,
  95                                const void *obj);
  96
  97 struct rhashtable;
  98
  99 /**
 100  * struct rhashtable_params - Hash table construction parameters
 101  * @nelem_hint: Hint on number of elements, should be 75% of desired size
 102  * @key_len: Length of key
 103  * @key_offset: Offset of key in struct to be hashed
 104  * @head_offset: Offset of rhash_head in struct to be hashed
 105  * @insecure_max_entries: Maximum number of entries (may be exceeded)
 106  * @max_size: Maximum size while expanding
 107  * @min_size: Minimum size while shrinking
 108  * @nulls_base: Base value to generate nulls marker
 109  * @insecure_elasticity: Set to true to disable chain length checks
 110  * @automatic_shrinking: Enable automatic shrinking of tables
 111  * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
 112  * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash)
 113  * @obj_hashfn: Function to hash object
 114  * @obj_cmpfn: Function to compare key with object
 115  */
 116 struct rhashtable_params {
 117         size_t                  nelem_hint;
 118         size_t                  key_len;
 119         size_t                  key_offset;
 120         size_t                  head_offset;
 121         unsigned int            insecure_max_entries;
 122         unsigned int            max_size;
 123         unsigned int            min_size;
 124         u32                     nulls_base;
 125         bool                    insecure_elasticity;
 126         bool                    automatic_shrinking;
 127         size_t                  locks_mul;
 128         rht_hashfn_t            hashfn;
 129         rht_obj_hashfn_t        obj_hashfn;
 130         rht_obj_cmpfn_t         obj_cmpfn;
 131 };
 132
 133 /**
 134  * struct rhashtable - Hash table handle
 135  * @tbl: Bucket table
 136  * @nelems: Number of elements in table
 137  * @key_len: Key length for hashfn
 138  * @elasticity: Maximum chain length before rehash
 139  * @p: Configuration parameters
 140  * @run_work: Deferred worker to expand/shrink asynchronously
 141  * @mutex: Mutex to protect current/future table swapping
 142  * @lock: Spin lock to protect walker list
 143  */
 144 struct rhashtable {
 145         struct bucket_table __rcu       *tbl;
 146         atomic_t                        nelems;
 147         unsigned int                    key_len;
 148         unsigned int                    elasticity;
 149         struct rhashtable_params        p;
 150         struct work_struct              run_work;
 151         struct mutex                    mutex;
 152         spinlock_t                      lock;
 153 };
 154
 155 /**
 156  * struct rhashtable_walker - Hash table walker
 157  * @list: List entry on list of walkers
 158  * @tbl: The table that we were walking over
 159  */
 160 struct rhashtable_walker {
 161         struct list_head list;
 162         struct bucket_table *tbl;
 163 };
 164
 165 /**
 166  * struct rhashtable_iter - Hash table iterator, fits into netlink cb
 167  * @ht: Table to iterate through
 168  * @p: Current pointer
 169  * @walker: Associated rhashtable walker
 170  * @slot: Current slot
 171  * @skip: Number of entries to skip in slot
 172  */
 173 struct rhashtable_iter {
 174         struct rhashtable *ht;
 175         struct rhash_head *p;
 176         struct rhashtable_walker walker;
 177         unsigned int slot;
 178         unsigned int skip;
 179 };
 180
 181 static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
 182 {
 183         return NULLS_MARKER(ht->p.nulls_base + hash);
 184 }
 185
 186 #define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
 187         ((ptr) = (typeof(ptr)) rht_marker(ht, hash))
 188
 189 static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
 190 {
 191         return ((unsigned long) ptr & 1);
 192 }
 193
 194 static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
 195 {
 196         return ((unsigned long) ptr) >> 1;
 197 }
 198
 199 static inline void *rht_obj(const struct rhashtable *ht,
 200                             const struct rhash_head *he)
 201 {
 202         return (char *)he - ht->p.head_offset;
 203 }
 204
 205 static inline unsigned int rht_bucket_index(const struct bucket_table *tbl,
 206                                             unsigned int hash)
 207 {
 208         return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1);
 209 }
 210
 211 static inline unsigned int rht_key_hashfn(
 212         struct rhashtable *ht, const struct bucket_table *tbl,
 213         const void *key, const struct rhashtable_params params)
 214 {
 215         unsigned int hash;
 216
 217         /* params must be equal to ht->p if it isn't constant. */
 218         if (!__builtin_constant_p(params.key_len))
 219                 hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd);
 220         else if (params.key_len) {
 221                 unsigned int key_len = params.key_len;
 222
 223                 if (params.hashfn)
 224                         hash = params.hashfn(key, key_len, tbl->hash_rnd);
 225                 else if (key_len & (sizeof(u32) - 1))
 226                         hash = jhash(key, key_len, tbl->hash_rnd);
 227                 else
 228                         hash = jhash2(key, key_len / sizeof(u32),
 229                                       tbl->hash_rnd);
 230         } else {
 231                 unsigned int key_len = ht->p.key_len;
 232
 233                 if (params.hashfn)
 234                         hash = params.hashfn(key, key_len, tbl->hash_rnd);
 235                 else
 236                         hash = jhash(key, key_len, tbl->hash_rnd);
 237         }
 238
 239         return rht_bucket_index(tbl, hash);
 240 }
 241
 242 static inline unsigned int rht_head_hashfn(
 243         struct rhashtable *ht, const struct bucket_table *tbl,
 244         const struct rhash_head *he, const struct rhashtable_params params)
 245 {
 246         const char *ptr = rht_obj(ht, he);
 247
 248         return likely(params.obj_hashfn) ?
 249                rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
 250                                                             ht->p.key_len,
 251                                                        tbl->hash_rnd)) :
 252                rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
 253 }
 254
 255 /**
 256  * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
 257  * @ht:         hash table
 258  * @tbl:        current table
 259  */
 260 static inline bool rht_grow_above_75(const struct rhashtable *ht,
 261                                      const struct bucket_table *tbl)
 262 {
 263         /* Expand table when exceeding 75% load */
 264         return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
 265                (!ht->p.max_size || tbl->size < ht->p.max_size);
 266 }
 267
 268 /**
 269  * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
 270  * @ht:         hash table
 271  * @tbl:        current table
 272  */
 273 static inline bool rht_shrink_below_30(const struct rhashtable *ht,
 274                                        const struct bucket_table *tbl)
 275 {
 276         /* Shrink table beneath 30% load */
 277         return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
 278                tbl->size > ht->p.min_size;
 279 }
 280
 281 /**
 282  * rht_grow_above_100 - returns true if nelems > table-size
 283  * @ht:         hash table
 284  * @tbl:        current table
 285  */
 286 static inline bool rht_grow_above_100(const struct rhashtable *ht,
 287                                       const struct bucket_table *tbl)
 288 {
 289         return atomic_read(&ht->nelems) > tbl->size &&
 290                 (!ht->p.max_size || tbl->size < ht->p.max_size);
 291 }
 292
 293 /**
 294  * rht_grow_above_max - returns true if table is above maximum
 295  * @ht:         hash table
 296  * @tbl:        current table
 297  */
 298 static inline bool rht_grow_above_max(const struct rhashtable *ht,
 299                                       const struct bucket_table *tbl)
 300 {
 301         return ht->p.insecure_max_entries &&
 302                atomic_read(&ht->nelems) >= ht->p.insecure_max_entries;
 303 }
 304
 305 /* The bucket lock is selected based on the hash and protects mutations
 306  * on a group of hash buckets.
 307  *
 308  * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
 309  * a single lock always covers both buckets which may both contains
 310  * entries which link to the same bucket of the old table during resizing.
 311  * This allows to simplify the locking as locking the bucket in both
 312  * tables during resize always guarantee protection.
 313  *
 314  * IMPORTANT: When holding the bucket lock of both the old and new table
 315  * during expansions and shrinking, the old bucket lock must always be
 316  * acquired first.
 317  */
 318 static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl,
 319                                           unsigned int hash)
 320 {
 321         return &tbl->locks[hash & tbl->locks_mask];
 322 }
 323
 324 #ifdef CONFIG_PROVE_LOCKING
 325 int lockdep_rht_mutex_is_held(struct rhashtable *ht);
 326 int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
 327 #else
 328 static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
 329 {
 330         return 1;
 331 }
 332
 333 static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
 334                                              u32 hash)
 335 {
 336         return 1;
 337 }
 338 #endif /* CONFIG_PROVE_LOCKING */
 339
 340 int rhashtable_init(struct rhashtable *ht,
 341                     const struct rhashtable_params *params);
 342
 343 struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht,
 344                                             const void *key,
 345                                             struct rhash_head *obj,
 346                                             struct bucket_table *old_tbl,
 347                                             void **data);
 348 int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl);
 349
 350 void rhashtable_walk_enter(struct rhashtable *ht,
 351                            struct rhashtable_iter *iter);
 352 void rhashtable_walk_exit(struct rhashtable_iter *iter);
 353 int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
 354 void *rhashtable_walk_next(struct rhashtable_iter *iter);
 355 void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);
 356
 357 void rhashtable_free_and_destroy(struct rhashtable *ht,
 358                                  void (*free_fn)(void *ptr, void *arg),
 359                                  void *arg);
 360 void rhashtable_destroy(struct rhashtable *ht);
 361
 362 #define rht_dereference(p, ht) \
 363         rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))
 364
 365 #define rht_dereference_rcu(p, ht) \
 366         rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))
 367
 368 #define rht_dereference_bucket(p, tbl, hash) \
 369         rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))
 370
 371 #define rht_dereference_bucket_rcu(p, tbl, hash) \
 372         rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))
 373
 374 #define rht_entry(tpos, pos, member) \
 375         ({ tpos = container_of(pos, typeof(*tpos), member); 1; })
 376
 377 /**
 378  * rht_for_each_continue - continue iterating over hash chain
 379  * @pos:        the &struct rhash_head to use as a loop cursor.
 380  * @head:       the previous &struct rhash_head to continue from
 381  * @tbl:        the &struct bucket_table
 382  * @hash:       the hash value / bucket index
 383  */
 384 #define rht_for_each_continue(pos, head, tbl, hash) \
 385         for (pos = rht_dereference_bucket(head, tbl, hash); \
 386              !rht_is_a_nulls(pos); \
 387              pos = rht_dereference_bucket((pos)->next, tbl, hash))
 388
 389 /**
 390  * rht_for_each - iterate over hash chain
 391  * @pos:        the &struct rhash_head to use as a loop cursor.
 392  * @tbl:        the &struct bucket_table
 393  * @hash:       the hash value / bucket index
 394  */
 395 #define rht_for_each(pos, tbl, hash) \
 396         rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)
 397
 398 /**
 399  * rht_for_each_entry_continue - continue iterating over hash chain
 400  * @tpos:       the type * to use as a loop cursor.
 401  * @pos:        the &struct rhash_head to use as a loop cursor.
 402  * @head:       the previous &struct rhash_head to continue from
 403  * @tbl:        the &struct bucket_table
 404  * @hash:       the hash value / bucket index
 405  * @member:     name of the &struct rhash_head within the hashable struct.
 406  */
 407 #define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \
 408         for (pos = rht_dereference_bucket(head, tbl, hash);             \
 409              (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);    \
 410              pos = rht_dereference_bucket((pos)->next, tbl, hash))
 411
 412 /**
 413  * rht_for_each_entry - iterate over hash chain of given type
 414  * @tpos:       the type * to use as a loop cursor.
 415  * @pos:        the &struct rhash_head to use as a loop cursor.
 416  * @tbl:        the &struct bucket_table
 417  * @hash:       the hash value / bucket index
 418  * @member:     name of the &struct rhash_head within the hashable struct.
 419  */
 420 #define rht_for_each_entry(tpos, pos, tbl, hash, member)                \
 421         rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],    \
 422                                     tbl, hash, member)
 423
 424 /**
 425  * rht_for_each_entry_safe - safely iterate over hash chain of given type
 426  * @tpos:       the type * to use as a loop cursor.
 427  * @pos:        the &struct rhash_head to use as a loop cursor.
 428  * @next:       the &struct rhash_head to use as next in loop cursor.
 429  * @tbl:        the &struct bucket_table
 430  * @hash:       the hash value / bucket index
 431  * @member:     name of the &struct rhash_head within the hashable struct.
 432  *
 433  * This hash chain list-traversal primitive allows for the looped code to
 434  * remove the loop cursor from the list.
 435  */
 436 #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)         \
 437         for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \
 438              next = !rht_is_a_nulls(pos) ?                                  \
 439                        rht_dereference_bucket(pos->next, tbl, hash) : NULL; \
 440              (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);        \
 441              pos = next,                                                    \
 442              next = !rht_is_a_nulls(pos) ?                                  \
 443                        rht_dereference_bucket(pos->next, tbl, hash) : NULL)
 444
 445 /**
 446  * rht_for_each_rcu_continue - continue iterating over rcu hash chain
 447  * @pos:        the &struct rhash_head to use as a loop cursor.
 448  * @head:       the previous &struct rhash_head to continue from
 449  * @tbl:        the &struct bucket_table
 450  * @hash:       the hash value / bucket index
 451  *
 452  * This hash chain list-traversal primitive may safely run concurrently with
 453  * the _rcu mutation primitives such as rhashtable_insert() as long as the
 454  * traversal is guarded by rcu_read_lock().
 455  */
 456 #define rht_for_each_rcu_continue(pos, head, tbl, hash)                 \
 457         for (({barrier(); }),                                           \
 458              pos = rht_dereference_bucket_rcu(head, tbl, hash);         \
 459              !rht_is_a_nulls(pos);                                      \
 460              pos = rcu_dereference_raw(pos->next))
 461
 462 /**
 463  * rht_for_each_rcu - iterate over rcu hash chain
 464  * @pos:        the &struct rhash_head to use as a loop cursor.
 465  * @tbl:        the &struct bucket_table
 466  * @hash:       the hash value / bucket index
 467  *
 468  * This hash chain list-traversal primitive may safely run concurrently with
 469  * the _rcu mutation primitives such as rhashtable_insert() as long as the
 470  * traversal is guarded by rcu_read_lock().
 471  */
 472 #define rht_for_each_rcu(pos, tbl, hash)                                \
 473         rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)
 474
 475 /**
 476  * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
 477  * @tpos:       the type * to use as a loop cursor.
 478  * @pos:        the &struct rhash_head to use as a loop cursor.
 479  * @head:       the previous &struct rhash_head to continue from
 480  * @tbl:        the &struct bucket_table
 481  * @hash:       the hash value / bucket index
 482  * @member:     name of the &struct rhash_head within the hashable struct.
 483  *
 484  * This hash chain list-traversal primitive may safely run concurrently with
 485  * the _rcu mutation primitives such as rhashtable_insert() as long as the
 486  * traversal is guarded by rcu_read_lock().
 487  */
 488 #define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
 489         for (({barrier(); }),                                               \
 490              pos = rht_dereference_bucket_rcu(head, tbl, hash);             \
 491              (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);        \
 492              pos = rht_dereference_bucket_rcu(pos->next, tbl, hash))
 493
 494 /**
 495  * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
 496  * @tpos:       the type * to use as a loop cursor.
 497  * @pos:        the &struct rhash_head to use as a loop cursor.
 498  * @tbl:        the &struct bucket_table
 499  * @hash:       the hash value / bucket index
 500  * @member:     name of the &struct rhash_head within the hashable struct.
 501  *
 502  * This hash chain list-traversal primitive may safely run concurrently with
 503  * the _rcu mutation primitives such as rhashtable_insert() as long as the
 504  * traversal is guarded by rcu_read_lock().
 505  */
 506 #define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)            \
 507         rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
 508                                         tbl, hash, member)
 509
 510 static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
 511                                      const void *obj)
 512 {
 513         struct rhashtable *ht = arg->ht;
 514         const char *ptr = obj;
 515
 516         return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len);
 517 }
 518
 519 /**
 520  * rhashtable_lookup_fast - search hash table, inlined version
 521  * @ht:         hash table
 522  * @key:        the pointer to the key
 523  * @params:     hash table parameters
 524  *
 525  * Computes the hash value for the key and traverses the bucket chain looking
 526  * for a entry with an identical key. The first matching entry is returned.
 527  *
 528  * Returns the first entry on which the compare function returned true.
 529  */
 530 static inline void *rhashtable_lookup_fast(
 531         struct rhashtable *ht, const void *key,
 532         const struct rhashtable_params params)
 533 {
 534         struct rhashtable_compare_arg arg = {
 535                 .ht = ht,
 536                 .key = key,
 537         };
 538         const struct bucket_table *tbl;
 539         struct rhash_head *he;
 540         unsigned int hash;
 541
 542         rcu_read_lock();
 543
 544         tbl = rht_dereference_rcu(ht->tbl, ht);
 545 restart:
 546         hash = rht_key_hashfn(ht, tbl, key, params);
 547         rht_for_each_rcu(he, tbl, hash) {
 548                 if (params.obj_cmpfn ?
 549                     params.obj_cmpfn(&arg, rht_obj(ht, he)) :
 550                     rhashtable_compare(&arg, rht_obj(ht, he)))
 551                         continue;
 552                 rcu_read_unlock();
 553                 return rht_obj(ht, he);
 554         }
 555
 556         /* Ensure we see any new tables. */
 557         smp_rmb();
 558
 559         tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 560         if (unlikely(tbl))
 561                 goto restart;
 562         rcu_read_unlock();
 563
 564         return NULL;
 565 }
 566
 567 /* Internal function, please use rhashtable_insert_fast() instead. This
 568  * function returns the existing element already in hashes in there is a clash,
 569  * otherwise it returns an error via ERR_PTR().
 570  */
 571 static inline void *__rhashtable_insert_fast(
 572         struct rhashtable *ht, const void *key, struct rhash_head *obj,
 573         const struct rhashtable_params params)
 574 {
 575         struct rhashtable_compare_arg arg = {
 576                 .ht = ht,
 577                 .key = key,
 578         };
 579         struct bucket_table *tbl, *new_tbl;
 580         struct rhash_head *head;
 581         spinlock_t *lock;
 582         unsigned int elasticity;
 583         unsigned int hash;
 584         void *data = NULL;
 585         int err;
 586
 587 restart:
 588         rcu_read_lock();
 589
 590         tbl = rht_dereference_rcu(ht->tbl, ht);
 591
 592         /* All insertions must grab the oldest table containing
 593          * the hashed bucket that is yet to be rehashed.
 594          */
 595         for (;;) {
 596                 hash = rht_head_hashfn(ht, tbl, obj, params);
 597                 lock = rht_bucket_lock(tbl, hash);
 598                 spin_lock_bh(lock);
 599
 600                 if (tbl->rehash <= hash)
 601                         break;
 602
 603                 spin_unlock_bh(lock);
 604                 tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 605         }
 606
 607         new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
 608         if (unlikely(new_tbl)) {
 609                 tbl = rhashtable_insert_slow(ht, key, obj, new_tbl, &data);
 610                 if (!IS_ERR_OR_NULL(tbl))
 611                         goto slow_path;
 612
 613                 err = PTR_ERR(tbl);
 614                 if (err == -EEXIST)
 615                         err = 0;
 616
 617                 goto out;
 618         }
 619
 620         err = -E2BIG;
 621         if (unlikely(rht_grow_above_max(ht, tbl)))
 622                 goto out;
 623
 624         if (unlikely(rht_grow_above_100(ht, tbl))) {
 625 slow_path:
 626                 spin_unlock_bh(lock);
 627                 err = rhashtable_insert_rehash(ht, tbl);
 628                 rcu_read_unlock();
 629                 if (err)
 630                         return ERR_PTR(err);
 631
 632                 goto restart;
 633         }
 634
 635         err = 0;
 636         elasticity = ht->elasticity;
 637         rht_for_each(head, tbl, hash) {
 638                 if (key &&
 639                     unlikely(!(params.obj_cmpfn ?
 640                                params.obj_cmpfn(&arg, rht_obj(ht, head)) :
 641                                rhashtable_compare(&arg, rht_obj(ht, head))))) {
 642                         data = rht_obj(ht, head);
 643                         goto out;
 644                 }
 645                 if (!--elasticity)
 646                         goto slow_path;
 647         }
 648
 649         head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash);
 650
 651         RCU_INIT_POINTER(obj->next, head);
 652
 653         rcu_assign_pointer(tbl->buckets[hash], obj);
 654
 655         atomic_inc(&ht->nelems);
 656         if (rht_grow_above_75(ht, tbl))
 657                 schedule_work(&ht->run_work);
 658
 659 out:
 660         spin_unlock_bh(lock);
 661         rcu_read_unlock();
 662
 663         return err ? ERR_PTR(err) : data;
 664 }
 665
 666 /**
 667  * rhashtable_insert_fast - insert object into hash table
 668  * @ht:         hash table
 669  * @obj:        pointer to hash head inside object
 670  * @params:     hash table parameters
 671  *
 672  * Will take a per bucket spinlock to protect against mutual mutations
 673  * on the same bucket. Multiple insertions may occur in parallel unless
 674  * they map to the same bucket lock.
 675  *
 676  * It is safe to call this function from atomic context.
 677  *
 678  * Will trigger an automatic deferred table resizing if the size grows
 679  * beyond the watermark indicated by grow_decision() which can be passed
 680  * to rhashtable_init().
 681  */
 682 static inline int rhashtable_insert_fast(
 683         struct rhashtable *ht, struct rhash_head *obj,
 684         const struct rhashtable_params params)
 685 {
 686         void *ret;
 687
 688         ret = __rhashtable_insert_fast(ht, NULL, obj, params);
 689         if (IS_ERR(ret))
 690                 return PTR_ERR(ret);
 691
 692         return ret == NULL ? 0 : -EEXIST;
 693 }
 694
 695 /**
 696  * rhashtable_lookup_insert_fast - lookup and insert object into hash table
 697  * @ht:         hash table
 698  * @obj:        pointer to hash head inside object
 699  * @params:     hash table parameters
 700  *
 701  * Locks down the bucket chain in both the old and new table if a resize
 702  * is in progress to ensure that writers can't remove from the old table
 703  * and can't insert to the new table during the atomic operation of search
 704  * and insertion. Searches for duplicates in both the old and new table if
 705  * a resize is in progress.
 706  *
 707  * This lookup function may only be used for fixed key hash table (key_len
 708  * parameter set). It will BUG() if used inappropriately.
 709  *
 710  * It is safe to call this function from atomic context.
 711  *
 712  * Will trigger an automatic deferred table resizing if the size grows
 713  * beyond the watermark indicated by grow_decision() which can be passed
 714  * to rhashtable_init().
 715  */
 716 static inline int rhashtable_lookup_insert_fast(
 717         struct rhashtable *ht, struct rhash_head *obj,
 718         const struct rhashtable_params params)
 719 {
 720         const char *key = rht_obj(ht, obj);
 721         void *ret;
 722
 723         BUG_ON(ht->p.obj_hashfn);
 724
 725         ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params);
 726         if (IS_ERR(ret))
 727                 return PTR_ERR(ret);
 728
 729         return ret == NULL ? 0 : -EEXIST;
 730 }
 731
 732 /**
 733  * rhashtable_lookup_insert_key - search and insert object to hash table
 734  *                                with explicit key
 735  * @ht:         hash table
 736  * @key:        key
 737  * @obj:        pointer to hash head inside object
 738  * @params:     hash table parameters
 739  *
 740  * Locks down the bucket chain in both the old and new table if a resize
 741  * is in progress to ensure that writers can't remove from the old table
 742  * and can't insert to the new table during the atomic operation of search
 743  * and insertion. Searches for duplicates in both the old and new table if
 744  * a resize is in progress.
 745  *
 746  * Lookups may occur in parallel with hashtable mutations and resizing.
 747  *
 748  * Will trigger an automatic deferred table resizing if the size grows
 749  * beyond the watermark indicated by grow_decision() which can be passed
 750  * to rhashtable_init().
 751  *
 752  * Returns zero on success.
 753  */
 754 static inline int rhashtable_lookup_insert_key(
 755         struct rhashtable *ht, const void *key, struct rhash_head *obj,
 756         const struct rhashtable_params params)
 757 {
 758         void *ret;
 759
 760         BUG_ON(!ht->p.obj_hashfn || !key);
 761
 762         ret = __rhashtable_insert_fast(ht, key, obj, params);
 763         if (IS_ERR(ret))
 764                 return PTR_ERR(ret);
 765
 766         return ret == NULL ? 0 : -EEXIST;
 767 }
 768
 769 /**
 770  * rhashtable_lookup_get_insert_key - lookup and insert object into hash table
 771  * @ht:         hash table
 772  * @obj:        pointer to hash head inside object
 773  * @params:     hash table parameters
 774  * @data:       pointer to element data already in hashes
 775  *
 776  * Just like rhashtable_lookup_insert_key(), but this function returns the
 777  * object if it exists, NULL if it does not and the insertion was successful,
 778  * and an ERR_PTR otherwise.
 779  */
 780 static inline void *rhashtable_lookup_get_insert_key(
 781         struct rhashtable *ht, const void *key, struct rhash_head *obj,
 782         const struct rhashtable_params params)
 783 {
 784         BUG_ON(!ht->p.obj_hashfn || !key);
 785
 786         return __rhashtable_insert_fast(ht, key, obj, params);
 787 }
 788
 789 /* Internal function, please use rhashtable_remove_fast() instead */
 790 static inline int __rhashtable_remove_fast(
 791         struct rhashtable *ht, struct bucket_table *tbl,
 792         struct rhash_head *obj, const struct rhashtable_params params)
 793 {
 794         struct rhash_head __rcu **pprev;
 795         struct rhash_head *he;
 796         spinlock_t * lock;
 797         unsigned int hash;
 798         int err = -ENOENT;
 799
 800         hash = rht_head_hashfn(ht, tbl, obj, params);
 801         lock = rht_bucket_lock(tbl, hash);
 802
 803         spin_lock_bh(lock);
 804
 805         pprev = &tbl->buckets[hash];
 806         rht_for_each(he, tbl, hash) {
 807                 if (he != obj) {
 808                         pprev = &he->next;
 809                         continue;
 810                 }
 811
 812                 rcu_assign_pointer(*pprev, obj->next);
 813                 err = 0;
 814                 break;
 815         }
 816
 817         spin_unlock_bh(lock);
 818
 819         return err;
 820 }
 821
 822 /**
 823  * rhashtable_remove_fast - remove object from hash table
 824  * @ht:         hash table
 825  * @obj:        pointer to hash head inside object
 826  * @params:     hash table parameters
 827  *
 828  * Since the hash chain is single linked, the removal operation needs to
 829  * walk the bucket chain upon removal. The removal operation is thus
 830  * considerable slow if the hash table is not correctly sized.
 831  *
 832  * Will automatically shrink the table via rhashtable_expand() if the
 833  * shrink_decision function specified at rhashtable_init() returns true.
 834  *
 835  * Returns zero on success, -ENOENT if the entry could not be found.
 836  */
 837 static inline int rhashtable_remove_fast(
 838         struct rhashtable *ht, struct rhash_head *obj,
 839         const struct rhashtable_params params)
 840 {
 841         struct bucket_table *tbl;
 842         int err;
 843
 844         rcu_read_lock();
 845
 846         tbl = rht_dereference_rcu(ht->tbl, ht);
 847
 848         /* Because we have already taken (and released) the bucket
 849          * lock in old_tbl, if we find that future_tbl is not yet
 850          * visible then that guarantees the entry to still be in
 851          * the old tbl if it exists.
 852          */
 853         while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) &&
 854                (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
 855                 ;
 856
 857         if (err)
 858                 goto out;
 859
 860         atomic_dec(&ht->nelems);
 861         if (unlikely(ht->p.automatic_shrinking &&
 862                      rht_shrink_below_30(ht, tbl)))
 863                 schedule_work(&ht->run_work);
 864
 865 out:
 866         rcu_read_unlock();
 867
 868         return err;
 869 }
 870
 871 /* Internal function, please use rhashtable_replace_fast() instead */
 872 static inline int __rhashtable_replace_fast(
 873         struct rhashtable *ht, struct bucket_table *tbl,
 874         struct rhash_head *obj_old, struct rhash_head *obj_new,
 875         const struct rhashtable_params params)
 876 {
 877         struct rhash_head __rcu **pprev;
 878         struct rhash_head *he;
 879         spinlock_t *lock;
 880         unsigned int hash;
 881         int err = -ENOENT;
 882
 883         /* Minimally, the old and new objects must have same hash
 884          * (which should mean identifiers are the same).
 885          */
 886         hash = rht_head_hashfn(ht, tbl, obj_old, params);
 887         if (hash != rht_head_hashfn(ht, tbl, obj_new, params))
 888                 return -EINVAL;
 889
 890         lock = rht_bucket_lock(tbl, hash);
 891
 892         spin_lock_bh(lock);
 893
 894         pprev = &tbl->buckets[hash];
 895         rht_for_each(he, tbl, hash) {
 896                 if (he != obj_old) {
 897                         pprev = &he->next;
 898                         continue;
 899                 }
 900
 901                 rcu_assign_pointer(obj_new->next, obj_old->next);
 902                 rcu_assign_pointer(*pprev, obj_new);
 903                 err = 0;
 904                 break;
 905         }
 906
 907         spin_unlock_bh(lock);
 908
 909         return err;
 910 }
 911
 912 /**
 913  * rhashtable_replace_fast - replace an object in hash table
 914  * @ht:         hash table
 915  * @obj_old:    pointer to hash head inside object being replaced
 916  * @obj_new:    pointer to hash head inside object which is new
 917  * @params:     hash table parameters
 918  *
 919  * Replacing an object doesn't affect the number of elements in the hash table
 920  * or bucket, so we don't need to worry about shrinking or expanding the
 921  * table here.
 922  *
 923  * Returns zero on success, -ENOENT if the entry could not be found,
 924  * -EINVAL if hash is not the same for the old and new objects.
 925  */
 926 static inline int rhashtable_replace_fast(
 927         struct rhashtable *ht, struct rhash_head *obj_old,
 928         struct rhash_head *obj_new,
 929         const struct rhashtable_params params)
 930 {
 931         struct bucket_table *tbl;
 932         int err;
 933
 934         rcu_read_lock();
 935
 936         tbl = rht_dereference_rcu(ht->tbl, ht);
 937
 938         /* Because we have already taken (and released) the bucket
 939          * lock in old_tbl, if we find that future_tbl is not yet
 940          * visible then that guarantees the entry to still be in
 941          * the old tbl if it exists.
 942          */
 943         while ((err = __rhashtable_replace_fast(ht, tbl, obj_old,
 944                                                 obj_new, params)) &&
 945                (tbl = rht_dereference_rcu(tbl->future_tbl, ht)))
 946                 ;
 947
 948         rcu_read_unlock();
 949
 950         return err;
 951 }
 952
 953 /* Obsolete function, do not use in new code. */
 954 static inline int rhashtable_walk_init(struct rhashtable *ht,
 955                                        struct rhashtable_iter *iter, gfp_t gfp)
 956 {
 957         rhashtable_walk_enter(ht, iter);
 958         return 0;
 959 }
 960
 961 #endif /* _LINUX_RHASHTABLE_H */