Merge tag 'irqchip-core-4.8' of git://git.infradead.org/users/jcooper/linux into...
[cascardo/linux.git] / lib / radix-tree.c
index 8df0df2..8b7d845 100644 (file)
@@ -4,6 +4,8 @@
  * Copyright (C) 2005 SGI, Christoph Lameter
  * Copyright (C) 2006 Nick Piggin
  * Copyright (C) 2012 Konstantin Khlebnikov
+ * Copyright (C) 2016 Intel, Matthew Wilcox
+ * Copyright (C) 2016 Intel, Ross Zwisler
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License as
 #include <linux/preempt.h>             /* in_interrupt() */
 
 
-/*
- * The height_to_maxindex array needs to be one deeper than the maximum
- * path as height 0 holds only 1 entry.
- */
-static unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1] __read_mostly;
-
 /*
  * Radix tree node cache.
  */
@@ -64,18 +60,18 @@ static struct kmem_cache *radix_tree_node_cachep;
  * Per-cpu pool of preloaded nodes
  */
 struct radix_tree_preload {
-       int nr;
+       unsigned nr;
        /* nodes->private_data points to next preallocated node */
        struct radix_tree_node *nodes;
 };
 static DEFINE_PER_CPU(struct radix_tree_preload, radix_tree_preloads) = { 0, };
 
-static inline void *ptr_to_indirect(void *ptr)
+static inline void *node_to_entry(void *ptr)
 {
-       return (void *)((unsigned long)ptr | RADIX_TREE_INDIRECT_PTR);
+       return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
 }
 
-#define RADIX_TREE_RETRY       ptr_to_indirect(NULL)
+#define RADIX_TREE_RETRY       node_to_entry(NULL)
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
 /* Sibling slots point directly to another slot in the same node */
@@ -98,13 +94,14 @@ static inline unsigned long get_slot_offset(struct radix_tree_node *parent,
        return slot - parent->slots;
 }
 
-static unsigned radix_tree_descend(struct radix_tree_node *parent,
-                               struct radix_tree_node **nodep, unsigned offset)
+static unsigned int radix_tree_descend(struct radix_tree_node *parent,
+                       struct radix_tree_node **nodep, unsigned long index)
 {
+       unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
        void **entry = rcu_dereference_raw(parent->slots[offset]);
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
-       if (radix_tree_is_indirect_ptr(entry)) {
+       if (radix_tree_is_internal_node(entry)) {
                unsigned long siboff = get_slot_offset(parent, entry);
                if (siboff < RADIX_TREE_MAP_SIZE) {
                        offset = siboff;
@@ -145,7 +142,7 @@ static inline void root_tag_set(struct radix_tree_root *root, unsigned int tag)
        root->gfp_mask |= (__force gfp_t)(1 << (tag + __GFP_BITS_SHIFT));
 }
 
-static inline void root_tag_clear(struct radix_tree_root *root, unsigned int tag)
+static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
 {
        root->gfp_mask &= (__force gfp_t)~(1 << (tag + __GFP_BITS_SHIFT));
 }
@@ -157,7 +154,7 @@ static inline void root_tag_clear_all(struct radix_tree_root *root)
 
 static inline int root_tag_get(struct radix_tree_root *root, unsigned int tag)
 {
-       return (__force unsigned)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
+       return (__force int)root->gfp_mask & (1 << (tag + __GFP_BITS_SHIFT));
 }
 
 static inline unsigned root_tags_get(struct radix_tree_root *root)
@@ -171,7 +168,7 @@ static inline unsigned root_tags_get(struct radix_tree_root *root)
  */
 static inline int any_tag_set(struct radix_tree_node *node, unsigned int tag)
 {
-       int idx;
+       unsigned idx;
        for (idx = 0; idx < RADIX_TREE_TAG_LONGS; idx++) {
                if (node->tags[tag][idx])
                        return 1;
@@ -215,38 +212,45 @@ radix_tree_find_next_bit(const unsigned long *addr,
        return size;
 }
 
-#if 0
-static void dump_node(void *slot, int height, int offset)
+#ifndef __KERNEL__
+static void dump_node(struct radix_tree_node *node, unsigned long index)
 {
-       struct radix_tree_node *node;
-       int i;
+       unsigned long i;
 
-       if (!slot)
-               return;
+       pr_debug("radix node: %p offset %d tags %lx %lx %lx shift %d count %d parent %p\n",
+               node, node->offset,
+               node->tags[0][0], node->tags[1][0], node->tags[2][0],
+               node->shift, node->count, node->parent);
 
-       if (height == 0) {
-               pr_debug("radix entry %p offset %d\n", slot, offset);
-               return;
+       for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
+               unsigned long first = index | (i << node->shift);
+               unsigned long last = first | ((1UL << node->shift) - 1);
+               void *entry = node->slots[i];
+               if (!entry)
+                       continue;
+               if (is_sibling_entry(node, entry)) {
+                       pr_debug("radix sblng %p offset %ld val %p indices %ld-%ld\n",
+                                       entry, i,
+                                       *(void **)entry_to_node(entry),
+                                       first, last);
+               } else if (!radix_tree_is_internal_node(entry)) {
+                       pr_debug("radix entry %p offset %ld indices %ld-%ld\n",
+                                       entry, i, first, last);
+               } else {
+                       dump_node(entry_to_node(entry), first);
+               }
        }
-
-       node = indirect_to_ptr(slot);
-       pr_debug("radix node: %p offset %d tags %lx %lx %lx path %x count %d parent %p\n",
-               slot, offset, node->tags[0][0], node->tags[1][0],
-               node->tags[2][0], node->path, node->count, node->parent);
-
-       for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
-               dump_node(node->slots[i], height - 1, i);
 }
 
 /* For debug */
 static void radix_tree_dump(struct radix_tree_root *root)
 {
-       pr_debug("radix root: %p height %d rnode %p tags %x\n",
-                       root, root->height, root->rnode,
+       pr_debug("radix root: %p rnode %p tags %x\n",
+                       root, root->rnode,
                        root->gfp_mask >> __GFP_BITS_SHIFT);
-       if (!radix_tree_is_indirect_ptr(root->rnode))
+       if (!radix_tree_is_internal_node(root->rnode))
                return;
-       dump_node(root->rnode, root->height, 0);
+       dump_node(entry_to_node(root->rnode), 0);
 }
 #endif
 
@@ -261,9 +265,9 @@ radix_tree_node_alloc(struct radix_tree_root *root)
        gfp_t gfp_mask = root_gfp_mask(root);
 
        /*
-        * Preload code isn't irq safe and it doesn't make sence to use
-        * preloading in the interrupt anyway as all the allocations have to
-        * be atomic. So just do normal allocation when in interrupt.
+        * Preload code isn't irq safe and it doesn't make sense to use
+        * preloading during an interrupt anyway as all the allocations have
+        * to be atomic. So just do normal allocation when in interrupt.
         */
        if (!gfpflags_allow_blocking(gfp_mask) && !in_interrupt()) {
                struct radix_tree_preload *rtp;
@@ -299,7 +303,7 @@ radix_tree_node_alloc(struct radix_tree_root *root)
        ret = kmem_cache_alloc(radix_tree_node_cachep,
                               gfp_mask | __GFP_ACCOUNT);
 out:
-       BUG_ON(radix_tree_is_indirect_ptr(ret));
+       BUG_ON(radix_tree_is_internal_node(ret));
        return ret;
 }
 
@@ -399,17 +403,16 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
 EXPORT_SYMBOL(radix_tree_maybe_preload);
 
 /*
- *     Return the maximum key which can be store into a
- *     radix tree with height HEIGHT.
+ * The maximum index which can be stored in a radix tree
  */
-static inline unsigned long radix_tree_maxindex(unsigned int height)
+static inline unsigned long shift_maxindex(unsigned int shift)
 {
-       return height_to_maxindex[height];
+       return (RADIX_TREE_MAP_SIZE << shift) - 1;
 }
 
 static inline unsigned long node_maxindex(struct radix_tree_node *node)
 {
-       return radix_tree_maxindex(node->path & RADIX_TREE_HEIGHT_MASK);
+       return shift_maxindex(node->shift);
 }
 
 static unsigned radix_tree_load_root(struct radix_tree_root *root,
@@ -419,11 +422,10 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root,
 
        *nodep = node;
 
-       if (likely(radix_tree_is_indirect_ptr(node))) {
-               node = indirect_to_ptr(node);
+       if (likely(radix_tree_is_internal_node(node))) {
+               node = entry_to_node(node);
                *maxindex = node_maxindex(node);
-               return (node->path & RADIX_TREE_HEIGHT_MASK) *
-                       RADIX_TREE_MAP_SHIFT;
+               return node->shift + RADIX_TREE_MAP_SHIFT;
        }
 
        *maxindex = 0;
@@ -434,26 +436,25 @@ static unsigned radix_tree_load_root(struct radix_tree_root *root,
  *     Extend a radix tree so it can store key @index.
  */
 static int radix_tree_extend(struct radix_tree_root *root,
-                               unsigned long index)
+                               unsigned long index, unsigned int shift)
 {
-       struct radix_tree_node *node;
        struct radix_tree_node *slot;
-       unsigned int height;
+       unsigned int maxshift;
        int tag;
 
-       /* Figure out what the height should be.  */
-       height = root->height + 1;
-       while (index > radix_tree_maxindex(height))
-               height++;
+       /* Figure out what the shift should be.  */
+       maxshift = shift;
+       while (index > shift_maxindex(maxshift))
+               maxshift += RADIX_TREE_MAP_SHIFT;
 
-       if (root->rnode == NULL) {
-               root->height = height;
+       slot = root->rnode;
+       if (!slot)
                goto out;
-       }
 
        do {
-               unsigned int newheight;
-               if (!(node = radix_tree_node_alloc(root)))
+               struct radix_tree_node *node = radix_tree_node_alloc(root);
+
+               if (!node)
                        return -ENOMEM;
 
                /* Propagate the aggregated tag info into the new root */
@@ -462,25 +463,20 @@ static int radix_tree_extend(struct radix_tree_root *root,
                                tag_set(node, tag, 0);
                }
 
-               /* Increase the height.  */
-               newheight = root->height+1;
-               BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK);
-               node->path = newheight;
+               BUG_ON(shift > BITS_PER_LONG);
+               node->shift = shift;
+               node->offset = 0;
                node->count = 1;
                node->parent = NULL;
-               slot = root->rnode;
-               if (radix_tree_is_indirect_ptr(slot)) {
-                       slot = indirect_to_ptr(slot);
-                       slot->parent = node;
-                       slot = ptr_to_indirect(slot);
-               }
+               if (radix_tree_is_internal_node(slot))
+                       entry_to_node(slot)->parent = node;
                node->slots[0] = slot;
-               node = ptr_to_indirect(node);
-               rcu_assign_pointer(root->rnode, node);
-               root->height = newheight;
-       } while (height > root->height);
+               slot = node_to_entry(node);
+               rcu_assign_pointer(root->rnode, slot);
+               shift += RADIX_TREE_MAP_SHIFT;
+       } while (shift <= maxshift);
 out:
-       return height * RADIX_TREE_MAP_SHIFT;
+       return maxshift + RADIX_TREE_MAP_SHIFT;
 }
 
 /**
@@ -504,68 +500,61 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
                        unsigned order, struct radix_tree_node **nodep,
                        void ***slotp)
 {
-       struct radix_tree_node *node = NULL, *slot;
+       struct radix_tree_node *node = NULL, *child;
+       void **slot = (void **)&root->rnode;
        unsigned long maxindex;
-       unsigned int height, shift, offset;
+       unsigned int shift, offset = 0;
        unsigned long max = index | ((1UL << order) - 1);
 
-       shift = radix_tree_load_root(root, &slot, &maxindex);
+       shift = radix_tree_load_root(root, &child, &maxindex);
 
        /* Make sure the tree is high enough.  */
        if (max > maxindex) {
-               int error = radix_tree_extend(root, max);
+               int error = radix_tree_extend(root, max, shift);
                if (error < 0)
                        return error;
                shift = error;
-               slot = root->rnode;
-               if (order == shift) {
+               child = root->rnode;
+               if (order == shift)
                        shift += RADIX_TREE_MAP_SHIFT;
-                       root->height++;
-               }
        }
 
-       height = root->height;
-
-       offset = 0;                     /* uninitialised var warning */
        while (shift > order) {
-               if (slot == NULL) {
+               shift -= RADIX_TREE_MAP_SHIFT;
+               if (child == NULL) {
                        /* Have to add a child node.  */
-                       if (!(slot = radix_tree_node_alloc(root)))
+                       child = radix_tree_node_alloc(root);
+                       if (!child)
                                return -ENOMEM;
-                       slot->path = height;
-                       slot->parent = node;
-                       if (node) {
-                               rcu_assign_pointer(node->slots[offset],
-                                                       ptr_to_indirect(slot));
+                       child->shift = shift;
+                       child->offset = offset;
+                       child->parent = node;
+                       rcu_assign_pointer(*slot, node_to_entry(child));
+                       if (node)
                                node->count++;
-                               slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT;
-                       } else
-                               rcu_assign_pointer(root->rnode,
-                                                       ptr_to_indirect(slot));
-               } else if (!radix_tree_is_indirect_ptr(slot))
+               } else if (!radix_tree_is_internal_node(child))
                        break;
 
                /* Go a level down */
-               height--;
-               shift -= RADIX_TREE_MAP_SHIFT;
-               node = indirect_to_ptr(slot);
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-               offset = radix_tree_descend(node, &slot, offset);
+               node = entry_to_node(child);
+               offset = radix_tree_descend(node, &child, index);
+               slot = &node->slots[offset];
        }
 
 #ifdef CONFIG_RADIX_TREE_MULTIORDER
        /* Insert pointers to the canonical entry */
        if (order > shift) {
-               int i, n = 1 << (order - shift);
+               unsigned i, n = 1 << (order - shift);
                offset = offset & ~(n - 1);
-               slot = ptr_to_indirect(&node->slots[offset]);
+               slot = &node->slots[offset];
+               child = node_to_entry(slot);
                for (i = 0; i < n; i++) {
-                       if (node->slots[offset + i])
+                       if (slot[i])
                                return -EEXIST;
                }
 
                for (i = 1; i < n; i++) {
-                       rcu_assign_pointer(node->slots[offset + i], slot);
+                       rcu_assign_pointer(slot[i], child);
                        node->count++;
                }
        }
@@ -574,7 +563,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
        if (nodep)
                *nodep = node;
        if (slotp)
-               *slotp = node ? node->slots + offset : (void **)&root->rnode;
+               *slotp = slot;
        return 0;
 }
 
@@ -594,7 +583,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
        void **slot;
        int error;
 
-       BUG_ON(radix_tree_is_indirect_ptr(item));
+       BUG_ON(radix_tree_is_internal_node(item));
 
        error = __radix_tree_create(root, index, order, &node, &slot);
        if (error)
@@ -636,25 +625,22 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
 {
        struct radix_tree_node *node, *parent;
        unsigned long maxindex;
-       unsigned int shift;
        void **slot;
 
  restart:
        parent = NULL;
        slot = (void **)&root->rnode;
-       shift = radix_tree_load_root(root, &node, &maxindex);
+       radix_tree_load_root(root, &node, &maxindex);
        if (index > maxindex)
                return NULL;
 
-       while (radix_tree_is_indirect_ptr(node)) {
+       while (radix_tree_is_internal_node(node)) {
                unsigned offset;
 
                if (node == RADIX_TREE_RETRY)
                        goto restart;
-               parent = indirect_to_ptr(node);
-               shift -= RADIX_TREE_MAP_SHIFT;
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-               offset = radix_tree_descend(parent, &node, offset);
+               parent = entry_to_node(node);
+               offset = radix_tree_descend(parent, &node, index);
                slot = parent->slots + offset;
        }
 
@@ -710,13 +696,13 @@ EXPORT_SYMBOL(radix_tree_lookup);
  *     radix_tree_tag_set - set a tag on a radix tree node
  *     @root:          radix tree root
  *     @index:         index key
- *     @tag:           tag index
+ *     @tag:           tag index
  *
  *     Set the search tag (which must be < RADIX_TREE_MAX_TAGS)
  *     corresponding to @index in the radix tree.  From
  *     the root all the way down to the leaf node.
  *
- *     Returns the address of the tagged item.   Setting a tag on a not-present
+ *     Returns the address of the tagged item.  Setting a tag on a not-present
  *     item is a bug.
  */
 void *radix_tree_tag_set(struct radix_tree_root *root,
@@ -724,19 +710,15 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 {
        struct radix_tree_node *node, *parent;
        unsigned long maxindex;
-       unsigned int shift;
 
-       shift = radix_tree_load_root(root, &node, &maxindex);
+       radix_tree_load_root(root, &node, &maxindex);
        BUG_ON(index > maxindex);
 
-       while (radix_tree_is_indirect_ptr(node)) {
+       while (radix_tree_is_internal_node(node)) {
                unsigned offset;
 
-               shift -= RADIX_TREE_MAP_SHIFT;
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-
-               parent = indirect_to_ptr(node);
-               offset = radix_tree_descend(parent, &node, offset);
+               parent = entry_to_node(node);
+               offset = radix_tree_descend(parent, &node, index);
                BUG_ON(!node);
 
                if (!tag_get(parent, tag, offset))
@@ -751,15 +733,35 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
 }
 EXPORT_SYMBOL(radix_tree_tag_set);
 
+static void node_tag_clear(struct radix_tree_root *root,
+                               struct radix_tree_node *node,
+                               unsigned int tag, unsigned int offset)
+{
+       while (node) {
+               if (!tag_get(node, tag, offset))
+                       return;
+               tag_clear(node, tag, offset);
+               if (any_tag_set(node, tag))
+                       return;
+
+               offset = node->offset;
+               node = node->parent;
+       }
+
+       /* clear the root's tag bit */
+       if (root_tag_get(root, tag))
+               root_tag_clear(root, tag);
+}
+
 /**
  *     radix_tree_tag_clear - clear a tag on a radix tree node
  *     @root:          radix tree root
  *     @index:         index key
- *     @tag:           tag index
+ *     @tag:           tag index
  *
  *     Clear the search tag (which must be < RADIX_TREE_MAX_TAGS)
- *     corresponding to @index in the radix tree.  If
- *     this causes the leaf node to have no tags set then clear the tag in the
+ *     corresponding to @index in the radix tree.  If this causes
+ *     the leaf node to have no tags set then clear the tag in the
  *     next-to-leaf node, etc.
  *
  *     Returns the address of the tagged item on success, else NULL.  ie:
@@ -770,45 +772,22 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
 {
        struct radix_tree_node *node, *parent;
        unsigned long maxindex;
-       unsigned int shift;
        int uninitialized_var(offset);
 
-       shift = radix_tree_load_root(root, &node, &maxindex);
+       radix_tree_load_root(root, &node, &maxindex);
        if (index > maxindex)
                return NULL;
 
        parent = NULL;
 
-       while (radix_tree_is_indirect_ptr(node)) {
-               shift -= RADIX_TREE_MAP_SHIFT;
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-
-               parent = indirect_to_ptr(node);
-               offset = radix_tree_descend(parent, &node, offset);
+       while (radix_tree_is_internal_node(node)) {
+               parent = entry_to_node(node);
+               offset = radix_tree_descend(parent, &node, index);
        }
 
-       if (node == NULL)
-               goto out;
-
-       index >>= shift;
+       if (node)
+               node_tag_clear(root, parent, tag, offset);
 
-       while (parent) {
-               if (!tag_get(parent, tag, offset))
-                       goto out;
-               tag_clear(parent, tag, offset);
-               if (any_tag_set(parent, tag))
-                       goto out;
-
-               index >>= RADIX_TREE_MAP_SHIFT;
-               offset = index & RADIX_TREE_MAP_MASK;
-               parent = parent->parent;
-       }
-
-       /* clear the root's tag bit */
-       if (root_tag_get(root, tag))
-               root_tag_clear(root, tag);
-
-out:
        return node;
 }
 EXPORT_SYMBOL(radix_tree_tag_clear);
@@ -817,7 +796,7 @@ EXPORT_SYMBOL(radix_tree_tag_clear);
  * radix_tree_tag_get - get a tag on a radix tree node
  * @root:              radix tree root
  * @index:             index key
- * @tag:               tag index (< RADIX_TREE_MAX_TAGS)
+ * @tag:               tag index (< RADIX_TREE_MAX_TAGS)
  *
  * Return values:
  *
@@ -833,25 +812,21 @@ int radix_tree_tag_get(struct radix_tree_root *root,
 {
        struct radix_tree_node *node, *parent;
        unsigned long maxindex;
-       unsigned int shift;
 
        if (!root_tag_get(root, tag))
                return 0;
 
-       shift = radix_tree_load_root(root, &node, &maxindex);
+       radix_tree_load_root(root, &node, &maxindex);
        if (index > maxindex)
                return 0;
        if (node == NULL)
                return 0;
 
-       while (radix_tree_is_indirect_ptr(node)) {
-               int offset;
-
-               shift -= RADIX_TREE_MAP_SHIFT;
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+       while (radix_tree_is_internal_node(node)) {
+               unsigned offset;
 
-               parent = indirect_to_ptr(node);
-               offset = radix_tree_descend(parent, &node, offset);
+               parent = entry_to_node(node);
+               offset = radix_tree_descend(parent, &node, index);
 
                if (!node)
                        return 0;
@@ -884,8 +859,8 @@ static inline void __set_iter_shift(struct radix_tree_iter *iter,
 void **radix_tree_next_chunk(struct radix_tree_root *root,
                             struct radix_tree_iter *iter, unsigned flags)
 {
-       unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK;
-       struct radix_tree_node *rnode, *node;
+       unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
+       struct radix_tree_node *node, *child;
        unsigned long index, offset, maxindex;
 
        if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag))
@@ -905,38 +880,27 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
                return NULL;
 
  restart:
-       shift = radix_tree_load_root(root, &rnode, &maxindex);
+       radix_tree_load_root(root, &child, &maxindex);
        if (index > maxindex)
                return NULL;
+       if (!child)
+               return NULL;
 
-       if (radix_tree_is_indirect_ptr(rnode)) {
-               rnode = indirect_to_ptr(rnode);
-       } else if (rnode) {
+       if (!radix_tree_is_internal_node(child)) {
                /* Single-slot tree */
                iter->index = index;
                iter->next_index = maxindex + 1;
                iter->tags = 1;
-               __set_iter_shift(iter, shift);
+               __set_iter_shift(iter, 0);
                return (void **)&root->rnode;
-       } else
-               return NULL;
-
-       shift -= RADIX_TREE_MAP_SHIFT;
-       offset = index >> shift;
-
-       node = rnode;
-       while (1) {
-               struct radix_tree_node *slot;
-               unsigned new_off = radix_tree_descend(node, &slot, offset);
+       }
 
-               if (new_off < offset) {
-                       offset = new_off;
-                       index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
-                       index |= offset << shift;
-               }
+       do {
+               node = entry_to_node(child);
+               offset = radix_tree_descend(node, &child, index);
 
                if ((flags & RADIX_TREE_ITER_TAGGED) ?
-                               !tag_get(node, tag, offset) : !slot) {
+                               !tag_get(node, tag, offset) : !child) {
                        /* Hole detected */
                        if (flags & RADIX_TREE_ITER_CONTIG)
                                return NULL;
@@ -954,30 +918,24 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
                                        if (slot)
                                                break;
                                }
-                       index &= ~((RADIX_TREE_MAP_SIZE << shift) - 1);
-                       index += offset << shift;
+                       index &= ~node_maxindex(node);
+                       index += offset << node->shift;
                        /* Overflow after ~0UL */
                        if (!index)
                                return NULL;
                        if (offset == RADIX_TREE_MAP_SIZE)
                                goto restart;
-                       slot = rcu_dereference_raw(node->slots[offset]);
+                       child = rcu_dereference_raw(node->slots[offset]);
                }
 
-               if ((slot == NULL) || (slot == RADIX_TREE_RETRY))
+               if ((child == NULL) || (child == RADIX_TREE_RETRY))
                        goto restart;
-               if (!radix_tree_is_indirect_ptr(slot))
-                       break;
-
-               node = indirect_to_ptr(slot);
-               shift -= RADIX_TREE_MAP_SHIFT;
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-       }
+       } while (radix_tree_is_internal_node(child));
 
        /* Update the iterator state */
-       iter->index = index & ~((1 << shift) - 1);
-       iter->next_index = (index | ((RADIX_TREE_MAP_SIZE << shift) - 1)) + 1;
-       __set_iter_shift(iter, shift);
+       iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
+       iter->next_index = (index | node_maxindex(node)) + 1;
+       __set_iter_shift(iter, node->shift);
 
        /* Construct iter->tags bit-mask from node->tags[tag] array */
        if (flags & RADIX_TREE_ITER_TAGGED) {
@@ -1023,7 +981,7 @@ EXPORT_SYMBOL(radix_tree_next_chunk);
  * set is outside the range we are scanning. This reults in dangling tags and
  * can lead to problems with later tag operations (e.g. livelocks on lookups).
  *
- * The function returns number of leaves where the tag was set and sets
+ * The function returns the number of leaves where the tag was set and sets
  * *first_indexp to the first unscanned index.
  * WARNING! *first_indexp can wrap if last_index is ULONG_MAX. Caller must
  * be prepared to handle that.
@@ -1033,12 +991,12 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
                unsigned long nr_to_tag,
                unsigned int iftag, unsigned int settag)
 {
-       struct radix_tree_node *slot, *node = NULL;
+       struct radix_tree_node *parent, *node, *child;
        unsigned long maxindex;
-       unsigned int shift = radix_tree_load_root(root, &slot, &maxindex);
        unsigned long tagged = 0;
        unsigned long index = *first_indexp;
 
+       radix_tree_load_root(root, &child, &maxindex);
        last_index = min(last_index, maxindex);
        if (index > last_index)
                return 0;
@@ -1048,29 +1006,23 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
                *first_indexp = last_index + 1;
                return 0;
        }
-       if (!radix_tree_is_indirect_ptr(slot)) {
+       if (!radix_tree_is_internal_node(child)) {
                *first_indexp = last_index + 1;
                root_tag_set(root, settag);
                return 1;
        }
 
-       node = indirect_to_ptr(slot);
-       shift -= RADIX_TREE_MAP_SHIFT;
+       node = entry_to_node(child);
 
        for (;;) {
-               unsigned long upindex;
-               unsigned offset;
-
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
-               offset = radix_tree_descend(node, &slot, offset);
-               if (!slot)
+               unsigned offset = radix_tree_descend(node, &child, index);
+               if (!child)
                        goto next;
                if (!tag_get(node, iftag, offset))
                        goto next;
                /* Sibling slots never have tags set on them */
-               if (radix_tree_is_indirect_ptr(slot)) {
-                       node = indirect_to_ptr(slot);
-                       shift -= RADIX_TREE_MAP_SHIFT;
+               if (radix_tree_is_internal_node(child)) {
+                       node = entry_to_node(child);
                        continue;
                }
 
@@ -1078,27 +1030,25 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
                tagged++;
                tag_set(node, settag, offset);
 
-               slot = node->parent;
                /* walk back up the path tagging interior nodes */
-               upindex = index >> shift;
-               while (slot) {
-                       upindex >>= RADIX_TREE_MAP_SHIFT;
-                       offset = upindex & RADIX_TREE_MAP_MASK;
-
+               parent = node;
+               for (;;) {
+                       offset = parent->offset;
+                       parent = parent->parent;
+                       if (!parent)
+                               break;
                        /* stop if we find a node with the tag already set */
-                       if (tag_get(slot, settag, offset))
+                       if (tag_get(parent, settag, offset))
                                break;
-                       tag_set(slot, settag, offset);
-                       slot = slot->parent;
+                       tag_set(parent, settag, offset);
                }
-
  next:
-               /* Go to next item at level determined by 'shift' */
-               index = ((index >> shift) + 1) << shift;
+               /* Go to next entry in node */
+               index = ((index >> node->shift) + 1) << node->shift;
                /* Overflow can happen when last_index is ~0UL... */
                if (index > last_index || !index)
                        break;
-               offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+               offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
                while (offset == 0) {
                        /*
                         * We've fully scanned this node. Go up. Because
@@ -1106,8 +1056,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
                         * we do below cannot wander astray.
                         */
                        node = node->parent;
-                       shift += RADIX_TREE_MAP_SHIFT;
-                       offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+                       offset = (index >> node->shift) & RADIX_TREE_MAP_MASK;
                }
                if (is_sibling_entry(node, node->slots[offset]))
                        goto next;
@@ -1141,9 +1090,10 @@ EXPORT_SYMBOL(radix_tree_range_tag_if_tagged);
  *
  *     Like radix_tree_lookup, radix_tree_gang_lookup may be called under
  *     rcu_read_lock. In this case, rather than the returned results being
- *     an atomic snapshot of the tree at a single point in time, the semantics
- *     of an RCU protected gang lookup are as though multiple radix_tree_lookups
- *     have been issued in individual locks, and results stored in 'results'.
+ *     an atomic snapshot of the tree at a single point in time, the
+ *     semantics of an RCU protected gang lookup are as though multiple
+ *     radix_tree_lookups have been issued in individual locks, and results
+ *     stored in 'results'.
  */
 unsigned int
 radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
@@ -1160,7 +1110,7 @@ radix_tree_gang_lookup(struct radix_tree_root *root, void **results,
                results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
-               if (radix_tree_is_indirect_ptr(results[ret])) {
+               if (radix_tree_is_internal_node(results[ret])) {
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
@@ -1243,7 +1193,7 @@ radix_tree_gang_lookup_tag(struct radix_tree_root *root, void **results,
                results[ret] = rcu_dereference_raw(*slot);
                if (!results[ret])
                        continue;
-               if (radix_tree_is_indirect_ptr(results[ret])) {
+               if (radix_tree_is_internal_node(results[ret])) {
                        slot = radix_tree_iter_retry(&iter);
                        continue;
                }
@@ -1304,14 +1254,10 @@ struct locate_info {
 static unsigned long __locate(struct radix_tree_node *slot, void *item,
                              unsigned long index, struct locate_info *info)
 {
-       unsigned int shift, height;
        unsigned long i;
 
-       height = slot->path & RADIX_TREE_HEIGHT_MASK;
-       shift = height * RADIX_TREE_MAP_SHIFT;
-
        do {
-               shift -= RADIX_TREE_MAP_SHIFT;
+               unsigned int shift = slot->shift;
 
                for (i = (index >> shift) & RADIX_TREE_MAP_MASK;
                     i < RADIX_TREE_MAP_SIZE;
@@ -1320,7 +1266,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item,
                                        rcu_dereference_raw(slot->slots[i]);
                        if (node == RADIX_TREE_RETRY)
                                goto out;
-                       if (!radix_tree_is_indirect_ptr(node)) {
+                       if (!radix_tree_is_internal_node(node)) {
                                if (node == item) {
                                        info->found_index = index;
                                        info->stop = true;
@@ -1328,15 +1274,13 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item,
                                }
                                continue;
                        }
-                       node = indirect_to_ptr(node);
+                       node = entry_to_node(node);
                        if (is_sibling_entry(slot, node))
                                continue;
                        slot = node;
                        break;
                }
-               if (i == RADIX_TREE_MAP_SIZE)
-                       break;
-       } while (shift);
+       } while (i < RADIX_TREE_MAP_SIZE);
 
 out:
        if ((index == 0) && (i == RADIX_TREE_MAP_SIZE))
@@ -1366,14 +1310,14 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
        do {
                rcu_read_lock();
                node = rcu_dereference_raw(root->rnode);
-               if (!radix_tree_is_indirect_ptr(node)) {
+               if (!radix_tree_is_internal_node(node)) {
                        rcu_read_unlock();
                        if (node == item)
                                info.found_index = 0;
                        break;
                }
 
-               node = indirect_to_ptr(node);
+               node = entry_to_node(node);
 
                max_index = node_maxindex(node);
                if (cur_index > max_index) {
@@ -1396,47 +1340,45 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
 #endif /* CONFIG_SHMEM && CONFIG_SWAP */
 
 /**
- *     radix_tree_shrink    -    shrink height of a radix tree to minimal
+ *     radix_tree_shrink    -    shrink radix tree to minimum height
  *     @root           radix tree root
  */
-static inline void radix_tree_shrink(struct radix_tree_root *root)
+static inline bool radix_tree_shrink(struct radix_tree_root *root)
 {
-       /* try to shrink tree height */
-       while (root->height > 0) {
-               struct radix_tree_node *to_free = root->rnode;
-               struct radix_tree_node *slot;
+       bool shrunk = false;
+
+       for (;;) {
+               struct radix_tree_node *node = root->rnode;
+               struct radix_tree_node *child;
 
-               BUG_ON(!radix_tree_is_indirect_ptr(to_free));
-               to_free = indirect_to_ptr(to_free);
+               if (!radix_tree_is_internal_node(node))
+                       break;
+               node = entry_to_node(node);
 
                /*
                 * The candidate node has more than one child, or its child
-                * is not at the leftmost slot, or it is a multiorder entry,
-                * we cannot shrink.
+                * is not at the leftmost slot, or the child is a multiorder
+                * entry, we cannot shrink.
                 */
-               if (to_free->count != 1)
+               if (node->count != 1)
                        break;
-               slot = to_free->slots[0];
-               if (!slot)
+               child = node->slots[0];
+               if (!child)
                        break;
-               if (!radix_tree_is_indirect_ptr(slot) && (root->height > 1))
+               if (!radix_tree_is_internal_node(child) && node->shift)
                        break;
 
-               if (radix_tree_is_indirect_ptr(slot)) {
-                       slot = indirect_to_ptr(slot);
-                       slot->parent = NULL;
-                       slot = ptr_to_indirect(slot);
-               }
+               if (radix_tree_is_internal_node(child))
+                       entry_to_node(child)->parent = NULL;
 
                /*
                 * We don't need rcu_assign_pointer(), since we are simply
                 * moving the node from one part of the tree to another: if it
                 * was safe to dereference the old pointer to it
-                * (to_free->slots[0]), it will be safe to dereference the new
+                * (node->slots[0]), it will be safe to dereference the new
                 * one (root->rnode) as far as dependent read barriers go.
                 */
-               root->rnode = slot;
-               root->height--;
+               root->rnode = child;
 
                /*
                 * We have a dilemma here. The node's slot[0] must not be
@@ -1448,7 +1390,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
                 * their slot to become empty sooner or later.
                 *
                 * For example, lockless pagecache will look up a slot, deref
-                * the page pointer, and if the page is 0 refcount it means it
+                * the page pointer, and if the page has 0 refcount it means it
                 * was concurrently deleted from pagecache so try the deref
                 * again. Fortunately there is already a requirement for logic
                 * to retry the entire slot lookup -- the indirect pointer
@@ -1456,11 +1398,14 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
                 * also results in a stale slot). So tag the slot as indirect
                 * to force callers to retry.
                 */
-               if (!radix_tree_is_indirect_ptr(slot))
-                       to_free->slots[0] = RADIX_TREE_RETRY;
+               if (!radix_tree_is_internal_node(child))
+                       node->slots[0] = RADIX_TREE_RETRY;
 
-               radix_tree_node_free(to_free);
+               radix_tree_node_free(node);
+               shrunk = true;
        }
+
+       return shrunk;
 }
 
 /**
@@ -1483,24 +1428,17 @@ bool __radix_tree_delete_node(struct radix_tree_root *root,
                struct radix_tree_node *parent;
 
                if (node->count) {
-                       if (node == indirect_to_ptr(root->rnode)) {
-                               radix_tree_shrink(root);
-                               if (root->height == 0)
-                                       deleted = true;
-                       }
+                       if (node == entry_to_node(root->rnode))
+                               deleted |= radix_tree_shrink(root);
                        return deleted;
                }
 
                parent = node->parent;
                if (parent) {
-                       unsigned int offset;
-
-                       offset = node->path >> RADIX_TREE_HEIGHT_SHIFT;
-                       parent->slots[offset] = NULL;
+                       parent->slots[node->offset] = NULL;
                        parent->count--;
                } else {
                        root_tag_clear_all(root);
-                       root->height = 0;
                        root->rnode = NULL;
                }
 
@@ -1562,16 +1500,11 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
 
        offset = get_slot_offset(node, slot);
 
-       /*
-        * Clear all tags associated with the item to be deleted.
-        * This way of doing it would be inefficient, but seldom is any set.
-        */
-       for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
-               if (tag_get(node, tag, offset))
-                       radix_tree_tag_clear(root, index, tag);
-       }
+       /* Clear all tags associated with the item to be deleted.  */
+       for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+               node_tag_clear(root, node, tag, offset);
 
-       delete_sibling_entries(node, ptr_to_indirect(slot), offset);
+       delete_sibling_entries(node, node_to_entry(slot), offset);
        node->slots[offset] = NULL;
        node->count--;
 
@@ -1596,6 +1529,28 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
 }
 EXPORT_SYMBOL(radix_tree_delete);
 
+struct radix_tree_node *radix_tree_replace_clear_tags(
+                       struct radix_tree_root *root,
+                       unsigned long index, void *entry)
+{
+       struct radix_tree_node *node;
+       void **slot;
+
+       __radix_tree_lookup(root, index, &node, &slot);
+
+       if (node) {
+               unsigned int tag, offset = get_slot_offset(node, slot);
+               for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
+                       node_tag_clear(root, node, tag, offset);
+       } else {
+               /* Clear root node tags */
+               root->gfp_mask &= __GFP_BITS_MASK;
+       }
+
+       radix_tree_replace_slot(slot, entry);
+       return node;
+}
+
 /**
  *     radix_tree_tagged - test whether any items in the tree are tagged
  *     @root:          radix tree root
@@ -1616,45 +1571,24 @@ radix_tree_node_ctor(void *arg)
        INIT_LIST_HEAD(&node->private_list);
 }
 
-static __init unsigned long __maxindex(unsigned int height)
-{
-       unsigned int width = height * RADIX_TREE_MAP_SHIFT;
-       int shift = RADIX_TREE_INDEX_BITS - width;
-
-       if (shift < 0)
-               return ~0UL;
-       if (shift >= BITS_PER_LONG)
-               return 0UL;
-       return ~0UL >> shift;
-}
-
-static __init void radix_tree_init_maxindex(void)
-{
-       unsigned int i;
-
-       for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
-               height_to_maxindex[i] = __maxindex(i);
-}
-
 static int radix_tree_callback(struct notifier_block *nfb,
-                            unsigned long action,
-                            void *hcpu)
+                               unsigned long action, void *hcpu)
 {
-       int cpu = (long)hcpu;
-       struct radix_tree_preload *rtp;
-       struct radix_tree_node *node;
-
-       /* Free per-cpu pool of perloaded nodes */
-       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
-               rtp = &per_cpu(radix_tree_preloads, cpu);
-               while (rtp->nr) {
+       int cpu = (long)hcpu;
+       struct radix_tree_preload *rtp;
+       struct radix_tree_node *node;
+
+       /* Free per-cpu pool of preloaded nodes */
+       if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
+               rtp = &per_cpu(radix_tree_preloads, cpu);
+               while (rtp->nr) {
                        node = rtp->nodes;
                        rtp->nodes = node->private_data;
                        kmem_cache_free(radix_tree_node_cachep, node);
                        rtp->nr--;
-               }
-       }
-       return NOTIFY_OK;
+               }
+       }
+       return NOTIFY_OK;
 }
 
 void __init radix_tree_init(void)
@@ -1663,6 +1597,5 @@ void __init radix_tree_init(void)
                        sizeof(struct radix_tree_node), 0,
                        SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
                        radix_tree_node_ctor);
-       radix_tree_init_maxindex();
        hotcpu_notifier(radix_tree_callback, 0);
 }