new helper: d_same_name()
[cascardo/linux.git] / fs / dcache.c
index c622872..040c258 100644 (file)
@@ -226,10 +226,9 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
 
 static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 {
-       const unsigned char *cs;
        /*
         * Be careful about RCU walk racing with rename:
-        * use ACCESS_ONCE to fetch the name pointer.
+        * use 'lockless_dereference' to fetch the name pointer.
         *
         * NOTE! Even if a rename will mean that the length
         * was not loaded atomically, we don't care. The
@@ -243,8 +242,8 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
         * early because the data cannot match (there can
         * be no NUL in the ct/tcount data)
         */
-       cs = ACCESS_ONCE(dentry->d_name.name);
-       smp_read_barrier_depends();
+       const unsigned char *cs = lockless_dereference(dentry->d_name.name);
+
        return dentry_string_cmp(cs, ct, tcount);
 }
 
@@ -507,6 +506,44 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
+static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
+{
+       struct dentry *next;
+       /*
+        * Inform d_walk() and shrink_dentry_list() that we are no longer
+        * attached to the dentry tree
+        */
+       dentry->d_flags |= DCACHE_DENTRY_KILLED;
+       if (unlikely(list_empty(&dentry->d_child)))
+               return;
+       __list_del_entry(&dentry->d_child);
+       /*
+        * Cursors can move around the list of children.  While we'd been
+        * a normal list member, it didn't matter - ->d_child.next would've
+        * been updated.  However, from now on it won't be and for the
+        * things like d_walk() it might end up with a nasty surprise.
+        * Normally d_walk() doesn't care about cursors moving around -
+        * ->d_lock on parent prevents that and since a cursor has no children
+        * of its own, we get through it without ever unlocking the parent.
+        * There is one exception, though - if we ascend from a child that
+        * gets killed as soon as we unlock it, the next sibling is found
+        * using the value left in its ->d_child.next.  And if _that_
+        * pointed to a cursor, and cursor got moved (e.g. by lseek())
+        * before d_walk() regains parent->d_lock, we'll end up skipping
+        * everything the cursor had been moved past.
+        *
+        * Solution: make sure that the pointer left behind in ->d_child.next
+        * points to something that won't be moving around.  I.e. skip the
+        * cursors.
+        */
+       while (dentry->d_child.next != &parent->d_subdirs) {
+               next = list_entry(dentry->d_child.next, struct dentry, d_child);
+               if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
+                       break;
+               dentry->d_child.next = next->d_child.next;
+       }
+}
+
 static void __dentry_kill(struct dentry *dentry)
 {
        struct dentry *parent = NULL;
@@ -532,12 +569,7 @@ static void __dentry_kill(struct dentry *dentry)
        }
        /* if it was on the hash then remove it */
        __d_drop(dentry);
-       __list_del_entry(&dentry->d_child);
-       /*
-        * Inform d_walk() that we are no longer attached to the
-        * dentry tree
-        */
-       dentry->d_flags |= DCACHE_DENTRY_KILLED;
+       dentry_unlist(dentry, parent);
        if (parent)
                spin_unlock(&parent->d_lock);
        dentry_iput(dentry);
@@ -1203,6 +1235,9 @@ resume:
                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                next = tmp->next;
 
+               if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
+                       continue;
+
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 
                ret = enter(data, dentry);
@@ -1636,7 +1671,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
        struct dentry *dentry = __d_alloc(parent->d_sb, name);
        if (!dentry)
                return NULL;
-
+       dentry->d_flags |= DCACHE_RCUACCESS;
        spin_lock(&parent->d_lock);
        /*
         * don't need child lock because it is not subject
@@ -1651,6 +1686,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+struct dentry *d_alloc_cursor(struct dentry * parent)
+{
+       struct dentry *dentry = __d_alloc(parent->d_sb, NULL);
+       if (dentry) {
+               dentry->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+               dentry->d_parent = dget(parent);
+       }
+       return dentry;
+}
+
 /**
  * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
  * @sb: the superblock
@@ -1670,8 +1715,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
        struct qstr q;
 
        q.name = name;
-       q.len = strlen(name);
-       q.hash = full_name_hash(q.name, q.len);
+       q.hash_len = hashlen_string(name);
        return d_alloc(parent, &q);
 }
 EXPORT_SYMBOL(d_alloc_name);
@@ -1770,7 +1814,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
        raw_write_seqcount_begin(&dentry->d_seq);
        __d_set_inode_and_type(dentry, inode, add_flags);
        raw_write_seqcount_end(&dentry->d_seq);
-       __fsnotify_d_instantiate(dentry);
+       fsnotify_update_flags(dentry);
        spin_unlock(&dentry->d_lock);
 }
 
@@ -2022,42 +2066,19 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 }
 EXPORT_SYMBOL(d_add_ci);
 
-/*
- * Do the slow-case of the dentry name compare.
- *
- * Unlike the dentry_cmp() function, we need to atomically
- * load the name and length information, so that the
- * filesystem can rely on them, and can use the 'name' and
- * 'len' information without worrying about walking off the
- * end of memory etc.
- *
- * Thus the read_seqcount_retry() and the "duplicate" info
- * in arguments (the low-level filesystem should not look
- * at the dentry inode or name contents directly, since
- * rename can change them while we're in RCU mode).
- */
-enum slow_d_compare {
-       D_COMP_OK,
-       D_COMP_NOMATCH,
-       D_COMP_SEQRETRY,
-};
 
-static noinline enum slow_d_compare slow_dentry_cmp(
-               const struct dentry *parent,
-               struct dentry *dentry,
-               unsigned int seq,
-               const struct qstr *name)
+static inline bool d_same_name(const struct dentry *dentry,
+                               const struct dentry *parent,
+                               const struct qstr *name)
 {
-       int tlen = dentry->d_name.len;
-       const char *tname = dentry->d_name.name;
-
-       if (read_seqcount_retry(&dentry->d_seq, seq)) {
-               cpu_relax();
-               return D_COMP_SEQRETRY;
+       if (likely(!(parent->d_flags & DCACHE_OP_COMPARE))) {
+               if (dentry->d_name.len != name->len)
+                       return false;
+               return dentry_cmp(dentry, name->name, name->len) == 0;
        }
-       if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
-               return D_COMP_NOMATCH;
-       return D_COMP_OK;
+       return parent->d_op->d_compare(parent, dentry,
+                                      dentry->d_name.len, dentry->d_name.name,
+                                      name) == 0;
 }
 
 /**
@@ -2136,6 +2157,9 @@ seqretry:
                 * dentry compare, we will do seqretries until it is stable,
                 * and if we end up with a successful lookup, we actually
                 * want to exit RCU lookup anyway.
+                *
+                * Note that raw_seqcount_begin still *does* smp_rmb(), so
+                * we are still guaranteed NUL-termination of ->d_name.name.
                 */
                seq = raw_seqcount_begin(&dentry->d_seq);
                if (dentry->d_parent != parent)
@@ -2144,24 +2168,28 @@ seqretry:
                        continue;
 
                if (unlikely(parent->d_flags & DCACHE_OP_COMPARE)) {
+                       int tlen;
+                       const char *tname;
                        if (dentry->d_name.hash != hashlen_hash(hashlen))
                                continue;
-                       *seqp = seq;
-                       switch (slow_dentry_cmp(parent, dentry, seq, name)) {
-                       case D_COMP_OK:
-                               return dentry;
-                       case D_COMP_NOMATCH:
-                               continue;
-                       default:
+                       tlen = dentry->d_name.len;
+                       tname = dentry->d_name.name;
+                       /* we want a consistent (name,len) pair */
+                       if (read_seqcount_retry(&dentry->d_seq, seq)) {
+                               cpu_relax();
                                goto seqretry;
                        }
+                       if (parent->d_op->d_compare(parent, dentry,
+                                                   tlen, tname, name) != 0)
+                               continue;
+               } else {
+                       if (dentry->d_name.hash_len != hashlen)
+                               continue;
+                       if (dentry_cmp(dentry, str, hashlen_len(hashlen)) != 0)
+                               continue;
                }
-
-               if (dentry->d_name.hash_len != hashlen)
-                       continue;
                *seqp = seq;
-               if (!dentry_cmp(dentry, str, hashlen_len(hashlen)))
-                       return dentry;
+               return dentry;
        }
        return NULL;
 }
@@ -2209,9 +2237,7 @@ EXPORT_SYMBOL(d_lookup);
  */
 struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
 {
-       unsigned int len = name->len;
        unsigned int hash = name->hash;
-       const unsigned char *str = name->name;
        struct hlist_bl_head *b = d_hash(parent, hash);
        struct hlist_bl_node *node;
        struct dentry *found = NULL;
@@ -2250,21 +2276,8 @@ struct dentry *__d_lookup(const struct dentry *parent, const struct qstr *name)
                if (d_unhashed(dentry))
                        goto next;
 
-               /*
-                * It is safe to compare names since d_move() cannot
-                * change the qstr (protected by d_lock).
-                */
-               if (parent->d_flags & DCACHE_OP_COMPARE) {
-                       int tlen = dentry->d_name.len;
-                       const char *tname = dentry->d_name.name;
-                       if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
-                               goto next;
-               } else {
-                       if (dentry->d_name.len != len)
-                               goto next;
-                       if (dentry_cmp(dentry, str, len))
-                               goto next;
-               }
+               if (!d_same_name(dentry, parent, name))
+                       goto next;
 
                dentry->d_lockref.count++;
                found = dentry;
@@ -2359,7 +2372,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
 {
        BUG_ON(!d_unhashed(entry));
        hlist_bl_lock(b);
-       entry->d_flags |= DCACHE_RCUACCESS;
        hlist_bl_add_head_rcu(&entry->d_hash, b);
        hlist_bl_unlock(b);
 }
@@ -2418,9 +2430,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
                                const struct qstr *name,
                                wait_queue_head_t *wq)
 {
-       unsigned int len = name->len;
        unsigned int hash = name->hash;
-       const unsigned char *str = name->name;
        struct hlist_bl_head *b = in_lookup_hash(parent, hash);
        struct hlist_bl_node *node;
        struct dentry *new = d_alloc(parent, name);
@@ -2459,7 +2469,6 @@ retry:
                rcu_read_unlock();
                goto retry;
        }
-       rcu_read_unlock();
        /*
         * No changes for the parent since the beginning of d_lookup().
         * Since all removals from the chain happen with hlist_bl_lock(),
@@ -2472,22 +2481,20 @@ retry:
                        continue;
                if (dentry->d_parent != parent)
                        continue;
-               if (d_unhashed(dentry))
+               if (!d_same_name(dentry, parent, name))
                        continue;
-               if (parent->d_flags & DCACHE_OP_COMPARE) {
-                       int tlen = dentry->d_name.len;
-                       const char *tname = dentry->d_name.name;
-                       if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
-                               continue;
-               } else {
-                       if (dentry->d_name.len != len)
-                               continue;
-                       if (dentry_cmp(dentry, str, len))
-                               continue;
-               }
-               dget(dentry);
                hlist_bl_unlock(b);
-               /* somebody is doing lookup for it right now; wait for it */
+               /* now we can try to grab a reference */
+               if (!lockref_get_not_dead(&dentry->d_lockref)) {
+                       rcu_read_unlock();
+                       goto retry;
+               }
+
+               rcu_read_unlock();
+               /*
+                * somebody is likely to be still doing lookup for it;
+                * wait for them to finish
+                */
                spin_lock(&dentry->d_lock);
                d_wait_lookup(dentry);
                /*
@@ -2502,22 +2509,14 @@ retry:
                        goto mismatch;
                if (unlikely(d_unhashed(dentry)))
                        goto mismatch;
-               if (parent->d_flags & DCACHE_OP_COMPARE) {
-                       int tlen = dentry->d_name.len;
-                       const char *tname = dentry->d_name.name;
-                       if (parent->d_op->d_compare(parent, dentry, tlen, tname, name))
-                               goto mismatch;
-               } else {
-                       if (unlikely(dentry->d_name.len != len))
-                               goto mismatch;
-                       if (unlikely(dentry_cmp(dentry, str, len)))
-                               goto mismatch;
-               }
+               if (unlikely(!d_same_name(dentry, parent, name)))
+                       goto mismatch;
                /* OK, it *is* a hashed match; return it */
                spin_unlock(&dentry->d_lock);
                dput(new);
                return dentry;
        }
+       rcu_read_unlock();
        /* we can't take ->d_lock here; it's OK, though. */
        new->d_flags |= DCACHE_PAR_LOOKUP;
        new->d_wait = wq;
@@ -2564,7 +2563,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
                raw_write_seqcount_begin(&dentry->d_seq);
                __d_set_inode_and_type(dentry, inode, add_flags);
                raw_write_seqcount_end(&dentry->d_seq);
-               __fsnotify_d_instantiate(dentry);
+               fsnotify_update_flags(dentry);
        }
        _d_rehash(dentry);
        if (dir)
@@ -2607,8 +2606,6 @@ EXPORT_SYMBOL(d_add);
 struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
 {
        struct dentry *alias;
-       int len = entry->d_name.len;
-       const char *name = entry->d_name.name;
        unsigned int hash = entry->d_name.hash;
 
        spin_lock(&inode->i_lock);
@@ -2622,9 +2619,7 @@ struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
                        continue;
                if (alias->d_parent != entry->d_parent)
                        continue;
-               if (alias->d_name.len != len)
-                       continue;
-               if (dentry_cmp(alias, name, len))
+               if (!d_same_name(alias, entry->d_parent, &entry->d_name))
                        continue;
                spin_lock(&alias->d_lock);
                if (!d_unhashed(alias)) {
@@ -2844,6 +2839,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
        /* ... and switch them in the tree */
        if (IS_ROOT(dentry)) {
                /* splicing a tree */
+               dentry->d_flags |= DCACHE_RCUACCESS;
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
                list_del_init(&target->d_child);
@@ -2854,8 +2850,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
                list_move(&target->d_child, &target->d_parent->d_subdirs);
                list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
                if (exchange)
-                       fsnotify_d_move(target);
-               fsnotify_d_move(dentry);
+                       fsnotify_update_flags(target);
+               fsnotify_update_flags(dentry);
        }
 
        write_seqcount_end(&target->d_seq);