dentry_cmp(): use lockless_dereference() instead of smp_read_barrier_depends()
[cascardo/linux.git] / fs / dcache.c
index ad4a542..dc37c02 100644 (file)
@@ -226,10 +226,9 @@ static inline int dentry_string_cmp(const unsigned char *cs, const unsigned char
 
 static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *ct, unsigned tcount)
 {
-       const unsigned char *cs;
        /*
         * Be careful about RCU walk racing with rename:
-        * use ACCESS_ONCE to fetch the name pointer.
+        * use 'lockless_dereference' to fetch the name pointer.
         *
         * NOTE! Even if a rename will mean that the length
         * was not loaded atomically, we don't care. The
@@ -243,8 +242,8 @@ static inline int dentry_cmp(const struct dentry *dentry, const unsigned char *c
         * early because the data cannot match (there can
         * be no NUL in the ct/tcount data)
         */
-       cs = ACCESS_ONCE(dentry->d_name.name);
-       smp_read_barrier_depends();
+       const unsigned char *cs = lockless_dereference(dentry->d_name.name);
+
        return dentry_string_cmp(cs, ct, tcount);
 }
 
@@ -507,6 +506,44 @@ void d_drop(struct dentry *dentry)
 }
 EXPORT_SYMBOL(d_drop);
 
+static inline void dentry_unlist(struct dentry *dentry, struct dentry *parent)
+{
+       struct dentry *next;
+       /*
+        * Detach @dentry from its siblings: mark it killed, unlink
+        * ->d_child, then make the pointer left in ->d_child.next skip
+        * over cursors.
+        *
+        * Setting DCACHE_DENTRY_KILLED informs d_walk() and
+        * shrink_dentry_list() that we are no longer attached to the
+        * dentry tree.  The flag is set even when ->d_child is already
+        * empty.
+        *
+        * NOTE(review): the visible caller, __dentry_kill(), tests
+        * parent for NULL after calling us, so @parent may be NULL here.
+        * Presumably ->d_child is empty in that case and the early return
+        * below is what keeps the loop from touching @parent - confirm.
+        */
+       dentry->d_flags |= DCACHE_DENTRY_KILLED;
+       if (unlikely(list_empty(&dentry->d_child)))
+               return; /* not linked to any sibling: nothing to unlink or fix up */
+       __list_del_entry(&dentry->d_child);
+       /*
+        * Cursors can move around the list of children.  While we'd been
+        * a normal list member, it didn't matter - ->d_child.next would've
+        * been updated.  However, from now on it won't be, and things like
+        * d_walk() might end up with a nasty surprise.
+        * Normally d_walk() doesn't care about cursors moving around -
+        * ->d_lock on parent prevents that and since a cursor has no children
+        * of its own, we get through it without ever unlocking the parent.
+        * There is one exception, though - if we ascend from a child that
+        * gets killed as soon as we unlock it, the next sibling is found
+        * using the value left in its ->d_child.next.  And if _that_
+        * pointed to a cursor, and cursor got moved (e.g. by lseek())
+        * before d_walk() regains parent->d_lock, we'll end up skipping
+        * everything the cursor had been moved past.
+        *
+        * Solution: make sure that the pointer left behind in ->d_child.next
+        * points to something that won't be moving around.  I.e. skip the
+        * cursors.
+        *
+        * The loop below advances ->d_child.next one entry at a time and
+        * stops at the list head (&parent->d_subdirs) or at the first
+        * entry that does not have DCACHE_DENTRY_CURSOR set.
+        */
+       while (dentry->d_child.next != &parent->d_subdirs) {
+               next = list_entry(dentry->d_child.next, struct dentry, d_child);
+               if (likely(!(next->d_flags & DCACHE_DENTRY_CURSOR)))
+                       break;
+               dentry->d_child.next = next->d_child.next; /* step over this cursor */
+       }
+}
+
 static void __dentry_kill(struct dentry *dentry)
 {
        struct dentry *parent = NULL;
@@ -532,12 +569,7 @@ static void __dentry_kill(struct dentry *dentry)
        }
        /* if it was on the hash then remove it */
        __d_drop(dentry);
-       __list_del_entry(&dentry->d_child);
-       /*
-        * Inform d_walk() that we are no longer attached to the
-        * dentry tree
-        */
-       dentry->d_flags |= DCACHE_DENTRY_KILLED;
+       dentry_unlist(dentry, parent);
        if (parent)
                spin_unlock(&parent->d_lock);
        dentry_iput(dentry);
@@ -1203,6 +1235,9 @@ resume:
                struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
                next = tmp->next;
 
+               if (unlikely(dentry->d_flags & DCACHE_DENTRY_CURSOR))
+                       continue;
+
                spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
 
                ret = enter(data, dentry);
@@ -1636,7 +1671,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
        struct dentry *dentry = __d_alloc(parent->d_sb, name);
        if (!dentry)
                return NULL;
-
+       dentry->d_flags |= DCACHE_RCUACCESS;
        spin_lock(&parent->d_lock);
        /*
         * don't need child lock because it is not subject
@@ -1651,6 +1686,16 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 }
 EXPORT_SYMBOL(d_alloc);
 
+/**
+ * d_alloc_cursor - allocate a cursor dentry under @parent
+ * @parent: dentry recorded as the cursor's ->d_parent
+ *
+ * Asks __d_alloc() for a dentry on @parent's superblock with a NULL name,
+ * flags it DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR and stores the result
+ * of dget(@parent) in ->d_parent.  The new dentry is not linked into any
+ * list here.
+ *
+ * Returns the new dentry, or NULL if __d_alloc() returned NULL.
+ */
+struct dentry *d_alloc_cursor(struct dentry * parent)
+{
+       struct dentry *cursor = __d_alloc(parent->d_sb, NULL);
+
+       /* Allocation failed: hand the NULL straight back to the caller. */
+       if (!cursor)
+               return NULL;
+
+       cursor->d_flags |= DCACHE_RCUACCESS | DCACHE_DENTRY_CURSOR;
+       cursor->d_parent = dget(parent);
+       return cursor;
+}
+
 /**
  * d_alloc_pseudo - allocate a dentry (for lookup-less filesystems)
  * @sb: the superblock
@@ -1769,7 +1814,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
        raw_write_seqcount_begin(&dentry->d_seq);
        __d_set_inode_and_type(dentry, inode, add_flags);
        raw_write_seqcount_end(&dentry->d_seq);
-       __fsnotify_d_instantiate(dentry);
+       fsnotify_update_flags(dentry);
        spin_unlock(&dentry->d_lock);
 }
 
@@ -2358,7 +2403,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
 {
        BUG_ON(!d_unhashed(entry));
        hlist_bl_lock(b);
-       entry->d_flags |= DCACHE_RCUACCESS;
        hlist_bl_add_head_rcu(&entry->d_hash, b);
        hlist_bl_unlock(b);
 }
@@ -2458,7 +2502,6 @@ retry:
                rcu_read_unlock();
                goto retry;
        }
-       rcu_read_unlock();
        /*
         * No changes for the parent since the beginning of d_lookup().
         * Since all removals from the chain happen with hlist_bl_lock(),
@@ -2471,8 +2514,6 @@ retry:
                        continue;
                if (dentry->d_parent != parent)
                        continue;
-               if (d_unhashed(dentry))
-                       continue;
                if (parent->d_flags & DCACHE_OP_COMPARE) {
                        int tlen = dentry->d_name.len;
                        const char *tname = dentry->d_name.name;
@@ -2484,9 +2525,18 @@ retry:
                        if (dentry_cmp(dentry, str, len))
                                continue;
                }
-               dget(dentry);
                hlist_bl_unlock(b);
-               /* somebody is doing lookup for it right now; wait for it */
+               /* now we can try to grab a reference */
+               if (!lockref_get_not_dead(&dentry->d_lockref)) {
+                       rcu_read_unlock();
+                       goto retry;
+               }
+
+               rcu_read_unlock();
+               /*
+                * somebody is likely to be still doing lookup for it;
+                * wait for them to finish
+                */
                spin_lock(&dentry->d_lock);
                d_wait_lookup(dentry);
                /*
@@ -2517,6 +2567,7 @@ retry:
                dput(new);
                return dentry;
        }
+       rcu_read_unlock();
        /* we can't take ->d_lock here; it's OK, though. */
        new->d_flags |= DCACHE_PAR_LOOKUP;
        new->d_wait = wq;
@@ -2563,7 +2614,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
                raw_write_seqcount_begin(&dentry->d_seq);
                __d_set_inode_and_type(dentry, inode, add_flags);
                raw_write_seqcount_end(&dentry->d_seq);
-               __fsnotify_d_instantiate(dentry);
+               fsnotify_update_flags(dentry);
        }
        _d_rehash(dentry);
        if (dir)
@@ -2843,6 +2894,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
        /* ... and switch them in the tree */
        if (IS_ROOT(dentry)) {
                /* splicing a tree */
+               dentry->d_flags |= DCACHE_RCUACCESS;
                dentry->d_parent = target->d_parent;
                target->d_parent = target;
                list_del_init(&target->d_child);
@@ -2853,8 +2905,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
                list_move(&target->d_child, &target->d_parent->d_subdirs);
                list_move(&dentry->d_child, &dentry->d_parent->d_subdirs);
                if (exchange)
-                       fsnotify_d_move(target);
-               fsnotify_d_move(dentry);
+                       fsnotify_update_flags(target);
+               fsnotify_update_flags(dentry);
        }
 
        write_seqcount_end(&target->d_seq);