ocfs2: add missing return value check of ocfs2_get_clusters()
[cascardo/linux.git] / fs / dcache.c
index 761e31b..4d9df3c 100644 (file)
@@ -88,6 +88,35 @@ EXPORT_SYMBOL(rename_lock);
 
 static struct kmem_cache *dentry_cache __read_mostly;
 
+/**
+ * read_seqbegin_or_lock - begin a sequence number check or locking block
+ * @lock: sequence lock
+ * @seq: sequence number to be checked; rewritten on the lockless path
+ *
+ * With an even *seq this starts a lockless pass and stores the value from
+ * read_seqbegin() in *seq. With an odd *seq it takes the write side of the
+ * lock instead. Callers try once optimistically and, if that pass has to be
+ * redone, make seq odd before calling again so the retry runs locked.
+ * N.B. seq must be initialized to an even number to begin with.
+ */
+static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
+{
+       if (!(*seq & 1))        /* Even: lockless reader */
+               *seq = read_seqbegin(lock);
+       else                    /* Odd: take the lock as a writer */
+               write_seqlock(lock);
+}
+
+static inline int need_seqretry(seqlock_t *lock, int seq)
+{      /* Nonzero only when a lockless (even seq) pass has to be redone. */
+       return !(seq & 1) && read_seqretry(lock, seq);  /* odd seq short-circuits: no recheck */
+}
+
+static inline void done_seqretry(seqlock_t *lock, int seq)
+{      /* Closes a block opened with read_seqbegin_or_lock(); a no-op for even seq. */
+       if (seq & 1)    /* odd seq: read_seqbegin_or_lock() took write_seqlock() */
+               write_sequnlock(lock);
+}
+
 /*
  * This is the single most critical data structure when it comes
  * to the dcache: the hashtable for lookups. Somebody should try
@@ -229,7 +258,7 @@ static void __d_free(struct rcu_head *head)
  */
 static void d_free(struct dentry *dentry)
 {
-       BUG_ON(dentry->d_lockref.count);
+       BUG_ON((int)dentry->d_lockref.count > 0);
        this_cpu_dec(nr_dentry);
        if (dentry->d_op && dentry->d_op->d_release)
                dentry->d_op->d_release(dentry);
@@ -308,8 +337,9 @@ static void dentry_unlink_inode(struct dentry * dentry)
  */
 static void dentry_lru_add(struct dentry *dentry)
 {
-       if (list_empty(&dentry->d_lru)) {
+       if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
                spin_lock(&dcache_lru_lock);
+               dentry->d_flags |= DCACHE_LRU_LIST;
                list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
                dentry->d_sb->s_nr_dentry_unused++;
                dentry_stat.nr_unused++;
@@ -320,7 +350,7 @@ static void dentry_lru_add(struct dentry *dentry)
 static void __dentry_lru_del(struct dentry *dentry)
 {      /* Unlinks dentry from its d_lru list; no lock is taken in this function. */
        list_del_init(&dentry->d_lru);
-       dentry->d_flags &= ~DCACHE_SHRINK_LIST;
+       dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);    /* DCACHE_LRU_LIST is the flag dentry_lru_add() sets on insertion */
        dentry->d_sb->s_nr_dentry_unused--;     /* both counters are bumped when a dentry is put on an LRU list */
        dentry_stat.nr_unused--;
 }
@@ -341,6 +371,7 @@ static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
 {
        spin_lock(&dcache_lru_lock);
        if (list_empty(&dentry->d_lru)) {
+               dentry->d_flags |= DCACHE_LRU_LIST;
                list_add_tail(&dentry->d_lru, list);
                dentry->d_sb->s_nr_dentry_unused++;
                dentry_stat.nr_unused++;
@@ -443,7 +474,7 @@ EXPORT_SYMBOL(d_drop);
  * If ref is non-zero, then decrement the refcount too.
  * Returns dentry requiring refcount drop, or NULL if we're done.
  */
-static inline struct dentry *dentry_kill(struct dentry *dentry, int ref)
+static inline struct dentry *dentry_kill(struct dentry *dentry)
        __releases(dentry->d_lock)
 {
        struct inode *inode;
@@ -466,8 +497,11 @@ relock:
                goto relock;
        }
 
-       if (ref)
-               dentry->d_lockref.count--;
+       /*
+        * The dentry is now unrecoverably dead to the world.
+        */
+       lockref_mark_dead(&dentry->d_lockref);
+
        /*
         * inform the fs via d_prune that this dentry is about to be
         * unhashed and destroyed.
@@ -509,24 +543,22 @@ relock:
  */
 void dput(struct dentry *dentry)
 {
-       if (!dentry)
+       if (unlikely(!dentry))
                return;
 
 repeat:
-       if (dentry->d_lockref.count == 1)
-               might_sleep();
        if (lockref_put_or_lock(&dentry->d_lockref))
                return;
 
-       if (dentry->d_flags & DCACHE_OP_DELETE) {
+       /* Unreachable? Get rid of it */
+       if (unlikely(d_unhashed(dentry)))
+               goto kill_it;
+
+       if (unlikely(dentry->d_flags & DCACHE_OP_DELETE)) {
                if (dentry->d_op->d_delete(dentry))
                        goto kill_it;
        }
 
-       /* Unreachable? Get rid of it */
-       if (d_unhashed(dentry))
-               goto kill_it;
-
        dentry->d_flags |= DCACHE_REFERENCED;
        dentry_lru_add(dentry);
 
@@ -535,7 +567,7 @@ repeat:
        return;
 
 kill_it:
-       dentry = dentry_kill(dentry, 1);
+       dentry = dentry_kill(dentry);
        if (dentry)
                goto repeat;
 }
@@ -760,7 +792,7 @@ static void try_prune_one_dentry(struct dentry *dentry)
 {
        struct dentry *parent;
 
-       parent = dentry_kill(dentry, 0);
+       parent = dentry_kill(dentry);
        /*
         * If dentry_kill returns NULL, we have nothing more to do.
         * if it returns the same dentry, trylocks failed. In either
@@ -781,7 +813,7 @@ static void try_prune_one_dentry(struct dentry *dentry)
        while (dentry) {
                if (lockref_put_or_lock(&dentry->d_lockref))
                        return;
-               dentry = dentry_kill(dentry, 1);
+               dentry = dentry_kill(dentry);
        }
 }
 
@@ -1009,7 +1041,7 @@ void shrink_dcache_for_umount(struct super_block *sb)
  * the parenthood after dropping the lock and check
  * that the sequence number still matches.
  */
-static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
+static struct dentry *try_to_ascend(struct dentry *old, unsigned seq)
 {
        struct dentry *new = old->d_parent;
 
@@ -1023,7 +1055,7 @@ static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq
         */
        if (new != old->d_parent ||
                 (old->d_flags & DCACHE_DENTRY_KILLED) ||
-                (!locked && read_seqretry(&rename_lock, seq))) {
+                need_seqretry(&rename_lock, seq)) {
                spin_unlock(&new->d_lock);
                new = NULL;
        }
@@ -1060,13 +1092,12 @@ static void d_walk(struct dentry *parent, void *data,
 {
        struct dentry *this_parent;
        struct list_head *next;
-       unsigned seq;
-       int locked = 0;
+       unsigned seq = 0;
        enum d_walk_ret ret;
        bool retry = true;
 
-       seq = read_seqbegin(&rename_lock);
 again:
+       read_seqbegin_or_lock(&rename_lock, &seq);
        this_parent = parent;
        spin_lock(&this_parent->d_lock);
 
@@ -1120,13 +1151,13 @@ resume:
         */
        if (this_parent != parent) {
                struct dentry *child = this_parent;
-               this_parent = try_to_ascend(this_parent, locked, seq);
+               this_parent = try_to_ascend(this_parent, seq);
                if (!this_parent)
                        goto rename_retry;
                next = child->d_u.d_child.next;
                goto resume;
        }
-       if (!locked && read_seqretry(&rename_lock, seq)) {
+       if (need_seqretry(&rename_lock, seq)) {
                spin_unlock(&this_parent->d_lock);
                goto rename_retry;
        }
@@ -1135,17 +1166,13 @@ resume:
 
 out_unlock:
        spin_unlock(&this_parent->d_lock);
-       if (locked)
-               write_sequnlock(&rename_lock);
+       done_seqretry(&rename_lock, seq);
        return;
 
 rename_retry:
        if (!retry)
                return;
-       if (locked)
-               goto again;
-       locked = 1;
-       write_seqlock(&rename_lock);
+       seq = 1;
        goto again;
 }
 
@@ -2644,9 +2671,39 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
        return 0;
 }
 
+/**
+ * prepend_name - prepend "/" plus a dentry name in front of the buffer pointer
+ * @buffer: buffer pointer; moved back by the name length plus one on success
+ * @buflen: remaining length of the buffer; reduced by the same amount
+ * @name:   name string and length qstr structure
+ *
+ * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to
+ * make sure that either the old or the new name pointer and length are
+ * fetched. However, there may be a mismatch between length and pointer.
+ * The length cannot be trusted, so the name is copied byte-by-byte until
+ * the length is reached or a null byte is found. A "/" is written in front
+ * of the name. The sequence number check at the caller will
+ * retry it again when a d_move() does happen. So any garbage in the buffer
+ * due to mismatched pointer and length will be discarded.
+ *
+ * Returns 0 on success, or -ENAMETOOLONG (leaving *buffer and *buflen
+ * untouched) when fewer than length + 1 bytes remain.
+ */
 static int prepend_name(char **buffer, int *buflen, struct qstr *name)
 {
-       return prepend(buffer, buflen, name->name, name->len);
+       const char *dname = ACCESS_ONCE(name->name);
+       u32 dlen = ACCESS_ONCE(name->len);
+       char *p;
+
+       if (*buflen < dlen + 1) /* NOTE(review): int vs u32 compares as unsigned; assumes *buflen >= 0 - confirm */
+               return -ENAMETOOLONG;
+       *buflen -= dlen + 1;    /* reserve room for '/' plus dlen name bytes */
+       p = *buffer -= dlen + 1;
+       *p++ = '/';
+       while (dlen--) {
+               char c = *dname++;
+               if (!c)         /* stop at NUL: dlen may not match this name pointer */
+                       break;
+               *p++ = c;
+       }
+       return 0;
 }
 
 /**
@@ -2656,7 +2713,14 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
- * Caller holds the rename_lock.
+ * The function tries to write out the pathname without taking any lock other
+ * than the RCU read lock to make sure that dentries won't go away. It only
+ * checks the sequence number of the global rename_lock as any change in the
+ * dentry's d_seq will be preceded by changes in the rename_lock sequence
+ * number. If the sequence number has changed, it will restart the whole
+ * pathname back-tracing sequence again. Only the first pass is lockless:
+ * after one failed attempt seq is set to 1, so the retry takes the
+ * rename_lock.
  */
 static int prepend_path(const struct path *path,
                        const struct path *root,
@@ -2665,54 +2729,66 @@ static int prepend_path(const struct path *path,
        struct dentry *dentry = path->dentry;
        struct vfsmount *vfsmnt = path->mnt;
        struct mount *mnt = real_mount(vfsmnt);
-       bool slash = false;
        int error = 0;
+       unsigned seq = 0;
+       char *bptr;
+       int blen;
 
+       rcu_read_lock();
+restart:
+       bptr = *buffer;
+       blen = *buflen;
+       read_seqbegin_or_lock(&rename_lock, &seq);
        while (dentry != root->dentry || vfsmnt != root->mnt) {
                struct dentry * parent;
 
                if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
                        /* Global root? */
-                       if (!mnt_has_parent(mnt))
-                               goto global_root;
-                       dentry = mnt->mnt_mountpoint;
-                       mnt = mnt->mnt_parent;
-                       vfsmnt = &mnt->mnt;
-                       continue;
+                       if (mnt_has_parent(mnt)) {
+                               dentry = mnt->mnt_mountpoint;
+                               mnt = mnt->mnt_parent;
+                               vfsmnt = &mnt->mnt;
+                               continue;
+                       }
+                       /*
+                        * Filesystems needing to implement special "root names"
+                        * should do so with ->d_dname()
+                        */
+                       if (IS_ROOT(dentry) &&
+                          (dentry->d_name.len != 1 ||
+                           dentry->d_name.name[0] != '/')) {
+                               WARN(1, "Root dentry has weird name <%.*s>\n",
+                                    (int) dentry->d_name.len,
+                                    dentry->d_name.name);
+                       }
+                       if (!error)
+                               error = is_mounted(vfsmnt) ? 1 : 2;
+                       break;
                }
                parent = dentry->d_parent;
                prefetch(parent);
-               spin_lock(&dentry->d_lock);
-               error = prepend_name(buffer, buflen, &dentry->d_name);
-               spin_unlock(&dentry->d_lock);
-               if (!error)
-                       error = prepend(buffer, buflen, "/", 1);
+               error = prepend_name(&bptr, &blen, &dentry->d_name);
                if (error)
                        break;
 
-               slash = true;
                dentry = parent;
        }
+       if (!(seq & 1))
+               rcu_read_unlock();
+       if (need_seqretry(&rename_lock, seq)) {
+               seq = 1;
+               goto restart;
+       }
+       done_seqretry(&rename_lock, seq);
 
-       if (!error && !slash)
-               error = prepend(buffer, buflen, "/", 1);
-
-       return error;
-
-global_root:
-       /*
-        * Filesystems needing to implement special "root names"
-        * should do so with ->d_dname()
-        */
-       if (IS_ROOT(dentry) &&
-           (dentry->d_name.len != 1 || dentry->d_name.name[0] != '/')) {
-               WARN(1, "Root dentry has weird name <%.*s>\n",
-                    (int) dentry->d_name.len, dentry->d_name.name);
-       }
-       if (!slash)
-               error = prepend(buffer, buflen, "/", 1);
-       if (!error)
-               error = is_mounted(vfsmnt) ? 1 : 2;
+       if (error >= 0 && bptr == *buffer) {
+               if (--blen < 0)
+                       error = -ENAMETOOLONG;
+               else
+                       *--bptr = '/';
+       }
+       *buffer = bptr;
+       *buflen = blen;
        return error;
 }
 
@@ -2741,9 +2817,7 @@ char *__d_path(const struct path *path,
 
        prepend(&res, &buflen, "\0", 1);
        br_read_lock(&vfsmount_lock);
-       write_seqlock(&rename_lock);
        error = prepend_path(path, root, &res, &buflen);
-       write_sequnlock(&rename_lock);
        br_read_unlock(&vfsmount_lock);
 
        if (error < 0)
@@ -2762,9 +2836,7 @@ char *d_absolute_path(const struct path *path,
 
        prepend(&res, &buflen, "\0", 1);
        br_read_lock(&vfsmount_lock);
-       write_seqlock(&rename_lock);
        error = prepend_path(path, &root, &res, &buflen);
-       write_sequnlock(&rename_lock);
        br_read_unlock(&vfsmount_lock);
 
        if (error > 1)
@@ -2830,9 +2902,7 @@ char *d_path(const struct path *path, char *buf, int buflen)
 
        get_fs_root(current->fs, &root);
        br_read_lock(&vfsmount_lock);
-       write_seqlock(&rename_lock);
        error = path_with_deleted(path, &root, &res, &buflen);
-       write_sequnlock(&rename_lock);
        br_read_unlock(&vfsmount_lock);
        if (error < 0)
                res = ERR_PTR(error);
@@ -2867,10 +2937,10 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
        char *end = buffer + buflen;
        /* these dentries are never renamed, so d_lock is not needed */
        if (prepend(&end, &buflen, " (deleted)", 11) ||
-           prepend_name(&end, &buflen, &dentry->d_name) ||
+           prepend(&end, &buflen, dentry->d_name.name, dentry->d_name.len) ||
            prepend(&end, &buflen, "/", 1))  
                end = ERR_PTR(-ENAMETOOLONG);
-       return end;  
+       return end;
 }
 
 /*
@@ -2878,30 +2948,42 @@ char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
  */
 static char *__dentry_path(struct dentry *dentry, char *buf, int buflen)
 {
-       char *end = buf + buflen;
-       char *retval;
+       char *end, *retval;
+       int len, seq = 0;
+       int error = 0;
 
-       prepend(&end, &buflen, "\0", 1);
+       rcu_read_lock();
+restart:
+       end = buf + buflen;
+       len = buflen;
+       prepend(&end, &len, "\0", 1);
        if (buflen < 1)
                goto Elong;
        /* Get '/' right */
        retval = end-1;
        *retval = '/';
-
+       read_seqbegin_or_lock(&rename_lock, &seq);
        while (!IS_ROOT(dentry)) {
                struct dentry *parent = dentry->d_parent;
                int error;
 
                prefetch(parent);
-               spin_lock(&dentry->d_lock);
-               error = prepend_name(&end, &buflen, &dentry->d_name);
-               spin_unlock(&dentry->d_lock);
-               if (error != 0 || prepend(&end, &buflen, "/", 1) != 0)
-                       goto Elong;
+               error = prepend_name(&end, &len, &dentry->d_name);
+               if (error)
+                       break;
 
                retval = end;
                dentry = parent;
        }
+       if (!(seq & 1))
+               rcu_read_unlock();
+       if (need_seqretry(&rename_lock, seq)) {
+               seq = 1;
+               goto restart;
+       }
+       done_seqretry(&rename_lock, seq);
+       if (error)
+               goto Elong;
        return retval;
 Elong:
        return ERR_PTR(-ENAMETOOLONG);
@@ -2909,13 +2991,7 @@ Elong:
 
 char *dentry_path_raw(struct dentry *dentry, char *buf, int buflen)
 {
-       char *retval;
-
-       write_seqlock(&rename_lock);
-       retval = __dentry_path(dentry, buf, buflen);
-       write_sequnlock(&rename_lock);
-
-       return retval;
+       return __dentry_path(dentry, buf, buflen);      /* __dentry_path() now does its own rename_lock sequence check / locking */
 }
 EXPORT_SYMBOL(dentry_path_raw);
 
@@ -2924,7 +3000,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
        char *p = NULL;
        char *retval;
 
-       write_seqlock(&rename_lock);
        if (d_unlinked(dentry)) {
                p = buf + buflen;
                if (prepend(&p, &buflen, "//deleted", 10) != 0)
@@ -2932,7 +3007,6 @@ char *dentry_path(struct dentry *dentry, char *buf, int buflen)
                buflen++;
        }
        retval = __dentry_path(dentry, buf, buflen);
-       write_sequnlock(&rename_lock);
        if (!IS_ERR(retval) && p)
                *p = '/';       /* restore '/' overriden with '\0' */
        return retval;
@@ -2971,7 +3045,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 
        error = -ENOENT;
        br_read_lock(&vfsmount_lock);
-       write_seqlock(&rename_lock);
        if (!d_unlinked(pwd.dentry)) {
                unsigned long len;
                char *cwd = page + PAGE_SIZE;
@@ -2979,7 +3052,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 
                prepend(&cwd, &buflen, "\0", 1);
                error = prepend_path(&pwd, &root, &cwd, &buflen);
-               write_sequnlock(&rename_lock);
                br_read_unlock(&vfsmount_lock);
 
                if (error < 0)
@@ -3000,7 +3072,6 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                                error = -EFAULT;
                }
        } else {
-               write_sequnlock(&rename_lock);
                br_read_unlock(&vfsmount_lock);
        }