Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

[cascardo/linux.git] / fs / namei.c
diff --git a/fs/namei.c b/fs/namei.c

index caa2805..e029a4c 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -482,18 +482,6 @@ EXPORT_SYMBOL(path_put);
   * to restart the path walk from the beginning in ref-walk mode.
   */
  
-static inline void lock_rcu_walk(void)
-{
-       br_read_lock(&vfsmount_lock);
-       rcu_read_lock();
-}
-
-static inline void unlock_rcu_walk(void)
-{
-       rcu_read_unlock();
-       br_read_unlock(&vfsmount_lock);
-}
-
  /**
   * unlazy_walk - try to switch to ref-walk mode.
   * @nd: nameidata pathwalk data
@@ -512,26 +500,23 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
         BUG_ON(!(nd->flags & LOOKUP_RCU));
  
         /*
-        * Get a reference to the parent first: we're
-        * going to make "path_put(nd->path)" valid in
-        * non-RCU context for "terminate_walk()".
-        *
-        * If this doesn't work, return immediately with
-        * RCU walking still active (and then we will do
-        * the RCU walk cleanup in terminate_walk()).
+        * After legitimizing the vfsmount and the parent dentry, terminate_walk()
+        * will do the right thing for non-RCU mode, and all our
+        * subsequent exit cases should rcu_read_unlock()
+        * before returning.  Do vfsmount first; if dentry
+        * can't be legitimized, just set nd->path.dentry to NULL
+        * and rely on dput(NULL) being a no-op.
          */
-       if (!lockref_get_not_dead(&parent->d_lockref))
+       if (!legitimize_mnt(nd->path.mnt, nd->m_seq))
                 return -ECHILD;
-
-       /*
-        * After the mntget(), we terminate_walk() will do
-        * the right thing for non-RCU mode, and all our
-        * subsequent exit cases should unlock_rcu_walk()
-        * before returning.
-        */
-       mntget(nd->path.mnt);
         nd->flags &= ~LOOKUP_RCU;
  
+       if (!lockref_get_not_dead(&parent->d_lockref)) {
+               nd->path.dentry = NULL; 
+               rcu_read_unlock();
+               return -ECHILD;
+       }
+
         /*
          * For a negative lookup, the lookup sequence point is the parents
          * sequence point, and it only needs to revalidate the parent dentry.
@@ -566,17 +551,17 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry)
                 spin_unlock(&fs->lock);
         }
  
-       unlock_rcu_walk();
+       rcu_read_unlock();
         return 0;
  
  unlock_and_drop_dentry:
         spin_unlock(&fs->lock);
  drop_dentry:
-       unlock_rcu_walk();
+       rcu_read_unlock();
         dput(dentry);
         goto drop_root_mnt;
  out:
-       unlock_rcu_walk();
+       rcu_read_unlock();
  drop_root_mnt:
         if (!(nd->flags & LOOKUP_ROOT))
                 nd->root.mnt = NULL;
@@ -608,17 +593,22 @@ static int complete_walk(struct nameidata *nd)
                 if (!(nd->flags & LOOKUP_ROOT))
                         nd->root.mnt = NULL;
  
+               if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) {
+                       rcu_read_unlock();
+                       return -ECHILD;
+               }
                 if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) {
-                       unlock_rcu_walk();
+                       rcu_read_unlock();
+                       mntput(nd->path.mnt);
                         return -ECHILD;
                 }
                 if (read_seqcount_retry(&dentry->d_seq, nd->seq)) {
-                       unlock_rcu_walk();
+                       rcu_read_unlock();
                         dput(dentry);
+                       mntput(nd->path.mnt);
                         return -ECHILD;
                 }
-               mntget(nd->path.mnt);
-               unlock_rcu_walk();
+               rcu_read_unlock();
         }
  
         if (likely(!(nd->flags & LOOKUP_JUMPED)))
@@ -909,15 +899,15 @@ int follow_up(struct path *path)
         struct mount *parent;
         struct dentry *mountpoint;
  
-       br_read_lock(&vfsmount_lock);
+       read_seqlock_excl(&mount_lock);
         parent = mnt->mnt_parent;
         if (parent == mnt) {
-               br_read_unlock(&vfsmount_lock);
+               read_sequnlock_excl(&mount_lock);
                 return 0;
         }
         mntget(&parent->mnt);
         mountpoint = dget(mnt->mnt_mountpoint);
-       br_read_unlock(&vfsmount_lock);
+       read_sequnlock_excl(&mount_lock);
         dput(path->dentry);
         path->dentry = mountpoint;
         mntput(path->mnt);
@@ -1048,8 +1038,8 @@ static int follow_managed(struct path *path, unsigned flags)
  
                         /* Something is mounted on this dentry in another
                          * namespace and/or whatever was mounted there in this
-                        * namespace got unmounted before we managed to get the
-                        * vfsmount_lock */
+                        * namespace got unmounted before lookup_mnt() could
+                        * get it */
                 }
  
                 /* Handle an automount point */
@@ -1111,7 +1101,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
                 if (!d_mountpoint(path->dentry))
                         break;
  
-               mounted = __lookup_mnt(path->mnt, path->dentry, 1);
+               mounted = __lookup_mnt(path->mnt, path->dentry);
                 if (!mounted)
                         break;
                 path->mnt = &mounted->mnt;
@@ -1132,7 +1122,7 @@ static void follow_mount_rcu(struct nameidata *nd)
  {
         while (d_mountpoint(nd->path.dentry)) {
                 struct mount *mounted;
-               mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry, 1);
+               mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
                 if (!mounted)
                         break;
                 nd->path.mnt = &mounted->mnt;
@@ -1174,7 +1164,7 @@ failed:
         nd->flags &= ~LOOKUP_RCU;
         if (!(nd->flags & LOOKUP_ROOT))
                 nd->root.mnt = NULL;
-       unlock_rcu_walk();
+       rcu_read_unlock();
         return -ECHILD;
  }
  
@@ -1308,8 +1298,8 @@ static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
  }
  
  /*
- * Call i_op->lookup on the dentry.  The dentry must be negative but may be
- * hashed if it was pouplated with DCACHE_NEED_LOOKUP.
+ * Call i_op->lookup on the dentry.  The dentry must be negative and
+ * unhashed.
   *
   * dir->d_inode->i_mutex must be held
   */
@@ -1501,7 +1491,7 @@ static void terminate_walk(struct nameidata *nd)
                 nd->flags &= ~LOOKUP_RCU;
                 if (!(nd->flags & LOOKUP_ROOT))
                         nd->root.mnt = NULL;
-               unlock_rcu_walk();
+               rcu_read_unlock();
         }
  }
  
@@ -1511,18 +1501,9 @@ static void terminate_walk(struct nameidata *nd)
   * so we keep a cache of "no, this doesn't need follow_link"
   * for the common case.
   */
-static inline int should_follow_link(struct inode *inode, int follow)
+static inline int should_follow_link(struct dentry *dentry, int follow)
  {
-       if (unlikely(!(inode->i_opflags & IOP_NOFOLLOW))) {
-               if (likely(inode->i_op->follow_link))
-                       return follow;
-
-               /* This gets set once for the inode lifetime */
-               spin_lock(&inode->i_lock);
-               inode->i_opflags |= IOP_NOFOLLOW;
-               spin_unlock(&inode->i_lock);
-       }
-       return 0;
+       return unlikely(d_is_symlink(dentry)) ? follow : 0;
  }
  
  static inline int walk_component(struct nameidata *nd, struct path *path,
@@ -1552,7 +1533,7 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
         if (!inode)
                 goto out_path_put;
  
-       if (should_follow_link(inode, follow)) {
+       if (should_follow_link(path->dentry, follow)) {
                 if (nd->flags & LOOKUP_RCU) {
                         if (unlikely(unlazy_walk(nd, path->dentry))) {
                                 err = -ECHILD;
@@ -1610,26 +1591,6 @@ static inline int nested_symlink(struct path *path, struct nameidata *nd)
         return res;
  }
  
-/*
- * We really don't want to look at inode->i_op->lookup
- * when we don't have to. So we keep a cache bit in
- * the inode ->i_opflags field that says "yes, we can
- * do lookup on this inode".
- */
-static inline int can_lookup(struct inode *inode)
-{
-       if (likely(inode->i_opflags & IOP_LOOKUP))
-               return 1;
-       if (likely(!inode->i_op->lookup))
-               return 0;
-
-       /* We do this once for the lifetime of the inode */
-       spin_lock(&inode->i_lock);
-       inode->i_opflags |= IOP_LOOKUP;
-       spin_unlock(&inode->i_lock);
-       return 1;
-}
-
  /*
   * We can do the critical dentry name comparison and hashing
   * operations one word at a time, but we are limited to:
@@ -1833,7 +1794,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
                         if (err)
                                 return err;
                 }
-               if (!can_lookup(nd->inode)) {
+               if (!d_is_directory(nd->path.dentry)) {
                         err = -ENOTDIR; 
                         break;
                 }
@@ -1851,9 +1812,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
         nd->flags = flags | LOOKUP_JUMPED;
         nd->depth = 0;
         if (flags & LOOKUP_ROOT) {
-               struct inode *inode = nd->root.dentry->d_inode;
+               struct dentry *root = nd->root.dentry;
+               struct inode *inode = root->d_inode;
                 if (*name) {
-                       if (!can_lookup(inode))
+                       if (!d_is_directory(root))
                                 return -ENOTDIR;
                         retval = inode_permission(inode, MAY_EXEC);
                         if (retval)
@@ -1862,8 +1824,9 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                 nd->path = nd->root;
                 nd->inode = inode;
                 if (flags & LOOKUP_RCU) {
-                       lock_rcu_walk();
+                       rcu_read_lock();
                         nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
+                       nd->m_seq = read_seqbegin(&mount_lock);
                 } else {
                         path_get(&nd->path);
                 }
@@ -1872,9 +1835,10 @@ static int path_init(int dfd, const char *name, unsigned int flags,
  
         nd->root.mnt = NULL;
  
+       nd->m_seq = read_seqbegin(&mount_lock);
         if (*name=='/') {
                 if (flags & LOOKUP_RCU) {
-                       lock_rcu_walk();
+                       rcu_read_lock();
                         set_root_rcu(nd);
                 } else {
                         set_root(nd);
@@ -1886,7 +1850,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                         struct fs_struct *fs = current->fs;
                         unsigned seq;
  
-                       lock_rcu_walk();
+                       rcu_read_lock();
  
                         do {
                                 seq = read_seqcount_begin(&fs->seq);
@@ -1907,7 +1871,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                 dentry = f.file->f_path.dentry;
  
                 if (*name) {
-                       if (!can_lookup(dentry->d_inode)) {
+                       if (!d_is_directory(dentry)) {
                                 fdput(f);
                                 return -ENOTDIR;
                         }
@@ -1918,7 +1882,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
                         if (f.need_put)
                                 *fp = f.file;
                         nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
-                       lock_rcu_walk();
+                       rcu_read_lock();
                 } else {
                         path_get(&nd->path);
                         fdput(f);
@@ -1989,7 +1953,7 @@ static int path_lookupat(int dfd, const char *name,
                 err = complete_walk(nd);
  
         if (!err && nd->flags & LOOKUP_DIRECTORY) {
-               if (!can_lookup(nd->inode)) {
+               if (!d_is_directory(nd->path.dentry)) {
                         path_put(&nd->path);
                         err = -ENOTDIR;
                 }
@@ -2281,7 +2245,7 @@ done:
         }
         path->dentry = dentry;
         path->mnt = mntget(nd->path.mnt);
-       if (should_follow_link(dentry->d_inode, nd->flags & LOOKUP_FOLLOW))
+       if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW))
                 return 1;
         follow_mount(path);
         error = 0;
@@ -2426,12 +2390,14 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
   * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
   *     nfs_async_unlink().
   */
-static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
+static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
  {
+       struct inode *inode = victim->d_inode;
         int error;
  
-       if (!victim->d_inode)
+       if (d_is_negative(victim))
                 return -ENOENT;
+       BUG_ON(!inode);
  
         BUG_ON(victim->d_parent->d_inode != dir);
         audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
@@ -2441,15 +2407,16 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
                 return error;
         if (IS_APPEND(dir))
                 return -EPERM;
-       if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
-           IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+
+       if (check_sticky(dir, inode) || IS_APPEND(inode) ||
+           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode))
                 return -EPERM;
         if (isdir) {
-               if (!S_ISDIR(victim->d_inode->i_mode))
+               if (!d_is_directory(victim) && !d_is_autodir(victim))
                         return -ENOTDIR;
                 if (IS_ROOT(victim))
                         return -EBUSY;
-       } else if (S_ISDIR(victim->d_inode->i_mode))
+       } else if (d_is_directory(victim) || d_is_autodir(victim))
                 return -EISDIR;
         if (IS_DEADDIR(dir))
                 return -ENOENT;
@@ -2983,7 +2950,7 @@ retry_lookup:
         /*
          * create/update audit record if it already exists.
          */
-       if (path->dentry->d_inode)
+       if (d_is_positive(path->dentry))
                 audit_inode(name, path->dentry, 0);
  
         /*
@@ -3012,12 +2979,12 @@ retry_lookup:
  finish_lookup:
         /* we _can_ be in RCU mode here */
         error = -ENOENT;
-       if (!inode) {
+       if (d_is_negative(path->dentry)) {
                 path_to_nameidata(path, nd);
                 goto out;
         }
  
-       if (should_follow_link(inode, !symlink_ok)) {
+       if (should_follow_link(path->dentry, !symlink_ok)) {
                 if (nd->flags & LOOKUP_RCU) {
                         if (unlikely(unlazy_walk(nd, path->dentry))) {
                                 error = -ECHILD;
@@ -3046,10 +3013,11 @@ finish_open:
         }
         audit_inode(name, nd->path.dentry, 0);
         error = -EISDIR;
-       if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
+       if ((open_flag & O_CREAT) &&
+           (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry)))
                 goto out;
         error = -ENOTDIR;
-       if ((nd->flags & LOOKUP_DIRECTORY) && !can_lookup(nd->inode))
+       if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry))
                 goto out;
         if (!S_ISREG(nd->inode->i_mode))
                 will_truncate = false;
@@ -3275,7 +3243,7 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt,
         nd.root.mnt = mnt;
         nd.root.dentry = dentry;
  
-       if (dentry->d_inode->i_op->follow_link && op->intent & LOOKUP_OPEN)
+       if (d_is_symlink(dentry) && op->intent & LOOKUP_OPEN)
                 return ERR_PTR(-ELOOP);
  
         file = path_openat(-1, &filename, &nd, op, flags | LOOKUP_RCU);
@@ -3325,8 +3293,9 @@ struct dentry *kern_path_create(int dfd, const char *pathname,
                 goto unlock;
  
         error = -EEXIST;
-       if (dentry->d_inode)
+       if (d_is_positive(dentry))
                 goto fail;
+
         /*
          * Special case - lookup gave negative, but... we had foo/bar/
          * From the vfs_mknod() POV we just have a negative dentry -
@@ -3647,8 +3616,27 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
         return do_rmdir(AT_FDCWD, pathname);
  }
  
-int vfs_unlink(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_unlink - unlink a filesystem object
+ * @dir:       parent directory
+ * @dentry:    victim
+ * @delegated_inode: returns victim inode, if the inode is delegated.
+ *
+ * The caller must hold dir->i_mutex.
+ *
+ * If vfs_unlink discovers a delegation, it will return -EWOULDBLOCK and
+ * return a reference to the inode in delegated_inode.  The caller
+ * should then break the delegation on that inode and retry.  Because
+ * breaking a delegation may take a long time, the caller should drop
+ * dir->i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
  {
+       struct inode *target = dentry->d_inode;
         int error = may_delete(dir, dentry, 0);
  
         if (error)
@@ -3657,22 +3645,26 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
         if (!dir->i_op->unlink)
                 return -EPERM;
  
-       mutex_lock(&dentry->d_inode->i_mutex);
+       mutex_lock(&target->i_mutex);
         if (d_mountpoint(dentry))
                 error = -EBUSY;
         else {
                 error = security_inode_unlink(dir, dentry);
                 if (!error) {
+                       error = try_break_deleg(target, delegated_inode);
+                       if (error)
+                               goto out;
                         error = dir->i_op->unlink(dir, dentry);
                         if (!error)
                                 dont_mount(dentry);
                 }
         }
-       mutex_unlock(&dentry->d_inode->i_mutex);
+out:
+       mutex_unlock(&target->i_mutex);
  
         /* We don't d_delete() NFS sillyrenamed files--they still exist. */
         if (!error && !(dentry->d_flags & DCACHE_NFSFS_RENAMED)) {
-               fsnotify_link_count(dentry->d_inode);
+               fsnotify_link_count(target);
                 d_delete(dentry);
         }
  
@@ -3692,6 +3684,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
         struct dentry *dentry;
         struct nameidata nd;
         struct inode *inode = NULL;
+       struct inode *delegated_inode = NULL;
         unsigned int lookup_flags = 0;
  retry:
         name = user_path_parent(dfd, pathname, &nd, lookup_flags);
@@ -3706,7 +3699,7 @@ retry:
         error = mnt_want_write(nd.path.mnt);
         if (error)
                 goto exit1;
-
+retry_deleg:
         mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
         dentry = lookup_hash(&nd);
         error = PTR_ERR(dentry);
@@ -3715,19 +3708,25 @@ retry:
                 if (nd.last.name[nd.last.len])
                         goto slashes;
                 inode = dentry->d_inode;
-               if (!inode)
+               if (d_is_negative(dentry))
                         goto slashes;
                 ihold(inode);
                 error = security_path_unlink(&nd.path, dentry);
                 if (error)
                         goto exit2;
-               error = vfs_unlink(nd.path.dentry->d_inode, dentry);
+               error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode);
  exit2:
                 dput(dentry);
         }
         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
         if (inode)
                 iput(inode);    /* truncate the inode here */
+       inode = NULL;
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         mnt_drop_write(nd.path.mnt);
  exit1:
         path_put(&nd.path);
@@ -3740,8 +3739,12 @@ exit1:
         return error;
  
  slashes:
-       error = !dentry->d_inode ? -ENOENT :
-               S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
+       if (d_is_negative(dentry))
+               error = -ENOENT;
+       else if (d_is_directory(dentry) || d_is_autodir(dentry))
+               error = -EISDIR;
+       else
+               error = -ENOTDIR;
         goto exit2;
  }
  
@@ -3817,7 +3820,26 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
         return sys_symlinkat(oldname, AT_FDCWD, newname);
  }
  
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
+/**
+ * vfs_link - create a new link
+ * @old_dentry:        object to be linked
+ * @dir:       new parent
+ * @new_dentry:        where to create the new link
+ * @delegated_inode: returns inode needing a delegation break
+ *
+ * The caller must hold dir->i_mutex
+ *
+ * If vfs_link discovers a delegation on the to-be-linked file in need
+ * of breaking, it will return -EWOULDBLOCK and return a reference to the
+ * inode in delegated_inode.  The caller should then break the delegation
+ * and retry.  Because breaking a delegation may take a long time, the
+ * caller should drop the i_mutex before doing so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
+int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
  {
         struct inode *inode = old_dentry->d_inode;
         unsigned max_links = dir->i_sb->s_max_links;
@@ -3853,8 +3875,11 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
                 error =  -ENOENT;
         else if (max_links && inode->i_nlink >= max_links)
                 error = -EMLINK;
-       else
-               error = dir->i_op->link(old_dentry, dir, new_dentry);
+       else {
+               error = try_break_deleg(inode, delegated_inode);
+               if (!error)
+                       error = dir->i_op->link(old_dentry, dir, new_dentry);
+       }
  
         if (!error && (inode->i_state & I_LINKABLE)) {
                 spin_lock(&inode->i_lock);
@@ -3881,6 +3906,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
  {
         struct dentry *new_dentry;
         struct path old_path, new_path;
+       struct inode *delegated_inode = NULL;
         int how = 0;
         int error;
  
@@ -3919,9 +3945,14 @@ retry:
         error = security_path_link(old_path.dentry, &new_path, new_dentry);
         if (error)
                 goto out_dput;
-       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
+       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
  out_dput:
         done_path_create(&new_path, new_dentry);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry;
+       }
         if (retry_estale(error, how)) {
                 how |= LOOKUP_REVAL;
                 goto retry;
@@ -3946,7 +3977,8 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
   *        That's where 4.4 screws up. Current fix: serialization on
   *        sb->s_vfs_rename_mutex. We might be more accurate, but that's another
   *        story.
- *     c) we have to lock _three_ objects - parents and victim (if it exists).
+ *     c) we have to lock _four_ objects - parents and victim (if it exists),
+ *        and source (if it is not a directory).
   *        And that - after we got ->i_mutex on parents (until then we don't know
   *        whether the target exists).  Solution: try to be smart with locking
   *        order for inodes.  We rely on the fact that tree topology may change
@@ -4019,9 +4051,11 @@ out:
  }
  
  static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
-                           struct inode *new_dir, struct dentry *new_dentry)
+                           struct inode *new_dir, struct dentry *new_dentry,
+                           struct inode **delegated_inode)
  {
         struct inode *target = new_dentry->d_inode;
+       struct inode *source = old_dentry->d_inode;
         int error;
  
         error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry);
@@ -4029,13 +4063,20 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
                 return error;
  
         dget(new_dentry);
-       if (target)
-               mutex_lock(&target->i_mutex);
+       lock_two_nondirectories(source, target);
  
         error = -EBUSY;
         if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
                 goto out;
  
+       error = try_break_deleg(source, delegated_inode);
+       if (error)
+               goto out;
+       if (target) {
+               error = try_break_deleg(target, delegated_inode);
+               if (error)
+                       goto out;
+       }
         error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
         if (error)
                 goto out;
@@ -4045,17 +4086,38 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
         if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
                 d_move(old_dentry, new_dentry);
  out:
-       if (target)
-               mutex_unlock(&target->i_mutex);
+       unlock_two_nondirectories(source, target);
         dput(new_dentry);
         return error;
  }
  
+/**
+ * vfs_rename - rename a filesystem object
+ * @old_dir:   parent of source
+ * @old_dentry:        source
+ * @new_dir:   parent of destination
+ * @new_dentry:        destination
+ * @delegated_inode: returns an inode needing a delegation break
+ *
+ * The caller must hold multiple mutexes--see lock_rename().
+ *
+ * If vfs_rename discovers a delegation in need of breaking at either
+ * the source or destination, it will return -EWOULDBLOCK and return a
+ * reference to the inode in delegated_inode.  The caller should then
+ * break the delegation and retry.  Because breaking a delegation may
+ * take a long time, the caller should drop all locks before doing
+ * so.
+ *
+ * Alternatively, a caller may pass NULL for delegated_inode.  This may
+ * be appropriate for callers that expect the underlying filesystem not
+ * to be NFS exported.
+ */
  int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-              struct inode *new_dir, struct dentry *new_dentry)
+              struct inode *new_dir, struct dentry *new_dentry,
+              struct inode **delegated_inode)
  {
         int error;
-       int is_dir = S_ISDIR(old_dentry->d_inode->i_mode);
+       int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry);
         const unsigned char *old_name;
  
         if (old_dentry->d_inode == new_dentry->d_inode)
@@ -4080,7 +4142,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         if (is_dir)
                 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
         else
-               error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
+               error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode);
         if (!error)
                 fsnotify_move(old_dir, new_dir, old_name, is_dir,
                               new_dentry->d_inode, old_dentry);
@@ -4096,6 +4158,7 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
         struct dentry *old_dentry, *new_dentry;
         struct dentry *trap;
         struct nameidata oldnd, newnd;
+       struct inode *delegated_inode = NULL;
         struct filename *from;
         struct filename *to;
         unsigned int lookup_flags = 0;
@@ -4135,6 +4198,7 @@ retry:
         newnd.flags &= ~LOOKUP_PARENT;
         newnd.flags |= LOOKUP_RENAME_TARGET;
  
+retry_deleg:
         trap = lock_rename(new_dir, old_dir);
  
         old_dentry = lookup_hash(&oldnd);
@@ -4143,10 +4207,10 @@ retry:
                 goto exit3;
         /* source must exist */
         error = -ENOENT;
-       if (!old_dentry->d_inode)
+       if (d_is_negative(old_dentry))
                 goto exit4;
         /* unless the source is a directory trailing slashes give -ENOTDIR */
-       if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
+       if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) {
                 error = -ENOTDIR;
                 if (oldnd.last.name[oldnd.last.len])
                         goto exit4;
@@ -4171,13 +4235,19 @@ retry:
         if (error)
                 goto exit5;
         error = vfs_rename(old_dir->d_inode, old_dentry,
-                                  new_dir->d_inode, new_dentry);
+                                  new_dir->d_inode, new_dentry,
+                                  &delegated_inode);
  exit5:
         dput(new_dentry);
  exit4:
         dput(old_dentry);
  exit3:
         unlock_rename(new_dir, old_dir);
+       if (delegated_inode) {
+               error = break_deleg_wait(&delegated_inode);
+               if (!error)
+                       goto retry_deleg;
+       }
         mnt_drop_write(oldnd.path.mnt);
  exit2:
         if (retry_estale(error, lookup_flags))