Btrfs: use insert_inode_locked4 for inode creation
author Chris Mason <clm@fb.com>
Mon, 8 Sep 2014 20:08:51 +0000 (13:08 -0700)
committer Chris Mason <clm@fb.com>
Mon, 8 Sep 2014 20:56:45 +0000 (13:56 -0700)
Btrfs was inserting inodes into the hash table before we had fully
set the inode up on disk.  This leaves us open to rare races that allow
two different inodes in memory for the same [root, inode] pair.

This patch fixes things by using insert_inode_locked4 to insert an I_NEW
inode and unlock_new_inode when we're ready for the rest of the kernel
to use the inode.

It also makes sure to init the operations pointers on the inode before
going into the error handling paths.

Signed-off-by: Chris Mason <clm@fb.com>
Reported-by: Al Viro <viro@zeniv.linux.org.uk>
fs/btrfs/inode.c

index 88823f4..214b936 100644 (file)
@@ -5634,6 +5634,17 @@ int btrfs_set_inode_index(struct inode *dir, u64 *index)
        return ret;
 }
 
+static int btrfs_insert_inode_locked(struct inode *inode)
+{      /* Hash @inode via insert_inode_locked4(); returns its result (caller treats < 0 as failure). */
+       struct btrfs_iget_args args; /* lookup key passed through to btrfs_find_actor */
+       args.location = &BTRFS_I(inode)->location; /* btrfs_new_inode fills this in before calling */
+       args.root = BTRFS_I(inode)->root;
+
+       return insert_inode_locked4(inode,
+                  btrfs_inode_hash(inode->i_ino, BTRFS_I(inode)->root), /* hash value from i_ino and root */
+                  btrfs_find_actor, &args); /* test callback and its argument */
+}
+
 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                     struct btrfs_root *root,
                                     struct inode *dir,
@@ -5726,10 +5737,19 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                sizes[1] = name_len + sizeof(*ref);
        }
 
+       location = &BTRFS_I(inode)->location;
+       location->objectid = objectid;
+       location->offset = 0;
+       btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
+
+       ret = btrfs_insert_inode_locked(inode);
+       if (ret < 0)
+               goto fail;
+
        path->leave_spinning = 1;
        ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);
        if (ret != 0)
-               goto fail;
+               goto fail_unlock;
 
        inode_init_owner(inode, dir, mode);
        inode_set_bytes(inode, 0);
@@ -5752,11 +5772,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        btrfs_mark_buffer_dirty(path->nodes[0]);
        btrfs_free_path(path);
 
-       location = &BTRFS_I(inode)->location;
-       location->objectid = objectid;
-       location->offset = 0;
-       btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
-
        btrfs_inherit_iflags(inode, dir);
 
        if (S_ISREG(mode)) {
@@ -5767,7 +5782,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                                BTRFS_INODE_NODATASUM;
        }
 
-       btrfs_insert_inode_hash(inode);
        inode_tree_add(inode);
 
        trace_btrfs_inode_new(inode);
@@ -5782,6 +5796,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
                          btrfs_ino(inode), root->root_key.objectid, ret);
 
        return inode;
+
+fail_unlock:
+       unlock_new_inode(inode);
 fail:
        if (dir && name)
                BTRFS_I(dir)->index_cnt--;
@@ -5916,28 +5933,28 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err) {
-               drop_inode = 1;
-               goto out_unlock;
-       }
-
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
        * if the filesystem supports xattrs by looking at the
        * ops vector.
        */
-
        inode->i_op = &btrfs_special_inode_operations;
-       err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+       init_special_inode(inode, inode->i_mode, rdev);
+
+       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
        if (err)
-               drop_inode = 1;
-       else {
-               init_special_inode(inode, inode->i_mode, rdev);
+               goto out_unlock_inode;
+
+       err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
+       if (err) {
+               goto out_unlock_inode;
+       } else {
                btrfs_update_inode(trans, root, inode);
+               unlock_new_inode(inode);
                d_instantiate(dentry, inode);
        }
+
 out_unlock:
        btrfs_end_transaction(trans, root);
        btrfs_balance_delayed_items(root);
@@ -5947,6 +5964,12 @@ out_unlock:
                iput(inode);
        }
        return err;
+
+out_unlock_inode:
+       drop_inode = 1;
+       unlock_new_inode(inode);
+       goto out_unlock;
+
 }
 
 static int btrfs_create(struct inode *dir, struct dentry *dentry,
@@ -5981,15 +6004,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
        drop_inode_on_err = 1;
-
-       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err)
-               goto out_unlock;
-
-       err = btrfs_update_inode(trans, root, inode);
-       if (err)
-               goto out_unlock;
-
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
@@ -5998,14 +6012,23 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
        */
        inode->i_fop = &btrfs_file_operations;
        inode->i_op = &btrfs_file_inode_operations;
+       inode->i_mapping->a_ops = &btrfs_aops;
+       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+
+       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+       if (err)
+               goto out_unlock_inode;
+
+       err = btrfs_update_inode(trans, root, inode);
+       if (err)
+               goto out_unlock_inode;
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
-               goto out_unlock;
+               goto out_unlock_inode;
 
-       inode->i_mapping->a_ops = &btrfs_aops;
-       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
        BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+       unlock_new_inode(inode);
        d_instantiate(dentry, inode);
 
 out_unlock:
@@ -6017,6 +6040,11 @@ out_unlock:
        btrfs_balance_delayed_items(root);
        btrfs_btree_balance_dirty(root);
        return err;
+
+out_unlock_inode:
+       unlock_new_inode(inode);
+       goto out_unlock;
+
 }
 
 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
@@ -6124,25 +6152,30 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        }
 
        drop_on_err = 1;
+       /* these must be set before we unlock the inode */
+       inode->i_op = &btrfs_dir_inode_operations;
+       inode->i_fop = &btrfs_dir_file_operations;
 
        err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
        if (err)
-               goto out_fail;
-
-       inode->i_op = &btrfs_dir_inode_operations;
-       inode->i_fop = &btrfs_dir_file_operations;
+               goto out_fail_inode;
 
        btrfs_i_size_write(inode, 0);
        err = btrfs_update_inode(trans, root, inode);
        if (err)
-               goto out_fail;
+               goto out_fail_inode;
 
        err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
                             dentry->d_name.len, 0, index);
        if (err)
-               goto out_fail;
+               goto out_fail_inode;
 
        d_instantiate(dentry, inode);
+       /*
+        * mkdir is special.  We're unlocking after we call d_instantiate
+        * to avoid a race with nfsd calling d_instantiate.
+        */
+       unlock_new_inode(inode);
        drop_on_err = 0;
 
 out_fail:
@@ -6152,6 +6185,10 @@ out_fail:
        btrfs_balance_delayed_items(root);
        btrfs_btree_balance_dirty(root);
        return err;
+
+out_fail_inode:
+       unlock_new_inode(inode);
+       goto out_fail;
 }
 
 /* helper for btfs_get_extent.  Given an existing extent in the tree,
@@ -8107,6 +8144,7 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,
 
        set_nlink(inode, 1);
        btrfs_i_size_write(inode, 0);
+       unlock_new_inode(inode);
 
        err = btrfs_subvol_inherit_props(trans, new_root, parent_root);
        if (err)
@@ -8757,12 +8795,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
                goto out_unlock;
        }
 
-       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
-       if (err) {
-               drop_inode = 1;
-               goto out_unlock;
-       }
-
        /*
        * If the active LSM wants to access the inode during
        * d_instantiate it needs these. Smack checks to see
@@ -8771,23 +8803,22 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        */
        inode->i_fop = &btrfs_file_operations;
        inode->i_op = &btrfs_file_inode_operations;
+       inode->i_mapping->a_ops = &btrfs_aops;
+       inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
+       BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
+
+       err = btrfs_init_inode_security(trans, inode, dir, &dentry->d_name);
+       if (err)
+               goto out_unlock_inode;
 
        err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
        if (err)
-               drop_inode = 1;
-       else {
-               inode->i_mapping->a_ops = &btrfs_aops;
-               inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
-               BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
-       }
-       if (drop_inode)
-               goto out_unlock;
+               goto out_unlock_inode;
 
        path = btrfs_alloc_path();
        if (!path) {
                err = -ENOMEM;
-               drop_inode = 1;
-               goto out_unlock;
+               goto out_unlock_inode;
        }
        key.objectid = btrfs_ino(inode);
        key.offset = 0;
@@ -8796,9 +8827,8 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        err = btrfs_insert_empty_item(trans, root, path, &key,
                                      datasize);
        if (err) {
-               drop_inode = 1;
                btrfs_free_path(path);
-               goto out_unlock;
+               goto out_unlock_inode;
        }
        leaf = path->nodes[0];
        ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -8822,12 +8852,15 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
        inode_set_bytes(inode, name_len);
        btrfs_i_size_write(inode, name_len);
        err = btrfs_update_inode(trans, root, inode);
-       if (err)
+       if (err) {
                drop_inode = 1;
+               goto out_unlock_inode;
+       }
+
+       unlock_new_inode(inode);
+       d_instantiate(dentry, inode);
 
 out_unlock:
-       if (!err)
-               d_instantiate(dentry, inode);
        btrfs_end_transaction(trans, root);
        if (drop_inode) {
                inode_dec_link_count(inode);
@@ -8835,6 +8868,11 @@ out_unlock:
        }
        btrfs_btree_balance_dirty(root);
        return err;
+
+out_unlock_inode:
+       drop_inode = 1;
+       unlock_new_inode(inode);
+       goto out_unlock;
 }
 
 static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -9018,14 +9056,6 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
                goto out;
        }
 
-       ret = btrfs_init_inode_security(trans, inode, dir, NULL);
-       if (ret)
-               goto out;
-
-       ret = btrfs_update_inode(trans, root, inode);
-       if (ret)
-               goto out;
-
        inode->i_fop = &btrfs_file_operations;
        inode->i_op = &btrfs_file_inode_operations;
 
@@ -9033,9 +9063,16 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
        BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
 
+       ret = btrfs_init_inode_security(trans, inode, dir, NULL);
+       if (ret)
+               goto out_inode;
+
+       ret = btrfs_update_inode(trans, root, inode);
+       if (ret)
+               goto out_inode;
        ret = btrfs_orphan_add(trans, inode);
        if (ret)
-               goto out;
+               goto out_inode;
 
        /*
         * We set number of links to 0 in btrfs_new_inode(), and here we set
@@ -9045,6 +9082,7 @@ static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
         *    d_tmpfile() -> inode_dec_link_count() -> drop_nlink()
         */
        set_nlink(inode, 1);
+       unlock_new_inode(inode);
        d_tmpfile(dentry, inode);
        mark_inode_dirty(inode);
 
@@ -9054,8 +9092,12 @@ out:
                iput(inode);
        btrfs_balance_delayed_items(root);
        btrfs_btree_balance_dirty(root);
-
        return ret;
+
+out_inode:
+       unlock_new_inode(inode);
+       goto out;
+
 }
 
 static const struct inode_operations btrfs_dir_inode_operations = {