Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[cascardo/linux.git] / fs / btrfs / ioctl.c
index a6d8efa..0401397 100644 (file)
 #include "props.h"
 #include "sysfs.h"
 
+#ifdef CONFIG_64BIT
+/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
+ * structures are incorrect, as the timespec structure from userspace
+ * is 4 bytes too small. We define these alternatives here to teach
+ * the kernel about the 32-bit struct packing.
+ */
+struct btrfs_ioctl_timespec_32 {
+       __u64 sec;
+       __u32 nsec;
+} __attribute__ ((__packed__));
+
+struct btrfs_ioctl_received_subvol_args_32 {
+       char    uuid[BTRFS_UUID_SIZE];  /* in */
+       __u64   stransid;               /* in */
+       __u64   rtransid;               /* out */
+       struct btrfs_ioctl_timespec_32 stime; /* in */
+       struct btrfs_ioctl_timespec_32 rtime; /* out */
+       __u64   flags;                  /* in */
+       __u64   reserved[16];           /* in */
+} __attribute__ ((__packed__));
+
+#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
+                               struct btrfs_ioctl_received_subvol_args_32)
+#endif
+
+
 static int btrfs_clone(struct inode *src, struct inode *inode,
                       u64 off, u64 olen, u64 olen_aligned, u64 destoff);
 
@@ -585,6 +611,23 @@ fail:
        return ret;
 }
 
+static void btrfs_wait_nocow_write(struct btrfs_root *root)
+{
+       s64 writers;
+       DEFINE_WAIT(wait);
+
+       do {
+               prepare_to_wait(&root->subv_writers->wait, &wait,
+                               TASK_UNINTERRUPTIBLE);
+
+               writers = percpu_counter_sum(&root->subv_writers->counter);
+               if (writers)
+                       schedule();
+
+               finish_wait(&root->subv_writers->wait, &wait);
+       } while (writers);
+}
+
 static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                           struct dentry *dentry, char *name, int namelen,
                           u64 *async_transid, bool readonly,
@@ -598,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
        if (!root->ref_cows)
                return -EINVAL;
 
+       atomic_inc(&root->will_be_snapshoted);
+       smp_mb__after_atomic_inc();
+       btrfs_wait_nocow_write(root);
+
        ret = btrfs_start_delalloc_inodes(root, 0);
        if (ret)
-               return ret;
+               goto out;
 
        btrfs_wait_ordered_extents(root, -1);
 
        pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
-       if (!pending_snapshot)
-               return -ENOMEM;
+       if (!pending_snapshot) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        btrfs_init_block_rsv(&pending_snapshot->block_rsv,
                             BTRFS_BLOCK_RSV_TEMP);
@@ -623,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
                                        &pending_snapshot->qgroup_reserved,
                                        false);
        if (ret)
-               goto out;
+               goto free;
 
        pending_snapshot->dentry = dentry;
        pending_snapshot->root = root;
@@ -674,8 +723,10 @@ fail:
        btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
                                         &pending_snapshot->block_rsv,
                                         pending_snapshot->qgroup_reserved);
-out:
+free:
        kfree(pending_snapshot);
+out:
+       atomic_dec(&root->will_be_snapshoted);
        return ret;
 }
 
@@ -884,12 +935,14 @@ static int find_new_extents(struct btrfs_root *root,
        min_key.type = BTRFS_EXTENT_DATA_KEY;
        min_key.offset = *off;
 
-       path->keep_locks = 1;
-
        while (1) {
+               path->keep_locks = 1;
                ret = btrfs_search_forward(root, &min_key, path, newer_than);
                if (ret != 0)
                        goto none;
+               path->keep_locks = 0;
+               btrfs_unlock_up_safe(path, 1);
+process_slot:
                if (min_key.objectid != ino)
                        goto none;
                if (min_key.type != BTRFS_EXTENT_DATA_KEY)
@@ -908,6 +961,12 @@ static int find_new_extents(struct btrfs_root *root,
                        return 0;
                }
 
+               path->slots[0]++;
+               if (path->slots[0] < btrfs_header_nritems(leaf)) {
+                       btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]);
+                       goto process_slot;
+               }
+
                if (min_key.offset == (u64)-1)
                        goto none;
 
@@ -935,10 +994,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
        read_unlock(&em_tree->lock);
 
        if (!em) {
+               struct extent_state *cached = NULL;
+               u64 end = start + len - 1;
+
                /* get the big lock and read metadata off disk */
-               lock_extent(io_tree, start, start + len - 1);
+               lock_extent_bits(io_tree, start, end, 0, &cached);
                em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-               unlock_extent(io_tree, start, start + len - 1);
+               unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);
 
                if (IS_ERR(em))
                        return NULL;
@@ -957,7 +1019,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
                return false;
 
        next = defrag_lookup_extent(inode, em->start + em->len);
-       if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
+       if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE ||
+           (em->block_start + em->block_len == next->block_start))
                ret = false;
 
        free_extent_map(next);
@@ -1076,10 +1139,12 @@ again:
                page_start = page_offset(page);
                page_end = page_start + PAGE_CACHE_SIZE - 1;
                while (1) {
-                       lock_extent(tree, page_start, page_end);
+                       lock_extent_bits(tree, page_start, page_end,
+                                        0, &cached_state);
                        ordered = btrfs_lookup_ordered_extent(inode,
                                                              page_start);
-                       unlock_extent(tree, page_start, page_end);
+                       unlock_extent_cached(tree, page_start, page_end,
+                                            &cached_state, GFP_NOFS);
                        if (!ordered)
                                break;
 
@@ -1356,8 +1421,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                }
        }
 
-       if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO))
+       if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
                filemap_flush(inode->i_mapping);
+               if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
+                            &BTRFS_I(inode)->runtime_flags))
+                       filemap_flush(inode->i_mapping);
+       }
 
        if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
                /* the filemap_flush will queue IO into the worker threads, but
@@ -1573,7 +1642,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
                if (src_inode->i_sb != file_inode(file)->i_sb) {
                        btrfs_info(BTRFS_I(src_inode)->root->fs_info,
                                   "Snapshot src from another FS");
-                       ret = -EINVAL;
+                       ret = -EXDEV;
                } else if (!inode_owner_or_capable(src_inode)) {
                        /*
                         * Subvolume creation is not restricted, but snapshots
@@ -1797,7 +1866,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)
        if (di && !IS_ERR(di)) {
                btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
                if (key.objectid == root->root_key.objectid) {
-                       ret = -ENOTEMPTY;
+                       ret = -EPERM;
+                       btrfs_err(root->fs_info, "deleting default subvolume "
+                                 "%llu is not allowed", key.objectid);
                        goto out;
                }
                btrfs_release_path(path);
@@ -2994,8 +3065,9 @@ process_slot:
                                                         new_key.offset + datal,
                                                         1);
                                if (ret) {
-                                       btrfs_abort_transaction(trans, root,
-                                                               ret);
+                                       if (ret != -EINVAL)
+                                               btrfs_abort_transaction(trans,
+                                                               root, ret);
                                        btrfs_end_transaction(trans, root);
                                        goto out;
                                }
@@ -3153,8 +3225,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
         *   decompress into destination's address_space (the file offset
         *   may change, so source mapping won't do), then recompress (or
         *   otherwise reinsert) a subrange.
-        * - allow ranges within the same file to be cloned (provided
-        *   they don't overlap)?
+        *
+        * - split destination inode's inline extents.  The inline extents can
+        *   be either compressed or non-compressed.
         */
 
        /* the destination must be opened for writing */
@@ -4353,10 +4426,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
        return btrfs_qgroup_wait_for_completion(root->fs_info);
 }
 
-static long btrfs_ioctl_set_received_subvol(struct file *file,
-                                           void __user *arg)
+static long _btrfs_ioctl_set_received_subvol(struct file *file,
+                                           struct btrfs_ioctl_received_subvol_args *sa)
 {
-       struct btrfs_ioctl_received_subvol_args *sa = NULL;
        struct inode *inode = file_inode(file);
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_root_item *root_item = &root->root_item;
@@ -4384,13 +4456,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
                goto out;
        }
 
-       sa = memdup_user(arg, sizeof(*sa));
-       if (IS_ERR(sa)) {
-               ret = PTR_ERR(sa);
-               sa = NULL;
-               goto out;
-       }
-
        /*
         * 1 - root item
         * 2 - uuid items (received uuid + subvol uuid)
@@ -4444,14 +4509,91 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
                goto out;
        }
 
+out:
+       up_write(&root->fs_info->subvol_sem);
+       mnt_drop_write_file(file);
+       return ret;
+}
+
+#ifdef CONFIG_64BIT
+static long btrfs_ioctl_set_received_subvol_32(struct file *file,
+                                               void __user *arg)
+{
+       struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL;
+       struct btrfs_ioctl_received_subvol_args *args64 = NULL;
+       int ret = 0;
+
+       args32 = memdup_user(arg, sizeof(*args32));
+       if (IS_ERR(args32)) {
+               ret = PTR_ERR(args32);
+               args32 = NULL;
+               goto out;
+       }
+
+       args64 = kmalloc(sizeof(*args64), GFP_NOFS);
+       if (IS_ERR(args64)) {
+               ret = PTR_ERR(args64);
+               args64 = NULL;
+               goto out;
+       }
+
+       memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE);
+       args64->stransid = args32->stransid;
+       args64->rtransid = args32->rtransid;
+       args64->stime.sec = args32->stime.sec;
+       args64->stime.nsec = args32->stime.nsec;
+       args64->rtime.sec = args32->rtime.sec;
+       args64->rtime.nsec = args32->rtime.nsec;
+       args64->flags = args32->flags;
+
+       ret = _btrfs_ioctl_set_received_subvol(file, args64);
+       if (ret)
+               goto out;
+
+       memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
+       args32->stransid = args64->stransid;
+       args32->rtransid = args64->rtransid;
+       args32->stime.sec = args64->stime.sec;
+       args32->stime.nsec = args64->stime.nsec;
+       args32->rtime.sec = args64->rtime.sec;
+       args32->rtime.nsec = args64->rtime.nsec;
+       args32->flags = args64->flags;
+
+       ret = copy_to_user(arg, args32, sizeof(*args32));
+       if (ret)
+               ret = -EFAULT;
+
+out:
+       kfree(args32);
+       kfree(args64);
+       return ret;
+}
+#endif
+
+static long btrfs_ioctl_set_received_subvol(struct file *file,
+                                           void __user *arg)
+{
+       struct btrfs_ioctl_received_subvol_args *sa = NULL;
+       int ret = 0;
+
+       sa = memdup_user(arg, sizeof(*sa));
+       if (IS_ERR(sa)) {
+               ret = PTR_ERR(sa);
+               sa = NULL;
+               goto out;
+       }
+
+       ret = _btrfs_ioctl_set_received_subvol(file, sa);
+
+       if (ret)
+               goto out;
+
        ret = copy_to_user(arg, sa, sizeof(*sa));
        if (ret)
                ret = -EFAULT;
 
 out:
        kfree(sa);
-       up_write(&root->fs_info->subvol_sem);
-       mnt_drop_write_file(file);
        return ret;
 }
 
@@ -4746,7 +4888,7 @@ long btrfs_ioctl(struct file *file, unsigned int
        case BTRFS_IOC_SYNC: {
                int ret;
 
-               ret = btrfs_start_delalloc_roots(root->fs_info, 0);
+               ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1);
                if (ret)
                        return ret;
                ret = btrfs_sync_fs(file->f_dentry->d_sb, 1);
@@ -4770,6 +4912,10 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_balance_progress(root, argp);
        case BTRFS_IOC_SET_RECEIVED_SUBVOL:
                return btrfs_ioctl_set_received_subvol(file, argp);
+#ifdef CONFIG_64BIT
+       case BTRFS_IOC_SET_RECEIVED_SUBVOL_32:
+               return btrfs_ioctl_set_received_subvol_32(file, argp);
+#endif
        case BTRFS_IOC_SEND:
                return btrfs_ioctl_send(file, argp);
        case BTRFS_IOC_GET_DEV_STATS: