Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Fri, 11 Apr 2014 21:16:53 +0000 (14:16 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Fri, 11 Apr 2014 21:16:53 +0000 (14:16 -0700)
Pull second set of btrfs updates from Chris Mason:
 "The most important changes here are from Josef, fixing a btrfs
  regression in 3.14 that can cause corruptions in the extent allocation
  tree when snapshots are in use.

  Josef also fixed some deadlocks in send/recv and other assorted races
  when balance is running"

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: (23 commits)
  Btrfs: fix compile warnings on on avr32 platform
  btrfs: allow mounting btrfs subvolumes with different ro/rw options
  btrfs: export global block reserve size as space_info
  btrfs: fix crash in remount(thread_pool=) case
  Btrfs: abort the transaction when we don't find our extent ref
  Btrfs: fix EINVAL checks in btrfs_clone
  Btrfs: fix unlock in __start_delalloc_inodes()
  Btrfs: scrub raid56 stripes in the right way
  Btrfs: don't compress for a small write
  Btrfs: more efficient io tree navigation on wait_extent_bit
  Btrfs: send, build path string only once in send_hole
  btrfs: filter invalid arg for btrfs resize
  Btrfs: send, fix data corruption due to incorrect hole detection
  Btrfs: kmalloc() doesn't return an ERR_PTR
  Btrfs: fix snapshot vs nocow writting
  btrfs: Change the expanding write sequence to fix snapshot related bug.
  btrfs: make device scan less noisy
  btrfs: fix lockdep warning with reclaim lock inversion
  Btrfs: hold the commit_root_sem when getting the commit root during send
  Btrfs: remove transaction from send
  ...

19 files changed:
fs/btrfs/async-thread.c
fs/btrfs/backref.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file.c
fs/btrfs/inode-map.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/relocation.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/volumes.c

index ecb5832..5a201d8 100644 (file)
@@ -323,6 +323,8 @@ void btrfs_destroy_workqueue(struct btrfs_workqueue *wq)
 
 void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max)
 {
+       if (!wq)
+               return;
        wq->normal->max_active = max;
        if (wq->high)
                wq->high->max_active = max;
index aad7201..10db21f 100644 (file)
@@ -330,7 +330,10 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
                goto out;
        }
 
-       root_level = btrfs_old_root_level(root, time_seq);
+       if (path->search_commit_root)
+               root_level = btrfs_header_level(root->commit_root);
+       else
+               root_level = btrfs_old_root_level(root, time_seq);
 
        if (root_level + 1 == level) {
                srcu_read_unlock(&fs_info->subvol_srcu, index);
@@ -1099,9 +1102,9 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans,
  *
  * returns 0 on success, < 0 on error.
  */
-int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
-                               struct btrfs_fs_info *fs_info, u64 bytenr,
-                               u64 time_seq, struct ulist **roots)
+static int __btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+                                 struct btrfs_fs_info *fs_info, u64 bytenr,
+                                 u64 time_seq, struct ulist **roots)
 {
        struct ulist *tmp;
        struct ulist_node *node = NULL;
@@ -1137,6 +1140,20 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
        return 0;
 }
 
+int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
+                        struct btrfs_fs_info *fs_info, u64 bytenr,
+                        u64 time_seq, struct ulist **roots)
+{
+       int ret;
+
+       if (!trans)
+               down_read(&fs_info->commit_root_sem);
+       ret = __btrfs_find_all_roots(trans, fs_info, bytenr, time_seq, roots);
+       if (!trans)
+               up_read(&fs_info->commit_root_sem);
+       return ret;
+}
+
 /*
  * this makes the path point to (inum INODE_ITEM ioff)
  */
@@ -1516,6 +1533,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
                if (IS_ERR(trans))
                        return PTR_ERR(trans);
                btrfs_get_tree_mod_seq(fs_info, &tree_mod_seq_elem);
+       } else {
+               down_read(&fs_info->commit_root_sem);
        }
 
        ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid,
@@ -1526,8 +1545,8 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
 
        ULIST_ITER_INIT(&ref_uiter);
        while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) {
-               ret = btrfs_find_all_roots(trans, fs_info, ref_node->val,
-                                          tree_mod_seq_elem.seq, &roots);
+               ret = __btrfs_find_all_roots(trans, fs_info, ref_node->val,
+                                            tree_mod_seq_elem.seq, &roots);
                if (ret)
                        break;
                ULIST_ITER_INIT(&root_uiter);
@@ -1549,6 +1568,8 @@ out:
        if (!search_commit_root) {
                btrfs_put_tree_mod_seq(fs_info, &tree_mod_seq_elem);
                btrfs_end_transaction(trans, fs_info->extent_root);
+       } else {
+               up_read(&fs_info->commit_root_sem);
        }
 
        return ret;
index 88d1b1e..1bcfcdb 100644 (file)
@@ -2769,9 +2769,13 @@ again:
                 * the commit roots are read only
                 * so we always do read locks
                 */
+               if (p->need_commit_sem)
+                       down_read(&root->fs_info->commit_root_sem);
                b = root->commit_root;
                extent_buffer_get(b);
                level = btrfs_header_level(b);
+               if (p->need_commit_sem)
+                       up_read(&root->fs_info->commit_root_sem);
                if (!p->skip_locking)
                        btrfs_tree_read_lock(b);
        } else {
@@ -5360,7 +5364,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
 {
        int ret;
        int cmp;
-       struct btrfs_trans_handle *trans = NULL;
        struct btrfs_path *left_path = NULL;
        struct btrfs_path *right_path = NULL;
        struct btrfs_key left_key;
@@ -5378,9 +5381,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        u64 right_blockptr;
        u64 left_gen;
        u64 right_gen;
-       u64 left_start_ctransid;
-       u64 right_start_ctransid;
-       u64 ctransid;
 
        left_path = btrfs_alloc_path();
        if (!left_path) {
@@ -5404,21 +5404,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        right_path->search_commit_root = 1;
        right_path->skip_locking = 1;
 
-       spin_lock(&left_root->root_item_lock);
-       left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
-       spin_unlock(&left_root->root_item_lock);
-
-       spin_lock(&right_root->root_item_lock);
-       right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
-       spin_unlock(&right_root->root_item_lock);
-
-       trans = btrfs_join_transaction(left_root);
-       if (IS_ERR(trans)) {
-               ret = PTR_ERR(trans);
-               trans = NULL;
-               goto out;
-       }
-
        /*
         * Strategy: Go to the first items of both trees. Then do
         *
@@ -5455,6 +5440,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
         *   the right if possible or go up and right.
         */
 
+       down_read(&left_root->fs_info->commit_root_sem);
        left_level = btrfs_header_level(left_root->commit_root);
        left_root_level = left_level;
        left_path->nodes[left_level] = left_root->commit_root;
@@ -5464,6 +5450,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        right_root_level = right_level;
        right_path->nodes[right_level] = right_root->commit_root;
        extent_buffer_get(right_path->nodes[right_level]);
+       up_read(&left_root->fs_info->commit_root_sem);
 
        if (left_level == 0)
                btrfs_item_key_to_cpu(left_path->nodes[left_level],
@@ -5482,67 +5469,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
        advance_left = advance_right = 0;
 
        while (1) {
-               /*
-                * We need to make sure the transaction does not get committed
-                * while we do anything on commit roots. This means, we need to
-                * join and leave transactions for every item that we process.
-                */
-               if (trans && btrfs_should_end_transaction(trans, left_root)) {
-                       btrfs_release_path(left_path);
-                       btrfs_release_path(right_path);
-
-                       ret = btrfs_end_transaction(trans, left_root);
-                       trans = NULL;
-                       if (ret < 0)
-                               goto out;
-               }
-               /* now rejoin the transaction */
-               if (!trans) {
-                       trans = btrfs_join_transaction(left_root);
-                       if (IS_ERR(trans)) {
-                               ret = PTR_ERR(trans);
-                               trans = NULL;
-                               goto out;
-                       }
-
-                       spin_lock(&left_root->root_item_lock);
-                       ctransid = btrfs_root_ctransid(&left_root->root_item);
-                       spin_unlock(&left_root->root_item_lock);
-                       if (ctransid != left_start_ctransid)
-                               left_start_ctransid = 0;
-
-                       spin_lock(&right_root->root_item_lock);
-                       ctransid = btrfs_root_ctransid(&right_root->root_item);
-                       spin_unlock(&right_root->root_item_lock);
-                       if (ctransid != right_start_ctransid)
-                               right_start_ctransid = 0;
-
-                       if (!left_start_ctransid || !right_start_ctransid) {
-                               WARN(1, KERN_WARNING
-                                       "BTRFS: btrfs_compare_tree detected "
-                                       "a change in one of the trees while "
-                                       "iterating. This is probably a "
-                                       "bug.\n");
-                               ret = -EIO;
-                               goto out;
-                       }
-
-                       /*
-                        * the commit root may have changed, so start again
-                        * where we stopped
-                        */
-                       left_path->lowest_level = left_level;
-                       right_path->lowest_level = right_level;
-                       ret = btrfs_search_slot(NULL, left_root,
-                                       &left_key, left_path, 0, 0);
-                       if (ret < 0)
-                               goto out;
-                       ret = btrfs_search_slot(NULL, right_root,
-                                       &right_key, right_path, 0, 0);
-                       if (ret < 0)
-                               goto out;
-               }
-
                if (advance_left && !left_end_reached) {
                        ret = tree_advance(left_root, left_path, &left_level,
                                        left_root_level,
@@ -5672,14 +5598,6 @@ out:
        btrfs_free_path(left_path);
        btrfs_free_path(right_path);
        kfree(tmp_buf);
-
-       if (trans) {
-               if (!ret)
-                       ret = btrfs_end_transaction(trans, left_root);
-               else
-                       btrfs_end_transaction(trans, left_root);
-       }
-
        return ret;
 }
 
index bc96c03..4c48df5 100644 (file)
@@ -609,6 +609,7 @@ struct btrfs_path {
        unsigned int skip_locking:1;
        unsigned int leave_spinning:1;
        unsigned int search_commit_root:1;
+       unsigned int need_commit_sem:1;
 };
 
 /*
@@ -986,7 +987,8 @@ struct btrfs_dev_replace_item {
 #define BTRFS_BLOCK_GROUP_RAID10       (1ULL << 6)
 #define BTRFS_BLOCK_GROUP_RAID5         (1ULL << 7)
 #define BTRFS_BLOCK_GROUP_RAID6         (1ULL << 8)
-#define BTRFS_BLOCK_GROUP_RESERVED     BTRFS_AVAIL_ALLOC_BIT_SINGLE
+#define BTRFS_BLOCK_GROUP_RESERVED     (BTRFS_AVAIL_ALLOC_BIT_SINGLE | \
+                                        BTRFS_SPACE_INFO_GLOBAL_RSV)
 
 enum btrfs_raid_types {
        BTRFS_RAID_RAID10,
@@ -1018,6 +1020,12 @@ enum btrfs_raid_types {
  */
 #define BTRFS_AVAIL_ALLOC_BIT_SINGLE   (1ULL << 48)
 
+/*
+ * A fake block group type that is used to communicate global block reserve
+ * size to userspace via the SPACE_INFO ioctl.
+ */
+#define BTRFS_SPACE_INFO_GLOBAL_RSV    (1ULL << 49)
+
 #define BTRFS_EXTENDED_PROFILE_MASK    (BTRFS_BLOCK_GROUP_PROFILE_MASK | \
                                         BTRFS_AVAIL_ALLOC_BIT_SINGLE)
 
@@ -1440,7 +1448,7 @@ struct btrfs_fs_info {
         */
        struct mutex ordered_extent_flush_mutex;
 
-       struct rw_semaphore extent_commit_sem;
+       struct rw_semaphore commit_root_sem;
 
        struct rw_semaphore cleanup_work_sem;
 
@@ -1711,7 +1719,6 @@ struct btrfs_root {
        struct btrfs_block_rsv *block_rsv;
 
        /* free ino cache stuff */
-       struct mutex fs_commit_mutex;
        struct btrfs_free_space_ctl *free_ino_ctl;
        enum btrfs_caching_type cached;
        spinlock_t cache_lock;
index bd0f752..029d46c 100644 (file)
@@ -329,6 +329,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
 {
        struct extent_state *cached_state = NULL;
        int ret;
+       bool need_lock = (current->journal_info ==
+                         (void *)BTRFS_SEND_TRANS_STUB);
 
        if (!parent_transid || btrfs_header_generation(eb) == parent_transid)
                return 0;
@@ -336,6 +338,11 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
        if (atomic)
                return -EAGAIN;
 
+       if (need_lock) {
+               btrfs_tree_read_lock(eb);
+               btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+       }
+
        lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1,
                         0, &cached_state);
        if (extent_buffer_uptodate(eb) &&
@@ -347,10 +354,21 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
                       "found %llu\n",
                       eb->start, parent_transid, btrfs_header_generation(eb));
        ret = 1;
-       clear_extent_buffer_uptodate(eb);
+
+       /*
+        * Things reading via commit roots that don't have normal protection,
+        * like send, can have a really old block in cache that may point at a
+        * block that has been free'd and re-allocated.  So don't clear uptodate
+        * if we find an eb that is under IO (dirty/writeback) because we could
+        * end up reading in the stale data and then writing it back out and
+        * making everybody very sad.
+        */
+       if (!extent_buffer_under_io(eb))
+               clear_extent_buffer_uptodate(eb);
 out:
        unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1,
                             &cached_state, GFP_NOFS);
+       btrfs_tree_read_unlock_blocking(eb);
        return ret;
 }
 
@@ -1546,7 +1564,6 @@ int btrfs_init_fs_root(struct btrfs_root *root)
        root->subv_writers = writers;
 
        btrfs_init_free_ino_ctl(root);
-       mutex_init(&root->fs_commit_mutex);
        spin_lock_init(&root->cache_lock);
        init_waitqueue_head(&root->cache_wait);
 
@@ -2324,7 +2341,7 @@ int open_ctree(struct super_block *sb,
        mutex_init(&fs_info->transaction_kthread_mutex);
        mutex_init(&fs_info->cleaner_mutex);
        mutex_init(&fs_info->volume_mutex);
-       init_rwsem(&fs_info->extent_commit_sem);
+       init_rwsem(&fs_info->commit_root_sem);
        init_rwsem(&fs_info->cleanup_work_sem);
        init_rwsem(&fs_info->subvol_sem);
        sema_init(&fs_info->uuid_tree_rescan_sem, 1);
index c6b6a6e..1306487 100644 (file)
@@ -419,7 +419,7 @@ static noinline void caching_thread(struct btrfs_work *work)
 again:
        mutex_lock(&caching_ctl->mutex);
        /* need to make sure the commit_root doesn't disappear */
-       down_read(&fs_info->extent_commit_sem);
+       down_read(&fs_info->commit_root_sem);
 
 next:
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
@@ -443,10 +443,10 @@ next:
                                break;
 
                        if (need_resched() ||
-                           rwsem_is_contended(&fs_info->extent_commit_sem)) {
+                           rwsem_is_contended(&fs_info->commit_root_sem)) {
                                caching_ctl->progress = last;
                                btrfs_release_path(path);
-                               up_read(&fs_info->extent_commit_sem);
+                               up_read(&fs_info->commit_root_sem);
                                mutex_unlock(&caching_ctl->mutex);
                                cond_resched();
                                goto again;
@@ -513,7 +513,7 @@ next:
 
 err:
        btrfs_free_path(path);
-       up_read(&fs_info->extent_commit_sem);
+       up_read(&fs_info->commit_root_sem);
 
        free_excluded_extents(extent_root, block_group);
 
@@ -633,10 +633,10 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
                return 0;
        }
 
-       down_write(&fs_info->extent_commit_sem);
+       down_write(&fs_info->commit_root_sem);
        atomic_inc(&caching_ctl->count);
        list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
-       up_write(&fs_info->extent_commit_sem);
+       up_write(&fs_info->commit_root_sem);
 
        btrfs_get_block_group(cache);
 
@@ -2444,7 +2444,8 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
                        spin_unlock(&locked_ref->lock);
                        spin_lock(&delayed_refs->lock);
                        spin_lock(&locked_ref->lock);
-                       if (rb_first(&locked_ref->ref_root)) {
+                       if (rb_first(&locked_ref->ref_root) ||
+                           locked_ref->extent_op) {
                                spin_unlock(&locked_ref->lock);
                                spin_unlock(&delayed_refs->lock);
                                continue;
@@ -5470,7 +5471,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
        struct btrfs_block_group_cache *cache;
        struct btrfs_space_info *space_info;
 
-       down_write(&fs_info->extent_commit_sem);
+       down_write(&fs_info->commit_root_sem);
 
        list_for_each_entry_safe(caching_ctl, next,
                                 &fs_info->caching_block_groups, list) {
@@ -5489,7 +5490,7 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
        else
                fs_info->pinned_extents = &fs_info->freed_extents[0];
 
-       up_write(&fs_info->extent_commit_sem);
+       up_write(&fs_info->commit_root_sem);
 
        list_for_each_entry_rcu(space_info, &fs_info->space_info, list)
                percpu_counter_set(&space_info->total_bytes_pinned, 0);
@@ -5744,6 +5745,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                        "unable to find ref byte nr %llu parent %llu root %llu  owner %llu offset %llu",
                        bytenr, parent, root_objectid, owner_objectid,
                        owner_offset);
+               btrfs_abort_transaction(trans, extent_root, ret);
+               goto out;
        } else {
                btrfs_abort_transaction(trans, extent_root, ret);
                goto out;
@@ -8255,14 +8258,14 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
        struct btrfs_caching_control *caching_ctl;
        struct rb_node *n;
 
-       down_write(&info->extent_commit_sem);
+       down_write(&info->commit_root_sem);
        while (!list_empty(&info->caching_block_groups)) {
                caching_ctl = list_entry(info->caching_block_groups.next,
                                         struct btrfs_caching_control, list);
                list_del(&caching_ctl->list);
                put_caching_control(caching_ctl);
        }
-       up_write(&info->extent_commit_sem);
+       up_write(&info->commit_root_sem);
 
        spin_lock(&info->block_group_cache_lock);
        while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
@@ -8336,9 +8339,15 @@ static void __link_block_group(struct btrfs_space_info *space_info,
                               struct btrfs_block_group_cache *cache)
 {
        int index = get_block_group_index(cache);
+       bool first = false;
 
        down_write(&space_info->groups_sem);
-       if (list_empty(&space_info->block_groups[index])) {
+       if (list_empty(&space_info->block_groups[index]))
+               first = true;
+       list_add_tail(&cache->list, &space_info->block_groups[index]);
+       up_write(&space_info->groups_sem);
+
+       if (first) {
                struct kobject *kobj = &space_info->block_group_kobjs[index];
                int ret;
 
@@ -8350,8 +8359,6 @@ static void __link_block_group(struct btrfs_space_info *space_info,
                        kobject_put(&space_info->kobj);
                }
        }
-       list_add_tail(&cache->list, &space_info->block_groups[index]);
-       up_write(&space_info->groups_sem);
 }
 
 static struct btrfs_block_group_cache *
index ae69a00..3955e47 100644 (file)
@@ -749,6 +749,7 @@ again:
                 * our range starts
                 */
                node = tree_search(tree, start);
+process_node:
                if (!node)
                        break;
 
@@ -769,7 +770,10 @@ again:
                if (start > end)
                        break;
 
-               cond_resched_lock(&tree->lock);
+               if (!cond_resched_lock(&tree->lock)) {
+                       node = rb_next(node);
+                       goto process_node;
+               }
        }
 out:
        spin_unlock(&tree->lock);
@@ -4306,7 +4310,7 @@ static void __free_extent_buffer(struct extent_buffer *eb)
        kmem_cache_free(extent_buffer_cache, eb);
 }
 
-static int extent_buffer_under_io(struct extent_buffer *eb)
+int extent_buffer_under_io(struct extent_buffer *eb)
 {
        return (atomic_read(&eb->io_pages) ||
                test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
index 58b27e5..c488b45 100644 (file)
@@ -320,6 +320,7 @@ int set_extent_buffer_dirty(struct extent_buffer *eb);
 int set_extent_buffer_uptodate(struct extent_buffer *eb);
 int clear_extent_buffer_uptodate(struct extent_buffer *eb);
 int extent_buffer_uptodate(struct extent_buffer *eb);
+int extent_buffer_under_io(struct extent_buffer *eb);
 int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
                      unsigned long min_len, char **map,
                      unsigned long *map_start,
index c660527..c599847 100644 (file)
@@ -1727,6 +1727,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
        struct btrfs_root *root = BTRFS_I(inode)->root;
        loff_t *ppos = &iocb->ki_pos;
        u64 start_pos;
+       u64 end_pos;
        ssize_t num_written = 0;
        ssize_t err = 0;
        size_t count, ocount;
@@ -1781,7 +1782,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 
        start_pos = round_down(pos, root->sectorsize);
        if (start_pos > i_size_read(inode)) {
-               err = btrfs_cont_expand(inode, i_size_read(inode), start_pos);
+               /* Expand hole size to cover write data, preventing empty gap */
+               end_pos = round_up(pos + iov->iov_len, root->sectorsize);
+               err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
                if (err) {
                        mutex_unlock(&inode->i_mutex);
                        goto out;
index ab485e5..cc8ca19 100644 (file)
@@ -55,7 +55,7 @@ static int caching_kthread(void *data)
        key.type = BTRFS_INODE_ITEM_KEY;
 again:
        /* need to make sure the commit_root doesn't disappear */
-       mutex_lock(&root->fs_commit_mutex);
+       down_read(&fs_info->commit_root_sem);
 
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
@@ -88,7 +88,7 @@ again:
                                btrfs_item_key_to_cpu(leaf, &key, 0);
                                btrfs_release_path(path);
                                root->cache_progress = last;
-                               mutex_unlock(&root->fs_commit_mutex);
+                               up_read(&fs_info->commit_root_sem);
                                schedule_timeout(1);
                                goto again;
                        } else
@@ -127,7 +127,7 @@ next:
        btrfs_unpin_free_ino(root);
 out:
        wake_up(&root->cache_wait);
-       mutex_unlock(&root->fs_commit_mutex);
+       up_read(&fs_info->commit_root_sem);
 
        btrfs_free_path(path);
 
@@ -223,11 +223,11 @@ again:
                 * or the caching work is done.
                 */
 
-               mutex_lock(&root->fs_commit_mutex);
+               down_write(&root->fs_info->commit_root_sem);
                spin_lock(&root->cache_lock);
                if (root->cached == BTRFS_CACHE_FINISHED) {
                        spin_unlock(&root->cache_lock);
-                       mutex_unlock(&root->fs_commit_mutex);
+                       up_write(&root->fs_info->commit_root_sem);
                        goto again;
                }
                spin_unlock(&root->cache_lock);
@@ -240,7 +240,7 @@ again:
                else
                        __btrfs_add_free_space(pinned, objectid, 1);
 
-               mutex_unlock(&root->fs_commit_mutex);
+               up_write(&root->fs_info->commit_root_sem);
        }
 }
 
@@ -250,7 +250,7 @@ again:
  * and others will just be dropped, because the commit root we were
  * searching has changed.
  *
- * Must be called with root->fs_commit_mutex held
+ * Must be called with root->fs_info->commit_root_sem held
  */
 void btrfs_unpin_free_ino(struct btrfs_root *root)
 {
index 06e9a41..5f805bc 100644 (file)
@@ -394,6 +394,14 @@ static noinline int compress_file_range(struct inode *inode,
            (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
                btrfs_add_inode_defrag(NULL, inode);
 
+       /*
+        * skip compression for a small file range(<=blocksize) that
+        * isn't an inline extent, since it dosen't save disk space at all.
+        */
+       if ((end - start + 1) <= blocksize &&
+           (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
+               goto cleanup_and_bail_uncompressed;
+
        actual_end = min_t(u64, isize, end + 1);
 again:
        will_compress = 0;
@@ -1270,6 +1278,15 @@ next_slot:
                        disk_bytenr += extent_offset;
                        disk_bytenr += cur_offset - found_key.offset;
                        num_bytes = min(end + 1, extent_end) - cur_offset;
+                       /*
+                        * if there are pending snapshots for this root,
+                        * we fall into common COW way.
+                        */
+                       if (!nolock) {
+                               err = btrfs_start_nocow_write(root);
+                               if (!err)
+                                       goto out_check;
+                       }
                        /*
                         * force cow if csum exists in the range.
                         * this ensure that csum for a given extent are
@@ -1289,6 +1306,8 @@ next_slot:
 out_check:
                if (extent_end <= start) {
                        path->slots[0]++;
+                       if (!nolock && nocow)
+                               btrfs_end_nocow_write(root);
                        goto next_slot;
                }
                if (!nocow) {
@@ -1306,8 +1325,11 @@ out_check:
                        ret = cow_file_range(inode, locked_page,
                                             cow_start, found_key.offset - 1,
                                             page_started, nr_written, 1);
-                       if (ret)
+                       if (ret) {
+                               if (!nolock && nocow)
+                                       btrfs_end_nocow_write(root);
                                goto error;
+                       }
                        cow_start = (u64)-1;
                }
 
@@ -1354,8 +1376,11 @@ out_check:
                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                        ret = btrfs_reloc_clone_csums(inode, cur_offset,
                                                      num_bytes);
-                       if (ret)
+                       if (ret) {
+                               if (!nolock && nocow)
+                                       btrfs_end_nocow_write(root);
                                goto error;
+                       }
                }
 
                extent_clear_unlock_delalloc(inode, cur_offset,
@@ -1363,6 +1388,8 @@ out_check:
                                             locked_page, EXTENT_LOCKED |
                                             EXTENT_DELALLOC, PAGE_UNLOCK |
                                             PAGE_SET_PRIVATE2);
+               if (!nolock && nocow)
+                       btrfs_end_nocow_write(root);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@ -8476,19 +8503,20 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput,
                        else
                                iput(inode);
                        ret = -ENOMEM;
-                       break;
+                       goto out;
                }
                list_add_tail(&work->list, &works);
                btrfs_queue_work(root->fs_info->flush_workers,
                                 &work->work);
                ret++;
                if (nr != -1 && ret >= nr)
-                       break;
+                       goto out;
                cond_resched();
                spin_lock(&root->delalloc_lock);
        }
        spin_unlock(&root->delalloc_lock);
 
+out:
        list_for_each_entry_safe(work, next, &works, list) {
                list_del_init(&work->list);
                btrfs_wait_and_free_delalloc_work(work);
index 0401397..e79ff6b 100644 (file)
@@ -1472,6 +1472,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
        struct btrfs_trans_handle *trans;
        struct btrfs_device *device = NULL;
        char *sizestr;
+       char *retptr;
        char *devstr = NULL;
        int ret = 0;
        int mod = 0;
@@ -1539,8 +1540,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                        mod = 1;
                        sizestr++;
                }
-               new_size = memparse(sizestr, NULL);
-               if (new_size == 0) {
+               new_size = memparse(sizestr, &retptr);
+               if (*retptr != '\0' || new_size == 0) {
                        ret = -EINVAL;
                        goto out_free;
                }
@@ -3140,8 +3141,9 @@ process_slot:
                                                         new_key.offset + datal,
                                                         1);
                                if (ret) {
-                                       btrfs_abort_transaction(trans, root,
-                                                               ret);
+                                       if (ret != -EINVAL)
+                                               btrfs_abort_transaction(trans,
+                                                       root, ret);
                                        btrfs_end_transaction(trans, root);
                                        goto out;
                                }
@@ -3538,6 +3540,11 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
                up_read(&info->groups_sem);
        }
 
+       /*
+        * Global block reserve, exported as a space_info
+        */
+       slot_count++;
+
        /* space_slots == 0 means they are asking for a count */
        if (space_args.space_slots == 0) {
                space_args.total_spaces = slot_count;
@@ -3596,6 +3603,21 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
                up_read(&info->groups_sem);
        }
 
+       /*
+        * Add global block reserve
+        */
+       if (slot_count) {
+               struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv;
+
+               spin_lock(&block_rsv->lock);
+               space.total_bytes = block_rsv->size;
+               space.used_bytes = block_rsv->size - block_rsv->reserved;
+               spin_unlock(&block_rsv->lock);
+               space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
+               memcpy(dest, &space, sizeof(space));
+               space_args.total_spaces++;
+       }
+
        user_dest = (struct btrfs_ioctl_space_info __user *)
                (arg + sizeof(struct btrfs_ioctl_space_args));
 
@@ -4531,9 +4553,8 @@ static long btrfs_ioctl_set_received_subvol_32(struct file *file,
        }
 
        args64 = kmalloc(sizeof(*args64), GFP_NOFS);
-       if (IS_ERR(args64)) {
-               ret = PTR_ERR(args64);
-               args64 = NULL;
+       if (!args64) {
+               ret = -ENOMEM;
                goto out;
        }
 
index def428a..7f92ab1 100644 (file)
@@ -2317,7 +2317,6 @@ void free_reloc_roots(struct list_head *list)
 static noinline_for_stack
 int merge_reloc_roots(struct reloc_control *rc)
 {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *root;
        struct btrfs_root *reloc_root;
        u64 last_snap;
@@ -2375,26 +2374,6 @@ again:
                                list_add_tail(&reloc_root->root_list,
                                              &reloc_roots);
                        goto out;
-               } else if (!ret) {
-                       /*
-                        * recover the last snapshot tranid to avoid
-                        * the space balance break NOCOW.
-                        */
-                       root = read_fs_root(rc->extent_root->fs_info,
-                                           objectid);
-                       if (IS_ERR(root))
-                               continue;
-
-                       trans = btrfs_join_transaction(root);
-                       BUG_ON(IS_ERR(trans));
-
-                       /* Check if the fs/file tree was snapshoted or not. */
-                       if (btrfs_root_last_snapshot(&root->root_item) ==
-                           otransid - 1)
-                               btrfs_set_root_last_snapshot(&root->root_item,
-                                                            last_snap);
-                               
-                       btrfs_end_transaction(trans, root);
                }
        }
 
index 93e6d71..0be7799 100644 (file)
@@ -2235,6 +2235,47 @@ behind_scrub_pages:
        return 0;
 }
 
+/*
+ * Given a physical address, this will calculate it's
+ * logical offset. if this is a parity stripe, it will return
+ * the most left data stripe's logical offset.
+ *
+ * return 0 if it is a data stripe, 1 means parity stripe.
+ */
+static int get_raid56_logic_offset(u64 physical, int num,
+                                  struct map_lookup *map, u64 *offset)
+{
+       int i;
+       int j = 0;
+       u64 stripe_nr;
+       u64 last_offset;
+       int stripe_index;
+       int rot;
+
+       last_offset = (physical - map->stripes[num].physical) *
+                     nr_data_stripes(map);
+       *offset = last_offset;
+       for (i = 0; i < nr_data_stripes(map); i++) {
+               *offset = last_offset + i * map->stripe_len;
+
+               stripe_nr = *offset;
+               do_div(stripe_nr, map->stripe_len);
+               do_div(stripe_nr, nr_data_stripes(map));
+
+               /* Work out the disk rotation on this stripe-set */
+               rot = do_div(stripe_nr, map->num_stripes);
+               /* calculate which stripe this data locates */
+               rot += i;
+               stripe_index = rot % map->num_stripes;
+               if (stripe_index == num)
+                       return 0;
+               if (stripe_index < num)
+                       j++;
+       }
+       *offset = last_offset + j * map->stripe_len;
+       return 1;
+}
+
 static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                                           struct map_lookup *map,
                                           struct btrfs_device *scrub_dev,
@@ -2256,6 +2297,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        u64 physical;
        u64 logical;
        u64 logic_end;
+       u64 physical_end;
        u64 generation;
        int mirror_num;
        struct reada_control *reada1;
@@ -2269,16 +2311,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        u64 extent_len;
        struct btrfs_device *extent_dev;
        int extent_mirror_num;
-       int stop_loop;
-
-       if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
-                        BTRFS_BLOCK_GROUP_RAID6)) {
-               if (num >= nr_data_stripes(map)) {
-                       return 0;
-               }
-       }
+       int stop_loop = 0;
 
        nstripes = length;
+       physical = map->stripes[num].physical;
        offset = 0;
        do_div(nstripes, map->stripe_len);
        if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
@@ -2296,6 +2332,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
                increment = map->stripe_len;
                mirror_num = num % map->num_stripes + 1;
+       } else if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                               BTRFS_BLOCK_GROUP_RAID6)) {
+               get_raid56_logic_offset(physical, num, map, &offset);
+               increment = map->stripe_len * nr_data_stripes(map);
+               mirror_num = 1;
        } else {
                increment = map->stripe_len;
                mirror_num = 1;
@@ -2319,7 +2360,15 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
         * to not hold off transaction commits
         */
        logical = base + offset;
-
+       physical_end = physical + nstripes * map->stripe_len;
+       if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                        BTRFS_BLOCK_GROUP_RAID6)) {
+               get_raid56_logic_offset(physical_end, num,
+                                       map, &logic_end);
+               logic_end += base;
+       } else {
+               logic_end = logical + increment * nstripes;
+       }
        wait_event(sctx->list_wait,
                   atomic_read(&sctx->bios_in_flight) == 0);
        scrub_blocked_if_needed(fs_info);
@@ -2328,7 +2377,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        key_start.objectid = logical;
        key_start.type = BTRFS_EXTENT_ITEM_KEY;
        key_start.offset = (u64)0;
-       key_end.objectid = base + offset + nstripes * increment;
+       key_end.objectid = logic_end;
        key_end.type = BTRFS_METADATA_ITEM_KEY;
        key_end.offset = (u64)-1;
        reada1 = btrfs_reada_add(root, &key_start, &key_end);
@@ -2338,7 +2387,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        key_start.offset = logical;
        key_end.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
        key_end.type = BTRFS_EXTENT_CSUM_KEY;
-       key_end.offset = base + offset + nstripes * increment;
+       key_end.offset = logic_end;
        reada2 = btrfs_reada_add(csum_root, &key_start, &key_end);
 
        if (!IS_ERR(reada1))
@@ -2356,11 +2405,17 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
        /*
         * now find all extents for each stripe and scrub them
         */
-       logical = base + offset;
-       physical = map->stripes[num].physical;
-       logic_end = logical + increment * nstripes;
        ret = 0;
-       while (logical < logic_end) {
+       while (physical < physical_end) {
+               /* for raid56, we skip parity stripe */
+               if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                               BTRFS_BLOCK_GROUP_RAID6)) {
+                       ret = get_raid56_logic_offset(physical, num,
+                                       map, &logical);
+                       logical += base;
+                       if (ret)
+                               goto skip;
+               }
                /*
                 * canceled?
                 */
@@ -2504,15 +2559,29 @@ again:
                        scrub_free_csums(sctx);
                        if (extent_logical + extent_len <
                            key.objectid + bytes) {
-                               logical += increment;
-                               physical += map->stripe_len;
-
+                               if (map->type & (BTRFS_BLOCK_GROUP_RAID5 |
+                                       BTRFS_BLOCK_GROUP_RAID6)) {
+                                       /*
+                                        * loop until we find next data stripe
+                                        * or we have finished all stripes.
+                                        */
+                                       do {
+                                               physical += map->stripe_len;
+                                               ret = get_raid56_logic_offset(
+                                                               physical, num,
+                                                               map, &logical);
+                                               logical += base;
+                                       } while (physical < physical_end && ret);
+                               } else {
+                                       physical += map->stripe_len;
+                                       logical += increment;
+                               }
                                if (logical < key.objectid + bytes) {
                                        cond_resched();
                                        goto again;
                                }
 
-                               if (logical >= logic_end) {
+                               if (physical >= physical_end) {
                                        stop_loop = 1;
                                        break;
                                }
@@ -2521,6 +2590,7 @@ next:
                        path->slots[0]++;
                }
                btrfs_release_path(path);
+skip:
                logical += increment;
                physical += map->stripe_len;
                spin_lock(&sctx->stat_lock);
index 9b6da9d..1ac3ca9 100644 (file)
@@ -493,6 +493,7 @@ static struct btrfs_path *alloc_path_for_send(void)
                return NULL;
        path->search_commit_root = 1;
        path->skip_locking = 1;
+       path->need_commit_sem = 1;
        return path;
 }
 
@@ -771,29 +772,22 @@ out:
 /*
  * Helper function to retrieve some fields from an inode item.
  */
-static int get_inode_info(struct btrfs_root *root,
-                         u64 ino, u64 *size, u64 *gen,
-                         u64 *mode, u64 *uid, u64 *gid,
-                         u64 *rdev)
+static int __get_inode_info(struct btrfs_root *root, struct btrfs_path *path,
+                         u64 ino, u64 *size, u64 *gen, u64 *mode, u64 *uid,
+                         u64 *gid, u64 *rdev)
 {
        int ret;
        struct btrfs_inode_item *ii;
        struct btrfs_key key;
-       struct btrfs_path *path;
-
-       path = alloc_path_for_send();
-       if (!path)
-               return -ENOMEM;
 
        key.objectid = ino;
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
-       if (ret < 0)
-               goto out;
        if (ret) {
-               ret = -ENOENT;
-               goto out;
+               if (ret > 0)
+                       ret = -ENOENT;
+               return ret;
        }
 
        ii = btrfs_item_ptr(path->nodes[0], path->slots[0],
@@ -811,7 +805,22 @@ static int get_inode_info(struct btrfs_root *root,
        if (rdev)
                *rdev = btrfs_inode_rdev(path->nodes[0], ii);
 
-out:
+       return ret;
+}
+
+static int get_inode_info(struct btrfs_root *root,
+                         u64 ino, u64 *size, u64 *gen,
+                         u64 *mode, u64 *uid, u64 *gid,
+                         u64 *rdev)
+{
+       struct btrfs_path *path;
+       int ret;
+
+       path = alloc_path_for_send();
+       if (!path)
+               return -ENOMEM;
+       ret = __get_inode_info(root, path, ino, size, gen, mode, uid, gid,
+                              rdev);
        btrfs_free_path(path);
        return ret;
 }
@@ -1085,6 +1094,7 @@ out:
 struct backref_ctx {
        struct send_ctx *sctx;
 
+       struct btrfs_path *path;
        /* number of total found references */
        u64 found;
 
@@ -1155,8 +1165,9 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_)
         * There are inodes that have extents that lie behind its i_size. Don't
         * accept clones from these extents.
         */
-       ret = get_inode_info(found->root, ino, &i_size, NULL, NULL, NULL, NULL,
-                       NULL);
+       ret = __get_inode_info(found->root, bctx->path, ino, &i_size, NULL, NULL,
+                              NULL, NULL, NULL);
+       btrfs_release_path(bctx->path);
        if (ret < 0)
                return ret;
 
@@ -1235,12 +1246,17 @@ static int find_extent_clone(struct send_ctx *sctx,
        if (!tmp_path)
                return -ENOMEM;
 
+       /* We only use this path under the commit sem */
+       tmp_path->need_commit_sem = 0;
+
        backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
        if (!backref_ctx) {
                ret = -ENOMEM;
                goto out;
        }
 
+       backref_ctx->path = tmp_path;
+
        if (data_offset >= ino_size) {
                /*
                 * There may be extents that lie behind the file's size.
@@ -1268,8 +1284,10 @@ static int find_extent_clone(struct send_ctx *sctx,
        }
        logical = disk_byte + btrfs_file_extent_offset(eb, fi);
 
+       down_read(&sctx->send_root->fs_info->commit_root_sem);
        ret = extent_from_logical(sctx->send_root->fs_info, disk_byte, tmp_path,
                                  &found_key, &flags);
+       up_read(&sctx->send_root->fs_info->commit_root_sem);
        btrfs_release_path(tmp_path);
 
        if (ret < 0)
@@ -4418,14 +4436,14 @@ static int send_hole(struct send_ctx *sctx, u64 end)
        p = fs_path_alloc();
        if (!p)
                return -ENOMEM;
+       ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
+       if (ret < 0)
+               goto tlv_put_failure;
        memset(sctx->read_buf, 0, BTRFS_SEND_READ_SIZE);
        while (offset < end) {
                len = min_t(u64, end - offset, BTRFS_SEND_READ_SIZE);
 
                ret = begin_cmd(sctx, BTRFS_SEND_C_WRITE);
-               if (ret < 0)
-                       break;
-               ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, p);
                if (ret < 0)
                        break;
                TLV_PUT_PATH(sctx, BTRFS_SEND_A_PATH, p);
@@ -4968,7 +4986,9 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end)
 
        if (S_ISREG(sctx->cur_inode_mode)) {
                if (need_send_hole(sctx)) {
-                       if (sctx->cur_inode_last_extent == (u64)-1) {
+                       if (sctx->cur_inode_last_extent == (u64)-1 ||
+                           sctx->cur_inode_last_extent <
+                           sctx->cur_inode_size) {
                                ret = get_last_extent(sctx, (u64)-1);
                                if (ret)
                                        goto out;
@@ -5367,57 +5387,21 @@ out:
 static int full_send_tree(struct send_ctx *sctx)
 {
        int ret;
-       struct btrfs_trans_handle *trans = NULL;
        struct btrfs_root *send_root = sctx->send_root;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
        struct extent_buffer *eb;
        int slot;
-       u64 start_ctransid;
-       u64 ctransid;
 
        path = alloc_path_for_send();
        if (!path)
                return -ENOMEM;
 
-       spin_lock(&send_root->root_item_lock);
-       start_ctransid = btrfs_root_ctransid(&send_root->root_item);
-       spin_unlock(&send_root->root_item_lock);
-
        key.objectid = BTRFS_FIRST_FREE_OBJECTID;
        key.type = BTRFS_INODE_ITEM_KEY;
        key.offset = 0;
 
-join_trans:
-       /*
-        * We need to make sure the transaction does not get committed
-        * while we do anything on commit roots. Join a transaction to prevent
-        * this.
-        */
-       trans = btrfs_join_transaction(send_root);
-       if (IS_ERR(trans)) {
-               ret = PTR_ERR(trans);
-               trans = NULL;
-               goto out;
-       }
-
-       /*
-        * Make sure the tree has not changed after re-joining. We detect this
-        * by comparing start_ctransid and ctransid. They should always match.
-        */
-       spin_lock(&send_root->root_item_lock);
-       ctransid = btrfs_root_ctransid(&send_root->root_item);
-       spin_unlock(&send_root->root_item_lock);
-
-       if (ctransid != start_ctransid) {
-               WARN(1, KERN_WARNING "BTRFS: the root that you're trying to "
-                                    "send was modified in between. This is "
-                                    "probably a bug.\n");
-               ret = -EIO;
-               goto out;
-       }
-
        ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
        if (ret < 0)
                goto out;
@@ -5425,19 +5409,6 @@ join_trans:
                goto out_finish;
 
        while (1) {
-               /*
-                * When someone want to commit while we iterate, end the
-                * joined transaction and rejoin.
-                */
-               if (btrfs_should_end_transaction(trans, send_root)) {
-                       ret = btrfs_end_transaction(trans, send_root);
-                       trans = NULL;
-                       if (ret < 0)
-                               goto out;
-                       btrfs_release_path(path);
-                       goto join_trans;
-               }
-
                eb = path->nodes[0];
                slot = path->slots[0];
                btrfs_item_key_to_cpu(eb, &found_key, slot);
@@ -5465,12 +5436,6 @@ out_finish:
 
 out:
        btrfs_free_path(path);
-       if (trans) {
-               if (!ret)
-                       ret = btrfs_end_transaction(trans, send_root);
-               else
-                       btrfs_end_transaction(trans, send_root);
-       }
        return ret;
 }
 
@@ -5718,7 +5683,9 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
                        NULL);
        sort_clone_roots = 1;
 
+       current->journal_info = (void *)BTRFS_SEND_TRANS_STUB;
        ret = send_subvol(sctx);
+       current->journal_info = NULL;
        if (ret < 0)
                goto out;
 
index 9dbf423..5011aad 100644 (file)
@@ -66,6 +66,8 @@
 static const struct super_operations btrfs_super_ops;
 static struct file_system_type btrfs_fs_type;
 
+static int btrfs_remount(struct super_block *sb, int *flags, char *data);
+
 static const char *btrfs_decode_error(int errno)
 {
        char *errstr = "unknown";
@@ -1185,6 +1187,26 @@ static struct dentry *mount_subvol(const char *subvol_name, int flags,
        mnt = vfs_kern_mount(&btrfs_fs_type, flags, device_name,
                             newargs);
        kfree(newargs);
+
+       if (PTR_RET(mnt) == -EBUSY) {
+               if (flags & MS_RDONLY) {
+                       mnt = vfs_kern_mount(&btrfs_fs_type, flags & ~MS_RDONLY, device_name,
+                                            newargs);
+               } else {
+                       int r;
+                       mnt = vfs_kern_mount(&btrfs_fs_type, flags | MS_RDONLY, device_name,
+                                            newargs);
+                       if (IS_ERR(mnt))
+                               return ERR_CAST(mnt);
+
+                       r = btrfs_remount(mnt->mnt_sb, &flags, NULL);
+                       if (r < 0) {
+                               /* FIXME: release vfsmount mnt ??*/
+                               return ERR_PTR(r);
+                       }
+               }
+       }
+
        if (IS_ERR(mnt))
                return ERR_CAST(mnt);
 
index a04707f..7579f6d 100644 (file)
@@ -75,10 +75,21 @@ void btrfs_put_transaction(struct btrfs_transaction *transaction)
        }
 }
 
-static noinline void switch_commit_root(struct btrfs_root *root)
+static noinline void switch_commit_roots(struct btrfs_transaction *trans,
+                                        struct btrfs_fs_info *fs_info)
 {
-       free_extent_buffer(root->commit_root);
-       root->commit_root = btrfs_root_node(root);
+       struct btrfs_root *root, *tmp;
+
+       down_write(&fs_info->commit_root_sem);
+       list_for_each_entry_safe(root, tmp, &trans->switch_commits,
+                                dirty_list) {
+               list_del_init(&root->dirty_list);
+               free_extent_buffer(root->commit_root);
+               root->commit_root = btrfs_root_node(root);
+               if (is_fstree(root->objectid))
+                       btrfs_unpin_free_ino(root);
+       }
+       up_write(&fs_info->commit_root_sem);
 }
 
 static inline void extwriter_counter_inc(struct btrfs_transaction *trans,
@@ -208,6 +219,7 @@ loop:
        INIT_LIST_HEAD(&cur_trans->pending_snapshots);
        INIT_LIST_HEAD(&cur_trans->ordered_operations);
        INIT_LIST_HEAD(&cur_trans->pending_chunks);
+       INIT_LIST_HEAD(&cur_trans->switch_commits);
        list_add_tail(&cur_trans->list, &fs_info->trans_list);
        extent_io_tree_init(&cur_trans->dirty_pages,
                             fs_info->btree_inode->i_mapping);
@@ -375,7 +387,8 @@ start_transaction(struct btrfs_root *root, u64 num_items, unsigned int type,
        if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))
                return ERR_PTR(-EROFS);
 
-       if (current->journal_info) {
+       if (current->journal_info &&
+           current->journal_info != (void *)BTRFS_SEND_TRANS_STUB) {
                WARN_ON(type & TRANS_EXTWRITERS);
                h = current->journal_info;
                h->use_count++;
@@ -919,9 +932,6 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
                        return ret;
        }
 
-       if (root != root->fs_info->extent_root)
-               switch_commit_root(root);
-
        return 0;
 }
 
@@ -977,15 +987,16 @@ static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
                list_del_init(next);
                root = list_entry(next, struct btrfs_root, dirty_list);
 
+               if (root != fs_info->extent_root)
+                       list_add_tail(&root->dirty_list,
+                                     &trans->transaction->switch_commits);
                ret = update_cowonly_root(trans, root);
                if (ret)
                        return ret;
        }
 
-       down_write(&fs_info->extent_commit_sem);
-       switch_commit_root(fs_info->extent_root);
-       up_write(&fs_info->extent_commit_sem);
-
+       list_add_tail(&fs_info->extent_root->dirty_list,
+                     &trans->transaction->switch_commits);
        btrfs_after_dev_replace_commit(fs_info);
 
        return 0;
@@ -1042,11 +1053,8 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
                        smp_wmb();
 
                        if (root->commit_root != root->node) {
-                               mutex_lock(&root->fs_commit_mutex);
-                               switch_commit_root(root);
-                               btrfs_unpin_free_ino(root);
-                               mutex_unlock(&root->fs_commit_mutex);
-
+                               list_add_tail(&root->dirty_list,
+                                       &trans->transaction->switch_commits);
                                btrfs_set_root_node(&root->root_item,
                                                    root->node);
                        }
@@ -1857,11 +1865,15 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
        btrfs_set_root_node(&root->fs_info->tree_root->root_item,
                            root->fs_info->tree_root->node);
-       switch_commit_root(root->fs_info->tree_root);
+       list_add_tail(&root->fs_info->tree_root->dirty_list,
+                     &cur_trans->switch_commits);
 
        btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
                            root->fs_info->chunk_root->node);
-       switch_commit_root(root->fs_info->chunk_root);
+       list_add_tail(&root->fs_info->chunk_root->dirty_list,
+                     &cur_trans->switch_commits);
+
+       switch_commit_roots(cur_trans, root->fs_info);
 
        assert_qgroups_uptodate(trans);
        update_super_roots(root);
index 6ac037e..b57b924 100644 (file)
@@ -57,6 +57,7 @@ struct btrfs_transaction {
        struct list_head pending_snapshots;
        struct list_head ordered_operations;
        struct list_head pending_chunks;
+       struct list_head switch_commits;
        struct btrfs_delayed_ref_root delayed_refs;
        int aborted;
 };
@@ -78,6 +79,8 @@ struct btrfs_transaction {
 #define TRANS_EXTWRITERS       (__TRANS_USERSPACE | __TRANS_START |    \
                                 __TRANS_ATTACH)
 
+#define BTRFS_SEND_TRANS_STUB  1
+
 struct btrfs_trans_handle {
        u64 transid;
        u64 bytes_reserved;
index d241130..49d7fab 100644 (file)
@@ -448,6 +448,14 @@ static void pending_bios_fn(struct btrfs_work *work)
        run_scheduled_bios(device);
 }
 
+/*
+ * Add new device to list of registered devices
+ *
+ * Returns:
+ * 1   - first time device is seen
+ * 0   - device already known
+ * < 0 - error
+ */
 static noinline int device_list_add(const char *path,
                           struct btrfs_super_block *disk_super,
                           u64 devid, struct btrfs_fs_devices **fs_devices_ret)
@@ -455,6 +463,7 @@ static noinline int device_list_add(const char *path,
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices;
        struct rcu_string *name;
+       int ret = 0;
        u64 found_transid = btrfs_super_generation(disk_super);
 
        fs_devices = find_fsid(disk_super->fsid);
@@ -495,6 +504,7 @@ static noinline int device_list_add(const char *path,
                fs_devices->num_devices++;
                mutex_unlock(&fs_devices->device_list_mutex);
 
+               ret = 1;
                device->fs_devices = fs_devices;
        } else if (!device->name || strcmp(device->name->str, path)) {
                name = rcu_string_strdup(path, GFP_NOFS);
@@ -513,7 +523,8 @@ static noinline int device_list_add(const char *path,
                fs_devices->latest_trans = found_transid;
        }
        *fs_devices_ret = fs_devices;
-       return 0;
+
+       return ret;
 }
 
 static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
@@ -910,17 +921,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        transid = btrfs_super_generation(disk_super);
        total_devices = btrfs_super_num_devices(disk_super);
 
-       if (disk_super->label[0]) {
-               if (disk_super->label[BTRFS_LABEL_SIZE - 1])
-                       disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
-               printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
-       } else {
-               printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
-       }
-
-       printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
-
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
+       if (ret > 0) {
+               if (disk_super->label[0]) {
+                       if (disk_super->label[BTRFS_LABEL_SIZE - 1])
+                               disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
+                       printk(KERN_INFO "BTRFS: device label %s ", disk_super->label);
+               } else {
+                       printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid);
+               }
+
+               printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
+               ret = 0;
+       }
        if (!ret && fs_devices_ret)
                (*fs_devices_ret)->total_devices = total_devices;