Merge branch 'integrity-check-patch-v2' of git://btrfs.giantdisaster.de/git/btrfs...
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 4c60ca0..59e878f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -23,6 +23,7 @@
 #include <linux/random.h>
 #include <linux/iocontext.h>
 #include <linux/capability.h>
+#include <linux/kthread.h>
 #include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
@@ -32,6 +33,7 @@
 #include "print-tree.h"
 #include "volumes.h"
 #include "async-thread.h"
+#include "check-integrity.h"
 
 static int init_first_rw_device(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root,
@@ -246,7 +248,7 @@ loop_lock:
                        sync_pending = 0;
                }
 
-               submit_bio(cur->bi_rw, cur);
+               btrfsic_submit_bio(cur->bi_rw, cur);
                num_run++;
                batch_run++;
                if (need_resched())
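
A note on the submit_bio() to btrfsic_submit_bio() conversions in this file: as I understand the integrity-check series, the wrapper costs nothing when the checker is compiled out. A sketch of the expected check-integrity.h arrangement (an assumption here, verify against the actual header):

    #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
    void btrfsic_submit_bio(int rw, struct bio *bio);  /* verify, then submit */
    #else
    #define btrfsic_submit_bio submit_bio              /* plain pass-through */
    #endif
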
@@ -829,7 +831,6 @@ out:
 
 /*
  * find_free_dev_extent - find free space in the specified device
- * @trans:     transaction handler
  * @device:    the device which we search the free space in
  * @num_bytes: the size of the free space that we need
  * @start:     store the start of the free space.
@@ -848,8 +849,7 @@ out:
  * But if we don't find suitable free space, it is used to store the size of
  * the max free space.
  */
-int find_free_dev_extent(struct btrfs_trans_handle *trans,
-                        struct btrfs_device *device, u64 num_bytes,
+int find_free_dev_extent(struct btrfs_device *device, u64 num_bytes,
                         u64 *start, u64 *len)
 {
        struct btrfs_key key;
@@ -893,7 +893,7 @@ int find_free_dev_extent(struct btrfs_trans_handle *trans,
        key.offset = search_start;
        key.type = BTRFS_DEV_EXTENT_KEY;
 
-       ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto out;
        if (ret > 0) {
@@ -1467,8 +1467,7 @@ error_undo:
 /*
  * does all the dirty work required for changing the file system's UUID.
  */
-static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans,
-                               struct btrfs_root *root)
+static int btrfs_prepare_sprout(struct btrfs_root *root)
 {
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
        struct btrfs_fs_devices *old_devices;
@@ -1692,7 +1691,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
 
        if (seeding_dev) {
                sb->s_flags &= ~MS_RDONLY;
-               ret = btrfs_prepare_sprout(trans, root);
+               ret = btrfs_prepare_sprout(root);
                BUG_ON(ret);
        }
 
@@ -2164,6 +2163,46 @@ out:
        return ret;
 }
 
+/*
+ * This is a heuristic used to reduce the number of chunks balanced on
+ * resume after balance was interrupted.
+ */
+static void update_balance_args(struct btrfs_balance_control *bctl)
+{
+       /*
+        * Turn on soft mode for chunk types that were being converted.
+        */
+       if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
+               bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
+       if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
+               bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
+       if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
+               bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
+
+       /*
+        * Turn on the usage filter if it is not already in use.  The idea is
+        * that chunks that we have already balanced should be
+        * reasonably full.  Don't do it for chunks that are being
+        * converted - that will keep us from relocating unconverted
+        * (albeit full) chunks.
+        */
+       if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+               bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
+               bctl->data.usage = 90;
+       }
+       if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+               bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
+               bctl->sys.usage = 90;
+       }
+       if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
+           !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
+               bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
+               bctl->meta.usage = 90;
+       }
+}
+
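To see what update_balance_args() does to a resumed balance, here is a minimal userspace sketch of the same flag logic; the bit values and names are made up for illustration (the real BTRFS_BALANCE_ARGS_* definitions live elsewhere in the tree):

    #include <stdio.h>
    #include <stdint.h>

    #define ARGS_CONVERT (1ULL << 0)   /* illustrative bits, not the real values */
    #define ARGS_SOFT    (1ULL << 1)
    #define ARGS_USAGE   (1ULL << 2)

    struct args { uint64_t flags; uint64_t usage; };

    static void update_args(struct args *a)
    {
            if (a->flags & ARGS_CONVERT)    /* converting: finish it softly */
                    a->flags |= ARGS_SOFT;
            if (!(a->flags & ARGS_USAGE) && !(a->flags & ARGS_CONVERT)) {
                    a->flags |= ARGS_USAGE; /* skip chunks that are */
                    a->usage = 90;          /* already reasonably full */
            }
    }

    int main(void)
    {
            struct args data = { ARGS_CONVERT, 0 }, meta = { 0, 0 };

            update_args(&data);
            update_args(&meta);
            printf("data: soft=%d usage_filter=%d\n",
                   !!(data.flags & ARGS_SOFT), !!(data.flags & ARGS_USAGE));
            printf("meta: usage_filter=%d usage=%d\n",
                   !!(meta.flags & ARGS_USAGE), (int)meta.usage);
            return 0;
    }
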
 /*
  * Should be called with both balance and volume mutexes held to
  * serialize other volume operations (add_dev/rm_dev/resize) with
@@ -2400,6 +2439,7 @@ static u64 div_factor(u64 num, int factor)
 
 static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 {
+       struct btrfs_balance_control *bctl = fs_info->balance_ctl;
        struct btrfs_root *chunk_root = fs_info->chunk_root;
        struct btrfs_root *dev_root = fs_info->dev_root;
        struct list_head *devices;
@@ -2415,6 +2455,7 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
        int slot;
        int ret;
        int enospc_errors = 0;
+       bool counting = true;
 
        /* step one make some room on all the devices */
        devices = &fs_info->fs_devices->devices;
@@ -2446,11 +2487,23 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
                ret = -ENOMEM;
                goto error;
        }
+
+       /* zero out stat counters */
+       spin_lock(&fs_info->balance_lock);
+       memset(&bctl->stat, 0, sizeof(bctl->stat));
+       spin_unlock(&fs_info->balance_lock);
+again:
        key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        key.offset = (u64)-1;
        key.type = BTRFS_CHUNK_ITEM_KEY;
 
        while (1) {
+               if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
+                   atomic_read(&fs_info->balance_cancel_req)) {
+                       ret = -ECANCELED;
+                       goto error;
+               }
+
                ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
                if (ret < 0)
                        goto error;
@@ -2482,24 +2535,47 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info)
 
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 
+               if (!counting) {
+                       spin_lock(&fs_info->balance_lock);
+                       bctl->stat.considered++;
+                       spin_unlock(&fs_info->balance_lock);
+               }
+
                ret = should_balance_chunk(chunk_root, leaf, chunk,
                                           found_key.offset);
                btrfs_release_path(path);
                if (!ret)
                        goto loop;
 
+               if (counting) {
+                       spin_lock(&fs_info->balance_lock);
+                       bctl->stat.expected++;
+                       spin_unlock(&fs_info->balance_lock);
+                       goto loop;
+               }
+
                ret = btrfs_relocate_chunk(chunk_root,
                                           chunk_root->root_key.objectid,
                                           found_key.objectid,
                                           found_key.offset);
                if (ret && ret != -ENOSPC)
                        goto error;
-               if (ret == -ENOSPC)
+               if (ret == -ENOSPC) {
                        enospc_errors++;
+               } else {
+                       spin_lock(&fs_info->balance_lock);
+                       bctl->stat.completed++;
+                       spin_unlock(&fs_info->balance_lock);
+               }
 loop:
                key.offset = found_key.offset - 1;
        }
 
+       if (counting) {
+               btrfs_release_path(path);
+               counting = false;
+               goto again;
+       }
 error:
        btrfs_free_path(path);
        if (enospc_errors) {
@@ -2512,6 +2588,14 @@ error:
        return ret;
 }
 
+static inline int balance_need_close(struct btrfs_fs_info *fs_info)
+{
+       /* cancel requested || normal exit path */
+       return atomic_read(&fs_info->balance_cancel_req) ||
+               (atomic_read(&fs_info->balance_pause_req) == 0 &&
+                atomic_read(&fs_info->balance_cancel_req) == 0);
+}
+
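balance_need_close() reads oddly at first glance; a compilable truth table (userspace sketch) makes the three outcomes explicit:

    #include <assert.h>

    static int need_close(int pause_req, int cancel_req)
    {
            return cancel_req || (pause_req == 0 && cancel_req == 0);
    }

    int main(void)
    {
            assert(need_close(0, 0) == 1);  /* normal exit: drop the balance item */
            assert(need_close(1, 0) == 0);  /* pause: keep the item for resume */
            assert(need_close(0, 1) == 1);  /* cancel: drop the item */
            assert(need_close(1, 1) == 1);  /* cancel wins over a pending pause */
            return 0;
    }
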
 static void __cancel_balance(struct btrfs_fs_info *fs_info)
 {
        int ret;
@@ -2521,7 +2605,7 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        BUG_ON(ret);
 }
 
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
+void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
                               struct btrfs_ioctl_balance_args *bargs);
 
 /*
@@ -2534,7 +2618,9 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        u64 allowed;
        int ret;
 
-       if (btrfs_fs_closing(fs_info)) {
+       if (btrfs_fs_closing(fs_info) ||
+           atomic_read(&fs_info->balance_pause_req) ||
+           atomic_read(&fs_info->balance_cancel_req)) {
                ret = -EINVAL;
                goto out;
        }
@@ -2626,30 +2712,196 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
 
 do_balance:
        ret = insert_balance_item(fs_info->tree_root, bctl);
-       if (ret)
+       if (ret && ret != -EEXIST)
                goto out;
 
-       set_balance_control(bctl);
+       if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
+               BUG_ON(ret == -EEXIST);
+               set_balance_control(bctl);
+       } else {
+               BUG_ON(ret != -EEXIST);
+               spin_lock(&fs_info->balance_lock);
+               update_balance_args(bctl);
+               spin_unlock(&fs_info->balance_lock);
+       }
 
+       atomic_inc(&fs_info->balance_running);
        mutex_unlock(&fs_info->balance_mutex);
 
        ret = __btrfs_balance(fs_info);
 
        mutex_lock(&fs_info->balance_mutex);
+       atomic_dec(&fs_info->balance_running);
 
        if (bargs) {
                memset(bargs, 0, sizeof(*bargs));
-               update_ioctl_balance_args(fs_info, bargs);
+               update_ioctl_balance_args(fs_info, 0, bargs);
        }
 
-       __cancel_balance(fs_info);
+       if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
+           balance_need_close(fs_info)) {
+               __cancel_balance(fs_info);
+       }
+
+       wake_up(&fs_info->balance_wait_q);
 
        return ret;
 out:
+       if (bctl->flags & BTRFS_BALANCE_RESUME)
+               __cancel_balance(fs_info);
+       else
+               kfree(bctl);
+       return ret;
+}
+
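The -EEXIST handling around insert_balance_item() above encodes an invariant: a fresh balance must not find a balance item on disk, and a resumed one must. A userspace restatement, with the errno value and resume bit hard-coded for the sketch:

    #include <assert.h>

    #define SK_EEXIST 17            /* errno value, hard-coded for the sketch */
    #define SK_RESUME (1ULL << 3)   /* illustrative bit, not the real flag */

    static void check_insert(int insert_ret, unsigned long long flags)
    {
            if (flags & SK_RESUME)  /* paused balance left its item behind */
                    assert(insert_ret == -SK_EEXIST);
            else                    /* fresh balance: item must be brand new */
                    assert(insert_ret != -SK_EEXIST);
    }

    int main(void)
    {
            check_insert(0, 0);                  /* fresh balance, item inserted */
            check_insert(-SK_EEXIST, SK_RESUME); /* resume, item already there */
            return 0;
    }
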
+static int balance_kthread(void *data)
+{
+       struct btrfs_balance_control *bctl =
+                       (struct btrfs_balance_control *)data;
+       struct btrfs_fs_info *fs_info = bctl->fs_info;
+       int ret = 0;
+
+       mutex_lock(&fs_info->volume_mutex);
+       mutex_lock(&fs_info->balance_mutex);
+
+       set_balance_control(bctl);
+
+       if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) {
+               printk(KERN_INFO "btrfs: force skipping balance\n");
+       } else {
+               printk(KERN_INFO "btrfs: continuing balance\n");
+               ret = btrfs_balance(bctl, NULL);
+       }
+
+       mutex_unlock(&fs_info->balance_mutex);
+       mutex_unlock(&fs_info->volume_mutex);
+       return ret;
+}
+
+int btrfs_recover_balance(struct btrfs_root *tree_root)
+{
+       struct task_struct *tsk;
+       struct btrfs_balance_control *bctl;
+       struct btrfs_balance_item *item;
+       struct btrfs_disk_balance_args disk_bargs;
+       struct btrfs_path *path;
+       struct extent_buffer *leaf;
+       struct btrfs_key key;
+       int ret;
+
+       path = btrfs_alloc_path();
+       if (!path)
+               return -ENOMEM;
+
+       bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
+       if (!bctl) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       key.objectid = BTRFS_BALANCE_OBJECTID;
+       key.type = BTRFS_BALANCE_ITEM_KEY;
+       key.offset = 0;
+
+       ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0);
+       if (ret < 0)
+               goto out_bctl;
+       if (ret > 0) { /* ret = -ENOENT; */
+               ret = 0;
+               goto out_bctl;
+       }
+
+       leaf = path->nodes[0];
+       item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
+
+       bctl->fs_info = tree_root->fs_info;
+       bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME;
+
+       btrfs_balance_data(leaf, item, &disk_bargs);
+       btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
+       btrfs_balance_meta(leaf, item, &disk_bargs);
+       btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
+       btrfs_balance_sys(leaf, item, &disk_bargs);
+       btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
+
+       tsk = kthread_run(balance_kthread, bctl, "btrfs-balance");
+       if (IS_ERR(tsk))
+               ret = PTR_ERR(tsk);
+       else
+               goto out;
+
+out_bctl:
        kfree(bctl);
+out:
+       btrfs_free_path(path);
        return ret;
 }
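
btrfs_recover_balance() presumably gets called from the mount path. Something like the following, hypothetically placed in open_ctree() in disk-io.c, is how a caller might wire it up; this is orientation only, not a quote of the real call site:

    /* hypothetical call site, error handling style only */
    ret = btrfs_recover_balance(tree_root);
    if (ret)
            printk(KERN_WARNING "btrfs: failed to recover balance\n");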
 
+int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
+{
+       int ret = 0;
+
+       mutex_lock(&fs_info->balance_mutex);
+       if (!fs_info->balance_ctl) {
+               mutex_unlock(&fs_info->balance_mutex);
+               return -ENOTCONN;
+       }
+
+       if (atomic_read(&fs_info->balance_running)) {
+               atomic_inc(&fs_info->balance_pause_req);
+               mutex_unlock(&fs_info->balance_mutex);
+
+               wait_event(fs_info->balance_wait_q,
+                          atomic_read(&fs_info->balance_running) == 0);
+
+               mutex_lock(&fs_info->balance_mutex);
+               /* we are good with balance_ctl ripped off from under us */
+               BUG_ON(atomic_read(&fs_info->balance_running));
+               atomic_dec(&fs_info->balance_pause_req);
+       } else {
+               ret = -ENOTCONN;
+       }
+
+       mutex_unlock(&fs_info->balance_mutex);
+       return ret;
+}
+
+int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
+{
+       mutex_lock(&fs_info->balance_mutex);
+       if (!fs_info->balance_ctl) {
+               mutex_unlock(&fs_info->balance_mutex);
+               return -ENOTCONN;
+       }
+
+       atomic_inc(&fs_info->balance_cancel_req);
+       /*
+        * if we are running, just wait and return; the balance item is
+        * deleted in btrfs_balance() in this case
+        */
+       if (atomic_read(&fs_info->balance_running)) {
+               mutex_unlock(&fs_info->balance_mutex);
+               wait_event(fs_info->balance_wait_q,
+                          atomic_read(&fs_info->balance_running) == 0);
+               mutex_lock(&fs_info->balance_mutex);
+       } else {
+               /* __cancel_balance needs volume_mutex */
+               mutex_unlock(&fs_info->balance_mutex);
+               mutex_lock(&fs_info->volume_mutex);
+               mutex_lock(&fs_info->balance_mutex);
+
+               if (fs_info->balance_ctl)
+                       __cancel_balance(fs_info);
+
+               mutex_unlock(&fs_info->volume_mutex);
+       }
+
+       BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
+       atomic_dec(&fs_info->balance_cancel_req);
+       mutex_unlock(&fs_info->balance_mutex);
+       return 0;
+}
+
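The pause path is a small handshake: bump balance_pause_req, wait on balance_wait_q until balance_running hits zero, then retake the mutex. A userspace analogue with a condition variable standing in for the waitqueue (sketch only; build with -pthread):

    #include <pthread.h>
    #include <stdio.h>
    #include <unistd.h>

    static pthread_mutex_t balance_mutex = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t balance_wait_q = PTHREAD_COND_INITIALIZER;
    static int balance_running = 1;
    static int pause_req;

    static void *balancer(void *unused)
    {
            for (;;) {
                    pthread_mutex_lock(&balance_mutex);
                    if (pause_req) {            /* the -ECANCELED bail-out */
                            balance_running = 0;
                            pthread_cond_broadcast(&balance_wait_q);
                            pthread_mutex_unlock(&balance_mutex);
                            return NULL;
                    }
                    pthread_mutex_unlock(&balance_mutex);
                    usleep(1000);               /* "relocate one chunk" */
            }
    }

    int main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, balancer, NULL);

            pthread_mutex_lock(&balance_mutex); /* btrfs_pause_balance() */
            pause_req = 1;
            while (balance_running)
                    pthread_cond_wait(&balance_wait_q, &balance_mutex);
            pause_req = 0;
            pthread_mutex_unlock(&balance_mutex);

            pthread_join(t, NULL);
            puts("paused; balance item kept on disk for resume");
            return 0;
    }
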
 /*
  * shrinking a device means finding all of the device extents past
  * the new size, and then following the back refs to the chunks.
@@ -2790,8 +3042,7 @@ done:
        return ret;
 }
 
-static int btrfs_add_system_chunk(struct btrfs_trans_handle *trans,
-                          struct btrfs_root *root,
+static int btrfs_add_system_chunk(struct btrfs_root *root,
                           struct btrfs_key *key,
                           struct btrfs_chunk *chunk, int item_size)
 {
@@ -2908,7 +3159,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                max_stripe_size = 1024 * 1024 * 1024;
                max_chunk_size = 10 * max_stripe_size;
        } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
-               max_stripe_size = 256 * 1024 * 1024;
+               /* for larger filesystems, use larger metadata chunks */
+               if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024)
+                       max_stripe_size = 1024 * 1024 * 1024;
+               else
+                       max_stripe_size = 256 * 1024 * 1024;
                max_chunk_size = max_stripe_size;
        } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
                max_stripe_size = 8 * 1024 * 1024;
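
The new metadata sizing rule is a strict greater-than against 50 GiB of writable space, so the boundary itself keeps the small stripes. A quick userspace check:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t meta_stripe_size(uint64_t total_rw_bytes)
    {
            const uint64_t gib = 1024ULL * 1024 * 1024;

            /* same rule as above: >50 GiB gets 1 GiB metadata stripes */
            return total_rw_bytes > 50 * gib ? gib : 256 * 1024 * 1024;
    }

    int main(void)
    {
            const uint64_t gib = 1024ULL * 1024 * 1024;

            printf("50 GiB fs -> %4llu MiB\n",  /* boundary stays small */
                   (unsigned long long)(meta_stripe_size(50 * gib) >> 20));
            printf("51 GiB fs -> %4llu MiB\n",
                   (unsigned long long)(meta_stripe_size(51 * gib) >> 20));
            return 0;
    }
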
@@ -2963,7 +3218,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                if (total_avail == 0)
                        continue;
 
-               ret = find_free_dev_extent(trans, device,
+               ret = find_free_dev_extent(device,
                                           max_stripe_size * dev_stripes,
                                           &dev_offset, &max_avail);
                if (ret && ret != -ENOSPC)
@@ -3154,7 +3409,7 @@ static int __finish_chunk_alloc(struct btrfs_trans_handle *trans,
        BUG_ON(ret);
 
        if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
-               ret = btrfs_add_system_chunk(trans, chunk_root, &key, chunk,
+               ret = btrfs_add_system_chunk(chunk_root, &key, chunk,
                                             item_size);
                BUG_ON(ret);
        }
@@ -3366,26 +3621,13 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
        u64 stripe_nr;
        u64 stripe_nr_orig;
        u64 stripe_nr_end;
-       int stripes_allocated = 8;
-       int stripes_required = 1;
        int stripe_index;
        int i;
+       int ret = 0;
        int num_stripes;
        int max_errors = 0;
        struct btrfs_bio *bbio = NULL;
 
-       if (bbio_ret && !(rw & (REQ_WRITE | REQ_DISCARD)))
-               stripes_allocated = 1;
-again:
-       if (bbio_ret) {
-               bbio = kzalloc(btrfs_bio_size(stripes_allocated),
-                               GFP_NOFS);
-               if (!bbio)
-                       return -ENOMEM;
-
-               atomic_set(&bbio->error, 0);
-       }
-
        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree, logical, *length);
        read_unlock(&em_tree->lock);
@@ -3404,28 +3646,6 @@ again:
        if (mirror_num > map->num_stripes)
                mirror_num = 0;
 
-       /* if our btrfs_bio struct is too small, back off and try again */
-       if (rw & REQ_WRITE) {
-               if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
-                                BTRFS_BLOCK_GROUP_DUP)) {
-                       stripes_required = map->num_stripes;
-                       max_errors = 1;
-               } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
-                       stripes_required = map->sub_stripes;
-                       max_errors = 1;
-               }
-       }
-       if (rw & REQ_DISCARD) {
-               if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK)
-                       stripes_required = map->num_stripes;
-       }
-       if (bbio_ret && (rw & (REQ_WRITE | REQ_DISCARD)) &&
-           stripes_allocated < stripes_required) {
-               stripes_allocated = map->num_stripes;
-               free_extent_map(em);
-               kfree(bbio);
-               goto again;
-       }
        stripe_nr = offset;
        /*
         * stripe_nr counts the total number of stripes we have to stride
@@ -3517,81 +3737,55 @@ again:
        }
        BUG_ON(stripe_index >= map->num_stripes);
 
+       bbio = kzalloc(btrfs_bio_size(num_stripes), GFP_NOFS);
+       if (!bbio) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       atomic_set(&bbio->error, 0);
+
        if (rw & REQ_DISCARD) {
+               int factor = 0;
+               int sub_stripes = 0;
+               u64 stripes_per_dev = 0;
+               u32 remaining_stripes = 0;
+
+               if (map->type &
+                   (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) {
+                       if (map->type & BTRFS_BLOCK_GROUP_RAID0)
+                               sub_stripes = 1;
+                       else
+                               sub_stripes = map->sub_stripes;
+
+                       factor = map->num_stripes / sub_stripes;
+                       stripes_per_dev = div_u64_rem(stripe_nr_end -
+                                                     stripe_nr_orig,
+                                                     factor,
+                                                     &remaining_stripes);
+               }
+
                for (i = 0; i < num_stripes; i++) {
                        bbio->stripes[i].physical =
                                map->stripes[stripe_index].physical +
                                stripe_offset + stripe_nr * map->stripe_len;
                        bbio->stripes[i].dev = map->stripes[stripe_index].dev;
 
-                       if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
-                               u64 stripes;
-                               u32 last_stripe = 0;
-                               int j;
-
-                               div_u64_rem(stripe_nr_end - 1,
-                                           map->num_stripes,
-                                           &last_stripe);
-
-                               for (j = 0; j < map->num_stripes; j++) {
-                                       u32 test;
-
-                                       div_u64_rem(stripe_nr_end - 1 - j,
-                                                   map->num_stripes, &test);
-                                       if (test == stripe_index)
-                                               break;
-                               }
-                               stripes = stripe_nr_end - 1 - j;
-                               do_div(stripes, map->num_stripes);
-                               bbio->stripes[i].length = map->stripe_len *
-                                       (stripes - stripe_nr + 1);
-
-                               if (i == 0) {
-                                       bbio->stripes[i].length -=
-                                               stripe_offset;
-                                       stripe_offset = 0;
-                               }
-                               if (stripe_index == last_stripe)
-                                       bbio->stripes[i].length -=
-                                               stripe_end_offset;
-                       } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
-                               u64 stripes;
-                               int j;
-                               int factor = map->num_stripes /
-                                            map->sub_stripes;
-                               u32 last_stripe = 0;
-
-                               div_u64_rem(stripe_nr_end - 1,
-                                           factor, &last_stripe);
-                               last_stripe *= map->sub_stripes;
-
-                               for (j = 0; j < factor; j++) {
-                                       u32 test;
-
-                                       div_u64_rem(stripe_nr_end - 1 - j,
-                                                   factor, &test);
-
-                                       if (test ==
-                                           stripe_index / map->sub_stripes)
-                                               break;
-                               }
-                               stripes = stripe_nr_end - 1 - j;
-                               do_div(stripes, factor);
-                               bbio->stripes[i].length = map->stripe_len *
-                                       (stripes - stripe_nr + 1);
-
-                               if (i < map->sub_stripes) {
+                       if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
+                                        BTRFS_BLOCK_GROUP_RAID10)) {
+                               bbio->stripes[i].length = stripes_per_dev *
+                                                         map->stripe_len;
+                               if (i / sub_stripes < remaining_stripes)
+                                       bbio->stripes[i].length +=
+                                               map->stripe_len;
+                               if (i < sub_stripes)
                                        bbio->stripes[i].length -=
                                                stripe_offset;
-                                       if (i == map->sub_stripes - 1)
-                                               stripe_offset = 0;
-                               }
-                               if (stripe_index >= last_stripe &&
-                                   stripe_index <= (last_stripe +
-                                                    map->sub_stripes - 1)) {
+                               if ((i / sub_stripes + 1) %
+                                   sub_stripes == remaining_stripes)
                                        bbio->stripes[i].length -=
                                                stripe_end_offset;
-                               }
+                               if (i == sub_stripes - 1)
+                                       stripe_offset = 0;
                        } else
                                bbio->stripes[i].length = *length;
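
The div_u64_rem() above replaces the old per-stripe search loops: the quotient is the number of full rotations every device sees, and the remainder is how many devices, counting from the first in rotation order, receive one extra stripe. A worked RAID0 example in userspace (div_u64_rem() returns the quotient and stores the remainder; plain / and % stand in for it here):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            /* 3-device RAID0, so sub_stripes = 1 and factor = num_stripes = 3 */
            int factor = 3;
            uint64_t stripe_nr_orig = 10, stripe_nr_end = 17; /* 7 stripes */

            uint64_t stripes_per_dev = (stripe_nr_end - stripe_nr_orig) / factor;
            uint32_t remaining = (stripe_nr_end - stripe_nr_orig) % factor;

            for (int i = 0; i < factor; i++) {
                    uint64_t n = stripes_per_dev + (i < remaining ? 1 : 0);
                    printf("device slot %d: %llu stripes\n",
                           i, (unsigned long long)n);
            }
            /* prints 3, 2, 2: seven stripes spread over three devices,
             * matching the "i / sub_stripes < remaining_stripes" bump above */
            return 0;
    }
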
 
@@ -3613,15 +3807,22 @@ again:
                        stripe_index++;
                }
        }
-       if (bbio_ret) {
-               *bbio_ret = bbio;
-               bbio->num_stripes = num_stripes;
-               bbio->max_errors = max_errors;
-               bbio->mirror_num = mirror_num;
+
+       if (rw & REQ_WRITE) {
+               if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
+                                BTRFS_BLOCK_GROUP_RAID10 |
+                                BTRFS_BLOCK_GROUP_DUP)) {
+                       max_errors = 1;
+               }
        }
+
+       *bbio_ret = bbio;
+       bbio->num_stripes = num_stripes;
+       bbio->max_errors = max_errors;
+       bbio->mirror_num = mirror_num;
 out:
        free_extent_map(em);
-       return 0;
+       return ret;
 }
 
 int btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw,
@@ -3762,7 +3963,7 @@ static noinline int schedule_bio(struct btrfs_root *root,
        /* don't bother with additional async steps for reads, right now */
        if (!(rw & REQ_WRITE)) {
                bio_get(bio);
-               submit_bio(rw, bio);
+               btrfsic_submit_bio(rw, bio);
                bio_put(bio);
                return 0;
        }
@@ -3857,7 +4058,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
                        if (async_submit)
                                schedule_bio(root, dev, rw, bio);
                        else
-                               submit_bio(rw, bio);
+                               btrfsic_submit_bio(rw, bio);
                } else {
                        bio->bi_bdev = root->fs_info->fs_devices->latest_bdev;
                        bio->bi_sector = logical >> 9;
@@ -4026,7 +4227,7 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
        struct btrfs_fs_devices *fs_devices;
        int ret;
 
-       mutex_lock(&uuid_mutex);
+       BUG_ON(!mutex_is_locked(&uuid_mutex));
 
        fs_devices = root->fs_info->fs_devices->seed;
        while (fs_devices) {
@@ -4064,7 +4265,6 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid)
        fs_devices->seed = root->fs_info->fs_devices->seed;
        root->fs_info->fs_devices->seed = fs_devices;
 out:
-       mutex_unlock(&uuid_mutex);
        return ret;
 }
 
@@ -4207,6 +4407,9 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
        if (!path)
                return -ENOMEM;
 
+       mutex_lock(&uuid_mutex);
+       lock_chunks(root);
+
        /* first we search for all of the device items, and then we
         * read in all of the chunk items.  This way we can create chunk
         * mappings that reference all of the devices that are found
@@ -4257,6 +4460,9 @@ again:
        }
        ret = 0;
 error:
+       unlock_chunks(root);
+       mutex_unlock(&uuid_mutex);
+
        btrfs_free_path(path);
        return ret;
 }
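
Taken together, the last two hunks shift the locking for the initial chunk-tree scan to the caller: btrfs_read_chunk_tree() now holds uuid_mutex and the chunk mutex across the whole scan, and open_seed_devices() only asserts the former. In outline (a restatement of the hunks above, not new code):

    mutex_lock(&uuid_mutex);
    lock_chunks(root);
    /* scan device items, then chunk items; read_one_chunk() can reach
     * open_seed_devices(), which now only asserts that uuid_mutex is
     * held rather than taking and dropping it per seed fsid */
    unlock_chunks(root);
    mutex_unlock(&uuid_mutex);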