Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 4 Jun 2016 18:56:28 +0000 (11:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 4 Jun 2016 18:56:28 +0000 (11:56 -0700)
Pull btrfs fixes from Chris Mason:
 "The important part of this pull is Filipe's set of fixes for btrfs
  device replacement.  Filipe fixed a few issues seen on the list and a
  number he found on his own"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: deal with duplicates during extent_map insertion in btrfs_get_extent
  Btrfs: fix race between device replace and read repair
  Btrfs: fix race between device replace and discard
  Btrfs: fix race between device replace and chunk allocation
  Btrfs: fix race setting block group back to RW mode during device replace
  Btrfs: fix unprotected assignment of the left cursor for device replace
  Btrfs: fix race setting block group readonly during device replace
  Btrfs: fix race between device replace and block group removal
  Btrfs: fix race between readahead and device replace/removal

fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/inode.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/reada.c
fs/btrfs/scrub.c
fs/btrfs/volumes.c

index a400951..689d25a 100644 (file)
@@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
        struct btrfs_bio *bbio = NULL;
 
 
+       /*
+        * Avoid races with device replace and make sure our bbio has devices
+        * associated to its stripes that don't go away while we are discarding.
+        */
+       btrfs_bio_counter_inc_blocked(root->fs_info);
        /* Tell the block device(s) that the sectors can be discarded */
        ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
                              bytenr, &num_bytes, &bbio, 0);
@@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
                }
                btrfs_put_bbio(bbio);
        }
+       btrfs_bio_counter_dec(root->fs_info);
 
        if (actual_bytes)
                *actual_bytes = discarded_bytes;
index 3cd5782..6e953de 100644 (file)
@@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
        bio->bi_iter.bi_size = 0;
        map_length = length;
 
+       /*
+        * Avoid races with device replace and make sure our bbio has devices
+        * associated to its stripes that don't go away while we are doing the
+        * read repair operation.
+        */
+       btrfs_bio_counter_inc_blocked(fs_info);
        ret = btrfs_map_block(fs_info, WRITE, logical,
                              &map_length, &bbio, mirror_num);
        if (ret) {
+               btrfs_bio_counter_dec(fs_info);
                bio_put(bio);
                return -EIO;
        }
@@ -2037,6 +2044,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
        dev = bbio->stripes[mirror_num-1].dev;
        btrfs_put_bbio(bbio);
        if (!dev || !dev->bdev || !dev->writeable) {
+               btrfs_bio_counter_dec(fs_info);
                bio_put(bio);
                return -EIO;
        }
@@ -2045,6 +2053,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
 
        if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
                /* try to remap that extent elsewhere? */
+               btrfs_bio_counter_dec(fs_info);
                bio_put(bio);
                btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
                return -EIO;
@@ -2054,6 +2063,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
                "read error corrected: ino %llu off %llu (dev %s sector %llu)",
                                  btrfs_ino(inode), start,
                                  rcu_str_deref(dev->name), sector);
+       btrfs_bio_counter_dec(fs_info);
        bio_put(bio);
        return 0;
 }
index 2704995..8b1212e 100644 (file)
@@ -6979,7 +6979,18 @@ insert:
                 * existing will always be non-NULL, since there must be
                 * extent causing the -EEXIST.
                 */
-               if (start >= extent_map_end(existing) ||
+               if (existing->start == em->start &&
+                   extent_map_end(existing) == extent_map_end(em) &&
+                   em->block_start == existing->block_start) {
+                       /*
+                        * these two extents are the same, it happens
+                        * with inlines especially
+                        */
+                       free_extent_map(em);
+                       em = existing;
+                       err = 0;
+
+               } else if (start >= extent_map_end(existing) ||
                    start <= existing->start) {
                        /*
                         * The existing extent map is the one nearest to
index 5591704..e96634a 100644 (file)
@@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
        return count;
 }
 
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
                              const u64 range_start, const u64 range_len)
 {
        struct btrfs_root *root;
        struct list_head splice;
        int done;
+       int total_done = 0;
 
        INIT_LIST_HEAD(&splice);
 
@@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
                done = btrfs_wait_ordered_extents(root, nr,
                                                  range_start, range_len);
                btrfs_put_fs_root(root);
+               total_done += done;
 
                spin_lock(&fs_info->ordered_root_lock);
                if (nr != -1) {
@@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
        list_splice_tail(&splice, &fs_info->ordered_roots);
        spin_unlock(&fs_info->ordered_root_lock);
        mutex_unlock(&fs_info->ordered_operations_mutex);
+
+       return total_done;
 }
 
 /*
index 2049c9b..4515077 100644 (file)
@@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
                           u32 *sum, int len);
 int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
                               const u64 range_start, const u64 range_len);
-void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
+int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
                              const u64 range_start, const u64 range_len);
 void btrfs_get_logged_extents(struct inode *inode,
                              struct list_head *logged_list,
index 298631e..8428db7 100644 (file)
@@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
 
        do {
                enqueued = 0;
+               mutex_lock(&fs_devices->device_list_mutex);
                list_for_each_entry(device, &fs_devices->devices, dev_list) {
                        if (atomic_read(&device->reada_in_flight) <
                            MAX_IN_FLIGHT)
                                enqueued += reada_start_machine_dev(fs_info,
                                                                    device);
                }
+               mutex_unlock(&fs_devices->device_list_mutex);
                total += enqueued;
        } while (enqueued && total < 10000);
 
index 46d847f..70427ef 100644 (file)
@@ -3582,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                 */
                scrub_pause_on(fs_info);
                ret = btrfs_inc_block_group_ro(root, cache);
+               if (!ret && is_dev_replace) {
+                       /*
+                        * If we are doing a device replace wait for any tasks
+                        * that started delalloc right before we set the block
+                        * group to RO mode, as they might have just allocated
+                        * an extent from it or decided they could do a nocow
+                        * write. And if any such tasks did that, wait for their
+                        * ordered extents to complete and then commit the
+                        * current transaction, so that we can later see the new
+                        * extent items in the extent tree - the ordered extents
+                        * create delayed data references (for cow writes) when
+                        * they complete, which will be run and insert the
+                        * corresponding extent items into the extent tree when
+                        * we commit the transaction they used when running
+                        * inode.c:btrfs_finish_ordered_io(). We later use
+                        * the commit root of the extent tree to find extents
+                        * to copy from the srcdev into the tgtdev, and we don't
+                        * want to miss any new extents.
+                        */
+                       btrfs_wait_block_group_reservations(cache);
+                       btrfs_wait_nocow_writers(cache);
+                       ret = btrfs_wait_ordered_roots(fs_info, -1,
+                                                      cache->key.objectid,
+                                                      cache->key.offset);
+                       if (ret > 0) {
+                               struct btrfs_trans_handle *trans;
+
+                               trans = btrfs_join_transaction(root);
+                               if (IS_ERR(trans))
+                                       ret = PTR_ERR(trans);
+                               else
+                                       ret = btrfs_commit_transaction(trans,
+                                                                      root);
+                               if (ret) {
+                                       scrub_pause_off(fs_info);
+                                       btrfs_put_block_group(cache);
+                                       break;
+                               }
+                       }
+               }
                scrub_pause_off(fs_info);
 
                if (ret == 0) {
@@ -3602,9 +3642,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                        break;
                }
 
+               btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
                dev_replace->cursor_right = found_key.offset + length;
                dev_replace->cursor_left = found_key.offset;
                dev_replace->item_needs_writeback = 1;
+               btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
                ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
                                  found_key.offset, cache, is_dev_replace);
 
@@ -3640,6 +3682,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
 
                scrub_pause_off(fs_info);
 
+               btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
+               dev_replace->cursor_left = dev_replace->cursor_right;
+               dev_replace->item_needs_writeback = 1;
+               btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
+
                if (ro_set)
                        btrfs_dec_block_group_ro(root, cache);
 
@@ -3677,9 +3724,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                        ret = -ENOMEM;
                        break;
                }
-
-               dev_replace->cursor_left = dev_replace->cursor_right;
-               dev_replace->item_needs_writeback = 1;
 skip:
                key.offset = found_key.offset + length;
                btrfs_release_path(path);
index bdc6256..da9e003 100644 (file)
@@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
        u64 dev_extent_len = 0;
        u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
        int i, ret = 0;
+       struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 
        /* Just in case */
        root = root->fs_info->chunk_root;
@@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
        check_system_chunk(trans, extent_root, map->type);
        unlock_chunks(root->fs_info->chunk_root);
 
+       /*
+        * Take the device list mutex to prevent races with the final phase of
+        * a device replace operation that replaces the device object associated
+        * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
+        */
+       mutex_lock(&fs_devices->device_list_mutex);
        for (i = 0; i < map->num_stripes; i++) {
                struct btrfs_device *device = map->stripes[i].dev;
                ret = btrfs_free_dev_extent(trans, device,
                                            map->stripes[i].physical,
                                            &dev_extent_len);
                if (ret) {
+                       mutex_unlock(&fs_devices->device_list_mutex);
                        btrfs_abort_transaction(trans, root, ret);
                        goto out;
                }
@@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
                if (map->stripes[i].dev) {
                        ret = btrfs_update_device(trans, map->stripes[i].dev);
                        if (ret) {
+                               mutex_unlock(&fs_devices->device_list_mutex);
                                btrfs_abort_transaction(trans, root, ret);
                                goto out;
                        }
                }
        }
+       mutex_unlock(&fs_devices->device_list_mutex);
+
        ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
        if (ret) {
                btrfs_abort_transaction(trans, root, ret);
@@ -5762,20 +5773,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
                        }
                }
                if (found) {
-                       if (physical_of_found + map->stripe_len <=
-                           dev_replace->cursor_left) {
-                               struct btrfs_bio_stripe *tgtdev_stripe =
-                                       bbio->stripes + num_stripes;
+                       struct btrfs_bio_stripe *tgtdev_stripe =
+                               bbio->stripes + num_stripes;
 
-                               tgtdev_stripe->physical = physical_of_found;
-                               tgtdev_stripe->length =
-                                       bbio->stripes[index_srcdev].length;
-                               tgtdev_stripe->dev = dev_replace->tgtdev;
-                               bbio->tgtdev_map[index_srcdev] = num_stripes;
+                       tgtdev_stripe->physical = physical_of_found;
+                       tgtdev_stripe->length =
+                               bbio->stripes[index_srcdev].length;
+                       tgtdev_stripe->dev = dev_replace->tgtdev;
+                       bbio->tgtdev_map[index_srcdev] = num_stripes;
 
-                               tgtdev_indexes++;
-                               num_stripes++;
-                       }
+                       tgtdev_indexes++;
+                       num_stripes++;
                }
        }