Btrfs: fix chunk allocation error handling
[cascardo/linux.git] / fs / btrfs / volumes.c
index 4696098..13efbcf 100644 (file)
@@ -792,26 +792,77 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
        return ret;
 }
 
+/*
+ * Look for a btrfs signature on a device. This may be called outside of the
+ * mount path, and we are not allowed to call set_blocksize during the scan.
+ * The superblock is read via the page cache.
+ */
 int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                          struct btrfs_fs_devices **fs_devices_ret)
 {
        struct btrfs_super_block *disk_super;
        struct block_device *bdev;
-       struct buffer_head *bh;
-       int ret;
+       struct page *page;
+       void *p;
+       int ret = -EINVAL;
        u64 devid;
        u64 transid;
        u64 total_devices;
+       u64 bytenr;
+       pgoff_t index;
 
+       /*
+        * we would like to check all the supers, but that would make
+        * a btrfs mount succeed after a mkfs from a different FS.
+        * So, we need to add a special mount option to scan for
+        * later supers, using BTRFS_SUPER_MIRROR_MAX instead
+        */
+       bytenr = btrfs_sb_offset(0);
        flags |= FMODE_EXCL;
        mutex_lock(&uuid_mutex);
-       ret = btrfs_get_bdev_and_sb(path, flags, holder, 0, &bdev, &bh);
-       if (ret)
+
+       bdev = blkdev_get_by_path(path, flags, holder);
+
+       if (IS_ERR(bdev)) {
+               ret = PTR_ERR(bdev);
+               printk(KERN_INFO "btrfs: open %s failed\n", path);
                goto error;
-       disk_super = (struct btrfs_super_block *)bh->b_data;
+       }
+
+       /* make sure our super fits in the device */
+       if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
+               goto error_bdev_put;
+
+       /* make sure our super fits in the page */
+       if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
+               goto error_bdev_put;
+
+       /* make sure our super doesn't straddle pages on disk */
+       index = bytenr >> PAGE_CACHE_SHIFT;
+       if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
+               goto error_bdev_put;
+
+       /* pull in the page with our super */
+       page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
+                                  index, GFP_NOFS);
+
+       if (IS_ERR_OR_NULL(page))
+               goto error_bdev_put;
+
+       p = kmap(page);
+
+       /* align our pointer to the offset of the super block */
+       disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
+
+       if (btrfs_super_bytenr(disk_super) != bytenr ||
+           strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+                   sizeof(disk_super->magic)))
+               goto error_unmap;
+
        devid = btrfs_stack_device_id(&disk_super->dev_item);
        transid = btrfs_super_generation(disk_super);
        total_devices = btrfs_super_num_devices(disk_super);
+
        if (disk_super->label[0]) {
                if (disk_super->label[BTRFS_LABEL_SIZE - 1])
                        disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0';
@@ -819,12 +870,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        } else {
                printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
        }
+
        printk(KERN_CONT "devid %llu transid %llu %s\n",
               (unsigned long long)devid, (unsigned long long)transid, path);
+
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
        if (!ret && fs_devices_ret)
                (*fs_devices_ret)->total_devices = total_devices;
-       brelse(bh);
+
+error_unmap:
+       kunmap(page);
+       page_cache_release(page);
+
+error_bdev_put:
        blkdev_put(bdev, flags);
 error:
        mutex_unlock(&uuid_mutex);
@@ -1372,14 +1430,19 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        u64 devid;
        u64 num_devices;
        u8 *dev_uuid;
+       unsigned seq;
        int ret = 0;
        bool clear_super = false;
 
        mutex_lock(&uuid_mutex);
 
-       all_avail = root->fs_info->avail_data_alloc_bits |
-               root->fs_info->avail_system_alloc_bits |
-               root->fs_info->avail_metadata_alloc_bits;
+       do {
+               seq = read_seqbegin(&root->fs_info->profiles_lock);
+
+               all_avail = root->fs_info->avail_data_alloc_bits |
+                           root->fs_info->avail_system_alloc_bits |
+                           root->fs_info->avail_metadata_alloc_bits;
+       } while (read_seqretry(&root->fs_info->profiles_lock, seq));
 
        num_devices = root->fs_info->fs_devices->num_devices;
        btrfs_dev_replace_lock(&root->fs_info->dev_replace);
@@ -1556,7 +1619,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        ret = 0;
 
        /* Notify udev that device has changed */
-       btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+       if (bdev)
+               btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
 
 error_brelse:
        brelse(bh);
@@ -2984,6 +3048,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        int mixed = 0;
        int ret;
        u64 num_devices;
+       unsigned seq;
 
        if (btrfs_fs_closing(fs_info) ||
            atomic_read(&fs_info->balance_pause_req) ||
@@ -3067,22 +3132,26 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
        /* allow to reduce meta or sys integrity only if force set */
        allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
                        BTRFS_BLOCK_GROUP_RAID10;
-       if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-            (fs_info->avail_system_alloc_bits & allowed) &&
-            !(bctl->sys.target & allowed)) ||
-           ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
-            (fs_info->avail_metadata_alloc_bits & allowed) &&
-            !(bctl->meta.target & allowed))) {
-               if (bctl->flags & BTRFS_BALANCE_FORCE) {
-                       printk(KERN_INFO "btrfs: force reducing metadata "
-                              "integrity\n");
-               } else {
-                       printk(KERN_ERR "btrfs: balance will reduce metadata "
-                              "integrity, use force if you want this\n");
-                       ret = -EINVAL;
-                       goto out;
+       do {
+               seq = read_seqbegin(&fs_info->profiles_lock);
+
+               if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+                    (fs_info->avail_system_alloc_bits & allowed) &&
+                    !(bctl->sys.target & allowed)) ||
+                   ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
+                    (fs_info->avail_metadata_alloc_bits & allowed) &&
+                    !(bctl->meta.target & allowed))) {
+                       if (bctl->flags & BTRFS_BALANCE_FORCE) {
+                               printk(KERN_INFO "btrfs: force reducing metadata "
+                                      "integrity\n");
+                       } else {
+                               printk(KERN_ERR "btrfs: balance will reduce metadata "
+                                      "integrity, use force if you want this\n");
+                               ret = -EINVAL;
+                               goto out;
+                       }
                }
-       }
+       } while (read_seqretry(&fs_info->profiles_lock, seq));
 
        if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) {
                int num_tolerated_disk_barrier_failures;
@@ -3503,13 +3572,48 @@ static int btrfs_cmp_device_info(const void *a, const void *b)
 }
 
 struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
-       { 2, 1, 0, 4, 2, 2 /* raid10 */ },
-       { 1, 1, 2, 2, 2, 2 /* raid1 */ },
-       { 1, 2, 1, 1, 1, 2 /* dup */ },
-       { 1, 1, 0, 2, 1, 1 /* raid0 */ },
-       { 1, 1, 0, 1, 1, 1 /* single */ },
+       [BTRFS_RAID_RAID10] = {
+               .sub_stripes    = 2,
+               .dev_stripes    = 1,
+               .devs_max       = 0,    /* 0 == as many as possible */
+               .devs_min       = 4,
+               .devs_increment = 2,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_RAID1] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 2,
+               .devs_min       = 2,
+               .devs_increment = 2,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_DUP] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 2,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .devs_increment = 1,
+               .ncopies        = 2,
+       },
+       [BTRFS_RAID_RAID0] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 0,
+               .devs_min       = 2,
+               .devs_increment = 1,
+               .ncopies        = 1,
+       },
+       [BTRFS_RAID_SINGLE] = {
+               .sub_stripes    = 1,
+               .dev_stripes    = 1,
+               .devs_max       = 1,
+               .devs_min       = 1,
+               .devs_increment = 1,
+               .ncopies        = 1,
+       },
 };
-
 static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                               struct btrfs_root *extent_root,
                               struct map_lookup **map_ret,
@@ -3721,12 +3825,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
        if (ret)
                goto error;
 
-       ret = btrfs_make_block_group(trans, extent_root, 0, type,
-                                    BTRFS_FIRST_CHUNK_TREE_OBJECTID,
-                                    start, num_bytes);
-       if (ret)
-               goto error;
-
        for (i = 0; i < map->num_stripes; ++i) {
                struct btrfs_device *device;
                u64 dev_offset;
@@ -3738,15 +3836,33 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
                                info->chunk_root->root_key.objectid,
                                BTRFS_FIRST_CHUNK_TREE_OBJECTID,
                                start, dev_offset, stripe_size);
-               if (ret) {
-                       btrfs_abort_transaction(trans, extent_root, ret);
-                       goto error;
-               }
+               if (ret)
+                       goto error_dev_extent;
+       }
+
+       ret = btrfs_make_block_group(trans, extent_root, 0, type,
+                                    BTRFS_FIRST_CHUNK_TREE_OBJECTID,
+                                    start, num_bytes);
+       if (ret) {
+               i = map->num_stripes - 1;
+               goto error_dev_extent;
        }
 
        kfree(devices_info);
        return 0;
 
+error_dev_extent:
+       for (; i >= 0; i--) {
+               struct btrfs_device *device;
+               int err;
+
+               device = map->stripes[i].dev;
+               err = btrfs_free_dev_extent(trans, device, start);
+               if (err) {
+                       btrfs_abort_transaction(trans, extent_root, err);
+                       break;
+               }
+       }
 error:
        kfree(map);
        kfree(devices_info);
@@ -3886,10 +4002,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
        if (ret)
                return ret;
 
-       alloc_profile = BTRFS_BLOCK_GROUP_METADATA |
-                               fs_info->avail_metadata_alloc_bits;
-       alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
+       alloc_profile = btrfs_get_alloc_profile(extent_root, 0);
        ret = __btrfs_alloc_chunk(trans, extent_root, &map, &chunk_size,
                                  &stripe_size, chunk_offset, alloc_profile);
        if (ret)
@@ -3897,10 +4010,7 @@ static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
 
        sys_chunk_offset = chunk_offset + chunk_size;
 
-       alloc_profile = BTRFS_BLOCK_GROUP_SYSTEM |
-                               fs_info->avail_system_alloc_bits;
-       alloc_profile = btrfs_reduce_alloc_profile(root, alloc_profile);
-
+       alloc_profile = btrfs_get_alloc_profile(fs_info->chunk_root, 0);
        ret = __btrfs_alloc_chunk(trans, extent_root, &sys_map,
                                  &sys_chunk_size, &sys_stripe_size,
                                  sys_chunk_offset, alloc_profile);