Merge tag 'metag-for-v4.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jhoga...
[cascardo/linux.git] / fs / btrfs / disk-io.c
index 54bc8c7..e720d3e 100644 (file)
@@ -326,8 +326,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
 
                        read_extent_buffer(buf, &val, 0, csum_size);
                        btrfs_warn_rl(fs_info,
-                               "%s checksum verify failed on %llu wanted %X found %X "
-                               "level %d",
+                               "%s checksum verify failed on %llu wanted %X found %X level %d",
                                fs_info->sb->s_id, buf->start,
                                val, found, btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
@@ -402,7 +401,8 @@ out:
  * Return 0 if the superblock checksum type matches the checksum value of that
  * algorithm. Pass the raw disk superblock data.
  */
-static int btrfs_check_super_csum(char *raw_disk_sb)
+static int btrfs_check_super_csum(struct btrfs_fs_info *fs_info,
+                                 char *raw_disk_sb)
 {
        struct btrfs_super_block *disk_sb =
                (struct btrfs_super_block *)raw_disk_sb;
@@ -428,7 +428,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
        }
 
        if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
-               printk(KERN_ERR "BTRFS: unsupported checksum algorithm %u\n",
+               btrfs_err(fs_info, "unsupported checksum algorithm %u",
                                csum_type);
                ret = 1;
        }
@@ -442,7 +442,7 @@ static int btrfs_check_super_csum(char *raw_disk_sb)
  */
 static int btree_read_extent_buffer_pages(struct btrfs_root *root,
                                          struct extent_buffer *eb,
-                                         u64 start, u64 parent_transid)
+                                         u64 parent_transid)
 {
        struct extent_io_tree *io_tree;
        int failed = 0;
@@ -454,8 +454,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
        clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags);
        io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree;
        while (1) {
-               ret = read_extent_buffer_pages(io_tree, eb, start,
-                                              WAIT_COMPLETE,
+               ret = read_extent_buffer_pages(io_tree, eb, WAIT_COMPLETE,
                                               btree_get_extent, mirror_num);
                if (!ret) {
                        if (!verify_parent_transid(io_tree, eb,
@@ -547,9 +546,10 @@ static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
 }
 
 #define CORRUPT(reason, eb, root, slot)                                \
-       btrfs_crit(root->fs_info, "corrupt leaf, %s: block=%llu,"       \
-                  "root=%llu, slot=%d", reason,                        \
-              btrfs_header_bytenr(eb), root->objectid, slot)
+       btrfs_crit(root->fs_info, "corrupt %s, %s: block=%llu," \
+                  " root=%llu, slot=%d",                       \
+                  btrfs_header_level(eb) == 0 ? "leaf" : "node",\
+                  reason, btrfs_header_bytenr(eb), root->objectid, slot)
 
 static noinline int check_leaf(struct btrfs_root *root,
                               struct extent_buffer *leaf)
@@ -636,6 +636,10 @@ static noinline int check_leaf(struct btrfs_root *root,
 static int check_node(struct btrfs_root *root, struct extent_buffer *node)
 {
        unsigned long nr = btrfs_header_nritems(node);
+       struct btrfs_key key, next_key;
+       int slot;
+       u64 bytenr;
+       int ret = 0;
 
        if (nr == 0 || nr > BTRFS_NODEPTRS_PER_BLOCK(root)) {
                btrfs_crit(root->fs_info,
@@ -643,7 +647,26 @@ static int check_node(struct btrfs_root *root, struct extent_buffer *node)
                           node->start, root->objectid, nr);
                return -EIO;
        }
-       return 0;
+
+       for (slot = 0; slot < nr - 1; slot++) {
+               bytenr = btrfs_node_blockptr(node, slot);
+               btrfs_node_key_to_cpu(node, &key, slot);
+               btrfs_node_key_to_cpu(node, &next_key, slot + 1);
+
+               if (!bytenr) {
+                       CORRUPT("invalid item slot", node, root, slot);
+                       ret = -EIO;
+                       goto out;
+               }
+
+               if (btrfs_comp_cpu_keys(&key, &next_key) >= 0) {
+                       CORRUPT("bad key order", node, root, slot);
+                       ret = -EIO;
+                       goto out;
+               }
+       }
+out:
+       return ret;
 }
 
 static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
@@ -1132,7 +1155,7 @@ void readahead_tree_block(struct btrfs_root *root, u64 bytenr)
        if (IS_ERR(buf))
                return;
        read_extent_buffer_pages(&BTRFS_I(btree_inode)->io_tree,
-                                buf, 0, WAIT_NONE, btree_get_extent, 0);
+                                buf, WAIT_NONE, btree_get_extent, 0);
        free_extent_buffer(buf);
 }
 
@@ -1150,7 +1173,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr,
 
        set_bit(EXTENT_BUFFER_READAHEAD, &buf->bflags);
 
-       ret = read_extent_buffer_pages(io_tree, buf, 0, WAIT_PAGE_LOCK,
+       ret = read_extent_buffer_pages(io_tree, buf, WAIT_PAGE_LOCK,
                                       btree_get_extent, mirror_num);
        if (ret) {
                free_extent_buffer(buf);
@@ -1206,7 +1229,7 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
        if (IS_ERR(buf))
                return buf;
 
-       ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
+       ret = btree_read_extent_buffer_pages(root, buf, parent_transid);
        if (ret) {
                free_extent_buffer(buf);
                return ERR_PTR(ret);
@@ -1839,7 +1862,7 @@ static int cleaner_kthread(void *arg)
                 * Do not do anything if we might cause open_ctree() to block
                 * before we have finished mounting the filesystem.
                 */
-               if (!root->fs_info->open)
+               if (!test_bit(BTRFS_FS_OPEN, &root->fs_info->flags))
                        goto sleep;
 
                if (!mutex_trylock(&root->fs_info->cleaner_mutex))
@@ -2332,8 +2355,6 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
        fs_info->qgroup_op_tree = RB_ROOT;
        INIT_LIST_HEAD(&fs_info->dirty_qgroups);
        fs_info->qgroup_seq = 1;
-       fs_info->quota_enabled = 0;
-       fs_info->pending_quota_state = 0;
        fs_info->qgroup_ulist = NULL;
        fs_info->qgroup_rescan_running = false;
        mutex_init(&fs_info->qgroup_rescan_lock);
@@ -2518,8 +2539,7 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info,
        root = btrfs_read_tree_root(tree_root, &location);
        if (!IS_ERR(root)) {
                set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
-               fs_info->quota_enabled = 1;
-               fs_info->pending_quota_state = 1;
+               set_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags);
                fs_info->quota_root = root;
        }
 
@@ -2710,8 +2730,7 @@ int open_ctree(struct super_block *sb,
        extent_io_tree_init(&fs_info->freed_extents[1],
                             fs_info->btree_inode->i_mapping);
        fs_info->pinned_extents = &fs_info->freed_extents[0];
-       fs_info->do_barriers = 1;
-
+       set_bit(BTRFS_FS_BARRIER, &fs_info->flags);
 
        mutex_init(&fs_info->ordered_operations_mutex);
        mutex_init(&fs_info->tree_log_mutex);
@@ -2762,7 +2781,7 @@ int open_ctree(struct super_block *sb,
         * We want to check superblock checksum, the type is stored inside.
         * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
         */
-       if (btrfs_check_super_csum(bh->b_data)) {
+       if (btrfs_check_super_csum(fs_info, bh->b_data)) {
                btrfs_err(fs_info, "superblock checksum mismatch");
                err = -EINVAL;
                brelse(bh);
@@ -3199,10 +3218,9 @@ retry_root_backup:
                        return ret;
                }
        } else {
-               fs_info->update_uuid_tree_gen = 1;
+               set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
        }
-
-       fs_info->open = 1;
+       set_bit(BTRFS_FS_OPEN, &fs_info->flags);
 
        /*
         * backuproot only affect mount behavior, and if open_ctree succeeded,
@@ -3607,7 +3625,7 @@ int btrfs_get_num_tolerated_disk_barrier_failures(u64 flags)
        }
 
        if (min_tolerated == INT_MAX) {
-               pr_warn("BTRFS: unknown raid flag: %llu\n", flags);
+               pr_warn("BTRFS: unknown raid flag: %llu", flags);
                min_tolerated = 0;
        }
 
@@ -3893,8 +3911,7 @@ void close_ctree(struct btrfs_root *root)
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
 
-       fs_info->closing = 1;
-       smp_mb();
+       set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
 
        /* wait for the qgroup rescan worker to stop */
        btrfs_qgroup_wait_for_completion(fs_info, false);
@@ -3939,8 +3956,7 @@ void close_ctree(struct btrfs_root *root)
        kthread_stop(fs_info->transaction_kthread);
        kthread_stop(fs_info->cleaner_kthread);
 
-       fs_info->closing = 2;
-       smp_mb();
+       set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags);
 
        btrfs_free_qgroup_config(fs_info);
 
@@ -3965,7 +3981,7 @@ void close_ctree(struct btrfs_root *root)
        invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
        btrfs_stop_all_workers(fs_info);
 
-       fs_info->open = 0;
+       clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
        free_root_pointers(fs_info, 1);
 
        iput(fs_info->btree_inode);
@@ -4036,8 +4052,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        root = BTRFS_I(buf->pages[0]->mapping->host)->root;
        btrfs_assert_tree_locked(buf);
        if (transid != root->fs_info->generation)
-               WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
-                      "found %llu running %llu\n",
+               WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, found %llu running %llu\n",
                        buf->start, transid, root->fs_info->generation);
        was_dirty = set_extent_buffer_dirty(buf);
        if (!was_dirty)
@@ -4088,7 +4103,7 @@ void btrfs_btree_balance_dirty_nodelay(struct btrfs_root *root)
 int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
 {
        struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root;
-       return btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
+       return btree_read_extent_buffer_pages(root, buf, parent_transid);
 }
 
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
@@ -4100,24 +4115,24 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
        int ret = 0;
 
        if (btrfs_super_magic(sb) != BTRFS_MAGIC) {
-               printk(KERN_ERR "BTRFS: no valid FS found\n");
+               btrfs_err(fs_info, "no valid FS found");
                ret = -EINVAL;
        }
        if (btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP)
-               printk(KERN_WARNING "BTRFS: unrecognized super flag: %llu\n",
+               btrfs_warn(fs_info, "unrecognized super flag: %llu",
                                btrfs_super_flags(sb) & ~BTRFS_SUPER_FLAG_SUPP);
        if (btrfs_super_root_level(sb) >= BTRFS_MAX_LEVEL) {
-               printk(KERN_ERR "BTRFS: tree_root level too big: %d >= %d\n",
+               btrfs_err(fs_info, "tree_root level too big: %d >= %d",
                                btrfs_super_root_level(sb), BTRFS_MAX_LEVEL);
                ret = -EINVAL;
        }
        if (btrfs_super_chunk_root_level(sb) >= BTRFS_MAX_LEVEL) {
-               printk(KERN_ERR "BTRFS: chunk_root level too big: %d >= %d\n",
+               btrfs_err(fs_info, "chunk_root level too big: %d >= %d",
                                btrfs_super_chunk_root_level(sb), BTRFS_MAX_LEVEL);
                ret = -EINVAL;
        }
        if (btrfs_super_log_root_level(sb) >= BTRFS_MAX_LEVEL) {
-               printk(KERN_ERR "BTRFS: log_root level too big: %d >= %d\n",
+               btrfs_err(fs_info, "log_root level too big: %d >= %d",
                                btrfs_super_log_root_level(sb), BTRFS_MAX_LEVEL);
                ret = -EINVAL;
        }
@@ -4128,47 +4143,48 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
         */
        if (!is_power_of_2(sectorsize) || sectorsize < 4096 ||
            sectorsize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printk(KERN_ERR "BTRFS: invalid sectorsize %llu\n", sectorsize);
+               btrfs_err(fs_info, "invalid sectorsize %llu", sectorsize);
                ret = -EINVAL;
        }
        /* Only PAGE SIZE is supported yet */
        if (sectorsize != PAGE_SIZE) {
-               printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
-                               sectorsize, PAGE_SIZE);
+               btrfs_err(fs_info,
+                       "sectorsize %llu not supported yet, only support %lu",
+                       sectorsize, PAGE_SIZE);
                ret = -EINVAL;
        }
        if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
            nodesize > BTRFS_MAX_METADATA_BLOCKSIZE) {
-               printk(KERN_ERR "BTRFS: invalid nodesize %llu\n", nodesize);
+               btrfs_err(fs_info, "invalid nodesize %llu", nodesize);
                ret = -EINVAL;
        }
        if (nodesize != le32_to_cpu(sb->__unused_leafsize)) {
-               printk(KERN_ERR "BTRFS: invalid leafsize %u, should be %llu\n",
-                               le32_to_cpu(sb->__unused_leafsize),
-                               nodesize);
+               btrfs_err(fs_info, "invalid leafsize %u, should be %llu",
+                         le32_to_cpu(sb->__unused_leafsize), nodesize);
                ret = -EINVAL;
        }
 
        /* Root alignment check */
        if (!IS_ALIGNED(btrfs_super_root(sb), sectorsize)) {
-               printk(KERN_WARNING "BTRFS: tree_root block unaligned: %llu\n",
-                               btrfs_super_root(sb));
+               btrfs_warn(fs_info, "tree_root block unaligned: %llu",
+                          btrfs_super_root(sb));
                ret = -EINVAL;
        }
        if (!IS_ALIGNED(btrfs_super_chunk_root(sb), sectorsize)) {
-               printk(KERN_WARNING "BTRFS: chunk_root block unaligned: %llu\n",
-                               btrfs_super_chunk_root(sb));
+               btrfs_warn(fs_info, "chunk_root block unaligned: %llu",
+                          btrfs_super_chunk_root(sb));
                ret = -EINVAL;
        }
        if (!IS_ALIGNED(btrfs_super_log_root(sb), sectorsize)) {
-               printk(KERN_WARNING "BTRFS: log_root block unaligned: %llu\n",
-                               btrfs_super_log_root(sb));
+               btrfs_warn(fs_info, "log_root block unaligned: %llu",
+                          btrfs_super_log_root(sb));
                ret = -EINVAL;
        }
 
        if (memcmp(fs_info->fsid, sb->dev_item.fsid, BTRFS_UUID_SIZE) != 0) {
-               printk(KERN_ERR "BTRFS: dev_item UUID does not match fsid: %pU != %pU\n",
-                               fs_info->fsid, sb->dev_item.fsid);
+               btrfs_err(fs_info,
+                          "dev_item UUID does not match fsid: %pU != %pU",
+                          fs_info->fsid, sb->dev_item.fsid);
                ret = -EINVAL;
        }
 
@@ -4178,25 +4194,25 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
         */
        if (btrfs_super_bytes_used(sb) < 6 * btrfs_super_nodesize(sb)) {
                btrfs_err(fs_info, "bytes_used is too small %llu",
-                      btrfs_super_bytes_used(sb));
+                         btrfs_super_bytes_used(sb));
                ret = -EINVAL;
        }
        if (!is_power_of_2(btrfs_super_stripesize(sb))) {
                btrfs_err(fs_info, "invalid stripesize %u",
-                      btrfs_super_stripesize(sb));
+                         btrfs_super_stripesize(sb));
                ret = -EINVAL;
        }
        if (btrfs_super_num_devices(sb) > (1UL << 31))
-               printk(KERN_WARNING "BTRFS: suspicious number of devices: %llu\n",
-                               btrfs_super_num_devices(sb));
+               btrfs_warn(fs_info, "suspicious number of devices: %llu",
+                          btrfs_super_num_devices(sb));
        if (btrfs_super_num_devices(sb) == 0) {
-               printk(KERN_ERR "BTRFS: number of devices is 0\n");
+               btrfs_err(fs_info, "number of devices is 0");
                ret = -EINVAL;
        }
 
        if (btrfs_super_bytenr(sb) != BTRFS_SUPER_INFO_OFFSET) {
-               printk(KERN_ERR "BTRFS: super offset mismatch %llu != %u\n",
-                               btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
+               btrfs_err(fs_info, "super offset mismatch %llu != %u",
+                         btrfs_super_bytenr(sb), BTRFS_SUPER_INFO_OFFSET);
                ret = -EINVAL;
        }
 
@@ -4205,17 +4221,17 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
         * and one chunk
         */
        if (btrfs_super_sys_array_size(sb) > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
-               printk(KERN_ERR "BTRFS: system chunk array too big %u > %u\n",
-                               btrfs_super_sys_array_size(sb),
-                               BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
+               btrfs_err(fs_info, "system chunk array too big %u > %u",
+                         btrfs_super_sys_array_size(sb),
+                         BTRFS_SYSTEM_CHUNK_ARRAY_SIZE);
                ret = -EINVAL;
        }
        if (btrfs_super_sys_array_size(sb) < sizeof(struct btrfs_disk_key)
                        + sizeof(struct btrfs_chunk)) {
-               printk(KERN_ERR "BTRFS: system chunk array too small %u < %zu\n",
-                               btrfs_super_sys_array_size(sb),
-                               sizeof(struct btrfs_disk_key)
-                               + sizeof(struct btrfs_chunk));
+               btrfs_err(fs_info, "system chunk array too small %u < %zu",
+                         btrfs_super_sys_array_size(sb),
+                         sizeof(struct btrfs_disk_key)
+                         + sizeof(struct btrfs_chunk));
                ret = -EINVAL;
        }
 
@@ -4224,14 +4240,16 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
         * but it's still possible that it's the one that's wrong.
         */
        if (btrfs_super_generation(sb) < btrfs_super_chunk_root_generation(sb))
-               printk(KERN_WARNING
-                       "BTRFS: suspicious: generation < chunk_root_generation: %llu < %llu\n",
-                       btrfs_super_generation(sb), btrfs_super_chunk_root_generation(sb));
+               btrfs_warn(fs_info,
+                       "suspicious: generation < chunk_root_generation: %llu < %llu",
+                       btrfs_super_generation(sb),
+                       btrfs_super_chunk_root_generation(sb));
        if (btrfs_super_generation(sb) < btrfs_super_cache_generation(sb)
            && btrfs_super_cache_generation(sb) != (u64)-1)
-               printk(KERN_WARNING
-                       "BTRFS: suspicious: generation < cache_generation: %llu < %llu\n",
-                       btrfs_super_generation(sb), btrfs_super_cache_generation(sb));
+               btrfs_warn(fs_info,
+                       "suspicious: generation < cache_generation: %llu < %llu",
+                       btrfs_super_generation(sb),
+                       btrfs_super_cache_generation(sb));
 
        return ret;
 }
@@ -4475,9 +4493,80 @@ again:
        return 0;
 }
 
+/* Invalidate and drop the inode held in @cache->io_ctl, if set; put @cache. */
+static void btrfs_cleanup_bg_io(struct btrfs_block_group_cache *cache)
+{
+       struct inode *io_inode = cache->io_ctl.inode;
+
+       if (io_inode) {
+               invalidate_inode_pages2(io_inode->i_mapping);
+               BTRFS_I(io_inode)->generation = 0;
+               cache->io_ctl.inode = NULL;
+               iput(io_inode);
+       }
+       btrfs_put_block_group(cache);
+}
+
+/*
+ * Empty the dirty_bgs and io_bgs lists of @cur_trans.  Every block group
+ * found is unlinked, has disk_cache_state set to BTRFS_DC_ERROR under its own
+ * lock and is passed to btrfs_put_block_group(); block groups with a pending
+ * io_list entry additionally go through btrfs_cleanup_bg_io().
+ * @root is unused here and only kept so existing callers keep working.
+ */
+void btrfs_cleanup_dirty_bgs(struct btrfs_transaction *cur_trans,
+                            struct btrfs_root *root)
+{
+       struct btrfs_block_group_cache *cache;
+
+       spin_lock(&cur_trans->dirty_bgs_lock);
+       while (!list_empty(&cur_trans->dirty_bgs)) {
+               /* The list is non-empty, so this entry is always valid. */
+               cache = list_first_entry(&cur_trans->dirty_bgs,
+                                        struct btrfs_block_group_cache,
+                                        dirty_list);
+
+               if (!list_empty(&cache->io_list)) {
+                       /* dirty_bgs_lock is not held across the io cleanup */
+                       spin_unlock(&cur_trans->dirty_bgs_lock);
+                       list_del_init(&cache->io_list);
+                       btrfs_cleanup_bg_io(cache);
+                       spin_lock(&cur_trans->dirty_bgs_lock);
+               }
+
+               list_del_init(&cache->dirty_list);
+               spin_lock(&cache->lock);
+               cache->disk_cache_state = BTRFS_DC_ERROR;
+               spin_unlock(&cache->lock);
+
+               /* The put is done with dirty_bgs_lock released. */
+               spin_unlock(&cur_trans->dirty_bgs_lock);
+               btrfs_put_block_group(cache);
+               spin_lock(&cur_trans->dirty_bgs_lock);
+       }
+       spin_unlock(&cur_trans->dirty_bgs_lock);
+
+       /* io_bgs is walked without taking dirty_bgs_lock */
+       while (!list_empty(&cur_trans->io_bgs)) {
+               cache = list_first_entry(&cur_trans->io_bgs,
+                                        struct btrfs_block_group_cache,
+                                        io_list);
+
+               list_del_init(&cache->io_list);
+               spin_lock(&cache->lock);
+               cache->disk_cache_state = BTRFS_DC_ERROR;
+               spin_unlock(&cache->lock);
+               btrfs_cleanup_bg_io(cache);
+       }
+}
+
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
                                   struct btrfs_root *root)
 {
+       btrfs_cleanup_dirty_bgs(cur_trans, root);
+       ASSERT(list_empty(&cur_trans->dirty_bgs));
+       ASSERT(list_empty(&cur_trans->io_bgs));
+
        btrfs_destroy_delayed_refs(cur_trans, root);
 
        cur_trans->state = TRANS_STATE_COMMIT_START;