Merge branch 'for-linus-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/mason...
[cascardo/linux.git] / fs / btrfs / super.c
index a0434c1..9b9eab6 100644 (file)
@@ -295,10 +295,11 @@ enum {
        Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
        Opt_compress_type, Opt_compress_force, Opt_compress_force_type,
        Opt_notreelog, Opt_ratio, Opt_flushoncommit, Opt_discard,
-       Opt_space_cache, Opt_clear_cache, Opt_user_subvol_rm_allowed,
-       Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
-       Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
-       Opt_check_integrity, Opt_check_integrity_including_extent_data,
+       Opt_space_cache, Opt_space_cache_version, Opt_clear_cache,
+       Opt_user_subvol_rm_allowed, Opt_enospc_debug, Opt_subvolrootid,
+       Opt_defrag, Opt_inode_cache, Opt_no_space_cache, Opt_recovery,
+       Opt_skip_balance, Opt_check_integrity,
+       Opt_check_integrity_including_extent_data,
        Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
        Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
        Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
@@ -309,7 +310,7 @@ enum {
        Opt_err,
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
        {Opt_degraded, "degraded"},
        {Opt_subvol, "subvol=%s"},
        {Opt_subvolid, "subvolid=%s"},
@@ -340,6 +341,7 @@ static match_table_t tokens = {
        {Opt_discard, "discard"},
        {Opt_nodiscard, "nodiscard"},
        {Opt_space_cache, "space_cache"},
+       {Opt_space_cache_version, "space_cache=%s"},
        {Opt_clear_cache, "clear_cache"},
        {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
        {Opt_enospc_debug, "enospc_debug"},
@@ -383,7 +385,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
        bool compress_force = false;
 
        cache_gen = btrfs_super_cache_generation(root->fs_info->super_copy);
-       if (cache_gen)
+       if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE))
+               btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE);
+       else if (cache_gen)
                btrfs_set_opt(info->mount_opt, SPACE_CACHE);
 
        if (!options)
@@ -617,15 +621,35 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                             "turning off discard");
                        break;
                case Opt_space_cache:
-                       btrfs_set_and_info(root, SPACE_CACHE,
-                                          "enabling disk space caching");
+               case Opt_space_cache_version:
+                       if (token == Opt_space_cache ||
+                           strcmp(args[0].from, "v1") == 0) {
+                               btrfs_clear_opt(root->fs_info->mount_opt,
+                                               FREE_SPACE_TREE);
+                               btrfs_set_and_info(root, SPACE_CACHE,
+                                                  "enabling disk space caching");
+                       } else if (strcmp(args[0].from, "v2") == 0) {
+                               btrfs_clear_opt(root->fs_info->mount_opt,
+                                               SPACE_CACHE);
+                               btrfs_set_and_info(root, FREE_SPACE_TREE,
+                                                  "enabling free space tree");
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
+                       }
                        break;
                case Opt_rescan_uuid_tree:
                        btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
                        break;
                case Opt_no_space_cache:
-                       btrfs_clear_and_info(root, SPACE_CACHE,
-                                            "disabling disk space caching");
+                       if (btrfs_test_opt(root, SPACE_CACHE)) {
+                               btrfs_clear_and_info(root, SPACE_CACHE,
+                                                    "disabling disk space caching");
+                       }
+                       if (btrfs_test_opt(root, FREE_SPACE_TREE)) {
+                               btrfs_clear_and_info(root, FREE_SPACE_TREE,
+                                                    "disabling free space tree");
+                       }
                        break;
                case Opt_inode_cache:
                        btrfs_set_pending_and_info(info, INODE_MAP_CACHE,
@@ -754,8 +778,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                }
        }
 out:
+       if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
+           !btrfs_test_opt(root, FREE_SPACE_TREE) &&
+           !btrfs_test_opt(root, CLEAR_CACHE)) {
+               btrfs_err(root->fs_info, "cannot disable free space tree");
+               ret = -EINVAL;
+
+       }
        if (!ret && btrfs_test_opt(root, SPACE_CACHE))
                btrfs_info(root->fs_info, "disk space caching is enabled");
+       if (!ret && btrfs_test_opt(root, FREE_SPACE_TREE))
+               btrfs_info(root->fs_info, "using free space tree");
        kfree(orig);
        return ret;
 }
@@ -1162,6 +1195,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",noacl");
        if (btrfs_test_opt(root, SPACE_CACHE))
                seq_puts(seq, ",space_cache");
+       else if (btrfs_test_opt(root, FREE_SPACE_TREE))
+               seq_puts(seq, ",space_cache=v2");
        else
                seq_puts(seq, ",nospace_cache");
        if (btrfs_test_opt(root, RESCAN_UUID_TREE))
@@ -1863,7 +1898,7 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
                 * btrfs starts at an offset of at least 1MB when doing chunk
                 * allocation.
                 */
-               skip_space = 1024 * 1024;
+               skip_space = SZ_1M;
 
                /* user can set the offset in fs_info->alloc_start. */
                if (fs_info->alloc_start &&
@@ -1954,6 +1989,8 @@ static int btrfs_calc_avail_data_space(struct btrfs_root *root, u64 *free_bytes)
  * there are other factors that may change the result (like a new metadata
  * chunk).
  *
+ * If metadata is exhausted, f_bavail will be 0.
+ *
  * FIXME: not accurate for mixed block groups, total and free/used are ok,
  * available appears slightly larger.
  */
@@ -1965,11 +2002,13 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        struct btrfs_space_info *found;
        u64 total_used = 0;
        u64 total_free_data = 0;
+       u64 total_free_meta = 0;
        int bits = dentry->d_sb->s_blocksize_bits;
        __be32 *fsid = (__be32 *)fs_info->fsid;
        unsigned factor = 1;
        struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
        int ret;
+       u64 thresh = 0;
 
        /*
         * holding chunk_muext to avoid allocating new chunks, holding
@@ -1995,6 +2034,8 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
                                }
                        }
                }
+               if (found->flags & BTRFS_BLOCK_GROUP_METADATA)
+                       total_free_meta += found->disk_total - found->disk_used;
 
                total_used += found->disk_used;
        }
@@ -2017,6 +2058,24 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
        buf->f_bavail += div_u64(total_free_data, factor);
        buf->f_bavail = buf->f_bavail >> bits;
 
+       /*
+        * We calculate the remaining metadata space minus global reserve. If
+        * this is (supposedly) smaller than zero, there's no space. But this
+        * does not hold in practice: the exhausted state occurs while there is
+        * still some positive delta. So we apply some guesswork and compare
+        * the delta to a 4M threshold.  (Practically observed delta was ~2M.)
+        *
+        * We probably cannot calculate the exact threshold value because this
+        * depends on the internal reservations requested by various
+        * operations, so some operations that consume a small amount of
+        * metadata will succeed even if the Avail is zero. But this is better
+        * than the other way around.
+        */
+       thresh = 4 * 1024 * 1024;
+
+       if (total_free_meta - thresh < block_rsv->size)
+               buf->f_bavail = 0;
+
        buf->f_type = BTRFS_SUPER_MAGIC;
        buf->f_bsize = dentry->d_sb->s_blocksize;
        buf->f_namelen = BTRFS_NAME_LEN;
@@ -2223,6 +2282,9 @@ static int btrfs_run_sanity_tests(void)
        if (ret)
                goto out;
        ret = btrfs_test_qgroups();
+       if (ret)
+               goto out;
+       ret = btrfs_test_free_space_tree();
 out:
        btrfs_destroy_test_fs();
        return ret;