diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 3384819..030847b 100644
 #include "disk-io.h"
 #include "extent_io.h"
 #include "inode-map.h"
+#include "volumes.h"
 
 #define BITS_PER_BITMAP                (PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG        (32 * 1024)
 
+/*
+ * A free space range that is currently being trimmed (discarded). While a
+ * range is trimmed it is removed from the free space rbtree, so it is
+ * tracked here to keep the free space cache writeout from missing it.
+ */
+struct btrfs_trim_range {
+       u64 start;
+       u64 bytes;
+       struct list_head list;
+};
+
 static int link_free_space(struct btrfs_free_space_ctl *ctl,
                           struct btrfs_free_space *info);
 static void unlink_free_space(struct btrfs_free_space_ctl *ctl,
@@ -881,6 +888,7 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
        int ret;
        struct btrfs_free_cluster *cluster = NULL;
        struct rb_node *node = rb_first(&ctl->free_space_offset);
+       struct btrfs_trim_range *trim_entry;
 
        /* Get the cluster for this block_group if it exists */
        if (block_group && !list_empty(&block_group->cluster_list)) {
@@ -916,6 +924,21 @@ int write_cache_extent_entries(struct io_ctl *io_ctl,
                        cluster = NULL;
                }
        }
+
+       /*
+        * Make sure we don't miss any range that was removed from our rbtree
+        * because trimming is running. Otherwise after a umount+mount (or crash
+        * after committing the transaction) we would leak free space and get
+        * an inconsistent free space cache report from fsck.
+        */
+       list_for_each_entry(trim_entry, &ctl->trimming_ranges, list) {
+               ret = io_ctl_add_entry(io_ctl, trim_entry->start,
+                                      trim_entry->bytes, NULL);
+               if (ret)
+                       goto fail;
+               *entries += 1;
+       }
+
        return 0;
 fail:
        return -ENOSPC;
@@ -1135,12 +1158,15 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
 
        io_ctl_set_generation(&io_ctl, trans->transid);
 
+       mutex_lock(&ctl->cache_writeout_mutex);
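+       /*
+        * Hold cache_writeout_mutex so that concurrent trims cannot pull
+        * entries out of the rbtree (or free bitmaps) while we write the
+        * cache out; ranges being trimmed are found in ctl->trimming_ranges.
+        */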
        /* Write out the extent entries in the free space cache */
        ret = write_cache_extent_entries(&io_ctl, ctl,
                                         block_group, &entries, &bitmaps,
                                         &bitmap_list);
-       if (ret)
+       if (ret) {
+               mutex_unlock(&ctl->cache_writeout_mutex);
                goto out_nospc;
+       }
 
        /*
         * Some spaces that are freed in the current transaction are pinned,
@@ -1148,11 +1174,18 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
         * committed, we shouldn't lose them.
         */
        ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries);
-       if (ret)
+       if (ret) {
+               mutex_unlock(&ctl->cache_writeout_mutex);
                goto out_nospc;
+       }
 
-       /* At last, we write out all the bitmaps. */
+       /*
+        * At last, we write out all the bitmaps and keep cache_writeout_mutex
+        * locked while doing it because a concurrent trim can be manipulating
+        * or freeing the bitmap.
+        */
        ret = write_bitmap_entries(&io_ctl, &bitmap_list);
+       mutex_unlock(&ctl->cache_writeout_mutex);
        if (ret)
                goto out_nospc;
 
@@ -2295,6 +2328,8 @@ void btrfs_init_free_space_ctl(struct btrfs_block_group_cache *block_group)
        ctl->start = block_group->key.objectid;
        ctl->private = block_group;
        ctl->op = &free_space_op;
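+       /* coordinates trimming with the free space cache writeout */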
+       INIT_LIST_HEAD(&ctl->trimming_ranges);
+       mutex_init(&ctl->cache_writeout_mutex);
 
        /*
         * we only want to have 32k of ram per block group for keeping
@@ -2911,10 +2946,12 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
 
 static int do_trimming(struct btrfs_block_group_cache *block_group,
                       u64 *total_trimmed, u64 start, u64 bytes,
-                      u64 reserved_start, u64 reserved_bytes)
+                      u64 reserved_start, u64 reserved_bytes,
+                      struct btrfs_trim_range *trim_entry)
 {
        struct btrfs_space_info *space_info = block_group->space_info;
        struct btrfs_fs_info *fs_info = block_group->fs_info;
+       struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
        int ret;
        int update = 0;
        u64 trimmed = 0;
@@ -2934,7 +2971,10 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
        if (!ret)
                *total_trimmed += trimmed;
 
+       mutex_lock(&ctl->cache_writeout_mutex);
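+       /*
+        * Re-adding the reserved range and unlinking the trim entry happen
+        * atomically w.r.t. cache writeout, so the range is never missing
+        * from both the rbtree and ctl->trimming_ranges.
+        */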
        btrfs_add_free_space(block_group, reserved_start, reserved_bytes);
+       list_del(&trim_entry->list);
+       mutex_unlock(&ctl->cache_writeout_mutex);
 
        if (update) {
                spin_lock(&space_info->lock);
@@ -2962,16 +3002,21 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
        u64 bytes;
 
        while (start < end) {
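+               /*
+                * trim_entry lives on our stack: it is linked into
+                * ctl->trimming_ranges below and unlinked again by
+                * do_trimming() before the next iteration.
+                */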
+               struct btrfs_trim_range trim_entry;
+
+               mutex_lock(&ctl->cache_writeout_mutex);
                spin_lock(&ctl->tree_lock);
 
                if (ctl->free_space < minlen) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        break;
                }
 
                entry = tree_search_offset(ctl, start, 0, 1);
                if (!entry) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        break;
                }
 
@@ -2980,6 +3025,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
                        node = rb_next(&entry->offset_index);
                        if (!node) {
                                spin_unlock(&ctl->tree_lock);
+                               mutex_unlock(&ctl->cache_writeout_mutex);
                                goto out;
                        }
                        entry = rb_entry(node, struct btrfs_free_space,
@@ -2988,6 +3034,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
 
                if (entry->offset >= end) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        break;
                }
 
@@ -2997,6 +3044,7 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
                bytes = min(extent_start + extent_bytes, end) - start;
                if (bytes < minlen) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        goto next;
                }
 
@@ -3004,9 +3052,13 @@ static int trim_no_bitmap(struct btrfs_block_group_cache *block_group,
                kmem_cache_free(btrfs_free_space_cachep, entry);
 
                spin_unlock(&ctl->tree_lock);
+               trim_entry.start = extent_start;
+               trim_entry.bytes = extent_bytes;
+               list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
+               mutex_unlock(&ctl->cache_writeout_mutex);
 
                ret = do_trimming(block_group, total_trimmed, start, bytes,
-                                 extent_start, extent_bytes);
+                                 extent_start, extent_bytes, &trim_entry);
                if (ret)
                        break;
 next:
@@ -3035,17 +3087,21 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
 
        while (offset < end) {
                bool next_bitmap = false;
+               struct btrfs_trim_range trim_entry;
 
+               mutex_lock(&ctl->cache_writeout_mutex);
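+               /*
+                * Lock order: cache_writeout_mutex before tree_lock, since
+                * a mutex cannot be acquired with a spinlock held. Same
+                * order as in trim_no_bitmap().
+                */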
                spin_lock(&ctl->tree_lock);
 
                if (ctl->free_space < minlen) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        break;
                }
 
                entry = tree_search_offset(ctl, offset, 1, 0);
                if (!entry) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        next_bitmap = true;
                        goto next;
                }
@@ -3054,6 +3110,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
                ret2 = search_bitmap(ctl, entry, &start, &bytes);
                if (ret2 || start >= end) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        next_bitmap = true;
                        goto next;
                }
@@ -3061,6 +3118,7 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
                bytes = min(bytes, end - start);
                if (bytes < minlen) {
                        spin_unlock(&ctl->tree_lock);
+                       mutex_unlock(&ctl->cache_writeout_mutex);
                        goto next;
                }
 
@@ -3069,9 +3127,13 @@ static int trim_bitmaps(struct btrfs_block_group_cache *block_group,
                        free_bitmap(ctl, entry);
 
                spin_unlock(&ctl->tree_lock);
+               trim_entry.start = start;
+               trim_entry.bytes = bytes;
+               list_add_tail(&trim_entry.list, &ctl->trimming_ranges);
+               mutex_unlock(&ctl->cache_writeout_mutex);
 
                ret = do_trimming(block_group, total_trimmed, start, bytes,
-                                 start, bytes);
+                                 start, bytes, &trim_entry);
                if (ret)
                        break;
 next:
@@ -3101,11 +3163,52 @@ int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
 
        *trimmed = 0;
 
+       /*
+        * If the block group was already removed there is nothing to trim.
+        * Otherwise take a trimming reference so that a concurrent removal
+        * defers its final cleanup to us (see the out label below).
+        */
+       spin_lock(&block_group->lock);
+       if (block_group->removed) {
+               spin_unlock(&block_group->lock);
+               return 0;
+       }
+       atomic_inc(&block_group->trimming);
+       spin_unlock(&block_group->lock);
+
        ret = trim_no_bitmap(block_group, trimmed, start, end, minlen);
        if (ret)
-               return ret;
+               goto out;
 
        ret = trim_bitmaps(block_group, trimmed, start, end, minlen);
+out:
+       spin_lock(&block_group->lock);
+       if (atomic_dec_and_test(&block_group->trimming) &&
+           block_group->removed) {
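+               /*
+                * We are the last trimming task of a block group that was
+                * removed: finish the cleanup the removal deferred to us by
+                * dropping its extent map and leftover free space entries.
+                */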
+               struct extent_map_tree *em_tree;
+               struct extent_map *em;
+
+               spin_unlock(&block_group->lock);
+
+               em_tree = &block_group->fs_info->mapping_tree.map_tree;
+               write_lock(&em_tree->lock);
+               em = lookup_extent_mapping(em_tree, block_group->key.objectid,
+                                          1);
+               BUG_ON(!em); /* logic error, can't happen */
+               remove_extent_mapping(em_tree, em);
+               write_unlock(&em_tree->lock);
+
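+               /*
+                * The mapping may still be linked on a transaction's list of
+                * pending chunks; unlink it under the chunk mutex.
+                */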
+               lock_chunks(block_group->fs_info->chunk_root);
+               list_del_init(&em->list);
+               unlock_chunks(block_group->fs_info->chunk_root);
+
+               /* once for us and once for the tree */
+               free_extent_map(em);
+               free_extent_map(em);
+
+               /*
+                * We've left one free space entry and other tasks trimming
+                * this block group have each left one entry. Free them all.
+                */
+               __btrfs_remove_free_space_cache(block_group->free_space_ctl);
+       } else {
+               spin_unlock(&block_group->lock);
+       }
 
        return ret;
 }