Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux...
[cascardo/linux.git] / fs / btrfs / disk-io.c
index 81ea553..bd0f752 100644 (file)
@@ -678,32 +678,31 @@ static void end_workqueue_bio(struct bio *bio, int err)
 
        fs_info = end_io_wq->info;
        end_io_wq->error = err;
-       end_io_wq->work.func = end_workqueue_fn;
-       end_io_wq->work.flags = 0;
+       btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
 
        if (bio->bi_rw & REQ_WRITE) {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
-                       btrfs_queue_worker(&fs_info->endio_meta_write_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_meta_write_workers,
+                                        &end_io_wq->work);
                else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
-                       btrfs_queue_worker(&fs_info->endio_freespace_worker,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_freespace_worker,
+                                        &end_io_wq->work);
                else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-                       btrfs_queue_worker(&fs_info->endio_raid56_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_raid56_workers,
+                                        &end_io_wq->work);
                else
-                       btrfs_queue_worker(&fs_info->endio_write_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_write_workers,
+                                        &end_io_wq->work);
        } else {
                if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
-                       btrfs_queue_worker(&fs_info->endio_raid56_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_raid56_workers,
+                                        &end_io_wq->work);
                else if (end_io_wq->metadata)
-                       btrfs_queue_worker(&fs_info->endio_meta_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_meta_workers,
+                                        &end_io_wq->work);
                else
-                       btrfs_queue_worker(&fs_info->endio_workers,
-                                          &end_io_wq->work);
+                       btrfs_queue_work(fs_info->endio_workers,
+                                        &end_io_wq->work);
        }
 }
 
@@ -738,7 +737,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info)
 {
        unsigned long limit = min_t(unsigned long,
-                                   info->workers.max_workers,
+                                   info->thread_pool_size, /* pool-size cap now read from fs_info itself (was workers.max_workers) */
                                    info->fs_devices->open_devices);
        return 256 * limit;
 }
@@ -811,11 +810,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
        async->submit_bio_start = submit_bio_start;
        async->submit_bio_done = submit_bio_done;
 
-       async->work.func = run_one_async_start;
-       async->work.ordered_func = run_one_async_done;
-       async->work.ordered_free = run_one_async_free;
+       btrfs_init_work(&async->work, run_one_async_start,
+                       run_one_async_done, run_one_async_free);
 
-       async->work.flags = 0;
        async->bio_flags = bio_flags;
        async->bio_offset = bio_offset;
 
@@ -824,9 +821,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
        atomic_inc(&fs_info->nr_async_submits);
 
        if (rw & REQ_SYNC)
-               btrfs_set_work_high_prio(&async->work);
+               btrfs_set_work_high_priority(&async->work);
 
-       btrfs_queue_worker(&fs_info->workers, &async->work);
+       btrfs_queue_work(fs_info->workers, &async->work);
 
        while (atomic_read(&fs_info->async_submit_draining) &&
              atomic_read(&fs_info->nr_async_submits)) {
@@ -1149,6 +1146,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
        }
 }
 
+static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void)
+{ /* Allocate a writers tracker and init its counter and waitqueue; returns the object or an ERR_PTR, never NULL. */
+       struct btrfs_subvolume_writers *writers;
+       int ret;
+
+       writers = kmalloc(sizeof(*writers), GFP_NOFS);
+       if (!writers)
+               return ERR_PTR(-ENOMEM);
+
+       ret = percpu_counter_init(&writers->counter, 0); /* counter starts at zero */
+       if (ret < 0) {
+               kfree(writers); /* undo the allocation before propagating the error */
+               return ERR_PTR(ret);
+       }
+
+       init_waitqueue_head(&writers->wait);
+       return writers;
+}
+
+static void
+btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers)
+{ /* Destroy the per-cpu counter, then free the object; no NULL check is made here, so callers must pass a valid pointer. */
+       percpu_counter_destroy(&writers->counter);
+       kfree(writers);
+}
+
 static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
                         u32 stripesize, struct btrfs_root *root,
                         struct btrfs_fs_info *fs_info,
@@ -1194,16 +1217,22 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        spin_lock_init(&root->log_extents_lock[1]);
        mutex_init(&root->objectid_mutex);
        mutex_init(&root->log_mutex);
+       mutex_init(&root->ordered_extent_mutex);
+       mutex_init(&root->delalloc_mutex);
        init_waitqueue_head(&root->log_writer_wait);
        init_waitqueue_head(&root->log_commit_wait[0]);
        init_waitqueue_head(&root->log_commit_wait[1]);
+       INIT_LIST_HEAD(&root->log_ctxs[0]);
+       INIT_LIST_HEAD(&root->log_ctxs[1]);
        atomic_set(&root->log_commit[0], 0);
        atomic_set(&root->log_commit[1], 0);
        atomic_set(&root->log_writers, 0);
        atomic_set(&root->log_batch, 0);
        atomic_set(&root->orphan_inodes, 0);
        atomic_set(&root->refs, 1);
+       atomic_set(&root->will_be_snapshoted, 0);
        root->log_transid = 0;
+       root->log_transid_committed = -1;
        root->last_log_commit = 0;
        if (fs_info)
                extent_io_tree_init(&root->dirty_log_pages,
@@ -1417,6 +1446,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
        WARN_ON(root->log_root);
        root->log_root = log_root;
        root->log_transid = 0;
+       root->log_transid_committed = -1;
        root->last_log_commit = 0;
        return 0;
 }
@@ -1498,6 +1528,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
 int btrfs_init_fs_root(struct btrfs_root *root)
 {
        int ret;
+       struct btrfs_subvolume_writers *writers;
 
        root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
        root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
@@ -1507,6 +1538,13 @@ int btrfs_init_fs_root(struct btrfs_root *root)
                goto fail;
        }
 
+       writers = btrfs_alloc_subvolume_writers();
+       if (IS_ERR(writers)) {
+               ret = PTR_ERR(writers);
+               goto fail;
+       }
+       root->subv_writers = writers;
+
        btrfs_init_free_ino_ctl(root);
        mutex_init(&root->fs_commit_mutex);
        spin_lock_init(&root->cache_lock);
@@ -1514,8 +1552,11 @@ int btrfs_init_fs_root(struct btrfs_root *root)
 
        ret = get_anon_bdev(&root->anon_dev);
        if (ret)
-               goto fail;
+               goto free_writers;
        return 0;
+
+free_writers:
+       btrfs_free_subvolume_writers(root->subv_writers);
 fail:
        kfree(root->free_ino_ctl);
        kfree(root->free_ino_pinned);
@@ -1990,23 +2031,22 @@ static noinline int next_root_backup(struct btrfs_fs_info *info,
 /* helper to cleanup workers */
 static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
 {
-       btrfs_stop_workers(&fs_info->generic_worker);
-       btrfs_stop_workers(&fs_info->fixup_workers);
-       btrfs_stop_workers(&fs_info->delalloc_workers);
-       btrfs_stop_workers(&fs_info->workers);
-       btrfs_stop_workers(&fs_info->endio_workers);
-       btrfs_stop_workers(&fs_info->endio_meta_workers);
-       btrfs_stop_workers(&fs_info->endio_raid56_workers);
-       btrfs_stop_workers(&fs_info->rmw_workers);
-       btrfs_stop_workers(&fs_info->endio_meta_write_workers);
-       btrfs_stop_workers(&fs_info->endio_write_workers);
-       btrfs_stop_workers(&fs_info->endio_freespace_worker);
-       btrfs_stop_workers(&fs_info->submit_workers);
-       btrfs_stop_workers(&fs_info->delayed_workers);
-       btrfs_stop_workers(&fs_info->caching_workers);
-       btrfs_stop_workers(&fs_info->readahead_workers);
-       btrfs_stop_workers(&fs_info->flush_workers);
-       btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
+       btrfs_destroy_workqueue(fs_info->fixup_workers); /* the removed generic_worker stop call has no replacement here */
+       btrfs_destroy_workqueue(fs_info->delalloc_workers);
+       btrfs_destroy_workqueue(fs_info->workers);
+       btrfs_destroy_workqueue(fs_info->endio_workers);
+       btrfs_destroy_workqueue(fs_info->endio_meta_workers);
+       btrfs_destroy_workqueue(fs_info->endio_raid56_workers);
+       btrfs_destroy_workqueue(fs_info->rmw_workers);
+       btrfs_destroy_workqueue(fs_info->endio_meta_write_workers);
+       btrfs_destroy_workqueue(fs_info->endio_write_workers);
+       btrfs_destroy_workqueue(fs_info->endio_freespace_worker);
+       btrfs_destroy_workqueue(fs_info->submit_workers);
+       btrfs_destroy_workqueue(fs_info->delayed_workers);
+       btrfs_destroy_workqueue(fs_info->caching_workers);
+       btrfs_destroy_workqueue(fs_info->readahead_workers);
+       btrfs_destroy_workqueue(fs_info->flush_workers);
+       btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
 }
 
 static void free_root_extent_buffers(struct btrfs_root *root)
@@ -2097,6 +2137,8 @@ int open_ctree(struct super_block *sb,
        int err = -EINVAL;
        int num_backups_tried = 0;
        int backup_index = 0;
+       int max_active;
+       int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND;
        bool create_uuid_tree;
        bool check_uuid_tree;
 
@@ -2133,10 +2175,16 @@ int open_ctree(struct super_block *sb,
                goto fail_dirty_metadata_bytes;
        }
 
+       ret = percpu_counter_init(&fs_info->bio_counter, 0);
+       if (ret) {
+               err = ret;
+               goto fail_delalloc_bytes;
+       }
+
        fs_info->btree_inode = new_inode(sb);
        if (!fs_info->btree_inode) {
                err = -ENOMEM;
-               goto fail_delalloc_bytes;
+               goto fail_bio_counter;
        }
 
        mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2159,6 +2207,7 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->buffer_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->reloc_mutex);
+       mutex_init(&fs_info->delalloc_root_mutex);
        seqlock_init(&fs_info->profiles_lock);
 
        init_completion(&fs_info->kobj_unregister);
@@ -2211,6 +2260,7 @@ int open_ctree(struct super_block *sb,
        atomic_set(&fs_info->scrub_pause_req, 0);
        atomic_set(&fs_info->scrubs_paused, 0);
        atomic_set(&fs_info->scrub_cancel_req, 0);
+       init_waitqueue_head(&fs_info->replace_wait);
        init_waitqueue_head(&fs_info->scrub_pause_wait);
        fs_info->scrub_workers_refcnt = 0;
 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
@@ -2458,104 +2508,68 @@ int open_ctree(struct super_block *sb,
                goto fail_alloc;
        }
 
-       btrfs_init_workers(&fs_info->generic_worker,
-                          "genwork", 1, NULL);
-
-       btrfs_init_workers(&fs_info->workers, "worker",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
+       max_active = fs_info->thread_pool_size;
 
-       btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
-                          fs_info->thread_pool_size, NULL);
+       fs_info->workers =
+               btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI,
+                                     max_active, 16);
 
-       btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
-                          fs_info->thread_pool_size, NULL);
+       fs_info->delalloc_workers =
+               btrfs_alloc_workqueue("delalloc", flags, max_active, 2);
 
-       btrfs_init_workers(&fs_info->submit_workers, "submit",
-                          min_t(u64, fs_devices->num_devices,
-                          fs_info->thread_pool_size), NULL);
+       fs_info->flush_workers =
+               btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0);
 
-       btrfs_init_workers(&fs_info->caching_workers, "cache",
-                          fs_info->thread_pool_size, NULL);
+       fs_info->caching_workers =
+               btrfs_alloc_workqueue("cache", flags, max_active, 0);
 
-       /* a higher idle thresh on the submit workers makes it much more
+       /*
+        * a higher idle thresh on the submit workers makes it much more
         * likely that bios will be send down in a sane order to the
         * devices
         */
-       fs_info->submit_workers.idle_thresh = 64;
-
-       fs_info->workers.idle_thresh = 16;
-       fs_info->workers.ordered = 1;
-
-       fs_info->delalloc_workers.idle_thresh = 2;
-       fs_info->delalloc_workers.ordered = 1;
-
-       btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_workers, "endio",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_meta_write_workers,
-                          "endio-meta-write", fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_raid56_workers,
-                          "endio-raid56", fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->rmw_workers,
-                          "rmw", fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
-                          1, &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->readahead_workers, "readahead",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
-       btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
-                          &fs_info->generic_worker);
+       fs_info->submit_workers =
+               btrfs_alloc_workqueue("submit", flags,
+                                     min_t(u64, fs_devices->num_devices,
+                                           max_active), 64);
+
+       fs_info->fixup_workers =
+               btrfs_alloc_workqueue("fixup", flags, 1, 0);
 
        /*
         * endios are largely parallel and should have a very
         * low idle thresh
         */
-       fs_info->endio_workers.idle_thresh = 4;
-       fs_info->endio_meta_workers.idle_thresh = 4;
-       fs_info->endio_raid56_workers.idle_thresh = 4;
-       fs_info->rmw_workers.idle_thresh = 2;
-
-       fs_info->endio_write_workers.idle_thresh = 2;
-       fs_info->endio_meta_write_workers.idle_thresh = 2;
-       fs_info->readahead_workers.idle_thresh = 2;
-
-       /*
-        * btrfs_start_workers can really only fail because of ENOMEM so just
-        * return -ENOMEM if any of these fail.
-        */
-       ret = btrfs_start_workers(&fs_info->workers);
-       ret |= btrfs_start_workers(&fs_info->generic_worker);
-       ret |= btrfs_start_workers(&fs_info->submit_workers);
-       ret |= btrfs_start_workers(&fs_info->delalloc_workers);
-       ret |= btrfs_start_workers(&fs_info->fixup_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
-       ret |= btrfs_start_workers(&fs_info->rmw_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_write_workers);
-       ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
-       ret |= btrfs_start_workers(&fs_info->delayed_workers);
-       ret |= btrfs_start_workers(&fs_info->caching_workers);
-       ret |= btrfs_start_workers(&fs_info->readahead_workers);
-       ret |= btrfs_start_workers(&fs_info->flush_workers);
-       ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
-       if (ret) {
+       fs_info->endio_workers =
+               btrfs_alloc_workqueue("endio", flags, max_active, 4);
+       fs_info->endio_meta_workers =
+               btrfs_alloc_workqueue("endio-meta", flags, max_active, 4);
+       fs_info->endio_meta_write_workers =
+               btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2);
+       fs_info->endio_raid56_workers =
+               btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4);
+       fs_info->rmw_workers =
+               btrfs_alloc_workqueue("rmw", flags, max_active, 2);
+       fs_info->endio_write_workers =
+               btrfs_alloc_workqueue("endio-write", flags, max_active, 2);
+       fs_info->endio_freespace_worker =
+               btrfs_alloc_workqueue("freespace-write", flags, max_active, 0);
+       fs_info->delayed_workers =
+               btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0);
+       fs_info->readahead_workers =
+               btrfs_alloc_workqueue("readahead", flags, max_active, 2);
+       fs_info->qgroup_rescan_workers =
+               btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0);
+
+       if (!(fs_info->workers && fs_info->delalloc_workers &&
+             fs_info->submit_workers && fs_info->flush_workers &&
+             fs_info->endio_workers && fs_info->endio_meta_workers &&
+             fs_info->endio_meta_write_workers &&
+             fs_info->endio_write_workers && fs_info->endio_raid56_workers &&
+             fs_info->endio_freespace_worker && fs_info->rmw_workers &&
+             fs_info->caching_workers && fs_info->readahead_workers &&
+             fs_info->fixup_workers && fs_info->delayed_workers &&
+             fs_info->qgroup_rescan_workers)) {
                err = -ENOMEM;
                goto fail_sb_buffer;
        }
@@ -2963,6 +2977,8 @@ fail_iput:
        btrfs_mapping_tree_free(&fs_info->mapping_tree);
 
        iput(fs_info->btree_inode);
+fail_bio_counter:
+       percpu_counter_destroy(&fs_info->bio_counter);
 fail_delalloc_bytes:
        percpu_counter_destroy(&fs_info->delalloc_bytes);
 fail_dirty_metadata_bytes:
@@ -3244,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
        /* send down all the barriers */
        head = &info->fs_devices->devices;
        list_for_each_entry_rcu(dev, head, dev_list) {
+               if (dev->missing)
+                       continue;
                if (!dev->bdev) {
                        errors_send++;
                        continue;
@@ -3258,6 +3276,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
 
        /* wait for all the barriers */
        list_for_each_entry_rcu(dev, head, dev_list) {
+               if (dev->missing)
+                       continue;
                if (!dev->bdev) {
                        errors_wait++;
                        continue;
@@ -3477,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root)
        root->orphan_block_rsv = NULL;
        if (root->anon_dev)
                free_anon_bdev(root->anon_dev);
+       if (root->subv_writers)
+               btrfs_free_subvolume_writers(root->subv_writers);
        free_extent_buffer(root->node);
        free_extent_buffer(root->commit_root);
        kfree(root->free_ino_ctl);
@@ -3610,6 +3632,7 @@ int close_ctree(struct btrfs_root *root)
 
        percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
        percpu_counter_destroy(&fs_info->delalloc_bytes);
+       percpu_counter_destroy(&fs_info->bio_counter);
        bdi_destroy(&fs_info->bdi);
        cleanup_srcu_struct(&fs_info->subvol_srcu);
 
@@ -3791,9 +3814,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
                list_move_tail(&root->ordered_root,
                               &fs_info->ordered_roots);
 
+               spin_unlock(&fs_info->ordered_root_lock);
                btrfs_destroy_ordered_extents(root);
 
-               cond_resched_lock(&fs_info->ordered_root_lock);
+               cond_resched();
+               spin_lock(&fs_info->ordered_root_lock);
        }
        spin_unlock(&fs_info->ordered_root_lock);
 }