Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/jmorris/linux...
[cascardo/linux.git] / drivers / md / dm-thin.c
index fc803d5..197ea20 100644 (file)
@@ -253,6 +253,7 @@ struct pool {
        struct bio_list deferred_flush_bios;
        struct list_head prepared_mappings;
        struct list_head prepared_discards;
+       struct list_head prepared_discards_pt2;
        struct list_head active_thins;
 
        struct dm_deferred_set *shared_read_ds;
@@ -269,6 +270,7 @@ struct pool {
 
        process_mapping_fn process_prepared_mapping;
        process_mapping_fn process_prepared_discard;
+       process_mapping_fn process_prepared_discard_pt2;
 
        struct dm_bio_prison_cell **cell_sort_array;
 };
@@ -360,7 +362,7 @@ static int issue_discard(struct discard_op *op, dm_block_t data_b, dm_block_t da
        sector_t len = block_to_sectors(tc->pool, data_e - data_b);
 
        return __blkdev_issue_discard(tc->pool_dev->bdev, s, len,
-                                     GFP_NOWAIT, REQ_WRITE | REQ_DISCARD, &op->bio);
+                                     GFP_NOWAIT, 0, &op->bio);
 }
 
 static void end_discard(struct discard_op *op, int r)
@@ -371,7 +373,8 @@ static void end_discard(struct discard_op *op, int r)
                 * need to wait for the chain to complete.
                 */
                bio_chain(op->bio, op->parent_bio);
-               submit_bio(REQ_WRITE | REQ_DISCARD, op->bio);
+               bio_set_op_attrs(op->bio, REQ_OP_DISCARD, 0);
+               submit_bio(op->bio);
        }
 
        blk_finish_plug(&op->plug);
@@ -696,7 +699,7 @@ static void remap_to_origin(struct thin_c *tc, struct bio *bio)
 
 static int bio_triggers_commit(struct thin_c *tc, struct bio *bio)
 {
-       return (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) &&
+       return (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA)) &&
                dm_thin_changed_this_transaction(tc->td);
 }
 
@@ -704,7 +707,7 @@ static void inc_all_io_entry(struct pool *pool, struct bio *bio)
 {
        struct dm_thin_endio_hook *h;
 
-       if (bio->bi_rw & REQ_DISCARD)
+       if (bio_op(bio) == REQ_OP_DISCARD)
                return;
 
        h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));
@@ -867,7 +870,8 @@ static void __inc_remap_and_issue_cell(void *context,
        struct bio *bio;
 
        while ((bio = bio_list_pop(&cell->bios))) {
-               if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA))
+               if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) ||
+                   bio_op(bio) == REQ_OP_DISCARD)
                        bio_list_add(&info->defer_bios, bio);
                else {
                        inc_all_io_entry(info->tc->pool, bio);
@@ -999,7 +1003,8 @@ static void process_prepared_discard_no_passdown(struct dm_thin_new_mapping *m)
 
 /*----------------------------------------------------------------*/
 
-static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m)
+static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m,
+                                                  struct bio *discard_parent)
 {
        /*
         * We've already unmapped this range of blocks, but before we
@@ -1012,7 +1017,7 @@ static void passdown_double_checking_shared_status(struct dm_thin_new_mapping *m
        dm_block_t b = m->data_block, e, end = m->data_block + m->virt_end - m->virt_begin;
        struct discard_op op;
 
-       begin_discard(&op, tc, m->bio);
+       begin_discard(&op, tc, discard_parent);
        while (b != end) {
                /* find start of unmapped run */
                for (; b < end; b++) {
@@ -1047,28 +1052,101 @@ out:
        end_discard(&op, r);
 }
 
-static void process_prepared_discard_passdown(struct dm_thin_new_mapping *m)
+static void queue_passdown_pt2(struct dm_thin_new_mapping *m)
+{
+       unsigned long flags;
+       struct pool *pool = m->tc->pool;
+
+       spin_lock_irqsave(&pool->lock, flags);
+       list_add_tail(&m->list, &pool->prepared_discards_pt2);
+       spin_unlock_irqrestore(&pool->lock, flags);
+       wake_worker(pool);
+}
+
+static void passdown_endio(struct bio *bio)
+{
+       /*
+        * It doesn't matter if the passdown discard failed; we still want
+        * to unmap, so the bio's error status is deliberately ignored.
+        */
+       queue_passdown_pt2(bio->bi_private);
+}
+
+static void process_prepared_discard_passdown_pt1(struct dm_thin_new_mapping *m)
 {
        int r;
        struct thin_c *tc = m->tc;
        struct pool *pool = tc->pool;
+       struct bio *discard_parent;
+       dm_block_t data_end = m->data_block + (m->virt_end - m->virt_begin);
 
+       /*
+        * Only this thread allocates blocks, so we can be sure that the
+        * newly unmapped blocks will not be allocated before the end of
+        * the function.
+        */
        r = dm_thin_remove_range(tc->td, m->virt_begin, m->virt_end);
        if (r) {
                metadata_operation_failed(pool, "dm_thin_remove_range", r);
                bio_io_error(m->bio);
+               cell_defer_no_holder(tc, m->cell);
+               mempool_free(m, pool->mapping_pool);
+               return;
+       }
 
-       } else if (m->maybe_shared) {
-               passdown_double_checking_shared_status(m);
+       /*
+        * Increment the unmapped blocks first: this stops them being
+        * reallocated during passdown, and m is still ours to free on error.
+        */
+       r = dm_pool_inc_data_range(pool->pmd, m->data_block, data_end);
+       if (r) {
+               metadata_operation_failed(pool, "dm_pool_inc_data_range", r);
+               bio_io_error(m->bio);
+               cell_defer_no_holder(tc, m->cell);
+               mempool_free(m, pool->mapping_pool);
+               return;
+       }
+
+       discard_parent = bio_alloc(GFP_NOIO, 1);
+       if (!discard_parent) {
+               DMWARN("%s: unable to allocate top level discard bio for passdown. Skipping passdown.",
+                      dm_device_name(tc->pool->pool_md));
+               queue_passdown_pt2(m);
 
        } else {
-               struct discard_op op;
-               begin_discard(&op, tc, m->bio);
-               r = issue_discard(&op, m->data_block,
-                                 m->data_block + (m->virt_end - m->virt_begin));
-               end_discard(&op, r);
+               discard_parent->bi_end_io = passdown_endio;
+               discard_parent->bi_private = m;
+
+               if (m->maybe_shared)
+                       passdown_double_checking_shared_status(m, discard_parent);
+               else {
+                       struct discard_op op;
+
+                       begin_discard(&op, tc, discard_parent);
+                       r = issue_discard(&op, m->data_block, data_end);
+                       end_discard(&op, r);
+               }
        }
+}
 
+static void process_prepared_discard_passdown_pt2(struct dm_thin_new_mapping *m)
+{
+       int r;
+       struct thin_c *tc = m->tc;
+       struct pool *pool = tc->pool;
+
+       /*
+        * The passdown has completed, so now we can decrement all those
+        * unmapped blocks.
+        */
+       r = dm_pool_dec_data_range(pool->pmd, m->data_block,
+                                  m->data_block + (m->virt_end - m->virt_begin));
+       if (r) {
+               metadata_operation_failed(pool, "dm_pool_dec_data_range", r);
+               bio_io_error(m->bio);
+       } else
+               bio_endio(m->bio);
+
        cell_defer_no_holder(tc, m->cell);
        mempool_free(m, pool->mapping_pool);
 }
@@ -1639,7 +1717,8 @@ static void __remap_and_issue_shared_cell(void *context,
 
        while ((bio = bio_list_pop(&cell->bios))) {
                if ((bio_data_dir(bio) == WRITE) ||
-                   (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)))
+                   (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) ||
+                    bio_op(bio) == REQ_OP_DISCARD))
                        bio_list_add(&info->defer_bios, bio);
                else {
                        struct dm_thin_endio_hook *h = dm_per_bio_data(bio, sizeof(struct dm_thin_endio_hook));;
@@ -2028,7 +2107,7 @@ static void process_thin_deferred_bios(struct thin_c *tc)
                        break;
                }
 
-               if (bio->bi_rw & REQ_DISCARD)
+               if (bio_op(bio) == REQ_OP_DISCARD)
                        pool->process_discard(tc, bio);
                else
                        pool->process_bio(tc, bio);
@@ -2115,7 +2194,7 @@ static void process_thin_deferred_cells(struct thin_c *tc)
                                return;
                        }
 
-                       if (cell->holder->bi_rw & REQ_DISCARD)
+                       if (bio_op(cell->holder) == REQ_OP_DISCARD)
                                pool->process_discard_cell(tc, cell);
                        else
                                pool->process_cell(tc, cell);
@@ -2212,6 +2291,8 @@ static void do_worker(struct work_struct *ws)
        throttle_work_update(&pool->throttle);
        process_prepared(pool, &pool->prepared_discards, &pool->process_prepared_discard);
        throttle_work_update(&pool->throttle);
+       process_prepared(pool, &pool->prepared_discards_pt2, &pool->process_prepared_discard_pt2);
+       throttle_work_update(&pool->throttle);
        process_deferred_bios(pool);
        throttle_work_complete(&pool->throttle);
 }
@@ -2340,7 +2421,8 @@ static void set_discard_callbacks(struct pool *pool)
 
        if (passdown_enabled(pt)) {
                pool->process_discard_cell = process_discard_cell_passdown;
-               pool->process_prepared_discard = process_prepared_discard_passdown;
+               pool->process_prepared_discard = process_prepared_discard_passdown_pt1;
+               pool->process_prepared_discard_pt2 = process_prepared_discard_passdown_pt2;
        } else {
                pool->process_discard_cell = process_discard_cell_no_passdown;
                pool->process_prepared_discard = process_prepared_discard_no_passdown;
@@ -2553,7 +2635,8 @@ static int thin_bio_map(struct dm_target *ti, struct bio *bio)
                return DM_MAPIO_SUBMITTED;
        }
 
-       if (bio->bi_rw & (REQ_DISCARD | REQ_FLUSH | REQ_FUA)) {
+       if (bio->bi_rw & (REQ_PREFLUSH | REQ_FUA) ||
+           bio_op(bio) == REQ_OP_DISCARD) {
                thin_defer_bio_with_throttle(tc, bio);
                return DM_MAPIO_SUBMITTED;
        }
@@ -2826,6 +2909,7 @@ static struct pool *pool_create(struct mapped_device *pool_md,
        bio_list_init(&pool->deferred_flush_bios);
        INIT_LIST_HEAD(&pool->prepared_mappings);
        INIT_LIST_HEAD(&pool->prepared_discards);
+       INIT_LIST_HEAD(&pool->prepared_discards_pt2);
        INIT_LIST_HEAD(&pool->active_thins);
        pool->low_water_triggered = false;
        pool->suspended = true;