Merge tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 10 Oct 2016 00:16:18 +0000 (17:16 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Mon, 10 Oct 2016 00:16:18 +0000 (17:16 -0700)
Pull device mapper updates from Mike Snitzer:

 - various fixes and cleanups for request-based DM core

 - add support for delaying the requeue of requests; used by DM
   multipath when all paths have failed and 'queue_if_no_path' is
   enabled (see the sketch after this list)

 - DM cache improvements to speed up the loading of metadata and the writing
   of the hint array

 - fix potential for a dm-crypt crash on device teardown

 - remove dm_bufio_cond_resched() and just use cond_resched()

 - change DM multipath to return a reservation conflict error
   immediately, rather than failing the path and retrying (potentially
   indefinitely)
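
A minimal, hypothetical sketch of the target side of the delayed-requeue support (this is not the dm-mpath code; the context struct and clone helper are invented names): with all paths down and 'queue_if_no_path' set, the clone-and-map hook returns the new DM_MAPIO_DELAY_REQUEUE so the core requeues the request after a delay instead of retrying it immediately.

/*
 * Hypothetical example target, for illustration only -- not dm-mpath.
 */
#include <linux/blkdev.h>
#include <linux/device-mapper.h>

struct example_mpath {                  /* hypothetical per-target state */
        bool has_usable_path;
        bool queue_if_no_path;
};

/* hypothetical helper: pick a path and build a clone request aimed at it */
struct request *example_clone_to_path(struct example_mpath *m, struct request *rq);

static int example_clone_and_map_rq(struct dm_target *ti, struct request *rq,
                                    union map_info *map_context,
                                    struct request **clone)
{
        struct example_mpath *m = ti->private;

        if (!m->has_usable_path) {
                if (m->queue_if_no_path)
                        /* all paths down: have the core requeue after a delay */
                        return DM_MAPIO_DELAY_REQUEUE;
                return -EIO;            /* no retry policy: core fails the request */
        }

        *clone = example_clone_to_path(m, rq);
        if (!*clone)
                return DM_MAPIO_REQUEUE;        /* transient: immediate requeue */

        return DM_MAPIO_REMAPPED;
}

As the dm-rq.c hunks below show, map_request() turns DM_MAPIO_DELAY_REQUEUE into a delayed blk-mq requeue (or an immediate one on the old .request_fn path, where no delay mechanism exists).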

* tag 'dm-4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (24 commits)
  dm mpath: always return reservation conflict without failing over
  dm bufio: remove dm_bufio_cond_resched()
  dm crypt: fix crash on exit
  dm cache metadata: switch to using the new cursor api for loading metadata
  dm array: introduce cursor api
  dm btree: introduce cursor api
  dm cache policy smq: distribute entries to random levels when switching to smq
  dm cache: speed up writing of the hint array
  dm array: add dm_array_new()
  dm mpath: delay the requeue of blk-mq requests while all paths down
  dm mpath: use dm_mq_kick_requeue_list()
  dm rq: introduce dm_mq_kick_requeue_list()
  dm rq: reduce arguments passed to map_request() and dm_requeue_original_request()
  dm rq: add DM_MAPIO_DELAY_REQUEUE to delay requeue of blk-mq requests
  dm: convert wait loops to use autoremove_wake_function()
  dm: use signal_pending_state() in dm_wait_for_completion()
  dm: rename task state function arguments
  dm: add two lockdep_assert_held() statements
  dm rq: simplify dm_old_stop_queue()
  dm mpath: check if path's request_queue is dying in activate_path()
  ...

drivers/md/dm-bufio.c
drivers/md/dm-crypt.c
drivers/md/dm-rq.c

diff --combined drivers/md/dm-bufio.c
@@@ -191,19 -191,6 +191,6 @@@ static void dm_bufio_unlock(struct dm_b
        mutex_unlock(&c->lock);
  }
  
- /*
-  * FIXME Move to sched.h?
-  */
- #ifdef CONFIG_PREEMPT_VOLUNTARY
- #  define dm_bufio_cond_resched()             \
- do {                                          \
-       if (unlikely(need_resched()))           \
-               _cond_resched();                \
- } while (0)
- #else
- #  define dm_bufio_cond_resched()                do { } while (0)
- #endif
  /*----------------------------------------------------------------*/
  
  /*
@@@ -741,7 -728,7 +728,7 @@@ static void __flush_write_list(struct l
                        list_entry(write_list->next, struct dm_buffer, write_list);
                list_del(&b->write_list);
                submit_io(b, WRITE, b->block, write_endio);
-               dm_bufio_cond_resched();
+               cond_resched();
        }
        blk_finish_plug(&plug);
  }
@@@ -780,7 -767,7 +767,7 @@@ static struct dm_buffer *__get_unclaime
                        __unlink_buffer(b);
                        return b;
                }
-               dm_bufio_cond_resched();
+               cond_resched();
        }
  
        list_for_each_entry_reverse(b, &c->lru[LIST_DIRTY], lru_list) {
                        __unlink_buffer(b);
                        return b;
                }
-               dm_bufio_cond_resched();
+               cond_resched();
        }
  
        return NULL;
@@@ -923,7 -910,7 +910,7 @@@ static void __write_dirty_buffers_async
                        return;
  
                __write_dirty_buffer(b, write_list);
-               dm_bufio_cond_resched();
+               cond_resched();
        }
  }
  
@@@ -973,7 -960,7 +960,7 @@@ static void __check_watermark(struct dm
                        return;
  
                __free_buffer_wake(b);
-               dm_bufio_cond_resched();
+               cond_resched();
        }
  
        if (c->n_buffers[LIST_DIRTY] > threshold_buffers)
@@@ -1170,7 -1157,7 +1157,7 @@@ void dm_bufio_prefetch(struct dm_bufio_
                                submit_io(b, READ, b->block, read_endio);
                        dm_bufio_release(b);
  
-                       dm_bufio_cond_resched();
+                       cond_resched();
  
                        if (!n_blocks)
                                goto flush_plug;
@@@ -1291,7 -1278,7 +1278,7 @@@ again
                    !test_bit(B_WRITING, &b->state))
                        __relink_lru(b, LIST_CLEAN);
  
-               dm_bufio_cond_resched();
+               cond_resched();
  
                /*
                 * If we dropped the lock, the list is no longer consistent,
@@@ -1574,7 -1561,7 +1561,7 @@@ static unsigned long __scan(struct dm_b
                                freed++;
                        if (!--nr_to_scan || ((count - freed) <= retain_target))
                                return freed;
-                       dm_bufio_cond_resched();
+                       cond_resched();
                }
        }
        return freed;
@@@ -1808,7 -1795,7 +1795,7 @@@ static void __evict_old_buffers(struct 
                if (__try_evict_buffer(b, 0))
                        count--;
  
-               dm_bufio_cond_resched();
+               cond_resched();
        }
  
        dm_bufio_unlock(c);
@@@ -1879,7 -1866,7 +1866,7 @@@ static int __init dm_bufio_init(void
        __cache_size_refresh();
        mutex_unlock(&dm_bufio_clients_lock);
  
 -      dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache");
 +      dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0);
        if (!dm_bufio_wq)
                return -ENOMEM;
  
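For context on the workqueue hunk above, an illustrative allocation of a WQ_MEM_RECLAIM workqueue (names are hypothetical): the flag guarantees a rescuer thread so work queued from the memory-reclaim path can still make forward progress under memory pressure; a max_active of 0 selects the default limit.

#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;     /* hypothetical */

static int __init example_init(void)
{
        example_wq = alloc_workqueue("example_cache", WQ_MEM_RECLAIM, 0);
        if (!example_wq)
                return -ENOMEM;
        return 0;
}

static void __exit example_exit(void)
{
        destroy_workqueue(example_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
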
diff --combined drivers/md/dm-crypt.c
@@@ -113,8 -113,7 +113,7 @@@ struct iv_tcw_private 
   * and encrypts / decrypts at the same time.
   */
  enum flags { DM_CRYPT_SUSPENDED, DM_CRYPT_KEY_VALID,
-            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD,
-            DM_CRYPT_EXIT_THREAD};
+            DM_CRYPT_SAME_CPU, DM_CRYPT_NO_OFFLOAD };
  
  /*
   * The fields in here must be read only after initialization.
@@@ -1207,18 -1206,20 +1206,20 @@@ continue_locked
                if (!RB_EMPTY_ROOT(&cc->write_tree))
                        goto pop_from_list;
  
-               if (unlikely(test_bit(DM_CRYPT_EXIT_THREAD, &cc->flags))) {
-                       spin_unlock_irq(&cc->write_thread_wait.lock);
-                       break;
-               }
-               __set_current_state(TASK_INTERRUPTIBLE);
+               set_current_state(TASK_INTERRUPTIBLE);
                __add_wait_queue(&cc->write_thread_wait, &wait);
  
                spin_unlock_irq(&cc->write_thread_wait.lock);
  
+               if (unlikely(kthread_should_stop())) {
+                       set_task_state(current, TASK_RUNNING);
+                       remove_wait_queue(&cc->write_thread_wait, &wait);
+                       break;
+               }
                schedule();
  
+               set_task_state(current, TASK_RUNNING);
                spin_lock_irq(&cc->write_thread_wait.lock);
                __remove_wait_queue(&cc->write_thread_wait, &wait);
                goto continue_locked;
@@@ -1453,7 -1454,7 +1454,7 @@@ static int crypt_alloc_tfms(struct cryp
        unsigned i;
        int err;
  
 -      cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
 +      cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *),
                           GFP_KERNEL);
        if (!cc->tfms)
                return -ENOMEM;
@@@ -1533,13 -1534,8 +1534,8 @@@ static void crypt_dtr(struct dm_target 
        if (!cc)
                return;
  
-       if (cc->write_thread) {
-               spin_lock_irq(&cc->write_thread_wait.lock);
-               set_bit(DM_CRYPT_EXIT_THREAD, &cc->flags);
-               wake_up_locked(&cc->write_thread_wait);
-               spin_unlock_irq(&cc->write_thread_wait.lock);
+       if (cc->write_thread)
                kthread_stop(cc->write_thread);
-       }
  
        if (cc->io_queue)
                destroy_workqueue(cc->io_queue);
@@@ -1924,13 -1920,6 +1920,13 @@@ static int crypt_map(struct dm_target *
                return DM_MAPIO_REMAPPED;
        }
  
 +      /*
 +       * Check if bio is too large, split as needed.
 +       */
 +      if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
 +          bio_data_dir(bio) == WRITE)
 +              dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
 +
        io = dm_per_bio_data(bio, cc->per_bio_data_size);
        crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector));
        io->ctx.req = (struct skcipher_request *)(io + 1);
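
An illustrative sketch (hypothetical context struct and work callbacks, not the dm-crypt code) of the writer-thread shutdown pattern the hunks above switch to: the thread checks kthread_should_stop() only after marking itself TASK_INTERRUPTIBLE, so a concurrent kthread_stop() in the destructor cannot be lost, and no dedicated exit flag such as the removed DM_CRYPT_EXIT_THREAD is needed.

/*
 * Illustrative only -- the context struct and the have_work()/do_work()
 * callbacks are invented for this sketch.
 */
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/wait.h>

struct example_writer {
        wait_queue_head_t wq;
        bool (*have_work)(struct example_writer *w);
        void (*do_work)(struct example_writer *w);
};

static int example_write_thread(void *data)
{
        struct example_writer *w = data;

        while (1) {
                DECLARE_WAITQUEUE(wait, current);

                if (w->have_work(w)) {
                        w->do_work(w);
                        continue;
                }

                set_current_state(TASK_INTERRUPTIBLE);
                add_wait_queue(&w->wq, &wait);

                /*
                 * Checked only after going TASK_INTERRUPTIBLE: a concurrent
                 * kthread_stop() is either seen here or wakes us out of
                 * schedule(), so the stop request cannot be lost.
                 */
                if (kthread_should_stop()) {
                        __set_current_state(TASK_RUNNING);
                        remove_wait_queue(&w->wq, &wait);
                        break;
                }

                schedule();

                __set_current_state(TASK_RUNNING);
                remove_wait_queue(&w->wq, &wait);
        }
        return 0;
}

The matching teardown is then a plain kthread_stop() on the thread, as crypt_dtr() now does in the hunk above.
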
diff --combined drivers/md/dm-rq.c
@@@ -73,43 -73,60 +73,60 @@@ static void dm_old_start_queue(struct r
        spin_unlock_irqrestore(q->queue_lock, flags);
  }
  
+ static void dm_mq_start_queue(struct request_queue *q)
+ {
+       unsigned long flags;
+       spin_lock_irqsave(q->queue_lock, flags);
+       queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+       blk_mq_start_stopped_hw_queues(q, true);
+       blk_mq_kick_requeue_list(q);
+ }
  void dm_start_queue(struct request_queue *q)
  {
        if (!q->mq_ops)
                dm_old_start_queue(q);
-       else {
-               queue_flag_clear_unlocked(QUEUE_FLAG_STOPPED, q);
-               blk_mq_start_stopped_hw_queues(q, true);
-               blk_mq_kick_requeue_list(q);
-       }
+       else
+               dm_mq_start_queue(q);
  }
  
  static void dm_old_stop_queue(struct request_queue *q)
  {
        unsigned long flags;
  
+       spin_lock_irqsave(q->queue_lock, flags);
+       if (!blk_queue_stopped(q))
+               blk_stop_queue(q);
+       spin_unlock_irqrestore(q->queue_lock, flags);
+ }
+ static void dm_mq_stop_queue(struct request_queue *q)
+ {
+       unsigned long flags;
        spin_lock_irqsave(q->queue_lock, flags);
        if (blk_queue_stopped(q)) {
                spin_unlock_irqrestore(q->queue_lock, flags);
                return;
        }
  
-       blk_stop_queue(q);
+       queue_flag_set(QUEUE_FLAG_STOPPED, q);
        spin_unlock_irqrestore(q->queue_lock, flags);
+       /* Avoid that requeuing could restart the queue. */
+       blk_mq_cancel_requeue_work(q);
+       blk_mq_stop_hw_queues(q);
  }
  
  void dm_stop_queue(struct request_queue *q)
  {
        if (!q->mq_ops)
                dm_old_stop_queue(q);
-       else {
-               spin_lock_irq(q->queue_lock);
-               queue_flag_set(QUEUE_FLAG_STOPPED, q);
-               spin_unlock_irq(q->queue_lock);
-               blk_mq_cancel_requeue_work(q);
-               blk_mq_stop_hw_queues(q);
-       }
+       else
+               dm_mq_stop_queue(q);
  }
  
  static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
@@@ -319,21 -336,32 +336,32 @@@ static void dm_old_requeue_request(stru
        spin_unlock_irqrestore(q->queue_lock, flags);
  }
  
- static void dm_mq_requeue_request(struct request *rq)
+ static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
  {
-       struct request_queue *q = rq->q;
        unsigned long flags;
  
-       blk_mq_requeue_request(rq);
        spin_lock_irqsave(q->queue_lock, flags);
        if (!blk_queue_stopped(q))
-               blk_mq_kick_requeue_list(q);
+               blk_mq_delay_kick_requeue_list(q, msecs);
        spin_unlock_irqrestore(q->queue_lock, flags);
  }
  
- static void dm_requeue_original_request(struct mapped_device *md,
-                                       struct request *rq)
+ void dm_mq_kick_requeue_list(struct mapped_device *md)
+ {
+       __dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
+ }
+ EXPORT_SYMBOL(dm_mq_kick_requeue_list);
+ static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
+ {
+       blk_mq_requeue_request(rq);
+       __dm_mq_kick_requeue_list(rq->q, msecs);
+ }
+ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
  {
+       struct mapped_device *md = tio->md;
+       struct request *rq = tio->orig;
        int rw = rq_data_dir(rq);
  
        rq_end_stats(md, rq);
        if (!rq->q->mq_ops)
                dm_old_requeue_request(rq);
        else
-               dm_mq_requeue_request(rq);
+               dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);
  
        rq_completed(md, rw, false);
  }
@@@ -372,7 -400,7 +400,7 @@@ static void dm_done(struct request *clo
                return;
        else if (r == DM_ENDIO_REQUEUE)
                /* The target wants to requeue the I/O */
-               dm_requeue_original_request(tio->md, tio->orig);
+               dm_requeue_original_request(tio, false);
        else {
                DMWARN("unimplemented target endio return value: %d", r);
                BUG();
@@@ -612,20 -640,23 +640,23 @@@ static int dm_old_prep_fn(struct reques
  
  /*
   * Returns:
-  * 0                : the request has been processed
-  * DM_MAPIO_REQUEUE : the original request needs to be requeued
+  * DM_MAPIO_*       : the request has been processed as indicated
+  * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
   * < 0              : the request was completed due to failure
   */
- static int map_request(struct dm_rq_target_io *tio, struct request *rq,
-                      struct mapped_device *md)
+ static int map_request(struct dm_rq_target_io *tio)
  {
        int r;
        struct dm_target *ti = tio->ti;
+       struct mapped_device *md = tio->md;
+       struct request *rq = tio->orig;
        struct request *clone = NULL;
  
        if (tio->clone) {
                clone = tio->clone;
                r = ti->type->map_rq(ti, clone, &tio->info);
+               if (r == DM_MAPIO_DELAY_REQUEUE)
+                       return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
        } else {
                r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
                if (r < 0) {
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
-               if (r != DM_MAPIO_REMAPPED)
-                       return r;
-               if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
+               if (r == DM_MAPIO_REMAPPED &&
+                   setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        ti->type->release_clone_rq(clone);
                        return DM_MAPIO_REQUEUE;
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
-               dm_requeue_original_request(md, tio->orig);
+               break;
+       case DM_MAPIO_DELAY_REQUEUE:
+               /* The target wants to requeue the I/O after a delay */
+               dm_requeue_original_request(tio, true);
                break;
        default:
                if (r > 0) {
  
                /* The target wants to complete the I/O */
                dm_kill_unmapped_request(rq, r);
-               return r;
        }
  
-       return 0;
+       return r;
  }
  
  static void dm_start_request(struct mapped_device *md, struct request *orig)
  static void map_tio_request(struct kthread_work *work)
  {
        struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
-       struct request *rq = tio->orig;
-       struct mapped_device *md = tio->md;
  
-       if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
-               dm_requeue_original_request(md, rq);
+       if (map_request(tio) == DM_MAPIO_REQUEUE)
+               dm_requeue_original_request(tio, false);
  }
  
  ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
@@@ -896,7 -926,7 +926,7 @@@ static int dm_mq_queue_rq(struct blk_mq
        tio->ti = ti;
  
        /* Direct call is fine since .queue_rq allows allocations */
-       if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
+       if (map_request(tio) == DM_MAPIO_REQUEUE) {
                /* Undo dm_start_request() before requeuing */
                rq_end_stats(md, rq);
                rq_completed(md, rq_data_dir(rq), false);
@@@ -955,7 -985,7 +985,7 @@@ int dm_mq_init_request_queue(struct map
        dm_init_md_queue(md);
  
        /* backfill 'mq' sysfs registration normally done in blk_register_queue */
 -      blk_mq_register_disk(md->disk);
 +      blk_mq_register_dev(disk_to_dev(md->disk), q);
  
        return 0;
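
Finally, a hypothetical caller-side sketch of the new dm_mq_kick_requeue_list() export (all names other than the DM core symbols are invented): a target that had been delaying requeues, e.g. because all paths were down, can kick the blk-mq requeue list as soon as it can make progress again, so delayed requests are redispatched without waiting out the timer. The helper's declaration is internal to the DM core; it is assumed here to come via the in-tree dm-rq.h header.

#include <linux/device-mapper.h>

#include "dm-rq.h"      /* assumed in-tree header declaring dm_mq_kick_requeue_list() */

struct example_target {                 /* hypothetical per-target state */
        struct dm_target *ti;
};

/* called when a usable path comes back or 'queue_if_no_path' is cleared */
static void example_paths_usable_again(struct example_target *t)
{
        /* resolve the owning mapped_device and kick its requeue list */
        dm_mq_kick_requeue_list(dm_table_get_md(t->ti->table));
}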