Merge tag 'trace-v4.8-1' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt...
[cascardo/linux.git] / drivers / md / dm-rq.c
1 /*
2  * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
3  *
4  * This file is released under the GPL.
5  */
6
7 #include "dm-core.h"
8 #include "dm-rq.h"
9
10 #include <linux/elevator.h> /* for rq_end_sector() */
11 #include <linux/blk-mq.h>
12
/* Message prefix for this file; presumably consumed by the DMERR/DMWARN macros. */
#define DM_MSG_PREFIX "core-rq"

/* Built-in defaults for the dm-mq tunables declared as module parameters. */
#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS      256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

/* The initial value of use_blk_mq is selected by CONFIG_DM_MQ_DEFAULT. */
#ifdef CONFIG_DM_MQ_DEFAULT
static bool use_blk_mq = true;
#else
static bool use_blk_mq = false;
#endif
31
32 bool dm_use_blk_mq_default(void)
33 {
34         return use_blk_mq;
35 }
36
37 bool dm_use_blk_mq(struct mapped_device *md)
38 {
39         return md->use_blk_mq;
40 }
41 EXPORT_SYMBOL_GPL(dm_use_blk_mq);
42
43 unsigned dm_get_reserved_rq_based_ios(void)
44 {
45         return __dm_get_module_param(&reserved_rq_based_ios,
46                                      RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
47 }
48 EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);
49
50 static unsigned dm_get_blk_mq_nr_hw_queues(void)
51 {
52         return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
53 }
54
55 static unsigned dm_get_blk_mq_queue_depth(void)
56 {
57         return __dm_get_module_param(&dm_mq_queue_depth,
58                                      DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
59 }
60
61 int dm_request_based(struct mapped_device *md)
62 {
63         return blk_queue_stackable(md->queue);
64 }
65
66 static void dm_old_start_queue(struct request_queue *q)
67 {
68         unsigned long flags;
69
70         spin_lock_irqsave(q->queue_lock, flags);
71         if (blk_queue_stopped(q))
72                 blk_start_queue(q);
73         spin_unlock_irqrestore(q->queue_lock, flags);
74 }
75
76 void dm_start_queue(struct request_queue *q)
77 {
78         if (!q->mq_ops)
79                 dm_old_start_queue(q);
80         else {
81                 blk_mq_start_stopped_hw_queues(q, true);
82                 blk_mq_kick_requeue_list(q);
83         }
84 }
85
86 static void dm_old_stop_queue(struct request_queue *q)
87 {
88         unsigned long flags;
89
90         spin_lock_irqsave(q->queue_lock, flags);
91         if (blk_queue_stopped(q)) {
92                 spin_unlock_irqrestore(q->queue_lock, flags);
93                 return;
94         }
95
96         blk_stop_queue(q);
97         spin_unlock_irqrestore(q->queue_lock, flags);
98 }
99
100 void dm_stop_queue(struct request_queue *q)
101 {
102         if (!q->mq_ops)
103                 dm_old_stop_queue(q);
104         else
105                 blk_mq_stop_hw_queues(q);
106 }
107
108 static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
109                                                 gfp_t gfp_mask)
110 {
111         return mempool_alloc(md->io_pool, gfp_mask);
112 }
113
114 static void free_old_rq_tio(struct dm_rq_target_io *tio)
115 {
116         mempool_free(tio, tio->md->io_pool);
117 }
118
119 static struct request *alloc_old_clone_request(struct mapped_device *md,
120                                                gfp_t gfp_mask)
121 {
122         return mempool_alloc(md->rq_pool, gfp_mask);
123 }
124
125 static void free_old_clone_request(struct mapped_device *md, struct request *rq)
126 {
127         mempool_free(rq, md->rq_pool);
128 }
129
130 /*
131  * Partial completion handling for request-based dm
132  */
133 static void end_clone_bio(struct bio *clone)
134 {
135         struct dm_rq_clone_bio_info *info =
136                 container_of(clone, struct dm_rq_clone_bio_info, clone);
137         struct dm_rq_target_io *tio = info->tio;
138         struct bio *bio = info->orig;
139         unsigned int nr_bytes = info->orig->bi_iter.bi_size;
140         int error = clone->bi_error;
141
142         bio_put(clone);
143
144         if (tio->error)
145                 /*
146                  * An error has already been detected on the request.
147                  * Once error occurred, just let clone->end_io() handle
148                  * the remainder.
149                  */
150                 return;
151         else if (error) {
152                 /*
153                  * Don't notice the error to the upper layer yet.
154                  * The error handling decision is made by the target driver,
155                  * when the request is completed.
156                  */
157                 tio->error = error;
158                 return;
159         }
160
161         /*
162          * I/O for the bio successfully completed.
163          * Notice the data completion to the upper layer.
164          */
165
166         /*
167          * bios are processed from the head of the list.
168          * So the completing bio should always be rq->bio.
169          * If it's not, something wrong is happening.
170          */
171         if (tio->orig->bio != bio)
172                 DMERR("bio completion is going in the middle of the request");
173
174         /*
175          * Update the original request.
176          * Do not use blk_end_request() here, because it may complete
177          * the original request before the clone, and break the ordering.
178          */
179         blk_update_request(tio->orig, 0, nr_bytes);
180 }
181
182 static struct dm_rq_target_io *tio_from_request(struct request *rq)
183 {
184         return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
185 }
186
187 static void rq_end_stats(struct mapped_device *md, struct request *orig)
188 {
189         if (unlikely(dm_stats_used(&md->stats))) {
190                 struct dm_rq_target_io *tio = tio_from_request(orig);
191                 tio->duration_jiffies = jiffies - tio->duration_jiffies;
192                 dm_stats_account_io(&md->stats, rq_data_dir(orig),
193                                     blk_rq_pos(orig), tio->n_sectors, true,
194                                     tio->duration_jiffies, &tio->stats_aux);
195         }
196 }
197
198 /*
199  * Don't touch any member of the md after calling this function because
200  * the md may be freed in dm_put() at the end of this function.
201  * Or do dm_get() before calling this function and dm_put() later.
202  */
203 static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
204 {
205         atomic_dec(&md->pending[rw]);
206
207         /* nudge anyone waiting on suspend queue */
208         if (!md_in_flight(md))
209                 wake_up(&md->wait);
210
211         /*
212          * Run this off this callpath, as drivers could invoke end_io while
213          * inside their request_fn (and holding the queue lock). Calling
214          * back into ->request_fn() could deadlock attempting to grab the
215          * queue lock again.
216          */
217         if (!md->queue->mq_ops && run_queue)
218                 blk_run_queue_async(md->queue);
219
220         /*
221          * dm_put() must be at the end of this function. See the comment above
222          */
223         dm_put(md);
224 }
225
226 static void free_rq_clone(struct request *clone)
227 {
228         struct dm_rq_target_io *tio = clone->end_io_data;
229         struct mapped_device *md = tio->md;
230
231         blk_rq_unprep_clone(clone);
232
233         /*
234          * It is possible for a clone_old_rq() allocated clone to
235          * get passed in -- it may not yet have a request_queue.
236          * This is known to occur if the error target replaces
237          * a multipath target that has a request_fn queue stacked
238          * on blk-mq queue(s).
239          */
240         if (clone->q && clone->q->mq_ops)
241                 /* stacked on blk-mq queue(s) */
242                 tio->ti->type->release_clone_rq(clone);
243         else if (!md->queue->mq_ops)
244                 /* request_fn queue stacked on request_fn queue(s) */
245                 free_old_clone_request(md, clone);
246
247         if (!md->queue->mq_ops)
248                 free_old_rq_tio(tio);
249 }
250
251 /*
252  * Complete the clone and the original request.
253  * Must be called without clone's queue lock held,
254  * see end_clone_request() for more details.
255  */
256 static void dm_end_request(struct request *clone, int error)
257 {
258         int rw = rq_data_dir(clone);
259         struct dm_rq_target_io *tio = clone->end_io_data;
260         struct mapped_device *md = tio->md;
261         struct request *rq = tio->orig;
262
263         if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
264                 rq->errors = clone->errors;
265                 rq->resid_len = clone->resid_len;
266
267                 if (rq->sense)
268                         /*
269                          * We are using the sense buffer of the original
270                          * request.
271                          * So setting the length of the sense data is enough.
272                          */
273                         rq->sense_len = clone->sense_len;
274         }
275
276         free_rq_clone(clone);
277         rq_end_stats(md, rq);
278         if (!rq->q->mq_ops)
279                 blk_end_request_all(rq, error);
280         else
281                 blk_mq_end_request(rq, error);
282         rq_completed(md, rw, true);
283 }
284
285 static void dm_unprep_request(struct request *rq)
286 {
287         struct dm_rq_target_io *tio = tio_from_request(rq);
288         struct request *clone = tio->clone;
289
290         if (!rq->q->mq_ops) {
291                 rq->special = NULL;
292                 rq->cmd_flags &= ~REQ_DONTPREP;
293         }
294
295         if (clone)
296                 free_rq_clone(clone);
297         else if (!tio->md->queue->mq_ops)
298                 free_old_rq_tio(tio);
299 }
300
301 /*
302  * Requeue the original request of a clone.
303  */
304 static void dm_old_requeue_request(struct request *rq)
305 {
306         struct request_queue *q = rq->q;
307         unsigned long flags;
308
309         spin_lock_irqsave(q->queue_lock, flags);
310         blk_requeue_request(q, rq);
311         blk_run_queue_async(q);
312         spin_unlock_irqrestore(q->queue_lock, flags);
313 }
314
315 static void dm_mq_requeue_request(struct request *rq)
316 {
317         struct request_queue *q = rq->q;
318         unsigned long flags;
319
320         blk_mq_requeue_request(rq);
321         spin_lock_irqsave(q->queue_lock, flags);
322         if (!blk_queue_stopped(q))
323                 blk_mq_kick_requeue_list(q);
324         spin_unlock_irqrestore(q->queue_lock, flags);
325 }
326
327 static void dm_requeue_original_request(struct mapped_device *md,
328                                         struct request *rq)
329 {
330         int rw = rq_data_dir(rq);
331
332         rq_end_stats(md, rq);
333         dm_unprep_request(rq);
334
335         if (!rq->q->mq_ops)
336                 dm_old_requeue_request(rq);
337         else
338                 dm_mq_requeue_request(rq);
339
340         rq_completed(md, rw, false);
341 }
342
343 static void dm_done(struct request *clone, int error, bool mapped)
344 {
345         int r = error;
346         struct dm_rq_target_io *tio = clone->end_io_data;
347         dm_request_endio_fn rq_end_io = NULL;
348
349         if (tio->ti) {
350                 rq_end_io = tio->ti->type->rq_end_io;
351
352                 if (mapped && rq_end_io)
353                         r = rq_end_io(tio->ti, clone, error, &tio->info);
354         }
355
356         if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
357                      !clone->q->limits.max_write_same_sectors))
358                 disable_write_same(tio->md);
359
360         if (r <= 0)
361                 /* The target wants to complete the I/O */
362                 dm_end_request(clone, r);
363         else if (r == DM_ENDIO_INCOMPLETE)
364                 /* The target will handle the I/O */
365                 return;
366         else if (r == DM_ENDIO_REQUEUE)
367                 /* The target wants to requeue the I/O */
368                 dm_requeue_original_request(tio->md, tio->orig);
369         else {
370                 DMWARN("unimplemented target endio return value: %d", r);
371                 BUG();
372         }
373 }
374
375 /*
376  * Request completion handler for request-based dm
377  */
378 static void dm_softirq_done(struct request *rq)
379 {
380         bool mapped = true;
381         struct dm_rq_target_io *tio = tio_from_request(rq);
382         struct request *clone = tio->clone;
383         int rw;
384
385         if (!clone) {
386                 rq_end_stats(tio->md, rq);
387                 rw = rq_data_dir(rq);
388                 if (!rq->q->mq_ops) {
389                         blk_end_request_all(rq, tio->error);
390                         rq_completed(tio->md, rw, false);
391                         free_old_rq_tio(tio);
392                 } else {
393                         blk_mq_end_request(rq, tio->error);
394                         rq_completed(tio->md, rw, false);
395                 }
396                 return;
397         }
398
399         if (rq->cmd_flags & REQ_FAILED)
400                 mapped = false;
401
402         dm_done(clone, tio->error, mapped);
403 }
404
405 /*
406  * Complete the clone and the original request with the error status
407  * through softirq context.
408  */
409 static void dm_complete_request(struct request *rq, int error)
410 {
411         struct dm_rq_target_io *tio = tio_from_request(rq);
412
413         tio->error = error;
414         if (!rq->q->mq_ops)
415                 blk_complete_request(rq);
416         else
417                 blk_mq_complete_request(rq, error);
418 }
419
420 /*
421  * Complete the not-mapped clone and the original request with the error status
422  * through softirq context.
423  * Target's rq_end_io() function isn't called.
424  * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
425  */
426 static void dm_kill_unmapped_request(struct request *rq, int error)
427 {
428         rq->cmd_flags |= REQ_FAILED;
429         dm_complete_request(rq, error);
430 }
431
432 /*
433  * Called with the clone's queue lock held (in the case of .request_fn)
434  */
435 static void end_clone_request(struct request *clone, int error)
436 {
437         struct dm_rq_target_io *tio = clone->end_io_data;
438
439         if (!clone->q->mq_ops) {
440                 /*
441                  * For just cleaning up the information of the queue in which
442                  * the clone was dispatched.
443                  * The clone is *NOT* freed actually here because it is alloced
444                  * from dm own mempool (REQ_ALLOCED isn't set).
445                  */
446                 __blk_put_request(clone->q, clone);
447         }
448
449         /*
450          * Actual request completion is done in a softirq context which doesn't
451          * hold the clone's queue lock.  Otherwise, deadlock could occur because:
452          *     - another request may be submitted by the upper level driver
453          *       of the stacking during the completion
454          *     - the submission which requires queue lock may be done
455          *       against this clone's queue
456          */
457         dm_complete_request(tio->orig, error);
458 }
459
460 static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
461 {
462         int r;
463
464         if (blk_queue_io_stat(clone->q))
465                 clone->cmd_flags |= REQ_IO_STAT;
466
467         clone->start_time = jiffies;
468         r = blk_insert_cloned_request(clone->q, clone);
469         if (r)
470                 /* must complete clone in terms of original request */
471                 dm_complete_request(rq, r);
472 }
473
474 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
475                                  void *data)
476 {
477         struct dm_rq_target_io *tio = data;
478         struct dm_rq_clone_bio_info *info =
479                 container_of(bio, struct dm_rq_clone_bio_info, clone);
480
481         info->orig = bio_orig;
482         info->tio = tio;
483         bio->bi_end_io = end_clone_bio;
484
485         return 0;
486 }
487
488 static int setup_clone(struct request *clone, struct request *rq,
489                        struct dm_rq_target_io *tio, gfp_t gfp_mask)
490 {
491         int r;
492
493         r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
494                               dm_rq_bio_constructor, tio);
495         if (r)
496                 return r;
497
498         clone->cmd = rq->cmd;
499         clone->cmd_len = rq->cmd_len;
500         clone->sense = rq->sense;
501         clone->end_io = end_clone_request;
502         clone->end_io_data = tio;
503
504         tio->clone = clone;
505
506         return 0;
507 }
508
509 static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
510                                     struct dm_rq_target_io *tio, gfp_t gfp_mask)
511 {
512         /*
513          * Create clone for use with .request_fn request_queue
514          */
515         struct request *clone;
516
517         clone = alloc_old_clone_request(md, gfp_mask);
518         if (!clone)
519                 return NULL;
520
521         blk_rq_init(NULL, clone);
522         if (setup_clone(clone, rq, tio, gfp_mask)) {
523                 /* -ENOMEM */
524                 free_old_clone_request(md, clone);
525                 return NULL;
526         }
527
528         return clone;
529 }
530
531 static void map_tio_request(struct kthread_work *work);
532
533 static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
534                      struct mapped_device *md)
535 {
536         tio->md = md;
537         tio->ti = NULL;
538         tio->clone = NULL;
539         tio->orig = rq;
540         tio->error = 0;
541         /*
542          * Avoid initializing info for blk-mq; it passes
543          * target-specific data through info.ptr
544          * (see: dm_mq_init_request)
545          */
546         if (!md->init_tio_pdu)
547                 memset(&tio->info, 0, sizeof(tio->info));
548         if (md->kworker_task)
549                 init_kthread_work(&tio->work, map_tio_request);
550 }
551
552 static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
553                                                struct mapped_device *md,
554                                                gfp_t gfp_mask)
555 {
556         struct dm_rq_target_io *tio;
557         int srcu_idx;
558         struct dm_table *table;
559
560         tio = alloc_old_rq_tio(md, gfp_mask);
561         if (!tio)
562                 return NULL;
563
564         init_tio(tio, rq, md);
565
566         table = dm_get_live_table(md, &srcu_idx);
567         /*
568          * Must clone a request if this .request_fn DM device
569          * is stacked on .request_fn device(s).
570          */
571         if (!dm_table_all_blk_mq_devices(table)) {
572                 if (!clone_old_rq(rq, md, tio, gfp_mask)) {
573                         dm_put_live_table(md, srcu_idx);
574                         free_old_rq_tio(tio);
575                         return NULL;
576                 }
577         }
578         dm_put_live_table(md, srcu_idx);
579
580         return tio;
581 }
582
583 /*
584  * Called with the queue lock held.
585  */
586 static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
587 {
588         struct mapped_device *md = q->queuedata;
589         struct dm_rq_target_io *tio;
590
591         if (unlikely(rq->special)) {
592                 DMWARN("Already has something in rq->special.");
593                 return BLKPREP_KILL;
594         }
595
596         tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
597         if (!tio)
598                 return BLKPREP_DEFER;
599
600         rq->special = tio;
601         rq->cmd_flags |= REQ_DONTPREP;
602
603         return BLKPREP_OK;
604 }
605
/*
 * Ask the target (tio->ti) to map the original request and act on the
 * result.
 *
 * Returns:
 * 0                : the request has been processed
 * DM_MAPIO_REQUEUE : the original request needs to be requeued
 * < 0              : the request was completed due to failure
 *
 * Note that when no clone exists yet, any clone_and_map_rq() result other
 * than DM_MAPIO_REMAPPED is returned to the caller as-is, without going
 * through the switch below.
 */
static int map_request(struct dm_rq_target_io *tio, struct request *rq,
                       struct mapped_device *md)
{
        int r;
        struct dm_target *ti = tio->ti;
        struct request *clone = NULL;

        if (tio->clone) {
                /* A clone is already attached to the tio: just map it. */
                clone = tio->clone;
                r = ti->type->map_rq(ti, clone, &tio->info);
        } else {
                /* No clone yet: the target supplies one via &clone. */
                r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
                if (r < 0) {
                        /* The target wants to complete the I/O */
                        dm_kill_unmapped_request(rq, r);
                        return r;
                }
                if (r != DM_MAPIO_REMAPPED)
                        return r;
                if (setup_clone(clone, rq, tio, GFP_ATOMIC)) {
                        /* -ENOMEM */
                        /* Hand the clone back to the target; the caller requeues. */
                        ti->type->release_clone_rq(clone);
                        return DM_MAPIO_REQUEUE;
                }
        }

        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* The target has taken the I/O to submit by itself later */
                break;
        case DM_MAPIO_REMAPPED:
                /* The target has remapped the I/O so dispatch it */
                trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
                                     blk_rq_pos(rq));
                dm_dispatch_clone_request(clone, rq);
                break;
        case DM_MAPIO_REQUEUE:
                /* The target wants to requeue the I/O */
                dm_requeue_original_request(md, tio->orig);
                break;
        default:
                /* Positive values not handled above are a target bug. */
                if (r > 0) {
                        DMWARN("unimplemented target map return value: %d", r);
                        BUG();
                }

                /* The target wants to complete the I/O */
                dm_kill_unmapped_request(rq, r);
                return r;
        }

        return 0;
}
665
666 static void dm_start_request(struct mapped_device *md, struct request *orig)
667 {
668         if (!orig->q->mq_ops)
669                 blk_start_request(orig);
670         else
671                 blk_mq_start_request(orig);
672         atomic_inc(&md->pending[rq_data_dir(orig)]);
673
674         if (md->seq_rq_merge_deadline_usecs) {
675                 md->last_rq_pos = rq_end_sector(orig);
676                 md->last_rq_rw = rq_data_dir(orig);
677                 md->last_rq_start_time = ktime_get();
678         }
679
680         if (unlikely(dm_stats_used(&md->stats))) {
681                 struct dm_rq_target_io *tio = tio_from_request(orig);
682                 tio->duration_jiffies = jiffies;
683                 tio->n_sectors = blk_rq_sectors(orig);
684                 dm_stats_account_io(&md->stats, rq_data_dir(orig),
685                                     blk_rq_pos(orig), tio->n_sectors, false, 0,
686                                     &tio->stats_aux);
687         }
688
689         /*
690          * Hold the md reference here for the in-flight I/O.
691          * We can't rely on the reference count by device opener,
692          * because the device may be closed during the request completion
693          * when all bios are completed.
694          * See the comment in rq_completed() too.
695          */
696         dm_get(md);
697 }
698
699 static void map_tio_request(struct kthread_work *work)
700 {
701         struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
702         struct request *rq = tio->orig;
703         struct mapped_device *md = tio->md;
704
705         if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
706                 dm_requeue_original_request(md, rq);
707 }
708
709 ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
710 {
711         return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
712 }
713
714 #define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
715
716 ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
717                                                      const char *buf, size_t count)
718 {
719         unsigned deadline;
720
721         if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
722                 return count;
723
724         if (kstrtouint(buf, 10, &deadline))
725                 return -EINVAL;
726
727         if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
728                 deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
729
730         md->seq_rq_merge_deadline_usecs = deadline;
731
732         return count;
733 }
734
735 static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
736 {
737         ktime_t kt_deadline;
738
739         if (!md->seq_rq_merge_deadline_usecs)
740                 return false;
741
742         kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
743         kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
744
745         return !ktime_after(ktime_get(), kt_deadline);
746 }
747
748 /*
749  * q->request_fn for old request-based dm.
750  * Called with the queue lock held.
751  */
752 static void dm_old_request_fn(struct request_queue *q)
753 {
754         struct mapped_device *md = q->queuedata;
755         struct dm_target *ti = md->immutable_target;
756         struct request *rq;
757         struct dm_rq_target_io *tio;
758         sector_t pos = 0;
759
760         if (unlikely(!ti)) {
761                 int srcu_idx;
762                 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
763
764                 ti = dm_table_find_target(map, pos);
765                 dm_put_live_table(md, srcu_idx);
766         }
767
768         /*
769          * For suspend, check blk_queue_stopped() and increment
770          * ->pending within a single queue_lock not to increment the
771          * number of in-flight I/Os after the queue is stopped in
772          * dm_suspend().
773          */
774         while (!blk_queue_stopped(q)) {
775                 rq = blk_peek_request(q);
776                 if (!rq)
777                         return;
778
779                 /* always use block 0 to find the target for flushes for now */
780                 pos = 0;
781                 if (req_op(rq) != REQ_OP_FLUSH)
782                         pos = blk_rq_pos(rq);
783
784                 if ((dm_old_request_peeked_before_merge_deadline(md) &&
785                      md_in_flight(md) && rq->bio && rq->bio->bi_vcnt == 1 &&
786                      md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
787                     (ti->type->busy && ti->type->busy(ti))) {
788                         blk_delay_queue(q, 10);
789                         return;
790                 }
791
792                 dm_start_request(md, rq);
793
794                 tio = tio_from_request(rq);
795                 /* Establish tio->ti before queuing work (map_tio_request) */
796                 tio->ti = ti;
797                 queue_kthread_work(&md->kworker, &tio->work);
798                 BUG_ON(!irqs_disabled());
799         }
800 }
801
802 /*
803  * Fully initialize a .request_fn request-based queue.
804  */
805 int dm_old_init_request_queue(struct mapped_device *md)
806 {
807         /* Fully initialize the queue */
808         if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
809                 return -EINVAL;
810
811         /* disable dm_old_request_fn's merge heuristic by default */
812         md->seq_rq_merge_deadline_usecs = 0;
813
814         dm_init_normal_md_queue(md);
815         blk_queue_softirq_done(md->queue, dm_softirq_done);
816         blk_queue_prep_rq(md->queue, dm_old_prep_fn);
817
818         /* Initialize the request-based DM worker thread */
819         init_kthread_worker(&md->kworker);
820         md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
821                                        "kdmwork-%s", dm_device_name(md));
822         if (IS_ERR(md->kworker_task))
823                 return PTR_ERR(md->kworker_task);
824
825         elv_register_queue(md->queue);
826
827         return 0;
828 }
829
830 static int dm_mq_init_request(void *data, struct request *rq,
831                        unsigned int hctx_idx, unsigned int request_idx,
832                        unsigned int numa_node)
833 {
834         struct mapped_device *md = data;
835         struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
836
837         /*
838          * Must initialize md member of tio, otherwise it won't
839          * be available in dm_mq_queue_rq.
840          */
841         tio->md = md;
842
843         if (md->init_tio_pdu) {
844                 /* target-specific per-io data is immediately after the tio */
845                 tio->info.ptr = tio + 1;
846         }
847
848         return 0;
849 }
850
851 static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
852                           const struct blk_mq_queue_data *bd)
853 {
854         struct request *rq = bd->rq;
855         struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
856         struct mapped_device *md = tio->md;
857         struct dm_target *ti = md->immutable_target;
858
859         if (unlikely(!ti)) {
860                 int srcu_idx;
861                 struct dm_table *map = dm_get_live_table(md, &srcu_idx);
862
863                 ti = dm_table_find_target(map, 0);
864                 dm_put_live_table(md, srcu_idx);
865         }
866
867         if (ti->type->busy && ti->type->busy(ti))
868                 return BLK_MQ_RQ_QUEUE_BUSY;
869
870         dm_start_request(md, rq);
871
872         /* Init tio using md established in .init_request */
873         init_tio(tio, rq, md);
874
875         /*
876          * Establish tio->ti before calling map_request().
877          */
878         tio->ti = ti;
879
880         /* Direct call is fine since .queue_rq allows allocations */
881         if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE) {
882                 /* Undo dm_start_request() before requeuing */
883                 rq_end_stats(md, rq);
884                 rq_completed(md, rq_data_dir(rq), false);
885                 return BLK_MQ_RQ_QUEUE_BUSY;
886         }
887
888         return BLK_MQ_RQ_QUEUE_OK;
889 }
890
/*
 * blk-mq operations for request-based dm-mq; installed on the tag set in
 * dm_mq_init_request_queue().
 */
static struct blk_mq_ops dm_mq_ops = {
        .queue_rq = dm_mq_queue_rq,
        .map_queue = blk_mq_map_queue,
        /* Same completion handler as the .request_fn path. */
        .complete = dm_softirq_done,
        .init_request = dm_mq_init_request,
};
897
898 int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
899 {
900         struct request_queue *q;
901         struct dm_target *immutable_tgt;
902         int err;
903
904         if (!dm_table_all_blk_mq_devices(t)) {
905                 DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
906                 return -EINVAL;
907         }
908
909         md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
910         if (!md->tag_set)
911                 return -ENOMEM;
912
913         md->tag_set->ops = &dm_mq_ops;
914         md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
915         md->tag_set->numa_node = md->numa_node_id;
916         md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
917         md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
918         md->tag_set->driver_data = md;
919
920         md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
921         immutable_tgt = dm_table_get_immutable_target(t);
922         if (immutable_tgt && immutable_tgt->per_io_data_size) {
923                 /* any target-specific per-io data is immediately after the tio */
924                 md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
925                 md->init_tio_pdu = true;
926         }
927
928         err = blk_mq_alloc_tag_set(md->tag_set);
929         if (err)
930                 goto out_kfree_tag_set;
931
932         q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
933         if (IS_ERR(q)) {
934                 err = PTR_ERR(q);
935                 goto out_tag_set;
936         }
937         dm_init_md_queue(md);
938
939         /* backfill 'mq' sysfs registration normally done in blk_register_queue */
940         blk_mq_register_disk(md->disk);
941
942         return 0;
943
944 out_tag_set:
945         blk_mq_free_tag_set(md->tag_set);
946 out_kfree_tag_set:
947         kfree(md->tag_set);
948
949         return err;
950 }
951
952 void dm_mq_cleanup_mapped_device(struct mapped_device *md)
953 {
954         if (md->tag_set) {
955                 blk_mq_free_tag_set(md->tag_set);
956                 kfree(md->tag_set);
957         }
958 }
959
/*
 * Module parameters.  All four are registered with S_IRUGO | S_IWUSR
 * permissions.
 */
module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");