#include <linux/sched/sysctl.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
+#include <linux/prefetch.h>
#include <trace/events/block.h>
return ERR_PTR(ret);
ctx = blk_mq_get_ctx(q);
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
+ hctx = blk_mq_map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
blk_mq_put_ctx(ctx);
ctx = blk_mq_get_ctx(q);
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
+ hctx = blk_mq_map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, flags, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, rw, 0);
ctx = alloc_data.ctx;
void blk_mq_free_request(struct request *rq)
{
- struct blk_mq_hw_ctx *hctx;
- struct request_queue *q = rq->q;
-
- hctx = q->mq_ops->map_queue(q, rq->mq_ctx->cpu);
- blk_mq_free_hctx_request(hctx, rq);
+ blk_mq_free_hctx_request(blk_mq_map_queue(rq->q, rq->mq_ctx->cpu), rq);
}
EXPORT_SYMBOL_GPL(blk_mq_free_request);
static void blk_mq_requeue_work(struct work_struct *work)
{
struct request_queue *q =
- container_of(work, struct request_queue, requeue_work);
+ container_of(work, struct request_queue, requeue_work.work);
LIST_HEAD(rq_list);
struct request *rq, *next;
unsigned long flags;
void blk_mq_cancel_requeue_work(struct request_queue *q)
{
- cancel_work_sync(&q->requeue_work);
+ cancel_delayed_work_sync(&q->requeue_work);
}
EXPORT_SYMBOL_GPL(blk_mq_cancel_requeue_work);
void blk_mq_kick_requeue_list(struct request_queue *q)
{
- kblockd_schedule_work(&q->requeue_work);
+ kblockd_schedule_delayed_work(&q->requeue_work, 0);
}
EXPORT_SYMBOL(blk_mq_kick_requeue_list);
+void blk_mq_delay_kick_requeue_list(struct request_queue *q,
+ unsigned long msecs)
+{
+ kblockd_schedule_delayed_work(&q->requeue_work,
+ msecs_to_jiffies(msecs));
+}
+EXPORT_SYMBOL(blk_mq_delay_kick_requeue_list);
+
void blk_mq_abort_requeue_list(struct request_queue *q)
{
unsigned long flags;
struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag)
{
- if (tag < tags->nr_tags)
+ if (tag < tags->nr_tags) {
+ prefetch(tags->rqs[tag]);
return tags->rqs[tag];
+ }
return NULL;
}
struct list_head *dptr;
int queued;
- WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask));
-
if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
return;
+ WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
+ cpu_online(hctx->next_cpu));
+
hctx->run++;
/*
put_cpu();
}
- kblockd_schedule_delayed_work_on(blk_mq_hctx_next_cpu(hctx),
- &hctx->run_work, 0);
+ kblockd_schedule_work_on(blk_mq_hctx_next_cpu(hctx), &hctx->run_work);
}
void blk_mq_run_hw_queues(struct request_queue *q, bool async)
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
- cancel_delayed_work(&hctx->run_work);
+ cancel_work(&hctx->run_work);
cancel_delayed_work(&hctx->delay_work);
set_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
{
struct blk_mq_hw_ctx *hctx;
- hctx = container_of(work, struct blk_mq_hw_ctx, run_work.work);
+ hctx = container_of(work, struct blk_mq_hw_ctx, run_work);
__blk_mq_run_hw_queue(hctx);
}
EXPORT_SYMBOL(blk_mq_delay_queue);
static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
- struct blk_mq_ctx *ctx,
struct request *rq,
bool at_head)
{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
+
trace_block_rq_insert(hctx->queue, rq);
if (at_head)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
- __blk_mq_insert_req_list(hctx, ctx, rq, at_head);
+ __blk_mq_insert_req_list(hctx, rq, at_head);
blk_mq_hctx_mark_pending(hctx, ctx);
}
void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
- bool async)
+ bool async)
{
+ struct blk_mq_ctx *ctx = rq->mq_ctx;
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
-
- current_ctx = blk_mq_get_ctx(q);
- if (!cpu_online(ctx->cpu))
- rq->mq_ctx = ctx = current_ctx;
-
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
spin_lock(&ctx->lock);
__blk_mq_insert_request(hctx, rq, at_head);
if (run_queue)
blk_mq_run_hw_queue(hctx, async);
-
- blk_mq_put_ctx(current_ctx);
}
static void blk_mq_insert_requests(struct request_queue *q,
bool from_schedule)
{
- struct blk_mq_hw_ctx *hctx;
- struct blk_mq_ctx *current_ctx;
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu);
trace_block_unplug(q, depth, !from_schedule);
- current_ctx = blk_mq_get_ctx(q);
-
- if (!cpu_online(ctx->cpu))
- ctx = current_ctx;
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
-
/*
* preemption doesn't flush plug list, so it's possible ctx->cpu is
* offline now
struct request *rq;
rq = list_first_entry(list, struct request, queuelist);
+ BUG_ON(rq->mq_ctx != ctx);
list_del_init(&rq->queuelist);
- rq->mq_ctx = ctx;
- __blk_mq_insert_req_list(hctx, ctx, rq, false);
+ __blk_mq_insert_req_list(hctx, rq, false);
}
blk_mq_hctx_mark_pending(hctx, ctx);
spin_unlock(&ctx->lock);
blk_mq_run_hw_queue(hctx, from_schedule);
- blk_mq_put_ctx(current_ctx);
}
static int plug_ctx_cmp(void *priv, struct list_head *a, struct list_head *b)
blk_queue_enter_live(q);
ctx = blk_mq_get_ctx(q);
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
+ hctx = blk_mq_map_queue(q, ctx->cpu);
if (rw_is_sync(bio_op(bio), bio->bi_opf))
op_flags |= REQ_SYNC;
trace_block_sleeprq(q, bio, op);
ctx = blk_mq_get_ctx(q);
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
+ hctx = blk_mq_map_queue(q, ctx->cpu);
blk_mq_set_alloc_data(&alloc_data, q, 0, ctx, hctx);
rq = __blk_mq_alloc_request(&alloc_data, op, op_flags);
ctx = alloc_data.ctx;
{
int ret;
struct request_queue *q = rq->q;
- struct blk_mq_hw_ctx *hctx = q->mq_ops->map_queue(q,
- rq->mq_ctx->cpu);
+ struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, rq->mq_ctx->cpu);
struct blk_mq_queue_data bd = {
.rq = rq,
.list = NULL,
return cookie;
}
-/*
- * Default mapping to a software queue, since we use one per CPU.
- */
-struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *q, const int cpu)
-{
- return q->queue_hw_ctx[q->mq_map[cpu]];
-}
-EXPORT_SYMBOL(blk_mq_map_queue);
-
static void blk_mq_free_rq_map(struct blk_mq_tag_set *set,
struct blk_mq_tags *tags, unsigned int hctx_idx)
{
return 0;
}
+/*
+ * 'cpu' is going away. Splice any existing rq_list entries from this
+ * software queue to the hw queue dispatch list, and ensure that it
+ * gets run.
+ */
static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu)
{
- struct request_queue *q = hctx->queue;
struct blk_mq_ctx *ctx;
LIST_HEAD(tmp);
- /*
- * Move ctx entries to new CPU, if this one is going away.
- */
- ctx = __blk_mq_get_ctx(q, cpu);
+ ctx = __blk_mq_get_ctx(hctx->queue, cpu);
spin_lock(&ctx->lock);
if (!list_empty(&ctx->rq_list)) {
if (list_empty(&tmp))
return NOTIFY_OK;
- ctx = blk_mq_get_ctx(q);
- spin_lock(&ctx->lock);
-
- while (!list_empty(&tmp)) {
- struct request *rq;
-
- rq = list_first_entry(&tmp, struct request, queuelist);
- rq->mq_ctx = ctx;
- list_move_tail(&rq->queuelist, &ctx->rq_list);
- }
-
- hctx = q->mq_ops->map_queue(q, ctx->cpu);
- blk_mq_hctx_mark_pending(hctx, ctx);
-
- spin_unlock(&ctx->lock);
+ spin_lock(&hctx->lock);
+ list_splice_tail_init(&tmp, &hctx->dispatch);
+ spin_unlock(&hctx->lock);
blk_mq_run_hw_queue(hctx, true);
- blk_mq_put_ctx(ctx);
return NOTIFY_OK;
}
if (node == NUMA_NO_NODE)
node = hctx->numa_node = set->numa_node;
- INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
+ INIT_WORK(&hctx->run_work, blk_mq_run_work_fn);
INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch);
if (!cpu_online(i))
continue;
- hctx = q->mq_ops->map_queue(q, i);
+ hctx = blk_mq_map_queue(q, i);
/*
* Set local node, IFF we have more than one hw queue. If
continue;
ctx = per_cpu_ptr(q->queue_ctx, i);
- hctx = q->mq_ops->map_queue(q, i);
+ hctx = blk_mq_map_queue(q, i);
cpumask_set_cpu(i, hctx->cpumask);
ctx->index_hw = hctx->nr_ctx;
hctx->tags = set->tags[i];
WARN_ON(!hctx->tags);
- cpumask_copy(hctx->tags->cpumask, hctx->cpumask);
/*
* Set the map size to the number of mapped software queues.
* This is more accurate and more efficient than looping
kfree(hctx);
}
- kfree(q->mq_map);
q->mq_map = NULL;
kfree(q->queue_hw_ctx);
if (!q->queue_hw_ctx)
goto err_percpu;
- q->mq_map = blk_mq_make_queue_map(set);
- if (!q->mq_map)
- goto err_map;
+ q->mq_map = set->mq_map;
blk_mq_realloc_hw_ctxs(set, q);
if (!q->nr_hw_queues)
q->sg_reserved_size = INT_MAX;
- INIT_WORK(&q->requeue_work, blk_mq_requeue_work);
+ INIT_DELAYED_WORK(&q->requeue_work, blk_mq_requeue_work);
INIT_LIST_HEAD(&q->requeue_list);
spin_lock_init(&q->requeue_lock);
return q;
err_hctxs:
- kfree(q->mq_map);
-err_map:
kfree(q->queue_hw_ctx);
err_percpu:
free_percpu(q->queue_ctx);
blk_mq_sysfs_unregister(q);
- blk_mq_update_queue_map(q->mq_map, q->nr_hw_queues, online_mask);
-
/*
* redo blk_mq_init_cpu_queues and blk_mq_init_hw_queues. FIXME: maybe
* we should change hctx numa_node according to new topology (this
return 0;
}
-struct cpumask *blk_mq_tags_cpumask(struct blk_mq_tags *tags)
-{
- return tags->cpumask;
-}
-EXPORT_SYMBOL_GPL(blk_mq_tags_cpumask);
-
/*
* Alloc a tag set to be associated with one or more request queues.
* May fail with EINVAL for various error conditions. May adjust the
*/
int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
{
+ int ret;
+
BUILD_BUG_ON(BLK_MQ_MAX_DEPTH > 1 << BLK_MQ_UNIQUE_TAG_BITS);
if (!set->nr_hw_queues)
if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN)
return -EINVAL;
- if (!set->ops->queue_rq || !set->ops->map_queue)
+ if (!set->ops->queue_rq)
return -EINVAL;
if (set->queue_depth > BLK_MQ_MAX_DEPTH) {
if (!set->tags)
return -ENOMEM;
- if (blk_mq_alloc_rq_maps(set))
- goto enomem;
+ ret = -ENOMEM;
+ set->mq_map = kzalloc_node(sizeof(*set->mq_map) * nr_cpu_ids,
+ GFP_KERNEL, set->numa_node);
+ if (!set->mq_map)
+ goto out_free_tags;
+
+ if (set->ops->map_queues)
+ ret = set->ops->map_queues(set);
+ else
+ ret = blk_mq_map_queues(set);
+ if (ret)
+ goto out_free_mq_map;
+
+ ret = blk_mq_alloc_rq_maps(set);
+ if (ret)
+ goto out_free_mq_map;
mutex_init(&set->tag_list_lock);
INIT_LIST_HEAD(&set->tag_list);
return 0;
-enomem:
+
+out_free_mq_map:
+ kfree(set->mq_map);
+ set->mq_map = NULL;
+out_free_tags:
kfree(set->tags);
set->tags = NULL;
- return -ENOMEM;
+ return ret;
}
EXPORT_SYMBOL(blk_mq_alloc_tag_set);
blk_mq_free_rq_map(set, set->tags[i], i);
}
+ kfree(set->mq_map);
+ set->mq_map = NULL;
+
kfree(set->tags);
set->tags = NULL;
}