From b094f89ca42fbb8ce40174d5f85ca8430e499da6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 20 Nov 2015 20:29:45 -0700 Subject: [PATCH] blk-mq: fix calling unplug callbacks with preempt disabled Liu reported that running certain parts of xfstests threw the following error: BUG: sleeping function called from invalid context at mm/page_alloc.c:3190 in_atomic(): 1, irqs_disabled(): 0, pid: 6, name: kworker/u16:0 3 locks held by kworker/u16:0/6: #0: ("writeback"){++++.+}, at: [] process_one_work+0x173/0x730 #1: ((&(&wb->dwork)->work)){+.+.+.}, at: [] process_one_work+0x173/0x730 #2: (&type->s_umount_key#44){+++++.}, at: [] trylock_super+0x25/0x60 CPU: 5 PID: 6 Comm: kworker/u16:0 Tainted: G OE 4.3.0+ #3 Hardware name: Red Hat KVM, BIOS Bochs 01/01/2011 Workqueue: writeback wb_workfn (flush-btrfs-108) ffffffff81a3abab ffff88042e282ba8 ffffffff8130191b ffffffff81a3abab 0000000000000c76 ffff88042e282ba8 ffff88042e27c180 ffff88042e282bd8 ffffffff8108ed95 ffff880400000004 0000000000000000 0000000000000c76 Call Trace: [] dump_stack+0x4f/0x74 [] ___might_sleep+0x185/0x240 [] __might_sleep+0x52/0x90 [] __alloc_pages_nodemask+0x268/0x410 [] ? sched_clock_local+0x1c/0x90 [] ? local_clock+0x21/0x40 [] ? __lock_release+0x420/0x510 [] ? __lock_acquired+0x16c/0x3c0 [] alloc_pages_current+0xc5/0x210 [] ? rbio_is_full+0x55/0x70 [btrfs] [] ? mark_held_locks+0x78/0xa0 [] ? _raw_spin_unlock_irqrestore+0x40/0x60 [] full_stripe_write+0x5a/0xc0 [btrfs] [] __raid56_parity_write+0x39/0x60 [btrfs] [] run_plug+0x11b/0x140 [btrfs] [] btrfs_raid_unplug+0x23/0x70 [btrfs] [] blk_flush_plug_list+0x82/0x1f0 [] blk_sq_make_request+0x1f9/0x740 [] ? generic_make_request_checks+0x222/0x7c0 [] ? blk_queue_enter+0x124/0x310 [] ? blk_queue_enter+0x92/0x310 [] generic_make_request+0x172/0x2c0 [] ? generic_make_request+0x164/0x2c0 [] submit_bio+0x70/0x140 [] ? 
rbio_add_io_page+0x99/0x150 [btrfs] [] finish_rmw+0x4d9/0x600 [btrfs] [] full_stripe_write+0x9c/0xc0 [btrfs] [] raid56_parity_write+0xef/0x160 [btrfs] [] btrfs_map_bio+0xe3/0x2d0 [btrfs] [] btrfs_submit_bio_hook+0x8d/0x1d0 [btrfs] [] submit_one_bio+0x74/0xb0 [btrfs] [] submit_extent_page+0xe5/0x1c0 [btrfs] [] __extent_writepage_io+0x408/0x4c0 [btrfs] [] ? alloc_dummy_extent_buffer+0x140/0x140 [btrfs] [] __extent_writepage+0x218/0x3a0 [btrfs] [] ? mark_held_locks+0x78/0xa0 [] extent_write_cache_pages.clone.0+0x2f9/0x400 [btrfs] [] extent_writepages+0x52/0x70 [btrfs] [] ? btrfs_set_inode_index+0x70/0x70 [btrfs] [] btrfs_writepages+0x27/0x30 [btrfs] [] do_writepages+0x23/0x40 [] __writeback_single_inode+0x89/0x4d0 [] ? writeback_sb_inodes+0x260/0x480 [] ? writeback_sb_inodes+0x260/0x480 [] ? writeback_sb_inodes+0x15f/0x480 [] writeback_sb_inodes+0x2d2/0x480 [] ? down_read_trylock+0x57/0x60 [] ? trylock_super+0x25/0x60 [] ? rcu_read_lock_sched_held+0x4f/0x90 [] __writeback_inodes_wb+0x8c/0xc0 [] wb_writeback+0x2b5/0x500 [] ? mark_held_locks+0x78/0xa0 [] ? __local_bh_enable_ip+0x68/0xc0 [] ? wb_do_writeback+0x62/0x310 [] wb_do_writeback+0xc1/0x310 [] ? set_worker_desc+0x79/0x90 [] wb_workfn+0x92/0x330 [] process_one_work+0x223/0x730 [] ? process_one_work+0x173/0x730 [] ? worker_thread+0x18f/0x430 [] worker_thread+0x11d/0x430 [] ? maybe_create_worker+0xf0/0xf0 [] ? maybe_create_worker+0xf0/0xf0 [] kthread+0xef/0x110 [] ? schedule_tail+0x1e/0xd0 [] ? __init_kthread_worker+0x70/0x70 [] ret_from_fork+0x3f/0x70 [] ? __init_kthread_worker+0x70/0x70 The issue is that we've got the software context pinned, and hence preemption disabled, while calling blk_flush_plug_list(), which flushes callbacks that are allowed to sleep. btrfs and raid have such callbacks. Flip the checks around a bit, so we can re-enable preemption a bit earlier and flush plugs without having preemption disabled. This only affects blk-mq driven devices, and only those that register a single queue. 
Reported-by: Liu Bo Tested-by: Liu Bo Cc: stable@kernel.org Signed-off-by: Jens Axboe --- block/blk-mq.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 3ae09de62f19..6d6f8feb48c0 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1291,15 +1291,16 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) blk_mq_bio_to_request(rq, bio); /* - * we do limited pluging. If bio can be merged, do merge. + * We do limited pluging. If the bio can be merged, do that. * Otherwise the existing request in the plug list will be * issued. So the plug list will have one request at most */ if (plug) { /* * The plug list might get flushed before this. If that - * happens, same_queue_rq is invalid and plug list is empty - **/ + * happens, same_queue_rq is invalid and plug list is + * empty + */ if (same_queue_rq && !list_empty(&plug->mq_list)) { old_rq = same_queue_rq; list_del_init(&old_rq->queuelist); @@ -1380,12 +1381,15 @@ static blk_qc_t blk_sq_make_request(struct request_queue *q, struct bio *bio) blk_mq_bio_to_request(rq, bio); if (!request_count) trace_block_plug(q); - else if (request_count >= BLK_MAX_REQUEST_COUNT) { + + blk_mq_put_ctx(data.ctx); + + if (request_count >= BLK_MAX_REQUEST_COUNT) { blk_flush_plug_list(plug, false); trace_block_plug(q); } + list_add_tail(&rq->queuelist, &plug->mq_list); - blk_mq_put_ctx(data.ctx); return cookie; } -- 2.20.1