blk-mq: improve layout of blk_mq_hw_ctx
author Jens Axboe <axboe@fb.com>
Thu, 25 Aug 2016 14:00:28 +0000 (08:00 -0600)
committer Jens Axboe <axboe@fb.com>
Mon, 29 Aug 2016 14:13:21 +0000 (08:13 -0600)
Various cache line optimizations:

- Move delay_work towards the end. It's huge, and we don't use it
  a lot (only SCSI).

- Move the atomic state into the same cacheline as the dispatch
  list and lock.

- Rearrange a few members to pack it better.

- Shrink the max-order for dispatch accounting from 10 to 7. This
  means that ->dispatched[] and ->run now take up their own
  cacheline.

This shrinks struct blk_mq_hw_ctx down to 8 cachelines.

Signed-off-by: Jens Axboe <axboe@fb.com>
include/linux/blk-mq.h

index d579252..e1544f0 100644 (file)
@@ -22,11 +22,10 @@ struct blk_mq_hw_ctx {
        struct {
                spinlock_t              lock;
                struct list_head        dispatch;
+               unsigned long           state;          /* BLK_MQ_S_* flags */
        } ____cacheline_aligned_in_smp;
 
-       unsigned long           state;          /* BLK_MQ_S_* flags */
        struct work_struct      run_work;
-       struct delayed_work     delay_work;
        cpumask_var_t           cpumask;
        int                     next_cpu;
        int                     next_cpu_batch;
@@ -40,8 +39,8 @@ struct blk_mq_hw_ctx {
 
        struct blk_mq_ctxmap    ctx_map;
 
-       unsigned int            nr_ctx;
        struct blk_mq_ctx       **ctxs;
+       unsigned int            nr_ctx;
 
        atomic_t                wait_index;
 
@@ -49,7 +48,7 @@ struct blk_mq_hw_ctx {
 
        unsigned long           queued;
        unsigned long           run;
-#define BLK_MQ_MAX_DISPATCH_ORDER      10
+#define BLK_MQ_MAX_DISPATCH_ORDER      7
        unsigned long           dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
        unsigned int            numa_node;
@@ -57,6 +56,8 @@ struct blk_mq_hw_ctx {
 
        atomic_t                nr_active;
 
+       struct delayed_work     delay_work;
+
        struct blk_mq_cpu_notifier      cpu_notifier;
        struct kobject          kobj;