The current code copies 'nr_io_queues' into 'q_count', modifies
'nr_io_queues' during MSI-X setup, then resets 'nr_io_queues' for
MSI setup. Instead, copy 'nr_io_queues' into 'vecs' and modify 'vecs'
during both MSI-X and MSI setup.
This lets us simplify the for-loops that set up MSI-X and MSI, and opens
the possibility of using more I/O queues than we have interrupt vectors,
should future benchmarking prove that to be a useful feature.
Signed-off-by: Matthew Wilcox <matthew.r.wilcox@intel.com>
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct pci_dev *pdev = dev->pci_dev;
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct pci_dev *pdev = dev->pci_dev;
- int result, cpu, i, nr_io_queues, db_bar_size, q_depth, q_count;
+ int result, cpu, i, vecs, nr_io_queues, db_bar_size, q_depth;
nr_io_queues = num_online_cpus();
result = set_queue_count(dev, nr_io_queues);
nr_io_queues = num_online_cpus();
result = set_queue_count(dev, nr_io_queues);
if (result < nr_io_queues)
nr_io_queues = result;
if (result < nr_io_queues)
nr_io_queues = result;
- q_count = nr_io_queues;
/* Deregister the admin queue's interrupt */
free_irq(dev->entry[0].vector, dev->queues[0]);
/* Deregister the admin queue's interrupt */
free_irq(dev->entry[0].vector, dev->queues[0]);
dev->queues[0]->q_db = dev->dbs;
}
dev->queues[0]->q_db = dev->dbs;
}
- for (i = 0; i < nr_io_queues; i++)
+ vecs = nr_io_queues;
+ for (i = 0; i < vecs; i++)
dev->entry[i].entry = i;
for (;;) {
dev->entry[i].entry = i;
for (;;) {
- result = pci_enable_msix(pdev, dev->entry, nr_io_queues);
- if (result == 0) {
+ result = pci_enable_msix(pdev, dev->entry, vecs);
+ if (result <= 0)
- } else if (result > 0) {
- nr_io_queues = result;
- continue;
- } else {
- nr_io_queues = 0;
- break;
- }
- if (nr_io_queues == 0) {
- nr_io_queues = q_count;
+ if (result < 0) {
+ vecs = nr_io_queues;
+ if (vecs > 32)
+ vecs = 32;
- result = pci_enable_msi_block(pdev, nr_io_queues);
+ result = pci_enable_msi_block(pdev, vecs);
- for (i = 0; i < nr_io_queues; i++)
+ for (i = 0; i < vecs; i++)
dev->entry[i].vector = i + pdev->irq;
break;
dev->entry[i].vector = i + pdev->irq;
break;
- } else if (result > 0) {
- nr_io_queues = result;
- continue;
- } else {
- nr_io_queues = 1;
+ } else if (result < 0) {
+ vecs = 1;
+ /*
+ * Should investigate if there's a performance win from allocating
+ * more queues than interrupt vectors; it might allow the submission
+ * path to scale better, even if the receive path is limited by the
+ * number of interrupts.
+ */
+ nr_io_queues = vecs;
+
result = queue_request_irq(dev, dev->queues[0], "nvme admin");
/* XXX: handle failure here */
result = queue_request_irq(dev, dev->queues[0], "nvme admin");
/* XXX: handle failure here */