2 #include <linux/spinlock.h>
3 #include <linux/slab.h>
4 #include <linux/blkdev.h>
5 #include <linux/hdreg.h>
6 #include <linux/module.h>
7 #include <linux/mutex.h>
8 #include <linux/virtio.h>
9 #include <linux/virtio_blk.h>
10 #include <linux/scatterlist.h>
11 #include <linux/string_helpers.h>
12 #include <scsi/scsi_cmnd.h>
13 #include <linux/idr.h>
14 #include <linux/blk-mq.h>
15 #include <linux/numa.h>
/*
 * IDA from which virtblk_probe() allocates a per-disk index; the index is
 * released again in virtblk_remove() and on the probe error path.
 */
20 static DEFINE_IDA(vd_index_ida);
/*
 * Workqueue on which virtblk_config_changed() queues config_work.
 * Created in init() and destroyed in fini().
 */
22 static struct workqueue_struct *virtblk_wq;
/*
 * Members of the per-device state that the rest of this file reaches
 * through struct virtio_blk pointers.
 * NOTE(review): several member declarations described by the comments
 * below are not visible in this excerpt.
 */
/* Virtio device used for feature checks and config-space access. */
26 struct virtio_device *vdev;
30 /* The disk structure for the kernel. */
33 /* Process context for config space updates */
34 struct work_struct config_work;
36 /* Lock for config space updates */
37 struct mutex config_lock;
39 /* enable config space updates */
42 /* What host tells us, plus 2 for header & trailer. */
43 unsigned int sg_elems;
45 /* Ida index - used to track minor number allocations. */
/* Request header; __virtblk_add_req() adds it as the first "out" entry. */
52 struct virtio_blk_outhdr out_hdr;
/*
 * Extra result fields (errors, residual, sense_len) that
 * virtblk_request_done() reads back for SCSI commands.
 */
53 struct virtio_scsi_inhdr in_hdr;
/*
 * Trailing scatterlist for the request data; virtblk_init_vbr() initialises
 * it with sg_elems entries.
 */
55 struct scatterlist sg[];
/*
 * Map the status field of a request to an int result; used by
 * virtblk_request_done().
 * NOTE(review): only the switch head and the VIRTIO_BLK_S_UNSUPP label are
 * visible in this excerpt; the values returned per case are not shown.
 */
58 static inline int virtblk_result(struct virtblk_req *vbr)
60 switch (vbr->status) {
63 case VIRTIO_BLK_S_UNSUPP:
/*
 * Queue one request on the virtqueue.
 *
 * Builds an array of up to six scatterlist pointers -- out header, optional
 * SCSI command, data, optional sense buffer and SCSI in-header, status --
 * with the "out" entries first (num_out) followed by the "in" entries
 * (num_in), and hands it to virtqueue_add_sgs().
 *
 * @vq:      virtqueue the request is added to
 * @vbr:     request state; also passed to virtqueue_add_sgs() as the token
 * @data_sg: scatterlist describing the request payload
 *
 * Returns the result of virtqueue_add_sgs(), which is called with
 * GFP_ATOMIC.
 * NOTE(review): the final parameter and a few body lines are not visible in
 * this excerpt.
 */
70 static int __virtblk_add_req(struct virtqueue *vq,
71 struct virtblk_req *vbr,
72 struct scatterlist *data_sg,
75 struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
76 unsigned int num_out = 0, num_in = 0;
/* Request type with the direction bit (VIRTIO_BLK_T_OUT) masked off. */
77 int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT;
/* The out header is always the first "out" entry. */
79 sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
80 sgs[num_out++] = &hdr;
83 * If this is a packet command we need a couple of additional headers.
84 * Behind the normal outhdr we put a segment with the scsi command
85 * block, and before the normal inhdr we put the sense data and the
86 * inhdr with additional status information.
88 if (type == VIRTIO_BLK_T_SCSI_CMD) {
89 sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
90 sgs[num_out++] = &cmd;
/*
 * Payload: counted as an "out" entry when VIRTIO_BLK_T_OUT is set; the
 * statement after that adds it as an "in" entry instead.
 * NOTE(review): the branch structure around these lines is not fully
 * visible here.
 */
94 if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT)
95 sgs[num_out++] = data_sg;
97 sgs[num_out + num_in++] = data_sg;
/* SCSI commands additionally get sense and in-header "in" entries. */
100 if (type == VIRTIO_BLK_T_SCSI_CMD) {
101 sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
102 sgs[num_out + num_in++] = &sense;
103 sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
104 sgs[num_out + num_in++] = &inhdr;
/* The status entry is always the last "in" entry. */
107 sg_init_one(&status, &vbr->status, sizeof(vbr->status));
108 sgs[num_out + num_in++] = &status;
110 return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
/*
 * Complete one request: derive the result with virtblk_result(), copy the
 * command-type specific fields into the block request and end it with
 * blk_mq_end_io().
 */
113 static inline void virtblk_request_done(struct virtblk_req *vbr)
115 struct request *req = vbr->req;
116 int error = virtblk_result(vbr);
/* REQ_TYPE_BLOCK_PC: report residual, sense length and errors from in_hdr. */
118 if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
119 req->resid_len = vbr->in_hdr.residual;
120 req->sense_len = vbr->in_hdr.sense_len;
121 req->errors = vbr->in_hdr.errors;
/* REQ_TYPE_SPECIAL: errors becomes 1 when the result is non-zero, else 0. */
122 } else if (req->cmd_type == REQ_TYPE_SPECIAL) {
123 req->errors = (error != 0);
126 blk_mq_end_io(req, error);
/*
 * Virtqueue callback (passed to virtio_find_single_vq() in init_vq()).
 * Under vq_lock it collects finished buffers with virtqueue_get_buf() and
 * completes each one via virtblk_request_done(); afterwards it restarts
 * stopped hardware queues.
 * NOTE(review): the loop opener, some local declarations and the condition
 * guarding the restart are not visible in this excerpt.
 */
129 static void virtblk_done(struct virtqueue *vq)
131 struct virtio_blk *vblk = vq->vdev->priv;
132 bool req_done = false;
133 struct virtblk_req *vbr;
137 spin_lock_irqsave(&vblk->vq_lock, flags);
/*
 * Callbacks are disabled while draining; the drain repeats for as long as
 * virtqueue_enable_cb() returns false.
 */
139 virtqueue_disable_cb(vq);
140 while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
141 virtblk_request_done(vbr);
144 } while (!virtqueue_enable_cb(vq));
145 spin_unlock_irqrestore(&vblk->vq_lock, flags);
147 /* In case queue is stopped waiting for more buffers. */
149 blk_mq_start_stopped_hw_queues(vblk->disk->queue);
/*
 * blk-mq .queue_rq handler (see virtio_mq_ops): fill in the out header for
 * the request, map its data onto the per-request scatterlist and add it to
 * the virtqueue.
 *
 * Returns BLK_MQ_RQ_QUEUE_OK once the request has been added, or
 * BLK_MQ_RQ_QUEUE_BUSY after stopping the hardware queue when
 * __virtblk_add_req() fails.
 * NOTE(review): several lines (local declarations, case labels, else
 * branches, the use of 'last') are not visible in this excerpt.
 */
152 static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
154 struct virtio_blk *vblk = hctx->queue->queuedata;
/* Driver-private per-request data is reached through req->special. */
155 struct virtblk_req *vbr = req->special;
/* True when REQ_END is set in the request flags. */
158 const bool last = (req->cmd_flags & REQ_END) != 0;
/* The data segments plus two extra entries must fit in sg_elems. */
160 BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems);
/* Flush requests carry no sector; other requests are typed by cmd_type. */
163 if (req->cmd_flags & REQ_FLUSH) {
164 vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
165 vbr->out_hdr.sector = 0;
166 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
168 switch (req->cmd_type) {
170 vbr->out_hdr.type = 0;
171 vbr->out_hdr.sector = blk_rq_pos(vbr->req);
172 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
174 case REQ_TYPE_BLOCK_PC:
175 vbr->out_hdr.type = VIRTIO_BLK_T_SCSI_CMD;
176 vbr->out_hdr.sector = 0;
177 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
179 case REQ_TYPE_SPECIAL:
180 vbr->out_hdr.type = VIRTIO_BLK_T_GET_ID;
181 vbr->out_hdr.sector = 0;
182 vbr->out_hdr.ioprio = req_get_ioprio(vbr->req);
185 /* We don't put anything else in the queue. */
/* Map the request data; num is what blk_rq_map_sg() returns. */
190 num = blk_rq_map_sg(hctx->queue, vbr->req, vbr->sg);
/* Record the transfer direction in the header type. */
192 if (rq_data_dir(vbr->req) == WRITE)
193 vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
195 vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
/*
 * Add under vq_lock.  On failure the hardware queue is stopped and the
 * device is kicked before reporting the queue as busy.
 */
198 spin_lock_irqsave(&vblk->vq_lock, flags);
199 if (__virtblk_add_req(vblk->vq, vbr, vbr->sg, num) < 0) {
200 spin_unlock_irqrestore(&vblk->vq_lock, flags);
201 blk_mq_stop_hw_queue(hctx);
202 virtqueue_kick(vblk->vq);
203 return BLK_MQ_RQ_QUEUE_BUSY;
205 spin_unlock_irqrestore(&vblk->vq_lock, flags);
208 virtqueue_kick(vblk->vq);
209 return BLK_MQ_RQ_QUEUE_OK;
212 /* return id (s/n) string for *disk to *id_str
/*
 * Read the device ID string into @id_str by issuing a REQ_TYPE_SPECIAL
 * request, which virtio_queue_rq() sends as VIRTIO_BLK_T_GET_ID.  @id_str
 * is mapped with bio_map_kern() for VIRTIO_BLK_ID_BYTES bytes.
 * NOTE(review): the error checks, local declarations and the return
 * statement are not visible in this excerpt.
 */
214 static int virtblk_get_id(struct gendisk *disk, char *id_str)
216 struct virtio_blk *vblk = disk->private_data;
221 bio = bio_map_kern(vblk->disk->queue, id_str, VIRTIO_BLK_ID_BYTES,
226 req = blk_make_request(vblk->disk->queue, bio, GFP_KERNEL);
232 req->cmd_type = REQ_TYPE_SPECIAL;
/* Execute the request, keep its result in err, then drop the request. */
233 err = blk_execute_rq(vblk->disk->queue, vblk->disk, req, false);
234 blk_put_request(req);
/*
 * .ioctl handler of virtblk_fops: forwards the call to
 * scsi_cmd_blk_ioctl(), passing @data as a user pointer.
 * NOTE(review): the statement executed when VIRTIO_BLK_F_SCSI is not
 * offered is not visible in this excerpt.
 */
239 static int virtblk_ioctl(struct block_device *bdev, fmode_t mode,
240 unsigned int cmd, unsigned long data)
242 struct gendisk *disk = bdev->bd_disk;
243 struct virtio_blk *vblk = disk->private_data;
246 * Only allow the generic SCSI ioctls if the host can support it.
248 if (!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_SCSI))
251 return scsi_cmd_blk_ioctl(bdev, mode, cmd,
252 (void __user *)data);
255 /* We provide getgeo only to please some old bootloader/partitioning tools */
/*
 * .getgeo handler of virtblk_fops.  Reads the geometry field of the device
 * config space via virtio_config_val() (VIRTIO_BLK_F_GEOMETRY) and copies
 * heads/sectors/cylinders into @geo; the later assignments supply standard
 * values, with cylinders derived from the disk capacity.
 * NOTE(review): the branch selecting between the two sets of values, part
 * of the fallback and the return statement are not visible in this excerpt.
 */
256 static int virtblk_getgeo(struct block_device *bd, struct hd_geometry *geo)
258 struct virtio_blk *vblk = bd->bd_disk->private_data;
259 struct virtio_blk_geometry vgeo;
262 /* see if the host passed in geometry config */
263 err = virtio_config_val(vblk->vdev, VIRTIO_BLK_F_GEOMETRY,
264 offsetof(struct virtio_blk_config, geometry),
268 geo->heads = vgeo.heads;
269 geo->sectors = vgeo.sectors;
270 geo->cylinders = vgeo.cylinders;
272 /* some standard values, similar to sd */
274 geo->sectors = 1 << 5;
275 geo->cylinders = get_capacity(bd->bd_disk) >> 11;
/*
 * Block device operations installed on the disk by virtblk_probe():
 * ioctl and getgeo handlers, owned by this module.
 */
280 static const struct block_device_operations virtblk_fops = {
281 .ioctl = virtblk_ioctl,
282 .owner = THIS_MODULE,
283 .getgeo = virtblk_getgeo,
/*
 * Convert a disk index into its first minor number by shifting it left by
 * PART_BITS (virtblk_probe() allocates 1 << PART_BITS minors per disk).
 */
286 static int index_to_minor(int index)
288 return index << PART_BITS;
/*
 * Inverse of index_to_minor(): convert a minor number to a disk index by
 * shifting it right by PART_BITS.
 */
291 static int minor_to_index(int minor)
293 return minor >> PART_BITS;
/*
 * sysfs show handler for the "serial" attribute: fetches the device ID with
 * virtblk_get_id(), which writes straight into @buf.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
296 static ssize_t virtblk_serial_show(struct device *dev,
297 struct device_attribute *attr, char *buf)
299 struct gendisk *disk = dev_to_disk(dev);
302 /* sysfs gives us a PAGE_SIZE buffer */
303 BUILD_BUG_ON(PAGE_SIZE < VIRTIO_BLK_ID_BYTES);
/*
 * Terminate up front: virtblk_get_id() maps only VIRTIO_BLK_ID_BYTES bytes
 * of buf, so the byte just past them is set to NUL here.
 */
305 buf[VIRTIO_BLK_ID_BYTES] = '\0';
306 err = virtblk_get_id(disk, buf);
310 if (err == -EIO) /* Unsupported? Make it empty. */
/* Read-only (S_IRUGO) "serial" device attribute; no store handler. */
315 DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL);
/*
 * Work handler for config_work, queued by virtblk_config_changed().
 * Under config_lock it re-reads the capacity from the device config space,
 * logs the new size, updates the disk capacity, revalidates the disk and
 * emits a KOBJ_CHANGE uevent carrying "RESIZE=1".
 * NOTE(review): some local declarations and the statement taken when
 * config_enable is false are not visible in this excerpt.
 */
317 static void virtblk_config_changed_work(struct work_struct *work)
319 struct virtio_blk *vblk =
320 container_of(work, struct virtio_blk, config_work);
321 struct virtio_device *vdev = vblk->vdev;
322 struct request_queue *q = vblk->disk->queue;
323 char cap_str_2[10], cap_str_10[10];
324 char *envp[] = { "RESIZE=1", NULL };
327 mutex_lock(&vblk->config_lock);
328 if (!vblk->config_enable)
331 /* Host must always specify the capacity. */
332 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
333 &capacity, sizeof(capacity));
335 /* If capacity is too big, truncate with warning. */
336 if ((sector_t)capacity != capacity) {
337 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
338 (unsigned long long)capacity);
339 capacity = (sector_t)-1;
/* Size in bytes, formatted in both binary and decimal units for the log. */
342 size = capacity * queue_logical_block_size(q);
343 string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2));
344 string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10));
346 dev_notice(&vdev->dev,
347 "new size: %llu %d-byte logical blocks (%s/%s)\n",
348 (unsigned long long)capacity,
349 queue_logical_block_size(q),
350 cap_str_10, cap_str_2);
/* Apply the new capacity and announce the change. */
352 set_capacity(vblk->disk, capacity);
353 revalidate_disk(vblk->disk);
354 kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
356 mutex_unlock(&vblk->config_lock);
/*
 * .config_changed callback of the virtio driver: defers the handling to
 * virtblk_config_changed_work() by queueing config_work on virtblk_wq.
 */
359 static void virtblk_config_changed(struct virtio_device *vdev)
361 struct virtio_blk *vblk = vdev->priv;
363 queue_work(virtblk_wq, &vblk->config_work);
/*
 * Look up the single "requests" virtqueue of the device, installing
 * virtblk_done() as its callback, and store it in vblk->vq.  When the
 * lookup yields an error pointer, err is set from PTR_ERR().
 * NOTE(review): the declaration of err and the return statement are not
 * visible in this excerpt.
 */
366 static int init_vq(struct virtio_blk *vblk)
370 /* We expect one virtqueue, for output. */
371 vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
372 if (IS_ERR(vblk->vq))
373 err = PTR_ERR(vblk->vq);
379 * Legacy naming scheme used for virtio devices. We are stuck with it for
380 * virtio blk but don't ever use it for any new driver.
/*
 * Write a disk name into @buf: @prefix followed by a letter suffix derived
 * from @index ('a' + remainder per step, then index = index / unit - 1
 * until it goes negative).  The suffix is built backwards from the end of
 * the buffer, moved to just after the prefix, and the prefix is copied in
 * front of it.
 * NOTE(review): the declarations of 'p' and 'unit', the loop opener and the
 * return statement are not visible in this excerpt; 'unit' presumably
 * equals 'base' -- confirm.
 */
382 static int virtblk_name_format(char *prefix, int index, char *buf, int buflen)
/* Number of letters in the range 'a'..'z'. */
384 const int base = 'z' - 'a' + 1;
/* Where the suffix must end up, and one past the end of the buffer. */
385 char *begin = buf + strlen(prefix);
386 char *end = buf + buflen;
396 *--p = 'a' + (index % unit);
397 index = (index / unit) - 1;
398 } while (index >= 0);
/* Slide the suffix down behind the prefix, then write the prefix itself. */
400 memmove(begin, p, end - p);
401 memcpy(buf, prefix, strlen(prefix));
/*
 * Determine the cache mode of the device.  The wce config field is queried
 * through virtio_config_val() (VIRTIO_BLK_F_CONFIG_WCE); the statement below
 * it takes the value from the VIRTIO_BLK_F_WCE feature bit instead.
 * NOTE(review): the condition selecting the fallback, the declaration of
 * 'writeback' and the return statement are not visible in this excerpt.
 */
406 static int virtblk_get_cache_mode(struct virtio_device *vdev)
411 err = virtio_config_val(vdev, VIRTIO_BLK_F_CONFIG_WCE,
412 offsetof(struct virtio_blk_config, wce),
415 writeback = virtio_has_feature(vdev, VIRTIO_BLK_F_WCE);
/*
 * Apply the current cache mode to the request queue: blk_queue_flush() is
 * called with either REQ_FLUSH or 0, and the disk is then revalidated.
 * NOTE(review): the branch lines choosing between the two calls are not
 * visible in this excerpt; presumably REQ_FLUSH is used when writeback is
 * non-zero -- confirm.
 */
420 static void virtblk_update_cache_mode(struct virtio_device *vdev)
422 u8 writeback = virtblk_get_cache_mode(vdev);
423 struct virtio_blk *vblk = vdev->priv;
426 blk_queue_flush(vblk->disk->queue, REQ_FLUSH);
428 blk_queue_flush(vblk->disk->queue, 0);
430 revalidate_disk(vblk->disk);
/*
 * Names used by the cache_type attribute, indexed by the writeback value:
 * index 0 is "write through", index 1 is "write back".
 */
433 static const char *const virtblk_cache_types[] = {
434 "write through", "write back"
/*
 * sysfs store handler for the writable cache_type attribute.  Matches @buf
 * against virtblk_cache_types[] with sysfs_streq(), writes the value to the
 * wce field of the device config space and re-applies the cache mode with
 * virtblk_update_cache_mode().
 * NOTE(review): the return type line, local declarations, the handling of
 * a match / no match and the return statement are not visible in this
 * excerpt.
 */
438 virtblk_cache_type_store(struct device *dev, struct device_attribute *attr,
439 const char *buf, size_t count)
441 struct gendisk *disk = dev_to_disk(dev);
442 struct virtio_blk *vblk = disk->private_data;
443 struct virtio_device *vdev = vblk->vdev;
/* The writable attribute is only created when this feature is offered. */
447 BUG_ON(!virtio_has_feature(vblk->vdev, VIRTIO_BLK_F_CONFIG_WCE));
/* Scan the table from its last entry down to index 0. */
448 for (i = ARRAY_SIZE(virtblk_cache_types); --i >= 0; )
449 if (sysfs_streq(buf, virtblk_cache_types[i]))
456 vdev->config->set(vdev,
457 offsetof(struct virtio_blk_config, wce),
458 &writeback, sizeof(writeback));
460 virtblk_update_cache_mode(vdev);
/*
 * sysfs show handler for the cache_type attribute: prints the name from
 * virtblk_cache_types[] matching the current cache mode, followed by a
 * newline, and returns the snprintf() result (output limited to 40 bytes).
 * NOTE(review): the return type line and the last parameter are not visible
 * in this excerpt.
 */
465 virtblk_cache_type_show(struct device *dev, struct device_attribute *attr,
468 struct gendisk *disk = dev_to_disk(dev);
469 struct virtio_blk *vblk = disk->private_data;
470 u8 writeback = virtblk_get_cache_mode(vblk->vdev);
/* The mode is used as a table index, so it must be in range. */
472 BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types));
473 return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
/*
 * Two variants of the "cache_type" attribute; virtblk_probe() creates
 * exactly one of them.  The read-only variant has no store handler.
 */
476 static const struct device_attribute dev_attr_cache_type_ro =
477 __ATTR(cache_type, S_IRUGO,
478 virtblk_cache_type_show, NULL);
/* Read/write variant: adds owner write permission and the store handler. */
479 static const struct device_attribute dev_attr_cache_type_rw =
480 __ATTR(cache_type, S_IRUGO|S_IWUSR,
481 virtblk_cache_type_show, virtblk_cache_type_store);
/*
 * blk-mq operations for this driver: requests are submitted through
 * virtio_queue_rq(); queue mapping and hardware-context allocation use the
 * blk_mq_* helpers named below.
 */
483 static struct blk_mq_ops virtio_mq_ops = {
484 .queue_rq = virtio_queue_rq,
485 .map_queue = blk_mq_map_queue,
486 .alloc_hctx = blk_mq_alloc_single_hw_queue,
487 .free_hctx = blk_mq_free_single_hw_queue,
/*
 * blk-mq registration data passed to blk_mq_init_queue() by
 * virtblk_probe(), which also fills in cmd_size before use.
 * NOTE(review): some initialisers are not visible in this excerpt.
 */
490 static struct blk_mq_reg virtio_mq_reg = {
491 .ops = &virtio_mq_ops,
494 .numa_node = NUMA_NO_NODE,
495 .flags = BLK_MQ_F_SHOULD_MERGE,
/*
 * Per-request initialiser handed to blk_mq_init_commands() by
 * virtblk_probe(): initialises the request's scatterlist table with
 * sg_elems entries.
 *
 * @data: the struct virtio_blk passed by virtblk_probe()
 * @rq:   request whose private data (rq->special) is initialised
 * @hctx and @nr are unused in the visible body.
 */
498 static void virtblk_init_vbr(void *data, struct blk_mq_hw_ctx *hctx,
499 struct request *rq, unsigned int nr)
501 struct virtio_blk *vblk = data;
502 struct virtblk_req *vbr = rq->special;
504 sg_init_table(vbr->sg, vblk->sg_elems);
/*
 * .probe callback of the virtio driver.  In the visible code it:
 *  - reserves a disk index from vd_index_ida,
 *  - reads seg_max from the config space and allocates the per-device state,
 *  - allocates the gendisk and the blk-mq request queue,
 *  - names the disk and fills in its major/minor, fops and private data,
 *  - applies cache mode, read-only flag, capacity and queue limits taken
 *    from feature bits and config fields,
 *  - adds the disk and creates the "serial" and "cache_type" attributes,
 *  - unwinds on error.
 * NOTE(review): many lines (local declarations, error checks, goto labels,
 * return statements, continuation lines of several calls) are not visible
 * in this excerpt.
 */
507 static int virtblk_probe(struct virtio_device *vdev)
509 struct virtio_blk *vblk;
510 struct request_queue *q;
514 u32 v, blk_size, sg_elems, opt_io_size;
516 u8 physical_block_exp, alignment_offset;
/*
 * Reserve a disk index; the upper bound is the index corresponding to
 * minor number 1 << MINORBITS.
 */
518 err = ida_simple_get(&vd_index_ida, 0, minor_to_index(1 << MINORBITS),
524 /* We need to know how many segments before we allocate. */
525 err = virtio_config_val(vdev, VIRTIO_BLK_F_SEG_MAX,
526 offsetof(struct virtio_blk_config, seg_max),
529 /* We need at least one SG element, whatever they say. */
530 if (err || !sg_elems)
533 /* We need extra sg elements at head and tail. */
535 vdev->priv = vblk = kmalloc(sizeof(*vblk), GFP_KERNEL);
542 vblk->sg_elems = sg_elems;
543 mutex_init(&vblk->config_lock);
545 INIT_WORK(&vblk->config_work, virtblk_config_changed_work);
546 vblk->config_enable = true;
551 spin_lock_init(&vblk->vq_lock);
553 /* FIXME: How many partitions? How long is a piece of string? */
554 vblk->disk = alloc_disk(1 << PART_BITS);
/*
 * Per-request private area: a struct virtblk_req followed by sg_elems
 * scatterlist entries (its trailing sg[] member).
 */
560 virtio_mq_reg.cmd_size =
561 sizeof(struct virtblk_req) +
562 sizeof(struct scatterlist) * sg_elems;
564 q = vblk->disk->queue = blk_mq_init_queue(&virtio_mq_reg, vblk);
/* Have virtblk_init_vbr() set up the scatterlist of the queue's requests. */
570 blk_mq_init_commands(q, virtblk_init_vbr, vblk);
574 virtblk_name_format("vd", index, vblk->disk->disk_name, DISK_NAME_LEN);
576 vblk->disk->major = major;
577 vblk->disk->first_minor = index_to_minor(index);
578 vblk->disk->private_data = vblk;
579 vblk->disk->fops = &virtblk_fops;
580 vblk->disk->driverfs_dev = &vdev->dev;
583 /* configure queue flush support */
584 virtblk_update_cache_mode(vdev);
586 /* If disk is read-only in the host, the guest should obey */
587 if (virtio_has_feature(vdev, VIRTIO_BLK_F_RO))
588 set_disk_ro(vblk->disk, 1);
590 /* Host must always specify the capacity. */
591 vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity),
594 /* If capacity is too big, truncate with warning. */
595 if ((sector_t)cap != cap) {
596 dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n",
597 (unsigned long long)cap);
600 set_capacity(vblk->disk, cap);
602 /* We can handle whatever the host told us to handle. */
603 blk_queue_max_segments(q, vblk->sg_elems-2);
605 /* No need to bounce any requests */
606 blk_queue_bounce_limit(q, BLK_BOUNCE_ANY);
608 /* No real sector limit. */
609 blk_queue_max_hw_sectors(q, -1U);
611 /* Host can optionally specify maximum segment size and number of
613 err = virtio_config_val(vdev, VIRTIO_BLK_F_SIZE_MAX,
614 offsetof(struct virtio_blk_config, size_max),
617 blk_queue_max_segment_size(q, v);
619 blk_queue_max_segment_size(q, -1U);
621 /* Host can optionally specify the block size of the device */
622 err = virtio_config_val(vdev, VIRTIO_BLK_F_BLK_SIZE,
623 offsetof(struct virtio_blk_config, blk_size),
626 blk_queue_logical_block_size(q, blk_size);
628 blk_size = queue_logical_block_size(q);
630 /* Use topology information if available */
631 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
632 offsetof(struct virtio_blk_config, physical_block_exp),
633 &physical_block_exp);
/* Physical block size is the logical size scaled by 2^physical_block_exp. */
634 if (!err && physical_block_exp)
635 blk_queue_physical_block_size(q,
636 blk_size * (1 << physical_block_exp));
638 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
639 offsetof(struct virtio_blk_config, alignment_offset),
/* The remaining topology values are scaled by blk_size before use. */
641 if (!err && alignment_offset)
642 blk_queue_alignment_offset(q, blk_size * alignment_offset);
644 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
645 offsetof(struct virtio_blk_config, min_io_size),
647 if (!err && min_io_size)
648 blk_queue_io_min(q, blk_size * min_io_size);
650 err = virtio_config_val(vdev, VIRTIO_BLK_F_TOPOLOGY,
651 offsetof(struct virtio_blk_config, opt_io_size),
653 if (!err && opt_io_size)
654 blk_queue_io_opt(q, blk_size * opt_io_size);
/* Publish the disk, then create its sysfs attributes. */
656 add_disk(vblk->disk);
657 err = device_create_file(disk_to_dev(vblk->disk), &dev_attr_serial);
/*
 * The writable cache_type variant is created when the host offers
 * VIRTIO_BLK_F_CONFIG_WCE; the second call creates the read-only variant.
 * NOTE(review): the else line between the two calls is not visible.
 */
661 if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
662 err = device_create_file(disk_to_dev(vblk->disk),
663 &dev_attr_cache_type_rw);
665 err = device_create_file(disk_to_dev(vblk->disk),
666 &dev_attr_cache_type_ro);
/*
 * Error unwinding, releasing what was set up above.
 * NOTE(review): the goto labels separating these steps are not visible.
 */
672 del_gendisk(vblk->disk);
673 blk_cleanup_queue(vblk->disk->queue);
675 put_disk(vblk->disk);
677 vdev->config->del_vqs(vdev);
681 ida_simple_remove(&vd_index_ida, index);
/*
 * .remove callback of the virtio driver: disables config-change handling,
 * removes the disk and its queue, resets the device, waits for pending
 * config work, drops the disk reference and deletes the virtqueues.
 * NOTE(review): the declaration of refc, the freeing of vblk and the
 * condition guarding ida_simple_remove() are not visible in this excerpt.
 */
686 static void virtblk_remove(struct virtio_device *vdev)
688 struct virtio_blk *vblk = vdev->priv;
/* Saved up front for the ida_simple_remove() call at the end. */
689 int index = vblk->index;
692 /* Prevent config work handler from accessing the device. */
693 mutex_lock(&vblk->config_lock);
694 vblk->config_enable = false;
695 mutex_unlock(&vblk->config_lock);
697 del_gendisk(vblk->disk);
698 blk_cleanup_queue(vblk->disk->queue);
700 /* Stop all the virtqueues. */
701 vdev->config->reset(vdev);
703 flush_work(&vblk->config_work);
/* Reference count of the disk device, sampled before put_disk(). */
705 refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
706 put_disk(vblk->disk);
707 vdev->config->del_vqs(vdev);
710 /* Only free device id if we don't have any users */
712 ida_simple_remove(&vd_index_ida, index);
/*
 * .freeze callback of the virtio driver: resets the device, disables and
 * flushes config-change work, stops the blk-mq hardware queues and deletes
 * the virtqueues.
 * NOTE(review): the return statement is not visible in this excerpt.
 */
716 static int virtblk_freeze(struct virtio_device *vdev)
718 struct virtio_blk *vblk = vdev->priv;
720 /* Ensure we don't receive any more interrupts */
721 vdev->config->reset(vdev);
723 /* Prevent config work handler from accessing the device. */
724 mutex_lock(&vblk->config_lock);
725 vblk->config_enable = false;
726 mutex_unlock(&vblk->config_lock);
/* Wait for a config work item that may already be running. */
728 flush_work(&vblk->config_work);
730 blk_mq_stop_hw_queues(vblk->disk->queue);
732 vdev->config->del_vqs(vdev);
/*
 * .restore callback of the virtio driver: re-enables config-change
 * handling, looks the virtqueue up again with init_vq() and restarts the
 * hardware queues stopped by virtblk_freeze().
 * NOTE(review): the declaration of ret, any check of it and the return
 * statement are not visible in this excerpt.  config_enable is written here
 * without taking config_lock, unlike in virtblk_freeze() -- confirm this is
 * intended.
 */
736 static int virtblk_restore(struct virtio_device *vdev)
738 struct virtio_blk *vblk = vdev->priv;
741 vblk->config_enable = true;
742 ret = init_vq(vdev->priv);
744 blk_mq_start_stopped_hw_queues(vblk->disk->queue);
/*
 * Device IDs this driver binds to: VIRTIO_ID_BLOCK with any vendor.
 * NOTE(review): the remainder of the table is not visible in this excerpt.
 */
750 static const struct virtio_device_id id_table[] = {
751 { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
/* Feature bits listed in the driver's feature_table (see virtio_blk below). */
755 static unsigned int features[] = {
756 VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
757 VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
758 VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
/*
 * Driver description registered by init(): feature table, ID table and the
 * probe/remove/config_changed/freeze/restore callbacks defined in this file.
 * NOTE(review): a line between .config_changed and .freeze is not visible
 * in this excerpt.
 */
761 static struct virtio_driver virtio_blk = {
762 .feature_table = features,
763 .feature_table_size = ARRAY_SIZE(features),
764 .driver.name = KBUILD_MODNAME,
765 .driver.owner = THIS_MODULE,
766 .id_table = id_table,
767 .probe = virtblk_probe,
768 .remove = virtblk_remove,
769 .config_changed = virtblk_config_changed,
771 .freeze = virtblk_freeze,
772 .restore = virtblk_restore,
/*
 * Module init: creates the "virtio-blk" workqueue, registers a dynamically
 * assigned block major under the name "virtblk" and registers the virtio
 * driver.  The labels at the end undo the earlier steps in reverse order.
 * NOTE(review): the error checks and return statements are not visible in
 * this excerpt.
 */
776 static int __init init(void)
780 virtblk_wq = alloc_workqueue("virtio-blk", 0, 0);
/* Passing 0 requests a dynamically allocated major number. */
784 major = register_blkdev(0, "virtblk");
787 goto out_destroy_workqueue;
790 error = register_virtio_driver(&virtio_blk);
792 goto out_unregister_blkdev;
795 out_unregister_blkdev:
796 unregister_blkdev(major, "virtblk");
797 out_destroy_workqueue:
798 destroy_workqueue(virtblk_wq);
/*
 * Module exit: unregisters the block major and the virtio driver, then
 * destroys the workqueue.
 * NOTE(review): the major is released before the virtio driver is
 * unregistered, which is not the reverse of the order used in init();
 * presumably devices are still removed during driver unregistration, so
 * confirm that this ordering is intended.
 */
802 static void __exit fini(void)
804 unregister_blkdev(major, "virtblk");
805 unregister_virtio_driver(&virtio_blk);
806 destroy_workqueue(virtblk_wq);
/* Module metadata: exported virtio ID table, description and licence. */
811 MODULE_DEVICE_TABLE(virtio, id_table);
812 MODULE_DESCRIPTION("Virtio block driver");
813 MODULE_LICENSE("GPL");