| #include "kvm/virtio-blk.h" |
| |
| #include "kvm/virtio-pci-dev.h" |
| #include "kvm/disk-image.h" |
| #include "kvm/mutex.h" |
| #include "kvm/util.h" |
| #include "kvm/kvm.h" |
| #include "kvm/pci.h" |
| #include "kvm/threadpool.h" |
| #include "kvm/ioeventfd.h" |
| #include "kvm/guest_compat.h" |
| #include "kvm/virtio-pci.h" |
| #include "kvm/virtio.h" |
| |
| #include <linux/virtio_ring.h> |
| #include <linux/virtio_blk.h> |
| #include <linux/kernel.h> |
| #include <linux/list.h> |
| #include <linux/types.h> |
| #include <pthread.h> |
| #include <sys/eventfd.h> |
| |
| #define VIRTIO_BLK_MAX_DEV 4 |
| |
| #define VIRTIO_BLK_QUEUE_SIZE 256 |
| |
| /* |
| * The request header and the status byte consume two of the queue |
| * entries, leaving the rest for data segments. |
| */ |
| #define DISK_SEG_MAX (VIRTIO_BLK_QUEUE_SIZE - 2) |
| #define NUM_VIRT_QUEUES 1 |
| |
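| /* |
| * A single in-flight request: the descriptor chain for one queue |
| * element (iov, out/in counts, head index) plus the back-pointers |
| * needed by the completion path. |
| */ |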
| struct blk_dev_req { |
| struct virt_queue *vq; |
| struct blk_dev *bdev; |
| struct iovec iov[VIRTIO_BLK_QUEUE_SIZE]; |
| u16 out, in, head; |
| struct kvm *kvm; |
| }; |
| |
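| /* |
| * Per-device state: the virtio device, its single request queue, one |
| * pre-allocated request slot per queue entry, and the eventfd-driven |
| * I/O thread. |
| */ |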
| struct blk_dev { |
| struct mutex mutex; |
| |
| struct list_head list; |
| |
| struct virtio_device vdev; |
| struct virtio_blk_config blk_config; |
| struct disk_image *disk; |
| u32 features; |
| |
| struct virt_queue vqs[NUM_VIRT_QUEUES]; |
| struct blk_dev_req reqs[VIRTIO_BLK_QUEUE_SIZE]; |
| |
| pthread_t io_thread; |
| int io_efd; |
| |
| struct kvm *kvm; |
| }; |
| |
| static LIST_HEAD(bdevs); |
| static int compat_id = -1; |
| |
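| /* |
| * Completion callback, invoked when the disk image layer finishes a |
| * request: write the status byte into the chain's final descriptor, |
| * publish the used element under the device mutex, and signal the |
| * guest if the queue asks for it. |
| */ |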
| void virtio_blk_complete(void *param, long len) |
| { |
| struct blk_dev_req *req = param; |
| struct blk_dev *bdev = req->bdev; |
| int queueid = req->vq - bdev->vqs; |
| u8 *status; |
| |
| /* status */ |
| status = req->iov[req->out + req->in - 1].iov_base; |
| *status = (len < 0) ? VIRTIO_BLK_S_IOERR : VIRTIO_BLK_S_OK; |
| |
| mutex_lock(&bdev->mutex); |
| virt_queue__set_used_elem(req->vq, req->head, len); |
| mutex_unlock(&bdev->mutex); |
| |
| if (virtio_queue__should_signal(&bdev->vqs[queueid])) |
| bdev->vdev.ops->signal_vq(req->kvm, &bdev->vdev, queueid); |
| } |
| |
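| /* |
| * Parse and dispatch one request. The first descriptor carries the |
| * virtio_blk_outhdr, the data segments follow, and the last one holds |
| * the status byte. Reads and writes complete asynchronously through |
| * virtio_blk_complete(); flush and get-id complete inline. |
| */ |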
| static void virtio_blk_do_io_request(struct kvm *kvm, struct virt_queue *vq, struct blk_dev_req *req) |
| { |
| struct virtio_blk_outhdr *req_hdr; |
| ssize_t block_cnt; |
| struct blk_dev *bdev; |
| struct iovec *iov; |
| u16 out, in; |
| u32 type; |
| u64 sector; |
| |
| block_cnt = -1; |
| bdev = req->bdev; |
| iov = req->iov; |
| out = req->out; |
| in = req->in; |
| req_hdr = iov[0].iov_base; |
| |
| type = virtio_guest_to_host_u32(vq, req_hdr->type); |
| sector = virtio_guest_to_host_u64(vq, req_hdr->sector); |
| |
| switch (type) { |
| case VIRTIO_BLK_T_IN: |
| block_cnt = disk_image__read(bdev->disk, sector, |
| iov + 1, in + out - 2, req); |
| break; |
| case VIRTIO_BLK_T_OUT: |
| block_cnt = disk_image__write(bdev->disk, sector, |
| iov + 1, in + out - 2, req); |
| break; |
| case VIRTIO_BLK_T_FLUSH: |
| block_cnt = disk_image__flush(bdev->disk); |
| virtio_blk_complete(req, block_cnt); |
| break; |
| case VIRTIO_BLK_T_GET_ID: |
| block_cnt = VIRTIO_BLK_ID_BYTES; |
| disk_image__get_serial(bdev->disk, |
| (iov + 1)->iov_base, &block_cnt); |
| virtio_blk_complete(req, block_cnt); |
| break; |
| default: |
| pr_warning("unsupported request type %u", type); |
| block_cnt = -1; |
| break; |
| } |
| } |
| |
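| /* |
| * Drain the avail ring: each head descriptor index also selects the |
| * pre-allocated request slot used to track that request. |
| */ |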
| static void virtio_blk_do_io(struct kvm *kvm, struct virt_queue *vq, struct blk_dev *bdev) |
| { |
| struct blk_dev_req *req; |
| u16 head; |
| |
| while (virt_queue__available(vq)) { |
| head = virt_queue__pop(vq); |
| req = &bdev->reqs[head]; |
| req->head = virt_queue__get_head_iov(vq, req->iov, &req->out, |
| &req->in, head, kvm); |
| req->vq = vq; |
| |
| virtio_blk_do_io_request(kvm, vq, req); |
| } |
| } |
| |
| static u8 *get_config(struct kvm *kvm, void *dev) |
| { |
| struct blk_dev *bdev = dev; |
| |
| return (u8 *)&bdev->blk_config; |
| } |
| |
| static size_t get_config_size(struct kvm *kvm, void *dev) |
| { |
| struct blk_dev *bdev = dev; |
| |
| return sizeof(bdev->blk_config); |
| } |
| |
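| /* |
| * Features offered to the guest; VIRTIO_BLK_F_RO is advertised only |
| * when the backing disk image is read-only. |
| */ |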
| static u32 get_host_features(struct kvm *kvm, void *dev) |
| { |
| struct blk_dev *bdev = dev; |
| |
| return 1UL << VIRTIO_BLK_F_SEG_MAX |
| | 1UL << VIRTIO_BLK_F_FLUSH |
| | 1UL << VIRTIO_RING_F_EVENT_IDX |
| | 1UL << VIRTIO_RING_F_INDIRECT_DESC |
| | (bdev->disk->readonly ? 1UL << VIRTIO_BLK_F_RO : 0); |
| } |
| |
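| /* |
| * Called when the guest writes its feature set; convert the config |
| * space fields to the guest's byte order. |
| */ |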
| static void set_guest_features(struct kvm *kvm, void *dev, u32 features) |
| { |
| struct blk_dev *bdev = dev; |
| struct virtio_blk_config *conf = &bdev->blk_config; |
| |
| bdev->features = features; |
| |
| conf->capacity = virtio_host_to_guest_u64(&bdev->vdev, conf->capacity); |
| conf->size_max = virtio_host_to_guest_u32(&bdev->vdev, conf->size_max); |
| conf->seg_max = virtio_host_to_guest_u32(&bdev->vdev, conf->seg_max); |
| |
| /* Geometry */ |
| conf->geometry.cylinders = virtio_host_to_guest_u16(&bdev->vdev, |
| conf->geometry.cylinders); |
| |
| conf->blk_size = virtio_host_to_guest_u32(&bdev->vdev, conf->blk_size); |
| conf->min_io_size = virtio_host_to_guest_u16(&bdev->vdev, conf->min_io_size); |
| conf->opt_io_size = virtio_host_to_guest_u32(&bdev->vdev, conf->opt_io_size); |
| } |
| |
| static void notify_status(struct kvm *kvm, void *dev, u32 status) |
| { |
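| /* No action is needed on device status changes. */ |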
| } |
| |
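| /* |
| * Dedicated I/O thread: block on the eventfd until notify_vq() kicks |
| * it, then process every pending request on the queue. |
| */ |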
| static void *virtio_blk_thread(void *dev) |
| { |
| struct blk_dev *bdev = dev; |
| u64 data; |
| int r; |
| |
| kvm__set_thread_name("virtio-blk-io"); |
| |
| while (1) { |
| r = read(bdev->io_efd, &data, sizeof(u64)); |
| if (r < 0) |
| continue; |
| virtio_blk_do_io(bdev->kvm, &bdev->vqs[0], bdev); |
| } |
| |
| pthread_exit(NULL); |
| return NULL; |
| } |
| |
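| /* |
| * Queue setup: map the vring from the guest-supplied page frame |
| * number. For the first queue this also initializes the request |
| * slots, the eventfd, and the I/O thread. |
| */ |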
| static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 page_size, u32 align, |
| u32 pfn) |
| { |
| unsigned int i; |
| struct blk_dev *bdev = dev; |
| struct virt_queue *queue; |
| void *p; |
| |
| compat__remove_message(compat_id); |
| |
| queue = &bdev->vqs[vq]; |
| queue->pfn = pfn; |
| p = virtio_get_vq(kvm, queue->pfn, page_size); |
| |
| vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, align); |
| virtio_init_device_vq(&bdev->vdev, queue); |
| |
| if (vq != 0) |
| return 0; |
| |
| for (i = 0; i < ARRAY_SIZE(bdev->reqs); i++) { |
| bdev->reqs[i] = (struct blk_dev_req) { |
| .bdev = bdev, |
| .kvm = kvm, |
| }; |
| } |
| |
| mutex_init(&bdev->mutex); |
| bdev->io_efd = eventfd(0, 0); |
| if (bdev->io_efd < 0) |
| return -errno; |
| |
| if (pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev)) |
| return -errno; |
| |
| return 0; |
| } |
| |
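| /* |
| * Queue teardown: stop the I/O thread and wait for any in-flight |
| * disk operations to finish. |
| */ |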
| static void exit_vq(struct kvm *kvm, void *dev, u32 vq) |
| { |
| struct blk_dev *bdev = dev; |
| |
| if (vq != 0) |
| return; |
| |
| close(bdev->io_efd); |
| pthread_cancel(bdev->io_thread); |
| pthread_join(bdev->io_thread, NULL); |
| |
| disk_image__wait(bdev->disk); |
| } |
| |
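| /* Guest kick: wake the I/O thread by bumping the eventfd counter. */ |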
| static int notify_vq(struct kvm *kvm, void *dev, u32 vq) |
| { |
| struct blk_dev *bdev = dev; |
| u64 data = 1; |
| int r; |
| |
| r = write(bdev->io_efd, &data, sizeof(data)); |
| if (r < 0) |
| return r; |
| |
| return 0; |
| } |
| |
| static struct virt_queue *get_vq(struct kvm *kvm, void *dev, u32 vq) |
| { |
| struct blk_dev *bdev = dev; |
| |
| return &bdev->vqs[vq]; |
| } |
| |
| static int get_size_vq(struct kvm *kvm, void *dev, u32 vq) |
| { |
| /* FIXME: dynamic */ |
| return VIRTIO_BLK_QUEUE_SIZE; |
| } |
| |
| static int set_size_vq(struct kvm *kvm, void *dev, u32 vq, int size) |
| { |
| /* FIXME: dynamic */ |
| return size; |
| } |
| |
| static int get_vq_count(struct kvm *kvm, void *dev) |
| { |
| return NUM_VIRT_QUEUES; |
| } |
| |
| static struct virtio_ops blk_dev_virtio_ops = { |
| .get_config = get_config, |
| .get_config_size = get_config_size, |
| .get_host_features = get_host_features, |
| .set_guest_features = set_guest_features, |
| .get_vq_count = get_vq_count, |
| .init_vq = init_vq, |
| .exit_vq = exit_vq, |
| .notify_status = notify_status, |
| .notify_vq = notify_vq, |
| .get_vq = get_vq, |
| .get_size_vq = get_size_vq, |
| .set_size_vq = set_size_vq, |
| }; |
| |
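| /* |
| * Create and register one virtio-blk device backed by a disk image, |
| * exposing its capacity in 512-byte sectors. |
| */ |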
| static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk) |
| { |
| struct blk_dev *bdev; |
| int r; |
| |
| if (!disk) |
| return -EINVAL; |
| |
| bdev = calloc(1, sizeof(struct blk_dev)); |
| if (bdev == NULL) |
| return -ENOMEM; |
| |
| *bdev = (struct blk_dev) { |
| .disk = disk, |
| .blk_config = (struct virtio_blk_config) { |
| .capacity = disk->size / SECTOR_SIZE, |
| .seg_max = DISK_SEG_MAX, |
| }, |
| .kvm = kvm, |
| }; |
| |
| list_add_tail(&bdev->list, &bdevs); |
| |
| r = virtio_init(kvm, bdev, &bdev->vdev, &blk_dev_virtio_ops, |
| VIRTIO_DEFAULT_TRANS(kvm), PCI_DEVICE_ID_VIRTIO_BLK, |
| VIRTIO_ID_BLOCK, PCI_CLASS_BLK); |
| if (r < 0) |
| return r; |
| |
| disk_image__set_callback(bdev->disk, virtio_blk_complete); |
| |
| if (compat_id == -1) |
| compat_id = virtio_compat_add_message("virtio-blk", "CONFIG_VIRTIO_BLK"); |
| |
| return 0; |
| } |
| |
| static int virtio_blk__exit_one(struct kvm *kvm, struct blk_dev *bdev) |
| { |
| list_del(&bdev->list); |
| free(bdev); |
| |
| return 0; |
| } |
| |
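| /* |
| * Instantiate a device for every attached disk image; disks carrying |
| * a wwpn are skipped here, as they are left to a different backend. |
| */ |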
| int virtio_blk__init(struct kvm *kvm) |
| { |
| int i, r = 0; |
| |
| for (i = 0; i < kvm->nr_disks; i++) { |
| if (kvm->disks[i]->wwpn) |
| continue; |
| r = virtio_blk__init_one(kvm, kvm->disks[i]); |
| if (r < 0) |
| goto cleanup; |
| } |
| |
| return 0; |
| cleanup: |
| virtio_blk__exit(kvm); |
| return r; |
| } |
| virtio_dev_init(virtio_blk__init); |
| |
| int virtio_blk__exit(struct kvm *kvm) |
| { |
| while (!list_empty(&bdevs)) { |
| struct blk_dev *bdev; |
| |
| bdev = list_first_entry(&bdevs, struct blk_dev, list); |
| virtio_blk__exit_one(kvm, bdev); |
| } |
| |
| return 0; |
| } |
| virtio_dev_exit(virtio_blk__exit); |