blob: a77e23bc9b34235a572b4dfe1dfbfa6ac676bb93 [file] [log] [blame]
#include <linux/virtio_ring.h>
#include <linux/types.h>
#include <sys/uio.h>
#include <stdlib.h>
#include "kvm/guest_compat.h"
#include "kvm/barrier.h"
#include "kvm/virtio.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio-mmio.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
const char* virtio_trans_name(enum virtio_trans trans)
{
if (trans == VIRTIO_PCI || trans == VIRTIO_PCI_LEGACY)
return "pci";
else if (trans == VIRTIO_MMIO || trans == VIRTIO_MMIO_LEGACY)
return "mmio";
return "unknown";
}
int virtio_transport_parser(const struct option *opt, const char *arg, int unset)
{
enum virtio_trans *type = opt->value;
struct kvm *kvm;
if (!strcmp(opt->long_name, "virtio-transport")) {
if (!strcmp(arg, "pci")) {
*type = VIRTIO_PCI;
} else if (!strcmp(arg, "pci-legacy")) {
*type = VIRTIO_PCI_LEGACY;
#if defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV)
} else if (!strcmp(arg, "mmio")) {
*type = VIRTIO_MMIO;
} else if (!strcmp(arg, "mmio-legacy")) {
*type = VIRTIO_MMIO_LEGACY;
#endif
} else {
pr_err("virtio-transport: unknown type \"%s\"\n", arg);
return -1;
}
} else if (!strcmp(opt->long_name, "virtio-legacy")) {
*type = VIRTIO_PCI_LEGACY;
} else if (!strcmp(opt->long_name, "force-pci")) {
kvm = opt->ptr;
kvm->cfg.virtio_transport = VIRTIO_PCI;
}
return 0;
}
void virt_queue__used_idx_advance(struct virt_queue *queue, u16 jump)
{
u16 idx = virtio_guest_to_host_u16(queue->endian,
queue->vring.used->idx);
/*
* Use wmb to assure that used elem was updated with head and len.
* We need a wmb here since we can't advance idx unless we're ready
* to pass the used element to the guest.
*/
wmb();
idx += jump;
queue->vring.used->idx = virtio_host_to_guest_u16(queue->endian, idx);
}
struct vring_used_elem *
virt_queue__set_used_elem_no_update(struct virt_queue *queue, u32 head,
u32 len, u16 offset)
{
struct vring_used_elem *used_elem;
u16 idx = virtio_guest_to_host_u16(queue->endian, queue->vring.used->idx);
idx += offset;
used_elem = &queue->vring.used->ring[idx % queue->vring.num];
used_elem->id = virtio_host_to_guest_u32(queue->endian, head);
used_elem->len = virtio_host_to_guest_u32(queue->endian, len);
return used_elem;
}
struct vring_used_elem *virt_queue__set_used_elem(struct virt_queue *queue, u32 head, u32 len)
{
struct vring_used_elem *used_elem;
used_elem = virt_queue__set_used_elem_no_update(queue, head, len, 0);
virt_queue__used_idx_advance(queue, 1);
return used_elem;
}
static inline bool virt_desc__test_flag(struct virt_queue *vq,
struct vring_desc *desc, u16 flag)
{
return !!(virtio_guest_to_host_u16(vq->endian, desc->flags) & flag);
}
/*
* Each buffer in the virtqueues is actually a chain of descriptors. This
* function returns the next descriptor in the chain, or max if we're at the
* end.
*/
static unsigned next_desc(struct virt_queue *vq, struct vring_desc *desc,
unsigned int i, unsigned int max)
{
unsigned int next;
/* If this descriptor says it doesn't chain, we're done. */
if (!virt_desc__test_flag(vq, &desc[i], VRING_DESC_F_NEXT))
return max;
next = virtio_guest_to_host_u16(vq->endian, desc[i].next);
/* Ensure they're not leading us off end of descriptors. */
return min(next, max);
}
u16 virt_queue__get_head_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, u16 head, struct kvm *kvm)
{
struct vring_desc *desc;
u16 idx;
u16 max;
idx = head;
*out = *in = 0;
max = vq->vring.num;
desc = vq->vring.desc;
if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_INDIRECT)) {
max = virtio_guest_to_host_u32(vq->endian, desc[idx].len) / sizeof(struct vring_desc);
desc = guest_flat_to_host(kvm, virtio_guest_to_host_u64(vq->endian, desc[idx].addr));
idx = 0;
}
do {
/* Grab the first descriptor, and check it's OK. */
iov[*out + *in].iov_len = virtio_guest_to_host_u32(vq->endian, desc[idx].len);
iov[*out + *in].iov_base = guest_flat_to_host(kvm,
virtio_guest_to_host_u64(vq->endian, desc[idx].addr));
/* If this is an input descriptor, increment that count. */
if (virt_desc__test_flag(vq, &desc[idx], VRING_DESC_F_WRITE))
(*in)++;
else
(*out)++;
} while ((idx = next_desc(vq, desc, idx, max)) != max);
return head;
}
u16 virt_queue__get_iov(struct virt_queue *vq, struct iovec iov[], u16 *out, u16 *in, struct kvm *kvm)
{
u16 head;
head = virt_queue__pop(vq);
return virt_queue__get_head_iov(vq, iov, out, in, head, kvm);
}
/* in and out are relative to guest */
u16 virt_queue__get_inout_iov(struct kvm *kvm, struct virt_queue *queue,
struct iovec in_iov[], struct iovec out_iov[],
u16 *in, u16 *out)
{
struct vring_desc *desc;
u16 head, idx;
idx = head = virt_queue__pop(queue);
*out = *in = 0;
do {
u64 addr;
desc = virt_queue__get_desc(queue, idx);
addr = virtio_guest_to_host_u64(queue->endian, desc->addr);
if (virt_desc__test_flag(queue, desc, VRING_DESC_F_WRITE)) {
in_iov[*in].iov_base = guest_flat_to_host(kvm, addr);
in_iov[*in].iov_len = virtio_guest_to_host_u32(queue->endian, desc->len);
(*in)++;
} else {
out_iov[*out].iov_base = guest_flat_to_host(kvm, addr);
out_iov[*out].iov_len = virtio_guest_to_host_u32(queue->endian, desc->len);
(*out)++;
}
if (virt_desc__test_flag(queue, desc, VRING_DESC_F_NEXT))
idx = virtio_guest_to_host_u16(queue->endian, desc->next);
else
break;
} while (1);
return head;
}
void virtio_init_device_vq(struct kvm *kvm, struct virtio_device *vdev,
struct virt_queue *vq, size_t nr_descs)
{
struct vring_addr *addr = &vq->vring_addr;
vq->endian = vdev->endian;
vq->use_event_idx = (vdev->features & (1UL << VIRTIO_RING_F_EVENT_IDX));
vq->enabled = true;
vq->vdev = vdev;
if (addr->legacy) {
unsigned long base = (u64)addr->pfn * addr->pgsize;
void *p = guest_flat_to_host(kvm, base);
vring_init(&vq->vring, nr_descs, p, addr->align);
} else {
u64 desc = (u64)addr->desc_hi << 32 | addr->desc_lo;
u64 avail = (u64)addr->avail_hi << 32 | addr->avail_lo;
u64 used = (u64)addr->used_hi << 32 | addr->used_lo;
vq->vring = (struct vring) {
.desc = guest_flat_to_host(kvm, desc),
.used = guest_flat_to_host(kvm, used),
.avail = guest_flat_to_host(kvm, avail),
.num = nr_descs,
};
}
}
void virtio_exit_vq(struct kvm *kvm, struct virtio_device *vdev,
void *dev, int num)
{
struct virt_queue *vq = vdev->ops->get_vq(kvm, dev, num);
if (vq->enabled && vdev->ops->exit_vq)
vdev->ops->exit_vq(kvm, dev, num);
memset(vq, 0, sizeof(*vq));
}
int virtio__get_dev_specific_field(int offset, bool msix, u32 *config_off)
{
if (msix) {
if (offset < 4)
return VIRTIO_PCI_O_MSIX;
else
offset -= 4;
}
*config_off = offset;
return VIRTIO_PCI_O_CONFIG;
}
bool virtio_queue__should_signal(struct virt_queue *vq)
{
u16 old_idx, new_idx, event_idx;
/*
* Use mb to assure used idx has been increased before we signal the
* guest, and we don't read a stale value for used_event. Without a mb
* here we might not send a notification that we need to send, or the
* guest may ignore the queue since it won't see an updated idx.
*/
mb();
if (!vq->use_event_idx) {
/*
* When VIRTIO_RING_F_EVENT_IDX isn't negotiated, interrupt the
* guest if it didn't explicitly request to be left alone.
*/
return !(virtio_guest_to_host_u16(vq->endian, vq->vring.avail->flags) &
VRING_AVAIL_F_NO_INTERRUPT);
}
old_idx = vq->last_used_signalled;
new_idx = virtio_guest_to_host_u16(vq->endian, vq->vring.used->idx);
event_idx = virtio_guest_to_host_u16(vq->endian, vring_used_event(&vq->vring));
if (vring_need_event(event_idx, new_idx, old_idx)) {
vq->last_used_signalled = new_idx;
return true;
}
return false;
}
void virtio_set_guest_features(struct kvm *kvm, struct virtio_device *vdev,
void *dev, u64 features)
{
/* TODO: fail negotiation if features & ~host_features */
vdev->features |= features;
}
void virtio_notify_status(struct kvm *kvm, struct virtio_device *vdev,
void *dev, u8 status)
{
u32 ext_status = status;
vdev->status &= ~VIRTIO_CONFIG_S_MASK;
vdev->status |= status;
/* Add a few hints to help devices */
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
!(vdev->status & VIRTIO__STATUS_START)) {
vdev->status |= VIRTIO__STATUS_START;
ext_status |= VIRTIO__STATUS_START;
} else if (!status && (vdev->status & VIRTIO__STATUS_START)) {
vdev->status &= ~VIRTIO__STATUS_START;
ext_status |= VIRTIO__STATUS_STOP;
/*
* Reset virtqueues and stop all traffic now, so that the device
* can safely reset the backend in notify_status().
*/
vdev->ops->reset(kvm, vdev);
}
if (!status)
ext_status |= VIRTIO__STATUS_CONFIG;
if (vdev->ops->notify_status)
vdev->ops->notify_status(kvm, dev, ext_status);
}
bool virtio_access_config(struct kvm *kvm, struct virtio_device *vdev,
void *dev, unsigned long offset, void *data,
size_t size, bool is_write)
{
void *in, *out, *config;
size_t config_size = vdev->ops->get_config_size(kvm, dev);
if (WARN_ONCE(offset + size > config_size,
"Config access offset (%lu) is beyond config size (%zu)\n",
offset, config_size))
return false;
config = vdev->ops->get_config(kvm, dev) + offset;
in = is_write ? data : config;
out = is_write ? config : data;
switch (size) {
case 1:
*(u8 *)out = *(u8 *)in;
break;
case 2:
*(u16 *)out = *(u16 *)in;
break;
case 4:
*(u32 *)out = *(u32 *)in;
break;
case 8:
*(u64 *)out = *(u64 *)in;
break;
default:
WARN_ONCE(1, "%s: invalid access size\n", __func__);
return false;
}
return true;
}
int virtio_init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
struct virtio_ops *ops, enum virtio_trans trans,
int device_id, int subsys_id, int class)
{
void *virtio;
int r;
switch (trans) {
case VIRTIO_PCI_LEGACY:
vdev->legacy = true;
/* fall through */
case VIRTIO_PCI:
virtio = calloc(sizeof(struct virtio_pci), 1);
if (!virtio)
return -ENOMEM;
vdev->virtio = virtio;
vdev->ops = ops;
vdev->ops->signal_vq = virtio_pci__signal_vq;
vdev->ops->signal_config = virtio_pci__signal_config;
vdev->ops->init = virtio_pci__init;
vdev->ops->exit = virtio_pci__exit;
vdev->ops->reset = virtio_pci__reset;
r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
break;
case VIRTIO_MMIO_LEGACY:
vdev->legacy = true;
/* fall through */
case VIRTIO_MMIO:
virtio = calloc(sizeof(struct virtio_mmio), 1);
if (!virtio)
return -ENOMEM;
vdev->virtio = virtio;
vdev->ops = ops;
vdev->ops->signal_vq = virtio_mmio_signal_vq;
vdev->ops->signal_config = virtio_mmio_signal_config;
vdev->ops->init = virtio_mmio_init;
vdev->ops->exit = virtio_mmio_exit;
vdev->ops->reset = virtio_mmio_reset;
r = vdev->ops->init(kvm, dev, vdev, device_id, subsys_id, class);
break;
default:
r = -1;
};
return r;
}
int virtio_compat_add_message(const char *device, const char *config)
{
int len = 1024;
int compat_id;
char *title;
char *desc;
title = malloc(len);
if (!title)
return -ENOMEM;
desc = malloc(len);
if (!desc) {
free(title);
return -ENOMEM;
}
snprintf(title, len, "%s device was not detected.", device);
snprintf(desc, len, "While you have requested a %s device, "
"the guest kernel did not initialize it.\n"
"\tPlease make sure that the guest kernel was "
"compiled with %s=y enabled in .config.",
device, config);
compat_id = compat__add_message(title, desc);
free(desc);
free(title);
return compat_id;
}