blob: 99653cad2c0f11af400b5eab0be5cf82fe033698 [file] [log] [blame]
#include "kvm/virtio-pci.h"
#include "kvm/ioport.h"
#include "kvm/kvm.h"
#include "kvm/kvm-cpu.h"
#include "kvm/virtio-pci-dev.h"
#include "kvm/irq.h"
#include "kvm/virtio.h"
#include "kvm/ioeventfd.h"
#include <sys/ioctl.h>
#include <linux/virtio_pci.h>
#include <linux/byteorder.h>
#include <errno.h>
#include <string.h>
static void virtio_pci__ioevent_callback(struct kvm *kvm, void *param)
{
struct virtio_pci_ioevent_param *ioeventfd = param;
struct virtio_pci *vpci = ioeventfd->vdev->virtio;
ioeventfd->vdev->ops->notify_vq(kvm, vpci->dev, ioeventfd->vq);
}
/*
 * Register the ioeventfds backing the QUEUE_NOTIFY register of queue @vq:
 * one on the I/O port BAR and one on its MMIO mirror.
 *
 * If the device implements notify_vq_eventfd(), it is handed the eventfd
 * that was registered for the I/O port.
 *
 * Returns 0 on success, -errno if eventfd() fails, or the non-zero value
 * returned by ioeventfd__add_event(). If the MMIO registration fails, the
 * I/O port registration is removed again.
 */
static int virtio_pci__init_ioeventfd(struct kvm *kvm, struct virtio_device *vdev, u32 vq)
{
	struct ioevent ioevent;
	struct virtio_pci *vpci = vdev->virtio;
	int r, flags = 0;
	int fd, mmio_fd;

	vpci->ioeventfds[vq] = (struct virtio_pci_ioevent_param) {
		.vdev		= vdev,
		.vq		= vq,
	};

	ioevent = (struct ioevent) {
		.fn		= virtio_pci__ioevent_callback,
		.fn_ptr		= &vpci->ioeventfds[vq],
		.datamatch	= vq,
		.fn_kvm		= kvm,
	};

	/*
	 * Vhost will poll the eventfd in host kernel side, otherwise we
	 * need to poll in userspace.
	 */
	if (!vdev->use_vhost)
		flags |= IOEVENTFD_FLAG_USER_POLL;

	/* ioport */
	fd = eventfd(0, 0);
	if (fd < 0)
		return -errno;

	ioevent.io_addr	= vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY;
	ioevent.io_len	= sizeof(u16);
	ioevent.fd	= fd;
	/*
	 * NOTE(review): when ioeventfd__add_event() fails, the eventfd created
	 * for it is not closed here. Whether the callee owns the descriptor on
	 * failure is not visible from this file - confirm before adding close().
	 */
	r = ioeventfd__add_event(&ioevent, flags | IOEVENTFD_FLAG_PIO);
	if (r)
		return r;

	/* mmio */
	mmio_fd = eventfd(0, 0);
	if (mmio_fd < 0) {
		r = -errno;
		goto free_ioport_evt;
	}

	ioevent.io_addr	= vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY;
	ioevent.io_len	= sizeof(u16);
	ioevent.fd	= mmio_fd;
	r = ioeventfd__add_event(&ioevent, flags);
	if (r)
		goto free_ioport_evt;

	if (vdev->ops->notify_vq_eventfd)
		vdev->ops->notify_vq_eventfd(kvm, vpci->dev, vq, fd);

	return 0;

free_ioport_evt:
	ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq);
	return r;
}
/*
 * Tear down queue @vq: drop the QUEUE_NOTIFY ioeventfd registrations (MMIO
 * first, then I/O port) and hand the queue to virtio_exit_vq().
 */
static void virtio_pci_exit_vq(struct kvm *kvm, struct virtio_device *vdev,
			       int vq)
{
	struct virtio_pci *vpci = vdev->virtio;

	ioeventfd__del_event(vpci->mmio_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq);
	ioeventfd__del_event(vpci->port_addr + VIRTIO_PCI_QUEUE_NOTIFY, vq);

	virtio_exit_vq(kvm, vdev, vpci->dev, vq);
}
/*
 * Report whether the PCI_MSIX_FLAGS_ENABLE bit is set in the control word
 * of the device's MSI-X capability.
 */
static inline bool virtio_pci__msix_enabled(struct virtio_pci *vpci)
{
	return (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_ENABLE)) != 0;
}
/*
 * Handle a guest read from the part of the I/O BAR that follows the common
 * registers: either the MSI-X vector registers or the device-specific
 * configuration space, as classified by virtio__get_dev_specific_field().
 *
 * @offset is the register offset from the start of the BAR.
 *
 * Returns true when the access was handled, false when the offset belongs
 * to neither region.
 */
static bool virtio_pci__specific_io_in(struct kvm *kvm, struct virtio_device *vdev, u16 port,
					void *data, int size, int offset)
{
	u32 config_offset;
	struct virtio_pci *vpci = vdev->virtio;
	int type = virtio__get_dev_specific_field(offset - 20,
						  virtio_pci__msix_enabled(vpci),
						  &config_offset);

	if (type == VIRTIO_PCI_O_MSIX) {
		switch (offset) {
		case VIRTIO_MSI_CONFIG_VECTOR:
			ioport__write16(data, vpci->config_vector);
			break;
		case VIRTIO_MSI_QUEUE_VECTOR:
			/*
			 * queue_selector is written by the guest without any
			 * range check, so never use it to index vq_vector[]
			 * blindly; an unknown queue has no vector.
			 */
			if (vpci->queue_selector < VIRTIO_PCI_MAX_VQ)
				ioport__write16(data, vpci->vq_vector[vpci->queue_selector]);
			else
				ioport__write16(data, VIRTIO_MSI_NO_VECTOR);
			break;
		}

		return true;
	} else if (type == VIRTIO_PCI_O_CONFIG) {
		u8 cfg;

		/*
		 * NOTE(review): config_offset is not checked against the size
		 * of the device's config area, which is not known here.
		 */
		cfg = vdev->ops->get_config(kvm, vpci->dev)[config_offset];
		ioport__write8(data, cfg);
		return true;
	}

	return false;
}
/*
 * I/O port read handler for the device's I/O BAR.
 *
 * The register is identified by its offset from the BAR's base port.
 * Common registers are answered here; anything else is delegated to
 * virtio_pci__specific_io_in().
 *
 * Returns true for every common register, otherwise the result of the
 * delegate.
 */
static bool virtio_pci__io_in(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	struct virtio_device *vdev = ioport->priv;
	struct virtio_pci *vpci = vdev->virtio;
	struct kvm *kvm = vcpu->kvm;
	unsigned long reg = port - vpci->port_addr;
	struct virt_queue *queue;
	u32 value;

	switch (reg) {
	case VIRTIO_PCI_HOST_FEATURES:
		value = vdev->ops->get_host_features(kvm, vpci->dev);
		ioport__write32(data, value);
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		queue = vdev->ops->get_vq(kvm, vpci->dev, vpci->queue_selector);
		ioport__write32(data, queue->pfn);
		break;
	case VIRTIO_PCI_QUEUE_NUM:
		value = vdev->ops->get_size_vq(kvm, vpci->dev, vpci->queue_selector);
		ioport__write16(data, value);
		break;
	case VIRTIO_PCI_STATUS:
		ioport__write8(data, vpci->status);
		break;
	case VIRTIO_PCI_ISR:
		/* Reading the ISR lowers the legacy IRQ line and clears the ISR. */
		ioport__write8(data, vpci->isr);
		kvm__irq_line(kvm, vpci->legacy_irq_line, VIRTIO_IRQ_LOW);
		vpci->isr = VIRTIO_IRQ_LOW;
		break;
	default:
		return virtio_pci__specific_io_in(kvm, vdev, port, data, size, reg);
	}

	return true;
}
/*
 * Refresh the IRQ route of MSI-X vector @vecnum after its table entry
 * changed.
 *
 * The GSI is the config GSI when @vecnum is the config vector, otherwise
 * the GSI of the first queue whose vector equals @vecnum. Nothing happens
 * when no queue uses the vector or when the GSI found is 0.
 *
 * @msix_entry: base of the MSI-X table; entry @vecnum supplies the message.
 */
static void update_msix_map(struct virtio_pci *vpci,
			    struct msix_table *msix_entry, u32 vecnum)
{
	u32 gsi;

	if (vecnum == vpci->config_vector) {
		gsi = vpci->config_gsi;
	} else {
		u32 q = 0;

		/* Look for the first queue bound to this vector. */
		while (q < VIRTIO_PCI_MAX_VQ && vpci->vq_vector[q] != vecnum)
			q++;

		if (q == VIRTIO_PCI_MAX_VQ)
			return;

		gsi = vpci->gsis[q];
	}

	if (gsi == 0)
		return;

	irq__update_msix_route(vpci->kvm, gsi, &msix_entry[vecnum].msg);
}
/*
 * Handle a guest write to the part of the I/O BAR that follows the common
 * registers: either the MSI-X vector registers or the device-specific
 * configuration space, as classified by virtio__get_dev_specific_field().
 *
 * Writing a vector register stores the vector and, unless it is
 * VIRTIO_MSI_NO_VECTOR, sets up an MSI-X route for it. A vector at or
 * beyond the advertised MSI-X table size (VIRTIO_PCI_MAX_VQ +
 * VIRTIO_PCI_MAX_CONFIG entries) is stored as VIRTIO_MSI_NO_VECTOR, and a
 * queue vector write is ignored while queue_selector is not below
 * VIRTIO_PCI_MAX_VQ. Both values come straight from the guest and index
 * fixed-size arrays.
 *
 * Returns true when the access was handled, false when the offset belongs
 * to neither region.
 */
static bool virtio_pci__specific_io_out(struct kvm *kvm, struct virtio_device *vdev, u16 port,
					void *data, int size, int offset)
{
	struct virtio_pci *vpci = vdev->virtio;
	u32 config_offset, vec;
	int gsi;
	int type = virtio__get_dev_specific_field(offset - 20, virtio_pci__msix_enabled(vpci),
						  &config_offset);

	if (type == VIRTIO_PCI_O_MSIX) {
		switch (offset) {
		case VIRTIO_MSI_CONFIG_VECTOR:
			vec = ioport__read16(data);
			/* Refuse vectors that lie outside the MSI-X table. */
			if (vec >= (u32)(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG))
				vec = VIRTIO_MSI_NO_VECTOR;
			vpci->config_vector = vec;
			if (vec == VIRTIO_MSI_NO_VECTOR)
				break;

			gsi = irq__add_msix_route(kvm,
						  &vpci->msix_table[vec].msg,
						  vpci->dev_hdr.dev_num << 3);
			/*
			 * We don't need IRQ routing if we can use
			 * MSI injection via the KVM_SIGNAL_MSI ioctl.
			 */
			if (gsi == -ENXIO &&
			    (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI))
				break;

			if (gsi < 0) {
				die("failed to configure MSIs");
				break;
			}

			vpci->config_gsi = gsi;
			break;
		case VIRTIO_MSI_QUEUE_VECTOR:
			/* No such queue: nothing to bind the vector to. */
			if (vpci->queue_selector >= VIRTIO_PCI_MAX_VQ)
				break;

			vec = ioport__read16(data);
			/* Refuse vectors that lie outside the MSI-X table. */
			if (vec >= (u32)(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG))
				vec = VIRTIO_MSI_NO_VECTOR;
			vpci->vq_vector[vpci->queue_selector] = vec;
			if (vec == VIRTIO_MSI_NO_VECTOR)
				break;

			gsi = irq__add_msix_route(kvm,
						  &vpci->msix_table[vec].msg,
						  vpci->dev_hdr.dev_num << 3);
			/*
			 * We don't need IRQ routing if we can use
			 * MSI injection via the KVM_SIGNAL_MSI ioctl.
			 */
			if (gsi == -ENXIO &&
			    (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI))
				break;

			if (gsi < 0) {
				die("failed to configure MSIs");
				break;
			}

			vpci->gsis[vpci->queue_selector] = gsi;
			if (vdev->ops->notify_vq_gsi)
				vdev->ops->notify_vq_gsi(kvm, vpci->dev,
							 vpci->queue_selector,
							 gsi);
			break;
		}

		return true;
	} else if (type == VIRTIO_PCI_O_CONFIG) {
		/*
		 * NOTE(review): config_offset is not checked against the size
		 * of the device's config area, which is not known here.
		 */
		vdev->ops->get_config(kvm, vpci->dev)[config_offset] = *(u8 *)data;
		return true;
	}

	return false;
}
/*
 * I/O port write handler for the device's I/O BAR.
 *
 * The register is identified by its offset from the BAR's base port.
 * Common registers are handled here; anything else is delegated to
 * virtio_pci__specific_io_out().
 *
 * Returns true for every common register, otherwise the result of the
 * delegate.
 */
static bool virtio_pci__io_out(struct ioport *ioport, struct kvm_cpu *vcpu, u16 port, void *data, int size)
{
	struct virtio_device *vdev = ioport->priv;
	struct virtio_pci *vpci = vdev->virtio;
	struct kvm *kvm = vcpu->kvm;
	unsigned long reg = port - vpci->port_addr;
	u32 value;

	switch (reg) {
	case VIRTIO_PCI_GUEST_FEATURES:
		value = ioport__read32(data);
		virtio_set_guest_features(kvm, vdev, vpci->dev, value);
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		value = ioport__read32(data);
		/* A zero PFN tears the selected queue down. */
		if (!value) {
			virtio_pci_exit_vq(kvm, vdev, vpci->queue_selector);
			break;
		}
		/* Non-zero: hook up notifications, then set the queue up. */
		virtio_pci__init_ioeventfd(kvm, vdev, vpci->queue_selector);
		vdev->ops->init_vq(kvm, vpci->dev, vpci->queue_selector,
				   1 << VIRTIO_PCI_QUEUE_ADDR_SHIFT,
				   VIRTIO_PCI_VRING_ALIGN, value);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
		vpci->queue_selector = ioport__read16(data);
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY:
		value = ioport__read16(data);
		vdev->ops->notify_vq(kvm, vpci->dev, value);
		break;
	case VIRTIO_PCI_STATUS:
		vpci->status = ioport__read8(data);
		/* Sample endianness on reset */
		if (!vpci->status)
			vdev->endian = kvm_cpu__get_endianness(vcpu);
		virtio_notify_status(kvm, vdev, vpci->dev, vpci->status);
		break;
	default:
		return virtio_pci__specific_io_out(kvm, vdev, port, data, size, reg);
	}

	return true;
}
/* Read/write handlers registered for the device's I/O port BAR. */
static struct ioport_operations virtio_pci__io_ops = {
	.io_in	= virtio_pci__io_in,
	.io_out	= virtio_pci__io_out,
};
/*
 * MMIO handler for the MSI-X BAR.
 *
 * The first PCI_IO_SIZE bytes of the BAR hold the MSI-X table; the pending
 * bit array (PBA) starts at offset PCI_IO_SIZE.
 *
 * PBA: writes are ignored. Reads return the bytes of msix_pba, with
 * anything past its end reading as zero.
 *
 * Table: reads and writes are copied from/to the addressed entry. A write
 * that lands before the entry's ctrl field changed the address or payload,
 * so the vector's IRQ route is refreshed.
 */
static void virtio_pci__msix_mmio_callback(struct kvm_cpu *vcpu,
					   u64 addr, u8 *data, u32 len,
					   u8 is_write, void *ptr)
{
	struct virtio_pci *vpci = ptr;
	struct msix_table *table = vpci->msix_table;
	int vecnum;
	size_t offset;

	/* ">=": the byte at msix_io_block + PCI_IO_SIZE is the PBA's first. */
	if (addr >= vpci->msix_io_block + PCI_IO_SIZE) {
		size_t avail;

		if (is_write)
			return;

		offset = addr - (vpci->msix_io_block + PCI_IO_SIZE);

		/* Never read beyond msix_pba; the rest reads as zero. */
		memset(data, 0, len);
		if (offset < sizeof(vpci->msix_pba)) {
			avail = sizeof(vpci->msix_pba) - offset;
			memcpy(data, (u8 *)&vpci->msix_pba + offset,
			       len < avail ? len : avail);
		}
		return;
	}

	/*
	 * NOTE(review): the table access below is bounded only by the
	 * PCI_IO_SIZE window; this assumes msix_table covers at least that
	 * many bytes (plus the access length) - confirm against the struct.
	 */
	offset = addr - vpci->msix_io_block;
	vecnum = offset / sizeof(struct msix_table);
	offset = offset % sizeof(struct msix_table);

	if (!is_write) {
		memcpy(data, (u8 *)&table[vecnum] + offset, len);
		return;
	}

	memcpy((u8 *)&table[vecnum] + offset, data, len);

	/* Did we just update the address or payload? */
	if (offset < offsetof(struct msix_table, ctrl))
		update_msix_map(vpci, table, vecnum);
}
static void virtio_pci__signal_msi(struct kvm *kvm, struct virtio_pci *vpci,
int vec)
{
struct kvm_msi msi = {
.address_lo = vpci->msix_table[vec].msg.address_lo,
.address_hi = vpci->msix_table[vec].msg.address_hi,
.data = vpci->msix_table[vec].msg.data,
};
if (kvm->msix_needs_devid) {
msi.flags = KVM_MSI_VALID_DEVID;
msi.devid = vpci->dev_hdr.dev_num << 3;
}
irq__signal_msi(kvm, &msi);
}
/*
 * Raise the interrupt for queue @vq.
 *
 * With MSI-X enabled and a vector assigned to the queue:
 *  - if the function or the vector is masked, only the vector's pending
 *    bit is recorded in msix_pba;
 *  - otherwise the MSI is injected directly when VIRTIO_PCI_F_SIGNAL_MSI
 *    is set, or the queue's GSI is triggered.
 * Otherwise the queue bit is added to the ISR and the legacy IRQ line is
 * triggered.
 *
 * Always returns 0.
 */
int virtio_pci__signal_vq(struct kvm *kvm, struct virtio_device *vdev, u32 vq)
{
	struct virtio_pci *vpci = vdev->virtio;
	int tbl = vpci->vq_vector[vq];

	if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) {
		if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) ||
		    vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
			/*
			 * Shift a 64-bit one: "1 << tbl" is an int shift and
			 * breaks for vectors >= 31.
			 * NOTE(review): assumes tbl is a valid table index and
			 * that msix_pba has a bit for it - confirm.
			 */
			vpci->msix_pba |= 1ULL << tbl;
			return 0;
		}

		if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI)
			virtio_pci__signal_msi(kvm, vpci, tbl);
		else
			kvm__irq_trigger(kvm, vpci->gsis[vq]);
	} else {
		/* OR the bit in so a pending config-change bit is not lost. */
		vpci->isr |= VIRTIO_IRQ_HIGH;
		kvm__irq_trigger(kvm, vpci->legacy_irq_line);
	}

	return 0;
}
/*
 * Raise the configuration-change interrupt.
 *
 * With MSI-X enabled and a config vector assigned:
 *  - if the function or the vector is masked, only the vector's pending
 *    bit is recorded in msix_pba;
 *  - otherwise the MSI is injected directly when VIRTIO_PCI_F_SIGNAL_MSI
 *    is set, or the config GSI is triggered.
 * Otherwise the config bit is added to the ISR and the legacy IRQ line is
 * triggered.
 *
 * Always returns 0.
 */
int virtio_pci__signal_config(struct kvm *kvm, struct virtio_device *vdev)
{
	struct virtio_pci *vpci = vdev->virtio;
	int tbl = vpci->config_vector;

	if (virtio_pci__msix_enabled(vpci) && tbl != VIRTIO_MSI_NO_VECTOR) {
		if (vpci->pci_hdr.msix.ctrl & cpu_to_le16(PCI_MSIX_FLAGS_MASKALL) ||
		    vpci->msix_table[tbl].ctrl & cpu_to_le16(PCI_MSIX_ENTRY_CTRL_MASKBIT)) {
			/*
			 * Shift a 64-bit one: "1 << tbl" is an int shift and
			 * breaks for vectors >= 31.
			 * NOTE(review): assumes tbl is a valid table index and
			 * that msix_pba has a bit for it - confirm.
			 */
			vpci->msix_pba |= 1ULL << tbl;
			return 0;
		}

		if (vpci->features & VIRTIO_PCI_F_SIGNAL_MSI)
			virtio_pci__signal_msi(kvm, vpci, tbl);
		else
			kvm__irq_trigger(kvm, vpci->config_gsi);
	} else {
		/* OR the bit in so a pending queue interrupt bit is not lost. */
		vpci->isr |= VIRTIO_PCI_ISR_CONFIG;
		kvm__irq_trigger(kvm, vpci->legacy_irq_line);
	}

	return 0;
}
/*
 * MMIO handler for the memory BAR that mirrors the I/O port BAR: translate
 * the access into the equivalent port access and replay it through
 * kvm__emulate_io().
 */
static void virtio_pci__io_mmio_callback(struct kvm_cpu *vcpu,
					 u64 addr, u8 *data, u32 len,
					 u8 is_write, void *ptr)
{
	struct virtio_pci *vpci = ptr;
	/* The low bits of the address select the register within the BAR. */
	u16 port = vpci->port_addr + (addr & (IOPORT_SIZE - 1));
	int direction;

	if (is_write)
		direction = KVM_EXIT_IO_OUT;
	else
		direction = KVM_EXIT_IO_IN;

	kvm__emulate_io(vcpu, port, data, direction, len, 1);
}
/*
 * Set up the PCI transport for @vdev.
 *
 * Registers the I/O port range, its MMIO mirror and the MSI-X MMIO block,
 * fills in the PCI configuration header (three BARs plus the MSI-X
 * capability) and registers the device.
 *
 * @dev:       device-private pointer, handed back to the device ops.
 * @device_id: PCI device ID.
 * @subsys_id: PCI subsystem ID.
 * @class:     24-bit PCI class code.
 *
 * Returns 0 on success. On failure returns the negative value of the
 * registration that failed, after undoing the earlier registrations.
 */
int virtio_pci__init(struct kvm *kvm, void *dev, struct virtio_device *vdev,
		     int device_id, int subsys_id, int class)
{
	struct virtio_pci *vpci = vdev->virtio;
	int r;

	vpci->kvm = kvm;
	vpci->dev = dev;

	/* BAR 0: I/O ports. */
	r = ioport__register(kvm, IOPORT_EMPTY, &virtio_pci__io_ops, IOPORT_SIZE, vdev);
	if (r < 0)
		return r;
	vpci->port_addr = (u16)r;

	/* BAR 1: MMIO mirror of the I/O ports. */
	vpci->mmio_addr = pci_get_io_space_block(IOPORT_SIZE);
	r = kvm__register_mmio(kvm, vpci->mmio_addr, IOPORT_SIZE, false,
			       virtio_pci__io_mmio_callback, vpci);
	if (r < 0)
		goto free_ioport;

	/* BAR 2: MSI-X table followed by the PBA. */
	vpci->msix_io_block = pci_get_io_space_block(PCI_IO_SIZE * 2);
	r = kvm__register_mmio(kvm, vpci->msix_io_block, PCI_IO_SIZE * 2, false,
			       virtio_pci__msix_mmio_callback, vpci);
	if (r < 0)
		goto free_mmio;

	vpci->pci_hdr = (struct pci_device_header) {
		.vendor_id		= cpu_to_le16(PCI_VENDOR_ID_REDHAT_QUMRANET),
		.device_id		= cpu_to_le16(device_id),
		.command		= PCI_COMMAND_IO | PCI_COMMAND_MEMORY,
		.header_type		= PCI_HEADER_TYPE_NORMAL,
		.revision_id		= 0,
		.class[0]		= class & 0xff,
		.class[1]		= (class >> 8) & 0xff,
		.class[2]		= (class >> 16) & 0xff,
		.subsys_vendor_id	= cpu_to_le16(PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET),
		.subsys_id		= cpu_to_le16(subsys_id),
		.bar[0]			= cpu_to_le32(vpci->port_addr
							| PCI_BASE_ADDRESS_SPACE_IO),
		.bar[1]			= cpu_to_le32(vpci->mmio_addr
							| PCI_BASE_ADDRESS_SPACE_MEMORY),
		.bar[2]			= cpu_to_le32(vpci->msix_io_block
							| PCI_BASE_ADDRESS_SPACE_MEMORY),
		.status			= cpu_to_le16(PCI_STATUS_CAP_LIST),
		/* The capability list starts at the MSI-X capability. */
		.capabilities		= offsetof(struct pci_device_header, msix),
		.bar_size[0]		= cpu_to_le32(IOPORT_SIZE),
		.bar_size[1]		= cpu_to_le32(IOPORT_SIZE),
		.bar_size[2]		= cpu_to_le32(PCI_IO_SIZE*2),
	};

	vpci->dev_hdr = (struct device_header) {
		.bus_type		= DEVICE_BUS_PCI,
		.data			= &vpci->pci_hdr,
	};

	/* MSI-X is the only capability in the list. */
	vpci->pci_hdr.msix.cap = PCI_CAP_ID_MSIX;
	vpci->pci_hdr.msix.next = 0;

	/*
	 * We at most have VIRTIO_PCI_MAX_VQ entries for virt queue,
	 * VIRTIO_PCI_MAX_CONFIG entries for config.
	 *
	 * To quote the PCI spec:
	 *
	 * System software reads this field to determine the
	 * MSI-X Table Size N, which is encoded as N-1.
	 * For example, a returned value of "00000000011"
	 * indicates a table size of 4.
	 */
	vpci->pci_hdr.msix.ctrl = cpu_to_le16(VIRTIO_PCI_MAX_VQ + VIRTIO_PCI_MAX_CONFIG - 1);

	/* Both table and PBA are mapped to the same BAR (2) */
	vpci->pci_hdr.msix.table_offset = cpu_to_le32(2);
	vpci->pci_hdr.msix.pba_offset = cpu_to_le32(2 | PCI_IO_SIZE);
	vpci->config_vector = 0;

	if (irq__can_signal_msi(kvm))
		vpci->features |= VIRTIO_PCI_F_SIGNAL_MSI;

	r = device__register(&vpci->dev_hdr);
	if (r < 0)
		goto free_msix_mmio;

	/* save the IRQ that device__register() has allocated */
	vpci->legacy_irq_line = vpci->pci_hdr.irq_line;

	return 0;

free_msix_mmio:
	kvm__deregister_mmio(kvm, vpci->msix_io_block);
free_mmio:
	kvm__deregister_mmio(kvm, vpci->mmio_addr);
free_ioport:
	ioport__unregister(kvm, vpci->port_addr);
	return r;
}
/*
 * Reset the transport: tear down every queue the device reports through
 * get_vq_count(). Always returns 0.
 */
int virtio_pci__reset(struct kvm *kvm, struct virtio_device *vdev)
{
	struct virtio_pci *vpci = vdev->virtio;
	int vq = 0;

	/* The count is re-queried on every iteration. */
	while (vq < vdev->ops->get_vq_count(kvm, vpci->dev)) {
		virtio_pci_exit_vq(kvm, vdev, vq);
		vq++;
	}

	return 0;
}
/*
 * Shut the transport down: reset all queues, then release both MMIO
 * regions and the I/O port range. Always returns 0.
 */
int virtio_pci__exit(struct kvm *kvm, struct virtio_device *vdev)
{
	struct virtio_pci *vpci = vdev->virtio;

	virtio_pci__reset(kvm, vdev);

	kvm__deregister_mmio(kvm, vpci->mmio_addr);
	kvm__deregister_mmio(kvm, vpci->msix_io_block);
	ioport__unregister(kvm, vpci->port_addr);

	return 0;
}