blob: 5a114e9997d9599530c2c93c8ee951023f3f4157 [file] [log] [blame]
#include "kvm/kvm.h"
#include "kvm/kvm-cpu.h"
#include "kvm/rbtree-interval.h"
#include "kvm/mutex.h"
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>
#include <linux/types.h>
#include <linux/rbtree.h>
#include <linux/err.h>
#include <errno.h>
#define mmio_node(n) rb_entry(n, struct mmio_mapping, node)
static DEFINE_MUTEX(mmio_lock);
struct mmio_mapping {
struct rb_int_node node;
mmio_handler_fn mmio_fn;
void *ptr;
u32 refcount;
bool remove;
};
static struct rb_root mmio_tree = RB_ROOT;
static struct rb_root pio_tree = RB_ROOT;
static struct mmio_mapping *mmio_search(struct rb_root *root, u64 addr, u64 len)
{
struct rb_int_node *node;
/* If len is zero or if there's an overflow, the MMIO op is invalid. */
if (addr + len <= addr)
return NULL;
node = rb_int_search_range(root, addr, addr + len);
if (node == NULL)
return NULL;
return mmio_node(node);
}
/* Find lowest match, Check for overlap */
static struct mmio_mapping *mmio_search_single(struct rb_root *root, u64 addr)
{
struct rb_int_node *node;
node = rb_int_search_single(root, addr);
if (node == NULL)
return NULL;
return mmio_node(node);
}
static int mmio_insert(struct rb_root *root, struct mmio_mapping *data)
{
return rb_int_insert(root, &data->node);
}
static void mmio_remove(struct rb_root *root, struct mmio_mapping *data)
{
rb_int_erase(root, &data->node);
}
static const char *to_direction(u8 is_write)
{
if (is_write)
return "write";
return "read";
}
static struct mmio_mapping *mmio_get(struct rb_root *root, u64 phys_addr, u32 len)
{
struct mmio_mapping *mmio;
mutex_lock(&mmio_lock);
mmio = mmio_search(root, phys_addr, len);
if (mmio)
mmio->refcount++;
mutex_unlock(&mmio_lock);
return mmio;
}
/* Called with mmio_lock held. */
static void mmio_deregister(struct kvm *kvm, struct rb_root *root, struct mmio_mapping *mmio)
{
struct kvm_coalesced_mmio_zone zone = (struct kvm_coalesced_mmio_zone) {
.addr = rb_int_start(&mmio->node),
.size = 1,
};
ioctl(kvm->vm_fd, KVM_UNREGISTER_COALESCED_MMIO, &zone);
mmio_remove(root, mmio);
free(mmio);
}
static void mmio_put(struct kvm *kvm, struct rb_root *root, struct mmio_mapping *mmio)
{
mutex_lock(&mmio_lock);
mmio->refcount--;
if (mmio->remove && mmio->refcount == 0)
mmio_deregister(kvm, root, mmio);
mutex_unlock(&mmio_lock);
}
static bool trap_is_mmio(unsigned int flags)
{
return (flags & IOTRAP_BUS_MASK) == DEVICE_BUS_MMIO;
}
int kvm__register_iotrap(struct kvm *kvm, u64 phys_addr, u64 phys_addr_len,
mmio_handler_fn mmio_fn, void *ptr,
unsigned int flags)
{
struct mmio_mapping *mmio;
struct kvm_coalesced_mmio_zone zone;
int ret;
mmio = malloc(sizeof(*mmio));
if (mmio == NULL)
return -ENOMEM;
*mmio = (struct mmio_mapping) {
.node = RB_INT_INIT(phys_addr, phys_addr + phys_addr_len),
.mmio_fn = mmio_fn,
.ptr = ptr,
/*
* Start from 0 because kvm__deregister_mmio() doesn't decrement
* the reference count.
*/
.refcount = 0,
.remove = false,
};
if (trap_is_mmio(flags) && (flags & IOTRAP_COALESCE)) {
zone = (struct kvm_coalesced_mmio_zone) {
.addr = phys_addr,
.size = phys_addr_len,
};
ret = ioctl(kvm->vm_fd, KVM_REGISTER_COALESCED_MMIO, &zone);
if (ret < 0) {
free(mmio);
return -errno;
}
}
mutex_lock(&mmio_lock);
if (trap_is_mmio(flags))
ret = mmio_insert(&mmio_tree, mmio);
else
ret = mmio_insert(&pio_tree, mmio);
mutex_unlock(&mmio_lock);
return ret;
}
bool kvm__deregister_iotrap(struct kvm *kvm, u64 phys_addr, unsigned int flags)
{
struct mmio_mapping *mmio;
struct rb_root *tree;
if (trap_is_mmio(flags))
tree = &mmio_tree;
else
tree = &pio_tree;
mutex_lock(&mmio_lock);
mmio = mmio_search_single(tree, phys_addr);
if (mmio == NULL) {
mutex_unlock(&mmio_lock);
return false;
}
/*
* The PCI emulation code calls this function when memory access is
* disabled for a device, or when a BAR has a new address assigned. PCI
* emulation doesn't use any locks and as a result we can end up in a
* situation where we have called mmio_get() to do emulation on one VCPU
* thread (let's call it VCPU0), and several other VCPU threads have
* called kvm__deregister_mmio(). In this case, if we decrement refcount
* kvm__deregister_mmio() (either directly, or by calling mmio_put()),
* refcount will reach 0 and we will free the mmio node before VCPU0 has
* called mmio_put(). This will trigger use-after-free errors on VCPU0.
*/
if (mmio->refcount == 0)
mmio_deregister(kvm, tree, mmio);
else
mmio->remove = true;
mutex_unlock(&mmio_lock);
return true;
}
bool kvm__emulate_mmio(struct kvm_cpu *vcpu, u64 phys_addr, u8 *data,
u32 len, u8 is_write)
{
struct mmio_mapping *mmio;
mmio = mmio_get(&mmio_tree, phys_addr, len);
if (!mmio) {
if (vcpu->kvm->cfg.mmio_debug)
fprintf(stderr, "Warning: Ignoring MMIO %s at %016llx (length %u)\n",
to_direction(is_write),
(unsigned long long)phys_addr, len);
goto out;
}
mmio->mmio_fn(vcpu, phys_addr, data, len, is_write, mmio->ptr);
mmio_put(vcpu->kvm, &mmio_tree, mmio);
out:
return true;
}
bool kvm__emulate_io(struct kvm_cpu *vcpu, u16 port, void *data,
int direction, int size, u32 count)
{
struct mmio_mapping *mmio;
bool is_write = direction == KVM_EXIT_IO_OUT;
mmio = mmio_get(&pio_tree, port, size);
if (!mmio) {
if (vcpu->kvm->cfg.ioport_debug) {
fprintf(stderr, "IO error: %s port=%x, size=%d, count=%u\n",
to_direction(direction), port, size, count);
return false;
}
return true;
}
while (count--) {
mmio->mmio_fn(vcpu, port, data, size, is_write, mmio->ptr);
data += size;
}
mmio_put(vcpu->kvm, &pio_tree, mmio);
return true;
}