#include "kvm/virtio-net.h"
#include "kvm/virtio-pci.h"
#include "kvm/virtio.h"
#include "kvm/ioport.h"
#include "kvm/types.h"
#include "kvm/mutex.h"
#include "kvm/util.h"
#include "kvm/kvm.h"
#include "kvm/pci.h"
#include <linux/virtio_net.h>
#include <linux/if_tun.h>
#include <net/if.h>
#include <sys/ioctl.h>
#include <assert.h>
#include <fcntl.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <sys/wait.h>
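/*
 * The device uses a fixed IRQ line and two 128-entry virtqueues:
 * queue 0 carries tap -> guest (RX) traffic, queue 1 guest -> tap (TX).
 */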
#define VIRTIO_NET_IRQ 14
#define VIRTIO_NET_QUEUE_SIZE 128
#define VIRTIO_NET_NUM_QUEUES 2
#define VIRTIO_NET_RX_QUEUE 0
#define VIRTIO_NET_TX_QUEUE 1
#define PCI_VIRTIO_NET_DEVNUM 3
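/*
 * All state for the single emulated NIC. The mutex serializes access
 * from the ioport handlers; each I/O thread has its own mutex/condvar
 * pair so the guest's queue kicks can wake it.
 */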
struct net_device {
pthread_mutex_t mutex;
struct virt_queue vqs[VIRTIO_NET_NUM_QUEUES];
struct virtio_net_config net_config;
uint32_t host_features;
uint32_t guest_features;
uint16_t config_vector;
uint8_t status;
uint16_t queue_selector;
pthread_t io_rx_thread;
pthread_mutex_t io_rx_mutex;
pthread_cond_t io_rx_cond;
pthread_t io_tx_thread;
pthread_mutex_t io_tx_mutex;
pthread_cond_t io_tx_cond;
int tap_fd;
char tap_name[IFNAMSIZ];
};
static struct net_device net_device = {
.mutex = PTHREAD_MUTEX_INITIALIZER,
.net_config = {
.mac = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
.status = VIRTIO_NET_S_LINK_UP,
},
.host_features = 1UL << VIRTIO_NET_F_MAC,
};
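/*
 * RX thread: sleeps until the guest posts receive buffers, then copies
 * one tap frame into each available descriptor chain, injecting an
 * interrupt per received frame.
 */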
static void *virtio_net_rx_thread(void *p)
{
struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
struct virt_queue *vq;
struct kvm *self;
uint16_t out, in;
uint16_t head;
int len;
self = p;
vq = &net_device.vqs[VIRTIO_NET_RX_QUEUE];
while (1) {
mutex_lock(&net_device.io_rx_mutex);
if (!virt_queue__available(vq))
pthread_cond_wait(&net_device.io_rx_cond, &net_device.io_rx_mutex);
mutex_unlock(&net_device.io_rx_mutex);
while (virt_queue__available(vq)) {
head = virt_queue__get_iov(vq, iov, &out, &in, self);
/* We do not advertise GSO or CSUM features, so we can ignore the virtio_net_hdr and read the frame payload directly */
len = readv(net_device.tap_fd, iov + 1, in - 1);
/* However, we still have to tell the guest that we have written the virtio_net_hdr */
virt_queue__set_used_elem(vq, head, sizeof(struct virtio_net_hdr) + len);
/* Interrupt the guest right away, otherwise RX latency is huge */
kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
}
}
pthread_exit(NULL);
return NULL;
}
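/*
 * TX thread: sleeps until the guest kicks the TX queue, then writes
 * each pending descriptor chain out to the tap device and raises a
 * single interrupt for the whole batch.
 */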
static void *virtio_net_tx_thread(void *p)
{
struct iovec iov[VIRTIO_NET_QUEUE_SIZE];
struct virt_queue *vq;
struct kvm *self;
uint16_t out, in;
uint16_t head;
int len;
self = p;
vq = &net_device.vqs[VIRTIO_NET_TX_QUEUE];
while (1) {
mutex_lock(&net_device.io_tx_mutex);
if (!virt_queue__available(vq))
pthread_cond_wait(&net_device.io_tx_cond, &net_device.io_tx_mutex);
mutex_unlock(&net_device.io_tx_mutex);
while (virt_queue__available(vq)) {
head = virt_queue__get_iov(vq, iov, &out, &in, self);
len = writev(net_device.tap_fd, iov + 1, out - 1);
virt_queue__set_used_elem(vq, head, len);
}
kvm__irq_line(self, VIRTIO_NET_IRQ, 1);
}
pthread_exit(NULL);
return NULL;
}
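/*
 * Offsets past the common virtio PCI registers index into the
 * device-specific configuration space, i.e. struct virtio_net_config.
 */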
static bool virtio_net_pci_io_device_specific_in(void *data, unsigned long offset, int size, uint32_t count)
{
uint8_t *config_space = (uint8_t *) &net_device.net_config;
if (size != 1 || count != 1)
return false;
if ((offset - VIRTIO_PCI_CONFIG_NOMSI) >= sizeof(struct virtio_net_config))
error("config offset is too big: %lu", offset - VIRTIO_PCI_CONFIG_NOMSI);
ioport__write8(data, config_space[offset - VIRTIO_PCI_CONFIG_NOMSI]);
return true;
}
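/* Guest reads of the device's I/O ports: common virtio PCI registers, with device-specific config as the fallback. */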
static bool virtio_net_pci_io_in(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
{
unsigned long offset = port - IOPORT_VIRTIO_NET;
bool ret = true;
mutex_lock(&net_device.mutex);
switch (offset) {
case VIRTIO_PCI_HOST_FEATURES:
ioport__write32(data, net_device.host_features);
break;
case VIRTIO_PCI_GUEST_FEATURES:
ret = false;
break;
case VIRTIO_PCI_QUEUE_PFN:
ioport__write32(data, net_device.vqs[net_device.queue_selector].pfn);
break;
case VIRTIO_PCI_QUEUE_NUM:
ioport__write16(data, VIRTIO_NET_QUEUE_SIZE);
break;
case VIRTIO_PCI_QUEUE_SEL:
case VIRTIO_PCI_QUEUE_NOTIFY:
ret = false;
break;
case VIRTIO_PCI_STATUS:
ioport__write8(data, net_device.status);
break;
case VIRTIO_PCI_ISR:
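/* Reading the ISR acknowledges the interrupt: report bit 0 and deassert the line. */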
ioport__write8(data, 0x1);
kvm__irq_line(self, VIRTIO_NET_IRQ, 0);
break;
case VIRTIO_MSI_CONFIG_VECTOR:
ioport__write16(data, net_device.config_vector);
break;
default:
ret = virtio_net_pci_io_device_specific_in(data, offset, size, count);
}
mutex_unlock(&net_device.mutex);
return ret;
}
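/*
 * A queue notification wakes the I/O thread servicing the kicked
 * queue; the actual data transfer happens on that thread.
 */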
static void virtio_net_handle_callback(struct kvm *self, uint16_t queue_index)
{
if (queue_index == VIRTIO_NET_TX_QUEUE) {
mutex_lock(&net_device.io_tx_mutex);
pthread_cond_signal(&net_device.io_tx_cond);
mutex_unlock(&net_device.io_tx_mutex);
} else if (queue_index == VIRTIO_NET_RX_QUEUE) {
mutex_lock(&net_device.io_rx_mutex);
pthread_cond_signal(&net_device.io_rx_cond);
mutex_unlock(&net_device.io_rx_mutex);
}
}
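/* Guest writes to the device's I/O ports: feature negotiation, queue setup, notification, and status updates. */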
static bool virtio_net_pci_io_out(struct kvm *self, uint16_t port, void *data, int size, uint32_t count)
{
unsigned long offset = port - IOPORT_VIRTIO_NET;
bool ret = true;
mutex_lock(&net_device.mutex);
switch (offset) {
case VIRTIO_PCI_GUEST_FEATURES:
net_device.guest_features = ioport__read32(data);
break;
case VIRTIO_PCI_QUEUE_PFN: {
struct virt_queue *queue;
void *p;
assert(net_device.queue_selector < VIRTIO_NET_NUM_QUEUES);
queue = &net_device.vqs[net_device.queue_selector];
queue->pfn = ioport__read32(data);
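/* The guest wrote a page frame number; translate it to a host virtual address and wire up the vring. */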
p = guest_flat_to_host(self, queue->pfn << 12);
vring_init(&queue->vring, VIRTIO_NET_QUEUE_SIZE, p, 4096);
break;
}
case VIRTIO_PCI_QUEUE_SEL:
net_device.queue_selector = ioport__read16(data);
break;
case VIRTIO_PCI_QUEUE_NOTIFY: {
uint16_t queue_index;
queue_index = ioport__read16(data);
virtio_net_handle_callback(self, queue_index);
break;
}
case VIRTIO_PCI_STATUS:
net_device.status = ioport__read8(data);
break;
case VIRTIO_MSI_CONFIG_VECTOR:
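/* MSI-X is not supported, so any vector the guest writes collapses to VIRTIO_MSI_NO_VECTOR. */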
net_device.config_vector = VIRTIO_MSI_NO_VECTOR;
break;
case VIRTIO_MSI_QUEUE_VECTOR:
break;
default:
ret = false;
}
mutex_unlock(&net_device.mutex);
return ret;
}
static struct ioport_operations virtio_net_io_ops = {
.io_in = virtio_net_pci_io_in,
.io_out = virtio_net_pci_io_out,
};
#define PCI_VENDOR_ID_REDHAT_QUMRANET 0x1af4
#define PCI_DEVICE_ID_VIRTIO_NET 0x1000
#define PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET 0x1af4
#define PCI_SUBSYSTEM_ID_VIRTIO_NET 0x0001
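/*
 * PCI identity of the device: Red Hat/Qumranet vendor ID with the
 * transitional virtio-net device ID, class code 0x020000 (Ethernet).
 */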
static struct pci_device_header virtio_net_pci_device = {
.vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET,
.device_id = PCI_DEVICE_ID_VIRTIO_NET,
.header_type = PCI_HEADER_TYPE_NORMAL,
.revision_id = 0,
.class = 0x020000,
.subsys_vendor_id = PCI_SUBSYSTEM_VENDOR_ID_REDHAT_QUMRANET,
.subsys_id = PCI_SUBSYSTEM_ID_VIRTIO_NET,
.bar[0] = IOPORT_VIRTIO_NET | PCI_BASE_ADDRESS_SPACE_IO,
.irq_pin = 3,
.irq_line = VIRTIO_NET_IRQ,
};
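/*
 * Open and configure the host-side tap device backing the guest NIC.
 * If a setup script was given, run it with the tap name as its only
 * argument; otherwise assign host_ip to the interface directly.
 */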
static bool virtio_net__tap_init(const struct virtio_net_parameters *params)
{
struct ifreq ifr;
int sock = socket(AF_INET, SOCK_STREAM, 0);
int i, pid, status;
struct sockaddr_in sin = {0};
for (i = 0 ; i < 6 ; i++)
net_device.net_config.mac[i] = params->guest_mac[i];
net_device.tap_fd = open("/dev/net/tun", O_RDWR);
if (net_device.tap_fd < 0) {
warning("Unable to open /dev/net/tun\n");
goto fail;
}
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
if (ioctl(net_device.tap_fd, TUNSETIFF, &ifr) < 0) {
warning("Config tap device error. Are you root?");
goto fail;
}
strncpy(net_device.tap_name, ifr.ifr_name, sizeof(net_device.tap_name));
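/* Packets over this local tap link do not need checksums. */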
ioctl(net_device.tap_fd, TUNSETNOCSUM, 1);
if (strcmp(params->script, "none")) {
pid = fork();
if (pid == 0) {
execl(params->script, params->script, net_device.tap_name, (char *)NULL);
_exit(1);
} else {
waitpid(pid, &status, 0);
if (WIFEXITED(status) && WEXITSTATUS(status) != 0) {
warning("Fail to setup tap by %s", params->script);
goto fail;
}
}
} else {
memset(&ifr, 0, sizeof(ifr));
strncpy(ifr.ifr_name, net_device.tap_name, sizeof(ifr.ifr_name));
sin.sin_addr.s_addr = inet_addr(params->host_ip);
memcpy(&(ifr.ifr_addr), &sin, sizeof(ifr.ifr_addr));
ifr.ifr_addr.sa_family = AF_INET;
if (ioctl(sock, SIOCSIFADDR, &ifr) < 0) {
warning("Can not set ip address on tap device");
goto fail;
}
}
memset(&ifr, 0, sizeof(ifr));
strncpy(ifr.ifr_name, net_device.tap_name, sizeof(ifr.ifr_name));
ioctl(sock, SIOCGIFFLAGS, &ifr);
ifr.ifr_flags |= IFF_UP | IFF_RUNNING;
if (ioctl(sock, SIOCSIFFLAGS, &ifr) < 0)
warning("Could not bring tap device up");
close(sock);
return true;
fail:
if (sock >= 0)
close(sock);
if (net_device.tap_fd >= 0)
close(net_device.tap_fd);
return false;
}
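/*
 * Set up the RX/TX mutex and condition variable pairs and start both
 * I/O threads.
 */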
static void virtio_net__io_thread_init(struct kvm *self)
{
pthread_mutex_init(&net_device.io_rx_mutex, NULL);
pthread_cond_init(&net_device.io_rx_cond, NULL);
pthread_mutex_init(&net_device.io_tx_mutex, NULL);
pthread_cond_init(&net_device.io_tx_cond, NULL);
pthread_create(&net_device.io_rx_thread, NULL, virtio_net_rx_thread, (void *)self);
pthread_create(&net_device.io_tx_thread, NULL, virtio_net_tx_thread, (void *)self);
}
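/*
 * Device bring-up: configure the tap backend first; only if that
 * succeeds, expose the device to the guest via its PCI header and
 * I/O ports and start the I/O threads.
 */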
void virtio_net__init(const struct virtio_net_parameters *params)
{
if (virtio_net__tap_init(params)) {
pci__register(&virtio_net_pci_device, PCI_VIRTIO_NET_DEVNUM);
ioport__register(IOPORT_VIRTIO_NET, &virtio_net_io_ops, IOPORT_VIRTIO_NET_SIZE);
virtio_net__io_thread_init(params->self);
}
}