blob: 8e0ad5235240568ca955f7875a19e5c35286d8e6 [file] [log] [blame]
#include "kvm/uip.h"
#include <kvm/kvm.h>
#include <linux/virtio_net.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <arpa/inet.h>
/*
 * Shut down one direction (or both) of the host-side socket.
 *
 * @sk:  socket to operate on
 * @how: SHUT_RD, SHUT_WR or SHUT_RDWR, passed straight to shutdown(2)
 *
 * Once both sk->write_done and sk->read_done are set, the socket is fully
 * torn down: the fd is closed, sk is unlinked from its list under sk->lock,
 * and sk->buf and sk itself are freed. In that case the caller must not
 * touch sk after this function returns.
 *
 * Always returns 0.
 */
static int uip_tcp_socket_close(struct uip_tcp_socket *sk, int how)
{
	int both_done;

	shutdown(sk->fd, how);

	both_done = sk->write_done && sk->read_done;
	if (!both_done)
		return 0;

	/* Both halves are finished: release every resource tied to sk. */
	shutdown(sk->fd, SHUT_RDWR);
	close(sk->fd);

	mutex_lock(sk->lock);
	list_del(&sk->list);
	mutex_unlock(sk->lock);

	free(sk->buf);
	free(sk);

	return 0;
}
/*
 * Look up the socket matching a (sip, dip, sport, dport) tuple.
 *
 * The list arg->info->tcp_socket_head is walked while holding
 * arg->info->tcp_socket_lock. The values are compared as-is against the
 * fields stored in each socket.
 *
 * Returns the first matching socket, or NULL when none matches. The lock is
 * released before returning.
 */
static struct uip_tcp_socket *uip_tcp_socket_find(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport)
{
	struct list_head *head = &arg->info->tcp_socket_head;
	struct mutex *lock = &arg->info->tcp_socket_lock;
	struct uip_tcp_socket *match = NULL;
	struct uip_tcp_socket *cur;

	mutex_lock(lock);
	list_for_each_entry(cur, head, list) {
		if (cur->sip != sip || cur->dip != dip)
			continue;
		if (cur->sport != sport || cur->dport != dport)
			continue;

		match = cur;
		break;
	}
	mutex_unlock(lock);

	return match;
}
/*
 * Allocate a socket object, connect it to the destination and add it to the
 * socket list of arg->info.
 *
 * @arg:   TX argument; arg->eth holds the guest frame, arg->info the state
 * @dip:   destination IPv4 address, stored directly into sin_addr.s_addr
 * @dport: destination port, stored directly into sin_port
 * @sip, @sport: accepted for symmetry with uip_tcp_socket_find(); the
 *         lookup key stored in the socket is read from the headers in
 *         arg->eth instead.
 *
 * If ntohl(dip) equals arg->info->host_ip the connection is redirected to
 * 127.0.0.1.
 *
 * Returns the new socket, or NULL if the allocation, socket(2) or
 * connect(2) fails. Nothing is leaked on the failure paths.
 */
static struct uip_tcp_socket *uip_tcp_socket_alloc(struct uip_tx_arg *arg, u32 sip, u32 dip, u16 sport, u16 dport)
{
	struct list_head *sk_head;
	struct uip_tcp_socket *sk;
	struct mutex *sk_lock;
	struct uip_tcp *tcp;
	struct uip_ip *ip;

	tcp = (struct uip_tcp *)arg->eth;
	ip = (struct uip_ip *)arg->eth;

	sk_head = &arg->info->tcp_socket_head;
	sk_lock = &arg->info->tcp_socket_lock;

	/* Zeroed allocation: thread, buf, write_done, read_done start at 0. */
	sk = calloc(1, sizeof(*sk));
	if (!sk)
		return NULL;

	sk->lock = sk_lock;
	sk->info = arg->info;

	sk->fd = socket(AF_INET, SOCK_STREAM, 0);
	if (sk->fd < 0)
		goto err_free;

	sk->addr.sin_family = AF_INET;
	sk->addr.sin_port = dport;
	sk->addr.sin_addr.s_addr = dip;

	/* Traffic aimed at the emulated host address goes to loopback. */
	if (ntohl(dip) == arg->info->host_ip)
		sk->addr.sin_addr.s_addr = inet_addr("127.0.0.1");

	if (connect(sk->fd, (struct sockaddr *)&sk->addr, sizeof(sk->addr)))
		goto err_close;

	/*
	 * Initialise the condition variable only once the socket is known to
	 * be usable, so the error paths above have nothing to destroy.
	 */
	pthread_cond_init(&sk->cond, NULL);

	sk->sip = ip->sip;
	sk->dip = ip->dip;
	sk->sport = tcp->sport;
	sk->dport = tcp->dport;

	mutex_lock(sk_lock);
	list_add_tail(&sk->list, sk_head);
	mutex_unlock(sk_lock);

	return sk;

err_close:
	close(sk->fd);
err_free:
	free(sk);
	return NULL;
}
/*
 * Unconditionally destroy a socket: stop its receive thread, close the fd,
 * unlink it from the socket list and free it.
 *
 * Caller holds the sk lock (sk->lock). sk is invalid after this returns.
 */
static void uip_tcp_socket_free(struct uip_tcp_socket *sk)
{
	/*
	 * Here we assume that the virtqueues are already inactive so we don't
	 * race with uip_tx_do_ipv4_tcp. We are racing with
	 * uip_tcp_socket_thread though, but holding the sk lock ensures that it
	 * cannot free data concurrently.
	 *
	 * NOTE(review): if the receive thread is itself blocked on sk->lock
	 * (or inside pthread_cond_wait(), which re-acquires the mutex when
	 * cancelled), pthread_join() below cannot complete while the caller
	 * holds that lock. Confirm the shutdown ordering guarantees this
	 * cannot happen.
	 */
	if (sk->thread) {
		pthread_cancel(sk->thread);
		pthread_join(sk->thread, NULL);
	}

	sk->write_done = sk->read_done = 1;

	/*
	 * Perform the final teardown here instead of going through
	 * uip_tcp_socket_close(): that helper takes sk->lock around
	 * list_del(), and the caller already holds it, so calling it would
	 * lock the same mutex twice from one thread (a self-deadlock unless
	 * the mutex is recursive).
	 */
	shutdown(sk->fd, SHUT_RDWR);
	close(sk->fd);

	list_del(&sk->list);

	free(sk->buf);
	free(sk);
}
/*
 * Build one TCP/IPv4 ethernet frame for the guest and hand it over.
 *
 * @sk:          socket providing addresses, ports, sequence numbers and,
 *               when payload_len > 0, the data pointer sk->payload
 * @flag:        value written to the TCP flags field
 * @payload_len: number of bytes copied from sk->payload into the frame
 *
 * The frame is sent "in reverse": the socket's destination becomes the
 * frame's source and vice versa. sk->seq_server is advanced by payload_len.
 *
 * Always returns 0.
 */
static int uip_tcp_payload_send(struct uip_tcp_socket *sk, u8 flag, u16 payload_len)
{
	struct uip_info *info = sk->info;
	struct uip_buf *frame;
	struct uip_eth *eth_hdr;
	struct uip_ip *ip_hdr;
	struct uip_tcp *tcp_hdr;

	/* Obtain a free buffer that will carry the frame to the guest. */
	frame = uip_buf_get_free(info);

	/* All three header views start at the beginning of the frame. */
	eth_hdr = (struct uip_eth *)frame->eth;
	ip_hdr = (struct uip_ip *)frame->eth;
	tcp_hdr = (struct uip_tcp *)frame->eth;

	/* Ethernet header: host -> guest. */
	eth_hdr->src = info->host_mac;
	eth_hdr->dst = info->guest_mac;
	eth_hdr->type = htons(UIP_ETH_P_IP);

	/* IPv4 header; checksum is filled in once the length is known. */
	ip_hdr->vhl = UIP_IP_VER_4 | UIP_IP_HDR_LEN;
	ip_hdr->tos = 0;
	ip_hdr->id = 0;
	ip_hdr->flgfrag = 0;
	ip_hdr->ttl = UIP_IP_TTL;
	ip_hdr->proto = UIP_IP_P_TCP;
	ip_hdr->csum = 0;
	ip_hdr->sip = sk->dip;
	ip_hdr->dip = sk->sip;

	/* TCP header, no options (header length is UIP_TCP_HDR_LEN). */
	tcp_hdr->sport = sk->dport;
	tcp_hdr->dport = sk->sport;
	tcp_hdr->seq = htonl(sk->seq_server);
	tcp_hdr->ack = htonl(sk->ack_server);
	tcp_hdr->off = UIP_TCP_HDR_LEN;
	tcp_hdr->flg = flag;
	tcp_hdr->win = htons(UIP_TCP_WIN_SIZE);
	tcp_hdr->csum = 0;
	tcp_hdr->urgent = 0;

	if (payload_len > 0)
		memcpy(uip_tcp_payload(tcp_hdr), sk->payload, payload_len);

	/* Total IP length, then both checksums over the finished headers. */
	ip_hdr->len = htons(uip_tcp_hdrlen(tcp_hdr) + payload_len + uip_ip_hdrlen(ip_hdr));
	ip_hdr->csum = uip_csum_ip(ip_hdr);
	tcp_hdr->csum = uip_csum_tcp(tcp_hdr);

	/* Zeroed virtio_net_hdr in front of the ethernet frame. */
	frame->vnet_len = info->vnet_hdr_len;
	memset(frame->vnet, 0, frame->vnet_len);
	frame->eth_len = ntohs(ip_hdr->len) + uip_eth_hdrlen(&ip_hdr->eth);

	/* The payload consumes server sequence space. */
	sk->seq_server += payload_len;

	/* Mark the buffer used so it is delivered to the guest. */
	uip_buf_set_used(info, frame);

	return 0;
}
/*
 * Per-socket receive thread: reads data from the host-side fd and forwards
 * it to the guest in window-sized pieces.
 *
 * @p: the struct uip_tcp_socket this thread serves
 *
 * Each read() fills sk->buf with at most UIP_MAX_TCP_PAYLOAD bytes. Before
 * every piece is sent, the thread waits on sk->cond (under sk->lock) until
 * guest_acked + window_size - seq_server is positive. When read() returns
 * <= 0 (or an out-of-range count) the read side is shut down, a FIN|ACK is
 * sent to the guest and sk->read_done is set.
 *
 * Returns NULL (via pthread_exit).
 */
static void *uip_tcp_socket_thread(void *p)
{
	struct uip_tcp_socket *sk = p;
	int chunk, remaining, nread;
	u8 *cursor;

	kvm__set_thread_name("uip-tcp");

	for (;;) {
		cursor = sk->buf;

		nread = read(sk->fd, sk->buf, UIP_MAX_TCP_PAYLOAD);
		if (nread <= 0 || nread > UIP_MAX_TCP_PAYLOAD)
			break;

		/* nread > 0 here, so the body runs at least once. */
		remaining = nread;
		do {
			/* Wait until the guest has room for more data. */
			mutex_lock(sk->lock);
			for (;;) {
				chunk = sk->guest_acked + sk->window_size - sk->seq_server;
				if (chunk > 0)
					break;
				pthread_cond_wait(&sk->cond, &sk->lock->mutex);
			}
			mutex_unlock(sk->lock);

			sk->payload = cursor;

			/* Clamp to what is left and to one frame's payload. */
			if (chunk > remaining)
				chunk = remaining;
			if (chunk > UIP_MAX_TCP_PAYLOAD)
				chunk = UIP_MAX_TCP_PAYLOAD;

			remaining -= chunk;
			cursor += chunk;

			uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, chunk);
		} while (remaining > 0);
	}

	/* Close the server-to-guest direction; the FIN takes one sequence number. */
	uip_tcp_socket_close(sk, SHUT_RD);

	uip_tcp_payload_send(sk, UIP_TCP_FLAG_FIN | UIP_TCP_FLAG_ACK, 0);
	sk->seq_server += 1;

	sk->read_done = 1;

	pthread_exit(NULL);

	return NULL;
}
/*
 * Start the receive thread for a socket if it is not running yet.
 *
 * Allocates sk->buf (UIP_MAX_TCP_PAYLOAD bytes) and creates
 * uip_tcp_socket_thread with sk as its argument.
 *
 * Returns 0 on success or if the thread already exists, and a negative
 * errno value on failure (-ENOMEM, or the negated pthread_create() error).
 * On failure sk->buf is NULL and sk->thread is 0, so a later teardown
 * neither frees the buffer twice nor cancels a thread that never existed.
 */
static int uip_tcp_socket_receive(struct uip_tcp_socket *sk)
{
	int ret;

	if (sk->thread != 0)
		return 0;

	sk->buf = malloc(UIP_MAX_TCP_PAYLOAD);
	if (!sk->buf)
		return -ENOMEM;

	ret = pthread_create(&sk->thread, NULL, uip_tcp_socket_thread,
			     (void *)sk);
	if (ret) {
		free(sk->buf);
		/* Do not leave a dangling pointer for the teardown path. */
		sk->buf = NULL;
		/* POSIX leaves *thread undefined when creation fails. */
		sk->thread = 0;
		/* pthread_create() reports a positive errno value. */
		return -ret;
	}

	return 0;
}
/*
 * Forward the payload of a guest TCP segment to the host-side socket.
 *
 * @sk:  socket whose fd receives the data
 * @tcp: guest TCP header; payload pointer and length are derived from it
 *
 * Returns 0 without writing if the write side is already done, otherwise
 * the result of write(2). A warning is logged when that result differs
 * from the payload length.
 */
static int uip_tcp_socket_send(struct uip_tcp_socket *sk, struct uip_tcp *tcp)
{
	u8 *data;
	int want;
	int written;

	if (sk->write_done)
		return 0;

	data = uip_tcp_payload(tcp);
	want = uip_tcp_payloadlen(tcp);

	written = write(sk->fd, data, want);
	if (written != want)
		pr_warning("tcp send error");

	return written;
}
/*
 * Handle one TCP segment sent by the guest (the frame in arg->eth).
 *
 * - SYN: allocate and connect a new socket, answer with SYN|ACK and start
 *   the receive thread.
 * - Otherwise: find the existing socket, record the guest's window and ack
 *   number, then handle FIN or forward the payload to the host socket.
 *
 * Returns 0 when the segment was handled (including segments that are
 * ignored), and -1 when no socket could be allocated or found, or when
 * uip_tcp_socket_send() returns a negative value.
 */
int uip_tx_do_ipv4_tcp(struct uip_tx_arg *arg)
{
struct uip_tcp_socket *sk;
struct uip_tcp *tcp;
struct uip_ip *ip;
int ret;
/* Both header views point at the start of the frame. */
tcp = (struct uip_tcp *)arg->eth;
ip = (struct uip_ip *)arg->eth;
/*
 * Guest is trying to start a TCP session, let's fake SYN-ACK to guest
 */
if (uip_tcp_is_syn(tcp)) {
sk = uip_tcp_socket_alloc(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
if (!sk)
return -1;
sk->window_size = ntohs(tcp->win);
/*
 * Set up the initial sequence numbers
 */
sk->isn_guest = uip_tcp_isn(tcp);
sk->isn_server = uip_tcp_isn_alloc();
sk->seq_server = sk->isn_server;
sk->ack_server = sk->isn_guest + 1;
uip_tcp_payload_send(sk, UIP_TCP_FLAG_SYN | UIP_TCP_FLAG_ACK, 0);
/* The SYN sent above takes one server sequence number. */
sk->seq_server += 1;
/*
 * Start receive thread for data from remote to guest.
 * NOTE(review): the return value is ignored, so a failure to start the
 * thread goes unnoticed here.
 */
uip_tcp_socket_receive(sk);
goto out;
}
/*
 * Find socket we have allocated
 */
sk = uip_tcp_socket_find(arg, ip->sip, ip->dip, tcp->sport, tcp->dport);
if (!sk)
return -1;
/*
 * Publish the guest's advertised window and ack number under sk->lock and
 * signal sk->cond; uip_tcp_socket_thread waits on that condition until the
 * window has room.
 */
mutex_lock(sk->lock);
sk->window_size = ntohs(tcp->win);
sk->guest_acked = ntohl(tcp->ack);
pthread_cond_signal(&sk->cond);
mutex_unlock(sk->lock);
if (uip_tcp_is_fin(tcp)) {
/* Only the first FIN is processed; later ones are ignored. */
if (sk->write_done)
goto out;
sk->write_done = 1;
/* Acknowledge the FIN: ack_server advances by one. */
sk->ack_server += 1;
uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);
/*
 * Close guest to server TCP connection. uip_tcp_socket_close() frees sk
 * when read_done is also set, so sk must not be used after this call.
 */
uip_tcp_socket_close(sk, SHUT_WR);
goto out;
}
/*
 * Ignore guest to server frames with zero tcp payload
 */
if (uip_tcp_payloadlen(tcp) == 0)
goto out;
/*
 * Send the TCP data out to the remote host
 */
ret = uip_tcp_socket_send(sk, tcp);
if (ret < 0)
return -1;
/*
 * Send ACK to guest immediately; only the bytes reported as written
 * (ret) are acknowledged.
 */
sk->ack_server += ret;
uip_tcp_payload_send(sk, UIP_TCP_FLAG_ACK, 0);
out:
return 0;
}
void uip_tcp_exit(struct uip_info *info)
{
struct uip_tcp_socket *sk, *next;
mutex_lock(&info->tcp_socket_lock);
list_for_each_entry_safe(sk, next, &info->tcp_socket_head, list)
uip_tcp_socket_free(sk);
mutex_unlock(&info->tcp_socket_lock);
}