blob: f2b8c919011a054694092643561a62fd1df1225e [file] [log] [blame]
#include <inttypes.h>
#include <stdio.h>
#include <err.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kcov.h>
#include <linux/ptrace.h>
#include <sys/sysinfo.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <errno.h>
#include "helpers.h"
/* ++ uapi/pkvm_shmem.h */
#define KVM_SHMEM_ALLOC_TYPE 1
enum kvm_shmem_alloc_type {
KVM_SHMEM_ALLOCTYPE_VMALLOC,
KVM_SHMEM_ALLOCTYPE_PAGES_EXACT };
#define KVM_SHMEM_ALLOC(alloc) _IO(KVM_SHMEM_ALLOC_TYPE, alloc)
#define KVM_SHMEM_ALLOC_PAGES KVM_SHMEM_ALLOC(KVM_SHMEM_ALLOCTYPE_PAGES_EXACT)
#define KVM_SHMEM_VMALLOC KVM_SHMEM_ALLOC(KVM_SHMEM_ALLOCTYPE_VMALLOC)
// ioctl on the mmapable fd from the KVM_SHMEM_ALLOC ioctl
#define KVM_SHMEM_AREA_KADDR _IOR(1, 0, void *)
#define KVM_SHMEM_AREA_PHYS _IOR(1, 1, void *)
#define KVM_SHMEM_AREA_MAKE_READONLY _IO(1, 2)
/* -- uapi/pkvm_shmem.h */
/* Bookkeeping for the one shared-memory area this test receives over a socket. */
struct shmem_area {
int size; /* length in bytes handed to mmap(); main() sets it to PAGE_SIZE */
int fd; /* descriptor of the area, filled in by recv_fd() */
void *mmap; /* userspace mapping of the area, set by shmem_area_mmap() */
ulong phys; /* value returned by the KVM_SHMEM_AREA_PHYS ioctl */
};
/* Early CoW breaks the share immediately by writing at an offset into
 * the shared page. This should not affect the shared page, and the
 * copy-break page must still start with the expected magic value.
 *
 * Guest entry contract (set up by run_vm()): x0 holds the guest address of
 * four consecutive 64-bit parameters: two addends (0x110, 0x220), the
 * MMIO address and the address of the shared (CoW) page.
 *
 * The bytes between the _start and _end labels are copied into guest
 * memory by run_vm(); the snippet is never executed on the host.
 */
asm(
".global guest_code_early_cow_start, guest_code_early_cow_end\n"
"guest_code_early_cow_start:\n"
" mov x10, x0 \n" /* x10 = parameter block; x0 is reused below */
" ldp x0, x1, [x10] \n" /* x0, x1 = the two addends */
" ldp x2, x3, [x10, #16]\n" /* x2 = MMIO address, x3 = shared page */
" str x0,[x3, #16] \n" /* early CoW: write at offset 16, before any read */
" ldr w9, [x3] \n" /* w9 = 32-bit value at the start of the page */
" add x0, x9, x0 \n" /* x0 = value read + first addend */
" add x0, x1, x0 \n" /* x0 += second addend */
" str x0,[x2] \n" /* report the sum by storing it to the MMIO address */
" brk #0 \n" /* breakpoint; run_vm() also handles KVM_EXIT_DEBUG */
"guest_code_early_cow_end:\n"
);
/* Start/end labels of the snippet above; their difference is its size in bytes. */
extern char guest_code_early_cow_start[], guest_code_early_cow_end[];
/* Late CoW writes to the share after reading from it. It overwrites the
 * magic value at the start of the page. It should not affect the magic
 * value in the original read-only shared page.
 *
 * Guest entry contract (set up by run_vm()): x0 holds the guest address of
 * four consecutive 64-bit parameters: two addends (0x110, 0x220), the
 * MMIO address and the address of the shared (CoW) page.
 *
 * The bytes between the _start and _end labels are copied into guest
 * memory by run_vm(); the snippet is never executed on the host.
 */
asm(
".global guest_code_start, guest_code_end\n"
"guest_code_start: \n"
" mov x10, x0 \n" /* x10 = parameter block; x0 is reused below */
" ldp x0, x1, [x10] \n" /* x0, x1 = the two addends */
" ldp x2, x3, [x10, #16]\n" /* x2 = MMIO address, x3 = shared page */
" ldr w9, [x3] \n" /* w9 = 32-bit value at the start of the page */
" add x0, x9, x0 \n" /* x0 = value read + first addend */
" add x0, x1, x0 \n" /* x0 += second addend */
" str x0,[x3] \n" /* late CoW: overwrite the start of the page after reading it */
" str x0,[x2] \n" /* report the sum by storing it to the MMIO address */
" brk #0 \n" /* breakpoint; run_vm() also handles KVM_EXIT_DEBUG */
"guest_code_end: \n"
);
/* Start/end labels of the snippet above; their difference is its size in bytes. */
extern char guest_code_start[], guest_code_end[];
/*
 * Create a single-vCPU VM, map the shared area into it and run one of the
 * guest snippets until the first interesting exit.
 *
 * area:         shared area; its userspace mapping (area->mmap) backs the
 *               guest page at cow_address.
 * is_protected: forwarded to create_vm() to request a protected VM.
 * is_early_cow: true selects guest_code_early_cow_*, false guest_code_*.
 *
 * Guest layout: the code page lives at code_address; the four 64-bit
 * parameters consumed by the snippet sit at byte offset 0x800 of that page
 * (index 0x100 of a uint64_t array) and X0 points at them.
 *
 * Returns 0 on KVM_EXIT_DEBUG or KVM_EXIT_MMIO. Any other exit reason, or a
 * failed allocation, terminates the process through err()/errx().
 */
static int run_vm(struct shmem_area *area, bool is_protected,
                  bool is_early_cow)
{
    int kvm, vmfd, vcpufd;
    const uint64_t code_address = 0x1000;
    const uint64_t mmio_address = 0x2000;
    const uint64_t cow_address = 0x3000;
    uint64_t *mem_code;
    struct kvm_run *run;

    printf("Making %sprotected VM\n", is_protected ? "" : "un");
    printf("CoW happens %s\n", is_early_cow ? "early" : "late");

    kvm = get_kvm();
    vmfd = create_vm(kvm, is_protected);

    /* Allocate one aligned page of guest memory to hold the code. */
    mem_code = mmap(NULL, 0x1000,
                    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
    /* mmap() reports failure with MAP_FAILED, never with NULL. */
    if (mem_code == MAP_FAILED)
        err(1, "allocating guest memory");

    if (is_early_cow)
        memcpy(mem_code, guest_code_early_cow_start,
               guest_code_early_cow_end - guest_code_early_cow_start);
    else
        memcpy(mem_code, guest_code_start, guest_code_end - guest_code_start);

    /* Parameter block read by the guest through X0 (byte offset 0x800). */
    mem_code[0x100 + 0] = 0x110;
    mem_code[0x100 + 1] = 0x220;
    mem_code[0x100 + 2] = mmio_address;
    mem_code[0x100 + 3] = cow_address;

    /* Memory space. */
    vm_add_mem_page(vmfd, 0, code_address, mem_code);
    //vm_add_mmio_page(vmfd, 1, mmio_address);
    vm_add_mem_page(vmfd, 2, cow_address, area->mmap);

    /* Create one CPU to run in the VM. */
    vcpufd = create_vcpu(kvm, vmfd, &run, false);

    /* Protected VM with no firmware: Note that we only control X0 and PC. */
    set_one_reg(vcpufd, REG_PC, code_address);
    set_one_reg(vcpufd, REG_X(0), code_address + 0x800);

    /* Repeatedly run code and handle VM exits. */
    for (;;) {
        KVM_IOCTL(vcpufd, KVM_RUN, NULL);

        switch (run->exit_reason) {
        case KVM_EXIT_DEBUG:
            puts("KVM_EXIT_DEBUG");
            return 0;
        case KVM_EXIT_MMIO:
        {
            uint64_t payload;

            /*
             * mmio.data is a byte array: copy it out instead of
             * dereferencing it through a uint64_t pointer.
             */
            memcpy(&payload, run->mmio.data, sizeof(payload));
            printf("KVM_EXIT_MMIO: addr = 0x%llx, len = %u, is_write = %u, data = 0x%"PRIx64"\n",
                   (unsigned long long)run->mmio.phys_addr,
                   run->mmio.len, run->mmio.is_write,
                   payload);
            return 0; /* XXX */
        }
        case KVM_EXIT_FAIL_ENTRY:
            errx(1, "KVM_EXIT_FAIL_ENTRY: hardware_entry_failure_reason = 0x%llx",
                 (unsigned long long)run->fail_entry.hardware_entry_failure_reason);
        case KVM_EXIT_INTERNAL_ERROR:
            errx(1, "KVM_EXIT_INTERNAL_ERROR: suberror = 0x%x",
                 run->internal.suberror);
        default:
            errx(1, "exit_reason = 0x%x", run->exit_reason);
        }
    }
}
/*
 * Query the physical address of the area behind area->fd and store it in
 * area->phys. Exits via err() if the ioctl returns non-zero.
 */
static void shmem_area_getinfo(struct shmem_area *area)
{
    int rc = ioctl(area->fd, KVM_SHMEM_AREA_PHYS, &area->phys);

    if (rc != 0)
        err(1, "Can't get kernel area physical address");
}
/*
 * Map area->size bytes of area->fd (offset 0) as a shared read/write
 * mapping and record the address in area->mmap. Exits via err() when the
 * mapping cannot be established.
 */
static void shmem_area_mmap(struct shmem_area *area)
{
    void *va = mmap(NULL, area->size, PROT_READ | PROT_WRITE,
                    MAP_SHARED, area->fd, 0);

    area->mmap = va;
    if (va == MAP_FAILED)
        err(1, "Can't mmap kernel area");
}
/*
 * Receive `n` file descriptors passed as SCM_RIGHTS ancillary data on
 * `socket` and store them in `fds`.
 *
 * Up to 256 bytes of ordinary payload accompanying the message are read
 * and discarded. The process exits via err()/errx() if the receive fails
 * or the message does not carry at least `n` descriptors.
 */
static void recv_fd(int socket, int *fds, int n)
{
    struct msghdr msg = {0};
    struct cmsghdr *cmsg;
    char buf[CMSG_SPACE(n * sizeof(int))], payload[256];
    struct iovec io = { .iov_base = payload, .iov_len = sizeof(payload) };

    memset(buf, 0, sizeof(buf));
    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = buf;
    msg.msg_controllen = sizeof(buf);

    if (recvmsg(socket, &msg, 0) < 0)
        err(1, "Failed to receive message");

    /*
     * CMSG_FIRSTHDR() yields NULL when no control data arrived (e.g. the
     * peer closed the connection or sent a plain message), so validate the
     * header before copying descriptors out of it.
     */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (cmsg == NULL ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(n * sizeof(int)))
        errx(1, "Message does not carry %d file descriptor(s)", n);

    memcpy(fds, CMSG_DATA(cmsg), n * sizeof(int));
}
/*
 * Open a stream-type UNIX domain socket and connect it to `path`.
 *
 * The path is copied into sun_path and truncated to fit, always leaving a
 * terminating NUL. Returns the connected descriptor; exits via err() if
 * the socket cannot be created or connected.
 */
static int mk_uds(const char *path)
{
    /* Members not named here, including sun_path, start out zeroed. */
    struct sockaddr_un addr = { .sun_family = AF_UNIX };
    int sfd = socket(AF_UNIX, SOCK_STREAM, 0);

    if (sfd == -1)
        err(1, "Failed to create socket");

    /* Copy at most size-1 bytes so the final zero byte stays in place. */
    strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1);

    if (connect(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1)
        err(1, "Failed to connect to socket");

    return sfd;
}
/*
 * Number of SIGSEGVs seen so far. It is written from the signal handler and
 * read from main(), hence volatile sig_atomic_t.
 */
static volatile sig_atomic_t nr_sigsegv;

/*
 * SIGSEGV handler (SA_SIGINFO style): count the fault, report the faulting
 * PC and resume execution after the faulting instruction.
 *
 * raw_context is the ucontext_t of the interrupted code. Advancing its
 * saved PC by 4 skips exactly one instruction, which relies on fixed
 * 4-byte instruction encoding (the mcontext `pc` field used here is the
 * AArch64 layout).
 *
 * NOTE: printf() is not async-signal-safe. It is used here only because the
 * fault is raised synchronously by a plain store in main(), not from inside
 * stdio.
 */
static void sigsegv_handler(int signr, siginfo_t *info, void *raw_context)
{
    ucontext_t *uc = raw_context;
    mcontext_t *mc = &uc->uc_mcontext;

    (void)signr;
    (void)info;

    nr_sigsegv++;
    printf("SIGSEGV #%d at PC %llx\n", (int)nr_sigsegv,
           (unsigned long long)mc->pc);
    mc->pc += 4;
}
static void set_sigv_handler(void)
{
struct sigaction sa = { 0 };
sigemptyset(&sa.sa_mask);
sa.sa_sigaction = sigsegv_handler;
sa.sa_flags = SA_SIGINFO;
if (sigaction(SIGSEGV, &sa, NULL) != 0)
err(1, NULL);
}
static void clear_sigv_handler(void)
{
struct sigaction sa = { 0 };
sigemptyset(&sa.sa_mask);
sa.sa_handler = SIG_DFL;
if (sigaction(SIGSEGV, &sa, NULL) != 0)
err(1, NULL);
}
/*
 * Test driver.
 *
 * Usage: <prog> XY, a single two-character argument where
 *   X == 'p' requests a protected VM (any other character: unprotected),
 *   Y == 'e' selects the early-CoW guest (any other character: late CoW).
 *
 * Steps:
 *   1. connect to the UNIX socket "shmem.sock" and receive the area's fd;
 *   2. query the area's physical address and map it;
 *   3. store to the mapping and require exactly one SIGSEGV;
 *   4. run the guest against the area.
 *
 * Returns 0 on success and 1 on a usage error or when the write-protection
 * check fails, so that callers can detect failure from the exit status.
 */
int main(int argc, char** argv)
{
    int sock_fd;
    struct shmem_area area = { .size = PAGE_SIZE };
    bool is_protected_vm, is_early_cow;

    if ((argc != 2) || (strlen(argv[1]) != 2)) {
        printf("Usage: %s {up}{el}\n", argv[0]);
        return 1;
    }
    is_protected_vm = (argv[1][0] == 'p');
    is_early_cow = (argv[1][1] == 'e');

    sock_fd = mk_uds("shmem.sock");
    recv_fd(sock_fd, &area.fd, 1);
    printf("Received FD %d\n", area.fd);

    shmem_area_getinfo(&area);
    shmem_area_mmap(&area);
    printf("Area: pa=%lx va=%p size=%d\n",
           area.phys, area.mmap, area.size);
    printf("Value @ %lx: %08"PRIx32"\n",
           area.phys, *(volatile uint32_t *)area.mmap);

    /*
     * Attempt to write to the read-only mapping. We expect SIGSEGV.
     * The mapping is requested PROT_READ | PROT_WRITE, so the protection
     * presumably comes from the area itself having been made read-only by
     * whoever created it (TODO confirm against the sender).
     */
    printf("Testing write protection @ %lx\n", area.phys);
    set_sigv_handler();
    *(volatile uint32_t *)area.mmap = 0xdeadbeef;
    clear_sigv_handler();
    if (nr_sigsegv != 1) {
        printf("Expected precisely one SIGSEGV signal\n");
        return 1;
    }

    return run_vm(&area, is_protected_vm, is_early_cow);
}
/*
* Local variables:
* mode: C
* c-file-style: "Linux"
* c-basic-offset: 4
* tab-width: 4
* indent-tabs-mode: nil
* End:
*/