#include <inttypes.h>
#include <stdio.h>
#include <err.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <linux/types.h>
#include <linux/kvm.h>
#include <linux/kcov.h>
#include <linux/ptrace.h>
#include <sys/sysinfo.h>

#include <sys/un.h>
#include <sys/wait.h>
#include <sys/socket.h>
#include <signal.h>
#include <errno.h>

#include "helpers.h"
| |
/* ++ uapi/pkvm_shmem.h */

/* ioctl "type" field shared by every request defined in this section. */
#define KVM_SHMEM_ALLOC_TYPE 1

/* Allocation flavours; the enumerator value is used as the ioctl number. */
enum kvm_shmem_alloc_type {
    KVM_SHMEM_ALLOCTYPE_VMALLOC = 0,
    KVM_SHMEM_ALLOCTYPE_PAGES_EXACT = 1,
};

/* Build the allocation request for a given enum kvm_shmem_alloc_type. */
#define KVM_SHMEM_ALLOC(alloc) _IO(KVM_SHMEM_ALLOC_TYPE, (alloc))
#define KVM_SHMEM_VMALLOC \
    KVM_SHMEM_ALLOC(KVM_SHMEM_ALLOCTYPE_VMALLOC)
#define KVM_SHMEM_ALLOC_PAGES \
    KVM_SHMEM_ALLOC(KVM_SHMEM_ALLOCTYPE_PAGES_EXACT)

/* ioctl on the mmapable fd from the KVM_SHMEM_ALLOC ioctl */
#define KVM_SHMEM_AREA_KADDR         _IOR(1, 0, void *)
#define KVM_SHMEM_AREA_PHYS          _IOR(1, 1, void *)
#define KVM_SHMEM_AREA_MAKE_READONLY _IO(1, 2)

/* -- uapi/pkvm_shmem.h */
| |
/*
 * Bookkeeping for one shared-memory area as seen by this process.
 *
 * size - length in bytes handed to mmap() for the area
 * fd   - file descriptor for the area (main() fills it in via recv_fd())
 * mmap - address of the mapping, stored by shmem_area_mmap()
 * phys - value written by the KVM_SHMEM_AREA_PHYS ioctl in
 *        shmem_area_getinfo()
 *
 * phys is spelled "unsigned long" rather than the non-standard "ulong"
 * typedef, which <sys/types.h> hides in strict ISO C modes; the type is
 * the same.
 */
struct shmem_area {
    int size;
    int fd;
    void *mmap;
    unsigned long phys;
};
| |
| /* Early CoW breaks the share immediately by writing at an offset into |
| * the shared page. This should not affect the shared page, and the |
| * copy-break page must still start with the expected magic value. |
| */ |
| asm( |
| ".global guest_code_early_cow_start, guest_code_early_cow_end\n" |
| "guest_code_early_cow_start:\n" |
| " mov x10, x0 \n" |
| " ldp x0, x1, [x10] \n" |
| " ldp x2, x3, [x10, #16]\n" |
| " str x0,[x3, #16] \n" /* early CoW */ |
| " ldr w9, [x3] \n" |
| " add x0, x9, x0 \n" |
| " add x0, x1, x0 \n" |
| " str x0,[x2] \n" |
| " brk #0 \n" |
| "guest_code_early_cow_end:\n" |
| ); |
| |
| extern char guest_code_early_cow_start[], guest_code_early_cow_end[]; |
| |
| /* Late CoW writes to the share after reading from it. It overwrites the |
| * magic value at the start of the page. It should not affect the magic |
| * value in the original read-only shared page. |
| */ |
| asm( |
| ".global guest_code_start, guest_code_end\n" |
| "guest_code_start: \n" |
| " mov x10, x0 \n" |
| " ldp x0, x1, [x10] \n" |
| " ldp x2, x3, [x10, #16]\n" |
| " ldr w9, [x3] \n" |
| " add x0, x9, x0 \n" |
| " add x0, x1, x0 \n" |
| " str x0,[x3] \n" /* late CoW */ |
| " str x0,[x2] \n" |
| " brk #0 \n" |
| "guest_code_end: \n" |
| ); |
| |
| extern char guest_code_start[], guest_code_end[]; |
| |
| static int run_vm(struct shmem_area *area, bool is_protected, |
| bool is_early_cow) |
| { |
| int kvm, vmfd, vcpufd; |
| const uint64_t code_address = 0x1000; |
| const uint64_t mmio_address = 0x2000; |
| const uint64_t cow_address = 0x3000; |
| uint64_t *mem_code; |
| struct kvm_run *run; |
| |
| printf("Making %sprotected VM\n", is_protected ? "" : "un"); |
| printf("CoW happens %s\n", is_early_cow ? "early" : "late"); |
| |
| kvm = get_kvm(); |
| |
| vmfd = create_vm(kvm, is_protected); |
| |
| /* Allocate one aligned page of guest memory to hold the code. */ |
| mem_code = mmap(NULL, 0x1000, |
| PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); |
| if (!mem_code) |
| err(1, "allocating guest memory"); |
| |
| if (is_early_cow) |
| memcpy(mem_code, guest_code_early_cow_start, |
| guest_code_early_cow_end-guest_code_early_cow_start); |
| else |
| memcpy(mem_code, guest_code_start, guest_code_end-guest_code_start); |
| |
| mem_code[0x100+0] = 0x110; |
| mem_code[0x100+1] = 0x220; |
| mem_code[0x100+2] = mmio_address; |
| mem_code[0x100+3] = cow_address; |
| |
| /* Memory space. */ |
| vm_add_mem_page(vmfd, 0, code_address, mem_code); |
| //vm_add_mmio_page(vmfd, 1, mmio_address); |
| vm_add_mem_page(vmfd, 2, cow_address, area->mmap); |
| |
| /* Create one CPU to run in the VM. */ |
| vcpufd = create_vcpu(kvm, vmfd, &run, false); |
| |
| /* Protected VM with no firmware: Note that we only control X0 and PC. */ |
| set_one_reg(vcpufd, REG_PC, code_address); |
| set_one_reg(vcpufd, REG_X(0), code_address + 0x800); |
| |
| /* Repeatedly run code and handle VM exits. */ |
| for (;;) { |
| KVM_IOCTL(vcpufd, KVM_RUN, NULL); |
| switch (run->exit_reason) { |
| case KVM_EXIT_DEBUG: |
| puts("KVM_EXIT_DEBUG"); |
| return 0; |
| case KVM_EXIT_MMIO: |
| { |
| uint64_t payload = *(uint64_t*)(run->mmio.data); /* sorry */ |
| printf("KVM_EXIT_MMIO: addr = 0x%llx, len = %u, is_write = %u, data = 0x%"PRIx64"\n", |
| run->mmio.phys_addr, run->mmio.len, run->mmio.is_write, |
| payload); |
| return 0;/* XXX */ |
| break; |
| } |
| case KVM_EXIT_FAIL_ENTRY: |
| errx(1, "KVM_EXIT_FAIL_ENTRY: hardware_entry_failure_reason = 0x%llx", |
| (unsigned long long)run->fail_entry.hardware_entry_failure_reason); |
| case KVM_EXIT_INTERNAL_ERROR: |
| errx(1, "KVM_EXIT_INTERNAL_ERROR: suberror = 0x%x", |
| run->internal.suberror); |
| default: |
| errx(1, "exit_reason = 0x%x", run->exit_reason); |
| } |
| } |
| } |
| |
| static void shmem_area_getinfo(struct shmem_area *area) |
| { |
| if (ioctl(area->fd, KVM_SHMEM_AREA_PHYS, &area->phys)) |
| err(1, "Can't get kernel area physical address"); |
| } |
| |
| static void shmem_area_mmap(struct shmem_area *area) |
| { |
| area->mmap = mmap(NULL, area->size, PROT_READ | PROT_WRITE, |
| MAP_SHARED, area->fd, 0); |
| if ((void*)area->mmap == MAP_FAILED) |
| err(1, "Can't mmap kernel area"); |
| } |
| |
/*
 * Receive n file descriptors passed as SCM_RIGHTS ancillary data.
 *
 * socket - connected UNIX-domain socket to read one message from
 * fds    - output array with room for n descriptors
 * n      - number of descriptors expected; must be positive
 *
 * Up to 256 bytes of ordinary payload are read and discarded.  The process
 * exits via err()/errx() if the receive fails or the message does not
 * carry an SCM_RIGHTS control message holding at least n descriptors.
 */
static void recv_fd(int socket, int *fds, int n)
{
    struct msghdr msg = {0};
    struct cmsghdr *cmsg;
    char data[256];
    struct iovec io = { .iov_base = data, .iov_len = sizeof(data) };
    size_t fds_len, control_len;
    void *control;

    if (n <= 0)
        errx(1, "Invalid number of file descriptors: %d", n);

    fds_len = (size_t)n * sizeof(int);
    control_len = CMSG_SPACE(fds_len);

    /*
     * Heap memory is suitably aligned for struct cmsghdr, which a plain
     * char array on the stack is not guaranteed to be.
     */
    control = calloc(1, control_len);
    if (!control)
        err(1, "Failed to allocate control buffer");

    msg.msg_iov = &io;
    msg.msg_iovlen = 1;
    msg.msg_control = control;
    msg.msg_controllen = control_len;

    if (recvmsg(socket, &msg, 0) < 0)
        err(1, "Failed to receive message");

    /*
     * CMSG_FIRSTHDR() yields NULL when no ancillary data arrived (e.g. the
     * peer closed the connection); never copy from an absent or unrelated
     * control message.
     */
    cmsg = CMSG_FIRSTHDR(&msg);
    if (!cmsg ||
        cmsg->cmsg_level != SOL_SOCKET ||
        cmsg->cmsg_type != SCM_RIGHTS ||
        cmsg->cmsg_len < CMSG_LEN(fds_len))
        errx(1, "Message did not carry %d file descriptor(s)", n);

    memcpy(fds, CMSG_DATA(cmsg), fds_len);
    free(control);
}
| |
/*
 * Open a stream UNIX-domain socket and connect it to the socket file at
 * path.  A path longer than sun_path can hold is truncated.  Returns the
 * connected descriptor; exits via err() if socket() or connect() fails.
 */
static int mk_uds(const char *path)
{
    struct sockaddr_un addr;
    int sfd = socket(AF_UNIX, SOCK_STREAM, 0);

    if (sfd == -1)
        err(1, "Failed to create socket");

    memset(&addr, 0, sizeof(addr));
    addr.sun_family = AF_UNIX;
    /* Copies at most sizeof(sun_path) - 1 bytes and keeps the final NUL. */
    snprintf(addr.sun_path, sizeof(addr.sun_path), "%s", path);

    if (connect(sfd, (struct sockaddr *)&addr, sizeof(addr)) == -1)
        err(1, "Failed to connect to socket");

    return sfd;
}
| |
| static int nr_sigsegv; |
| static void sigsegv_handler(int signr, siginfo_t *info, void *raw_context) |
| { |
| ucontext_t *uc = raw_context; |
| mcontext_t *mc = &uc->uc_mcontext; |
| nr_sigsegv++; |
| printf("SIGSEGV #%d at PC %llx\n", nr_sigsegv, mc->pc); |
| mc->pc += 4; |
| } |
| |
| static void set_sigv_handler(void) |
| { |
| struct sigaction sa = { 0 }; |
| |
| sigemptyset(&sa.sa_mask); |
| sa.sa_sigaction = sigsegv_handler; |
| sa.sa_flags = SA_SIGINFO; |
| if (sigaction(SIGSEGV, &sa, NULL) != 0) |
| err(1, NULL); |
| } |
| |
/*
 * Put SIGSEGV back to its default disposition (SIG_DFL).
 * Exits via err() if sigaction() fails.
 */
static void clear_sigv_handler(void)
{
    struct sigaction action;

    memset(&action, 0, sizeof(action));
    action.sa_handler = SIG_DFL;
    sigemptyset(&action.sa_mask);

    if (sigaction(SIGSEGV, &action, NULL))
        err(1, NULL);
}
| |
| int main(int argc, char** argv) |
| { |
| int sock_fd; |
| struct shmem_area area = { .size = PAGE_SIZE }; |
| bool is_protected_vm, is_early_cow; |
| |
| if ((argc != 2) || (strlen(argv[1]) != 2)) { |
| printf("Usage: %s {up}{el}\n", argv[0]); |
| return 0; |
| } |
| |
| is_protected_vm = (argv[1][0] == 'p'); |
| is_early_cow = (argv[1][1] == 'e'); |
| |
| sock_fd = mk_uds("shmem.sock"); |
| recv_fd(sock_fd, &area.fd, 1); |
| printf("Received FD %d\n", area.fd); |
| |
| shmem_area_getinfo(&area); |
| shmem_area_mmap(&area); |
| printf("Area: pa=%lx va=%p size=%d\n", |
| area.phys, area.mmap, area.size); |
| printf("Value @ %lx: %08"PRIx32"\n", |
| area.phys, *(volatile uint32_t *)area.mmap); |
| |
| /* Attempt to write to the read-only mapping. We expect SIGSEGV. */ |
| printf("Testing write protection @ %lx\n", area.phys); |
| set_sigv_handler(); |
| *(volatile uint32_t *)area.mmap = 0xdeadbeef; |
| clear_sigv_handler(); |
| |
| if (nr_sigsegv != 1) { |
| printf("Expected precisely one SIGSEGV signal\n"); |
| return 0; |
| } |
| |
| run_vm(&area, is_protected_vm, is_early_cow); |
| |
| return 0; |
| } |
| |
| /* |
| * Local variables: |
| * mode: C |
| * c-file-style: "Linux" |
| * c-basic-offset: 4 |
| * tab-width: 4 |
| * indent-tabs-mode: nil |
| * End: |
| */ |