// SPDX-License-Identifier: GPL-2.0-only
/*
 * KVM_GET/SET_* tests
 *
 * Copyright (C) 2018, Red Hat, Inc.
 *
 * Tests for vCPU state save/restore, including nested guest state.
 */
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

#include "test_util.h"

#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
#include "svm_util.h"

#define L2_GUEST_STACK_SIZE 256

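/*
 * The GUEST_SYNC() values below are the checkpoints at which the host saves
 * and restores all vCPU state: stages 1-2 run in guest_code() itself, stages
 * 3-7 in the SVM L1/L2 pair, and stages 3-13 in the VMX L1/L2 pair (which
 * also exercises save/restore of a shadow VMCS).
 */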
void svm_l2_guest_code(void)
{
	GUEST_SYNC(4);
	/* Exit to L1 */
	vmcall();
	GUEST_SYNC(6);
	/* Done, exit to L1 and never come back. */
	vmcall();
}

static void svm_l1_guest_code(struct svm_test_data *svm)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
	struct vmcb *vmcb = svm->vmcb;

	GUEST_ASSERT(svm->vmcb_gpa);
	/* Prepare for L2 execution. */
	generic_svm_setup(svm, svm_l2_guest_code,
			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(3);
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(5);
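	/* VMMCALL is a 3-byte instruction; skip it so L2 resumes at GUEST_SYNC(6). */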
	vmcb->save.rip += 3;
	run_guest(vmcb, svm->vmcb_gpa);
	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
	GUEST_SYNC(7);
}

void vmx_l2_guest_code(void)
{
	GUEST_SYNC(6);

	/* Exit to L1 */
	vmcall();

	/* L1 has now set up a shadow VMCS for us. */
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_SYNC(10);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
	GUEST_SYNC(11);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
	GUEST_SYNC(12);

	/* Done, exit to L1 and never come back. */
	vmcall();
}

static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(vmx_pages->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_SYNC(3);
	GUEST_ASSERT(load_vmcs(vmx_pages));
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	GUEST_SYNC(4);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(5);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* Check that the launched state is preserved. */
	GUEST_ASSERT(vmlaunch());

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_SYNC(7);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

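	/*
	 * L2 has been re-executing the same vmcall(); finally advance RIP
	 * past the 3-byte VMCALL instruction.
	 */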
	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);

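	/*
	 * Enable the shadow VMCS and link it into the current VMCS so that
	 * VMREAD/VMWRITE in L2 access the shadow VMCS instead of causing
	 * unconditional VM-Exits.
	 */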
	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);

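	/* A shadow VMCS itself can never be launched or resumed. */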
	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmlaunch());
	GUEST_SYNC(8);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());

	vmwrite(GUEST_RIP, 0xc0ffee);
	GUEST_SYNC(9);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);

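	/* Load the real VMCS again and resume L2, which runs through its final vmcall(). */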
	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

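	/*
	 * Verify that L2's VMWRITEs hit the shadow VMCS, and that it still
	 * refuses to be launched or resumed.
	 */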
	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
	GUEST_SYNC(13);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
}

static void __attribute__((__flatten__)) guest_code(void *arg)
{
	GUEST_SYNC(1);

	if (this_cpu_has(X86_FEATURE_XSAVE)) {
		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
		uint8_t buffer[4096];

		memset(buffer, 0xcc, sizeof(buffer));

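		/* Enable XSAVE, then turn on every supported xfeature in XCR0. */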
		set_cr4(get_cr4() | X86_CR4_OSXSAVE);
		GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));

		xsetbv(0, xgetbv(0) | supported_xcr0);

		/*
		 * Modify state for all supported xfeatures to take them out of
		 * their "init" state, i.e. to make them show up in XSTATE_BV.
		 *
		 * Note off-by-default features, e.g. AMX, are out of scope for
		 * this particular testcase as they have a different ABI.
		 */
		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
		asm volatile ("fincstp");

		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
		asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));

		if (supported_xcr0 & XFEATURE_MASK_YMM)
			asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));

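		/*
		 * Touch the opmask (k1), ZMM_Hi256 (zmm0) and Hi16_ZMM
		 * (zmm16) components so that all three AVX-512 xfeatures
		 * leave their init state.
		 */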
		if (supported_xcr0 & XFEATURE_MASK_AVX512) {
			asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
			asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
			asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
		}

		if (this_cpu_has(X86_FEATURE_MPX)) {
			uint64_t bounds[2] = { 10, 0xffffffffull };
			uint64_t output[2] = { };

			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);

			/*
			 * Don't bother trying to get BNDCSR into the INUSE
			 * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
			 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
			 * modified by XRSTOR.  Stuffing XSTATE_BV in the host
			 * is simpler than doing XRSTOR here in the guest.
			 *
			 * However, temporarily enable MPX in BNDCFGS so that
			 * BNDMOV actually loads BND1.  If MPX isn't *fully*
			 * enabled, all MPX instructions are treated as NOPs.
			 *
			 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
			 * mnemonics/registers has been removed from gcc and
			 * clang (and was never fully supported by clang).
			 */
			wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
			asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
			/*
			 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
			 * that BND1 actually got loaded.
			 */
			asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
			wrmsr(MSR_IA32_BNDCFGS, 0);

			GUEST_ASSERT_EQ(bounds[0], output[0]);
			GUEST_ASSERT_EQ(bounds[1], output[1]);
		}
		if (this_cpu_has(X86_FEATURE_PKU)) {
			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
			set_cr4(get_cr4() | X86_CR4_PKE);
			GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));

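			/* Set every PKRU bit so PKRU leaves its init state. */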
			wrpkru(-1u);
		}
	}

	GUEST_SYNC(2);

	if (arg) {
		if (this_cpu_has(X86_FEATURE_SVM))
			svm_l1_guest_code(arg);
		else
			vmx_l1_guest_code(arg);
	}

	GUEST_DONE();
}

int main(int argc, char *argv[])
{
	uint64_t *xstate_bv, saved_xstate_bv;
	vm_vaddr_t nested_gva = 0;
	struct kvm_cpuid2 empty_cpuid = {};
	struct kvm_regs regs1, regs2;
	struct kvm_vcpu *vcpu, *vcpuN;
	struct kvm_vm *vm;
	struct kvm_x86_state *state;
	struct ucall uc;
	int stage;

	/* Create VM */
	vm = vm_create_with_one_vcpu(&vcpu, guest_code);

	vcpu_regs_get(vcpu, &regs1);

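	/* Allocate nested guest pages only if KVM supports nested state. */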
	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
		if (kvm_cpu_has(X86_FEATURE_SVM))
			vcpu_alloc_svm(vm, &nested_gva);
		else if (kvm_cpu_has(X86_FEATURE_VMX))
			vcpu_alloc_vmx(vm, &nested_gva);
	}

	if (!nested_gva)
		pr_info("will skip nested state checks\n");

	vcpu_args_set(vcpu, 1, nested_gva);

	for (stage = 1;; stage++) {
		vcpu_run(vcpu);
		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			/* NOT REACHED */
		case UCALL_SYNC:
			break;
		case UCALL_DONE:
			goto done;
		default:
			TEST_FAIL("Unknown ucall %lu", uc.cmd);
		}

		/* UCALL_SYNC is handled here. */
		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
			    uc.args[1] == stage,
			    "Stage %d: Unexpected GUEST_SYNC value, got %lx",
			    stage, (ulong)uc.args[1]);

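		/* Save all state, and snapshot the GPRs to verify the restore below. */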
		state = vcpu_save_state(vcpu);
		memset(&regs1, 0, sizeof(regs1));
		vcpu_regs_get(vcpu, &regs1);

		kvm_vm_release(vm);

		/* Restore state in a new VM. */
		vcpu = vm_recreate_with_one_vcpu(vm);
		vcpu_load_state(vcpu, state);

		/*
		 * Restore XSAVE state in a dummy vCPU, first without doing
		 * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
		 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
		 * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
		 * load only XSAVE state, MSRs in particular have a much more
		 * convoluted ABI.
		 *
		 * Load two versions of XSAVE state: one with the actual guest
		 * XSAVE state, and one with all supported features forced "on"
		 * in xstate_bv, e.g. to ensure that KVM allows loading all
		 * supported features, even if something goes awry in saving
		 * the original snapshot.
		 */
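		/* XSTATE_BV is the first u64 of the XSAVE header at byte 512. */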
		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
		saved_xstate_bv = *xstate_bv;

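		/* Pass 1: no guest CPUID, actual then forced-on XSTATE_BV. */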
		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = kvm_cpu_supported_xcr0();
		vcpu_xsave_set(vcpuN, state->xsave);

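		/* Pass 2: empty guest CPUID, forced-on then original XSTATE_BV. */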
		vcpu_init_cpuid(vcpuN, &empty_cpuid);
		vcpu_xsave_set(vcpuN, state->xsave);
		*xstate_bv = saved_xstate_bv;
		vcpu_xsave_set(vcpuN, state->xsave);

		kvm_x86_state_cleanup(state);

		memset(&regs2, 0, sizeof(regs2));
		vcpu_regs_get(vcpu, &regs2);
		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
			    (ulong) regs2.rdi, (ulong) regs2.rsi);
	}

done:
	kvm_vm_free(vm);
}