| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * vmx_nested_tsc_scaling_test |
| * |
| * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| * |
| * This test case verifies that nested TSC scaling behaves as expected when |
| * both L1 and L2 are scaled using different ratios. For this test we scale |
| * L1 down and scale L2 up. |
| */ |
| |
| #include <time.h> |
| |
| #include "kvm_util.h" |
| #include "vmx.h" |
| #include "kselftest.h" |
| |
| |
| #define VCPU_ID 0 |
| |
| /* L2 is scaled up (from L1's perspective) by this factor */ |
| #define L2_SCALE_FACTOR 4ULL |
| |
| #define TSC_OFFSET_L2 ((uint64_t) -33125236320908) |
| #define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48) |
| |
| #define L2_GUEST_STACK_SIZE 64 |
| |
| enum { USLEEP, UCHECK_L1, UCHECK_L2 }; |
| #define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec) |
| #define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq) |
| |
| |
| /* |
| * This function checks whether the "actual" TSC frequency of a guest matches |
| * its expected frequency. In order to account for delays in taking the TSC |
| * measurements, a difference of 1% between the actual and the expected value |
| * is tolerated. |
| */ |
| static void compare_tsc_freq(uint64_t actual, uint64_t expected) |
| { |
| uint64_t tolerance, thresh_low, thresh_high; |
| |
| tolerance = expected / 100; |
| thresh_low = expected - tolerance; |
| thresh_high = expected + tolerance; |
| |
| TEST_ASSERT(thresh_low < actual, |
| "TSC freq is expected to be between %"PRIu64" and %"PRIu64 |
| " but it actually is %"PRIu64, |
| thresh_low, thresh_high, actual); |
| TEST_ASSERT(thresh_high > actual, |
| "TSC freq is expected to be between %"PRIu64" and %"PRIu64 |
| " but it actually is %"PRIu64, |
| thresh_low, thresh_high, actual); |
| } |
| |
| static void check_tsc_freq(int level) |
| { |
| uint64_t tsc_start, tsc_end, tsc_freq; |
| |
| /* |
| * Reading the TSC twice with about a second's difference should give |
| * us an approximation of the TSC frequency from the guest's |
| * perspective. Now, this won't be completely accurate, but it should |
| * be good enough for the purposes of this test. |
| */ |
| tsc_start = rdmsr(MSR_IA32_TSC); |
| GUEST_SLEEP(1); |
| tsc_end = rdmsr(MSR_IA32_TSC); |
| |
| tsc_freq = tsc_end - tsc_start; |
| |
| GUEST_CHECK(level, tsc_freq); |
| } |
| |
| static void l2_guest_code(void) |
| { |
| check_tsc_freq(UCHECK_L2); |
| |
| /* exit to L1 */ |
| __asm__ __volatile__("vmcall"); |
| } |
| |
| static void l1_guest_code(struct vmx_pages *vmx_pages) |
| { |
| unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; |
| uint32_t control; |
| |
| /* check that L1's frequency looks alright before launching L2 */ |
| check_tsc_freq(UCHECK_L1); |
| |
| GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); |
| GUEST_ASSERT(load_vmcs(vmx_pages)); |
| |
| /* prepare the VMCS for L2 execution */ |
| prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); |
| |
| /* enable TSC offsetting and TSC scaling for L2 */ |
| control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); |
| control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING; |
| vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); |
| |
| control = vmreadz(SECONDARY_VM_EXEC_CONTROL); |
| control |= SECONDARY_EXEC_TSC_SCALING; |
| vmwrite(SECONDARY_VM_EXEC_CONTROL, control); |
| |
| vmwrite(TSC_OFFSET, TSC_OFFSET_L2); |
| vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2); |
| vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32); |
| |
| /* launch L2 */ |
| GUEST_ASSERT(!vmlaunch()); |
| GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); |
| |
| /* check that L1's frequency still looks good */ |
| check_tsc_freq(UCHECK_L1); |
| |
| GUEST_DONE(); |
| } |
| |
| static void tsc_scaling_check_supported(void) |
| { |
| if (!kvm_check_cap(KVM_CAP_TSC_CONTROL)) { |
| print_skip("TSC scaling not supported by the HW"); |
| exit(KSFT_SKIP); |
| } |
| } |
| |
| static void stable_tsc_check_supported(void) |
| { |
| FILE *fp; |
| char buf[4]; |
| |
| fp = fopen("/sys/devices/system/clocksource/clocksource0/current_clocksource", "r"); |
| if (fp == NULL) |
| goto skip_test; |
| |
| if (fgets(buf, sizeof(buf), fp) == NULL) |
| goto skip_test; |
| |
| if (strncmp(buf, "tsc", sizeof(buf))) |
| goto skip_test; |
| |
| return; |
| skip_test: |
| print_skip("Kernel does not use TSC clocksource - assuming that host TSC is not stable"); |
| exit(KSFT_SKIP); |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| struct kvm_vm *vm; |
| vm_vaddr_t vmx_pages_gva; |
| |
| uint64_t tsc_start, tsc_end; |
| uint64_t tsc_khz; |
| uint64_t l1_scale_factor; |
| uint64_t l0_tsc_freq = 0; |
| uint64_t l1_tsc_freq = 0; |
| uint64_t l2_tsc_freq = 0; |
| |
| nested_vmx_check_supported(); |
| tsc_scaling_check_supported(); |
| stable_tsc_check_supported(); |
| |
| /* |
| * We set L1's scale factor to be a random number from 2 to 10. |
| * Ideally we would do the same for L2's factor but that one is |
| * referenced by both main() and l1_guest_code() and using a global |
| * variable does not work. |
| */ |
| srand(time(NULL)); |
| l1_scale_factor = (rand() % 9) + 2; |
| printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor); |
| printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR); |
| |
| tsc_start = rdtsc(); |
| sleep(1); |
| tsc_end = rdtsc(); |
| |
| l0_tsc_freq = tsc_end - tsc_start; |
| printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq); |
| |
| vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); |
| vcpu_alloc_vmx(vm, &vmx_pages_gva); |
| vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva); |
| |
| tsc_khz = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_TSC_KHZ, NULL); |
| TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed"); |
| |
| /* scale down L1's TSC frequency */ |
| vcpu_ioctl(vm, VCPU_ID, KVM_SET_TSC_KHZ, |
| (void *) (tsc_khz / l1_scale_factor)); |
| |
| for (;;) { |
| volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); |
| struct ucall uc; |
| |
| vcpu_run(vm, VCPU_ID); |
| TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, |
| "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", |
| run->exit_reason, |
| exit_reason_str(run->exit_reason)); |
| |
| switch (get_ucall(vm, VCPU_ID, &uc)) { |
| case UCALL_ABORT: |
| TEST_FAIL("%s", (const char *) uc.args[0]); |
| case UCALL_SYNC: |
| switch (uc.args[0]) { |
| case USLEEP: |
| sleep(uc.args[1]); |
| break; |
| case UCHECK_L1: |
| l1_tsc_freq = uc.args[1]; |
| printf("L1's TSC frequency is around: %"PRIu64 |
| "\n", l1_tsc_freq); |
| |
| compare_tsc_freq(l1_tsc_freq, |
| l0_tsc_freq / l1_scale_factor); |
| break; |
| case UCHECK_L2: |
| l2_tsc_freq = uc.args[1]; |
| printf("L2's TSC frequency is around: %"PRIu64 |
| "\n", l2_tsc_freq); |
| |
| compare_tsc_freq(l2_tsc_freq, |
| l1_tsc_freq * L2_SCALE_FACTOR); |
| break; |
| } |
| break; |
| case UCALL_DONE: |
| goto done; |
| default: |
| TEST_FAIL("Unknown ucall %lu", uc.cmd); |
| } |
| } |
| |
| done: |
| kvm_vm_free(vm); |
| return 0; |
| } |