| // SPDX-License-Identifier: GPL-2.0 |
| /* |
| * Test for x86 KVM_SET_PMU_EVENT_FILTER. |
| * |
| * Copyright (C) 2022, Google LLC. |
| * |
| * This work is licensed under the terms of the GNU GPL, version 2. |
| * |
| * Verifies the expected behavior of allow lists and deny lists for |
| * virtual PMU events. |
| */ |
| |
| #define _GNU_SOURCE /* for program_invocation_short_name */ |
| #include "test_util.h" |
| #include "kvm_util.h" |
| #include "processor.h" |
| |
| /* |
| * In lieu of copying perf_event.h into tools... |
| */ |
| #define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) |
| #define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) |
| |
| union cpuid10_eax { |
| struct { |
| unsigned int version_id:8; |
| unsigned int num_counters:8; |
| unsigned int bit_width:8; |
| unsigned int mask_length:8; |
| } split; |
| unsigned int full; |
| }; |
| |
| union cpuid10_ebx { |
| struct { |
| unsigned int no_unhalted_core_cycles:1; |
| unsigned int no_instructions_retired:1; |
| unsigned int no_unhalted_reference_cycles:1; |
| unsigned int no_llc_reference:1; |
| unsigned int no_llc_misses:1; |
| unsigned int no_branch_instruction_retired:1; |
| unsigned int no_branch_misses_retired:1; |
| } split; |
| unsigned int full; |
| }; |
| |
| /* End of stuff taken from perf_event.h. */ |
| |
| /* |
| * The bit index of "branch instructions retired" in the CPUID.0AH:EBX |
| * event-availability vector. Oddly, this isn't in perf_event.h. |
| */ |
| #define ARCH_PERFMON_BRANCHES_RETIRED 5 |
| |
| #define VCPU_ID 0 |
| #define NUM_BRANCHES 42 |
| |
| /* |
| * This is how the event selector and unit mask are stored in an AMD |
| * core performance event-select register. Intel's format is similar, |
| * but the event selector is only 8 bits. |
| */ |
| #define EVENT(select, umask) (((select) & 0xf00UL) << 24 | ((select) & 0xff) | \ |
| ((umask) & 0xff) << 8) |
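| |
| /* |
| * For example, EVENT(0x1C2, 0) encodes as 0x1000000C2: EventSelect[7:0] |
| * in bits 7:0, the unit mask in bits 15:8, and EventSelect[11:8] in |
| * bits 35:32, matching the event-select layout described above. |
| */ |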
| |
| /* |
| * "Branch instructions retired", from the Intel SDM, volume 3, |
| * "Pre-defined Architectural Performance Events." |
| */ |
| |
| #define INTEL_BR_RETIRED EVENT(0xc4, 0) |
| |
| /* |
| * "Retired branch instructions", from Processor Programming Reference |
| * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, |
| * Preliminary Processor Programming Reference (PPR) for AMD Family |
| * 17h Model 31h, Revision B0 Processors, and Preliminary Processor |
| * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision |
| * B1 Processors Volume 1 of 2. |
| */ |
| |
| #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) |
| |
| /* |
| * This event list comprises Intel's eight architectural events plus |
| * AMD's "retired branch instructions" for Zen[123] (and possibly |
| * other AMD CPUs). |
| */ |
| static const uint64_t event_list[] = { |
| EVENT(0x3c, 0), /* unhalted core cycles */ |
| EVENT(0xc0, 0), /* instructions retired */ |
| EVENT(0x3c, 1), /* unhalted reference cycles */ |
| EVENT(0x2e, 0x4f), /* LLC references */ |
| EVENT(0x2e, 0x41), /* LLC misses */ |
| EVENT(0xc4, 0), /* branch instructions retired */ |
| EVENT(0xc5, 0), /* branch misses retired */ |
| EVENT(0xa4, 1), /* topdown slots */ |
| AMD_ZEN_BR_RETIRED, |
| }; |
| |
| /* |
| * If we encounter a #GP during the guest PMU sanity check, then the guest |
| * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). |
| */ |
| static void guest_gp_handler(struct ex_regs *regs) |
| { |
| GUEST_SYNC(0); |
| } |
| |
| /* |
| * Check that we can write a new value to the given MSR and read it back. |
| * The caller should provide a non-empty set of bits that are safe to flip. |
| * |
| * Returns normally on success; reports failure to the host via GUEST_SYNC(0). |
| */ |
| static void check_msr(uint32_t msr, uint64_t bits_to_flip) |
| { |
| uint64_t v = rdmsr(msr) ^ bits_to_flip; |
| |
| wrmsr(msr, v); |
| if (rdmsr(msr) != v) |
| GUEST_SYNC(0); |
| |
| v ^= bits_to_flip; |
| wrmsr(msr, v); |
| if (rdmsr(msr) != v) |
| GUEST_SYNC(0); |
| } |
| |
| static void intel_guest_code(void) |
| { |
| check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); /* PMC0 global-enable bit */ |
| check_msr(MSR_P6_EVNTSEL0, 0xffff); /* event select and unit mask */ |
| check_msr(MSR_IA32_PMC0, 0xffff); /* low bits of the counter */ |
| GUEST_SYNC(1); |
| |
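| /* |
| * Program PMC0 to count "branch instructions retired" at CPL0, then |
| * execute "loop .": each of its NUM_BRANCHES iterations retires |
| * exactly one branch instruction (the loop instruction itself), so |
| * PMC0 should advance by NUM_BRANCHES. Report the delta to the host |
| * via GUEST_SYNC(). |
| */ |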
| for (;;) { |
| uint64_t br0, br1; |
| |
| wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
| wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | |
| ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED); |
| wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1); |
| br0 = rdmsr(MSR_IA32_PMC0); |
| __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); |
| br1 = rdmsr(MSR_IA32_PMC0); |
| GUEST_SYNC(br1 - br0); |
| } |
| } |
| |
| /* |
| * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23], |
| * this code uses the always-available, legacy K7 PMU MSRs, which alias to |
| * the first four of the six extended core PMU MSRs (MSR_K7_EVNTSEL0 is |
| * 0xc0010000 and MSR_K7_PERFCTR0 is 0xc0010004; the extended MSRs are |
| * interleaved select/counter pairs starting at 0xc0010200). |
| */ |
| static void amd_guest_code(void) |
| { |
| check_msr(MSR_K7_EVNTSEL0, 0xffff); |
| check_msr(MSR_K7_PERFCTR0, 0xffff); |
| GUEST_SYNC(1); |
| |
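| /* |
| * As in the Intel case: disable the counter, program it to count |
| * retired branch instructions, and measure a "loop ." loop that |
| * retires exactly NUM_BRANCHES branches. |
| */ |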
| for (;;) { |
| uint64_t br0, br1; |
| |
| wrmsr(MSR_K7_EVNTSEL0, 0); |
| wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | |
| ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED); |
| br0 = rdmsr(MSR_K7_PERFCTR0); |
| __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); |
| br1 = rdmsr(MSR_K7_PERFCTR0); |
| GUEST_SYNC(br1 - br0); |
| } |
| } |
| |
| /* |
| * Run the VM to the next GUEST_SYNC(value), and return the value passed |
| * to the sync. Any other exit from the guest is fatal. |
| */ |
| static uint64_t run_vm_to_sync(struct kvm_vm *vm) |
| { |
| struct kvm_run *run = vcpu_state(vm, VCPU_ID); |
| struct ucall uc; |
| |
| vcpu_run(vm, VCPU_ID); |
| TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, |
| "Exit_reason other than KVM_EXIT_IO: %u (%s)\n", |
| run->exit_reason, |
| exit_reason_str(run->exit_reason)); |
| get_ucall(vm, VCPU_ID, &uc); |
| TEST_ASSERT(uc.cmd == UCALL_SYNC, |
| "Received ucall other than UCALL_SYNC: %lu", uc.cmd); |
| return uc.args[1]; |
| } |
| |
| /* |
| * In a nested environment or if the vPMU is disabled, the guest PMU |
| * might not work as architected (accessing the PMU MSRs may raise a |
| * #GP, or writes could simply be discarded). In those situations, |
| * there is no point in running these tests. The guest code performs a |
| * sanity check and reports the result: GUEST_SYNC(1) on success, |
| * GUEST_SYNC(0) on failure. If the check fails, the guest's behavior |
| * on resumption is undefined. |
| */ |
| static bool sanity_check_pmu(struct kvm_vm *vm) |
| { |
| bool success; |
| |
| vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler); |
| success = run_vm_to_sync(vm); |
| vm_install_exception_handler(vm, GP_VECTOR, NULL); |
| |
| return success; |
| } |
| |
| static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) |
| { |
| struct kvm_pmu_event_filter *f; |
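| /* struct kvm_pmu_event_filter ends in a flexible array of events. */ |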
| int size = sizeof(*f) + nevents * sizeof(f->events[0]); |
| |
| f = malloc(size); |
| TEST_ASSERT(f, "Out of memory"); |
| memset(f, 0, size); |
| f->nevents = nevents; |
| return f; |
| } |
| |
| static struct kvm_pmu_event_filter * |
| create_pmu_event_filter(const uint64_t event_list[], |
| int nevents, uint32_t action) |
| { |
| struct kvm_pmu_event_filter *f; |
| int i; |
| |
| f = alloc_pmu_event_filter(nevents); |
| f->action = action; |
| for (i = 0; i < nevents; i++) |
| f->events[i] = event_list[i]; |
| |
| return f; |
| } |
| |
| static struct kvm_pmu_event_filter *event_filter(uint32_t action) |
| { |
| return create_pmu_event_filter(event_list, |
| ARRAY_SIZE(event_list), |
| action); |
| } |
| |
| /* |
| * Remove the first occurrence of 'event' (if any) from the filter's |
| * event list. |
| */ |
| static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, |
| uint64_t event) |
| { |
| bool found = false; |
| int i; |
| |
| for (i = 0; i < f->nevents; i++) { |
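| /* Once found, shift each subsequent event down one slot. */ |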
| if (found) |
| f->events[i - 1] = f->events[i]; |
| else |
| found = f->events[i] == event; |
| } |
| if (found) |
| f->nevents--; |
| return f; |
| } |
| |
| static void test_without_filter(struct kvm_vm *vm) |
| { |
| uint64_t count = run_vm_to_sync(vm); |
| |
| if (count != NUM_BRANCHES) |
| pr_info("%s: Branch instructions retired = %lu (expected %u)\n", |
| __func__, count, NUM_BRANCHES); |
| TEST_ASSERT(count, "Allowed PMU event is not counting"); |
| } |
| |
| static uint64_t test_with_filter(struct kvm_vm *vm, |
| struct kvm_pmu_event_filter *f) |
| { |
| vm_ioctl(vm, KVM_SET_PMU_EVENT_FILTER, (void *)f); |
| return run_vm_to_sync(vm); |
| } |
| |
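| /* |
| * Deny an event whose low byte matches AMD_ZEN_BR_RETIRED's 0xC2 but |
| * whose full 12-bit event select (0x1C2) differs. The filter must |
| * match on the full event select, so event 0xC2 itself should still |
| * count. |
| */ |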
| static void test_amd_deny_list(struct kvm_vm *vm) |
| { |
| uint64_t event = EVENT(0x1C2, 0); |
| struct kvm_pmu_event_filter *f; |
| uint64_t count; |
| |
| f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY); |
| count = test_with_filter(vm, f); |
| |
| free(f); |
| if (count != NUM_BRANCHES) |
| pr_info("%s: Branch instructions retired = %lu (expected %u)\n", |
| __func__, count, NUM_BRANCHES); |
| TEST_ASSERT(count, "Allowed PMU event is not counting"); |
| } |
| |
| static void test_member_deny_list(struct kvm_vm *vm) |
| { |
| struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); |
| uint64_t count = test_with_filter(vm, f); |
| |
| free(f); |
| if (count) |
| pr_info("%s: Branch instructions retired = %lu (expected 0)\n", |
| __func__, count); |
| TEST_ASSERT(!count, "Disallowed PMU event is counting"); |
| } |
| |
| static void test_member_allow_list(struct kvm_vm *vm) |
| { |
| struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); |
| uint64_t count = test_with_filter(vm, f); |
| |
| free(f); |
| if (count != NUM_BRANCHES) |
| pr_info("%s: Branch instructions retired = %lu (expected %u)\n", |
| __func__, count, NUM_BRANCHES); |
| TEST_ASSERT(count, "Allowed PMU event is not counting"); |
| } |
| |
| static void test_not_member_deny_list(struct kvm_vm *vm) |
| { |
| struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); |
| uint64_t count; |
| |
| remove_event(f, INTEL_BR_RETIRED); |
| remove_event(f, AMD_ZEN_BR_RETIRED); |
| count = test_with_filter(vm, f); |
| free(f); |
| if (count != NUM_BRANCHES) |
| pr_info("%s: Branch instructions retired = %lu (expected %u)\n", |
| __func__, count, NUM_BRANCHES); |
| TEST_ASSERT(count, "Allowed PMU event is not counting"); |
| } |
| |
| static void test_not_member_allow_list(struct kvm_vm *vm) |
| { |
| struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); |
| uint64_t count; |
| |
| remove_event(f, INTEL_BR_RETIRED); |
| remove_event(f, AMD_ZEN_BR_RETIRED); |
| count = test_with_filter(vm, f); |
| free(f); |
| if (count) |
| pr_info("%s: Branch instructions retired = %lu (expected 0)\n", |
| __func__, count); |
| TEST_ASSERT(!count, "Disallowed PMU event is counting"); |
| } |
| |
| /* |
| * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU. |
| * |
| * Note that KVM_CAP_PMU_CAPABILITY must be enabled before any VCPUs |
| * are created. |
| */ |
| static void test_pmu_config_disable(void (*guest_code)(void)) |
| { |
| int r; |
| struct kvm_vm *vm; |
| struct kvm_enable_cap cap = { 0 }; |
| |
| r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY); |
| if (!(r & KVM_PMU_CAP_DISABLE)) |
| return; |
| |
| vm = vm_create_without_vcpus(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES); |
| |
| cap.cap = KVM_CAP_PMU_CAPABILITY; |
| cap.args[0] = KVM_PMU_CAP_DISABLE; |
| TEST_ASSERT(!vm_enable_cap(vm, &cap), "Failed to set KVM_PMU_CAP_DISABLE."); |
| |
| vm_vcpu_add_default(vm, VCPU_ID, guest_code); |
| vm_init_descriptor_tables(vm); |
| vcpu_init_descriptor_tables(vm, VCPU_ID); |
| |
| TEST_ASSERT(!sanity_check_pmu(vm), |
| "Guest should not be able to use disabled PMU."); |
| |
| kvm_vm_free(vm); |
| } |
| |
| /* |
| * Check for a non-zero PMU version, at least one general-purpose |
| * counter per logical processor, an EBX bit vector of length greater |
| * than 5, and EBX[5] clear (a set bit in EBX means the corresponding |
| * architectural event is *not* available). |
| */ |
| static bool check_intel_pmu_leaf(struct kvm_cpuid_entry2 *entry) |
| { |
| union cpuid10_eax eax = { .full = entry->eax }; |
| union cpuid10_ebx ebx = { .full = entry->ebx }; |
| |
| return eax.split.version_id && eax.split.num_counters > 0 && |
| eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED && |
| !ebx.split.no_branch_instruction_retired; |
| } |
| |
| /* |
| * Note that CPUID leaf 0xa is Intel-specific. This leaf should be |
| * clear on AMD hardware. |
| */ |
| static bool use_intel_pmu(void) |
| { |
| struct kvm_cpuid_entry2 *entry; |
| |
| entry = kvm_get_supported_cpuid_index(0xa, 0); |
| return is_intel_cpu() && entry && check_intel_pmu_leaf(entry); |
| } |
| |
| static bool is_zen1(uint32_t eax) |
| { |
| return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f; |
| } |
| |
| static bool is_zen2(uint32_t eax) |
| { |
| return x86_family(eax) == 0x17 && |
| x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f; |
| } |
| |
| static bool is_zen3(uint32_t eax) |
| { |
| return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f; |
| } |
| |
| /* |
| * Determining AMD support for a PMU event requires consulting the AMD |
| * PPR for the CPU or reference material derived therefrom. The AMD |
| * test code herein has been verified to work on Zen1, Zen2, and Zen3. |
| * |
| * Feel free to add more AMD CPUs that are documented to support event |
| * select 0xc2 umask 0 as "retired branch instructions." |
| */ |
| static bool use_amd_pmu(void) |
| { |
| struct kvm_cpuid_entry2 *entry; |
| |
| entry = kvm_get_supported_cpuid_index(1, 0); |
| return is_amd_cpu() && entry && |
| (is_zen1(entry->eax) || |
| is_zen2(entry->eax) || |
| is_zen3(entry->eax)); |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| void (*guest_code)(void) = NULL; |
| struct kvm_vm *vm; |
| int r; |
| |
| /* Tell stdout not to buffer its content */ |
| setbuf(stdout, NULL); |
| |
| r = kvm_check_cap(KVM_CAP_PMU_EVENT_FILTER); |
| if (!r) { |
| print_skip("KVM_CAP_PMU_EVENT_FILTER not supported"); |
| exit(KSFT_SKIP); |
| } |
| |
| if (use_intel_pmu()) |
| guest_code = intel_guest_code; |
| else if (use_amd_pmu()) |
| guest_code = amd_guest_code; |
| |
| if (!guest_code) { |
| print_skip("Don't know how to test this guest PMU"); |
| exit(KSFT_SKIP); |
| } |
| |
| vm = vm_create_default(VCPU_ID, 0, guest_code); |
| |
| vm_init_descriptor_tables(vm); |
| vcpu_init_descriptor_tables(vm, VCPU_ID); |
| |
| if (!sanity_check_pmu(vm)) { |
| print_skip("Guest PMU is not functional"); |
| exit(KSFT_SKIP); |
| } |
| |
| if (use_amd_pmu()) |
| test_amd_deny_list(vm); |
| |
| test_without_filter(vm); |
| test_member_deny_list(vm); |
| test_member_allow_list(vm); |
| test_not_member_deny_list(vm); |
| test_not_member_allow_list(vm); |
| |
| kvm_vm_free(vm); |
| |
| test_pmu_config_disable(guest_code); |
| |
| return 0; |
| } |