// SPDX-License-Identifier: GPL-2.0
/*
 * vgic_lpi_stress - Stress test for KVM's ITS emulation
 *
 * Copyright (c) 2024 Google LLC
 */

#include <linux/sizes.h>
#include <pthread.h>
#include <stdatomic.h>
#include <sys/sysinfo.h>

#include "kvm_util.h"
#include "gic.h"
#include "gic_v3.h"
#include "gic_v3_its.h"
#include "processor.h"
#include "ucall.h"
#include "vgic.h"

#define TEST_MEMSLOT_INDEX	1

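/* Per the GICv3 architecture, INTIDs 8192 and above are LPIs. */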
#define GIC_LPI_OFFSET		8192

static size_t nr_iterations = 1000;
static vm_paddr_t gpa_base;

static struct kvm_vm *vm;
static struct kvm_vcpu **vcpus;
static int gic_fd, its_fd;

static struct test_data {
	bool request_vcpus_stop;
	u32 nr_cpus;
	u32 nr_devices;
	u32 nr_event_ids;

	vm_paddr_t device_table;
	vm_paddr_t collection_table;
	vm_paddr_t cmdq_base;
	void *cmdq_base_va;
	vm_paddr_t itt_tables;

	vm_paddr_t lpi_prop_table;
	vm_paddr_t lpi_pend_tables;
} test_data = {
	.nr_cpus = 1,
	.nr_devices = 1,
	.nr_event_ids = 16,
};

static void guest_irq_handler(struct ex_regs *regs)
{
	u32 intid = gic_get_and_ack_irq();

	if (intid == IAR_SPURIOUS)
		return;

	GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
	gic_set_eoi(intid);
}

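/*
 * Establish the LPI translations: map every collection to the redistributor
 * with the same ID (MAPC), give each device a 64K ITT (MAPD), then map each
 * (device ID, event ID) pair to a distinct LPI (MAPTI), spreading the LPIs
 * round-robin across all collections.
 */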
static void guest_setup_its_mappings(void)
{
	u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
	u32 nr_events = test_data.nr_event_ids;
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;

	for (coll_id = 0; coll_id < nr_cpus; coll_id++)
		its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);

	/* Round-robin the LPIs to all of the vCPUs in the VM */
	coll_id = 0;
	for (device_id = 0; device_id < nr_devices; device_id++) {
		vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);

		its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
				  itt_base, SZ_64K, true);

		for (event_id = 0; event_id < nr_events; event_id++) {
			its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
					   event_id, coll_id, intid++);

			coll_id = (coll_id + 1) % test_data.nr_cpus;
		}
	}
}

static void guest_invalidate_all_rdists(void)
{
	int i;

	for (i = 0; i < test_data.nr_cpus; i++)
		its_send_invall_cmd(test_data.cmdq_base_va, i);
}

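/*
 * Every vCPU enables LPIs at its own redistributor. vCPU 0 additionally
 * waits for all other vCPUs to become ready, then initializes the ITS and
 * installs the translations on everyone's behalf.
 */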
static void guest_setup_gic(void)
{
	static atomic_int nr_cpus_ready = 0;
	u32 cpuid = guest_get_vcpuid();

	gic_init(GIC_V3, test_data.nr_cpus);
	gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
			      test_data.lpi_pend_tables + (cpuid * SZ_64K));

	atomic_fetch_add(&nr_cpus_ready, 1);

	if (cpuid > 0)
		return;

	while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
		cpu_relax();

	its_init(test_data.collection_table, SZ_64K,
		 test_data.device_table, SZ_64K,
		 test_data.cmdq_base, SZ_64K);

	guest_setup_its_mappings();
	guest_invalidate_all_rdists();
}

static void guest_code(void)
{
	guest_setup_gic();

	GUEST_SYNC(0);

	/*
	 * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
	 * never getting the stop signal.
	 */
	while (!READ_ONCE(test_data.request_vcpus_stop))
		cpu_relax();

	GUEST_DONE();
}

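/*
 * Create a dedicated memslot at the top of guest physical memory to hold
 * the ITS and redistributor tables.
 */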
static void setup_memslot(void)
{
	size_t pages;
	size_t sz;

	/*
	 * For the ITS:
	 * - A single level device table
	 * - A single level collection table
	 * - The command queue
	 * - An ITT for each device
	 */
	sz = (3 + test_data.nr_devices) * SZ_64K;

	/*
	 * For the redistributors:
	 * - A shared LPI configuration table
	 * - An LPI pending table for each vCPU
	 */
	sz += (1 + test_data.nr_cpus) * SZ_64K;

	pages = sz / vm->page_size;
	gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
				    TEST_MEMSLOT_INDEX, pages, 0);
}

#define LPI_PROP_DEFAULT_PRIO	0xa0

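/*
 * Mark every LPI in the configuration table as enabled, Group 1, at the
 * default priority.
 */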
static void configure_lpis(void)
{
	size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
	u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
	size_t i;

	for (i = 0; i < nr_lpis; i++) {
		tbl[i] = LPI_PROP_DEFAULT_PRIO |
			 LPI_PROP_GROUP1 |
			 LPI_PROP_ENABLED;
	}
}

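/*
 * Carve the ITS and redistributor tables out of the test memslot and
 * publish their locations to the guest. The command queue is identity
 * mapped so the guest can write commands to it at the same address.
 */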
static void setup_test_data(void)
{
	size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
	u32 nr_devices = test_data.nr_devices;
	u32 nr_cpus = test_data.nr_cpus;
	vm_paddr_t cmdq_base;

	test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
						    gpa_base,
						    TEST_MEMSLOT_INDEX);

	test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
							gpa_base,
							TEST_MEMSLOT_INDEX);

	cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
				       TEST_MEMSLOT_INDEX);
	virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
	test_data.cmdq_base = cmdq_base;
	test_data.cmdq_base_va = (void *)cmdq_base;

	test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
						  gpa_base, TEST_MEMSLOT_INDEX);

	test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
						      gpa_base, TEST_MEMSLOT_INDEX);
	configure_lpis();

	test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
						       gpa_base, TEST_MEMSLOT_INDEX);

	sync_global_to_guest(vm, test_data);
}

static void setup_gic(void)
{
	gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
	__TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");

	its_fd = vgic_its_setup(vm);
}

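/*
 * Inject an MSI from userspace with KVM_SIGNAL_MSI, addressed at the ITS
 * doorbell (GITS_TRANSLATER); the ITS translates the (device ID, event ID)
 * pair into an LPI.
 */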
static void signal_lpi(u32 device_id, u32 event_id)
{
	vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;

	struct kvm_msi msi = {
		.address_lo	= db_addr,
		.address_hi	= db_addr >> 32,
		.data		= event_id,
		.devid		= device_id,
		.flags		= KVM_MSI_VALID_DEVID,
	};

	/*
	 * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
	 * which for arm64 implies having a valid translation in the ITS.
	 */
	TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
		    "KVM_SIGNAL_MSI failed to inject the LPI (device: %u, event: %u)",
		    device_id, event_id);
}

static pthread_barrier_t test_setup_barrier;

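/*
 * Each LPI worker models one ITS device: it waits at the setup barrier
 * until the guest has finished GIC setup, then fires every event ID for
 * its device nr_iterations times.
 */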
static void *lpi_worker_thread(void *data)
{
	u32 device_id = (size_t)data;
	u32 event_id;
	size_t i;

	pthread_barrier_wait(&test_setup_barrier);

	for (i = 0; i < nr_iterations; i++)
		for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
			signal_lpi(device_id, event_id);

	return NULL;
}

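/*
 * Run a vCPU until the guest reports done. The UCALL_SYNC issued after
 * guest GIC setup joins the setup barrier, releasing the LPI workers and
 * the main thread.
 */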
static void *vcpu_worker_thread(void *data)
{
	struct kvm_vcpu *vcpu = data;
	struct ucall uc;

	while (true) {
		vcpu_run(vcpu);

		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			pthread_barrier_wait(&test_setup_barrier);
			continue;
		case UCALL_DONE:
			return NULL;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		default:
			TEST_FAIL("Unknown ucall: %lu", uc.cmd);
		}
	}

	return NULL;
}

static void report_stats(struct timespec delta)
{
	double nr_lpis;
	double time;

	nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;

	time = delta.tv_sec;
	time += ((double)delta.tv_nsec) / NSEC_PER_SEC;

	pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
}

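/*
 * Start the vCPU and LPI worker threads, release them all at once through
 * the setup barrier, then time how long the workers take to inject their
 * LPIs. The vCPUs are only told to stop after the measurement window
 * closes.
 */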
static void run_test(void)
{
	u32 nr_devices = test_data.nr_devices;
	u32 nr_vcpus = test_data.nr_cpus;
	pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
	pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
	struct timespec start, delta;
	size_t i;

	TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");

	pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);

	for (i = 0; i < nr_vcpus; i++)
		pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);

	for (i = 0; i < nr_devices; i++)
		pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);

	pthread_barrier_wait(&test_setup_barrier);

	clock_gettime(CLOCK_MONOTONIC, &start);

	for (i = 0; i < nr_devices; i++)
		pthread_join(lpi_threads[i], NULL);

	delta = timespec_elapsed(start);
	write_guest_global(vm, test_data.request_vcpus_stop, true);

	for (i = 0; i < nr_vcpus; i++)
		pthread_join(vcpu_threads[i], NULL);

	report_stats(delta);
}

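/*
 * Create the VM and vCPUs, install the guest IRQ handler, and build the
 * GIC/ITS state. Ordering matters: the memslot must exist before
 * setup_test_data() allocates the tables from it.
 */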
static void setup_vm(void)
{
	int i;

	vcpus = malloc(test_data.nr_cpus * sizeof(*vcpus));
	TEST_ASSERT(vcpus, "Failed to allocate vCPU array");

	vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);

	vm_init_descriptor_tables(vm);
	for (i = 0; i < test_data.nr_cpus; i++)
		vcpu_init_descriptor_tables(vcpus[i]);

	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);

	setup_memslot();

	setup_gic();

	setup_test_data();
}

static void destroy_vm(void)
{
	close(its_fd);
	close(gic_fd);
	kvm_vm_free(vm);
	free(vcpus);
}

static void pr_usage(const char *name)
{
	pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] [-h]\n", name);
	pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
	pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
	pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
	pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
	pr_info(" -h:\tprint this help message\n");
}

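/*
 * Example: stress 16 devices with 32 events each across 4 vCPUs:
 *
 *	./vgic_lpi_stress -v 4 -d 16 -e 32 -i 1000
 */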
int main(int argc, char **argv)
{
	u32 nr_threads;
	int c;

	while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
		switch (c) {
		case 'v':
			test_data.nr_cpus = atoi(optarg);
			break;
		case 'd':
			test_data.nr_devices = atoi(optarg);
			break;
		case 'e':
			test_data.nr_event_ids = atoi(optarg);
			break;
		case 'i':
			nr_iterations = strtoul(optarg, NULL, 0);
			break;
		case 'h':
		default:
			pr_usage(argv[0]);
			return 1;
		}
	}

	nr_threads = test_data.nr_cpus + test_data.nr_devices;
	if (nr_threads > get_nprocs())
		pr_info("WARNING: running %u threads on %d CPUs; performance may be degraded.\n",
			nr_threads, get_nprocs());

	setup_vm();

	run_test();

	destroy_vm();

	return 0;
}