x86/asyncpf: Update async page fault test for IRQ-based "page ready"

KVM has used an interrupt for the 'page ready' APF event since Linux v5.10,
and the legacy mechanism using #PF has been deprecated. Interrupt-based
'page ready' notification additionally requires KVM_ASYNC_PF_DELIVERY_AS_INT
to be set in MSR_KVM_ASYNC_PF_EN to enable asyncpf.

Update asyncpf.c for the new interrupt-based notification: check for
(KVM_FEATURE_ASYNC_PF && KVM_FEATURE_ASYNC_PF_INT) support, and implement an
interrupt-based 'page ready' handler with the necessary struct changes.

To run this test, add the QEMU option "-cpu host" to check CPUID, since
KVM_FEATURE_ASYNC_PF_INT can't be detected without "-cpu host".

Opportunistically update the "help" section to describe how to set up
cgroups for cgroup v1 vs. v2.

Signed-off-by: Dan Wu <dan1.wu@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20240108063014.41117-1-dan1.wu@intel.com
[sean: report skip instead of fail if no async #PFs occur, massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 44f4fd1..1a0f124 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -264,6 +264,12 @@
 #define	X86_FEATURE_PKS			(CPUID(0x7, 0, ECX, 31))
 
 /*
+ * KVM defined leafs
+ */
+#define	KVM_FEATURE_ASYNC_PF		(CPUID(0x40000001, 0, EAX, 4))
+#define	KVM_FEATURE_ASYNC_PF_INT	(CPUID(0x40000001, 0, EAX, 14))
+
+/*
  * Extended Leafs, a.k.a. AMD defined
  */
 #define	X86_FEATURE_SVM			(CPUID(0x80000001, 0, ECX, 2))
diff --git a/x86/asyncpf.c b/x86/asyncpf.c
index bc515be..9bf2056 100644
--- a/x86/asyncpf.c
+++ b/x86/asyncpf.c
@@ -1,8 +1,12 @@
 /*
  * Async PF test. For the test to actually do anything it needs to be started
- * in memory cgroup with 512M of memory and with more then 1G memory provided
+ * in memory cgroup with 512M of memory and with more than 1G memory provided
  * to the guest.
  *
+ * To identify the cgroup version on Linux:
+ * stat -fc %T /sys/fs/cgroup/
+ *
+ * If the output is tmpfs, your system is using cgroup v1:
  * To create cgroup do as root:
  * mkdir /dev/cgroup
  * mount -t cgroup none -omemory /dev/cgroup
@@ -13,99 +17,135 @@
  * echo $$ >  /dev/cgroup/1/tasks
  * echo 512M > /dev/cgroup/1/memory.limit_in_bytes
  *
+ * If the output is cgroup2fs, your system is using cgroup v2:
+ * mkdir /sys/fs/cgroup/cg1
+ * echo $$ >  /sys/fs/cgroup/cg1/cgroup.procs
+ * echo 512M > /sys/fs/cgroup/cg1/memory.max
+ *
  */
-#include "x86/msr.h"
 #include "x86/processor.h"
-#include "x86/apic-defs.h"
 #include "x86/apic.h"
-#include "x86/desc.h"
 #include "x86/isr.h"
 #include "x86/vm.h"
-
-#include "asm/page.h"
 #include "alloc.h"
-#include "libcflat.h"
 #include "vmalloc.h"
-#include <stdint.h>
 
 #define KVM_PV_REASON_PAGE_NOT_PRESENT 1
-#define KVM_PV_REASON_PAGE_READY 2
 
 #define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_ASYNC_PF_INT    0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK    0x4b564d07
 
 #define KVM_ASYNC_PF_ENABLED                    (1 << 0)
 #define KVM_ASYNC_PF_SEND_ALWAYS                (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT            (1 << 3)
 
-volatile uint32_t apf_reason __attribute__((aligned(64)));
+#define HYPERVISOR_CALLBACK_VECTOR	0xf3
+
+struct kvm_vcpu_pv_apf_data {
+      /* Used for 'page not present' events delivered via #PF */
+      uint32_t  flags;
+
+      /* Used for 'page ready' events delivered via interrupt notification */
+      uint32_t  token;
+
+      uint8_t  pad[56];
+      uint32_t  enabled;
+} apf_reason __attribute__((aligned(64)));
+
 char *buf;
+void* virt;
 volatile uint64_t  i;
 volatile uint64_t phys;
+volatile uint32_t saved_token;
+volatile uint32_t asyncpf_num;
 
-static inline uint32_t get_apf_reason(void)
+static inline uint32_t get_and_clear_apf_reason(void)
 {
-	uint32_t r = apf_reason;
-	apf_reason = 0;
+	uint32_t r = apf_reason.flags;
+	apf_reason.flags = 0;
 	return r;
 }
 
-static void pf_isr(struct ex_regs *r)
+static void handle_interrupt(isr_regs_t *regs)
 {
-	void* virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
-	uint32_t reason = get_apf_reason();
+	uint32_t apf_token = apf_reason.token;
 
+	apf_reason.token = 0;
+	wrmsr(MSR_KVM_ASYNC_PF_ACK, 1);
+
+	if (apf_token == 0xffffffff) {
+		report_pass("Wakeup all, got token 0x%x", apf_token);
+	} else if (apf_token == saved_token) {
+		asyncpf_num++;
+		install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
+		phys = 0;
+	} else {
+		report_fail("unexpected async pf int token 0x%x", apf_token);
+	}
+
+	eoi();
+}
+
+static void handle_pf(struct ex_regs *r)
+{
+	virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
+	uint32_t reason = get_and_clear_apf_reason();
 	switch (reason) {
-		case 0:
-			report_fail("unexpected #PF at %#lx", read_cr2());
-			break;
-		case KVM_PV_REASON_PAGE_NOT_PRESENT:
-			phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
-			install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
-			write_cr3(read_cr3());
-			report_pass("Got not present #PF token %lx virt addr %p phys addr %#" PRIx64,
-				    read_cr2(), virt, phys);
-			while(phys) {
-				safe_halt(); /* enables irq */
-				cli();
-			}
-			break;
-		case KVM_PV_REASON_PAGE_READY:
-			report_pass("Got present #PF token %lx", read_cr2());
-			if ((uint32_t)read_cr2() == ~0)
-				break;
-			install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
-			write_cr3(read_cr3());
-			phys = 0;
-			break;
-		default:
-			report_fail("unexpected async pf reason %" PRId32, reason);
-			break;
+	case 0:
+		report_fail("unexpected #PF at %#lx", read_cr2());
+		exit(report_summary());
+	case KVM_PV_REASON_PAGE_NOT_PRESENT:
+		phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
+		install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
+		write_cr3(read_cr3());
+		saved_token = read_cr2();
+		while (phys) {
+			safe_halt(); /* enables irq */
+		}
+		break;
+	default:
+		report_fail("unexpected async pf with reason 0x%x", reason);
+		exit(report_summary());
 	}
 }
 
-#define MEM 1ull*1024*1024*1024
+#define MEM (1ull*1024*1024*1024)
 
 int main(int ac, char **av)
 {
-	int loop = 2;
+	if (!this_cpu_has(KVM_FEATURE_ASYNC_PF)) {
+		report_skip("KVM_FEATURE_ASYNC_PF is not supported\n");
+		return report_summary();
+	}
+
+	if (!this_cpu_has(KVM_FEATURE_ASYNC_PF_INT)) {
+		report_skip("KVM_FEATURE_ASYNC_PF_INT is not supported\n");
+		return report_summary();
+	}
 
 	setup_vm();
-	printf("install handler\n");
-	handle_exception(14, pf_isr);
-	apf_reason = 0;
-	printf("enable async pf\n");
+
+	handle_exception(PF_VECTOR, handle_pf);
+	handle_irq(HYPERVISOR_CALLBACK_VECTOR, handle_interrupt);
+	memset(&apf_reason, 0, sizeof(apf_reason));
+
+	wrmsr(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
 	wrmsr(MSR_KVM_ASYNC_PF_EN, virt_to_phys((void*)&apf_reason) |
-			KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED);
-	printf("alloc memory\n");
+			KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT);
+
 	buf = malloc(MEM);
 	sti();
-	while(loop--) {
-		printf("start loop\n");
-		/* access a lot of memory to make host swap it out */
-		for (i=0; i < MEM; i+=4096)
-			buf[i] = 1;
-		printf("end loop\n");
-	}
-	cli();
 
+	/* access a lot of memory to make host swap it out */
+	for (i = 0; i < MEM; i += 4096)
+		buf[i] = 1;
+
+	cli();
+	if (!asyncpf_num)
+		report_skip("No async page fault events, cgroup configuration likely needed");
+	else
+		report_pass("Serviced %d async page faults events (!PRESENT #PF + READY IRQ)",
+			    asyncpf_num);
 	return report_summary();
 }
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 867a8ea..c4efaf5 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -154,7 +154,7 @@
 
 [asyncpf]
 file = asyncpf.flat
-extra_params = -m 2048
+extra_params = -cpu host -m 2048
 
 [emulator]
 file = emulator.flat