x86/asyncpf: Update async page fault test for IRQ-based "page ready"
KVM has used an interrupt to deliver the 'page ready' APF event since Linux
v5.10, and the legacy mechanism using #PF was deprecated. Interrupt-based
'page ready' notification requires KVM_ASYNC_PF_DELIVERY_AS_INT to be set
in MSR_KVM_ASYNC_PF_EN, in addition to KVM_ASYNC_PF_ENABLED, to enable asyncpf.
Update asyncpf.c for the new interrupt-based notification: check for
(KVM_FEATURE_ASYNC_PF && KVM_FEATURE_ASYNC_PF_INT) support, and implement an
interrupt-based 'page ready' handler with the necessary struct changes.
To run this test, add the QEMU option "-cpu host" to check CPUID, since
KVM_FEATURE_ASYNC_PF_INT can't be detected without "-cpu host".
Opportunistically update the "help" section to describe how to set up
cgroups for cgroup v1 vs. v2.
Signed-off-by: Dan Wu <dan1.wu@intel.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Link: https://lore.kernel.org/r/20240108063014.41117-1-dan1.wu@intel.com
[sean: report skip instead of fail if no async #PFs occur, massage changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 44f4fd1..1a0f124 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -264,6 +264,12 @@
#define X86_FEATURE_PKS (CPUID(0x7, 0, ECX, 31))
/*
+ * KVM defined leafs
+ */
+#define KVM_FEATURE_ASYNC_PF (CPUID(0x40000001, 0, EAX, 4))
+#define KVM_FEATURE_ASYNC_PF_INT (CPUID(0x40000001, 0, EAX, 14))
+
+/*
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM (CPUID(0x80000001, 0, ECX, 2))
diff --git a/x86/asyncpf.c b/x86/asyncpf.c
index bc515be..9bf2056 100644
--- a/x86/asyncpf.c
+++ b/x86/asyncpf.c
@@ -1,8 +1,12 @@
/*
* Async PF test. For the test to actually do anything it needs to be started
- * in memory cgroup with 512M of memory and with more then 1G memory provided
+ * in memory cgroup with 512M of memory and with more than 1G memory provided
* to the guest.
*
+ * To identify the cgroup version on Linux:
+ * stat -fc %T /sys/fs/cgroup/
+ *
+ * If the output is tmpfs, your system is using cgroup v1:
* To create cgroup do as root:
* mkdir /dev/cgroup
* mount -t cgroup none -omemory /dev/cgroup
@@ -13,99 +17,135 @@
* echo $$ > /dev/cgroup/1/tasks
* echo 512M > /dev/cgroup/1/memory.limit_in_bytes
*
+ * If the output is cgroup2fs, your system is using cgroup v2:
+ * mkdir /sys/fs/cgroup/cg1
+ * echo $$ > /sys/fs/cgroup/cg1/cgroup.procs
+ * echo 512M > /sys/fs/cgroup/cg1/memory.max
+ *
*/
-#include "x86/msr.h"
#include "x86/processor.h"
-#include "x86/apic-defs.h"
#include "x86/apic.h"
-#include "x86/desc.h"
#include "x86/isr.h"
#include "x86/vm.h"
-
-#include "asm/page.h"
#include "alloc.h"
-#include "libcflat.h"
#include "vmalloc.h"
-#include <stdint.h>
#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
-#define KVM_PV_REASON_PAGE_READY 2
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
-volatile uint32_t apf_reason __attribute__((aligned(64)));
+#define HYPERVISOR_CALLBACK_VECTOR 0xf3
+
+struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
+ uint32_t flags;
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ uint32_t token;
+
+ uint8_t pad[56];
+ uint32_t enabled;
+} apf_reason __attribute__((aligned(64)));
+
char *buf;
+void* virt;
volatile uint64_t i;
volatile uint64_t phys;
+volatile uint32_t saved_token;
+volatile uint32_t asyncpf_num;
-static inline uint32_t get_apf_reason(void)
+static inline uint32_t get_and_clear_apf_reason(void)
{
- uint32_t r = apf_reason;
- apf_reason = 0;
+ uint32_t r = apf_reason.flags;
+ apf_reason.flags = 0;
return r;
}
-static void pf_isr(struct ex_regs *r)
+static void handle_interrupt(isr_regs_t *regs)
{
- void* virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
- uint32_t reason = get_apf_reason();
+ uint32_t apf_token = apf_reason.token;
+ apf_reason.token = 0;
+ wrmsr(MSR_KVM_ASYNC_PF_ACK, 1);
+
+ if (apf_token == 0xffffffff) {
+ report_pass("Wakeup all, got token 0x%x", apf_token);
+ } else if (apf_token == saved_token) {
+ asyncpf_num++;
+ install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
+ phys = 0;
+ } else {
+ report_fail("unexpected async pf int token 0x%x", apf_token);
+ }
+
+ eoi();
+}
+
+static void handle_pf(struct ex_regs *r)
+{
+ virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
+ uint32_t reason = get_and_clear_apf_reason();
switch (reason) {
- case 0:
- report_fail("unexpected #PF at %#lx", read_cr2());
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
- phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
- install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
- write_cr3(read_cr3());
- report_pass("Got not present #PF token %lx virt addr %p phys addr %#" PRIx64,
- read_cr2(), virt, phys);
- while(phys) {
- safe_halt(); /* enables irq */
- cli();
- }
- break;
- case KVM_PV_REASON_PAGE_READY:
- report_pass("Got present #PF token %lx", read_cr2());
- if ((uint32_t)read_cr2() == ~0)
- break;
- install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
- write_cr3(read_cr3());
- phys = 0;
- break;
- default:
- report_fail("unexpected async pf reason %" PRId32, reason);
- break;
+ case 0:
+ report_fail("unexpected #PF at %#lx", read_cr2());
+ exit(report_summary());
+ case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
+ install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
+ write_cr3(read_cr3());
+ saved_token = read_cr2();
+ while (phys) {
+ safe_halt(); /* enables irq */
+ }
+ break;
+ default:
+ report_fail("unexpected async pf with reason 0x%x", reason);
+ exit(report_summary());
}
}
-#define MEM 1ull*1024*1024*1024
+#define MEM (1ull*1024*1024*1024)
int main(int ac, char **av)
{
- int loop = 2;
+ if (!this_cpu_has(KVM_FEATURE_ASYNC_PF)) {
+ report_skip("KVM_FEATURE_ASYNC_PF is not supported\n");
+ return report_summary();
+ }
+
+ if (!this_cpu_has(KVM_FEATURE_ASYNC_PF_INT)) {
+ report_skip("KVM_FEATURE_ASYNC_PF_INT is not supported\n");
+ return report_summary();
+ }
setup_vm();
- printf("install handler\n");
- handle_exception(14, pf_isr);
- apf_reason = 0;
- printf("enable async pf\n");
+
+ handle_exception(PF_VECTOR, handle_pf);
+ handle_irq(HYPERVISOR_CALLBACK_VECTOR, handle_interrupt);
+ memset(&apf_reason, 0, sizeof(apf_reason));
+
+ wrmsr(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsr(MSR_KVM_ASYNC_PF_EN, virt_to_phys((void*)&apf_reason) |
- KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED);
- printf("alloc memory\n");
+ KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT);
+
buf = malloc(MEM);
sti();
- while(loop--) {
- printf("start loop\n");
- /* access a lot of memory to make host swap it out */
- for (i=0; i < MEM; i+=4096)
- buf[i] = 1;
- printf("end loop\n");
- }
- cli();
+ /* access a lot of memory to make host swap it out */
+ for (i = 0; i < MEM; i += 4096)
+ buf[i] = 1;
+
+ cli();
+ if (!asyncpf_num)
+ report_skip("No async page fault events, cgroup configuration likely needed");
+ else
+ report_pass("Serviced %d async page faults events (!PRESENT #PF + READY IRQ)",
+ asyncpf_num);
return report_summary();
}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 867a8ea..c4efaf5 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -154,7 +154,7 @@
[asyncpf]
file = asyncpf.flat
-extra_params = -m 2048
+extra_params = -cpu host -m 2048
[emulator]
file = emulator.flat