Merge tag 'kvm-x86-2024.06.14' of https://github.com/kvm-x86/kvm-unit-tests into HEAD
x86 fixes, cleanups, and new testcases:
- Add a testcase to verify that KVM doesn't inject a triple fault (or any
  other "error") if a nested VM is run with an EP4TA pointing at MMIO.
- Play nice with CR4.CET in test_vmxon_bad_cr()
- Force emulation when testing MSR_IA32_FLUSH_CMD to work around an issue where
Skylake CPUs don't follow the architecturally defined behavior, and so that
the test doesn't break if/when new bits are supported by future CPUs.
- Rework the async #PF test to support IRQ-based page-ready notifications.
- Fix a variety of issues related to adaptive PEBS.
- Add several nested VMX tests for virtual interrupt delivery and posted
interrupts.
- Ensure PAT is loaded with the default value after the nVMX PAT tests
(failure to do so was causing tests to fail due to all memory being UC).
- Misc cleanups.
diff --git a/lib/x86/apic.h b/lib/x86/apic.h
index c389d40..8df889b 100644
--- a/lib/x86/apic.h
+++ b/lib/x86/apic.h
@@ -81,6 +81,11 @@
return GET_APIC_MAXLVT(apic_read(APIC_LVR)) >= idx;
}
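+/* An interrupt's priority class, a.k.a. its TPR class, is bits 7:4 of the vector. */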
+static inline u8 task_priority_class(u8 vector)
+{
+ return vector >> 4;
+}
+
enum x2apic_reg_semantics {
X2APIC_INVALID = 0,
X2APIC_READABLE = BIT(0),
diff --git a/lib/x86/asm/bitops.h b/lib/x86/asm/bitops.h
index 13a25ec..54ec9c4 100644
--- a/lib/x86/asm/bitops.h
+++ b/lib/x86/asm/bitops.h
@@ -13,4 +13,12 @@
#define HAVE_BUILTIN_FLS 1
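+/*
+ * Atomically set bit 'nr' in the bitmap at 'addr' using a locked BTS.  Note,
+ * unlike the Linux kernel helper of the same name, the old value of the bit
+ * is not returned.
+ */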
+static inline void test_and_set_bit(long nr, unsigned long *addr)
+{
+ asm volatile("lock; bts %1,%0"
+ : "+m" (*addr)
+ : "Ir" (nr)
+ : "memory");
+}
+
#endif
diff --git a/lib/x86/desc.h b/lib/x86/desc.h
index 7778a0f..92c45a4 100644
--- a/lib/x86/desc.h
+++ b/lib/x86/desc.h
@@ -272,9 +272,9 @@
extern unsigned long get_gdt_entry_base(gdt_entry_t *entry);
extern unsigned long get_gdt_entry_limit(gdt_entry_t *entry);
-#define asm_safe(insn, inputs...) \
+#define __asm_safe(fep, insn, inputs...) \
({ \
- asm volatile(ASM_TRY("1f") \
+ asm volatile(__ASM_TRY(fep, "1f") \
insn "\n\t" \
"1:\n\t" \
: \
@@ -283,9 +283,15 @@
exception_vector(); \
})
-#define asm_safe_out1(insn, output, inputs...) \
+#define asm_safe(insn, inputs...) \
+ __asm_safe("", insn, inputs)
+
+#define asm_fep_safe(insn, inputs...) \
+	__asm_safe(KVM_FEP, insn, inputs)
+
+#define __asm_safe_out1(fep, insn, output, inputs...) \
({ \
- asm volatile(ASM_TRY("1f") \
+ asm volatile(__ASM_TRY(fep, "1f") \
insn "\n\t" \
"1:\n\t" \
: output \
@@ -294,9 +300,15 @@
exception_vector(); \
})
-#define asm_safe_out2(insn, output1, output2, inputs...) \
+#define asm_safe_out1(insn, output, inputs...) \
+ __asm_safe_out1("", insn, output, inputs)
+
+#define asm_fep_safe_out1(insn, output, inputs...) \
+ __asm_safe_out1(KVM_FEP, insn, output, inputs)
+
+#define __asm_safe_out2(fep, insn, output1, output2, inputs...) \
({ \
- asm volatile(ASM_TRY("1f") \
+ asm volatile(__ASM_TRY(fep, "1f") \
insn "\n\t" \
"1:\n\t" \
: output1, output2 \
@@ -305,6 +317,12 @@
exception_vector(); \
})
+#define asm_safe_out2(insn, output1, output2, inputs...) \
+ __asm_safe_out2("", insn, output1, output2, inputs)
+
+#define asm_fep_safe_out2(insn, output1, output2, inputs...) \
+ __asm_safe_out2(KVM_FEP, insn, output1, output2, inputs)
+
#define __asm_safe_report(want, insn, inputs...) \
do { \
int vector = asm_safe(insn, inputs); \
diff --git a/lib/x86/pmu.h b/lib/x86/pmu.h
index 8465e3c..f07fbd9 100644
--- a/lib/x86/pmu.h
+++ b/lib/x86/pmu.h
@@ -44,9 +44,13 @@
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
-#define PEBS_DATACFG_GP BIT_ULL(1)
+#define PEBS_DATACFG_GPRS BIT_ULL(1)
#define PEBS_DATACFG_XMMS BIT_ULL(2)
#define PEBS_DATACFG_LBRS BIT_ULL(3)
+#define PEBS_DATACFG_MASK (PEBS_DATACFG_MEMINFO | \
+ PEBS_DATACFG_GPRS | \
+ PEBS_DATACFG_XMMS | \
+ PEBS_DATACFG_LBRS)
#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
#define PEBS_DATACFG_LBR_SHIFT 24
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 44f4fd1..da1ed66 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -264,6 +264,12 @@
#define X86_FEATURE_PKS (CPUID(0x7, 0, ECX, 31))
/*
+ * KVM defined leafs
+ */
+#define KVM_FEATURE_ASYNC_PF (CPUID(0x40000001, 0, EAX, 4))
+#define KVM_FEATURE_ASYNC_PF_INT (CPUID(0x40000001, 0, EAX, 14))
+
+/*
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM (CPUID(0x80000001, 0, ECX, 2))
@@ -430,12 +436,12 @@
asm volatile ("wrmsr" : : "a"(a), "d"(d), "c"(index) : "memory");
}
-#define rdreg64_safe(insn, index, val) \
+#define __rdreg64_safe(fep, insn, index, val) \
({ \
uint32_t a, d; \
int vector; \
\
- vector = asm_safe_out2(insn, "=a"(a), "=d"(d), "c"(index)); \
+ vector = __asm_safe_out2(fep, insn, "=a"(a), "=d"(d), "c"(index));\
\
if (vector) \
*(val) = 0; \
@@ -444,13 +450,18 @@
vector; \
})
-#define wrreg64_safe(insn, index, val) \
+#define rdreg64_safe(insn, index, val) \
+ __rdreg64_safe("", insn, index, val)
+
+#define __wrreg64_safe(fep, insn, index, val) \
({ \
uint32_t eax = (val), edx = (val) >> 32; \
\
- asm_safe(insn, "a" (eax), "d" (edx), "c" (index)); \
+ __asm_safe(fep, insn, "a" (eax), "d" (edx), "c" (index)); \
})
+#define wrreg64_safe(insn, index, val) \
+ __wrreg64_safe("", insn, index, val)
static inline int rdmsr_safe(u32 index, uint64_t *val)
{
@@ -462,6 +473,11 @@
return wrreg64_safe("wrmsr", index, val);
}
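+/* Like wrmsr_safe(), but force KVM to emulate the WRMSR via the forced emulation prefix. */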
+static inline int wrmsr_fep_safe(u32 index, u64 val)
+{
+ return __wrreg64_safe(KVM_FEP, "wrmsr", index, val);
+}
+
static inline int rdpmc_safe(u32 index, uint64_t *val)
{
return rdreg64_safe("rdpmc", index, val);
diff --git a/x86/asyncpf.c b/x86/asyncpf.c
index bc515be..9bf2056 100644
--- a/x86/asyncpf.c
+++ b/x86/asyncpf.c
@@ -1,8 +1,12 @@
/*
* Async PF test. For the test to actually do anything it needs to be started
- * in memory cgroup with 512M of memory and with more then 1G memory provided
+ * in memory cgroup with 512M of memory and with more than 1G memory provided
* to the guest.
*
+ * To identify the cgroup version on Linux:
+ * stat -fc %T /sys/fs/cgroup/
+ *
+ * If the output is tmpfs, your system is using cgroup v1:
* To create cgroup do as root:
* mkdir /dev/cgroup
* mount -t cgroup none -omemory /dev/cgroup
@@ -13,99 +17,135 @@
* echo $$ > /dev/cgroup/1/tasks
* echo 512M > /dev/cgroup/1/memory.limit_in_bytes
*
+ * If the output is cgroup2fs, your system is using cgroup v2:
+ * mkdir /sys/fs/cgroup/cg1
+ * echo $$ > /sys/fs/cgroup/cg1/cgroup.procs
+ * echo 512M > /sys/fs/cgroup/cg1/memory.max
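+ *
+ * Either way, with the shell in the memory-limited cgroup, run the test with
+ * more guest memory than the cgroup limit, e.g. (assuming the usual x86/run
+ * wrapper):
+ *   ./x86/run x86/asyncpf.flat -cpu host -m 2048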
+ *
*/
-#include "x86/msr.h"
#include "x86/processor.h"
-#include "x86/apic-defs.h"
#include "x86/apic.h"
-#include "x86/desc.h"
#include "x86/isr.h"
#include "x86/vm.h"
-
-#include "asm/page.h"
#include "alloc.h"
-#include "libcflat.h"
#include "vmalloc.h"
-#include <stdint.h>
#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
-#define KVM_PV_REASON_PAGE_READY 2
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
-volatile uint32_t apf_reason __attribute__((aligned(64)));
+#define HYPERVISOR_CALLBACK_VECTOR 0xf3
+
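+/* Guest/host shared data; layout must match KVM's struct kvm_vcpu_pv_apf_data. */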
+struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
+ uint32_t flags;
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ uint32_t token;
+
+ uint8_t pad[56];
+ uint32_t enabled;
+} apf_reason __attribute__((aligned(64)));
+
char *buf;
+void* virt;
volatile uint64_t i;
volatile uint64_t phys;
+volatile uint32_t saved_token;
+volatile uint32_t asyncpf_num;
-static inline uint32_t get_apf_reason(void)
+static inline uint32_t get_and_clear_apf_reason(void)
{
- uint32_t r = apf_reason;
- apf_reason = 0;
+ uint32_t r = apf_reason.flags;
+ apf_reason.flags = 0;
return r;
}
-static void pf_isr(struct ex_regs *r)
+static void handle_interrupt(isr_regs_t *regs)
{
- void* virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
- uint32_t reason = get_apf_reason();
+ uint32_t apf_token = apf_reason.token;
+ apf_reason.token = 0;
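+	/* ACK the 'page ready' event so that KVM can deliver the next notification. */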
+ wrmsr(MSR_KVM_ASYNC_PF_ACK, 1);
+
+ if (apf_token == 0xffffffff) {
+ report_pass("Wakeup all, got token 0x%x", apf_token);
+ } else if (apf_token == saved_token) {
+ asyncpf_num++;
+ install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
+ phys = 0;
+ } else {
+ report_fail("unexpected async pf int token 0x%x", apf_token);
+ }
+
+ eoi();
+}
+
+static void handle_pf(struct ex_regs *r)
+{
+ virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
+ uint32_t reason = get_and_clear_apf_reason();
switch (reason) {
- case 0:
- report_fail("unexpected #PF at %#lx", read_cr2());
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
- phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
- install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
- write_cr3(read_cr3());
- report_pass("Got not present #PF token %lx virt addr %p phys addr %#" PRIx64,
- read_cr2(), virt, phys);
- while(phys) {
- safe_halt(); /* enables irq */
- cli();
- }
- break;
- case KVM_PV_REASON_PAGE_READY:
- report_pass("Got present #PF token %lx", read_cr2());
- if ((uint32_t)read_cr2() == ~0)
- break;
- install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
- write_cr3(read_cr3());
- phys = 0;
- break;
- default:
- report_fail("unexpected async pf reason %" PRId32, reason);
- break;
+ case 0:
+ report_fail("unexpected #PF at %#lx", read_cr2());
+ exit(report_summary());
+ case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
+ install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
+ write_cr3(read_cr3());
+ saved_token = read_cr2();
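+		/* Wait for the 'page ready' IRQ handler to make the page present again. */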
+ while (phys) {
+ safe_halt(); /* enables irq */
+ }
+ break;
+ default:
+ report_fail("unexpected async pf with reason 0x%x", reason);
+ exit(report_summary());
}
}
-#define MEM 1ull*1024*1024*1024
+#define MEM (1ull*1024*1024*1024)
int main(int ac, char **av)
{
- int loop = 2;
+ if (!this_cpu_has(KVM_FEATURE_ASYNC_PF)) {
+		report_skip("KVM_FEATURE_ASYNC_PF is not supported");
+ return report_summary();
+ }
+
+ if (!this_cpu_has(KVM_FEATURE_ASYNC_PF_INT)) {
+		report_skip("KVM_FEATURE_ASYNC_PF_INT is not supported");
+ return report_summary();
+ }
setup_vm();
- printf("install handler\n");
- handle_exception(14, pf_isr);
- apf_reason = 0;
- printf("enable async pf\n");
+
+ handle_exception(PF_VECTOR, handle_pf);
+ handle_irq(HYPERVISOR_CALLBACK_VECTOR, handle_interrupt);
+ memset(&apf_reason, 0, sizeof(apf_reason));
+
+ wrmsr(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsr(MSR_KVM_ASYNC_PF_EN, virt_to_phys((void*)&apf_reason) |
- KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED);
- printf("alloc memory\n");
+ KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT);
+
buf = malloc(MEM);
sti();
- while(loop--) {
- printf("start loop\n");
- /* access a lot of memory to make host swap it out */
- for (i=0; i < MEM; i+=4096)
- buf[i] = 1;
- printf("end loop\n");
- }
- cli();
+ /* access a lot of memory to make host swap it out */
+ for (i = 0; i < MEM; i += 4096)
+ buf[i] = 1;
+
+ cli();
+ if (!asyncpf_num)
+ report_skip("No async page fault events, cgroup configuration likely needed");
+ else
+		report_pass("Serviced %d async page fault events (!PRESENT #PF + READY IRQ)",
+ asyncpf_num);
return report_summary();
}
diff --git a/x86/msr.c b/x86/msr.c
index 3a041fa..e21ff0a 100644
--- a/x86/msr.c
+++ b/x86/msr.c
@@ -90,7 +90,7 @@
unsigned char vector = wrmsr_safe(msr, val);
report(!vector,
- "Expected success on WRSMR(%s, 0x%llx), got vector %d",
+ "Expected success on WRMSR(%s, 0x%llx), got vector %d",
name, val, vector);
}
@@ -99,7 +99,7 @@
unsigned char vector = wrmsr_safe(msr, val);
report(vector == GP_VECTOR,
- "Expected #GP on WRSMR(%s, 0x%llx), got vector %d",
+ "Expected #GP on WRMSR(%s, 0x%llx), got vector %d",
name, val, vector);
}
@@ -109,7 +109,17 @@
unsigned char vector = rdmsr_safe(msr, &ignored);
report(vector == GP_VECTOR,
- "Expected #GP on RDSMR(%s), got vector %d", name, vector);
+ "Expected #GP on RDMSR(%s), got vector %d", name, vector);
+}
+
+static void test_wrmsr_fep_fault(u32 msr, const char *name,
+ unsigned long long val)
+{
+ unsigned char vector = wrmsr_fep_safe(msr, val);
+
+ report(vector == GP_VECTOR,
+	       "Expected #GP on emulated WRMSR(%s, 0x%llx), got vector %d",
+ name, val, vector);
}
static void test_msr(struct msr_info *msr, bool is_64bit_host)
@@ -302,8 +312,11 @@
test_wrmsr_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", 0);
test_wrmsr_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", L1D_FLUSH);
}
- for (i = 1; i < 64; i++)
- test_wrmsr_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", BIT_ULL(i));
+
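+	/*
+	 * Force emulation of the reserved bit checks, as hardware (e.g. Skylake)
+	 * doesn't always #GP on reserved FLUSH_CMD bits, and so that the test
+	 * doesn't fail if/when new bits are supported by future CPUs.
+	 */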
+ if (is_fep_available()) {
+ for (i = 1; i < 64; i++)
+ test_wrmsr_fep_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", BIT_ULL(i));
+ }
}
int main(int ac, char **av)
diff --git a/x86/pmu.c b/x86/pmu.c
index 7062c1a..ce9abbe 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -69,6 +69,7 @@
static void cnt_overflow(isr_regs_t *regs)
{
irq_received++;
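+	/*
+	 * The local APIC sets the mask bit in LVTPC when it delivers a PMI;
+	 * clear the mask so that subsequent PMIs can be delivered.
+	 */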
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
apic_write(APIC_EOI, 0);
}
diff --git a/x86/pmu_pebs.c b/x86/pmu_pebs.c
index f7b52b9..77875c4 100644
--- a/x86/pmu_pebs.c
+++ b/x86/pmu_pebs.c
@@ -78,13 +78,6 @@
0x412e, /* PERF_COUNT_HW_CACHE_MISSES */
};
-static u64 pebs_data_cfgs[] = {
- PEBS_DATACFG_MEMINFO,
- PEBS_DATACFG_GP,
- PEBS_DATACFG_XMMS,
- PEBS_DATACFG_LBRS | ((MAX_NUM_LBR_ENTRY -1) << PEBS_DATACFG_LBR_SHIFT),
-};
-
/* Iterating each counter value is a waste of time, pick a few typical values. */
static u64 counter_start_values[] = {
/* if PEBS counter doesn't overflow at all */
@@ -96,16 +89,16 @@
0xffffffffffff,
};
-static unsigned int get_adaptive_pebs_record_size(u64 pebs_data_cfg)
+static unsigned int get_pebs_record_size(u64 pebs_data_cfg, bool use_adaptive)
{
unsigned int sz = sizeof(struct pebs_basic);
- if (!has_baseline)
+ if (!use_adaptive)
return sz;
if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
sz += sizeof(struct pebs_meminfo);
- if (pebs_data_cfg & PEBS_DATACFG_GP)
+ if (pebs_data_cfg & PEBS_DATACFG_GPRS)
sz += sizeof(struct pebs_gprs);
if (pebs_data_cfg & PEBS_DATACFG_XMMS)
sz += sizeof(struct pebs_xmm);
@@ -117,6 +110,7 @@
static void cnt_overflow(isr_regs_t *regs)
{
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
apic_write(APIC_EOI, 0);
}
@@ -206,10 +200,10 @@
free_page(pebs_buffer);
}
-static void pebs_enable(u64 bitmask, u64 pebs_data_cfg)
+static void pebs_enable(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
static struct debug_store *ds;
- u64 baseline_extra_ctrl = 0, fixed_ctr_ctrl = 0;
+ u64 adaptive_ctrl = 0, fixed_ctr_ctrl = 0;
unsigned int idx;
if (has_baseline)
@@ -219,15 +213,15 @@
ds->pebs_index = ds->pebs_buffer_base = (unsigned long)pebs_buffer;
ds->pebs_absolute_maximum = (unsigned long)pebs_buffer + PAGE_SIZE;
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
- get_adaptive_pebs_record_size(pebs_data_cfg);
+ get_pebs_record_size(pebs_data_cfg, use_adaptive);
for (idx = 0; idx < pmu.nr_fixed_counters; idx++) {
if (!(BIT_ULL(FIXED_CNT_INDEX + idx) & bitmask))
continue;
- if (has_baseline)
- baseline_extra_ctrl = BIT(FIXED_CNT_INDEX + idx * 4);
+ if (use_adaptive)
+ adaptive_ctrl = BIT(FIXED_CNT_INDEX + idx * 4);
wrmsr(MSR_PERF_FIXED_CTRx(idx), ctr_start_val);
- fixed_ctr_ctrl |= (0xbULL << (idx * 4) | baseline_extra_ctrl);
+ fixed_ctr_ctrl |= (0xbULL << (idx * 4) | adaptive_ctrl);
}
if (fixed_ctr_ctrl)
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, fixed_ctr_ctrl);
@@ -235,10 +229,10 @@
for (idx = 0; idx < max_nr_gp_events; idx++) {
if (!(BIT_ULL(idx) & bitmask))
continue;
- if (has_baseline)
- baseline_extra_ctrl = ICL_EVENTSEL_ADAPTIVE;
+ if (use_adaptive)
+ adaptive_ctrl = ICL_EVENTSEL_ADAPTIVE;
wrmsr(MSR_GP_EVENT_SELECTx(idx), EVNTSEL_EN | EVNTSEL_OS | EVNTSEL_USR |
- intel_arch_events[idx] | baseline_extra_ctrl);
+ intel_arch_events[idx] | adaptive_ctrl);
wrmsr(MSR_GP_COUNTERx(idx), ctr_start_val);
}
@@ -275,11 +269,11 @@
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
-static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg)
+static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
struct pebs_basic *pebs_rec = (struct pebs_basic *)pebs_buffer;
struct debug_store *ds = (struct debug_store *)ds_bufer;
- unsigned int pebs_record_size = get_adaptive_pebs_record_size(pebs_data_cfg);
+ unsigned int pebs_record_size;
unsigned int count = 0;
bool expected, pebs_idx_match, pebs_size_match, data_cfg_match;
void *cur_record;
@@ -300,15 +294,25 @@
do {
pebs_rec = (struct pebs_basic *)cur_record;
pebs_record_size = pebs_rec->format_size >> RECORD_SIZE_OFFSET;
- pebs_idx_match =
- pebs_rec->applicable_counters & bitmask;
- pebs_size_match =
- pebs_record_size == get_adaptive_pebs_record_size(pebs_data_cfg);
- data_cfg_match =
- (pebs_rec->format_size & GENMASK_ULL(47, 0)) == pebs_data_cfg;
+ pebs_idx_match = pebs_rec->applicable_counters & bitmask;
+ pebs_size_match = pebs_record_size == get_pebs_record_size(pebs_data_cfg, use_adaptive);
+ data_cfg_match = (pebs_rec->format_size & GENMASK_ULL(47, 0)) == pebs_data_cfg;
expected = pebs_idx_match && pebs_size_match && data_cfg_match;
report(expected,
"PEBS record (written seq %d) is verified (including size, counters and cfg).", count);
+ if (use_adaptive && (pebs_data_cfg & PEBS_DATACFG_LBRS)) {
+ unsigned int lbrs_offset = get_pebs_record_size(pebs_data_cfg & ~PEBS_DATACFG_LBRS, true);
+ struct lbr_entry *pebs_lbrs = cur_record + lbrs_offset;
+ int i;
+
+ for (i = 0; i < MAX_NUM_LBR_ENTRY; i++) {
+ if (!pebs_lbrs[i].from && !pebs_lbrs[i].to)
+ continue;
+
+ report_fail("PEBS LBR record %u isn't empty, got from = '%lx', to = '%lx', info = '%lx'",
+ i, pebs_lbrs[i].from, pebs_lbrs[i].to, pebs_lbrs[i].info);
+ }
+ }
cur_record = cur_record + pebs_record_size;
count++;
} while (expected && (void *)cur_record < (void *)ds->pebs_index);
@@ -318,56 +322,57 @@
printf("FAIL: The applicable_counters (0x%lx) doesn't match with pmc_bitmask (0x%lx).\n",
pebs_rec->applicable_counters, bitmask);
if (!pebs_size_match)
- printf("FAIL: The pebs_record_size (%d) doesn't match with MSR_PEBS_DATA_CFG (%d).\n",
- pebs_record_size, get_adaptive_pebs_record_size(pebs_data_cfg));
+ printf("FAIL: The pebs_record_size (%d) doesn't match with expected record size (%d).\n",
+ pebs_record_size, get_pebs_record_size(pebs_data_cfg, use_adaptive));
if (!data_cfg_match)
- printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with MSR_PEBS_DATA_CFG (0x%lx).\n",
- pebs_rec->format_size & 0xffffffffffff, pebs_data_cfg);
+ printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with the effective MSR_PEBS_DATA_CFG (0x%lx).\n",
+ pebs_rec->format_size & 0xffffffffffff, use_adaptive ? pebs_data_cfg : 0);
}
}
-static void check_one_counter(enum pmc_type type,
- unsigned int idx, u64 pebs_data_cfg)
+static void check_one_counter(enum pmc_type type, unsigned int idx,
+ u64 pebs_data_cfg, bool use_adaptive)
{
int pebs_bit = BIT_ULL(type == FIXED ? FIXED_CNT_INDEX + idx : idx);
report_prefix_pushf("%s counter %d (0x%lx)",
type == FIXED ? "Extended Fixed" : "GP", idx, ctr_start_val);
reset_pebs();
- pebs_enable(pebs_bit, pebs_data_cfg);
+ pebs_enable(pebs_bit, pebs_data_cfg, use_adaptive);
workload();
pebs_disable(idx);
- check_pebs_records(pebs_bit, pebs_data_cfg);
+ check_pebs_records(pebs_bit, pebs_data_cfg, use_adaptive);
report_prefix_pop();
}
/* more than one PEBS records will be generated. */
-static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg)
+static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg,
+ bool use_adaptive)
{
reset_pebs();
- pebs_enable(bitmask, pebs_data_cfg);
+ pebs_enable(bitmask, pebs_data_cfg, use_adaptive);
workload2();
pebs_disable(0);
- check_pebs_records(bitmask, pebs_data_cfg);
+ check_pebs_records(bitmask, pebs_data_cfg, use_adaptive);
}
-static void check_pebs_counters(u64 pebs_data_cfg)
+static void check_pebs_counters(u64 pebs_data_cfg, bool use_adaptive)
{
unsigned int idx;
u64 bitmask = 0;
- for (idx = 0; idx < pmu.nr_fixed_counters; idx++)
- check_one_counter(FIXED, idx, pebs_data_cfg);
+ for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
+ check_one_counter(FIXED, idx, pebs_data_cfg, use_adaptive);
for (idx = 0; idx < max_nr_gp_events; idx++)
- check_one_counter(GP, idx, pebs_data_cfg);
+ check_one_counter(GP, idx, pebs_data_cfg, use_adaptive);
- for (idx = 0; idx < pmu.nr_fixed_counters; idx++)
+ for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
bitmask |= BIT_ULL(FIXED_CNT_INDEX + idx);
for (idx = 0; idx < max_nr_gp_events; idx += 2)
bitmask |= BIT_ULL(idx);
report_prefix_pushf("Multiple (0x%lx)", bitmask);
- check_multiple_counters(bitmask, pebs_data_cfg);
+ check_multiple_counters(bitmask, pebs_data_cfg, use_adaptive);
report_prefix_pop();
}
@@ -415,13 +420,22 @@
for (i = 0; i < ARRAY_SIZE(counter_start_values); i++) {
ctr_start_val = counter_start_values[i];
- check_pebs_counters(0);
+ check_pebs_counters(0, false);
if (!has_baseline)
continue;
- for (j = 0; j < ARRAY_SIZE(pebs_data_cfgs); j++) {
- report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfgs[j]);
- check_pebs_counters(pebs_data_cfgs[j]);
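+		/*
+		 * Test every combination of the adaptive PEBS data configuration
+		 * bits, both with adaptive records enabled and with the config
+		 * expected to be ignored (adaptive disabled).
+		 */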
+ for (j = 0; j <= PEBS_DATACFG_MASK; j++) {
+ u64 pebs_data_cfg = j;
+
+ if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+				pebs_data_cfg |= ((MAX_NUM_LBR_ENTRY - 1) << PEBS_DATACFG_LBR_SHIFT);
+
+ report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfg);
+ check_pebs_counters(pebs_data_cfg, true);
+ report_prefix_pop();
+
+ report_prefix_pushf("Ignored Adaptive (0x%lx)", pebs_data_cfg);
+ check_pebs_counters(pebs_data_cfg, false);
report_prefix_pop();
}
}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 867a8ea..7c1691a 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -81,10 +81,13 @@
extra_params = -append 'inl_from_pmtimer'
groups = vmexit
+# To allow IPIs to be accelerated by SVM AVIC when the feature is available and
+# enabled, do not create a Programmable Interval Timer (PIT, a.k.a. 8254), since
+# such a device will disable/inhibit AVIC if exposed to the guest.
[vmexit_ipi]
file = vmexit.flat
smp = 2
-extra_params = -append 'ipi'
+extra_params = -machine pit=off -append 'ipi'
groups = vmexit
[vmexit_ipi_halt]
@@ -154,7 +157,7 @@
[asyncpf]
file = asyncpf.flat
-extra_params = -m 2048
+extra_params = -cpu host -m 2048
[emulator]
file = emulator.flat
@@ -317,7 +320,7 @@
[vmx]
file = vmx.flat
-extra_params = -cpu max,+vmx -append "-exit_monitor_from_l2_test -ept_access* -vmx_smp* -vmx_vmcs_shadow_test -atomic_switch_overflow_msrs_test -vmx_init_signal_test -vmx_apic_passthrough_tpr_threshold_test -apic_reg_virt_test -virt_x2apic_mode_test -vmx_pf_exception_test -vmx_pf_exception_forced_emulation_test -vmx_pf_no_vpid_test -vmx_pf_invvpid_test -vmx_pf_vpid_test"
+extra_params = -cpu max,+vmx -append "-exit_monitor_from_l2_test -ept_access* -vmx_smp* -vmx_vmcs_shadow_test -atomic_switch_overflow_msrs_test -vmx_init_signal_test -vmx_apic_passthrough_tpr_threshold_test -apic_reg_virt_test -virt_x2apic_mode_test -vmx_pf_exception_test -vmx_pf_exception_forced_emulation_test -vmx_pf_no_vpid_test -vmx_pf_invvpid_test -vmx_pf_vpid_test -vmx_basic_vid_test -vmx_eoi_virt_test -vmx_posted_interrupts_test"
arch = x86_64
groups = vmx
@@ -343,7 +346,15 @@
[vmx_apicv_test]
file = vmx.flat
-extra_params = -cpu max,+vmx -append "apic_reg_virt_test virt_x2apic_mode_test"
+extra_params = -cpu max,+vmx -append "apic_reg_virt_test virt_x2apic_mode_test vmx_basic_vid_test vmx_eoi_virt_test"
+arch = x86_64
+groups = vmx
+timeout = 10
+
+[vmx_posted_intr_test]
+file = vmx.flat
+smp = 2
+extra_params = -cpu max,+vmx -append "vmx_posted_interrupts_test"
arch = x86_64
groups = vmx
timeout = 10
diff --git a/x86/vmx.c b/x86/vmx.c
index 12e42b0..c803eaa 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -66,7 +66,7 @@
static int guest_finished;
static int in_guest;
-union vmx_basic basic;
+union vmx_basic_msr basic_msr;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
@@ -369,7 +369,7 @@
struct vmcs *vmcs = alloc_page();
u32 vmcs_enum_max, max_index = 0;
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs));
assert(!make_vmcs_current(vmcs));
@@ -430,7 +430,7 @@
void *vpage = alloc_vpage();
memset(vmcs, 0, PAGE_SIZE);
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs));
assert(!make_vmcs_current(vmcs));
@@ -456,7 +456,7 @@
{
struct vmcs *vmcs = alloc_page();
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs));
assert(!make_vmcs_current(vmcs));
@@ -482,7 +482,7 @@
for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
vmcs[i] = alloc_page();
- vmcs[i]->hdr.revision_id = basic.revision;
+ vmcs[i]->hdr.revision_id = basic_msr.revision;
}
#define VMPTRLD(_i) do { \
@@ -731,13 +731,13 @@
vmcs[i] = alloc_page();
}
- vmcs[0]->hdr.revision_id = basic.revision;
+ vmcs[0]->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs[0]));
assert(!make_vmcs_current(vmcs[0]));
set_all_vmcs_fields(0x86);
assert(!vmcs_clear(vmcs[0]));
- memcpy(vmcs[1], vmcs[0], basic.size);
+ memcpy(vmcs[1], vmcs[0], basic_msr.size);
assert(!make_vmcs_current(vmcs[1]));
report(check_all_vmcs_fields(0x86),
"test vmclear flush (current VMCS)");
@@ -745,7 +745,7 @@
set_all_vmcs_fields(0x87);
assert(!make_vmcs_current(vmcs[0]));
assert(!vmcs_clear(vmcs[1]));
- memcpy(vmcs[2], vmcs[1], basic.size);
+ memcpy(vmcs[2], vmcs[1], basic_msr.size);
assert(!make_vmcs_current(vmcs[2]));
report(check_all_vmcs_fields(0x87),
"test vmclear flush (!current VMCS)");
@@ -1126,6 +1126,8 @@
vmcs_write(HOST_CR4, read_cr4());
vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
vmcs_write(HOST_SYSENTER_CS, KERNEL_CS);
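+	/*
+	 * Load the current PAT on VM-exit (when supported) so that a test that
+	 * runs the guest with a non-default PAT doesn't leave all memory UC.
+	 */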
+ if (ctrl_exit_rev.clr & EXI_LOAD_PAT)
+ vmcs_write(HOST_PAT, rdmsr(MSR_IA32_CR_PAT));
/* 26.2.3 */
vmcs_write(HOST_SEL_CS, KERNEL_CS);
@@ -1232,7 +1234,7 @@
int init_vmcs(struct vmcs **vmcs)
{
*vmcs = alloc_page();
- (*vmcs)->hdr.revision_id = basic.revision;
+ (*vmcs)->hdr.revision_id = basic_msr.revision;
/* vmclear first to init vmcs */
if (vmcs_clear(*vmcs)) {
printf("%s : vmcs_clear error\n", __func__);
@@ -1247,7 +1249,7 @@
/* All settings to pin/exit/enter/cpu
control fields should be placed here */
ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
- ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
+ ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64 | EXI_LOAD_PAT;
ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
/* DIsable IO instruction VMEXIT now */
ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
@@ -1279,14 +1281,14 @@
static void init_vmx_caps(void)
{
- basic.val = rdmsr(MSR_IA32_VMX_BASIC);
- ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
+ basic_msr.val = rdmsr(MSR_IA32_VMX_BASIC);
+ ctrl_pin_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PIN
: MSR_IA32_VMX_PINBASED_CTLS);
- ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
+ ctrl_exit_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_EXIT
: MSR_IA32_VMX_EXIT_CTLS);
- ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
+ ctrl_enter_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
: MSR_IA32_VMX_ENTRY_CTLS);
- ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
+ ctrl_cpu_rev[0].val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PROC
: MSR_IA32_VMX_PROCBASED_CTLS);
if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
@@ -1311,7 +1313,7 @@
write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
- *vmxon_region = basic.revision;
+ *vmxon_region = basic_msr.revision;
}
static void alloc_bsp_vmx_pages(void)
@@ -1430,7 +1432,7 @@
*/
if ((cr_number == 0 && (bit == X86_CR0_PE || bit == X86_CR0_PG)) ||
(cr_number == 4 && (bit == X86_CR4_PAE || bit == X86_CR4_SMAP ||
- bit == X86_CR4_SMEP)))
+ bit == X86_CR4_SMEP || bit == X86_CR4_CET)))
continue;
if (!(bit & required1) && !(bit & disallowed1)) {
@@ -1515,7 +1517,7 @@
/* and finally a valid region, with valid-but-tweaked cr0/cr4 */
write_cr0(orig_cr0 ^ flexible_cr0);
write_cr4(orig_cr4 ^ flexible_cr4);
- *bsp_vmxon_region = basic.revision;
+ *bsp_vmxon_region = basic_msr.revision;
ret = vmxon_safe();
report(!ret, "test vmxon with valid vmxon region");
write_cr0(orig_cr0);
@@ -1529,7 +1531,7 @@
int width = cpuid_maxphyaddr();
vmcs = alloc_page();
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
/* Unaligned page access */
tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
@@ -1592,10 +1594,10 @@
printf("\nTest suite: VMX capability reporting\n");
- report((basic.revision & (1ul << 31)) == 0 &&
- basic.size > 0 && basic.size <= 4096 &&
- (basic.type == 0 || basic.type == 6) &&
- basic.reserved1 == 0 && basic.reserved2 == 0,
+ report((basic_msr.revision & (1ul << 31)) == 0 &&
+ basic_msr.size > 0 && basic_msr.size <= 4096 &&
+ (basic_msr.type == 0 || basic_msr.type == 6) &&
+ basic_msr.reserved1 == 0 && basic_msr.reserved2 == 0,
"MSR_IA32_VMX_BASIC");
val = rdmsr(MSR_IA32_VMX_MISC);
@@ -1609,7 +1611,7 @@
default1 = vmx_ctl_msr[n].default1;
ok = (ctrl.set & default1) == default1;
ok = ok && (ctrl.set & ~ctrl.clr) == 0;
- if (ok && basic.ctrl) {
+ if (ok && basic_msr.ctrl) {
true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
ok = ctrl.clr == true_ctrl.clr;
ok = ok && ctrl.set == (true_ctrl.set | default1);
diff --git a/x86/vmx.h b/x86/vmx.h
index bc61a58..9cd9048 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -130,7 +130,7 @@
void (*v2)(void);
};
-union vmx_basic {
+union vmx_basic_msr {
u64 val;
struct {
u32 revision;
@@ -141,7 +141,8 @@
type:4,
insouts:1,
ctrl:1,
- reserved2:8;
+ no_hw_errcode_cc:1,
+ reserved2:7;
};
};
@@ -752,7 +753,7 @@
extern struct regs regs;
-extern union vmx_basic basic;
+extern union vmx_basic_msr basic_msr;
extern union vmx_ctrl_msr ctrl_pin_rev;
extern union vmx_ctrl_msr ctrl_cpu_rev[2];
extern union vmx_ctrl_msr ctrl_exit_rev;
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index 97b8e72..ffe7064 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -60,6 +60,16 @@
asm volatile("vmcall");
}
+static u32 *get_vapic_page(void)
+{
+ return (u32 *)phys_to_virt(vmcs_read(APIC_VIRT_ADDR));
+}
+
+static u64 *get_pi_desc(void)
+{
+ return (u64 *)phys_to_virt(vmcs_read(POSTED_INTR_DESC_ADDR));
+}
+
static void basic_guest_main(void)
{
report_pass("Basic VMX test");
@@ -1039,11 +1049,14 @@
printf("\tEPT is not supported\n");
return 1;
}
- if (!(ept_vpid.val & EPT_CAP_WB)) {
+ if (!is_ept_memtype_supported(EPT_MEM_TYPE_WB)) {
printf("\tWB memtype for EPT walks not supported\n");
return 1;
}
- if (!(ept_vpid.val & EPT_CAP_PWL4)) {
+
+ if (!is_4_level_ept_supported()) {
+ /* Support for 4-level EPT is mandatory. */
+ report(false, "4-level EPT support check");
printf("\tPWL4 is not supported\n");
return 1;
}
@@ -3443,7 +3456,7 @@
{
unsigned bit;
- printf("%s: %lx\n", basic.ctrl ? "MSR_IA32_VMX_TRUE_PIN" :
+ printf("%s: %lx\n", basic_msr.ctrl ? "MSR_IA32_VMX_TRUE_PIN" :
"MSR_IA32_VMX_PINBASED_CTLS", ctrl_pin_rev.val);
for (bit = 0; bit < 32; bit++)
test_rsvd_ctl_bit("pin-based controls",
@@ -3460,7 +3473,7 @@
{
unsigned bit;
- printf("\n%s: %lx\n", basic.ctrl ? "MSR_IA32_VMX_TRUE_PROC" :
+ printf("\n%s: %lx\n", basic_msr.ctrl ? "MSR_IA32_VMX_TRUE_PROC" :
"MSR_IA32_VMX_PROCBASED_CTLS", ctrl_cpu_rev[0].val);
for (bit = 0; bit < 32; bit++)
test_rsvd_ctl_bit("primary processor-based controls",
@@ -4189,7 +4202,10 @@
ent_intr_info);
vmcs_write(GUEST_CR0, guest_cr0_save & ~X86_CR0_PE & ~X86_CR0_PG);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
ent_intr_info = ent_intr_info_base | INTR_INFO_DELIVER_CODE_MASK |
@@ -4222,7 +4238,10 @@
ent_intr_info);
vmcs_write(GUEST_CR0, guest_cr0_save | X86_CR0_PE);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
vmcs_write(CPU_EXEC_CTRL1, secondary_save);
@@ -4244,7 +4263,11 @@
report_prefix_pushf("VM-entry intr info=0x%x [-]",
ent_intr_info);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (exception_type_mask == INTR_TYPE_HARD_EXCEPTION &&
+ basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
}
report_prefix_pop();
@@ -4281,7 +4304,10 @@
report_prefix_pushf("VM-entry intr info=0x%x [-]",
ent_intr_info);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
/* Positive case */
@@ -4655,28 +4681,22 @@
u32 primary_saved = vmcs_read(CPU_EXEC_CTRL0);
u32 secondary_saved = vmcs_read(CPU_EXEC_CTRL1);
u64 eptp_saved = vmcs_read(EPTP);
- u32 primary = primary_saved;
- u32 secondary = secondary_saved;
- u64 eptp = eptp_saved;
+ u32 secondary;
+ u64 eptp;
u32 i, maxphysaddr;
u64 j, resv_bits_mask = 0;
- if (!((ctrl_cpu_rev[0].clr & CPU_SECONDARY) &&
- (ctrl_cpu_rev[1].clr & CPU_EPT))) {
- report_skip("%s : \"CPU secondary\" and/or \"enable EPT\" exec control not supported", __func__);
+ if (__setup_ept(0xfed40000, false)) {
+ report_skip("%s : EPT not supported", __func__);
return;
}
- /* Support for 4-level EPT is mandatory. */
- report(is_4_level_ept_supported(), "4-level EPT support check");
+ test_vmx_valid_controls();
- primary |= CPU_SECONDARY;
- vmcs_write(CPU_EXEC_CTRL0, primary);
- secondary |= CPU_EPT;
- vmcs_write(CPU_EXEC_CTRL1, secondary);
- eptp = (eptp & ~EPTP_PG_WALK_LEN_MASK) |
- (3ul << EPTP_PG_WALK_LEN_SHIFT);
- vmcs_write(EPTP, eptp);
+ setup_dummy_ept();
+
+ secondary = vmcs_read(CPU_EXEC_CTRL1);
+ eptp = vmcs_read(EPTP);
for (i = 0; i < 8; i++) {
eptp = (eptp & ~EPT_MEM_TYPE_MASK) | i;
@@ -5303,7 +5323,7 @@
report_prefix_pop();
}
- if (basic.val & (1ul << 48))
+ if (basic_msr.val & (1ul << 48))
addr_len = 32;
test_vmcs_addr_values("VM-entry-MSR-load address",
@@ -5431,7 +5451,7 @@
report_prefix_pop();
}
- if (basic.val & (1ul << 48))
+ if (basic_msr.val & (1ul << 48))
addr_len = 32;
test_vmcs_addr_values("VM-exit-MSR-store address",
@@ -7213,6 +7233,7 @@
static void test_pat(u32 field, const char * field_name, u32 ctrl_field,
u64 ctrl_bit)
{
+ u64 pat_msr_saved = rdmsr(MSR_IA32_CR_PAT);
u32 ctrl_saved = vmcs_read(ctrl_field);
u64 pat_saved = vmcs_read(field);
u64 i, val;
@@ -7232,7 +7253,7 @@
report_prefix_pop();
} else { // GUEST_PAT
- test_guest_state("ENT_LOAD_PAT enabled", false,
+ test_guest_state("ENT_LOAD_PAT disabled", false,
val, "GUEST_PAT");
}
}
@@ -7254,12 +7275,22 @@
error = 0;
test_vmx_vmlaunch(error);
+
+ if (!error)
+ report(rdmsr(MSR_IA32_CR_PAT) == val,
+ "Expected PAT = 0x%lx, got 0x%lx",
+ val, rdmsr(MSR_IA32_CR_PAT));
+ wrmsr(MSR_IA32_CR_PAT, pat_msr_saved);
+
report_prefix_pop();
} else { // GUEST_PAT
error = (i == 0x2 || i == 0x3 || i >= 0x8);
test_guest_state("ENT_LOAD_PAT enabled", !!error,
val, "GUEST_PAT");
+
+ if (!(ctrl_exit_rev.clr & EXI_LOAD_PAT))
+ wrmsr(MSR_IA32_CR_PAT, pat_msr_saved);
}
}
@@ -9305,6 +9336,7 @@
assert(cpu_has_apicv());
+ enable_x2apic();
disable_intercept_for_x2apic_msrs();
virtual_apic_page = alloc_page();
@@ -9321,6 +9353,18 @@
vmcs_set_bits(CPU_EXEC_CTRL1, CPU_VINTD | CPU_VIRT_X2APIC);
}
+#define PI_VECTOR 255
+
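+/*
+ * Enable posted-interrupt processing: acknowledge interrupts on VM-exit, use
+ * PI_VECTOR as the posted-interrupt notification vector, and point the VMCS
+ * at a newly allocated posted-interrupt descriptor.
+ */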
+static void enable_posted_interrupts(void)
+{
+ void *pi_desc = alloc_page();
+
+ vmcs_set_bits(PIN_CONTROLS, PIN_POST_INTR);
+ vmcs_set_bits(EXI_CONTROLS, EXI_INTA);
+ vmcs_write(PINV, PI_VECTOR);
+ vmcs_write(POSTED_INTR_DESC_ADDR, (u64)pi_desc);
+}
+
static void trigger_ioapic_scan_thread(void *data)
{
/* Wait until other CPU entered L2 */
@@ -10183,7 +10227,7 @@
vmcs_write(VMWRITE_BITMAP, virt_to_phys(bitmap[ACCESS_VMWRITE]));
shadow = alloc_page();
- shadow->hdr.revision_id = basic.revision;
+ shadow->hdr.revision_id = basic_msr.revision;
shadow->hdr.shadow_vmcs = 1;
TEST_ASSERT(!vmcs_clear(shadow));
@@ -10710,6 +10754,402 @@
test_set_guest_finished();
}
+enum Vid_op {
+ VID_OP_SET_ISR,
+ VID_OP_NOP,
+ VID_OP_SET_CR8,
+ VID_OP_SELF_IPI,
+ VID_OP_TERMINATE,
+ VID_OP_SPIN,
+ VID_OP_SPIN_IRR,
+ VID_OP_HLT,
+};
+
+struct vmx_basic_vid_test_guest_args {
+ enum Vid_op op;
+ u8 nr;
+ u32 isr_exec_cnt;
+ u32 *virtual_apic_page;
+ u64 *pi_desc;
+ u32 dest;
+ bool in_guest;
+} vmx_basic_vid_test_guest_args;
+
+/*
+ * From the SDM, Bit x of the VIRR is
+ * at bit position (x & 1FH)
+ * at offset (200H | ((x & E0H) >> 1)).
+ */
+static void set_virr_bit(volatile u32 *virtual_apic_page, u8 nr)
+{
+ u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32);
+ u32 mask = 1 << (nr & 0x1f);
+
+ virtual_apic_page[page_offset] |= mask;
+}
+
+static void clear_virr_bit(volatile u32 *virtual_apic_page, u8 nr)
+{
+ u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32);
+ u32 mask = 1 << (nr & 0x1f);
+
+ virtual_apic_page[page_offset] &= ~mask;
+}
+
+static bool get_virr_bit(volatile u32 *virtual_apic_page, u8 nr)
+{
+ u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32);
+ u32 mask = 1 << (nr & 0x1f);
+
+ return virtual_apic_page[page_offset] & mask;
+}
+
+static void vmx_vid_test_isr(isr_regs_t *regs)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+
+ args->isr_exec_cnt++;
+ barrier();
+ eoi();
+}
+
+static void vmx_basic_vid_test_guest(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+
+ sti_nop();
+ for (;;) {
+ enum Vid_op op = args->op;
+ u8 nr = args->nr;
+
+ switch (op) {
+ case VID_OP_TERMINATE:
+ return;
+ case VID_OP_SET_ISR:
+ handle_irq(nr, vmx_vid_test_isr);
+ break;
+ case VID_OP_SET_CR8:
+ write_cr8(nr);
+ break;
+ case VID_OP_SELF_IPI:
+ vmx_x2apic_write(APIC_SELF_IPI, nr);
+ break;
+ case VID_OP_HLT:
+ cli();
+ barrier();
+ args->in_guest = true;
+ barrier();
+ safe_halt();
+ break;
+ case VID_OP_SPIN:
+ args->in_guest = true;
+ while (!args->isr_exec_cnt)
+ pause();
+ break;
+ case VID_OP_SPIN_IRR: {
+ u32 *virtual_apic_page = args->virtual_apic_page;
+ u8 nr = args->nr;
+
+ args->in_guest = true;
+ while (!get_virr_bit(virtual_apic_page, nr))
+ pause();
+ clear_virr_bit(virtual_apic_page, nr);
+ break;
+ }
+ default:
+ break;
+ }
+
+ vmcall();
+ }
+}
+
+static void set_isrs_for_vmx_basic_vid_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 nr;
+
+ /*
+ * kvm-unit-tests uses vector 32 for IPIs, so don't install a test ISR
+ * for that vector.
+ */
+ for (nr = 0x21; nr < 0x100; nr++) {
+ vmcs_write(GUEST_INT_STATUS, 0);
+ args->virtual_apic_page = get_vapic_page();
+ args->op = VID_OP_SET_ISR;
+ args->nr = nr;
+ args->isr_exec_cnt = 0;
+ enter_guest();
+ skip_exit_vmcall();
+ }
+ report(true, "Set ISR for vectors 33-255.");
+}
+
+static void vmx_posted_interrupts_test_worker(void *data)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+
+ while (!args->in_guest)
+ pause();
+
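+	/*
+	 * Post the vector in the PIR, set the outstanding-notification bit
+	 * (bit 256 of the PI descriptor), and send the notification IPI.
+	 */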
+ test_and_set_bit(args->nr, args->pi_desc);
+ test_and_set_bit(256, args->pi_desc);
+ apic_icr_write(PI_VECTOR, args->dest);
+}
+
+/*
+ * Test virtual interrupt delivery (VID) at VM-entry or TPR virtualization
+ *
+ * Args:
+ * nr: vector under test
+ * tpr: task priority under test
+ *   op: action the guest takes after VM-entry (see enum Vid_op)
+ *   isr_exec_cnt_want: expected number of test ISR invocations in the guest
+ *   eoi_exit_induced: true if EOI virtualization is expected to trigger
+ *       EOI-induced VM-exits while the virtual interrupt(s) are delivered
+ */
+static void test_basic_vid(u8 nr, u8 tpr, enum Vid_op op, u32 isr_exec_cnt_want,
+ bool eoi_exit_induced)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 rvi_want = isr_exec_cnt_want ? 0 : nr;
+ u16 int_status;
+
+ /*
+ * From the SDM:
+ * IF "interrupt-window exiting" is 0 AND
+ * RVI[7:4] > VPPR[7:4] (see Section 29.1.1 for definition of VPPR)
+ * THEN recognize a pending virtual interrupt;
+ * ELSE
+ * do not recognize a pending virtual interrupt;
+ * FI;
+ *
+ * Thus, VPPR dictates whether a virtual interrupt is recognized.
+ * However, PPR virtualization, which occurs before virtual interrupt
+ * delivery, sets VPPR to VTPR, when SVI is 0.
+ */
+ args->isr_exec_cnt = 0;
+ args->virtual_apic_page = get_vapic_page();
+ args->op = op;
+ args->in_guest = false;
+ switch (op) {
+ case VID_OP_SELF_IPI:
+ vmcs_write(GUEST_INT_STATUS, 0);
+ args->nr = nr;
+ set_vtpr(0);
+ break;
+ case VID_OP_SET_CR8:
+ vmcs_write(GUEST_INT_STATUS, nr);
+ args->nr = task_priority_class(tpr);
+ set_vtpr(0xff);
+ break;
+ case VID_OP_SPIN:
+ case VID_OP_SPIN_IRR:
+ case VID_OP_HLT:
+ vmcs_write(GUEST_INT_STATUS, 0);
+ args->nr = nr;
+ set_vtpr(tpr);
+ barrier();
+ on_cpu_async(1, vmx_posted_interrupts_test_worker, NULL);
+ break;
+ default:
+ vmcs_write(GUEST_INT_STATUS, nr);
+ set_vtpr(tpr);
+ break;
+ }
+
+ enter_guest();
+ if (eoi_exit_induced) {
+ u32 exit_cnt;
+
+ assert_exit_reason(VMX_EOI_INDUCED);
+ for (exit_cnt = 1; exit_cnt < isr_exec_cnt_want; exit_cnt++) {
+ enter_guest();
+ assert_exit_reason(VMX_EOI_INDUCED);
+ }
+ enter_guest();
+ }
+ skip_exit_vmcall();
+ TEST_ASSERT_EQ(args->isr_exec_cnt, isr_exec_cnt_want);
+ int_status = vmcs_read(GUEST_INT_STATUS);
+ TEST_ASSERT_EQ(int_status, rvi_want);
+}
+
+/*
+ * Test recognizing and delivering virtual interrupts via "Virtual-interrupt
+ * delivery" for two scenarios:
+ * 1. When there is a pending interrupt at VM-entry.
+ * 2. When there is a pending interrupt during TPR virtualization.
+ */
+static void vmx_basic_vid_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u8 nr_class;
+
+ if (!cpu_has_apicv()) {
+ report_skip("%s : Not all required APICv bits supported", __func__);
+ return;
+ }
+
+ enable_vid();
+ test_set_guest(vmx_basic_vid_test_guest);
+ set_isrs_for_vmx_basic_vid_test();
+
+ for (nr_class = 2; nr_class < 16; nr_class++) {
+ u16 nr;
+ u8 nr_sub_class;
+
+ for (nr_sub_class = 0; nr_sub_class < 16; nr_sub_class++) {
+ u16 tpr;
+
+ nr = (nr_class << 4) | nr_sub_class;
+
+ /*
+ * Don't test the reserved IPI vector, as the test ISR
+ * was not installed.
+ */
+ if (nr == 0x20)
+ continue;
+
+ test_basic_vid(nr, /*tpr=*/0, VID_OP_SELF_IPI,
+ /*isr_exec_cnt_want=*/1,
+ /*eoi_exit_induced=*/false);
+ for (tpr = 0; tpr < 256; tpr++) {
+ u32 isr_exec_cnt_want =
+ task_priority_class(nr) >
+ task_priority_class(tpr) ? 1 : 0;
+
+ test_basic_vid(nr, tpr, VID_OP_NOP,
+ isr_exec_cnt_want,
+ /*eoi_exit_induced=*/false);
+ test_basic_vid(nr, tpr, VID_OP_SET_CR8,
+ isr_exec_cnt_want,
+ /*eoi_exit_induced=*/false);
+ }
+ report(true, "TPR 0-255 for vector 0x%x.", nr);
+ }
+ }
+
+ /* Terminate the guest */
+ args->op = VID_OP_TERMINATE;
+ enter_guest();
+ assert_exit_reason(VMX_VMCALL);
+}
+
+static void test_eoi_virt(u8 nr, u8 lo_pri_nr, bool eoi_exit_induced)
+{
+ u32 *virtual_apic_page = get_vapic_page();
+
+ set_virr_bit(virtual_apic_page, lo_pri_nr);
+ test_basic_vid(nr, /*tpr=*/0, VID_OP_NOP, /*isr_exec_cnt_want=*/2,
+ eoi_exit_induced);
+ TEST_ASSERT(!get_virr_bit(virtual_apic_page, lo_pri_nr));
+ TEST_ASSERT(!get_virr_bit(virtual_apic_page, nr));
+}
+
+static void vmx_eoi_virt_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 nr;
+ u16 lo_pri_nr;
+
+ if (!cpu_has_apicv()) {
+ report_skip("%s : Not all required APICv bits supported", __func__);
+ return;
+ }
+
+ enable_vid(); /* Note, enable_vid sets APIC_VIRT_ADDR field in VMCS. */
+ test_set_guest(vmx_basic_vid_test_guest);
+ set_isrs_for_vmx_basic_vid_test();
+
+ /* Now test EOI virtualization without induced EOI exits. */
+ for (nr = 0x22; nr < 0x100; nr++) {
+ for (lo_pri_nr = 0x21; lo_pri_nr < nr; lo_pri_nr++)
+ test_eoi_virt(nr, lo_pri_nr,
+ /*eoi_exit_induced=*/false);
+
+ report(true, "Low priority nrs 0x21-0x%x for nr 0x%x.",
+ nr - 1, nr);
+ }
+
+ /* Finally, test EOI virtualization with induced EOI exits. */
+ vmcs_write(EOI_EXIT_BITMAP0, GENMASK_ULL(63, 0));
+ vmcs_write(EOI_EXIT_BITMAP1, GENMASK_ULL(63, 0));
+ vmcs_write(EOI_EXIT_BITMAP2, GENMASK_ULL(63, 0));
+ vmcs_write(EOI_EXIT_BITMAP3, GENMASK_ULL(63, 0));
+ for (nr = 0x22; nr < 0x100; nr++) {
+ for (lo_pri_nr = 0x21; lo_pri_nr < nr; lo_pri_nr++)
+ test_eoi_virt(nr, lo_pri_nr,
+ /*eoi_exit_induced=*/true);
+
+ report(true,
+ "Low priority nrs 0x21-0x%x for nr 0x%x, with induced EOI exits.",
+ nr - 1, nr);
+ }
+
+ /* Terminate the guest */
+ args->op = VID_OP_TERMINATE;
+ enter_guest();
+ assert_exit_reason(VMX_VMCALL);
+}
+
+static void vmx_posted_interrupts_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 vector;
+ u8 class;
+
+ if (!cpu_has_apicv()) {
+ report_skip("%s : Not all required APICv bits supported", __func__);
+ return;
+ }
+
+ if (cpu_count() < 2) {
+ report_skip("%s : CPU count < 2", __func__);
+ return;
+ }
+
+ enable_vid();
+ enable_posted_interrupts();
+ args->pi_desc = get_pi_desc();
+ args->dest = apic_id();
+
+ test_set_guest(vmx_basic_vid_test_guest);
+ set_isrs_for_vmx_basic_vid_test();
+
+ for (class = 0; class < 16; class++) {
+ for (vector = 33; vector < 256; vector++) {
+ /*
+ * If the vector isn't above TPR, then the vector should
+ * be moved from PIR to the IRR, but never serviced.
+ *
+ * Only test posted interrupts to a halted vCPU if the
+ * interrupt is expected to be serviced. Otherwise, the
+ * vCPU will HLT indefinitely.
+ */
+ if (task_priority_class(vector) <= class) {
+ test_basic_vid(vector, class << 4,
+ VID_OP_SPIN_IRR, 0, false);
+ continue;
+ }
+
+ test_basic_vid(vector, class << 4, VID_OP_SPIN, 1, false);
+ test_basic_vid(vector, class << 4, VID_OP_HLT, 1, false);
+ }
+ }
+	report(true, "Posted vectors 33-255 across TPR classes 0-0xf, running and sometimes halted");
+
+ /* Terminate the guest */
+ args->op = VID_OP_TERMINATE;
+ enter_guest();
+}
+
#define TEST(name) { #name, .v2 = name }
/* name/init/guest_main/exit_handler/syscall_handler/guest_regs */
@@ -10764,6 +11204,9 @@
TEST(vmx_hlt_with_rvi_test),
TEST(apic_reg_virt_test),
TEST(virt_x2apic_mode_test),
+ TEST(vmx_basic_vid_test),
+ TEST(vmx_eoi_virt_test),
+ TEST(vmx_posted_interrupts_test),
/* APIC pass-through tests */
TEST(vmx_apic_passthrough_test),
TEST(vmx_apic_passthrough_thread_test),