Merge tag 'kvm-x86-2024.06.14' of https://github.com/kvm-x86/kvm-unit-tests into HEAD
x86 fixes, cleanups, and new testcases:
- Add a testcase to verify that KVM doesn't inject a triple fault (or any
  other "error") if a nested VM is run with an EP4TA pointing at MMIO.
- Play nice with CR4.CET in test_vmxon_bad_cr()
- Force emulation when testing MSR_IA32_FLUSH_CMD to work around an issue where
Skylake CPUs don't follow the architecturally defined behavior, and so that
the test doesn't break if/when new bits are supported by future CPUs.
- Rework the async #PF test to support IRQ-based page-ready notifications.
- Fix a variety of issues related to adaptive PEBS.
- Add several nested VMX tests for virtual interrupt delivery and posted
interrupts.
- Ensure PAT is loaded with the default value after the nVMX PAT tests
(failure to do so was causing tests to fail due to all memory being UC).
- Misc cleanups.
diff --git a/lib/x86/apic.h b/lib/x86/apic.h
index c389d40..8df889b 100644
--- a/lib/x86/apic.h
+++ b/lib/x86/apic.h
@@ -81,6 +81,11 @@
return GET_APIC_MAXLVT(apic_read(APIC_LVR)) >= idx;
}
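+/* An interrupt's priority class, a.k.a. its TPR class, is bits 7:4 of the vector. */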
+static inline u8 task_priority_class(u8 vector)
+{
+ return vector >> 4;
+}
+
enum x2apic_reg_semantics {
X2APIC_INVALID = 0,
X2APIC_READABLE = BIT(0),
diff --git a/lib/x86/asm/bitops.h b/lib/x86/asm/bitops.h
index 13a25ec..54ec9c4 100644
--- a/lib/x86/asm/bitops.h
+++ b/lib/x86/asm/bitops.h
@@ -13,4 +13,12 @@
#define HAVE_BUILTIN_FLS 1
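+/*
+ * Atomically set bit 'nr' in the bitmap at 'addr' using a locked BTS.  Note,
+ * unlike the Linux kernel helper of the same name, the old value of the bit
+ * is not returned.
+ */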
+static inline void test_and_set_bit(long nr, unsigned long *addr)
+{
+ asm volatile("lock; bts %1,%0"
+ : "+m" (*addr)
+ : "Ir" (nr)
+ : "memory");
+}
+
#endif
diff --git a/lib/x86/desc.h b/lib/x86/desc.h
index 7778a0f..92c45a4 100644
--- a/lib/x86/desc.h
+++ b/lib/x86/desc.h
@@ -272,9 +272,9 @@
extern unsigned long get_gdt_entry_base(gdt_entry_t *entry);
extern unsigned long get_gdt_entry_limit(gdt_entry_t *entry);
-#define asm_safe(insn, inputs...) \
+#define __asm_safe(fep, insn, inputs...) \
({ \
- asm volatile(ASM_TRY("1f") \
+ asm volatile(__ASM_TRY(fep, "1f") \
insn "\n\t" \
"1:\n\t" \
: \
@@ -283,9 +283,15 @@
exception_vector(); \
})
-#define asm_safe_out1(insn, output, inputs...) \
+#define asm_safe(insn, inputs...) \
+ __asm_safe("", insn, inputs)
+
+#define asm_fep_safe(insn, inputs...) \
+	__asm_safe(KVM_FEP, insn, inputs)
+
+#define __asm_safe_out1(fep, insn, output, inputs...) \
({ \
- asm volatile(ASM_TRY("1f") \
+ asm volatile(__ASM_TRY(fep, "1f") \
insn "\n\t" \
"1:\n\t" \
: output \
@@ -294,9 +300,15 @@
exception_vector(); \
})
-#define asm_safe_out2(insn, output1, output2, inputs...) \
+#define asm_safe_out1(insn, output, inputs...) \
+ __asm_safe_out1("", insn, output, inputs)
+
+#define asm_fep_safe_out1(insn, output, inputs...) \
+ __asm_safe_out1(KVM_FEP, insn, output, inputs)
+
+#define __asm_safe_out2(fep, insn, output1, output2, inputs...) \
({ \
- asm volatile(ASM_TRY("1f") \
+ asm volatile(__ASM_TRY(fep, "1f") \
insn "\n\t" \
"1:\n\t" \
: output1, output2 \
@@ -305,6 +317,12 @@
exception_vector(); \
})
+#define asm_safe_out2(insn, output1, output2, inputs...) \
+ __asm_safe_out2("", insn, output1, output2, inputs)
+
+#define asm_fep_safe_out2(insn, output1, output2, inputs...) \
+ __asm_safe_out2(KVM_FEP, insn, output1, output2, inputs)
+
#define __asm_safe_report(want, insn, inputs...) \
do { \
int vector = asm_safe(insn, inputs); \
diff --git a/lib/x86/pmu.h b/lib/x86/pmu.h
index 8465e3c..f07fbd9 100644
--- a/lib/x86/pmu.h
+++ b/lib/x86/pmu.h
@@ -44,9 +44,13 @@
#define GLOBAL_STATUS_BUFFER_OVF BIT_ULL(GLOBAL_STATUS_BUFFER_OVF_BIT)
#define PEBS_DATACFG_MEMINFO BIT_ULL(0)
-#define PEBS_DATACFG_GP BIT_ULL(1)
+#define PEBS_DATACFG_GPRS BIT_ULL(1)
#define PEBS_DATACFG_XMMS BIT_ULL(2)
#define PEBS_DATACFG_LBRS BIT_ULL(3)
+#define PEBS_DATACFG_MASK (PEBS_DATACFG_MEMINFO | \
+ PEBS_DATACFG_GPRS | \
+ PEBS_DATACFG_XMMS | \
+ PEBS_DATACFG_LBRS)
#define ICL_EVENTSEL_ADAPTIVE (1ULL << 34)
#define PEBS_DATACFG_LBR_SHIFT 24
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index 44f4fd1..da1ed66 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -264,6 +264,12 @@
#define X86_FEATURE_PKS (CPUID(0x7, 0, ECX, 31))
/*
+ * KVM defined leafs
+ */
+#define KVM_FEATURE_ASYNC_PF (CPUID(0x40000001, 0, EAX, 4))
+#define KVM_FEATURE_ASYNC_PF_INT (CPUID(0x40000001, 0, EAX, 14))
+
+/*
* Extended Leafs, a.k.a. AMD defined
*/
#define X86_FEATURE_SVM (CPUID(0x80000001, 0, ECX, 2))
@@ -430,12 +436,12 @@
asm volatile ("wrmsr" : : "a"(a), "d"(d), "c"(index) : "memory");
}
-#define rdreg64_safe(insn, index, val) \
+#define __rdreg64_safe(fep, insn, index, val) \
({ \
uint32_t a, d; \
int vector; \
\
- vector = asm_safe_out2(insn, "=a"(a), "=d"(d), "c"(index)); \
+ vector = __asm_safe_out2(fep, insn, "=a"(a), "=d"(d), "c"(index));\
\
if (vector) \
*(val) = 0; \
@@ -444,13 +450,18 @@
vector; \
})
-#define wrreg64_safe(insn, index, val) \
+#define rdreg64_safe(insn, index, val) \
+ __rdreg64_safe("", insn, index, val)
+
+#define __wrreg64_safe(fep, insn, index, val) \
({ \
uint32_t eax = (val), edx = (val) >> 32; \
\
- asm_safe(insn, "a" (eax), "d" (edx), "c" (index)); \
+ __asm_safe(fep, insn, "a" (eax), "d" (edx), "c" (index)); \
})
+#define wrreg64_safe(insn, index, val) \
+ __wrreg64_safe("", insn, index, val)
static inline int rdmsr_safe(u32 index, uint64_t *val)
{
@@ -462,6 +473,11 @@
return wrreg64_safe("wrmsr", index, val);
}
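+/* Like wrmsr_safe(), but force KVM to emulate the WRMSR via the forced emulation prefix. */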
+static inline int wrmsr_fep_safe(u32 index, u64 val)
+{
+ return __wrreg64_safe(KVM_FEP, "wrmsr", index, val);
+}
+
static inline int rdpmc_safe(u32 index, uint64_t *val)
{
return rdreg64_safe("rdpmc", index, val);
diff --git a/x86/asyncpf.c b/x86/asyncpf.c
index bc515be..9bf2056 100644
--- a/x86/asyncpf.c
+++ b/x86/asyncpf.c
@@ -1,8 +1,12 @@
/*
* Async PF test. For the test to actually do anything it needs to be started
- * in memory cgroup with 512M of memory and with more then 1G memory provided
+ * in memory cgroup with 512M of memory and with more than 1G memory provided
* to the guest.
*
+ * To identify the cgroup version on Linux:
+ * stat -fc %T /sys/fs/cgroup/
+ *
+ * If the output is tmpfs, your system is using cgroup v1:
* To create cgroup do as root:
* mkdir /dev/cgroup
* mount -t cgroup none -omemory /dev/cgroup
@@ -13,99 +17,135 @@
* echo $$ > /dev/cgroup/1/tasks
* echo 512M > /dev/cgroup/1/memory.limit_in_bytes
*
+ * If the output is cgroup2fs, your system is using cgroup v2:
+ * mkdir /sys/fs/cgroup/cg1
+ * echo $$ > /sys/fs/cgroup/cg1/cgroup.procs
+ * echo 512M > /sys/fs/cgroup/cg1/memory.max
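+ *
+ * Either way, with the shell in the memory-limited cgroup, run the test with
+ * more guest memory than the cgroup limit, e.g. (assuming the usual x86/run
+ * wrapper):
+ *   ./x86/run x86/asyncpf.flat -cpu host -m 2048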
+ *
*/
-#include "x86/msr.h"
#include "x86/processor.h"
-#include "x86/apic-defs.h"
#include "x86/apic.h"
-#include "x86/desc.h"
#include "x86/isr.h"
#include "x86/vm.h"
-
-#include "asm/page.h"
#include "alloc.h"
-#include "libcflat.h"
#include "vmalloc.h"
-#include <stdint.h>
#define KVM_PV_REASON_PAGE_NOT_PRESENT 1
-#define KVM_PV_REASON_PAGE_READY 2
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
+#define MSR_KVM_ASYNC_PF_INT 0x4b564d06
+#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07
#define KVM_ASYNC_PF_ENABLED (1 << 0)
#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3)
-volatile uint32_t apf_reason __attribute__((aligned(64)));
+#define HYPERVISOR_CALLBACK_VECTOR 0xf3
+
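+/* Guest/host shared data; layout must match KVM's struct kvm_vcpu_pv_apf_data. */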
+struct kvm_vcpu_pv_apf_data {
+ /* Used for 'page not present' events delivered via #PF */
+ uint32_t flags;
+
+ /* Used for 'page ready' events delivered via interrupt notification */
+ uint32_t token;
+
+ uint8_t pad[56];
+ uint32_t enabled;
+} apf_reason __attribute__((aligned(64)));
+
char *buf;
+void* virt;
volatile uint64_t i;
volatile uint64_t phys;
+volatile uint32_t saved_token;
+volatile uint32_t asyncpf_num;
-static inline uint32_t get_apf_reason(void)
+static inline uint32_t get_and_clear_apf_reason(void)
{
- uint32_t r = apf_reason;
- apf_reason = 0;
+ uint32_t r = apf_reason.flags;
+ apf_reason.flags = 0;
return r;
}
-static void pf_isr(struct ex_regs *r)
+static void handle_interrupt(isr_regs_t *regs)
{
- void* virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
- uint32_t reason = get_apf_reason();
+ uint32_t apf_token = apf_reason.token;
+ apf_reason.token = 0;
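+	/* ACK the 'page ready' event so that KVM can deliver the next notification. */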
+ wrmsr(MSR_KVM_ASYNC_PF_ACK, 1);
+
+ if (apf_token == 0xffffffff) {
+ report_pass("Wakeup all, got token 0x%x", apf_token);
+ } else if (apf_token == saved_token) {
+ asyncpf_num++;
+ install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
+ phys = 0;
+ } else {
+ report_fail("unexpected async pf int token 0x%x", apf_token);
+ }
+
+ eoi();
+}
+
+static void handle_pf(struct ex_regs *r)
+{
+ virt = (void*)((ulong)(buf+i) & ~(PAGE_SIZE-1));
+ uint32_t reason = get_and_clear_apf_reason();
switch (reason) {
- case 0:
- report_fail("unexpected #PF at %#lx", read_cr2());
- break;
- case KVM_PV_REASON_PAGE_NOT_PRESENT:
- phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
- install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
- write_cr3(read_cr3());
- report_pass("Got not present #PF token %lx virt addr %p phys addr %#" PRIx64,
- read_cr2(), virt, phys);
- while(phys) {
- safe_halt(); /* enables irq */
- cli();
- }
- break;
- case KVM_PV_REASON_PAGE_READY:
- report_pass("Got present #PF token %lx", read_cr2());
- if ((uint32_t)read_cr2() == ~0)
- break;
- install_pte(phys_to_virt(read_cr3()), 1, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK, 0);
- write_cr3(read_cr3());
- phys = 0;
- break;
- default:
- report_fail("unexpected async pf reason %" PRId32, reason);
- break;
+ case 0:
+ report_fail("unexpected #PF at %#lx", read_cr2());
+ exit(report_summary());
+ case KVM_PV_REASON_PAGE_NOT_PRESENT:
+ phys = virt_to_pte_phys(phys_to_virt(read_cr3()), virt);
+ install_pte(phys_to_virt(read_cr3()), 1, virt, phys, 0);
+ write_cr3(read_cr3());
+ saved_token = read_cr2();
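+		/* Wait for the 'page ready' IRQ handler to make the page present again. */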
+ while (phys) {
+ safe_halt(); /* enables irq */
+ }
+ break;
+ default:
+ report_fail("unexpected async pf with reason 0x%x", reason);
+ exit(report_summary());
}
}
-#define MEM 1ull*1024*1024*1024
+#define MEM (1ull*1024*1024*1024)
int main(int ac, char **av)
{
- int loop = 2;
+ if (!this_cpu_has(KVM_FEATURE_ASYNC_PF)) {
+		report_skip("KVM_FEATURE_ASYNC_PF is not supported");
+ return report_summary();
+ }
+
+ if (!this_cpu_has(KVM_FEATURE_ASYNC_PF_INT)) {
+		report_skip("KVM_FEATURE_ASYNC_PF_INT is not supported");
+ return report_summary();
+ }
setup_vm();
- printf("install handler\n");
- handle_exception(14, pf_isr);
- apf_reason = 0;
- printf("enable async pf\n");
+
+ handle_exception(PF_VECTOR, handle_pf);
+ handle_irq(HYPERVISOR_CALLBACK_VECTOR, handle_interrupt);
+ memset(&apf_reason, 0, sizeof(apf_reason));
+
+ wrmsr(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);
wrmsr(MSR_KVM_ASYNC_PF_EN, virt_to_phys((void*)&apf_reason) |
- KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED);
- printf("alloc memory\n");
+ KVM_ASYNC_PF_SEND_ALWAYS | KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT);
+
buf = malloc(MEM);
sti();
- while(loop--) {
- printf("start loop\n");
- /* access a lot of memory to make host swap it out */
- for (i=0; i < MEM; i+=4096)
- buf[i] = 1;
- printf("end loop\n");
- }
- cli();
+ /* access a lot of memory to make host swap it out */
+ for (i = 0; i < MEM; i += 4096)
+ buf[i] = 1;
+
+ cli();
+ if (!asyncpf_num)
+ report_skip("No async page fault events, cgroup configuration likely needed");
+ else
+		report_pass("Serviced %d async page fault events (!PRESENT #PF + READY IRQ)",
+ asyncpf_num);
return report_summary();
}
diff --git a/x86/msr.c b/x86/msr.c
index 3a041fa..e21ff0a 100644
--- a/x86/msr.c
+++ b/x86/msr.c
@@ -90,7 +90,7 @@
unsigned char vector = wrmsr_safe(msr, val);
report(!vector,
- "Expected success on WRSMR(%s, 0x%llx), got vector %d",
+ "Expected success on WRMSR(%s, 0x%llx), got vector %d",
name, val, vector);
}
@@ -99,7 +99,7 @@
unsigned char vector = wrmsr_safe(msr, val);
report(vector == GP_VECTOR,
- "Expected #GP on WRSMR(%s, 0x%llx), got vector %d",
+ "Expected #GP on WRMSR(%s, 0x%llx), got vector %d",
name, val, vector);
}
@@ -109,7 +109,17 @@
unsigned char vector = rdmsr_safe(msr, &ignored);
report(vector == GP_VECTOR,
- "Expected #GP on RDSMR(%s), got vector %d", name, vector);
+ "Expected #GP on RDMSR(%s), got vector %d", name, vector);
+}
+
+static void test_wrmsr_fep_fault(u32 msr, const char *name,
+ unsigned long long val)
+{
+ unsigned char vector = wrmsr_fep_safe(msr, val);
+
+ report(vector == GP_VECTOR,
+	       "Expected #GP on emulated WRMSR(%s, 0x%llx), got vector %d",
+ name, val, vector);
}
static void test_msr(struct msr_info *msr, bool is_64bit_host)
@@ -302,8 +312,11 @@
test_wrmsr_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", 0);
test_wrmsr_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", L1D_FLUSH);
}
- for (i = 1; i < 64; i++)
- test_wrmsr_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", BIT_ULL(i));
+
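+	/*
+	 * Force emulation of the reserved bit checks, as hardware (e.g. Skylake)
+	 * doesn't always #GP on reserved FLUSH_CMD bits, and so that the test
+	 * doesn't fail if/when new bits are supported by future CPUs.
+	 */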
+ if (is_fep_available()) {
+ for (i = 1; i < 64; i++)
+ test_wrmsr_fep_fault(MSR_IA32_FLUSH_CMD, "FLUSH_CMD", BIT_ULL(i));
+ }
}
int main(int ac, char **av)
diff --git a/x86/pmu.c b/x86/pmu.c
index 7062c1a..ce9abbe 100644
--- a/x86/pmu.c
+++ b/x86/pmu.c
@@ -69,6 +69,7 @@
static void cnt_overflow(isr_regs_t *regs)
{
irq_received++;
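+	/*
+	 * The local APIC sets the mask bit in LVTPC when it delivers a PMI;
+	 * clear the mask so that subsequent PMIs can be delivered.
+	 */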
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
apic_write(APIC_EOI, 0);
}
diff --git a/x86/pmu_pebs.c b/x86/pmu_pebs.c
index f7b52b9..77875c4 100644
--- a/x86/pmu_pebs.c
+++ b/x86/pmu_pebs.c
@@ -78,13 +78,6 @@
0x412e, /* PERF_COUNT_HW_CACHE_MISSES */
};
-static u64 pebs_data_cfgs[] = {
- PEBS_DATACFG_MEMINFO,
- PEBS_DATACFG_GP,
- PEBS_DATACFG_XMMS,
- PEBS_DATACFG_LBRS | ((MAX_NUM_LBR_ENTRY -1) << PEBS_DATACFG_LBR_SHIFT),
-};
-
/* Iterating each counter value is a waste of time, pick a few typical values. */
static u64 counter_start_values[] = {
/* if PEBS counter doesn't overflow at all */
@@ -96,16 +89,16 @@
0xffffffffffff,
};
-static unsigned int get_adaptive_pebs_record_size(u64 pebs_data_cfg)
+static unsigned int get_pebs_record_size(u64 pebs_data_cfg, bool use_adaptive)
{
unsigned int sz = sizeof(struct pebs_basic);
- if (!has_baseline)
+ if (!use_adaptive)
return sz;
if (pebs_data_cfg & PEBS_DATACFG_MEMINFO)
sz += sizeof(struct pebs_meminfo);
- if (pebs_data_cfg & PEBS_DATACFG_GP)
+ if (pebs_data_cfg & PEBS_DATACFG_GPRS)
sz += sizeof(struct pebs_gprs);
if (pebs_data_cfg & PEBS_DATACFG_XMMS)
sz += sizeof(struct pebs_xmm);
@@ -117,6 +110,7 @@
static void cnt_overflow(isr_regs_t *regs)
{
+ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED);
apic_write(APIC_EOI, 0);
}
@@ -206,10 +200,10 @@
free_page(pebs_buffer);
}
-static void pebs_enable(u64 bitmask, u64 pebs_data_cfg)
+static void pebs_enable(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
static struct debug_store *ds;
- u64 baseline_extra_ctrl = 0, fixed_ctr_ctrl = 0;
+ u64 adaptive_ctrl = 0, fixed_ctr_ctrl = 0;
unsigned int idx;
if (has_baseline)
@@ -219,15 +213,15 @@
ds->pebs_index = ds->pebs_buffer_base = (unsigned long)pebs_buffer;
ds->pebs_absolute_maximum = (unsigned long)pebs_buffer + PAGE_SIZE;
ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
- get_adaptive_pebs_record_size(pebs_data_cfg);
+ get_pebs_record_size(pebs_data_cfg, use_adaptive);
for (idx = 0; idx < pmu.nr_fixed_counters; idx++) {
if (!(BIT_ULL(FIXED_CNT_INDEX + idx) & bitmask))
continue;
- if (has_baseline)
- baseline_extra_ctrl = BIT(FIXED_CNT_INDEX + idx * 4);
+ if (use_adaptive)
+ adaptive_ctrl = BIT(FIXED_CNT_INDEX + idx * 4);
wrmsr(MSR_PERF_FIXED_CTRx(idx), ctr_start_val);
- fixed_ctr_ctrl |= (0xbULL << (idx * 4) | baseline_extra_ctrl);
+ fixed_ctr_ctrl |= (0xbULL << (idx * 4) | adaptive_ctrl);
}
if (fixed_ctr_ctrl)
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, fixed_ctr_ctrl);
@@ -235,10 +229,10 @@
for (idx = 0; idx < max_nr_gp_events; idx++) {
if (!(BIT_ULL(idx) & bitmask))
continue;
- if (has_baseline)
- baseline_extra_ctrl = ICL_EVENTSEL_ADAPTIVE;
+ if (use_adaptive)
+ adaptive_ctrl = ICL_EVENTSEL_ADAPTIVE;
wrmsr(MSR_GP_EVENT_SELECTx(idx), EVNTSEL_EN | EVNTSEL_OS | EVNTSEL_USR |
- intel_arch_events[idx] | baseline_extra_ctrl);
+ intel_arch_events[idx] | adaptive_ctrl);
wrmsr(MSR_GP_COUNTERx(idx), ctr_start_val);
}
@@ -275,11 +269,11 @@
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
-static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg)
+static void check_pebs_records(u64 bitmask, u64 pebs_data_cfg, bool use_adaptive)
{
struct pebs_basic *pebs_rec = (struct pebs_basic *)pebs_buffer;
struct debug_store *ds = (struct debug_store *)ds_bufer;
- unsigned int pebs_record_size = get_adaptive_pebs_record_size(pebs_data_cfg);
+ unsigned int pebs_record_size;
unsigned int count = 0;
bool expected, pebs_idx_match, pebs_size_match, data_cfg_match;
void *cur_record;
@@ -300,15 +294,25 @@
do {
pebs_rec = (struct pebs_basic *)cur_record;
pebs_record_size = pebs_rec->format_size >> RECORD_SIZE_OFFSET;
- pebs_idx_match =
- pebs_rec->applicable_counters & bitmask;
- pebs_size_match =
- pebs_record_size == get_adaptive_pebs_record_size(pebs_data_cfg);
- data_cfg_match =
- (pebs_rec->format_size & GENMASK_ULL(47, 0)) == pebs_data_cfg;
+ pebs_idx_match = pebs_rec->applicable_counters & bitmask;
+ pebs_size_match = pebs_record_size == get_pebs_record_size(pebs_data_cfg, use_adaptive);
+ data_cfg_match = (pebs_rec->format_size & GENMASK_ULL(47, 0)) == pebs_data_cfg;
expected = pebs_idx_match && pebs_size_match && data_cfg_match;
report(expected,
"PEBS record (written seq %d) is verified (including size, counters and cfg).", count);
+ if (use_adaptive && (pebs_data_cfg & PEBS_DATACFG_LBRS)) {
+ unsigned int lbrs_offset = get_pebs_record_size(pebs_data_cfg & ~PEBS_DATACFG_LBRS, true);
+ struct lbr_entry *pebs_lbrs = cur_record + lbrs_offset;
+ int i;
+
+ for (i = 0; i < MAX_NUM_LBR_ENTRY; i++) {
+ if (!pebs_lbrs[i].from && !pebs_lbrs[i].to)
+ continue;
+
+ report_fail("PEBS LBR record %u isn't empty, got from = '%lx', to = '%lx', info = '%lx'",
+ i, pebs_lbrs[i].from, pebs_lbrs[i].to, pebs_lbrs[i].info);
+ }
+ }
cur_record = cur_record + pebs_record_size;
count++;
} while (expected && (void *)cur_record < (void *)ds->pebs_index);
@@ -318,56 +322,57 @@
printf("FAIL: The applicable_counters (0x%lx) doesn't match with pmc_bitmask (0x%lx).\n",
pebs_rec->applicable_counters, bitmask);
if (!pebs_size_match)
- printf("FAIL: The pebs_record_size (%d) doesn't match with MSR_PEBS_DATA_CFG (%d).\n",
- pebs_record_size, get_adaptive_pebs_record_size(pebs_data_cfg));
+ printf("FAIL: The pebs_record_size (%d) doesn't match with expected record size (%d).\n",
+ pebs_record_size, get_pebs_record_size(pebs_data_cfg, use_adaptive));
if (!data_cfg_match)
- printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with MSR_PEBS_DATA_CFG (0x%lx).\n",
- pebs_rec->format_size & 0xffffffffffff, pebs_data_cfg);
+ printf("FAIL: The pebs_data_cfg (0x%lx) doesn't match with the effective MSR_PEBS_DATA_CFG (0x%lx).\n",
+ pebs_rec->format_size & 0xffffffffffff, use_adaptive ? pebs_data_cfg : 0);
}
}
-static void check_one_counter(enum pmc_type type,
- unsigned int idx, u64 pebs_data_cfg)
+static void check_one_counter(enum pmc_type type, unsigned int idx,
+ u64 pebs_data_cfg, bool use_adaptive)
{
int pebs_bit = BIT_ULL(type == FIXED ? FIXED_CNT_INDEX + idx : idx);
report_prefix_pushf("%s counter %d (0x%lx)",
type == FIXED ? "Extended Fixed" : "GP", idx, ctr_start_val);
reset_pebs();
- pebs_enable(pebs_bit, pebs_data_cfg);
+ pebs_enable(pebs_bit, pebs_data_cfg, use_adaptive);
workload();
pebs_disable(idx);
- check_pebs_records(pebs_bit, pebs_data_cfg);
+ check_pebs_records(pebs_bit, pebs_data_cfg, use_adaptive);
report_prefix_pop();
}
/* more than one PEBS records will be generated. */
-static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg)
+static void check_multiple_counters(u64 bitmask, u64 pebs_data_cfg,
+ bool use_adaptive)
{
reset_pebs();
- pebs_enable(bitmask, pebs_data_cfg);
+ pebs_enable(bitmask, pebs_data_cfg, use_adaptive);
workload2();
pebs_disable(0);
- check_pebs_records(bitmask, pebs_data_cfg);
+ check_pebs_records(bitmask, pebs_data_cfg, use_adaptive);
}
-static void check_pebs_counters(u64 pebs_data_cfg)
+static void check_pebs_counters(u64 pebs_data_cfg, bool use_adaptive)
{
unsigned int idx;
u64 bitmask = 0;
- for (idx = 0; idx < pmu.nr_fixed_counters; idx++)
- check_one_counter(FIXED, idx, pebs_data_cfg);
+ for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
+ check_one_counter(FIXED, idx, pebs_data_cfg, use_adaptive);
for (idx = 0; idx < max_nr_gp_events; idx++)
- check_one_counter(GP, idx, pebs_data_cfg);
+ check_one_counter(GP, idx, pebs_data_cfg, use_adaptive);
- for (idx = 0; idx < pmu.nr_fixed_counters; idx++)
+ for (idx = 0; has_baseline && idx < pmu.nr_fixed_counters; idx++)
bitmask |= BIT_ULL(FIXED_CNT_INDEX + idx);
for (idx = 0; idx < max_nr_gp_events; idx += 2)
bitmask |= BIT_ULL(idx);
report_prefix_pushf("Multiple (0x%lx)", bitmask);
- check_multiple_counters(bitmask, pebs_data_cfg);
+ check_multiple_counters(bitmask, pebs_data_cfg, use_adaptive);
report_prefix_pop();
}
@@ -415,13 +420,22 @@
for (i = 0; i < ARRAY_SIZE(counter_start_values); i++) {
ctr_start_val = counter_start_values[i];
- check_pebs_counters(0);
+ check_pebs_counters(0, false);
if (!has_baseline)
continue;
- for (j = 0; j < ARRAY_SIZE(pebs_data_cfgs); j++) {
- report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfgs[j]);
- check_pebs_counters(pebs_data_cfgs[j]);
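+		/*
+		 * Test every combination of the adaptive PEBS data configuration
+		 * bits, both with adaptive records enabled and with the config
+		 * expected to be ignored (adaptive disabled).
+		 */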
+ for (j = 0; j <= PEBS_DATACFG_MASK; j++) {
+ u64 pebs_data_cfg = j;
+
+ if (pebs_data_cfg & PEBS_DATACFG_LBRS)
+				pebs_data_cfg |= ((MAX_NUM_LBR_ENTRY - 1) << PEBS_DATACFG_LBR_SHIFT);
+
+ report_prefix_pushf("Adaptive (0x%lx)", pebs_data_cfg);
+ check_pebs_counters(pebs_data_cfg, true);
+ report_prefix_pop();
+
+ report_prefix_pushf("Ignored Adaptive (0x%lx)", pebs_data_cfg);
+ check_pebs_counters(pebs_data_cfg, false);
report_prefix_pop();
}
}
diff --git a/x86/unittests.cfg b/x86/unittests.cfg
index 867a8ea..7c1691a 100644
--- a/x86/unittests.cfg
+++ b/x86/unittests.cfg
@@ -81,10 +81,13 @@
extra_params = -append 'inl_from_pmtimer'
groups = vmexit
+# To allow IPIs to be accelerated by SVM AVIC when the feature is available and
+# enabled, do not create a Programmable Interval Timer (PIT, a.k.a. 8254), since
+# such a device will disable/inhibit AVIC if exposed to the guest.
[vmexit_ipi]
file = vmexit.flat
smp = 2
-extra_params = -append 'ipi'
+extra_params = -machine pit=off -append 'ipi'
groups = vmexit
[vmexit_ipi_halt]
@@ -154,7 +157,7 @@
[asyncpf]
file = asyncpf.flat
-extra_params = -m 2048
+extra_params = -cpu host -m 2048
[emulator]
file = emulator.flat
@@ -317,7 +320,7 @@
[vmx]
file = vmx.flat
-extra_params = -cpu max,+vmx -append "-exit_monitor_from_l2_test -ept_access* -vmx_smp* -vmx_vmcs_shadow_test -atomic_switch_overflow_msrs_test -vmx_init_signal_test -vmx_apic_passthrough_tpr_threshold_test -apic_reg_virt_test -virt_x2apic_mode_test -vmx_pf_exception_test -vmx_pf_exception_forced_emulation_test -vmx_pf_no_vpid_test -vmx_pf_invvpid_test -vmx_pf_vpid_test"
+extra_params = -cpu max,+vmx -append "-exit_monitor_from_l2_test -ept_access* -vmx_smp* -vmx_vmcs_shadow_test -atomic_switch_overflow_msrs_test -vmx_init_signal_test -vmx_apic_passthrough_tpr_threshold_test -apic_reg_virt_test -virt_x2apic_mode_test -vmx_pf_exception_test -vmx_pf_exception_forced_emulation_test -vmx_pf_no_vpid_test -vmx_pf_invvpid_test -vmx_pf_vpid_test -vmx_basic_vid_test -vmx_eoi_virt_test -vmx_posted_interrupts_test"
arch = x86_64
groups = vmx
@@ -343,7 +346,15 @@
[vmx_apicv_test]
file = vmx.flat
-extra_params = -cpu max,+vmx -append "apic_reg_virt_test virt_x2apic_mode_test"
+extra_params = -cpu max,+vmx -append "apic_reg_virt_test virt_x2apic_mode_test vmx_basic_vid_test vmx_eoi_virt_test"
+arch = x86_64
+groups = vmx
+timeout = 10
+
+[vmx_posted_intr_test]
+file = vmx.flat
+smp = 2
+extra_params = -cpu max,+vmx -append "vmx_posted_interrupts_test"
arch = x86_64
groups = vmx
timeout = 10
diff --git a/x86/vmx.c b/x86/vmx.c
index 12e42b0..c803eaa 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -66,7 +66,7 @@
static int guest_finished;
static int in_guest;
-union vmx_basic basic;
+union vmx_basic_msr basic_msr;
union vmx_ctrl_msr ctrl_pin_rev;
union vmx_ctrl_msr ctrl_cpu_rev[2];
union vmx_ctrl_msr ctrl_exit_rev;
@@ -369,7 +369,7 @@
struct vmcs *vmcs = alloc_page();
u32 vmcs_enum_max, max_index = 0;
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs));
assert(!make_vmcs_current(vmcs));
@@ -430,7 +430,7 @@
void *vpage = alloc_vpage();
memset(vmcs, 0, PAGE_SIZE);
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs));
assert(!make_vmcs_current(vmcs));
@@ -456,7 +456,7 @@
{
struct vmcs *vmcs = alloc_page();
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs));
assert(!make_vmcs_current(vmcs));
@@ -482,7 +482,7 @@
for (i = 0; i < ARRAY_SIZE(vmcs); i++) {
vmcs[i] = alloc_page();
- vmcs[i]->hdr.revision_id = basic.revision;
+ vmcs[i]->hdr.revision_id = basic_msr.revision;
}
#define VMPTRLD(_i) do { \
@@ -731,13 +731,13 @@
vmcs[i] = alloc_page();
}
- vmcs[0]->hdr.revision_id = basic.revision;
+ vmcs[0]->hdr.revision_id = basic_msr.revision;
assert(!vmcs_clear(vmcs[0]));
assert(!make_vmcs_current(vmcs[0]));
set_all_vmcs_fields(0x86);
assert(!vmcs_clear(vmcs[0]));
- memcpy(vmcs[1], vmcs[0], basic.size);
+ memcpy(vmcs[1], vmcs[0], basic_msr.size);
assert(!make_vmcs_current(vmcs[1]));
report(check_all_vmcs_fields(0x86),
"test vmclear flush (current VMCS)");
@@ -745,7 +745,7 @@
set_all_vmcs_fields(0x87);
assert(!make_vmcs_current(vmcs[0]));
assert(!vmcs_clear(vmcs[1]));
- memcpy(vmcs[2], vmcs[1], basic.size);
+ memcpy(vmcs[2], vmcs[1], basic_msr.size);
assert(!make_vmcs_current(vmcs[2]));
report(check_all_vmcs_fields(0x87),
"test vmclear flush (!current VMCS)");
@@ -1126,6 +1126,8 @@
vmcs_write(HOST_CR4, read_cr4());
vmcs_write(HOST_SYSENTER_EIP, (u64)(&entry_sysenter));
vmcs_write(HOST_SYSENTER_CS, KERNEL_CS);
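+	/*
+	 * Load the current PAT on VM-exit (when supported) so that a test that
+	 * runs the guest with a non-default PAT doesn't leave all memory UC.
+	 */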
+ if (ctrl_exit_rev.clr & EXI_LOAD_PAT)
+ vmcs_write(HOST_PAT, rdmsr(MSR_IA32_CR_PAT));
/* 26.2.3 */
vmcs_write(HOST_SEL_CS, KERNEL_CS);
@@ -1232,7 +1234,7 @@
int init_vmcs(struct vmcs **vmcs)
{
*vmcs = alloc_page();
- (*vmcs)->hdr.revision_id = basic.revision;
+ (*vmcs)->hdr.revision_id = basic_msr.revision;
/* vmclear first to init vmcs */
if (vmcs_clear(*vmcs)) {
printf("%s : vmcs_clear error\n", __func__);
@@ -1247,7 +1249,7 @@
/* All settings to pin/exit/enter/cpu
control fields should be placed here */
ctrl_pin |= PIN_EXTINT | PIN_NMI | PIN_VIRT_NMI;
- ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64;
+ ctrl_exit = EXI_LOAD_EFER | EXI_HOST_64 | EXI_LOAD_PAT;
ctrl_enter = (ENT_LOAD_EFER | ENT_GUEST_64);
/* DIsable IO instruction VMEXIT now */
ctrl_cpu[0] &= (~(CPU_IO | CPU_IO_BITMAP));
@@ -1279,14 +1281,14 @@
static void init_vmx_caps(void)
{
- basic.val = rdmsr(MSR_IA32_VMX_BASIC);
- ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PIN
+ basic_msr.val = rdmsr(MSR_IA32_VMX_BASIC);
+ ctrl_pin_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PIN
: MSR_IA32_VMX_PINBASED_CTLS);
- ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT
+ ctrl_exit_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_EXIT
: MSR_IA32_VMX_EXIT_CTLS);
- ctrl_enter_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
+ ctrl_enter_rev.val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_ENTRY
: MSR_IA32_VMX_ENTRY_CTLS);
- ctrl_cpu_rev[0].val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PROC
+ ctrl_cpu_rev[0].val = rdmsr(basic_msr.ctrl ? MSR_IA32_VMX_TRUE_PROC
: MSR_IA32_VMX_PROCBASED_CTLS);
if ((ctrl_cpu_rev[0].clr & CPU_SECONDARY) != 0)
ctrl_cpu_rev[1].val = rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2);
@@ -1311,7 +1313,7 @@
write_cr0((read_cr0() & fix_cr0_clr) | fix_cr0_set);
write_cr4((read_cr4() & fix_cr4_clr) | fix_cr4_set | X86_CR4_VMXE);
- *vmxon_region = basic.revision;
+ *vmxon_region = basic_msr.revision;
}
static void alloc_bsp_vmx_pages(void)
@@ -1430,7 +1432,7 @@
*/
if ((cr_number == 0 && (bit == X86_CR0_PE || bit == X86_CR0_PG)) ||
(cr_number == 4 && (bit == X86_CR4_PAE || bit == X86_CR4_SMAP ||
- bit == X86_CR4_SMEP)))
+ bit == X86_CR4_SMEP || bit == X86_CR4_CET)))
continue;
if (!(bit & required1) && !(bit & disallowed1)) {
@@ -1515,7 +1517,7 @@
/* and finally a valid region, with valid-but-tweaked cr0/cr4 */
write_cr0(orig_cr0 ^ flexible_cr0);
write_cr4(orig_cr4 ^ flexible_cr4);
- *bsp_vmxon_region = basic.revision;
+ *bsp_vmxon_region = basic_msr.revision;
ret = vmxon_safe();
report(!ret, "test vmxon with valid vmxon region");
write_cr0(orig_cr0);
@@ -1529,7 +1531,7 @@
int width = cpuid_maxphyaddr();
vmcs = alloc_page();
- vmcs->hdr.revision_id = basic.revision;
+ vmcs->hdr.revision_id = basic_msr.revision;
/* Unaligned page access */
tmp_root = (struct vmcs *)((intptr_t)vmcs + 1);
@@ -1592,10 +1594,10 @@
printf("\nTest suite: VMX capability reporting\n");
- report((basic.revision & (1ul << 31)) == 0 &&
- basic.size > 0 && basic.size <= 4096 &&
- (basic.type == 0 || basic.type == 6) &&
- basic.reserved1 == 0 && basic.reserved2 == 0,
+ report((basic_msr.revision & (1ul << 31)) == 0 &&
+ basic_msr.size > 0 && basic_msr.size <= 4096 &&
+ (basic_msr.type == 0 || basic_msr.type == 6) &&
+ basic_msr.reserved1 == 0 && basic_msr.reserved2 == 0,
"MSR_IA32_VMX_BASIC");
val = rdmsr(MSR_IA32_VMX_MISC);
@@ -1609,7 +1611,7 @@
default1 = vmx_ctl_msr[n].default1;
ok = (ctrl.set & default1) == default1;
ok = ok && (ctrl.set & ~ctrl.clr) == 0;
- if (ok && basic.ctrl) {
+ if (ok && basic_msr.ctrl) {
true_ctrl.val = rdmsr(vmx_ctl_msr[n].true_index);
ok = ctrl.clr == true_ctrl.clr;
ok = ok && ctrl.set == (true_ctrl.set | default1);
diff --git a/x86/vmx.h b/x86/vmx.h
index bc61a58..9cd9048 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -130,7 +130,7 @@
void (*v2)(void);
};
-union vmx_basic {
+union vmx_basic_msr {
u64 val;
struct {
u32 revision;
@@ -141,7 +141,8 @@
type:4,
insouts:1,
ctrl:1,
- reserved2:8;
+ no_hw_errcode_cc:1,
+ reserved2:7;
};
};
@@ -752,7 +753,7 @@
extern struct regs regs;
-extern union vmx_basic basic;
+extern union vmx_basic_msr basic_msr;
extern union vmx_ctrl_msr ctrl_pin_rev;
extern union vmx_ctrl_msr ctrl_cpu_rev[2];
extern union vmx_ctrl_msr ctrl_exit_rev;
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index 97b8e72..ffe7064 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -60,6 +60,16 @@
asm volatile("vmcall");
}
+static u32 *get_vapic_page(void)
+{
+ return (u32 *)phys_to_virt(vmcs_read(APIC_VIRT_ADDR));
+}
+
+static u64 *get_pi_desc(void)
+{
+ return (u64 *)phys_to_virt(vmcs_read(POSTED_INTR_DESC_ADDR));
+}
+
static void basic_guest_main(void)
{
report_pass("Basic VMX test");
@@ -1039,11 +1049,14 @@
printf("\tEPT is not supported\n");
return 1;
}
- if (!(ept_vpid.val & EPT_CAP_WB)) {
+ if (!is_ept_memtype_supported(EPT_MEM_TYPE_WB)) {
printf("\tWB memtype for EPT walks not supported\n");
return 1;
}
- if (!(ept_vpid.val & EPT_CAP_PWL4)) {
+
+ if (!is_4_level_ept_supported()) {
+ /* Support for 4-level EPT is mandatory. */
+ report(false, "4-level EPT support check");
printf("\tPWL4 is not supported\n");
return 1;
}
@@ -3443,7 +3456,7 @@
{
unsigned bit;
- printf("%s: %lx\n", basic.ctrl ? "MSR_IA32_VMX_TRUE_PIN" :
+ printf("%s: %lx\n", basic_msr.ctrl ? "MSR_IA32_VMX_TRUE_PIN" :
"MSR_IA32_VMX_PINBASED_CTLS", ctrl_pin_rev.val);
for (bit = 0; bit < 32; bit++)
test_rsvd_ctl_bit("pin-based controls",
@@ -3460,7 +3473,7 @@
{
unsigned bit;
- printf("\n%s: %lx\n", basic.ctrl ? "MSR_IA32_VMX_TRUE_PROC" :
+ printf("\n%s: %lx\n", basic_msr.ctrl ? "MSR_IA32_VMX_TRUE_PROC" :
"MSR_IA32_VMX_PROCBASED_CTLS", ctrl_cpu_rev[0].val);
for (bit = 0; bit < 32; bit++)
test_rsvd_ctl_bit("primary processor-based controls",
@@ -4189,7 +4202,10 @@
ent_intr_info);
vmcs_write(GUEST_CR0, guest_cr0_save & ~X86_CR0_PE & ~X86_CR0_PG);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
ent_intr_info = ent_intr_info_base | INTR_INFO_DELIVER_CODE_MASK |
@@ -4222,7 +4238,10 @@
ent_intr_info);
vmcs_write(GUEST_CR0, guest_cr0_save | X86_CR0_PE);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
vmcs_write(CPU_EXEC_CTRL1, secondary_save);
@@ -4244,7 +4263,11 @@
report_prefix_pushf("VM-entry intr info=0x%x [-]",
ent_intr_info);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (exception_type_mask == INTR_TYPE_HARD_EXCEPTION &&
+ basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
}
report_prefix_pop();
@@ -4281,7 +4304,10 @@
report_prefix_pushf("VM-entry intr info=0x%x [-]",
ent_intr_info);
vmcs_write(ENT_INTR_INFO, ent_intr_info);
- test_vmx_invalid_controls();
+ if (basic_msr.no_hw_errcode_cc)
+ test_vmx_valid_controls();
+ else
+ test_vmx_invalid_controls();
report_prefix_pop();
/* Positive case */
@@ -4655,28 +4681,22 @@
u32 primary_saved = vmcs_read(CPU_EXEC_CTRL0);
u32 secondary_saved = vmcs_read(CPU_EXEC_CTRL1);
u64 eptp_saved = vmcs_read(EPTP);
- u32 primary = primary_saved;
- u32 secondary = secondary_saved;
- u64 eptp = eptp_saved;
+ u32 secondary;
+ u64 eptp;
u32 i, maxphysaddr;
u64 j, resv_bits_mask = 0;
- if (!((ctrl_cpu_rev[0].clr & CPU_SECONDARY) &&
- (ctrl_cpu_rev[1].clr & CPU_EPT))) {
- report_skip("%s : \"CPU secondary\" and/or \"enable EPT\" exec control not supported", __func__);
+ if (__setup_ept(0xfed40000, false)) {
+ report_skip("%s : EPT not supported", __func__);
return;
}
- /* Support for 4-level EPT is mandatory. */
- report(is_4_level_ept_supported(), "4-level EPT support check");
+ test_vmx_valid_controls();
- primary |= CPU_SECONDARY;
- vmcs_write(CPU_EXEC_CTRL0, primary);
- secondary |= CPU_EPT;
- vmcs_write(CPU_EXEC_CTRL1, secondary);
- eptp = (eptp & ~EPTP_PG_WALK_LEN_MASK) |
- (3ul << EPTP_PG_WALK_LEN_SHIFT);
- vmcs_write(EPTP, eptp);
+ setup_dummy_ept();
+
+ secondary = vmcs_read(CPU_EXEC_CTRL1);
+ eptp = vmcs_read(EPTP);
for (i = 0; i < 8; i++) {
eptp = (eptp & ~EPT_MEM_TYPE_MASK) | i;
@@ -5303,7 +5323,7 @@
report_prefix_pop();
}
- if (basic.val & (1ul << 48))
+ if (basic_msr.val & (1ul << 48))
addr_len = 32;
test_vmcs_addr_values("VM-entry-MSR-load address",
@@ -5431,7 +5451,7 @@
report_prefix_pop();
}
- if (basic.val & (1ul << 48))
+ if (basic_msr.val & (1ul << 48))
addr_len = 32;
test_vmcs_addr_values("VM-exit-MSR-store address",
@@ -7213,6 +7233,7 @@
static void test_pat(u32 field, const char * field_name, u32 ctrl_field,
u64 ctrl_bit)
{
+ u64 pat_msr_saved = rdmsr(MSR_IA32_CR_PAT);
u32 ctrl_saved = vmcs_read(ctrl_field);
u64 pat_saved = vmcs_read(field);
u64 i, val;
@@ -7232,7 +7253,7 @@
report_prefix_pop();
} else { // GUEST_PAT
- test_guest_state("ENT_LOAD_PAT enabled", false,
+ test_guest_state("ENT_LOAD_PAT disabled", false,
val, "GUEST_PAT");
}
}
@@ -7254,12 +7275,22 @@
error = 0;
test_vmx_vmlaunch(error);
+
+ if (!error)
+ report(rdmsr(MSR_IA32_CR_PAT) == val,
+ "Expected PAT = 0x%lx, got 0x%lx",
+ val, rdmsr(MSR_IA32_CR_PAT));
+ wrmsr(MSR_IA32_CR_PAT, pat_msr_saved);
+
report_prefix_pop();
} else { // GUEST_PAT
error = (i == 0x2 || i == 0x3 || i >= 0x8);
test_guest_state("ENT_LOAD_PAT enabled", !!error,
val, "GUEST_PAT");
+
+ if (!(ctrl_exit_rev.clr & EXI_LOAD_PAT))
+ wrmsr(MSR_IA32_CR_PAT, pat_msr_saved);
}
}
@@ -9305,6 +9336,7 @@
assert(cpu_has_apicv());
+ enable_x2apic();
disable_intercept_for_x2apic_msrs();
virtual_apic_page = alloc_page();
@@ -9321,6 +9353,18 @@
vmcs_set_bits(CPU_EXEC_CTRL1, CPU_VINTD | CPU_VIRT_X2APIC);
}
+#define PI_VECTOR 255
+
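+/*
+ * Enable posted-interrupt processing: acknowledge interrupts on VM-exit, use
+ * PI_VECTOR as the posted-interrupt notification vector, and point the VMCS
+ * at a newly allocated posted-interrupt descriptor.
+ */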
+static void enable_posted_interrupts(void)
+{
+ void *pi_desc = alloc_page();
+
+ vmcs_set_bits(PIN_CONTROLS, PIN_POST_INTR);
+ vmcs_set_bits(EXI_CONTROLS, EXI_INTA);
+ vmcs_write(PINV, PI_VECTOR);
+ vmcs_write(POSTED_INTR_DESC_ADDR, (u64)pi_desc);
+}
+
static void trigger_ioapic_scan_thread(void *data)
{
/* Wait until other CPU entered L2 */
@@ -10183,7 +10227,7 @@
vmcs_write(VMWRITE_BITMAP, virt_to_phys(bitmap[ACCESS_VMWRITE]));
shadow = alloc_page();
- shadow->hdr.revision_id = basic.revision;
+ shadow->hdr.revision_id = basic_msr.revision;
shadow->hdr.shadow_vmcs = 1;
TEST_ASSERT(!vmcs_clear(shadow));
@@ -10710,6 +10754,402 @@
test_set_guest_finished();
}
+enum Vid_op {
+ VID_OP_SET_ISR,
+ VID_OP_NOP,
+ VID_OP_SET_CR8,
+ VID_OP_SELF_IPI,
+ VID_OP_TERMINATE,
+ VID_OP_SPIN,
+ VID_OP_SPIN_IRR,
+ VID_OP_HLT,
+};
+
+struct vmx_basic_vid_test_guest_args {
+ enum Vid_op op;
+ u8 nr;
+ u32 isr_exec_cnt;
+ u32 *virtual_apic_page;
+ u64 *pi_desc;
+ u32 dest;
+ bool in_guest;
+} vmx_basic_vid_test_guest_args;
+
+/*
+ * From the SDM, Bit x of the VIRR is
+ * at bit position (x & 1FH)
+ * at offset (200H | ((x & E0H) >> 1)).
+ */
+static void set_virr_bit(volatile u32 *virtual_apic_page, u8 nr)
+{
+ u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32);
+ u32 mask = 1 << (nr & 0x1f);
+
+ virtual_apic_page[page_offset] |= mask;
+}
+
+static void clear_virr_bit(volatile u32 *virtual_apic_page, u8 nr)
+{
+ u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32);
+ u32 mask = 1 << (nr & 0x1f);
+
+ virtual_apic_page[page_offset] &= ~mask;
+}
+
+static bool get_virr_bit(volatile u32 *virtual_apic_page, u8 nr)
+{
+ u32 page_offset = (0x200 | ((nr & 0xE0) >> 1)) / sizeof(u32);
+ u32 mask = 1 << (nr & 0x1f);
+
+ return virtual_apic_page[page_offset] & mask;
+}
+
+static void vmx_vid_test_isr(isr_regs_t *regs)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+
+ args->isr_exec_cnt++;
+ barrier();
+ eoi();
+}
+
+static void vmx_basic_vid_test_guest(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+
+ sti_nop();
+ for (;;) {
+ enum Vid_op op = args->op;
+ u8 nr = args->nr;
+
+ switch (op) {
+ case VID_OP_TERMINATE:
+ return;
+ case VID_OP_SET_ISR:
+ handle_irq(nr, vmx_vid_test_isr);
+ break;
+ case VID_OP_SET_CR8:
+ write_cr8(nr);
+ break;
+ case VID_OP_SELF_IPI:
+ vmx_x2apic_write(APIC_SELF_IPI, nr);
+ break;
+ case VID_OP_HLT:
+ cli();
+ barrier();
+ args->in_guest = true;
+ barrier();
+ safe_halt();
+ break;
+ case VID_OP_SPIN:
+ args->in_guest = true;
+ while (!args->isr_exec_cnt)
+ pause();
+ break;
+ case VID_OP_SPIN_IRR: {
+ u32 *virtual_apic_page = args->virtual_apic_page;
+ u8 nr = args->nr;
+
+ args->in_guest = true;
+ while (!get_virr_bit(virtual_apic_page, nr))
+ pause();
+ clear_virr_bit(virtual_apic_page, nr);
+ break;
+ }
+ default:
+ break;
+ }
+
+ vmcall();
+ }
+}
+
+static void set_isrs_for_vmx_basic_vid_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 nr;
+
+ /*
+ * kvm-unit-tests uses vector 32 for IPIs, so don't install a test ISR
+ * for that vector.
+ */
+ for (nr = 0x21; nr < 0x100; nr++) {
+ vmcs_write(GUEST_INT_STATUS, 0);
+ args->virtual_apic_page = get_vapic_page();
+ args->op = VID_OP_SET_ISR;
+ args->nr = nr;
+ args->isr_exec_cnt = 0;
+ enter_guest();
+ skip_exit_vmcall();
+ }
+ report(true, "Set ISR for vectors 33-255.");
+}
+
+static void vmx_posted_interrupts_test_worker(void *data)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+
+ while (!args->in_guest)
+ pause();
+
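+	/*
+	 * Post the vector in the PIR, set the outstanding-notification bit
+	 * (bit 256 of the PI descriptor), and send the notification IPI.
+	 */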
+ test_and_set_bit(args->nr, args->pi_desc);
+ test_and_set_bit(256, args->pi_desc);
+ apic_icr_write(PI_VECTOR, args->dest);
+}
+
+/*
+ * Test virtual interrupt delivery (VID) at VM-entry or TPR virtualization
+ *
+ * Args:
+ * nr: vector under test
+ * tpr: task priority under test
+ *   op: action the guest takes after VM-entry (see enum Vid_op)
+ *   isr_exec_cnt_want: expected number of test ISR invocations in the guest
+ *   eoi_exit_induced: true if EOI virtualization is expected to trigger
+ *       EOI-induced VM-exits while the virtual interrupt(s) are delivered
+ */
+static void test_basic_vid(u8 nr, u8 tpr, enum Vid_op op, u32 isr_exec_cnt_want,
+ bool eoi_exit_induced)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 rvi_want = isr_exec_cnt_want ? 0 : nr;
+ u16 int_status;
+
+ /*
+ * From the SDM:
+ * IF "interrupt-window exiting" is 0 AND
+ * RVI[7:4] > VPPR[7:4] (see Section 29.1.1 for definition of VPPR)
+ * THEN recognize a pending virtual interrupt;
+ * ELSE
+ * do not recognize a pending virtual interrupt;
+ * FI;
+ *
+ * Thus, VPPR dictates whether a virtual interrupt is recognized.
+ * However, PPR virtualization, which occurs before virtual interrupt
+ * delivery, sets VPPR to VTPR, when SVI is 0.
+ */
+ args->isr_exec_cnt = 0;
+ args->virtual_apic_page = get_vapic_page();
+ args->op = op;
+ args->in_guest = false;
+ switch (op) {
+ case VID_OP_SELF_IPI:
+ vmcs_write(GUEST_INT_STATUS, 0);
+ args->nr = nr;
+ set_vtpr(0);
+ break;
+ case VID_OP_SET_CR8:
+ vmcs_write(GUEST_INT_STATUS, nr);
+ args->nr = task_priority_class(tpr);
+ set_vtpr(0xff);
+ break;
+ case VID_OP_SPIN:
+ case VID_OP_SPIN_IRR:
+ case VID_OP_HLT:
+ vmcs_write(GUEST_INT_STATUS, 0);
+ args->nr = nr;
+ set_vtpr(tpr);
+ barrier();
+ on_cpu_async(1, vmx_posted_interrupts_test_worker, NULL);
+ break;
+ default:
+ vmcs_write(GUEST_INT_STATUS, nr);
+ set_vtpr(tpr);
+ break;
+ }
+
+ enter_guest();
+ if (eoi_exit_induced) {
+ u32 exit_cnt;
+
+ assert_exit_reason(VMX_EOI_INDUCED);
+ for (exit_cnt = 1; exit_cnt < isr_exec_cnt_want; exit_cnt++) {
+ enter_guest();
+ assert_exit_reason(VMX_EOI_INDUCED);
+ }
+ enter_guest();
+ }
+ skip_exit_vmcall();
+ TEST_ASSERT_EQ(args->isr_exec_cnt, isr_exec_cnt_want);
+ int_status = vmcs_read(GUEST_INT_STATUS);
+ TEST_ASSERT_EQ(int_status, rvi_want);
+}
+
+/*
+ * Test recognizing and delivering virtual interrupts via "Virtual-interrupt
+ * delivery" for two scenarios:
+ * 1. When there is a pending interrupt at VM-entry.
+ * 2. When there is a pending interrupt during TPR virtualization.
+ */
+static void vmx_basic_vid_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u8 nr_class;
+
+ if (!cpu_has_apicv()) {
+ report_skip("%s : Not all required APICv bits supported", __func__);
+ return;
+ }
+
+ enable_vid();
+ test_set_guest(vmx_basic_vid_test_guest);
+ set_isrs_for_vmx_basic_vid_test();
+
+ for (nr_class = 2; nr_class < 16; nr_class++) {
+ u16 nr;
+ u8 nr_sub_class;
+
+ for (nr_sub_class = 0; nr_sub_class < 16; nr_sub_class++) {
+ u16 tpr;
+
+ nr = (nr_class << 4) | nr_sub_class;
+
+ /*
+ * Don't test the reserved IPI vector, as the test ISR
+ * was not installed.
+ */
+ if (nr == 0x20)
+ continue;
+
+ test_basic_vid(nr, /*tpr=*/0, VID_OP_SELF_IPI,
+ /*isr_exec_cnt_want=*/1,
+ /*eoi_exit_induced=*/false);
+ for (tpr = 0; tpr < 256; tpr++) {
+ u32 isr_exec_cnt_want =
+ task_priority_class(nr) >
+ task_priority_class(tpr) ? 1 : 0;
+
+ test_basic_vid(nr, tpr, VID_OP_NOP,
+ isr_exec_cnt_want,
+ /*eoi_exit_induced=*/false);
+ test_basic_vid(nr, tpr, VID_OP_SET_CR8,
+ isr_exec_cnt_want,
+ /*eoi_exit_induced=*/false);
+ }
+ report(true, "TPR 0-255 for vector 0x%x.", nr);
+ }
+ }
+
+ /* Terminate the guest */
+ args->op = VID_OP_TERMINATE;
+ enter_guest();
+ assert_exit_reason(VMX_VMCALL);
+}
+
+static void test_eoi_virt(u8 nr, u8 lo_pri_nr, bool eoi_exit_induced)
+{
+ u32 *virtual_apic_page = get_vapic_page();
+
+ set_virr_bit(virtual_apic_page, lo_pri_nr);
+ test_basic_vid(nr, /*tpr=*/0, VID_OP_NOP, /*isr_exec_cnt_want=*/2,
+ eoi_exit_induced);
+ TEST_ASSERT(!get_virr_bit(virtual_apic_page, lo_pri_nr));
+ TEST_ASSERT(!get_virr_bit(virtual_apic_page, nr));
+}
+
+static void vmx_eoi_virt_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 nr;
+ u16 lo_pri_nr;
+
+ if (!cpu_has_apicv()) {
+ report_skip("%s : Not all required APICv bits supported", __func__);
+ return;
+ }
+
+ enable_vid(); /* Note, enable_vid sets APIC_VIRT_ADDR field in VMCS. */
+ test_set_guest(vmx_basic_vid_test_guest);
+ set_isrs_for_vmx_basic_vid_test();
+
+ /* Now test EOI virtualization without induced EOI exits. */
+ for (nr = 0x22; nr < 0x100; nr++) {
+ for (lo_pri_nr = 0x21; lo_pri_nr < nr; lo_pri_nr++)
+ test_eoi_virt(nr, lo_pri_nr,
+ /*eoi_exit_induced=*/false);
+
+ report(true, "Low priority nrs 0x21-0x%x for nr 0x%x.",
+ nr - 1, nr);
+ }
+
+ /* Finally, test EOI virtualization with induced EOI exits. */
+ vmcs_write(EOI_EXIT_BITMAP0, GENMASK_ULL(63, 0));
+ vmcs_write(EOI_EXIT_BITMAP1, GENMASK_ULL(63, 0));
+ vmcs_write(EOI_EXIT_BITMAP2, GENMASK_ULL(63, 0));
+ vmcs_write(EOI_EXIT_BITMAP3, GENMASK_ULL(63, 0));
+ for (nr = 0x22; nr < 0x100; nr++) {
+ for (lo_pri_nr = 0x21; lo_pri_nr < nr; lo_pri_nr++)
+ test_eoi_virt(nr, lo_pri_nr,
+ /*eoi_exit_induced=*/true);
+
+ report(true,
+ "Low priority nrs 0x21-0x%x for nr 0x%x, with induced EOI exits.",
+ nr - 1, nr);
+ }
+
+ /* Terminate the guest */
+ args->op = VID_OP_TERMINATE;
+ enter_guest();
+ assert_exit_reason(VMX_VMCALL);
+}
+
+static void vmx_posted_interrupts_test(void)
+{
+ volatile struct vmx_basic_vid_test_guest_args *args =
+ &vmx_basic_vid_test_guest_args;
+ u16 vector;
+ u8 class;
+
+ if (!cpu_has_apicv()) {
+ report_skip("%s : Not all required APICv bits supported", __func__);
+ return;
+ }
+
+ if (cpu_count() < 2) {
+ report_skip("%s : CPU count < 2", __func__);
+ return;
+ }
+
+ enable_vid();
+ enable_posted_interrupts();
+ args->pi_desc = get_pi_desc();
+ args->dest = apic_id();
+
+ test_set_guest(vmx_basic_vid_test_guest);
+ set_isrs_for_vmx_basic_vid_test();
+
+ for (class = 0; class < 16; class++) {
+ for (vector = 33; vector < 256; vector++) {
+ /*
+ * If the vector isn't above TPR, then the vector should
+ * be moved from PIR to the IRR, but never serviced.
+ *
+ * Only test posted interrupts to a halted vCPU if the
+ * interrupt is expected to be serviced. Otherwise, the
+ * vCPU will HLT indefinitely.
+ */
+ if (task_priority_class(vector) <= class) {
+ test_basic_vid(vector, class << 4,
+ VID_OP_SPIN_IRR, 0, false);
+ continue;
+ }
+
+ test_basic_vid(vector, class << 4, VID_OP_SPIN, 1, false);
+ test_basic_vid(vector, class << 4, VID_OP_HLT, 1, false);
+ }
+ }
+	report(true, "Posted vectors 33-255 across TPR classes 0-0xf, running and sometimes halted");
+
+ /* Terminate the guest */
+ args->op = VID_OP_TERMINATE;
+ enter_guest();
+}
+
#define TEST(name) { #name, .v2 = name }
/* name/init/guest_main/exit_handler/syscall_handler/guest_regs */
@@ -10764,6 +11204,9 @@
TEST(vmx_hlt_with_rvi_test),
TEST(apic_reg_virt_test),
TEST(virt_x2apic_mode_test),
+ TEST(vmx_basic_vid_test),
+ TEST(vmx_eoi_virt_test),
+ TEST(vmx_posted_interrupts_test),
/* APIC pass-through tests */
TEST(vmx_apic_passthrough_test),
TEST(vmx_apic_passthrough_thread_test),