Merge tag 'pull-request-2020-06-16' of https://gitlab.com/huth/kvm-unit-tests
* Lots of CI-related fixes and improvements
* Update the gitlab-CI to Fedora 32
* Test compilation with Clang
diff --git a/lib/alloc.c b/lib/alloc.c
index ed8f5f9..6c89f98 100644
--- a/lib/alloc.c
+++ b/lib/alloc.c
@@ -1,14 +1,49 @@
#include "alloc.h"
#include "asm/page.h"
+#include "bitops.h"
void *malloc(size_t size)
{
return memalign(sizeof(long), size);
}
+static bool mult_overflow(size_t a, size_t b)
+{
+#if BITS_PER_LONG == 32
+ /* 32 bit system, easy case: just use u64 */
+ return (u64)a * (u64)b >= (1ULL << 32);
+#else
+#ifdef __SIZEOF_INT128__
+ /* if __int128 is available use it (like the u64 case above) */
+ unsigned __int128 res = a;
+ res *= b;
+ res >>= 64;
+ return res != 0;
+#else
+ u64 tmp;
+
+ if ((a >> 32) && (b >> 32))
+ return true;
+ if (!(a >> 32) && !(b >> 32))
+ return false;
+ tmp = (u32)a;
+ tmp *= (u32)b;
+ tmp >>= 32;
+ if (a < b)
+ tmp += a * (b >> 32);
+ else
+ tmp += b * (a >> 32);
+ return tmp >> 32;
+#endif /* __SIZEOF_INT128__ */
+#endif /* BITS_PER_LONG == 32 */
+}
+
void *calloc(size_t nmemb, size_t size)
{
- void *ptr = malloc(nmemb * size);
+ void *ptr;
+
+ assert(!mult_overflow(nmemb, size));
+ ptr = malloc(nmemb * size);
if (ptr)
memset(ptr, 0, nmemb * size);
return ptr;
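
A note on the fallback branch of mult_overflow() above: after the two early exits, at most one operand has bits set above 31, so a*b = lo(a)*lo(b) + small*hi(large)*2^32, and the sum that feeds the final shift cannot itself wrap a u64. The sketch below mirrors that branch for 64-bit operands so it can be checked on any host; the harness and names are illustrative, not part of the patch.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Mirror of mult_overflow()'s no-__int128 branch for 64-bit size_t. */
    static bool mult_overflow_u64(uint64_t a, uint64_t b)
    {
        uint64_t tmp;

        if ((a >> 32) && (b >> 32))
            return true;        /* both operands >= 2^32: product >= 2^64 */
        if (!(a >> 32) && !(b >> 32))
            return false;       /* both operands < 2^32: product < 2^64 */

        /*
         * Exactly one operand (the larger) has high bits. Since
         * a*b = lo(a)*lo(b) + small*hi(large)*2^32, bits 64 and up of
         * the product equal ((lo-product >> 32) + small*hi(large)) >> 32.
         */
        tmp = (uint32_t)a;
        tmp *= (uint32_t)b;
        tmp >>= 32;
        if (a < b)
            tmp += a * (b >> 32);
        else
            tmp += b * (a >> 32);
        return tmp >> 32;
    }

    int main(void)
    {
        assert(!mult_overflow_u64(1ull << 31, 1ull << 32));  /* 2^63 fits */
        assert(mult_overflow_u64(1ull << 32, 1ull << 32));   /* 2^64 overflows */
        assert(mult_overflow_u64(3, 0x6000000000000000ull)); /* 1.125 * 2^64 */
        return 0;
    }
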
diff --git a/lib/alloc_page.c b/lib/alloc_page.c
index 7c8461a..8769c3f 100644
--- a/lib/alloc_page.c
+++ b/lib/alloc_page.c
@@ -176,7 +176,7 @@
alloc_ops = &page_alloc_ops;
}
-int get_order(size_t size)
+unsigned int get_order(size_t size)
{
return is_power_of_2(size) ? fls(size) : fls(size) + 1;
}
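
For reference, get_order() computes the smallest n with 2^n >= size, i.e. a ceiling log2. Below is a host-side sketch of the same expression, assuming fls() returns the index of the most significant set bit (fls(1) == 0) as in lib/bitops.h; the harness is illustrative only.

    #include <assert.h>
    #include <stddef.h>

    /* Assumed fls() semantics: index of the most significant set bit. */
    static unsigned int fls(size_t x)
    {
        return 8 * sizeof(size_t) - 1 - __builtin_clzl(x);
    }

    static int is_power_of_2(size_t x)
    {
        return x != 0 && (x & (x - 1)) == 0;
    }

    /* Mirror of get_order(): smallest n with 2^n >= size. */
    static unsigned int get_order(size_t size)
    {
        return is_power_of_2(size) ? fls(size) : fls(size) + 1;
    }

    int main(void)
    {
        assert(get_order(1) == 0);
        assert(get_order(2) == 1);
        assert(get_order(3) == 2);      /* rounds up to 4 */
        assert(get_order(4096) == 12);
        assert(get_order(4097) == 13);
        return 0;
    }
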
diff --git a/lib/alloc_page.h b/lib/alloc_page.h
index e6a51d2..6181299 100644
--- a/lib/alloc_page.h
+++ b/lib/alloc_page.h
@@ -15,6 +15,6 @@
void free_page(void *page);
void free_pages(void *mem, unsigned long size);
void free_pages_by_order(void *mem, unsigned long order);
-int get_order(size_t size);
+unsigned int get_order(size_t size);
#endif
diff --git a/lib/alloc_phys.h b/lib/alloc_phys.h
index ea38f91..611aa70 100644
--- a/lib/alloc_phys.h
+++ b/lib/alloc_phys.h
@@ -39,7 +39,7 @@
/*
* phys_alloc_get_unused allocates all remaining memory from the region
* passed to phys_alloc_init, returning the newly allocated memory's base
- * and top addresses. phys_allo_get_unused will still return base and top
+ * and top addresses. phys_alloc_get_unused will still return base and top
* when no free memory is remaining, but base will equal top.
*/
extern void phys_alloc_get_unused(phys_addr_t *p_base, phys_addr_t *p_top);
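
As a usage note, the setup_vm() changes in lib/vmalloc.c below exercise exactly this contract when handing leftover physical memory to the page allocator; condensed:

    phys_addr_t base, top;

    phys_alloc_get_unused(&base, &top);
    if (base != top) {              /* anything left to hand over? */
        base = PAGE_ALIGN(base);    /* round base up to a page boundary... */
        top = top & -PAGE_SIZE;     /* ...and top down */
        free_pages(phys_to_virt(base), top - base);
    }
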
diff --git a/lib/vmalloc.c b/lib/vmalloc.c
index 5022a31..10f15af 100644
--- a/lib/vmalloc.c
+++ b/lib/vmalloc.c
@@ -20,10 +20,16 @@
void *alloc_vpages(ulong nr)
{
+ uintptr_t ptr;
+
spin_lock(&lock);
- vfree_top -= PAGE_SIZE * nr;
+ ptr = (uintptr_t)vfree_top;
+ ptr -= PAGE_SIZE * nr;
+ vfree_top = (void *)ptr;
spin_unlock(&lock);
- return vfree_top;
+
+ /* Cannot return vfree_top here; we are outside the lock! */
+ return (void *)ptr;
}
void *alloc_vpage(void)
@@ -31,17 +37,12 @@
return alloc_vpages(1);
}
-void init_alloc_vpage(void *top)
-{
- vfree_top = top;
-}
-
void *vmap(phys_addr_t phys, size_t size)
{
void *mem, *p;
unsigned pages;
- size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+ size = PAGE_ALIGN(size);
pages = size / PAGE_SIZE;
mem = p = alloc_vpages(pages);
@@ -60,7 +61,7 @@
unsigned pages;
assert(alignment <= PAGE_SIZE);
- size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+ size = PAGE_ALIGN(size);
pages = size / PAGE_SIZE;
mem = p = alloc_vpages(pages);
while (pages--) {
@@ -90,6 +91,14 @@
{
}
+void init_alloc_vpage(void *top)
+{
+ spin_lock(&lock);
+ assert(alloc_ops != &vmalloc_ops);
+ vfree_top = top;
+ spin_unlock(&lock);
+}
+
void setup_vm()
{
phys_addr_t base, top;
@@ -104,7 +113,7 @@
* so that it can be used to allocate page tables.
*/
if (!page_alloc_initialized()) {
- base = (base + PAGE_SIZE - 1) & -PAGE_SIZE;
+ base = PAGE_ALIGN(base);
top = top & -PAGE_SIZE;
free_pages(phys_to_virt(base), top - base);
}
@@ -113,10 +122,13 @@
phys_alloc_get_unused(&base, &top);
page_root = setup_mmu(top);
if (base != top) {
- base = (base + PAGE_SIZE - 1) & -PAGE_SIZE;
+ base = PAGE_ALIGN(base);
top = top & -PAGE_SIZE;
free_pages(phys_to_virt(base), top - base);
}
+ spin_lock(&lock);
+ assert(alloc_ops != &vmalloc_ops);
alloc_ops = &vmalloc_ops;
+ spin_unlock(&lock);
}
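
The alloc_vpages() change above is a general locking pattern: capture any value derived from shared state while the lock is held, since re-reading vfree_top after spin_unlock() may observe a concurrent allocation. A minimal pthread rendition of the same pattern (illustrative only, not kvm-unit-tests code):

    #include <pthread.h>
    #include <stdint.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static uintptr_t free_top = 0x40000000;   /* grows downward */

    /* Safe: snapshot the new top under the lock and return the
     * snapshot, not the shared variable. */
    static void *bump_down(uintptr_t bytes)
    {
        uintptr_t ptr;

        pthread_mutex_lock(&lock);
        ptr = free_top - bytes;
        free_top = ptr;
        pthread_mutex_unlock(&lock);

        /* Returning (void *)free_top here would race with a
         * concurrent bump_down() that lowers free_top again. */
        return (void *)ptr;
    }

    int main(void)
    {
        void *a = bump_down(4096);
        void *b = bump_down(4096);
        return (a != b) ? 0 : 1;  /* each caller gets a distinct range */
    }
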
diff --git a/lib/vmalloc.h b/lib/vmalloc.h
index 3658b80..2b563f4 100644
--- a/lib/vmalloc.h
+++ b/lib/vmalloc.h
@@ -3,15 +3,23 @@
#include <asm/page.h>
+/* Allocate consecutive virtual pages (without backing) */
extern void *alloc_vpages(ulong nr);
+/* Allocate one virtual page (without backing) */
extern void *alloc_vpage(void);
+/* Set the top of the virtual address space */
extern void init_alloc_vpage(void *top);
+/* Set up the virtual allocator; also sets up the page allocator if needed */
extern void setup_vm(void);
+/* Set up paging */
extern void *setup_mmu(phys_addr_t top);
+/* Walk the page table and resolve the virtual address to a physical address */
extern phys_addr_t virt_to_pte_phys(pgd_t *pgtable, void *virt);
+/* Map the virtual address to the physical address for the given page tables */
extern pteval_t *install_page(pgd_t *pgtable, phys_addr_t phys, void *virt);
+/* Map consecutive physical pages */
void *vmap(phys_addr_t phys, size_t size);
#endif
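
To make the vmap() contract concrete, a hedged usage sketch in the shape of a kvm-unit-tests test body; the physical address is a placeholder and the includes follow the usual test layout:

    #include <libcflat.h>
    #include <vmalloc.h>
    #include <asm/page.h>

    /* Hypothetical test body: 0x12340000 is a placeholder address. */
    static void touch_phys_buffer(void)
    {
        phys_addr_t phys = 0x12340000;
        void *va = vmap(phys, 3 * PAGE_SIZE);   /* size is rounded up to pages */

        /* The mapping is linear: va + off resolves to phys + off. */
        *(volatile u32 *)va = 0xdeadbeef;
    }
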
diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
index 073580a..7e2a3dd 100644
--- a/lib/x86/asm/page.h
+++ b/lib/x86/asm/page.h
@@ -19,6 +19,8 @@
#ifndef __ASSEMBLY__
+#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+
#ifdef __x86_64__
#define LARGE_PAGE_SIZE (512 * PAGE_SIZE)
#else
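
PAGE_ALIGN() rounds up, while the open-coded `top & -PAGE_SIZE` left in lib/vmalloc.c rounds down. A self-contained sketch of both, assuming ALIGN() is the usual Linux-style round-up macro:

    #include <assert.h>

    #define PAGE_SIZE         4096ul
    #define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))  /* round up */
    #define PAGE_ALIGN(addr)  ALIGN(addr, PAGE_SIZE)

    int main(void)
    {
        assert(PAGE_ALIGN(0) == 0);
        assert(PAGE_ALIGN(1) == 4096);
        assert(PAGE_ALIGN(4096) == 4096);
        assert(PAGE_ALIGN(4097) == 8192);
        /* The complementary round-down used for 'top' in setup_vm(): */
        assert((8191ul & -PAGE_SIZE) == 4096);
        return 0;
    }
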
diff --git a/x86/cstart.S b/x86/cstart.S
index 38ac19b..fa62e09 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -96,13 +96,13 @@
.globl start
start:
+ mov $stacktop, %esp
push %ebx
call setup_multiboot
call setup_libcflat
mov mb_cmdline(%ebx), %eax
mov %eax, __args
call __setup_args
- mov $stacktop, %esp
setup_percpu_area
call prepare_32
jmpl $8, $start32
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index a645d66..610685b 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -1789,6 +1789,105 @@
return get_test_stage(test) == 5;
}
+/*
+ * Detect nested guest RIP corruption as explained in kernel commit
+ * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
+ *
+ * In the assembly loop below 'ins' is executed while IO instructions
+ * are not intercepted; the instruction is emulated by L0.
+ *
+ * At the same time we are getting interrupts from the local APIC timer,
+ * and we do intercept them in L1.
+ *
+ * If the interrupt happens on the insb instruction, L0 will VMexit, emulate
+ * the insb instruction and then it will inject the interrupt to L1 through
+ * a nested VMexit. Due to a bug, it would leave pre-emulation values of RIP,
+ * RAX and RSP in the VMCB.
+ *
+ * In our intercept handler we detect the bug by checking that RIP still
+ * points at the insb instruction even though its memory operand has
+ * already been written, which means insb has in fact been executed.
+ */
+
+static volatile int isr_cnt = 0;
+static volatile uint8_t io_port_var = 0xAA;
+extern const char insb_instruction_label[];
+
+static void reg_corruption_isr(isr_regs_t *regs)
+{
+ isr_cnt++;
+ apic_write(APIC_EOI, 0);
+}
+
+static void reg_corruption_prepare(struct svm_test *test)
+{
+ default_prepare(test);
+ set_test_stage(test, 0);
+
+ vmcb->control.int_ctl = V_INTR_MASKING_MASK;
+ vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
+
+ handle_irq(TIMER_VECTOR, reg_corruption_isr);
+
+ /* set local APIC to inject external interrupts */
+ apic_write(APIC_TMICT, 0);
+ apic_write(APIC_TDCR, 0);
+ apic_write(APIC_LVTT, TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC);
+ apic_write(APIC_TMICT, 1000);
+}
+
+static void reg_corruption_test(struct svm_test *test)
+{
+ /* this is an endless loop, interrupted by the timer interrupt */
+ asm volatile (
+ "1:\n\t"
+ "movw $0x4d0, %%dx\n\t" // IO port
+ "lea %[io_port_var], %%rdi\n\t"
+ "movb $0xAA, %[io_port_var]\n\t"
+ "insb_instruction_label:\n\t"
+ "insb\n\t"
+ "jmp 1b\n\t"
+
+ : [io_port_var] "=m" (io_port_var)
+ : /* no inputs */
+ : "rdx", "rdi"
+ );
+}
+
+static bool reg_corruption_finished(struct svm_test *test)
+{
+ if (isr_cnt >= 10000) {
+ report(true,
+ "No RIP corruption detected after %d timer interrupts",
+ isr_cnt);
+ set_test_stage(test, 1);
+ return true;
+ }
+
+ if (vmcb->control.exit_code == SVM_EXIT_INTR) {
+
+ void *guest_rip = (void *)vmcb->save.rip;
+
+ irq_enable();
+ asm volatile ("nop");
+ irq_disable();
+
+ if (guest_rip == insb_instruction_label && io_port_var != 0xAA) {
+ report(false,
+ "RIP corruption detected after %d timer interrupts",
+ isr_cnt);
+ return true;
+ }
+
+ }
+ return false;
+}
+
+static bool reg_corruption_check(struct svm_test *test)
+{
+ return get_test_stage(test) == 1;
+}
+
#define TEST(name) { #name, .v2 = name }
/*
@@ -1950,6 +2049,9 @@
{ "virq_inject", default_supported, virq_inject_prepare,
default_prepare_gif_clear, virq_inject_test,
virq_inject_finished, virq_inject_check },
+ { "reg_corruption", default_supported, reg_corruption_prepare,
+ default_prepare_gif_clear, reg_corruption_test,
+ reg_corruption_finished, reg_corruption_check },
TEST(svm_guest_state_test),
{ NULL, NULL, NULL, NULL, NULL, NULL, NULL }
};
diff --git a/x86/vmx.c b/x86/vmx.c
index fe7d5f1..07415b4 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -1674,8 +1674,27 @@
(val & 0xfffffffffffffc01Ull) == 0,
"MSR_IA32_VMX_VMCS_ENUM");
+ fixed0 = -1ull;
+ fixed0 &= ~(EPT_CAP_WT |
+ EPT_CAP_PWL4 |
+ EPT_CAP_PWL5 |
+ EPT_CAP_UC |
+ EPT_CAP_WB |
+ EPT_CAP_2M_PAGE |
+ EPT_CAP_1G_PAGE |
+ EPT_CAP_INVEPT |
+ EPT_CAP_AD_FLAG |
+ EPT_CAP_ADV_EPT_INFO |
+ EPT_CAP_INVEPT_SINGLE |
+ EPT_CAP_INVEPT_ALL |
+ VPID_CAP_INVVPID |
+ VPID_CAP_INVVPID_ADDR |
+ VPID_CAP_INVVPID_CXTGLB |
+ VPID_CAP_INVVPID_ALL |
+ VPID_CAP_INVVPID_CXTLOC);
+
val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
- report((val & 0xfffff07ef98cbebeUll) == 0,
+ report((val & fixed0) == 0,
"MSR_IA32_VMX_EPT_VPID_CAP");
}
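
The rewrite above derives the reserved-bit mask from the named capability bits rather than a hard-coded constant, so future EPT_CAP_*/VPID_CAP_* additions shrink the mask automatically. A host-side sketch of the derivation, with bit positions copied from x86/vmx.h as amended below:

    #include <stdio.h>

    #define EPT_CAP_WT              1ull
    #define EPT_CAP_PWL4            (1ull << 6)
    #define EPT_CAP_PWL5            (1ull << 7)
    #define EPT_CAP_UC              (1ull << 8)
    #define EPT_CAP_WB              (1ull << 14)
    #define EPT_CAP_2M_PAGE         (1ull << 16)
    #define EPT_CAP_1G_PAGE         (1ull << 17)
    #define EPT_CAP_INVEPT          (1ull << 20)
    #define EPT_CAP_AD_FLAG         (1ull << 21)
    #define EPT_CAP_ADV_EPT_INFO    (1ull << 22)
    #define EPT_CAP_INVEPT_SINGLE   (1ull << 25)
    #define EPT_CAP_INVEPT_ALL      (1ull << 26)
    #define VPID_CAP_INVVPID        (1ull << 32)
    #define VPID_CAP_INVVPID_ADDR   (1ull << 40)
    #define VPID_CAP_INVVPID_CXTGLB (1ull << 41)
    #define VPID_CAP_INVVPID_ALL    (1ull << 42)
    #define VPID_CAP_INVVPID_CXTLOC (1ull << 43)

    int main(void)
    {
        /* Everything not covered by a named capability bit is reserved. */
        unsigned long long reserved = ~(EPT_CAP_WT | EPT_CAP_PWL4 |
            EPT_CAP_PWL5 | EPT_CAP_UC | EPT_CAP_WB | EPT_CAP_2M_PAGE |
            EPT_CAP_1G_PAGE | EPT_CAP_INVEPT | EPT_CAP_AD_FLAG |
            EPT_CAP_ADV_EPT_INFO | EPT_CAP_INVEPT_SINGLE |
            EPT_CAP_INVEPT_ALL | VPID_CAP_INVVPID | VPID_CAP_INVVPID_ADDR |
            VPID_CAP_INVVPID_CXTGLB | VPID_CAP_INVVPID_ALL |
            VPID_CAP_INVVPID_CXTLOC);

        printf("MSR_IA32_VMX_EPT_VPID_CAP reserved mask: %#llx\n", reserved);
        return 0;
    }
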
diff --git a/x86/vmx.h b/x86/vmx.h
index 71fdaa0..d1c2436 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -639,14 +639,16 @@
#define EPT_CAP_WT 1ull
#define EPT_CAP_PWL4 (1ull << 6)
+#define EPT_CAP_PWL5 (1ull << 7)
#define EPT_CAP_UC (1ull << 8)
#define EPT_CAP_WB (1ull << 14)
#define EPT_CAP_2M_PAGE (1ull << 16)
#define EPT_CAP_1G_PAGE (1ull << 17)
#define EPT_CAP_INVEPT (1ull << 20)
+#define EPT_CAP_AD_FLAG (1ull << 21)
+#define EPT_CAP_ADV_EPT_INFO (1ull << 22)
#define EPT_CAP_INVEPT_SINGLE (1ull << 25)
#define EPT_CAP_INVEPT_ALL (1ull << 26)
-#define EPT_CAP_AD_FLAG (1ull << 21)
#define VPID_CAP_INVVPID (1ull << 32)
#define VPID_CAP_INVVPID_ADDR (1ull << 40)
#define VPID_CAP_INVVPID_CXTGLB (1ull << 41)
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index 36e94fa..9fec0f7 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -4786,8 +4786,8 @@
*
* - The EPT memory type (bits 2:0) must be a value supported by the
* processor as indicated in the IA32_VMX_EPT_VPID_CAP MSR.
- * - Bits 5:3 (1 less than the EPT page-walk length) must be 3,
- * indicating an EPT page-walk length of 4.
+ * - Bits 5:3 (1 less than the EPT page-walk length) must indicate a
+ * supported EPT page-walk length.
* - Bit 6 (enable bit for accessed and dirty flags for EPT) must be
* 0 if bit 21 of the IA32_VMX_EPT_VPID_CAP MSR is read as 0,
* indicating that the processor does not support accessed and dirty
@@ -4827,6 +4827,9 @@
if (msr & EPT_CAP_WB)
wr_bk = true;
+ /* Support for 4-level EPT is mandatory. */
+ report(msr & EPT_CAP_PWL4, "4-level EPT support check");
+
primary |= CPU_SECONDARY;
vmcs_write(CPU_EXEC_CTRL0, primary);
secondary |= CPU_EPT;
@@ -4868,12 +4871,13 @@
eptp = (eptp & ~EPT_MEM_TYPE_MASK) | 6ul;
/*
- * Page walk length (bits 5:3)
+ * Page walk length (bits 5:3). Note, the value in VMCS.EPTP "is 1
+ * less than the EPT page-walk length".
*/
for (i = 0; i < 8; i++) {
eptp = (eptp & ~EPTP_PG_WALK_LEN_MASK) |
(i << EPTP_PG_WALK_LEN_SHIFT);
- if (i == 3)
+ if (i == 3 || (i == 4 && (msr & EPT_CAP_PWL5)))
ctrl = true;
else
ctrl = false;
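
Since VMCS.EPTP bits 5:3 hold (page-walk length - 1), the encoding can be made explicit with a small helper. EPTP_PG_WALK_LEN_SHIFT/MASK are assumed to match their x86/vmx.h definitions (a 3-bit field at bit 3), and the helper name is illustrative:

    /* Sketch: encode an EPT page-walk length into an EPTP value.
     * A 4-level walk stores 3 in bits 5:3, a 5-level walk stores 4. */
    #define EPTP_PG_WALK_LEN_SHIFT 3ul
    #define EPTP_PG_WALK_LEN_MASK  (7ul << EPTP_PG_WALK_LEN_SHIFT)

    static inline unsigned long eptp_with_walk_len(unsigned long eptp,
                                                   unsigned int levels)
    {
        return (eptp & ~EPTP_PG_WALK_LEN_MASK) |
               ((unsigned long)(levels - 1) << EPTP_PG_WALK_LEN_SHIFT);
    }

    /* eptp_with_walk_len(eptp, 4) is valid everywhere (PWL4 is mandatory,
     * as the new report() above checks); eptp_with_walk_len(eptp, 5) is
     * valid only when EPT_CAP_PWL5 is set. */
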