Merge tag 'pull-request-2020-06-16' of https://gitlab.com/huth/kvm-unit-tests
* Lots of CI-related fixes and improvements
* Update the gitlab-CI to Fedora 32
* Test compilation with Clang
diff --git a/lib/alloc.c b/lib/alloc.c
index ed8f5f9..6c89f98 100644
--- a/lib/alloc.c
+++ b/lib/alloc.c
@@ -1,14 +1,49 @@
#include "alloc.h"
#include "asm/page.h"
+#include "bitops.h"
void *malloc(size_t size)
{
return memalign(sizeof(long), size);
}
+static bool mult_overflow(size_t a, size_t b)
+{
+#if BITS_PER_LONG == 32
+ /* 32 bit system, easy case: just use u64 */
+ return (u64)a * (u64)b >= (1ULL << 32);
+#else
+#ifdef __SIZEOF_INT128__
+ /* if __int128 is available use it (like the u64 case above) */
+ unsigned __int128 res = a;
+ res *= b;
+ res >>= 64;
+ return res != 0;
+#else
+ u64 tmp;
+
+ if ((a >> 32) && (b >> 32))
+ return true;
+ if (!(a >> 32) && !(b >> 32))
+ return false;
+ tmp = (u32)a;
+ tmp *= (u32)b;
+ tmp >>= 32;
+ if (a < b)
+ tmp += a * (b >> 32);
+ else
+ tmp += b * (a >> 32);
+ return tmp >> 32;
+#endif /* __SIZEOF_INT128__ */
+#endif /* BITS_PER_LONG == 32 */
+}
+
void *calloc(size_t nmemb, size_t size)
{
- void *ptr = malloc(nmemb * size);
+ void *ptr;
+
+ assert(!mult_overflow(nmemb, size));
+ ptr = malloc(nmemb * size);
if (ptr)
memset(ptr, 0, nmemb * size);
return ptr;
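
A note on the fallback branch of mult_overflow() above: after the two early exits, at most one operand has bits set above 31, so a*b = lo(a)*lo(b) + small*hi(large)*2^32, and the sum that feeds the final shift cannot itself wrap a u64. The sketch below mirrors that branch for 64-bit operands so it can be checked on any host; the harness and names are illustrative, not part of the patch.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Mirror of mult_overflow()'s no-__int128 branch for 64-bit size_t. */
    static bool mult_overflow_u64(uint64_t a, uint64_t b)
    {
        uint64_t tmp;

        if ((a >> 32) && (b >> 32))
            return true;        /* both operands >= 2^32: product >= 2^64 */
        if (!(a >> 32) && !(b >> 32))
            return false;       /* both operands < 2^32: product < 2^64 */

        /*
         * Exactly one operand (the larger) has high bits. Since
         * a*b = lo(a)*lo(b) + small*hi(large)*2^32, bits 64 and up of
         * the product equal ((lo-product >> 32) + small*hi(large)) >> 32.
         */
        tmp = (uint32_t)a;
        tmp *= (uint32_t)b;
        tmp >>= 32;
        if (a < b)
            tmp += a * (b >> 32);
        else
            tmp += b * (a >> 32);
        return tmp >> 32;
    }

    int main(void)
    {
        assert(!mult_overflow_u64(1ull << 31, 1ull << 32));  /* 2^63 fits */
        assert(mult_overflow_u64(1ull << 32, 1ull << 32));   /* 2^64 overflows */
        assert(mult_overflow_u64(3, 0x6000000000000000ull)); /* 1.125 * 2^64 */
        return 0;
    }
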
diff --git a/lib/alloc_page.c b/lib/alloc_page.c
index 7c8461a..8769c3f 100644
--- a/lib/alloc_page.c
+++ b/lib/alloc_page.c
@@ -176,7 +176,7 @@
alloc_ops = &page_alloc_ops;
}
-int get_order(size_t size)
+unsigned int get_order(size_t size)
{
return is_power_of_2(size) ? fls(size) : fls(size) + 1;
}
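
For reference, get_order() computes the smallest n with 2^n >= size, i.e. a ceiling log2. Below is a host-side sketch of the same expression, assuming fls() returns the index of the most significant set bit (fls(1) == 0) as in lib/bitops.h; the harness is illustrative only.

    #include <assert.h>
    #include <stddef.h>

    /* Assumed fls() semantics: index of the most significant set bit. */
    static unsigned int fls(size_t x)
    {
        return 8 * sizeof(size_t) - 1 - __builtin_clzl(x);
    }

    static int is_power_of_2(size_t x)
    {
        return x != 0 && (x & (x - 1)) == 0;
    }

    /* Mirror of get_order(): smallest n with 2^n >= size. */
    static unsigned int get_order(size_t size)
    {
        return is_power_of_2(size) ? fls(size) : fls(size) + 1;
    }

    int main(void)
    {
        assert(get_order(1) == 0);
        assert(get_order(2) == 1);
        assert(get_order(3) == 2);      /* rounds up to 4 */
        assert(get_order(4096) == 12);
        assert(get_order(4097) == 13);
        return 0;
    }
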
diff --git a/lib/alloc_page.h b/lib/alloc_page.h
index e6a51d2..6181299 100644
--- a/lib/alloc_page.h
+++ b/lib/alloc_page.h
@@ -15,6 +15,6 @@
void free_page(void *page);
void free_pages(void *mem, unsigned long size);
void free_pages_by_order(void *mem, unsigned long order);
-int get_order(size_t size);
+unsigned int get_order(size_t size);
#endif
diff --git a/lib/alloc_phys.h b/lib/alloc_phys.h
index ea38f91..611aa70 100644
--- a/lib/alloc_phys.h
+++ b/lib/alloc_phys.h
@@ -39,7 +39,7 @@
/*
* phys_alloc_get_unused allocates all remaining memory from the region
* passed to phys_alloc_init, returning the newly allocated memory's base
- * and top addresses. phys_allo_get_unused will still return base and top
+ * and top addresses. phys_alloc_get_unused will still return base and top
* when no free memory is remaining, but base will equal top.
*/
extern void phys_alloc_get_unused(phys_addr_t *p_base, phys_addr_t *p_top);
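
As a usage note, the setup_vm() changes in lib/vmalloc.c below exercise exactly this contract when handing leftover physical memory to the page allocator; condensed:

    phys_addr_t base, top;

    phys_alloc_get_unused(&base, &top);
    if (base != top) {              /* anything left to hand over? */
        base = PAGE_ALIGN(base);    /* round base up to a page boundary... */
        top = top & -PAGE_SIZE;     /* ...and top down */
        free_pages(phys_to_virt(base), top - base);
    }
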
diff --git a/lib/vmalloc.c b/lib/vmalloc.c
index 5022a31..10f15af 100644
--- a/lib/vmalloc.c
+++ b/lib/vmalloc.c
@@ -20,10 +20,16 @@
void *alloc_vpages(ulong nr)
{
+ uintptr_t ptr;
+
spin_lock(&lock);
- vfree_top -= PAGE_SIZE * nr;
+ ptr = (uintptr_t)vfree_top;
+ ptr -= PAGE_SIZE * nr;
+ vfree_top = (void *)ptr;
spin_unlock(&lock);
- return vfree_top;
+
+ /* Cannot return vfree_top here; we are outside the lock! */
+ return (void *)ptr;
}
void *alloc_vpage(void)
@@ -31,17 +37,12 @@
return alloc_vpages(1);
}
-void init_alloc_vpage(void *top)
-{
- vfree_top = top;
-}
-
void *vmap(phys_addr_t phys, size_t size)
{
void *mem, *p;
unsigned pages;
- size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+ size = PAGE_ALIGN(size);
pages = size / PAGE_SIZE;
mem = p = alloc_vpages(pages);
@@ -60,7 +61,7 @@
unsigned pages;
assert(alignment <= PAGE_SIZE);
- size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+ size = PAGE_ALIGN(size);
pages = size / PAGE_SIZE;
mem = p = alloc_vpages(pages);
while (pages--) {
@@ -90,6 +91,14 @@
{
}
+void init_alloc_vpage(void *top)
+{
+ spin_lock(&lock);
+ assert(alloc_ops != &vmalloc_ops);
+ vfree_top = top;
+ spin_unlock(&lock);
+}
+
void setup_vm()
{
phys_addr_t base, top;
@@ -104,7 +113,7 @@
* so that it can be used to allocate page tables.
*/
if (!page_alloc_initialized()) {
- base = (base + PAGE_SIZE - 1) & -PAGE_SIZE;
+ base = PAGE_ALIGN(base);
top = top & -PAGE_SIZE;
free_pages(phys_to_virt(base), top - base);
}
@@ -113,10 +122,13 @@
phys_alloc_get_unused(&base, &top);
page_root = setup_mmu(top);
if (base != top) {
- base = (base + PAGE_SIZE - 1) & -PAGE_SIZE;
+ base = PAGE_ALIGN(base);
top = top & -PAGE_SIZE;
free_pages(phys_to_virt(base), top - base);
}
+ spin_lock(&lock);
+ assert(alloc_ops != &vmalloc_ops);
alloc_ops = &vmalloc_ops;
+ spin_unlock(&lock);
}
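
The alloc_vpages() change above is a general locking pattern: capture any value derived from shared state while the lock is held, since re-reading vfree_top after spin_unlock() may observe a concurrent allocation. A minimal pthread rendition of the same pattern (illustrative only, not kvm-unit-tests code):

    #include <pthread.h>
    #include <stdint.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static uintptr_t free_top = 0x40000000;   /* grows downward */

    /* Safe: snapshot the new top under the lock and return the
     * snapshot, not the shared variable. */
    static void *bump_down(uintptr_t bytes)
    {
        uintptr_t ptr;

        pthread_mutex_lock(&lock);
        ptr = free_top - bytes;
        free_top = ptr;
        pthread_mutex_unlock(&lock);

        /* Returning (void *)free_top here would race with a
         * concurrent bump_down() that lowers free_top again. */
        return (void *)ptr;
    }

    int main(void)
    {
        void *a = bump_down(4096);
        void *b = bump_down(4096);
        return (a != b) ? 0 : 1;  /* each caller gets a distinct range */
    }
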
diff --git a/lib/vmalloc.h b/lib/vmalloc.h
index 3658b80..2b563f4 100644
--- a/lib/vmalloc.h
+++ b/lib/vmalloc.h
@@ -3,15 +3,23 @@
#include <asm/page.h>
+/* Allocate consecutive virtual pages (without backing) */
extern void *alloc_vpages(ulong nr);
+/* Allocate one virtual page (without backing) */
extern void *alloc_vpage(void);
+/* Set the top of the virtual address space */
extern void init_alloc_vpage(void *top);
+/* Set up the virtual allocator; also sets up the page allocator if needed */
extern void setup_vm(void);
+/* Set up paging */
extern void *setup_mmu(phys_addr_t top);
+/* Walk the page table and resolve the virtual address to a physical address */
extern phys_addr_t virt_to_pte_phys(pgd_t *pgtable, void *virt);
+/* Map the virtual address to the physical address for the given page tables */
extern pteval_t *install_page(pgd_t *pgtable, phys_addr_t phys, void *virt);
+/* Map consecutive physical pages */
void *vmap(phys_addr_t phys, size_t size);
#endif
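
To make the vmap() contract concrete, a hedged usage sketch in the shape of a kvm-unit-tests test body; the physical address is a placeholder and the includes follow the usual test layout:

    #include <libcflat.h>
    #include <vmalloc.h>
    #include <asm/page.h>

    /* Hypothetical test body: 0x12340000 is a placeholder address. */
    static void touch_phys_buffer(void)
    {
        phys_addr_t phys = 0x12340000;
        void *va = vmap(phys, 3 * PAGE_SIZE);   /* size is rounded up to pages */

        /* The mapping is linear: va + off resolves to phys + off. */
        *(volatile u32 *)va = 0xdeadbeef;
    }
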
diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
index 073580a..7e2a3dd 100644
--- a/lib/x86/asm/page.h
+++ b/lib/x86/asm/page.h
@@ -19,6 +19,8 @@
#ifndef __ASSEMBLY__
+#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
+
#ifdef __x86_64__
#define LARGE_PAGE_SIZE (512 * PAGE_SIZE)
#else
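
PAGE_ALIGN() rounds up, while the open-coded `top & -PAGE_SIZE` left in lib/vmalloc.c rounds down. A self-contained sketch of both, assuming ALIGN() is the usual Linux-style round-up macro:

    #include <assert.h>

    #define PAGE_SIZE         4096ul
    #define ALIGN(x, a)       (((x) + (a) - 1) & ~((a) - 1))  /* round up */
    #define PAGE_ALIGN(addr)  ALIGN(addr, PAGE_SIZE)

    int main(void)
    {
        assert(PAGE_ALIGN(0) == 0);
        assert(PAGE_ALIGN(1) == 4096);
        assert(PAGE_ALIGN(4096) == 4096);
        assert(PAGE_ALIGN(4097) == 8192);
        /* The complementary round-down used for 'top' in setup_vm(): */
        assert((8191ul & -PAGE_SIZE) == 4096);
        return 0;
    }
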
diff --git a/x86/cstart.S b/x86/cstart.S
index 38ac19b..fa62e09 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -96,13 +96,13 @@
.globl start
start:
+ mov $stacktop, %esp
push %ebx
call setup_multiboot
call setup_libcflat
mov mb_cmdline(%ebx), %eax
mov %eax, __args
call __setup_args
- mov $stacktop, %esp
setup_percpu_area
call prepare_32
jmpl $8, $start32
diff --git a/x86/svm_tests.c b/x86/svm_tests.c
index a645d66..610685b 100644
--- a/x86/svm_tests.c
+++ b/x86/svm_tests.c
@@ -1789,6 +1789,105 @@
return get_test_stage(test) == 5;
}
+/*
+ * Detect nested guest RIP corruption as explained in kernel commit
+ * b6162e82aef19fee9c32cb3fe9ac30d9116a8c73
+ *
+ * In the assembly loop below 'ins' is executed while IO instructions
+ * are not intercepted; the instruction is emulated by L0.
+ *
+ * At the same time we are getting interrupts from the local APIC timer,
+ * and we do intercept them in L1.
+ *
+ * If the interrupt happens on the insb instruction, L0 will VMexit, emulate
+ * the insb instruction and then it will inject the interrupt to L1 through
+ * a nested VMexit. Due to a bug, it would leave pre-emulation values of RIP,
+ * RAX and RSP in the VMCB.
+ *
+ * In our intercept handler we detect the bug by checking that RIP still
+ * points at the insb instruction even though its memory operand has
+ * already been written, which means insb has in fact been executed.
+ */
+
+static volatile int isr_cnt = 0;
+static volatile uint8_t io_port_var = 0xAA;
+extern const char insb_instruction_label[];
+
+static void reg_corruption_isr(isr_regs_t *regs)
+{
+ isr_cnt++;
+ apic_write(APIC_EOI, 0);
+}
+
+static void reg_corruption_prepare(struct svm_test *test)
+{
+ default_prepare(test);
+ set_test_stage(test, 0);
+
+ vmcb->control.int_ctl = V_INTR_MASKING_MASK;
+ vmcb->control.intercept |= (1ULL << INTERCEPT_INTR);
+
+ handle_irq(TIMER_VECTOR, reg_corruption_isr);
+
+ /* set local APIC to inject external interrupts */
+ apic_write(APIC_TMICT, 0);
+ apic_write(APIC_TDCR, 0);
+ apic_write(APIC_LVTT, TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC);
+ apic_write(APIC_TMICT, 1000);
+}
+
+static void reg_corruption_test(struct svm_test *test)
+{
+ /* this is an endless loop, interrupted by the timer interrupt */
+ asm volatile (
+ "1:\n\t"
+ "movw $0x4d0, %%dx\n\t" // IO port
+ "lea %[io_port_var], %%rdi\n\t"
+ "movb $0xAA, %[io_port_var]\n\t"
+ "insb_instruction_label:\n\t"
+ "insb\n\t"
+ "jmp 1b\n\t"
+
+ : [io_port_var] "=m" (io_port_var)
+ : /* no inputs */
+ : "rdx", "rdi"
+ );
+}
+
+static bool reg_corruption_finished(struct svm_test *test)
+{
+ if (isr_cnt >= 10000) {
+ report(true,
+ "No RIP corruption detected after %d timer interrupts",
+ isr_cnt);
+ set_test_stage(test, 1);
+ return true;
+ }
+
+ if (vmcb->control.exit_code == SVM_EXIT_INTR) {
+
+ void *guest_rip = (void *)vmcb->save.rip;
+
+ irq_enable();
+ asm volatile ("nop");
+ irq_disable();
+
+ if (guest_rip == insb_instruction_label && io_port_var != 0xAA) {
+ report(false,
+ "RIP corruption detected after %d timer interrupts",
+ isr_cnt);
+ return true;
+ }
+
+ }
+ return false;
+}
+
+static bool reg_corruption_check(struct svm_test *test)
+{
+ return get_test_stage(test) == 1;
+}
+
#define TEST(name) { #name, .v2 = name }
/*
@@ -1950,6 +2049,9 @@
{ "virq_inject", default_supported, virq_inject_prepare,
default_prepare_gif_clear, virq_inject_test,
virq_inject_finished, virq_inject_check },
+ { "reg_corruption", default_supported, reg_corruption_prepare,
+ default_prepare_gif_clear, reg_corruption_test,
+ reg_corruption_finished, reg_corruption_check },
TEST(svm_guest_state_test),
{ NULL, NULL, NULL, NULL, NULL, NULL, NULL }
};
diff --git a/x86/vmx.c b/x86/vmx.c
index fe7d5f1..07415b4 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -1674,8 +1674,27 @@
(val & 0xfffffffffffffc01Ull) == 0,
"MSR_IA32_VMX_VMCS_ENUM");
+ fixed0 = -1ull;
+ fixed0 &= ~(EPT_CAP_WT |
+ EPT_CAP_PWL4 |
+ EPT_CAP_PWL5 |
+ EPT_CAP_UC |
+ EPT_CAP_WB |
+ EPT_CAP_2M_PAGE |
+ EPT_CAP_1G_PAGE |
+ EPT_CAP_INVEPT |
+ EPT_CAP_AD_FLAG |
+ EPT_CAP_ADV_EPT_INFO |
+ EPT_CAP_INVEPT_SINGLE |
+ EPT_CAP_INVEPT_ALL |
+ VPID_CAP_INVVPID |
+ VPID_CAP_INVVPID_ADDR |
+ VPID_CAP_INVVPID_CXTGLB |
+ VPID_CAP_INVVPID_ALL |
+ VPID_CAP_INVVPID_CXTLOC);
+
val = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
- report((val & 0xfffff07ef98cbebeUll) == 0,
+ report((val & fixed0) == 0,
"MSR_IA32_VMX_EPT_VPID_CAP");
}
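
The rewrite above derives the reserved-bit mask from the named capability bits rather than a hard-coded constant, so future EPT_CAP_*/VPID_CAP_* additions shrink the mask automatically. A host-side sketch of the derivation, with bit positions copied from x86/vmx.h as amended below:

    #include <stdio.h>

    #define EPT_CAP_WT              1ull
    #define EPT_CAP_PWL4            (1ull << 6)
    #define EPT_CAP_PWL5            (1ull << 7)
    #define EPT_CAP_UC              (1ull << 8)
    #define EPT_CAP_WB              (1ull << 14)
    #define EPT_CAP_2M_PAGE         (1ull << 16)
    #define EPT_CAP_1G_PAGE         (1ull << 17)
    #define EPT_CAP_INVEPT          (1ull << 20)
    #define EPT_CAP_AD_FLAG         (1ull << 21)
    #define EPT_CAP_ADV_EPT_INFO    (1ull << 22)
    #define EPT_CAP_INVEPT_SINGLE   (1ull << 25)
    #define EPT_CAP_INVEPT_ALL      (1ull << 26)
    #define VPID_CAP_INVVPID        (1ull << 32)
    #define VPID_CAP_INVVPID_ADDR   (1ull << 40)
    #define VPID_CAP_INVVPID_CXTGLB (1ull << 41)
    #define VPID_CAP_INVVPID_ALL    (1ull << 42)
    #define VPID_CAP_INVVPID_CXTLOC (1ull << 43)

    int main(void)
    {
        /* Everything not covered by a named capability bit is reserved. */
        unsigned long long reserved = ~(EPT_CAP_WT | EPT_CAP_PWL4 |
            EPT_CAP_PWL5 | EPT_CAP_UC | EPT_CAP_WB | EPT_CAP_2M_PAGE |
            EPT_CAP_1G_PAGE | EPT_CAP_INVEPT | EPT_CAP_AD_FLAG |
            EPT_CAP_ADV_EPT_INFO | EPT_CAP_INVEPT_SINGLE |
            EPT_CAP_INVEPT_ALL | VPID_CAP_INVVPID | VPID_CAP_INVVPID_ADDR |
            VPID_CAP_INVVPID_CXTGLB | VPID_CAP_INVVPID_ALL |
            VPID_CAP_INVVPID_CXTLOC);

        printf("MSR_IA32_VMX_EPT_VPID_CAP reserved mask: %#llx\n", reserved);
        return 0;
    }
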
diff --git a/x86/vmx.h b/x86/vmx.h
index 71fdaa0..d1c2436 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -639,14 +639,16 @@
#define EPT_CAP_WT 1ull
#define EPT_CAP_PWL4 (1ull << 6)
+#define EPT_CAP_PWL5 (1ull << 7)
#define EPT_CAP_UC (1ull << 8)
#define EPT_CAP_WB (1ull << 14)
#define EPT_CAP_2M_PAGE (1ull << 16)
#define EPT_CAP_1G_PAGE (1ull << 17)
#define EPT_CAP_INVEPT (1ull << 20)
+#define EPT_CAP_AD_FLAG (1ull << 21)
+#define EPT_CAP_ADV_EPT_INFO (1ull << 22)
#define EPT_CAP_INVEPT_SINGLE (1ull << 25)
#define EPT_CAP_INVEPT_ALL (1ull << 26)
-#define EPT_CAP_AD_FLAG (1ull << 21)
#define VPID_CAP_INVVPID (1ull << 32)
#define VPID_CAP_INVVPID_ADDR (1ull << 40)
#define VPID_CAP_INVVPID_CXTGLB (1ull << 41)
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index 36e94fa..9fec0f7 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -4786,8 +4786,8 @@
*
* - The EPT memory type (bits 2:0) must be a value supported by the
* processor as indicated in the IA32_VMX_EPT_VPID_CAP MSR.
- * - Bits 5:3 (1 less than the EPT page-walk length) must be 3,
- * indicating an EPT page-walk length of 4.
+ * - Bits 5:3 (1 less than the EPT page-walk length) must indicate a
+ * supported EPT page-walk length.
* - Bit 6 (enable bit for accessed and dirty flags for EPT) must be
* 0 if bit 21 of the IA32_VMX_EPT_VPID_CAP MSR is read as 0,
* indicating that the processor does not support accessed and dirty
@@ -4827,6 +4827,9 @@
if (msr & EPT_CAP_WB)
wr_bk = true;
+ /* Support for 4-level EPT is mandatory. */
+ report(msr & EPT_CAP_PWL4, "4-level EPT support check");
+
primary |= CPU_SECONDARY;
vmcs_write(CPU_EXEC_CTRL0, primary);
secondary |= CPU_EPT;
@@ -4868,12 +4871,13 @@
eptp = (eptp & ~EPT_MEM_TYPE_MASK) | 6ul;
/*
- * Page walk length (bits 5:3)
+ * Page walk length (bits 5:3). Note, the value in VMCS.EPTP "is 1
+ * less than the EPT page-walk length".
*/
for (i = 0; i < 8; i++) {
eptp = (eptp & ~EPTP_PG_WALK_LEN_MASK) |
(i << EPTP_PG_WALK_LEN_SHIFT);
- if (i == 3)
+ if (i == 3 || (i == 4 && (msr & EPT_CAP_PWL5)))
ctrl = true;
else
ctrl = false;
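
Since VMCS.EPTP bits 5:3 hold (page-walk length - 1), the encoding can be made explicit with a small helper. EPTP_PG_WALK_LEN_SHIFT/MASK are assumed to match their x86/vmx.h definitions (a 3-bit field at bit 3), and the helper name is illustrative:

    /* Sketch: encode an EPT page-walk length into an EPTP value.
     * A 4-level walk stores 3 in bits 5:3, a 5-level walk stores 4. */
    #define EPTP_PG_WALK_LEN_SHIFT 3ul
    #define EPTP_PG_WALK_LEN_MASK  (7ul << EPTP_PG_WALK_LEN_SHIFT)

    static inline unsigned long eptp_with_walk_len(unsigned long eptp,
                                                   unsigned int levels)
    {
        return (eptp & ~EPTP_PG_WALK_LEN_MASK) |
               ((unsigned long)(levels - 1) << EPTP_PG_WALK_LEN_SHIFT);
    }

    /* eptp_with_walk_len(eptp, 4) is valid everywhere (PWL4 is mandatory,
     * as the new report() above checks); eptp_with_walk_len(eptp, 5) is
     * valid only when EPT_CAP_PWL5 is set. */
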