Merge branch 's390x-next-20220217' into 'master'
s390x: smp lib improvements and more
See merge request kvm-unit-tests/kvm-unit-tests!25
diff --git a/lib/alloc.c b/lib/alloc.c
index f4266f5..51d774d 100644
--- a/lib/alloc.c
+++ b/lib/alloc.c
@@ -1,48 +1,19 @@
-#include "alloc.h"
-#include "asm/page.h"
-#include "bitops.h"
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#include <alloc.h>
+#include <bitops.h>
+#include <asm/page.h>
+#include <linux/compiler.h>
void *malloc(size_t size)
{
return memalign(sizeof(long), size);
}
-static bool mult_overflow(size_t a, size_t b)
-{
-#if BITS_PER_LONG == 32
- /* 32 bit system, easy case: just use u64 */
- return (u64)a * (u64)b >= (1ULL << 32);
-#else
-#ifdef __SIZEOF_INT128__
- /* if __int128 is available use it (like the u64 case above) */
- unsigned __int128 res = a;
- res *= b;
- res >>= 64;
- return res != 0;
-#else
- u64 tmp;
-
- if ((a >> 32) && (b >> 32))
- return true;
- if (!(a >> 32) && !(b >> 32))
- return false;
- tmp = (u32)a;
- tmp *= (u32)b;
- tmp >>= 32;
- if (a < b)
- tmp += a * (b >> 32);
- else
- tmp += b * (a >> 32);
- return tmp >> 32;
-#endif /* __SIZEOF_INT128__ */
-#endif /* BITS_PER_LONG == 32 */
-}
-
void *calloc(size_t nmemb, size_t size)
{
void *ptr;
- assert(!mult_overflow(nmemb, size));
+ assert(!check_mul_overflow(nmemb, size));
ptr = malloc(nmemb * size);
if (ptr)
memset(ptr, 0, nmemb * size);
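
Note: the hand-rolled mult_overflow() above is replaced by check_mul_overflow() from <linux/compiler.h>. As a minimal sketch of what such a helper can look like, assuming the GCC/Clang __builtin_mul_overflow intrinsic (the name my_check_mul_overflow and this body are illustrative, not the actual linux/compiler.h definition):

#include <stdbool.h>
#include <stddef.h>

/* Illustrative check_mul_overflow-style helper: returns true if a * b
 * overflows size_t, letting the compiler pick the best instruction
 * sequence instead of open-coding the 32-bit/64-bit cases. */
static inline bool my_check_mul_overflow(size_t a, size_t b)
{
	size_t result;

	return __builtin_mul_overflow(a, b, &result);
}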
diff --git a/lib/bitops.h b/lib/bitops.h
index 308aa86..81a06a4 100644
--- a/lib/bitops.h
+++ b/lib/bitops.h
@@ -1,6 +1,9 @@
#ifndef _BITOPS_H_
#define _BITOPS_H_
+#include <stdbool.h>
+#include <stddef.h>
+
/*
* Adapted from
* include/linux/bitops.h
diff --git a/lib/devicetree.c b/lib/devicetree.c
index 409d18b..fa8399a 100644
--- a/lib/devicetree.c
+++ b/lib/devicetree.c
@@ -288,7 +288,7 @@
int dt_get_initrd(const char **initrd, u32 *size)
{
const struct fdt_property *prop;
- const char *start, *end;
+ u64 start, end;
int node, len;
u32 *data;
@@ -303,7 +303,12 @@
if (!prop)
return len;
data = (u32 *)prop->data;
- start = (const char *)(unsigned long)fdt32_to_cpu(*data);
+ start = fdt32_to_cpu(*data);
+ if (len == 8) {
+ assert(sizeof(long) == 8);
+ data++;
+ start = (start << 32) | fdt32_to_cpu(*data);
+ }
prop = fdt_get_property(fdt, node, "linux,initrd-end", &len);
if (!prop) {
@@ -311,10 +316,14 @@
return len;
}
data = (u32 *)prop->data;
- end = (const char *)(unsigned long)fdt32_to_cpu(*data);
+ end = fdt32_to_cpu(*data);
+ if (len == 8) {
+ data++;
+ end = (end << 32) | fdt32_to_cpu(*data);
+ }
- *initrd = start;
- *size = (unsigned long)end - (unsigned long)start;
+ *initrd = (char *)(unsigned long)start;
+ *size = end - start;
return 0;
}
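
Note: flattened-devicetree property values are arrays of big-endian 32-bit cells, so a 64-bit initrd address arrives as two cells, high word first; the added len == 8 branches above splice them back together. The same pattern as a standalone helper (dt_read_cells is a hypothetical name, not an existing libfdt or lib/devicetree API; fdt32_to_cpu is the real libfdt byte-swap):

/* Hypothetical helper: read a one- or two-cell big-endian value. */
static u64 dt_read_cells(const u32 *data, int len)
{
	u64 val = fdt32_to_cpu(data[0]);

	if (len == 8)	/* two cells: high word, then low word */
		val = (val << 32) | fdt32_to_cpu(data[1]);
	return val;
}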
diff --git a/lib/x86/asm/debugreg.h b/lib/x86/asm/debugreg.h
index e86f5a6..a30f949 100644
--- a/lib/x86/asm/debugreg.h
+++ b/lib/x86/asm/debugreg.h
@@ -2,80 +2,63 @@
#ifndef _ASMX86_DEBUGREG_H_
#define _ASMX86_DEBUGREG_H_
-
-/* Indicate the register numbers for a number of the specific
- debug registers. Registers 0-3 contain the addresses we wish to trap on */
-#define DR_FIRSTADDR 0 /* u_debugreg[DR_FIRSTADDR] */
-#define DR_LASTADDR 3 /* u_debugreg[DR_LASTADDR] */
-
-#define DR_STATUS 6 /* u_debugreg[DR_STATUS] */
-#define DR_CONTROL 7 /* u_debugreg[DR_CONTROL] */
-
-/* Define a few things for the status register. We can use this to determine
- which debugging register was responsible for the trap. The other bits
- are either reserved or not of interest to us. */
-
-/* Define reserved bits in DR6 which are always set to 1 */
-#define DR6_RESERVED (0xFFFF0FF0)
-
-#define DR_TRAP0 (0x1) /* db0 */
-#define DR_TRAP1 (0x2) /* db1 */
-#define DR_TRAP2 (0x4) /* db2 */
-#define DR_TRAP3 (0x8) /* db3 */
-#define DR_TRAP_BITS (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
-
-#define DR_STEP (0x4000) /* single-step */
-#define DR_SWITCH (0x8000) /* task switch */
-
-/* Now define a bunch of things for manipulating the control register.
- The top two bytes of the control register consist of 4 fields of 4
- bits - each field corresponds to one of the four debug registers,
- and indicates what types of access we trap on, and how large the data
- field is that we are looking at */
-
-#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
-#define DR_CONTROL_SIZE 4 /* 4 control bits per register */
-
-#define DR_RW_EXECUTE (0x0) /* Settings for the access types to trap on */
-#define DR_RW_WRITE (0x1)
-#define DR_RW_READ (0x3)
-
-#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
-#define DR_LEN_2 (0x4)
-#define DR_LEN_4 (0xC)
-#define DR_LEN_8 (0x8)
-
-/* The low byte to the control register determine which registers are
- enabled. There are 4 fields of two bits. One bit is "local", meaning
- that the processor will reset the bit after a task switch and the other
- is global meaning that we have to explicitly reset the bit. With linux,
- you can use either one, since we explicitly zero the register when we enter
- kernel mode. */
-
-#define DR_LOCAL_ENABLE_SHIFT 0 /* Extra shift to the local enable bit */
-#define DR_GLOBAL_ENABLE_SHIFT 1 /* Extra shift to the global enable bit */
-#define DR_LOCAL_ENABLE (0x1) /* Local enable for reg 0 */
-#define DR_GLOBAL_ENABLE (0x2) /* Global enable for reg 0 */
-#define DR_ENABLE_SIZE 2 /* 2 enable bits per register */
-
-#define DR_LOCAL_ENABLE_MASK (0x55) /* Set local bits for all 4 regs */
-#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
-
-/* The second byte to the control register has a few special things.
- We can slow the instruction pipeline for instructions coming via the
- gdt or the ldt if we want to. I am not sure why this is an advantage */
-
-#ifdef __i386__
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
-#else
-#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
-#endif
-
-#define DR_LOCAL_SLOWDOWN (0x100) /* Local slow the pipeline */
-#define DR_GLOBAL_SLOWDOWN (0x200) /* Global slow the pipeline */
+#include <bitops.h>
/*
- * HW breakpoint additions
+ * DR6_ACTIVE_LOW combines fixed-1 and active-low bits (e.g. RTM), and is also
+ * the init/reset value for DR6.
*/
+#define DR6_ACTIVE_LOW 0xffff0ff0
+#define DR6_VOLATILE 0x0001e80f
+#define DR6_FIXED_1 (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
+
+#define DR6_TRAP0 BIT(0) /* DR0 matched */
+#define DR6_TRAP1 BIT(1) /* DR1 matched */
+#define DR6_TRAP2 BIT(2) /* DR2 matched */
+#define DR6_TRAP3 BIT(3) /* DR3 matched */
+#define DR6_TRAP_BITS (DR6_TRAP0|DR6_TRAP1|DR6_TRAP2|DR6_TRAP3)
+
+#define DR6_BUS_LOCK BIT(11) /* Bus lock 0x800 */
+#define DR6_BD BIT(13) /* General Detect 0x2000 */
+#define DR6_BS BIT(14) /* Single-Step 0x4000 */
+#define DR6_BT BIT(15) /* Task Switch 0x8000 */
+#define DR6_RTM BIT(16) /* RTM / TSX 0x10000 */
+
+#define DR7_FIXED_1 0x00000400 /* init/reset value, too */
+#define DR7_VOLATILE 0xffff2bff
+#define DR7_BP_EN_MASK 0x000000ff
+#define DR7_LE BIT(8) /* Local Exact 0x100 */
+#define DR7_GE BIT(9) /* Global Exact 0x200 */
+#define DR7_RTM BIT(11) /* RTM / TSX 0x800 */
+#define DR7_GD BIT(13) /* General Detect 0x2000 */
+
+/*
+ * Enable bits for DR0-D3. Bits 0, 2, 4, and 6 are local enable bits (cleared
+ * by the CPU on task switch), bits 1, 3, 5, and 7 are global enable bits
+ * (never cleared by the CPU).
+ */
+#define DR7_LOCAL_ENABLE_DRx(x) (BIT(0) << ((x) * 2))
+#define DR7_GLOBAL_ENABLE_DRx(x) (BIT(1) << ((x) * 2))
+#define DR7_ENABLE_DRx(x) \
+ (DR7_LOCAL_ENABLE_DRx(x) | DR7_GLOBAL_ENABLE_DRx(x))
+
+#define DR7_GLOBAL_ENABLE_DR0 DR7_GLOBAL_ENABLE_DRx(0)
+#define DR7_GLOBAL_ENABLE_DR1 DR7_GLOBAL_ENABLE_DRx(1)
+#define DR7_GLOBAL_ENABLE_DR2 DR7_GLOBAL_ENABLE_DRx(2)
+#define DR7_GLOBAL_ENABLE_DR3 DR7_GLOBAL_ENABLE_DRx(3)
+
+/* Condition/type of the breakpoint for DR0-3. */
+#define DR7_RW_TYPE_DRx(x, rw) ((rw) << (((x) * 4) + 16))
+#define DR7_EXECUTE_DRx(x) DR7_RW_TYPE_DRx(x, 0)
+#define DR7_WRITE_DRx(x) DR7_RW_TYPE_DRx(x, 1)
+#define DR7_PORT_IO_DRx(x) DR7_RW_TYPE_DRx(x, 2)
+#define DR7_DATA_IO_DRx(x) DR7_RW_TYPE_DRx(x, 3) /* Read or Write */
+
+/* Length of the breakpoint for DR0-3. */
+#define DR7_LEN_DRx(x, enc) ((enc) << (((x) * 4) + 18))
+#define DR7_LEN_1_DRx(x) DR7_LEN_DRx(x, 0)
+#define DR7_LEN_2_DRx(x) DR7_LEN_DRx(x, 1)
+#define DR7_LEN_4_DRx(x) DR7_LEN_DRx(x, 3)
+#define DR7_LEN_8_DRx(x) DR7_LEN_DRx(x, 2) /* Out of sequence, undefined for 32-bit CPUs. */
#endif /* _ASMX86_DEBUGREG_H_ */
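
Note: for example, the new macros compose a DR7 value for a 4-byte write watchpoint on DR0 like so (a usage sketch, not part of the patch; write_dr0()/write_dr7() are the existing lib/x86 accessors):

/* Arm DR0 as a 4-byte write watchpoint on 'addr'.
 * The expression composes to 0x000d0403. */
write_dr0(addr);
write_dr7(DR7_FIXED_1 | DR7_ENABLE_DRx(0) |
	  DR7_WRITE_DRx(0) | DR7_LEN_4_DRx(0));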
diff --git a/lib/x86/desc.c b/lib/x86/desc.c
index 16b7256..c2eb16e 100644
--- a/lib/x86/desc.c
+++ b/lib/x86/desc.c
@@ -91,7 +91,7 @@
extern struct ex_record exception_table_start, exception_table_end;
-static const char* exception_mnemonic(int vector)
+const char* exception_mnemonic(int vector)
{
switch(vector) {
case 0: return "#DE";
diff --git a/lib/x86/desc.h b/lib/x86/desc.h
index 9b81da0..ad6277b 100644
--- a/lib/x86/desc.h
+++ b/lib/x86/desc.h
@@ -224,6 +224,7 @@
void print_current_tss_info(void);
handler handle_exception(u8 v, handler fn);
void unhandled_exception(struct ex_regs *regs, bool cpu);
+const char* exception_mnemonic(int vector);
bool test_for_exception(unsigned int ex, void (*trigger_func)(void *data),
void *data);
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index fe5add5..117032a 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -592,9 +592,7 @@
static inline void wrtsc(u64 tsc)
{
- unsigned a = tsc, d = tsc >> 32;
-
- asm volatile("wrmsr" : : "a"(a), "d"(d), "c"(0x10));
+ wrmsr(MSR_IA32_TSC, tsc);
}
static inline void irq_disable(void)
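
Note: the wrtsc() cleanup works because wrmsr() already performs the EDX:EAX split that the open-coded version duplicated, and MSR_IA32_TSC is architectural MSR 0x10, matching the old hard-coded value. For context, the existing wrapper looks roughly like this (a sketch, reproduced here for illustration):

static inline void wrmsr(u32 index, u64 val)
{
	u32 a = val, d = val >> 32;

	asm volatile ("wrmsr" : : "a"(a), "d"(d), "c"(index) : "memory");
}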
diff --git a/x86/cstart.S b/x86/cstart.S
index 2c0eec7..6db6a38 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -143,6 +143,7 @@
online_cpus:
.fill (max_cpus + 7) / 8, 1, 0
+.align 2
cpu_online_count: .word 1
.code16
diff --git a/x86/cstart64.S b/x86/cstart64.S
index ff79ae7..7272452 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -256,4 +256,5 @@
jne 1b
ret
+.align 2
cpu_online_count: .word 1
diff --git a/x86/debug.c b/x86/debug.c
index 0019ebd..b66bf04 100644
--- a/x86/debug.c
+++ b/x86/debug.c
@@ -8,10 +8,12 @@
*
* This work is licensed under the terms of the GNU GPL, version 2.
*/
+#include <asm/debugreg.h>
#include "libcflat.h"
#include "processor.h"
#include "desc.h"
+#include "usermode.h"
static volatile unsigned long bp_addr;
static volatile unsigned long db_addr[10], dr6[10];
@@ -36,14 +38,29 @@
dr6[n] = read_dr6();
if (dr6[n] & 0x1)
- regs->rflags |= (1 << 16);
+ regs->rflags |= X86_EFLAGS_RF;
if (++n >= 10) {
- regs->rflags &= ~(1 << 8);
+ regs->rflags &= ~X86_EFLAGS_TF;
write_dr7(0x00000400);
}
}
+static inline bool is_single_step_db(unsigned long dr6_val)
+{
+ return dr6_val == (DR6_ACTIVE_LOW | DR6_BS);
+}
+
+static inline bool is_general_detect_db(unsigned long dr6_val)
+{
+ return dr6_val == (DR6_ACTIVE_LOW | DR6_BD);
+}
+
+static inline bool is_icebp_db(unsigned long dr6_val)
+{
+ return dr6_val == DR6_ACTIVE_LOW;
+}
+
extern unsigned char handle_db_save_rip;
asm("handle_db_save_rip:\n"
"stc\n"
@@ -64,102 +81,345 @@
got_ud = 1;
}
-int main(int ac, char **av)
+typedef unsigned long (*db_test_fn)(void);
+typedef void (*db_report_fn)(unsigned long, const char *);
+
+static unsigned long singlestep_with_movss_blocking_and_dr7_gd(void);
+
+static void __run_single_step_db_test(db_test_fn test, db_report_fn report_fn)
{
unsigned long start;
+ bool ign;
+
+ n = 0;
+ write_dr6(0);
+
+ start = test();
+ report_fn(start, "");
+
+ /* MOV DR #GPs at CPL>0, don't try to run the DR7.GD test in usermode. */
+ if (test == singlestep_with_movss_blocking_and_dr7_gd)
+ return;
+
+ n = 0;
+ write_dr6(0);
+
+ /*
+ * Run the test in usermode. Use the expected start RIP from the first
+ * run, the usermode framework doesn't make it easy to get the expected
+ * RIP out of the test, and it shouldn't change in any case. Run the
+ * test with IOPL=3 so that it can use OUT, CLI, STI, etc...
+ */
+ set_iopl(3);
+ run_in_user((usermode_func)test, GP_VECTOR, 0, 0, 0, 0, &ign);
+ set_iopl(0);
+
+ report_fn(start, "Usermode ");
+}
+
+#define run_ss_db_test(name) __run_single_step_db_test(name, report_##name)
+
+static void report_singlestep_basic(unsigned long start, const char *usermode)
+{
+ report(n == 3 &&
+ is_single_step_db(dr6[0]) && db_addr[0] == start &&
+ is_single_step_db(dr6[1]) && db_addr[1] == start + 1 &&
+ is_single_step_db(dr6[2]) && db_addr[2] == start + 1 + 1,
+ "%sSingle-step #DB basic test", usermode);
+}
+
+static unsigned long singlestep_basic(void)
+{
+ unsigned long start;
+
+ /*
+ * After being enabled, single-step breakpoints have a one instruction
+ * delay before the first #DB is generated.
+ */
+ asm volatile (
+ "pushf\n\t"
+ "pop %%rax\n\t"
+ "or $(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "popf\n\t"
+ "and $~(1<<8),%%rax\n\t"
+ "1:push %%rax\n\t"
+ "popf\n\t"
+ "lea 1b(%%rip), %0\n\t"
+ : "=r" (start) : : "rax"
+ );
+ return start;
+}
+
+static void report_singlestep_emulated_instructions(unsigned long start,
+ const char *usermode)
+{
+ report(n == 7 &&
+ is_single_step_db(dr6[0]) && db_addr[0] == start &&
+ is_single_step_db(dr6[1]) && db_addr[1] == start + 1 &&
+ is_single_step_db(dr6[2]) && db_addr[2] == start + 1 + 3 &&
+ is_single_step_db(dr6[3]) && db_addr[3] == start + 1 + 3 + 2 &&
+ is_single_step_db(dr6[4]) && db_addr[4] == start + 1 + 3 + 2 + 5 &&
+ is_single_step_db(dr6[5]) && db_addr[5] == start + 1 + 3 + 2 + 5 + 1 &&
+ is_single_step_db(dr6[6]) && db_addr[6] == start + 1 + 3 + 2 + 5 + 1 + 1,
+ "%sSingle-step #DB on emulated instructions", usermode);
+}
+
+static unsigned long singlestep_emulated_instructions(void)
+{
+ unsigned long start;
+
+ /*
+ * Verify single-step #DB are generated correctly on emulated
+ * instructions, e.g. CPUID and RDMSR.
+ */
+ asm volatile (
+ "pushf\n\t"
+ "pop %%rax\n\t"
+ "or $(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "popf\n\t"
+ "and $~(1<<8),%%rax\n\t"
+ "1:push %%rax\n\t"
+ "xor %%rax,%%rax\n\t"
+ "cpuid\n\t"
+ "movl $0x3fd, %%edx\n\t"
+ "inb %%dx, %%al\n\t"
+ "popf\n\t"
+ "lea 1b(%%rip),%0\n\t"
+ : "=r" (start) : : "rax", "ebx", "ecx", "edx"
+ );
+ return start;
+}
+
+static void report_singlestep_with_sti_blocking(unsigned long start,
+ const char *usermode)
+{
+ report(n == 4 &&
+ is_single_step_db(dr6[0]) && db_addr[0] == start &&
+ is_single_step_db(dr6[1]) && db_addr[1] == start + 6 &&
+ is_single_step_db(dr6[2]) && db_addr[2] == start + 6 + 1 &&
+ is_single_step_db(dr6[3]) && db_addr[3] == start + 6 + 1 + 1,
+ "%sSingle-step #DB w/ STI blocking", usermode);
+}
+
+static unsigned long singlestep_with_sti_blocking(void)
+{
+ unsigned long start_rip;
+
+ /*
+ * STI blocking doesn't suppress #DBs, thus the first single-step #DB
+ * should arrive after the standard one instruction delay.
+ */
+ asm volatile(
+ "cli\n\t"
+ "pushf\n\t"
+ "pop %%rax\n\t"
+ "or $(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "popf\n\t"
+ "sti\n\t"
+ "1:and $~(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "popf\n\t"
+ "lea 1b(%%rip),%0\n\t"
+ : "=r" (start_rip) : : "rax"
+ );
+ return start_rip;
+}
+
+static void report_singlestep_with_movss_blocking(unsigned long start,
+ const char *usermode)
+{
+ report(n == 3 &&
+ is_single_step_db(dr6[0]) && db_addr[0] == start &&
+ is_single_step_db(dr6[1]) && db_addr[1] == start + 1 &&
+ is_single_step_db(dr6[2]) && db_addr[2] == start + 1 + 1,
+ "%sSingle-step #DB w/ MOVSS blocking", usermode);
+}
+
+static unsigned long singlestep_with_movss_blocking(void)
+{
+ unsigned long start_rip;
+
+ /*
+ * MOVSS blocking suppresses single-step #DBs (and select other #DBs),
+ * thus the first single-step #DB should occur after MOVSS blocking
+ * expires, i.e. two instructions after #DBs are enabled in this case.
+ */
+ asm volatile(
+ "pushf\n\t"
+ "pop %%rax\n\t"
+ "or $(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "mov %%ss, %%ax\n\t"
+ "popf\n\t"
+ "mov %%ax, %%ss\n\t"
+ "and $~(1<<8),%%rax\n\t"
+ "1: push %%rax\n\t"
+ "popf\n\t"
+ "lea 1b(%%rip),%0\n\t"
+ : "=r" (start_rip) : : "rax"
+ );
+ return start_rip;
+}
+
+static void report_singlestep_with_movss_blocking_and_icebp(unsigned long start,
+ const char *usermode)
+{
+ report(n == 4 &&
+ is_icebp_db(dr6[0]) && db_addr[0] == start &&
+ is_single_step_db(dr6[1]) && db_addr[1] == start + 6 &&
+ is_single_step_db(dr6[2]) && db_addr[2] == start + 6 + 1 &&
+ is_single_step_db(dr6[3]) && db_addr[3] == start + 6 + 1 + 1,
+ "%sSingle-Step + ICEBP #DB w/ MOVSS blocking", usermode);
+}
+
+static unsigned long singlestep_with_movss_blocking_and_icebp(void)
+{
+ unsigned long start;
+
+ /*
+ * ICEBP, a.k.a. INT1 or int1icebrk, is an oddball. It generates a
+ * trap-like #DB, is intercepted if #DBs are intercepted, and manifests
+ * as a #DB VM-Exit, but the VM-Exit occurs on the ICEBP itself, i.e.
+ * it's treated as an instruction intercept. Verify that ICEBP is
+ * correctly emulated as a trap-like #DB when intercepted, and that
+ * MOVSS blocking is handled correctly with respect to single-step
+ * breakpoints being enabled.
+ */
+ asm volatile(
+ "pushf\n\t"
+ "pop %%rax\n\t"
+ "or $(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "mov %%ss, %%ax\n\t"
+ "popf\n\t"
+ "mov %%ax, %%ss\n\t"
+ ".byte 0xf1;"
+ "1:and $~(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "popf\n\t"
+ "lea 1b(%%rip),%0\n\t"
+ : "=r" (start) : : "rax"
+ );
+ return start;
+}
+
+static void report_singlestep_with_movss_blocking_and_dr7_gd(unsigned long start,
+ const char *ign)
+{
+ report(n == 5 &&
+ is_general_detect_db(dr6[0]) && db_addr[0] == start &&
+ is_single_step_db(dr6[1]) && db_addr[1] == start + 3 &&
+ is_single_step_db(dr6[2]) && db_addr[2] == start + 3 + 6 &&
+ is_single_step_db(dr6[3]) && db_addr[3] == start + 3 + 6 + 1 &&
+ is_single_step_db(dr6[4]) && db_addr[4] == start + 3 + 6 + 1 + 1,
+ "Single-step #DB w/ MOVSS blocking and DR7.GD=1");
+}
+
+static unsigned long singlestep_with_movss_blocking_and_dr7_gd(void)
+{
+ unsigned long start_rip;
+
+ write_dr7(DR7_GD);
+
+ /*
+ * MOVSS blocking does NOT suppress General Detect #DBs, which have
+ * fault-like behavior. Note, DR7.GD is cleared by the CPU upon
+ * successful delivery of the #DB. DR6.BD is NOT cleared by the CPU,
+ * but the MOV DR6 below will be re-executed after handling the
+ * General Detect #DB.
+ */
+ asm volatile(
+ "xor %0, %0\n\t"
+ "pushf\n\t"
+ "pop %%rax\n\t"
+ "or $(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "mov %%ss, %%ax\n\t"
+ "popf\n\t"
+ "mov %%ax, %%ss\n\t"
+ "1: mov %0, %%dr6\n\t"
+ "and $~(1<<8),%%rax\n\t"
+ "push %%rax\n\t"
+ "popf\n\t"
+ "lea 1b(%%rip),%0\n\t"
+ : "=r" (start_rip) : : "rax"
+ );
+ return start_rip;
+}
+
+int main(int ac, char **av)
+{
unsigned long cr4;
handle_exception(DB_VECTOR, handle_db);
handle_exception(BP_VECTOR, handle_bp);
handle_exception(UD_VECTOR, handle_ud);
+ /*
+ * DR4 is an alias for DR6 (and DR5 aliases DR7) if CR4.DE is NOT set,
+ * and is reserved if CR4.DE=1 (Debug Extensions enabled).
+ */
got_ud = 0;
cr4 = read_cr4();
write_cr4(cr4 & ~X86_CR4_DE);
write_dr4(0);
- write_dr6(0xffff4ff2);
- report(read_dr4() == 0xffff4ff2 && !got_ud, "reading DR4 with CR4.DE == 0");
+ write_dr6(DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP1);
+ report(read_dr4() == (DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP1) && !got_ud,
+ "DR4==DR6 with CR4.DE == 0");
cr4 = read_cr4();
write_cr4(cr4 | X86_CR4_DE);
read_dr4();
- report(got_ud, "reading DR4 with CR4.DE == 1");
+ report(got_ud, "DR4 read got #UD with CR4.DE == 1");
write_dr6(0);
extern unsigned char sw_bp;
asm volatile("int3; sw_bp:");
report(bp_addr == (unsigned long)&sw_bp, "#BP");
+ /*
+ * The CPU sets/clears bits 0-3 (trap bits for DR0-3) on #DB based on
+ * whether or not the corresponding DR0-3 got a match. All other bits
+ * in DR6 are set if and only if their associated breakpoint condition
+ * is active, and are never cleared by the CPU. Verify a match on DR0
+ * is reported correctly, and that DR6.BS is not set when single-step
+ * breakpoints are disabled, but is left set (if set by software).
+ */
n = 0;
extern unsigned char hw_bp1;
write_dr0(&hw_bp1);
- write_dr7(0x00000402);
+ write_dr7(DR7_FIXED_1 | DR7_GLOBAL_ENABLE_DR0);
asm volatile("hw_bp1: nop");
report(n == 1 &&
- db_addr[0] == ((unsigned long)&hw_bp1) && dr6[0] == 0xffff0ff1,
+ db_addr[0] == ((unsigned long)&hw_bp1) &&
+ dr6[0] == (DR6_ACTIVE_LOW | DR6_TRAP0),
"hw breakpoint (test that dr6.BS is not set)");
n = 0;
extern unsigned char hw_bp2;
write_dr0(&hw_bp2);
- write_dr6(0x00004002);
+ write_dr6(DR6_BS | DR6_TRAP1);
asm volatile("hw_bp2: nop");
report(n == 1 &&
- db_addr[0] == ((unsigned long)&hw_bp2) && dr6[0] == 0xffff4ff1,
+ db_addr[0] == ((unsigned long)&hw_bp2) &&
+ dr6[0] == (DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP0),
"hw breakpoint (test that dr6.BS is not cleared)");
- n = 0;
- write_dr6(0);
- asm volatile(
- "pushf\n\t"
- "pop %%rax\n\t"
- "or $(1<<8),%%rax\n\t"
- "push %%rax\n\t"
- "lea (%%rip),%0\n\t"
- "popf\n\t"
- "and $~(1<<8),%%rax\n\t"
- "push %%rax\n\t"
- "popf\n\t"
- : "=r" (start) : : "rax");
- report(n == 3 &&
- db_addr[0] == start + 1 + 6 && dr6[0] == 0xffff4ff0 &&
- db_addr[1] == start + 1 + 6 + 1 && dr6[1] == 0xffff4ff0 &&
- db_addr[2] == start + 1 + 6 + 1 + 1 && dr6[2] == 0xffff4ff0,
- "single step");
-
- /*
- * cpuid and rdmsr (among others) trigger VM exits and are then
- * emulated. Test that single stepping works on emulated instructions.
- */
- n = 0;
- write_dr6(0);
- asm volatile(
- "pushf\n\t"
- "pop %%rax\n\t"
- "or $(1<<8),%%rax\n\t"
- "push %%rax\n\t"
- "lea (%%rip),%0\n\t"
- "popf\n\t"
- "and $~(1<<8),%%rax\n\t"
- "push %%rax\n\t"
- "xor %%rax,%%rax\n\t"
- "cpuid\n\t"
- "movl $0x1a0,%%ecx\n\t"
- "rdmsr\n\t"
- "popf\n\t"
- : "=r" (start) : : "rax", "ebx", "ecx", "edx");
- report(n == 7 &&
- db_addr[0] == start + 1 + 6 && dr6[0] == 0xffff4ff0 &&
- db_addr[1] == start + 1 + 6 + 1 && dr6[1] == 0xffff4ff0 &&
- db_addr[2] == start + 1 + 6 + 1 + 3 && dr6[2] == 0xffff4ff0 &&
- db_addr[3] == start + 1 + 6 + 1 + 3 + 2 && dr6[3] == 0xffff4ff0 &&
- db_addr[4] == start + 1 + 6 + 1 + 3 + 2 + 5 && dr6[4] == 0xffff4ff0 &&
- db_addr[5] == start + 1 + 6 + 1 + 3 + 2 + 5 + 2 && dr6[5] == 0xffff4ff0 &&
- db_addr[6] == start + 1 + 6 + 1 + 3 + 2 + 5 + 2 + 1 && dr6[6] == 0xffff4ff0,
- "single step emulated instructions");
+ run_ss_db_test(singlestep_basic);
+ run_ss_db_test(singlestep_emulated_instructions);
+ run_ss_db_test(singlestep_with_sti_blocking);
+ run_ss_db_test(singlestep_with_movss_blocking);
+ run_ss_db_test(singlestep_with_movss_blocking_and_icebp);
+ run_ss_db_test(singlestep_with_movss_blocking_and_dr7_gd);
n = 0;
write_dr1((void *)&value);
+ write_dr6(DR6_BS);
write_dr7(0x00d0040a); // 4-byte write
extern unsigned char hw_wp1;
@@ -168,7 +428,8 @@
"mov %%rax,%0\n\t; hw_wp1:"
: "=m" (value) : : "rax");
report(n == 1 &&
- db_addr[0] == ((unsigned long)&hw_wp1) && dr6[0] == 0xffff4ff2,
+ db_addr[0] == ((unsigned long)&hw_wp1) &&
+ dr6[0] == (DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP1),
"hw watchpoint (test that dr6.BS is not cleared)");
n = 0;
@@ -180,7 +441,8 @@
"mov %%rax,%0\n\t; hw_wp2:"
: "=m" (value) : : "rax");
report(n == 1 &&
- db_addr[0] == ((unsigned long)&hw_wp2) && dr6[0] == 0xffff0ff2,
+ db_addr[0] == ((unsigned long)&hw_wp2) &&
+ dr6[0] == (DR6_ACTIVE_LOW | DR6_TRAP1),
"hw watchpoint (test that dr6.BS is not set)");
n = 0;
@@ -188,7 +450,7 @@
extern unsigned char sw_icebp;
asm volatile(".byte 0xf1; sw_icebp:");
report(n == 1 &&
- db_addr[0] == (unsigned long)&sw_icebp && dr6[0] == 0xffff0ff0,
+ db_addr[0] == (unsigned long)&sw_icebp && dr6[0] == DR6_ACTIVE_LOW,
"icebp");
write_dr7(0x400);
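
Note: with the named DR6 bits available, a failure dump can decode DR6 instead of printing raw magic numbers. A hypothetical debugging aid, not part of the patch (printf() is the libcflat one):

static void print_dr6(unsigned long val)
{
	printf("DR6 = 0x%lx%s%s%s%s\n", val,
	       (val & DR6_TRAP_BITS) ? " [DR0-3 match]" : "",
	       (val & DR6_BD) ? " [general detect]" : "",
	       (val & DR6_BS) ? " [single-step]" : "",
	       (val & DR6_BT) ? " [task switch]" : "");
}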
diff --git a/x86/emulator.c b/x86/emulator.c
index 22a518f..cd78e3c 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -1,3 +1,5 @@
+#include <asm/debugreg.h>
+
#include "ioram.h"
#include "vm.h"
#include "libcflat.h"
@@ -883,12 +885,14 @@
static void test_mov_dr(uint64_t *mem)
{
unsigned long rax;
- const unsigned long in_rax = 0;
- bool rtm_support = this_cpu_has(X86_FEATURE_RTM);
- unsigned long dr6_fixed_1 = rtm_support ? 0xfffe0ff0ul : 0xffff0ff0ul;
+
asm(KVM_FEP "movq %0, %%dr6\n\t"
- KVM_FEP "movq %%dr6, %0\n\t" : "=a" (rax) : "a" (in_rax));
- report(rax == dr6_fixed_1, "mov_dr6");
+ KVM_FEP "movq %%dr6, %0\n\t" : "=a" (rax) : "a" (0));
+
+ if (this_cpu_has(X86_FEATURE_RTM))
+ report(rax == (DR6_ACTIVE_LOW & ~DR6_RTM), "mov_dr6");
+ else
+ report(rax == DR6_ACTIVE_LOW, "mov_dr6");
}
static void test_push16(uint64_t *mem)
diff --git a/x86/tsc_adjust.c b/x86/tsc_adjust.c
index 3636b5e..c98c1ea 100644
--- a/x86/tsc_adjust.c
+++ b/x86/tsc_adjust.c
@@ -4,37 +4,33 @@
int main(void)
{
u64 t1, t2, t3, t4, t5;
- u64 est_delta_time;
- if (this_cpu_has(X86_FEATURE_TSC_ADJUST)) { // MSR_IA32_TSC_ADJUST Feature is enabled?
- report(rdmsr(MSR_IA32_TSC_ADJUST) == 0x0,
- "MSR_IA32_TSC_ADJUST msr initialization");
- t3 = 100000000000ull;
- t1 = rdtsc();
- wrmsr(MSR_IA32_TSC_ADJUST, t3);
- t2 = rdtsc();
- report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
- "MSR_IA32_TSC_ADJUST msr read / write");
- report((t2 - t1) >= t3,
- "TSC adjustment for MSR_IA32_TSC_ADJUST value");
- t3 = 0x0;
- wrmsr(MSR_IA32_TSC_ADJUST, t3);
- report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
- "MSR_IA32_TSC_ADJUST msr read / write");
- t4 = 100000000000ull;
- t1 = rdtsc();
- wrtsc(t4);
- t2 = rdtsc();
- t5 = rdmsr(MSR_IA32_TSC_ADJUST);
- // est of time between reading tsc and writing tsc,
- // (based on MSR_IA32_TSC_ADJUST msr value) should be small
- est_delta_time = t4 - t5 - t1;
- // arbitray 2x latency (wrtsc->rdtsc) threshold
- report(est_delta_time <= (2 * (t2 - t4)),
- "MSR_IA32_TSC_ADJUST msr adjustment on tsc write");
+ if (!this_cpu_has(X86_FEATURE_TSC_ADJUST)) {
+ report_skip("MSR_IA32_TSC_ADJUST feature not enabled");
+ return report_summary();
}
- else {
- report_pass("MSR_IA32_TSC_ADJUST feature not enabled");
- }
+
+ report(rdmsr(MSR_IA32_TSC_ADJUST) == 0x0,
+ "MSR_IA32_TSC_ADJUST msr initialization");
+ t3 = 100000000000ull;
+ t1 = rdtsc();
+ wrmsr(MSR_IA32_TSC_ADJUST, t3);
+ t2 = rdtsc();
+ report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
+ "MSR_IA32_TSC_ADJUST msr read / write");
+ report((t2 - t1) >= t3,
+ "TSC adjustment for MSR_IA32_TSC_ADJUST value");
+ t3 = 0x0;
+ wrmsr(MSR_IA32_TSC_ADJUST, t3);
+ report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
+ "MSR_IA32_TSC_ADJUST msr read / write");
+ t4 = 100000000000ull;
+ t1 = rdtsc();
+ wrtsc(t4);
+ t2 = rdtsc();
+ t5 = rdmsr(MSR_IA32_TSC_ADJUST);
+ report(t1 <= t4 - t5, "Internal TSC advances across write to IA32_TSC");
+ report(t2 >= t4, "IA32_TSC advances after write to IA32_TSC");
+
return report_summary();
}
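
Note: the two new checks encode the architectural relationship between the free-running counter and IA32_TSC_ADJUST: a WRMSR that moves IA32_TSC by X also moves IA32_TSC_ADJUST by X. With TSC_ADJUST zeroed beforehand, the write of t4 at internal counter value c leaves:

	t5 = t4 - c           =>  c = t4 - t5
	t1 <= c               (the counter never decreases)
	t2 = c' + t5 >= t4    (any read after the write, c' >= c)

which is exactly what the t1 <= t4 - t5 and t2 >= t4 assertions verify.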
diff --git a/x86/vmx.c b/x86/vmx.c
index f4fbb94..51eed8c 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -1884,15 +1884,35 @@
step->data = data;
}
+static void __test_set_guest(test_guest_func func)
+{
+ assert(current->v2);
+ v2_guest_main = func;
+}
+
/*
* Set the target of the first enter_guest call. Can only be called once per
* test. Must be called before first enter_guest call.
*/
void test_set_guest(test_guest_func func)
{
- assert(current->v2);
TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
- v2_guest_main = func;
+ __test_set_guest(func);
+}
+
+/*
+ * Set the target of the enter_guest call and reset the RIP so 'func' will
+ * start from the beginning. This can be called multiple times per test.
+ */
+void test_override_guest(test_guest_func func)
+{
+ __test_set_guest(func);
+ init_vmcs_guest();
+}
+
+void test_set_guest_finished(void)
+{
+ guest_finished = 1;
}
static void check_for_guest_termination(union exit_reason exit_reason)
diff --git a/x86/vmx.h b/x86/vmx.h
index 4423986..11cb665 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -1055,7 +1055,9 @@
typedef void (*test_guest_func)(void);
typedef void (*test_teardown_func)(void *data);
void test_set_guest(test_guest_func func);
+void test_override_guest(test_guest_func func);
void test_add_teardown(test_teardown_func func, void *data);
void test_skip(const char *msg);
+void test_set_guest_finished(void);
#endif
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index 3d57ed6..df93198 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -21,6 +21,14 @@
#include "smp.h"
#include "delay.h"
#include "access.h"
+#include "x86/usermode.h"
+
+/*
+ * vmcs.GUEST_PENDING_DEBUG has the same format as DR6, although some bits that
+ * are legal in DR6 are reserved in vmcs.GUEST_PENDING_DEBUG. And if any data
+ * or I/O breakpoint matches *and* was enabled, bit 12 is also set.
+ */
+#define PENDING_DBG_TRAP BIT(12)
#define VPID_CAP_INVVPID_TYPES_SHIFT 40
@@ -5080,9 +5088,9 @@
enter_guest();
report_mtf("OUT", (unsigned long) &test_mtf2);
pending_dbg = vmcs_read(GUEST_PENDING_DEBUG);
- report(pending_dbg & DR_STEP,
+ report(pending_dbg & DR6_BS,
"'pending debug exceptions' field after MTF VM-exit: 0x%lx (expected 0x%lx)",
- pending_dbg, (unsigned long) DR_STEP);
+ pending_dbg, (unsigned long) DR6_BS);
disable_mtf();
disable_tf();
@@ -8931,7 +8939,7 @@
static void vmx_preemption_timer_zero_set_pending_dbg(u32 exception_bitmap)
{
vmx_preemption_timer_zero_activate_preemption_timer();
- vmcs_write(GUEST_PENDING_DEBUG, BIT(12) | DR_TRAP1);
+ vmcs_write(GUEST_PENDING_DEBUG, PENDING_DBG_TRAP | DR6_TRAP1);
vmcs_write(EXC_BITMAP, exception_bitmap);
enter_guest();
}
@@ -9315,7 +9323,7 @@
* (b) stale bits in DR6 (DR6.BD, in particular) don't leak into
* the exit qualification field for a subsequent #DB exception.
*/
- const u64 starting_dr6 = DR6_RESERVED | BIT(13) | DR_TRAP3 | DR_TRAP1;
+ const u64 starting_dr6 = DR6_ACTIVE_LOW | DR6_BD | DR6_TRAP3 | DR6_TRAP1;
extern char post_nop asm(".Lpost_nop");
extern char post_movss_nop asm(".Lpost_movss_nop");
extern char post_wbinvd asm(".Lpost_wbinvd");
@@ -9339,7 +9347,7 @@
* standard that L0 has to follow for emulated instructions.
*/
single_step_guest("Hardware delivered single-step", starting_dr6, 0);
- check_db_exit(false, false, false, &post_nop, DR_STEP, starting_dr6);
+ check_db_exit(false, false, false, &post_nop, DR6_BS, starting_dr6);
/*
* Hardware-delivered #DB trap for single-step in MOVSS shadow
@@ -9349,8 +9357,8 @@
* data breakpoint as well as the single-step trap.
*/
single_step_guest("Hardware delivered single-step in MOVSS shadow",
- starting_dr6, BIT(12) | DR_STEP | DR_TRAP0 );
- check_db_exit(false, false, false, &post_movss_nop, DR_STEP | DR_TRAP0,
+ starting_dr6, DR6_BS | PENDING_DBG_TRAP | DR6_TRAP0);
+ check_db_exit(false, false, false, &post_movss_nop, DR6_BS | DR6_TRAP0,
starting_dr6);
/*
@@ -9360,7 +9368,7 @@
* modified DR6, but fails miserably.
*/
single_step_guest("Software synthesized single-step", starting_dr6, 0);
- check_db_exit(false, false, false, &post_wbinvd, DR_STEP, starting_dr6);
+ check_db_exit(false, false, false, &post_wbinvd, DR6_BS, starting_dr6);
/*
* L0 synthesized #DB trap for single-step in MOVSS shadow is
@@ -9369,8 +9377,8 @@
* the exit qualification field for the #DB exception.
*/
single_step_guest("Software synthesized single-step in MOVSS shadow",
- starting_dr6, BIT(12) | DR_STEP | DR_TRAP0);
- check_db_exit(true, false, true, &post_movss_wbinvd, DR_STEP | DR_TRAP0,
+ starting_dr6, DR6_BS | PENDING_DBG_TRAP | DR6_TRAP0);
+ check_db_exit(true, false, true, &post_movss_wbinvd, DR6_BS | DR6_TRAP0,
starting_dr6);
/*
@@ -10701,6 +10709,134 @@
__vmx_pf_vpid_test(invalidate_tlb_new_vpid, 1);
}
+static void vmx_l2_gp_test(void)
+{
+ *(volatile u64 *)NONCANONICAL = 0;
+}
+
+static void vmx_l2_ud_test(void)
+{
+ asm volatile ("ud2");
+}
+
+static void vmx_l2_de_test(void)
+{
+ asm volatile (
+ "xor %%eax, %%eax\n\t"
+ "xor %%ebx, %%ebx\n\t"
+ "xor %%edx, %%edx\n\t"
+ "idiv %%ebx\n\t"
+ ::: "eax", "ebx", "edx");
+}
+
+static void vmx_l2_bp_test(void)
+{
+ asm volatile ("int3");
+}
+
+static void vmx_l2_db_test(void)
+{
+ write_rflags(read_rflags() | X86_EFLAGS_TF);
+}
+
+static uint64_t usermode_callback(void)
+{
+ /*
+ * Trigger an #AC by writing 8 bytes to a 4-byte aligned address.
+ * Disclaimer: It is assumed that the stack pointer is aligned
+ * on a 16-byte boundary as x86_64 stacks should be.
+ */
+ asm volatile("movq $0, -0x4(%rsp)");
+
+ return 0;
+}
+
+static void vmx_l2_ac_test(void)
+{
+ bool hit_ac = false;
+
+ write_cr0(read_cr0() | X86_CR0_AM);
+ write_rflags(read_rflags() | X86_EFLAGS_AC);
+
+ run_in_user(usermode_callback, AC_VECTOR, 0, 0, 0, 0, &hit_ac);
+ report(hit_ac, "Usermode #AC handled in L2");
+ vmcall();
+}
+
+struct vmx_exception_test {
+ u8 vector;
+ void (*guest_code)(void);
+};
+
+struct vmx_exception_test vmx_exception_tests[] = {
+ { GP_VECTOR, vmx_l2_gp_test },
+ { UD_VECTOR, vmx_l2_ud_test },
+ { DE_VECTOR, vmx_l2_de_test },
+ { DB_VECTOR, vmx_l2_db_test },
+ { BP_VECTOR, vmx_l2_bp_test },
+ { AC_VECTOR, vmx_l2_ac_test },
+};
+
+static u8 vmx_exception_test_vector;
+
+static void vmx_exception_handler(struct ex_regs *regs)
+{
+ report(regs->vector == vmx_exception_test_vector,
+ "Handling %s in L2's exception handler",
+ exception_mnemonic(vmx_exception_test_vector));
+ vmcall();
+}
+
+static void handle_exception_in_l2(u8 vector)
+{
+ handler old_handler = handle_exception(vector, vmx_exception_handler);
+
+ vmx_exception_test_vector = vector;
+
+ enter_guest();
+ report(vmcs_read(EXI_REASON) == VMX_VMCALL,
+ "%s handled by L2", exception_mnemonic(vector));
+
+ handle_exception(vector, old_handler);
+}
+
+static void handle_exception_in_l1(u32 vector)
+{
+ u32 old_eb = vmcs_read(EXC_BITMAP);
+
+ vmcs_write(EXC_BITMAP, old_eb | (1u << vector));
+
+ enter_guest();
+
+ report((vmcs_read(EXI_REASON) == VMX_EXC_NMI) &&
+ ((vmcs_read(EXI_INTR_INFO) & 0xff) == vector),
+ "%s handled by L1", exception_mnemonic(vector));
+
+ vmcs_write(EXC_BITMAP, old_eb);
+}
+
+static void vmx_exception_test(void)
+{
+ struct vmx_exception_test *t;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(vmx_exception_tests); i++) {
+ t = &vmx_exception_tests[i];
+
+ /*
+ * Override the guest code before each run, even though it's the
+ * same code: the VMCS guest state needs to be reinitialized for
+ * every run.
+ */
+ test_override_guest(t->guest_code);
+ handle_exception_in_l2(t->vector);
+
+ test_override_guest(t->guest_code);
+ handle_exception_in_l1(t->vector);
+ }
+
+ test_set_guest_finished();
+}
+
#define TEST(name) { #name, .v2 = name }
/* name/init/guest_main/exit_handler/syscall_handler/guest_regs */
@@ -10810,5 +10946,6 @@
TEST(vmx_pf_no_vpid_test),
TEST(vmx_pf_invvpid_test),
TEST(vmx_pf_vpid_test),
+ TEST(vmx_exception_test),
{ NULL, NULL, NULL, NULL, NULL, {0} },
};