Merge branch 'misc/queue' into 'master'

Misc fixes and new tests: rework the x86 debug register definitions, add single-step #DB and nested VMX exception tests, and clean up the TSC_ADJUST test.

See merge request kvm-unit-tests/kvm-unit-tests!24
diff --git a/lib/bitops.h b/lib/bitops.h
index 308aa86..81a06a4 100644
--- a/lib/bitops.h
+++ b/lib/bitops.h
@@ -1,6 +1,9 @@
 #ifndef _BITOPS_H_
 #define _BITOPS_H_
 
+#include <stdbool.h>
+#include <stddef.h>
+
 /*
  * Adapted from
  *   include/linux/bitops.h
diff --git a/lib/x86/asm/debugreg.h b/lib/x86/asm/debugreg.h
index e86f5a6..a30f949 100644
--- a/lib/x86/asm/debugreg.h
+++ b/lib/x86/asm/debugreg.h
@@ -2,80 +2,63 @@
 #ifndef _ASMX86_DEBUGREG_H_
 #define _ASMX86_DEBUGREG_H_
 
-
-/* Indicate the register numbers for a number of the specific
-   debug registers.  Registers 0-3 contain the addresses we wish to trap on */
-#define DR_FIRSTADDR 0        /* u_debugreg[DR_FIRSTADDR] */
-#define DR_LASTADDR 3         /* u_debugreg[DR_LASTADDR]  */
-
-#define DR_STATUS 6           /* u_debugreg[DR_STATUS]     */
-#define DR_CONTROL 7          /* u_debugreg[DR_CONTROL] */
-
-/* Define a few things for the status register.  We can use this to determine
-   which debugging register was responsible for the trap.  The other bits
-   are either reserved or not of interest to us. */
-
-/* Define reserved bits in DR6 which are always set to 1 */
-#define DR6_RESERVED	(0xFFFF0FF0)
-
-#define DR_TRAP0	(0x1)		/* db0 */
-#define DR_TRAP1	(0x2)		/* db1 */
-#define DR_TRAP2	(0x4)		/* db2 */
-#define DR_TRAP3	(0x8)		/* db3 */
-#define DR_TRAP_BITS	(DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)
-
-#define DR_STEP		(0x4000)	/* single-step */
-#define DR_SWITCH	(0x8000)	/* task switch */
-
-/* Now define a bunch of things for manipulating the control register.
-   The top two bytes of the control register consist of 4 fields of 4
-   bits - each field corresponds to one of the four debug registers,
-   and indicates what types of access we trap on, and how large the data
-   field is that we are looking at */
-
-#define DR_CONTROL_SHIFT 16 /* Skip this many bits in ctl register */
-#define DR_CONTROL_SIZE 4   /* 4 control bits per register */
-
-#define DR_RW_EXECUTE (0x0)   /* Settings for the access types to trap on */
-#define DR_RW_WRITE (0x1)
-#define DR_RW_READ (0x3)
-
-#define DR_LEN_1 (0x0) /* Settings for data length to trap on */
-#define DR_LEN_2 (0x4)
-#define DR_LEN_4 (0xC)
-#define DR_LEN_8 (0x8)
-
-/* The low byte to the control register determine which registers are
-   enabled.  There are 4 fields of two bits.  One bit is "local", meaning
-   that the processor will reset the bit after a task switch and the other
-   is global meaning that we have to explicitly reset the bit.  With linux,
-   you can use either one, since we explicitly zero the register when we enter
-   kernel mode. */
-
-#define DR_LOCAL_ENABLE_SHIFT 0    /* Extra shift to the local enable bit */
-#define DR_GLOBAL_ENABLE_SHIFT 1   /* Extra shift to the global enable bit */
-#define DR_LOCAL_ENABLE (0x1)      /* Local enable for reg 0 */
-#define DR_GLOBAL_ENABLE (0x2)     /* Global enable for reg 0 */
-#define DR_ENABLE_SIZE 2           /* 2 enable bits per register */
-
-#define DR_LOCAL_ENABLE_MASK (0x55)  /* Set  local bits for all 4 regs */
-#define DR_GLOBAL_ENABLE_MASK (0xAA) /* Set global bits for all 4 regs */
-
-/* The second byte to the control register has a few special things.
-   We can slow the instruction pipeline for instructions coming via the
-   gdt or the ldt if we want to.  I am not sure why this is an advantage */
-
-#ifdef __i386__
-#define DR_CONTROL_RESERVED (0xFC00) /* Reserved by Intel */
-#else
-#define DR_CONTROL_RESERVED (0xFFFFFFFF0000FC00UL) /* Reserved */
-#endif
-
-#define DR_LOCAL_SLOWDOWN (0x100)   /* Local slow the pipeline */
-#define DR_GLOBAL_SLOWDOWN (0x200)  /* Global slow the pipeline */
+#include <bitops.h>
 
 /*
- * HW breakpoint additions
+ * DR6_ACTIVE_LOW combines fixed-1 and active-low bits (e.g. RTM), and is also
+ * the init/reset value for DR6.
  */
+#define DR6_ACTIVE_LOW	0xffff0ff0
+#define DR6_VOLATILE	0x0001e80f
+#define DR6_FIXED_1	(DR6_ACTIVE_LOW & ~DR6_VOLATILE)
+
+#define DR6_TRAP0	BIT(0)		/* DR0 matched */
+#define DR6_TRAP1	BIT(1)		/* DR1 matched */
+#define DR6_TRAP2	BIT(2)		/* DR2 matched */
+#define DR6_TRAP3	BIT(3)		/* DR3 matched */
+#define DR6_TRAP_BITS	(DR6_TRAP0|DR6_TRAP1|DR6_TRAP2|DR6_TRAP3)
+
+#define DR6_BUS_LOCK	BIT(11)		/* Bus lock	    0x800 */
+#define DR6_BD		BIT(13)		/* General Detect  0x2000 */
+#define DR6_BS		BIT(14)		/* Single-Step	   0x4000 */
+#define DR6_BT		BIT(15)		/* Task Switch	   0x8000 */
+#define DR6_RTM		BIT(16)		/* RTM / TSX	  0x10000 */
+
+#define DR7_FIXED_1	0x00000400	/* init/reset value, too */
+#define DR7_VOLATILE	0xffff2bff
+#define DR7_BP_EN_MASK	0x000000ff
+#define DR7_LE		BIT(8)		/* Local Exact	    0x100 */
+#define DR7_GE		BIT(9)		/* Global Exact     0x200 */
+#define DR7_RTM		BIT(11)		/* RTM / TSX	    0x800 */
+#define DR7_GD		BIT(13)		/* General Detect  0x2000 */
+
+/*
+ * Enable bits for DR0-3.  Bits 0, 2, 4, and 6 are local enable bits (cleared
+ * by the CPU on task switch); bits 1, 3, 5, and 7 are global enable bits
+ * (never cleared by the CPU).
+ */
+#define DR7_LOCAL_ENABLE_DRx(x)		(BIT(0) << ((x) * 2))
+#define DR7_GLOBAL_ENABLE_DRx(x)	(BIT(1) << ((x) * 2))
+#define DR7_ENABLE_DRx(x) \
+	(DR7_LOCAL_ENABLE_DRx(x) | DR7_GLOBAL_ENABLE_DRx(x))
+
+#define DR7_GLOBAL_ENABLE_DR0	DR7_GLOBAL_ENABLE_DRx(0)
+#define DR7_GLOBAL_ENABLE_DR1	DR7_GLOBAL_ENABLE_DRx(1)
+#define DR7_GLOBAL_ENABLE_DR2	DR7_GLOBAL_ENABLE_DRx(2)
+#define DR7_GLOBAL_ENABLE_DR3	DR7_GLOBAL_ENABLE_DRx(3)
+
+/* Condition/type of the breakpoint for DR0-3. */
+#define DR7_RW_TYPE_DRx(x, rw)	((rw) << (((x) * 4) + 16))
+#define DR7_EXECUTE_DRx(x)	DR7_RW_TYPE_DRx(x, 0)
+#define DR7_WRITE_DRx(x)	DR7_RW_TYPE_DRx(x, 1)
+#define DR7_PORT_IO_DRx(x)	DR7_RW_TYPE_DRx(x, 2)
+#define DR7_DATA_IO_DRx(x)	DR7_RW_TYPE_DRx(x, 3)	/* Read or Write */
+
+/* Length of the breakpoint for DR0-3. */
+#define DR7_LEN_DRx(x, enc)	((enc) << (((x) * 4) + 18))
+#define DR7_LEN_1_DRx(x)	DR7_LEN_DRx(x, 0)
+#define DR7_LEN_2_DRx(x)	DR7_LEN_DRx(x, 1)
+#define DR7_LEN_4_DRx(x)	DR7_LEN_DRx(x, 3)
+#define DR7_LEN_8_DRx(x)	DR7_LEN_DRx(x, 2) /* Out of sequence, undefined for 32-bit CPUs. */
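+
+/*
+ * Illustrative example (not used by this header itself): arm DR0 as a 4-byte
+ * write watchpoint by programming the enable, type, and length fields for
+ * DR0 in DR7 via the write_dr0()/write_dr7() accessors, e.g.
+ *
+ *	write_dr0(&data);
+ *	write_dr7(DR7_FIXED_1 | DR7_GLOBAL_ENABLE_DR0 |
+ *		  DR7_WRITE_DRx(0) | DR7_LEN_4_DRx(0));
+ */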
 
 #endif /* _ASMX86_DEBUGREG_H_ */
diff --git a/lib/x86/desc.c b/lib/x86/desc.c
index 16b7256..c2eb16e 100644
--- a/lib/x86/desc.c
+++ b/lib/x86/desc.c
@@ -91,7 +91,7 @@
 
 extern struct ex_record exception_table_start, exception_table_end;
 
-static const char* exception_mnemonic(int vector)
+const char* exception_mnemonic(int vector)
 {
 	switch(vector) {
 	case 0: return "#DE";
diff --git a/lib/x86/desc.h b/lib/x86/desc.h
index 9b81da0..ad6277b 100644
--- a/lib/x86/desc.h
+++ b/lib/x86/desc.h
@@ -224,6 +224,7 @@
 void print_current_tss_info(void);
 handler handle_exception(u8 v, handler fn);
 void unhandled_exception(struct ex_regs *regs, bool cpu);
+const char* exception_mnemonic(int vector);
 
 bool test_for_exception(unsigned int ex, void (*trigger_func)(void *data),
 			void *data);
diff --git a/lib/x86/processor.h b/lib/x86/processor.h
index fe5add5..117032a 100644
--- a/lib/x86/processor.h
+++ b/lib/x86/processor.h
@@ -592,9 +592,7 @@
 
 static inline void wrtsc(u64 tsc)
 {
-	unsigned a = tsc, d = tsc >> 32;
-
-	asm volatile("wrmsr" : : "a"(a), "d"(d), "c"(0x10));
+	wrmsr(MSR_IA32_TSC, tsc);
 }
 
 static inline void irq_disable(void)
diff --git a/x86/cstart.S b/x86/cstart.S
index 2c0eec7..6db6a38 100644
--- a/x86/cstart.S
+++ b/x86/cstart.S
@@ -143,6 +143,7 @@
 online_cpus:
 	.fill (max_cpus + 7) / 8, 1, 0
 
+.align 2
 cpu_online_count:	.word 1
 
 .code16
diff --git a/x86/cstart64.S b/x86/cstart64.S
index ff79ae7..7272452 100644
--- a/x86/cstart64.S
+++ b/x86/cstart64.S
@@ -256,4 +256,5 @@
 	jne 1b
 	ret
 
+.align 2
 cpu_online_count:	.word 1
diff --git a/x86/debug.c b/x86/debug.c
index 0019ebd..b66bf04 100644
--- a/x86/debug.c
+++ b/x86/debug.c
@@ -8,10 +8,12 @@
  *
  * This work is licensed under the terms of the GNU GPL, version 2.
  */
+#include <asm/debugreg.h>
 
 #include "libcflat.h"
 #include "processor.h"
 #include "desc.h"
+#include "usermode.h"
 
 static volatile unsigned long bp_addr;
 static volatile unsigned long db_addr[10], dr6[10];
@@ -36,14 +38,29 @@
 	dr6[n] = read_dr6();
 
 	if (dr6[n] & 0x1)
-		regs->rflags |= (1 << 16);
+		regs->rflags |= X86_EFLAGS_RF;
 
 	if (++n >= 10) {
-		regs->rflags &= ~(1 << 8);
+		regs->rflags &= ~X86_EFLAGS_TF;
 		write_dr7(0x00000400);
 	}
 }
 
+static inline bool is_single_step_db(unsigned long dr6_val)
+{
+	return dr6_val == (DR6_ACTIVE_LOW | DR6_BS);
+}
+
+static inline bool is_general_detect_db(unsigned long dr6_val)
+{
+	return dr6_val == (DR6_ACTIVE_LOW | DR6_BD);
+}
+
+static inline bool is_icebp_db(unsigned long dr6_val)
+{
+	return dr6_val == DR6_ACTIVE_LOW;
+}
+
 extern unsigned char handle_db_save_rip;
 asm("handle_db_save_rip:\n"
    "stc\n"
@@ -64,102 +81,345 @@
 	got_ud = 1;
 }
 
-int main(int ac, char **av)
+typedef unsigned long (*db_test_fn)(void);
+typedef void (*db_report_fn)(unsigned long, const char *);
+
+static unsigned long singlestep_with_movss_blocking_and_dr7_gd(void);
+
+static void __run_single_step_db_test(db_test_fn test, db_report_fn report_fn)
 {
 	unsigned long start;
+	bool ign;
+
+	n = 0;
+	write_dr6(0);
+
+	start = test();
+	report_fn(start, "");
+
+	/* MOV DR #GPs at CPL>0; don't try to run the DR7.GD test in usermode. */
+	if (test == singlestep_with_movss_blocking_and_dr7_gd)
+		return;
+
+	n = 0;
+	write_dr6(0);
+
+	/*
+	 * Run the test in usermode.  Use the expected start RIP from the first
+	 * run; the usermode framework doesn't make it easy to get the expected
+	 * RIP out of the test, and it shouldn't change in any case.  Run the
+	 * test with IOPL=3 so that it can use OUT, CLI, STI, etc...
+	 */
+	set_iopl(3);
+	run_in_user((usermode_func)test, GP_VECTOR, 0, 0, 0, 0, &ign);
+	set_iopl(0);
+
+	report_fn(start, "Usermode ");
+}
+
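+/* Run <name>() and verify the results with its report_<name>() counterpart. */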
+#define run_ss_db_test(name) __run_single_step_db_test(name, report_##name)
+
+static void report_singlestep_basic(unsigned long start, const char *usermode)
+{
+	report(n == 3 &&
+	       is_single_step_db(dr6[0]) && db_addr[0] == start &&
+	       is_single_step_db(dr6[1]) && db_addr[1] == start + 1 &&
+	       is_single_step_db(dr6[2]) && db_addr[2] == start + 1 + 1,
+	       "%sSingle-step #DB basic test", usermode);
+}
+
+static unsigned long singlestep_basic(void)
+{
+	unsigned long start;
+
+	/*
+	 * After being enabled, single-step breakpoints have a one instruction
+	 * delay before the first #DB is generated.
+	 */
+	asm volatile (
+		"pushf\n\t"
+		"pop %%rax\n\t"
+		"or $(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"popf\n\t"
+		"and $~(1<<8),%%rax\n\t"
+		"1:push %%rax\n\t"
+		"popf\n\t"
+		"lea 1b(%%rip), %0\n\t"
+		: "=r" (start) : : "rax"
+	);
+	return start;
+}
+
+static void report_singlestep_emulated_instructions(unsigned long start,
+						    const char *usermode)
+{
+	report(n == 7 &&
+	       is_single_step_db(dr6[0]) && db_addr[0] == start &&
+	       is_single_step_db(dr6[1]) && db_addr[1] == start + 1 &&
+	       is_single_step_db(dr6[2]) && db_addr[2] == start + 1 + 3 &&
+	       is_single_step_db(dr6[3]) && db_addr[3] == start + 1 + 3 + 2 &&
+	       is_single_step_db(dr6[4]) && db_addr[4] == start + 1 + 3 + 2 + 5 &&
+	       is_single_step_db(dr6[5]) && db_addr[5] == start + 1 + 3 + 2 + 5 + 1 &&
+	       is_single_step_db(dr6[6]) && db_addr[6] == start + 1 + 3 + 2 + 5 + 1 + 1,
+	       "%sSingle-step #DB on emulated instructions", usermode);
+}
+
+static unsigned long singlestep_emulated_instructions(void)
+{
+	unsigned long start;
+
+	/*
+	 * Verify single-step #DBs are generated correctly on emulated
+	 * instructions, e.g. CPUID and emulated I/O port accesses (IN).
+	 */
+	asm volatile (
+		"pushf\n\t"
+		"pop %%rax\n\t"
+		"or $(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"popf\n\t"
+		"and $~(1<<8),%%rax\n\t"
+		"1:push %%rax\n\t"
+		"xor %%rax,%%rax\n\t"
+		"cpuid\n\t"
+		"movl $0x3fd, %%edx\n\t"
+		"inb %%dx, %%al\n\t"
+		"popf\n\t"
+		"lea 1b(%%rip),%0\n\t"
+		: "=r" (start) : : "rax", "ebx", "ecx", "edx"
+	);
+	return start;
+}
+
+static void report_singlestep_with_sti_blocking(unsigned long start,
+						const char *usermode)
+{
+	report(n == 4 &&
+	       is_single_step_db(dr6[0]) && db_addr[0] == start &&
+	       is_single_step_db(dr6[1]) && db_addr[1] == start + 6 &&
+	       is_single_step_db(dr6[2]) && db_addr[2] == start + 6 + 1 &&
+	       is_single_step_db(dr6[3]) && db_addr[3] == start + 6 + 1 + 1,
+	       "%sSingle-step #DB w/ STI blocking", usermode);
+}
+
+static unsigned long singlestep_with_sti_blocking(void)
+{
+	unsigned long start_rip;
+
+	/*
+	 * STI blocking doesn't suppress #DBs, thus the first single-step #DB
+	 * should arrive after the standard one instruction delay.
+	 */
+	asm volatile(
+		"cli\n\t"
+		"pushf\n\t"
+		"pop %%rax\n\t"
+		"or $(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"popf\n\t"
+		"sti\n\t"
+		"1:and $~(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"popf\n\t"
+		"lea 1b(%%rip),%0\n\t"
+		: "=r" (start_rip) : : "rax"
+	);
+	return start_rip;
+}
+
+static void report_singlestep_with_movss_blocking(unsigned long start,
+						  const char *usermode)
+{
+	report(n == 3 &&
+	       is_single_step_db(dr6[0]) && db_addr[0] == start &&
+	       is_single_step_db(dr6[1]) && db_addr[1] == start + 1 &&
+	       is_single_step_db(dr6[2]) && db_addr[2] == start + 1 + 1,
+	       "%sSingle-step #DB w/ MOVSS blocking", usermode);
+}
+
+static unsigned long singlestep_with_movss_blocking(void)
+{
+	unsigned long start_rip;
+
+	/*
+	 * MOVSS blocking suppresses single-step #DBs (and select other #DBs),
+	 * thus the first single-step #DB should occur after MOVSS blocking
+	 * expires, i.e. two instructions after #DBs are enabled in this case.
+	 */
+	asm volatile(
+		"pushf\n\t"
+		"pop %%rax\n\t"
+		"or $(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"mov %%ss, %%ax\n\t"
+		"popf\n\t"
+		"mov %%ax, %%ss\n\t"
+		"and $~(1<<8),%%rax\n\t"
+		"1: push %%rax\n\t"
+		"popf\n\t"
+		"lea 1b(%%rip),%0\n\t"
+		: "=r" (start_rip) : : "rax"
+	);
+	return start_rip;
+}
+
+static void report_singlestep_with_movss_blocking_and_icebp(unsigned long start,
+							    const char *usermode)
+{
+	report(n == 4 &&
+	       is_icebp_db(dr6[0]) && db_addr[0] == start &&
+	       is_single_step_db(dr6[1]) && db_addr[1] == start + 6 &&
+	       is_single_step_db(dr6[2]) && db_addr[2] == start + 6 + 1 &&
+	       is_single_step_db(dr6[3]) && db_addr[3] == start + 6 + 1 + 1,
+	       "%sSingle-Step + ICEBP #DB w/ MOVSS blocking", usermode);
+}
+
+static unsigned long singlestep_with_movss_blocking_and_icebp(void)
+{
+	unsigned long start;
+
+	/*
+	 * ICEBP, a.k.a. INT1, is an oddball.  It generates a trap-like #DB,
+	 * is intercepted if #DBs are intercepted, and manifests as a #DB
+	 * VM-Exit, but the VM-Exit occurs on the ICEBP itself, i.e. it's
+	 * treated as an instruction intercept.  Verify that ICEBP is
+	 * correctly emulated as a trap-like #DB when intercepted, and that
+	 * MOVSS blocking is handled correctly with respect to single-step
+	 * breakpoints being enabled.
+	 */
+	asm volatile(
+		"pushf\n\t"
+		"pop %%rax\n\t"
+		"or $(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"mov %%ss, %%ax\n\t"
+		"popf\n\t"
+		"mov %%ax, %%ss\n\t"
+		".byte 0xf1;"
+		"1:and $~(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"popf\n\t"
+		"lea 1b(%%rip),%0\n\t"
+		: "=r" (start) : : "rax"
+	);
+	return start;
+}
+
+static void report_singlestep_with_movss_blocking_and_dr7_gd(unsigned long start,
+							     const char *ign)
+{
+	report(n == 5 &&
+	       is_general_detect_db(dr6[0]) && db_addr[0] == start &&
+	       is_single_step_db(dr6[1]) && db_addr[1] == start + 3 &&
+	       is_single_step_db(dr6[2]) && db_addr[2] == start + 3 + 6 &&
+	       is_single_step_db(dr6[3]) && db_addr[3] == start + 3 + 6 + 1 &&
+	       is_single_step_db(dr6[4]) && db_addr[4] == start + 3 + 6 + 1 + 1,
+	       "Single-step #DB w/ MOVSS blocking and DR7.GD=1");
+}
+
+static unsigned long singlestep_with_movss_blocking_and_dr7_gd(void)
+{
+	unsigned long start_rip;
+
+	write_dr7(DR7_GD);
+
+	/*
+	 * MOVSS blocking does NOT suppress General Detect #DBs, which have
+	 * fault-like behavior.  Note, DR7.GD is cleared by the CPU upon
+	 * successful delivery of the #DB.  DR6.BD is NOT cleared by the CPU,
+	 * but the MOV DR6 below will be re-executed after handling the
+	 * General Detect #DB.
+	 */
+	asm volatile(
+		"xor %0, %0\n\t"
+		"pushf\n\t"
+		"pop %%rax\n\t"
+		"or $(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"mov %%ss, %%ax\n\t"
+		"popf\n\t"
+		"mov %%ax, %%ss\n\t"
+		"1: mov %0, %%dr6\n\t"
+		"and $~(1<<8),%%rax\n\t"
+		"push %%rax\n\t"
+		"popf\n\t"
+		"lea 1b(%%rip),%0\n\t"
+		: "=r" (start_rip) : : "rax"
+	);
+	return start_rip;
+}
+
+int main(int ac, char **av)
+{
 	unsigned long cr4;
 
 	handle_exception(DB_VECTOR, handle_db);
 	handle_exception(BP_VECTOR, handle_bp);
 	handle_exception(UD_VECTOR, handle_ud);
 
+	/*
+	 * DR4 is an alias for DR6 (and DR5 aliases DR7) if CR4.DE is NOT set,
+	 * and is reserved if CR4.DE=1 (Debug Extensions enabled).
+	 */
 	got_ud = 0;
 	cr4 = read_cr4();
 	write_cr4(cr4 & ~X86_CR4_DE);
 	write_dr4(0);
-	write_dr6(0xffff4ff2);
-	report(read_dr4() == 0xffff4ff2 && !got_ud, "reading DR4 with CR4.DE == 0");
+	write_dr6(DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP1);
+	report(read_dr4() == (DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP1) && !got_ud,
+	       "DR4==DR6 with CR4.DE == 0");
 
 	cr4 = read_cr4();
 	write_cr4(cr4 | X86_CR4_DE);
 	read_dr4();
-	report(got_ud, "reading DR4 with CR4.DE == 1");
+	report(got_ud, "DR4 read got #UD with CR4.DE == 1");
 	write_dr6(0);
 
 	extern unsigned char sw_bp;
 	asm volatile("int3; sw_bp:");
 	report(bp_addr == (unsigned long)&sw_bp, "#BP");
 
+	/*
+	 * The CPU sets/clears bits 0-3 (trap bits for DR0-3) on #DB based on
+	 * whether or not the corresponding DR0-3 got a match.  All other bits
+	 * in DR6 are set if and only if their associated breakpoint condition
+	 * is active, and are never cleared by the CPU.  Verify a match on DR0
+	 * is reported correctly, and that DR6.BS is not set when single-step
+	 * breakpoints are disabled, but is left set (if set by software).
+	 */
 	n = 0;
 	extern unsigned char hw_bp1;
 	write_dr0(&hw_bp1);
-	write_dr7(0x00000402);
+	write_dr7(DR7_FIXED_1 | DR7_GLOBAL_ENABLE_DR0);
 	asm volatile("hw_bp1: nop");
 	report(n == 1 &&
-	       db_addr[0] == ((unsigned long)&hw_bp1) && dr6[0] == 0xffff0ff1,
+	       db_addr[0] == ((unsigned long)&hw_bp1) &&
+	       dr6[0] == (DR6_ACTIVE_LOW | DR6_TRAP0),
 	       "hw breakpoint (test that dr6.BS is not set)");
 
 	n = 0;
 	extern unsigned char hw_bp2;
 	write_dr0(&hw_bp2);
-	write_dr6(0x00004002);
+	write_dr6(DR6_BS | DR6_TRAP1);
 	asm volatile("hw_bp2: nop");
 	report(n == 1 &&
-	       db_addr[0] == ((unsigned long)&hw_bp2) && dr6[0] == 0xffff4ff1,
+	       db_addr[0] == ((unsigned long)&hw_bp2) &&
+	       dr6[0] == (DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP0),
 	       "hw breakpoint (test that dr6.BS is not cleared)");
 
-	n = 0;
-	write_dr6(0);
-	asm volatile(
-		"pushf\n\t"
-		"pop %%rax\n\t"
-		"or $(1<<8),%%rax\n\t"
-		"push %%rax\n\t"
-		"lea (%%rip),%0\n\t"
-		"popf\n\t"
-		"and $~(1<<8),%%rax\n\t"
-		"push %%rax\n\t"
-		"popf\n\t"
-		: "=r" (start) : : "rax");
-	report(n == 3 &&
-	       db_addr[0] == start + 1 + 6 && dr6[0] == 0xffff4ff0 &&
-	       db_addr[1] == start + 1 + 6 + 1 && dr6[1] == 0xffff4ff0 &&
-	       db_addr[2] == start + 1 + 6 + 1 + 1 && dr6[2] == 0xffff4ff0,
-	       "single step");
-
-	/*
-	 * cpuid and rdmsr (among others) trigger VM exits and are then
-	 * emulated. Test that single stepping works on emulated instructions.
-	 */
-	n = 0;
-	write_dr6(0);
-	asm volatile(
-		"pushf\n\t"
-		"pop %%rax\n\t"
-		"or $(1<<8),%%rax\n\t"
-		"push %%rax\n\t"
-		"lea (%%rip),%0\n\t"
-		"popf\n\t"
-		"and $~(1<<8),%%rax\n\t"
-		"push %%rax\n\t"
-		"xor %%rax,%%rax\n\t"
-		"cpuid\n\t"
-		"movl $0x1a0,%%ecx\n\t"
-		"rdmsr\n\t"
-		"popf\n\t"
-		: "=r" (start) : : "rax", "ebx", "ecx", "edx");
-	report(n == 7 &&
-	       db_addr[0] == start + 1 + 6 && dr6[0] == 0xffff4ff0 &&
-	       db_addr[1] == start + 1 + 6 + 1 && dr6[1] == 0xffff4ff0 &&
-	       db_addr[2] == start + 1 + 6 + 1 + 3 && dr6[2] == 0xffff4ff0 &&
-	       db_addr[3] == start + 1 + 6 + 1 + 3 + 2 && dr6[3] == 0xffff4ff0 &&
-	       db_addr[4] == start + 1 + 6 + 1 + 3 + 2 + 5 && dr6[4] == 0xffff4ff0 &&
-	       db_addr[5] == start + 1 + 6 + 1 + 3 + 2 + 5 + 2 && dr6[5] == 0xffff4ff0 &&
-	       db_addr[6] == start + 1 + 6 + 1 + 3 + 2 + 5 + 2 + 1 && dr6[6] == 0xffff4ff0,
-	       "single step emulated instructions");
+	run_ss_db_test(singlestep_basic);
+	run_ss_db_test(singlestep_emulated_instructions);
+	run_ss_db_test(singlestep_with_sti_blocking);
+	run_ss_db_test(singlestep_with_movss_blocking);
+	run_ss_db_test(singlestep_with_movss_blocking_and_icebp);
+	run_ss_db_test(singlestep_with_movss_blocking_and_dr7_gd);
 
 	n = 0;
 	write_dr1((void *)&value);
+	write_dr6(DR6_BS);
 	write_dr7(0x00d0040a); // 4-byte write
 
 	extern unsigned char hw_wp1;
@@ -168,7 +428,8 @@
 		"mov %%rax,%0\n\t; hw_wp1:"
 		: "=m" (value) : : "rax");
 	report(n == 1 &&
-	       db_addr[0] == ((unsigned long)&hw_wp1) && dr6[0] == 0xffff4ff2,
+	       db_addr[0] == ((unsigned long)&hw_wp1) &&
+	       dr6[0] == (DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP1),
 	       "hw watchpoint (test that dr6.BS is not cleared)");
 
 	n = 0;
@@ -180,7 +441,8 @@
 		"mov %%rax,%0\n\t; hw_wp2:"
 		: "=m" (value) : : "rax");
 	report(n == 1 &&
-	       db_addr[0] == ((unsigned long)&hw_wp2) && dr6[0] == 0xffff0ff2,
+	       db_addr[0] == ((unsigned long)&hw_wp2) &&
+	       dr6[0] == (DR6_ACTIVE_LOW | DR6_TRAP1),
 	       "hw watchpoint (test that dr6.BS is not set)");
 
 	n = 0;
@@ -188,7 +450,7 @@
 	extern unsigned char sw_icebp;
 	asm volatile(".byte 0xf1; sw_icebp:");
 	report(n == 1 &&
-	       db_addr[0] == (unsigned long)&sw_icebp && dr6[0] == 0xffff0ff0,
+	       db_addr[0] == (unsigned long)&sw_icebp && dr6[0] == DR6_ACTIVE_LOW,
 	       "icebp");
 
 	write_dr7(0x400);
diff --git a/x86/emulator.c b/x86/emulator.c
index 22a518f..cd78e3c 100644
--- a/x86/emulator.c
+++ b/x86/emulator.c
@@ -1,3 +1,5 @@
+#include <asm/debugreg.h>
+
 #include "ioram.h"
 #include "vm.h"
 #include "libcflat.h"
@@ -883,12 +885,14 @@
 static void test_mov_dr(uint64_t *mem)
 {
 	unsigned long rax;
-	const unsigned long in_rax = 0;
-	bool rtm_support = this_cpu_has(X86_FEATURE_RTM);
-	unsigned long dr6_fixed_1 = rtm_support ? 0xfffe0ff0ul : 0xffff0ff0ul;
+
 	asm(KVM_FEP "movq %0, %%dr6\n\t"
-	    KVM_FEP "movq %%dr6, %0\n\t" : "=a" (rax) : "a" (in_rax));
-	report(rax == dr6_fixed_1, "mov_dr6");
+	    KVM_FEP "movq %%dr6, %0\n\t" : "=a" (rax) : "a" (0));
+
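+	/*
+	 * DR6[16] (RTM) exists and is active-low only when RTM is supported;
+	 * on CPUs without RTM it is reserved and always reads back as 1.
+	 */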
+	if (this_cpu_has(X86_FEATURE_RTM))
+		report(rax == (DR6_ACTIVE_LOW & ~DR6_RTM), "mov_dr6");
+	else
+		report(rax == DR6_ACTIVE_LOW, "mov_dr6");
 }
 
 static void test_push16(uint64_t *mem)
diff --git a/x86/tsc_adjust.c b/x86/tsc_adjust.c
index 3636b5e..c98c1ea 100644
--- a/x86/tsc_adjust.c
+++ b/x86/tsc_adjust.c
@@ -4,37 +4,33 @@
 int main(void)
 {
 	u64 t1, t2, t3, t4, t5;
-	u64 est_delta_time;
 
-	if (this_cpu_has(X86_FEATURE_TSC_ADJUST)) { // MSR_IA32_TSC_ADJUST Feature is enabled?
-		report(rdmsr(MSR_IA32_TSC_ADJUST) == 0x0,
-		       "MSR_IA32_TSC_ADJUST msr initialization");
-		t3 = 100000000000ull;
-		t1 = rdtsc();
-		wrmsr(MSR_IA32_TSC_ADJUST, t3);
-		t2 = rdtsc();
-		report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
-		       "MSR_IA32_TSC_ADJUST msr read / write");
-		report((t2 - t1) >= t3,
-		       "TSC adjustment for MSR_IA32_TSC_ADJUST value");
-		t3 = 0x0;
-		wrmsr(MSR_IA32_TSC_ADJUST, t3);
-		report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
-		       "MSR_IA32_TSC_ADJUST msr read / write");
-		t4 = 100000000000ull;
-		t1 = rdtsc();
-		wrtsc(t4);
-		t2 = rdtsc();
-		t5 = rdmsr(MSR_IA32_TSC_ADJUST);
-		// est of time between reading tsc and writing tsc,
-		// (based on MSR_IA32_TSC_ADJUST msr value) should be small
-		est_delta_time = t4 - t5 - t1;
-		// arbitray 2x latency (wrtsc->rdtsc) threshold
-		report(est_delta_time <= (2 * (t2 - t4)),
-		       "MSR_IA32_TSC_ADJUST msr adjustment on tsc write");
+	if (!this_cpu_has(X86_FEATURE_TSC_ADJUST)) {
+		report_skip("MSR_IA32_TSC_ADJUST feature not enabled");
+		return report_summary();
 	}
-	else {
-		report_pass("MSR_IA32_TSC_ADJUST feature not enabled");
-	}
+
+	report(rdmsr(MSR_IA32_TSC_ADJUST) == 0x0,
+	       "MSR_IA32_TSC_ADJUST msr initialization");
+	t3 = 100000000000ull;
+	t1 = rdtsc();
+	wrmsr(MSR_IA32_TSC_ADJUST, t3);
+	t2 = rdtsc();
+	report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
+	       "MSR_IA32_TSC_ADJUST msr read / write");
+	report((t2 - t1) >= t3,
+	       "TSC adjustment for MSR_IA32_TSC_ADJUST value");
+	t3 = 0x0;
+	wrmsr(MSR_IA32_TSC_ADJUST, t3);
+	report(rdmsr(MSR_IA32_TSC_ADJUST) == t3,
+	       "MSR_IA32_TSC_ADJUST msr read / write");
+	t4 = 100000000000ull;
+	t1 = rdtsc();
+	wrtsc(t4);
+	t2 = rdtsc();
+	t5 = rdmsr(MSR_IA32_TSC_ADJUST);
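+	/*
+	 * Per the SDM, a write to IA32_TSC adds (new_value - internal_TSC) to
+	 * IA32_TSC_ADJUST.  TSC_ADJUST was zeroed above, so t4 - t5 recovers
+	 * the internal TSC value at the time of the write.
+	 */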
+	report(t1 <= t4 - t5, "Internal TSC advances across write to IA32_TSC");
+	report(t2 >= t4, "IA32_TSC advances after write to IA32_TSC");
+
 	return report_summary();
 }
diff --git a/x86/vmx.c b/x86/vmx.c
index f4fbb94..51eed8c 100644
--- a/x86/vmx.c
+++ b/x86/vmx.c
@@ -1884,15 +1884,35 @@
 	step->data = data;
 }
 
+static void __test_set_guest(test_guest_func func)
+{
+	assert(current->v2);
+	v2_guest_main = func;
+}
+
 /*
  * Set the target of the first enter_guest call. Can only be called once per
  * test. Must be called before first enter_guest call.
  */
 void test_set_guest(test_guest_func func)
 {
-	assert(current->v2);
 	TEST_ASSERT_MSG(!v2_guest_main, "Already set guest func.");
-	v2_guest_main = func;
+	__test_set_guest(func);
+}
+
+/*
+ * Set the target of the enter_guest call and reset the RIP so 'func' will
+ * start from the beginning.  This can be called multiple times per test.
+ */
+void test_override_guest(test_guest_func func)
+{
+	__test_set_guest(func);
+	init_vmcs_guest();
+}
+
+void test_set_guest_finished(void)
+{
+	guest_finished = 1;
 }
 
 static void check_for_guest_termination(union exit_reason exit_reason)
diff --git a/x86/vmx.h b/x86/vmx.h
index 4423986..11cb665 100644
--- a/x86/vmx.h
+++ b/x86/vmx.h
@@ -1055,7 +1055,9 @@
 typedef void (*test_guest_func)(void);
 typedef void (*test_teardown_func)(void *data);
 void test_set_guest(test_guest_func func);
+void test_override_guest(test_guest_func func);
 void test_add_teardown(test_teardown_func func, void *data);
 void test_skip(const char *msg);
+void test_set_guest_finished(void);
 
 #endif
diff --git a/x86/vmx_tests.c b/x86/vmx_tests.c
index 3d57ed6..df93198 100644
--- a/x86/vmx_tests.c
+++ b/x86/vmx_tests.c
@@ -21,6 +21,14 @@
 #include "smp.h"
 #include "delay.h"
 #include "access.h"
+#include "x86/usermode.h"
+
+/*
+ * vmcs.GUEST_PENDING_DEBUG has the same format as DR6, although some bits that
+ * are legal in DR6 are reserved in vmcs.GUEST_PENDING_DEBUG.  Bit 12 is also
+ * set if any enabled data or I/O breakpoint matches.
+ */
+#define PENDING_DBG_TRAP	BIT(12)
 
 #define VPID_CAP_INVVPID_TYPES_SHIFT 40
 
@@ -5080,9 +5088,9 @@
 	enter_guest();
 	report_mtf("OUT", (unsigned long) &test_mtf2);
 	pending_dbg = vmcs_read(GUEST_PENDING_DEBUG);
-	report(pending_dbg & DR_STEP,
+	report(pending_dbg & DR6_BS,
 	       "'pending debug exceptions' field after MTF VM-exit: 0x%lx (expected 0x%lx)",
-	       pending_dbg, (unsigned long) DR_STEP);
+	       pending_dbg, (unsigned long) DR6_BS);
 
 	disable_mtf();
 	disable_tf();
@@ -8931,7 +8939,7 @@
 static void vmx_preemption_timer_zero_set_pending_dbg(u32 exception_bitmap)
 {
 	vmx_preemption_timer_zero_activate_preemption_timer();
-	vmcs_write(GUEST_PENDING_DEBUG, BIT(12) | DR_TRAP1);
+	vmcs_write(GUEST_PENDING_DEBUG, PENDING_DBG_TRAP | DR6_TRAP1);
 	vmcs_write(EXC_BITMAP, exception_bitmap);
 	enter_guest();
 }
@@ -9315,7 +9323,7 @@
 	 * (b) stale bits in DR6 (DR6.BD, in particular) don't leak into
          *     the exit qualification field for a subsequent #DB exception.
 	 */
-	const u64 starting_dr6 = DR6_RESERVED | BIT(13) | DR_TRAP3 | DR_TRAP1;
+	const u64 starting_dr6 = DR6_ACTIVE_LOW | DR6_BS | DR6_TRAP3 | DR6_TRAP1;
 	extern char post_nop asm(".Lpost_nop");
 	extern char post_movss_nop asm(".Lpost_movss_nop");
 	extern char post_wbinvd asm(".Lpost_wbinvd");
@@ -9339,7 +9347,7 @@
 	 * standard that L0 has to follow for emulated instructions.
 	 */
 	single_step_guest("Hardware delivered single-step", starting_dr6, 0);
-	check_db_exit(false, false, false, &post_nop, DR_STEP, starting_dr6);
+	check_db_exit(false, false, false, &post_nop, DR6_BS, starting_dr6);
 
 	/*
 	 * Hardware-delivered #DB trap for single-step in MOVSS shadow
@@ -9349,8 +9357,8 @@
 	 * data breakpoint as well as the single-step trap.
 	 */
 	single_step_guest("Hardware delivered single-step in MOVSS shadow",
-			  starting_dr6, BIT(12) | DR_STEP | DR_TRAP0 );
-	check_db_exit(false, false, false, &post_movss_nop, DR_STEP | DR_TRAP0,
+			  starting_dr6, DR6_BS | PENDING_DBG_TRAP | DR6_TRAP0);
+	check_db_exit(false, false, false, &post_movss_nop, DR6_BS | DR6_TRAP0,
 		      starting_dr6);
 
 	/*
@@ -9360,7 +9368,7 @@
 	 * modified DR6, but fails miserably.
 	 */
 	single_step_guest("Software synthesized single-step", starting_dr6, 0);
-	check_db_exit(false, false, false, &post_wbinvd, DR_STEP, starting_dr6);
+	check_db_exit(false, false, false, &post_wbinvd, DR6_BS, starting_dr6);
 
 	/*
 	 * L0 synthesized #DB trap for single-step in MOVSS shadow is
@@ -9369,8 +9377,8 @@
 	 * the exit qualification field for the #DB exception.
 	 */
 	single_step_guest("Software synthesized single-step in MOVSS shadow",
-			  starting_dr6, BIT(12) | DR_STEP | DR_TRAP0);
-	check_db_exit(true, false, true, &post_movss_wbinvd, DR_STEP | DR_TRAP0,
+			  starting_dr6, DR6_BS | PENDING_DBG_TRAP | DR6_TRAP0);
+	check_db_exit(true, false, true, &post_movss_wbinvd, DR6_BS | DR6_TRAP0,
 		      starting_dr6);
 
 	/*
@@ -10701,6 +10709,134 @@
 	__vmx_pf_vpid_test(invalidate_tlb_new_vpid, 1);
 }
 
+static void vmx_l2_gp_test(void)
+{
+	*(volatile u64 *)NONCANONICAL = 0;
+}
+
+static void vmx_l2_ud_test(void)
+{
+	asm volatile ("ud2");
+}
+
+static void vmx_l2_de_test(void)
+{
+	asm volatile (
+		"xor %%eax, %%eax\n\t"
+		"xor %%ebx, %%ebx\n\t"
+		"xor %%edx, %%edx\n\t"
+		"idiv %%ebx\n\t"
+		::: "eax", "ebx", "edx");
+}
+
+static void vmx_l2_bp_test(void)
+{
+	asm volatile ("int3");
+}
+
+static void vmx_l2_db_test(void)
+{
+	write_rflags(read_rflags() | X86_EFLAGS_TF);
+}
+
+static uint64_t usermode_callback(void)
+{
+	/*
+	 * Trigger an #AC by writing 8 bytes to a 4-byte aligned address.
+	 * Disclaimer: It is assumed that the stack pointer is aligned
+	 * on a 16-byte boundary as x86_64 stacks should be.
+	 */
+	asm volatile("movq $0, -0x4(%rsp)");
+
+	return 0;
+}
+
+static void vmx_l2_ac_test(void)
+{
+	bool hit_ac = false;
+
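+	/*
+	 * Alignment checks are enforced only at CPL=3 and only when both
+	 * CR0.AM and EFLAGS.AC are set, hence the usermode callback below.
+	 */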
+	write_cr0(read_cr0() | X86_CR0_AM);
+	write_rflags(read_rflags() | X86_EFLAGS_AC);
+
+	run_in_user(usermode_callback, AC_VECTOR, 0, 0, 0, 0, &hit_ac);
+	report(hit_ac, "Usermode #AC handled in L2");
+	vmcall();
+}
+
+struct vmx_exception_test {
+	u8 vector;
+	void (*guest_code)(void);
+};
+
+struct vmx_exception_test vmx_exception_tests[] = {
+	{ GP_VECTOR, vmx_l2_gp_test },
+	{ UD_VECTOR, vmx_l2_ud_test },
+	{ DE_VECTOR, vmx_l2_de_test },
+	{ DB_VECTOR, vmx_l2_db_test },
+	{ BP_VECTOR, vmx_l2_bp_test },
+	{ AC_VECTOR, vmx_l2_ac_test },
+};
+
+static u8 vmx_exception_test_vector;
+
+static void vmx_exception_handler(struct ex_regs *regs)
+{
+	report(regs->vector == vmx_exception_test_vector,
+	       "Handling %s in L2's exception handler",
+	       exception_mnemonic(vmx_exception_test_vector));
+	vmcall();
+}
+
+static void handle_exception_in_l2(u8 vector)
+{
+	handler old_handler = handle_exception(vector, vmx_exception_handler);
+
+	vmx_exception_test_vector = vector;
+
+	enter_guest();
+	report(vmcs_read(EXI_REASON) == VMX_VMCALL,
+	       "%s handled by L2", exception_mnemonic(vector));
+
+	handle_exception(vector, old_handler);
+}
+
+static void handle_exception_in_l1(u32 vector)
+{
+	u32 old_eb = vmcs_read(EXC_BITMAP);
+
+	vmcs_write(EXC_BITMAP, old_eb | (1u << vector));
+
+	enter_guest();
+
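+	/*
+	 * Exceptions intercepted by L1 cause an exception-or-NMI VM-Exit; the
+	 * exception vector is reported in bits 7:0 of the VM-Exit
+	 * interruption-information field.
+	 */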
+	report((vmcs_read(EXI_REASON) == VMX_EXC_NMI) &&
+	       ((vmcs_read(EXI_INTR_INFO) & 0xff) == vector),
+	       "%s handled by L1", exception_mnemonic(vector));
+
+	vmcs_write(EXC_BITMAP, old_eb);
+}
+
+static void vmx_exception_test(void)
+{
+	struct vmx_exception_test *t;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(vmx_exception_tests); i++) {
+		t = &vmx_exception_tests[i];
+
+		/*
+		 * Override the guest code before each run even though it's the
+		 * same code, the VMCS guest state needs to be reinitialized.
+		 */
+		test_override_guest(t->guest_code);
+		handle_exception_in_l2(t->vector);
+
+		test_override_guest(t->guest_code);
+		handle_exception_in_l1(t->vector);
+	}
+
+	test_set_guest_finished();
+}
+
 #define TEST(name) { #name, .v2 = name }
 
 /* name/init/guest_main/exit_handler/syscall_handler/guest_regs */
@@ -10810,5 +10946,6 @@
 	TEST(vmx_pf_no_vpid_test),
 	TEST(vmx_pf_invvpid_test),
 	TEST(vmx_pf_vpid_test),
+	TEST(vmx_exception_test),
 	{ NULL, NULL, NULL, NULL, NULL, {0} },
 };