Merge branch 'linus' into x86/asm, to pick up fixes and resolve conflicts

Conflicts:
	arch/x86/kernel/cpu/Makefile

Signed-off-by: Ingo Molnar <mingo@kernel.org>
diff --git a/Documentation/x86/orc-unwinder.txt b/Documentation/x86/orc-unwinder.txt
index af0c9a4..cd4b29b 100644
--- a/Documentation/x86/orc-unwinder.txt
+++ b/Documentation/x86/orc-unwinder.txt
@@ -4,7 +4,7 @@
 Overview
 --------
 
-The kernel CONFIG_ORC_UNWINDER option enables the ORC unwinder, which is
+The kernel CONFIG_UNWINDER_ORC option enables the ORC unwinder, which is
 similar in concept to a DWARF unwinder.  The difference is that the
 format of the ORC data is much simpler than DWARF, which in turn allows
 the ORC unwinder to be much simpler and faster.
diff --git a/Documentation/x86/x86_64/mm.txt b/Documentation/x86/x86_64/mm.txt
index b0798e2..3448e67 100644
--- a/Documentation/x86/x86_64/mm.txt
+++ b/Documentation/x86/x86_64/mm.txt
@@ -34,7 +34,7 @@
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
diff --git a/Makefile b/Makefile
index bee2033..12d920f 100644
--- a/Makefile
+++ b/Makefile
@@ -934,8 +934,8 @@
   ifeq ($(has_libelf),1)
     objtool_target := tools/objtool FORCE
   else
-    ifdef CONFIG_ORC_UNWINDER
-      $(error "Cannot generate ORC metadata for CONFIG_ORC_UNWINDER=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
+    ifdef CONFIG_UNWINDER_ORC
+      $(error "Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
     else
       $(warning "Cannot use CONFIG_STACK_VALIDATION=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
     endif
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2fdb233..4ae940a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -171,7 +171,7 @@
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_RCU_TABLE_FREE
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_RELIABLE_STACKTRACE		if X86_64 && FRAME_POINTER_UNWINDER && STACK_VALIDATION
+	select HAVE_RELIABLE_STACKTRACE		if X86_64 && UNWINDER_FRAME_POINTER && STACK_VALIDATION
 	select HAVE_STACK_VALIDATION		if X86_64
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_UNSTABLE_SCHED_CLOCK
@@ -303,7 +303,6 @@
 config KASAN_SHADOW_OFFSET
 	hex
 	depends on KASAN
-	default 0xdff8000000000000 if X86_5LEVEL
 	default 0xdffffc0000000000
 
 config HAVE_INTEL_TXT
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 90b1230..6293a87 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -359,28 +359,14 @@
 
 choice
 	prompt "Choose kernel unwinder"
-	default FRAME_POINTER_UNWINDER
+	default UNWINDER_ORC if X86_64
+	default UNWINDER_FRAME_POINTER if X86_32
 	---help---
 	  This determines which method will be used for unwinding kernel stack
 	  traces for panics, oopses, bugs, warnings, perf, /proc/<pid>/stack,
 	  livepatch, lockdep, and more.
 
-config FRAME_POINTER_UNWINDER
-	bool "Frame pointer unwinder"
-	select FRAME_POINTER
-	---help---
-	  This option enables the frame pointer unwinder for unwinding kernel
-	  stack traces.
-
-	  The unwinder itself is fast and it uses less RAM than the ORC
-	  unwinder, but the kernel text size will grow by ~3% and the kernel's
-	  overall performance will degrade by roughly 5-10%.
-
-	  This option is recommended if you want to use the livepatch
-	  consistency model, as this is currently the only way to get a
-	  reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
-config ORC_UNWINDER
+config UNWINDER_ORC
 	bool "ORC unwinder"
 	depends on X86_64
 	select STACK_VALIDATION
@@ -396,7 +382,22 @@
 	  Enabling this option will increase the kernel's runtime memory usage
 	  by roughly 2-4MB, depending on your kernel config.
 
-config GUESS_UNWINDER
+config UNWINDER_FRAME_POINTER
+	bool "Frame pointer unwinder"
+	select FRAME_POINTER
+	---help---
+	  This option enables the frame pointer unwinder for unwinding kernel
+	  stack traces.
+
+	  The unwinder itself is fast and it uses less RAM than the ORC
+	  unwinder, but the kernel text size will grow by ~3% and the kernel's
+	  overall performance will degrade by roughly 5-10%.
+
+	  This option is recommended if you want to use the livepatch
+	  consistency model, as this is currently the only way to get a
+	  reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
+
+config UNWINDER_GUESS
 	bool "Guess unwinder"
 	depends on EXPERT
 	---help---
@@ -411,7 +412,7 @@
 endchoice
 
 config FRAME_POINTER
-	depends on !ORC_UNWINDER && !GUESS_UNWINDER
+	depends on !UNWINDER_ORC && !UNWINDER_GUESS
 	bool
 
 endmenu
diff --git a/arch/x86/configs/tiny.config b/arch/x86/configs/tiny.config
index 550cd50..66c9e2a 100644
--- a/arch/x86/configs/tiny.config
+++ b/arch/x86/configs/tiny.config
@@ -1,5 +1,5 @@
 CONFIG_NOHIGHMEM=y
 # CONFIG_HIGHMEM4G is not set
 # CONFIG_HIGHMEM64G is not set
-CONFIG_GUESS_UNWINDER=y
-# CONFIG_FRAME_POINTER_UNWINDER is not set
+CONFIG_UNWINDER_GUESS=y
+# CONFIG_UNWINDER_FRAME_POINTER is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4a4b16e..e32fc1f 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -299,6 +299,7 @@
 # CONFIG_DEBUG_RODATA_TEST is not set
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
+CONFIG_UNWINDER_ORC=y
 CONFIG_SECURITY=y
 CONFIG_SECURITY_NETWORK=y
 CONFIG_SECURITY_SELINUX=y
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 6e16003..3fd8bc5 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -142,56 +142,25 @@ For 32-bit we have the following conventions - kernel is built with
 	UNWIND_HINT_REGS offset=\offset
 	.endm
 
-	.macro RESTORE_EXTRA_REGS offset=0
-	movq 0*8+\offset(%rsp), %r15
-	movq 1*8+\offset(%rsp), %r14
-	movq 2*8+\offset(%rsp), %r13
-	movq 3*8+\offset(%rsp), %r12
-	movq 4*8+\offset(%rsp), %rbp
-	movq 5*8+\offset(%rsp), %rbx
-	UNWIND_HINT_REGS offset=\offset extra=0
+	.macro POP_EXTRA_REGS
+	popq %r15
+	popq %r14
+	popq %r13
+	popq %r12
+	popq %rbp
+	popq %rbx
 	.endm
 
-	.macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
-	.if \rstor_r11
-	movq 6*8(%rsp), %r11
-	.endif
-	.if \rstor_r8910
-	movq 7*8(%rsp), %r10
-	movq 8*8(%rsp), %r9
-	movq 9*8(%rsp), %r8
-	.endif
-	.if \rstor_rax
-	movq 10*8(%rsp), %rax
-	.endif
-	.if \rstor_rcx
-	movq 11*8(%rsp), %rcx
-	.endif
-	.if \rstor_rdx
-	movq 12*8(%rsp), %rdx
-	.endif
-	movq 13*8(%rsp), %rsi
-	movq 14*8(%rsp), %rdi
-	UNWIND_HINT_IRET_REGS offset=16*8
-	.endm
-	.macro RESTORE_C_REGS
-	RESTORE_C_REGS_HELPER 1,1,1,1,1
-	.endm
-	.macro RESTORE_C_REGS_EXCEPT_RAX
-	RESTORE_C_REGS_HELPER 0,1,1,1,1
-	.endm
-	.macro RESTORE_C_REGS_EXCEPT_RCX
-	RESTORE_C_REGS_HELPER 1,0,1,1,1
-	.endm
-	.macro RESTORE_C_REGS_EXCEPT_R11
-	RESTORE_C_REGS_HELPER 1,1,0,1,1
-	.endm
-	.macro RESTORE_C_REGS_EXCEPT_RCX_R11
-	RESTORE_C_REGS_HELPER 1,0,0,1,1
-	.endm
-
-	.macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
-	subq $-(15*8+\addskip), %rsp
+	.macro POP_C_REGS
+	popq %r11
+	popq %r10
+	popq %r9
+	popq %r8
+	popq %rax
+	popq %rcx
+	popq %rdx
+	popq %rsi
+	popq %rdi
 	.endm
 
 	.macro icebp
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index bcfc566..a2b30ec 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -221,10 +221,9 @@
 	TRACE_IRQS_ON		/* user mode is traced as IRQs on */
 	movq	RIP(%rsp), %rcx
 	movq	EFLAGS(%rsp), %r11
-	RESTORE_C_REGS_EXCEPT_RCX_R11
-	movq	RSP(%rsp), %rsp
+	addq	$6*8, %rsp	/* skip extra regs -- they were preserved */
 	UNWIND_HINT_EMPTY
-	USERGS_SYSRET64
+	jmp	.Lpop_c_regs_except_rcx_r11_and_sysret
 
 1:
 	/*
@@ -246,17 +245,18 @@
 	call	do_syscall_64		/* returns with IRQs disabled */
 
 return_from_SYSCALL_64:
-	RESTORE_EXTRA_REGS
 	TRACE_IRQS_IRETQ		/* we're about to change IF */
 
 	/*
 	 * Try to use SYSRET instead of IRET if we're returning to
-	 * a completely clean 64-bit userspace context.
+	 * a completely clean 64-bit userspace context.  If we're not,
+	 * go to the slow exit path.
 	 */
 	movq	RCX(%rsp), %rcx
 	movq	RIP(%rsp), %r11
-	cmpq	%rcx, %r11			/* RCX == RIP */
-	jne	opportunistic_sysret_failed
+
+	cmpq	%rcx, %r11	/* SYSRET requires RCX == RIP */
+	jne	swapgs_restore_regs_and_return_to_usermode
 
 	/*
 	 * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
@@ -274,14 +274,14 @@
 
 	/* If this changed %rcx, it was not canonical */
 	cmpq	%rcx, %r11
-	jne	opportunistic_sysret_failed
+	jne	swapgs_restore_regs_and_return_to_usermode
 
 	cmpq	$__USER_CS, CS(%rsp)		/* CS must match SYSRET */
-	jne	opportunistic_sysret_failed
+	jne	swapgs_restore_regs_and_return_to_usermode
 
 	movq	R11(%rsp), %r11
 	cmpq	%r11, EFLAGS(%rsp)		/* R11 == RFLAGS */
-	jne	opportunistic_sysret_failed
+	jne	swapgs_restore_regs_and_return_to_usermode
 
 	/*
 	 * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
@@ -302,12 +302,12 @@
 	 * would never get past 'stuck_here'.
 	 */
 	testq	$(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-	jnz	opportunistic_sysret_failed
+	jnz	swapgs_restore_regs_and_return_to_usermode
 
 	/* nothing to check for RSP */
 
 	cmpq	$__USER_DS, SS(%rsp)		/* SS must match SYSRET */
-	jne	opportunistic_sysret_failed
+	jne	swapgs_restore_regs_and_return_to_usermode
 
 	/*
 	 * We win! This label is here just for ease of understanding
@@ -315,14 +315,20 @@
 	 */
 syscall_return_via_sysret:
 	/* rcx and r11 are already restored (see code above) */
-	RESTORE_C_REGS_EXCEPT_RCX_R11
-	movq	RSP(%rsp), %rsp
 	UNWIND_HINT_EMPTY
+	POP_EXTRA_REGS
+.Lpop_c_regs_except_rcx_r11_and_sysret:
+	popq	%rsi	/* skip r11 */
+	popq	%r10
+	popq	%r9
+	popq	%r8
+	popq	%rax
+	popq	%rsi	/* skip rcx */
+	popq	%rdx
+	popq	%rsi
+	popq	%rdi
+	movq	RSP-ORIG_RAX(%rsp), %rsp
 	USERGS_SYSRET64
-
-opportunistic_sysret_failed:
-	SWAPGS
-	jmp	restore_c_regs_and_iret
 END(entry_SYSCALL_64)
 
 ENTRY(stub_ptregs_64)
@@ -423,8 +429,7 @@
 	movq	%rsp, %rdi
 	call	syscall_return_slowpath	/* returns with IRQs disabled */
 	TRACE_IRQS_ON			/* user mode is traced as IRQS on */
-	SWAPGS
-	jmp	restore_regs_and_iret
+	jmp	swapgs_restore_regs_and_return_to_usermode
 
 1:
 	/* kernel thread */
@@ -612,8 +617,21 @@
 	mov	%rsp,%rdi
 	call	prepare_exit_to_usermode
 	TRACE_IRQS_IRETQ
+
+GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+#ifdef CONFIG_DEBUG_ENTRY
+	/* Assert that pt_regs indicates user mode. */
+	testb	$3, CS(%rsp)
+	jnz	1f
+	ud2
+1:
+#endif
 	SWAPGS
-	jmp	restore_regs_and_iret
+	POP_EXTRA_REGS
+	POP_C_REGS
+	addq	$8, %rsp	/* skip regs->orig_ax */
+	INTERRUPT_RETURN
+
 
 /* Returning to kernel space */
 retint_kernel:
@@ -633,15 +651,17 @@
 	 */
 	TRACE_IRQS_IRETQ
 
-/*
- * At this label, code paths which return to kernel and to user,
- * which come from interrupts/exception and from syscalls, merge.
- */
-GLOBAL(restore_regs_and_iret)
-	RESTORE_EXTRA_REGS
-restore_c_regs_and_iret:
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
+GLOBAL(restore_regs_and_return_to_kernel)
+#ifdef CONFIG_DEBUG_ENTRY
+	/* Assert that pt_regs indicates kernel mode. */
+	testb	$3, CS(%rsp)
+	jz	1f
+	ud2
+1:
+#endif
+	POP_EXTRA_REGS
+	POP_C_REGS
+	addq	$8, %rsp	/* skip regs->orig_ax */
 	INTERRUPT_RETURN
 
 ENTRY(native_iret)
@@ -818,7 +838,7 @@
 
 	ASM_CLAC
 
-	.ifeq \has_error_code
+	.if \has_error_code == 0
 	pushq	$-1				/* ORIG_RAX: no syscall to restart */
 	.endif
 
@@ -1059,6 +1079,7 @@
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN
+idtentry xennmi			do_nmi			has_error_code=0
 idtentry xendebug		do_debug		has_error_code=0
 idtentry xenint3		do_int3			has_error_code=0
 #endif
@@ -1112,17 +1133,14 @@
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF_DEBUG
 	testl	%ebx, %ebx			/* swapgs needed? */
-	jnz	paranoid_exit_no_swapgs
+	jnz	.Lparanoid_exit_no_swapgs
 	TRACE_IRQS_IRETQ
 	SWAPGS_UNSAFE_STACK
-	jmp	paranoid_exit_restore
-paranoid_exit_no_swapgs:
+	jmp	.Lparanoid_exit_restore
+.Lparanoid_exit_no_swapgs:
 	TRACE_IRQS_IRETQ_DEBUG
-paranoid_exit_restore:
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
-	REMOVE_PT_GPREGS_FROM_STACK 8
-	INTERRUPT_RETURN
+.Lparanoid_exit_restore:
+	jmp restore_regs_and_return_to_kernel
 END(paranoid_exit)
 
 /*
@@ -1223,10 +1241,13 @@
 	jmp	retint_user
 END(error_exit)
 
-/* Runs on exception stack */
-/* XXX: broken on Xen PV */
+/*
+ * Runs on exception stack.  Xen PV does not go through this path at all,
+ * so we can use real assembly here.
+ */
 ENTRY(nmi)
 	UNWIND_HINT_IRET_REGS
+
 	/*
 	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
 	 * the iretq it performs will take us out of NMI context.
@@ -1284,7 +1305,7 @@
 	 * stacks lest we corrupt the "NMI executing" variable.
 	 */
 
-	SWAPGS_UNSAFE_STACK
+	swapgs
 	cld
 	movq	%rsp, %rdx
 	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
@@ -1328,8 +1349,7 @@
 	 * Return back to user mode.  We must *not* do the normal exit
 	 * work, because we don't want to enable interrupts.
 	 */
-	SWAPGS
-	jmp	restore_regs_and_iret
+	jmp	swapgs_restore_regs_and_return_to_usermode
 
 .Lnmi_from_kernel:
 	/*
@@ -1450,7 +1470,7 @@
 	popq	%rdx
 
 	/* We are returning to kernel mode, so this cannot result in a fault. */
-	INTERRUPT_RETURN
+	iretq
 
 first_nmi:
 	/* Restore rdx. */
@@ -1481,7 +1501,7 @@
 	pushfq			/* RFLAGS */
 	pushq	$__KERNEL_CS	/* CS */
 	pushq	$1f		/* RIP */
-	INTERRUPT_RETURN	/* continues at repeat_nmi below */
+	iretq			/* continues at repeat_nmi below */
 	UNWIND_HINT_IRET_REGS
 1:
 #endif
@@ -1544,29 +1564,34 @@
 nmi_swapgs:
 	SWAPGS_UNSAFE_STACK
 nmi_restore:
-	RESTORE_EXTRA_REGS
-	RESTORE_C_REGS
+	POP_EXTRA_REGS
+	POP_C_REGS
 
-	/* Point RSP at the "iret" frame. */
-	REMOVE_PT_GPREGS_FROM_STACK 6*8
+	/*
+	 * Skip orig_ax and the "outermost" frame to point RSP at the "iret"
+	 * at the "iret" frame.
+	 */
+	addq	$6*8, %rsp
 
 	/*
 	 * Clear "NMI executing".  Set DF first so that we can easily
 	 * distinguish the remaining code between here and IRET from
-	 * the SYSCALL entry and exit paths.  On a native kernel, we
-	 * could just inspect RIP, but, on paravirt kernels,
-	 * INTERRUPT_RETURN can translate into a jump into a
-	 * hypercall page.
+	 * the SYSCALL entry and exit paths.
+	 *
+	 * We arguably should just inspect RIP instead, but I (Andy) wrote
+	 * this code when I had the misapprehension that Xen PV supported
+	 * NMIs, and Xen PV would break that approach.
 	 */
 	std
 	movq	$0, 5*8(%rsp)		/* clear "NMI executing" */
 
 	/*
-	 * INTERRUPT_RETURN reads the "iret" frame and exits the NMI
-	 * stack in a single instruction.  We are returning to kernel
-	 * mode, so this cannot result in a fault.
+	 * iretq reads the "iret" frame and exits the NMI stack in a
+	 * single instruction.  We are returning to kernel mode, so this
+	 * cannot result in a fault.  Similarly, we don't need to worry
+	 * about espfix64 on the way back to kernel mode.
 	 */
-	INTERRUPT_RETURN
+	iretq
 END(nmi)
 
 ENTRY(ignore_sysret)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index b5c7a56..568e130 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -337,8 +337,7 @@
 
 	/* Go back to user mode. */
 	TRACE_IRQS_ON
-	SWAPGS
-	jmp	restore_regs_and_iret
+	jmp	swapgs_restore_regs_and_return_to_usermode
 END(entry_INT80_compat)
 
 ENTRY(stub32_clone)
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h
index 5b0579a..3ac991d 100644
--- a/arch/x86/include/asm/archrandom.h
+++ b/arch/x86/include/asm/archrandom.h
@@ -45,7 +45,7 @@ static inline bool rdrand_long(unsigned long *v)
 	bool ok;
 	unsigned int retry = RDRAND_RETRY_LOOPS;
 	do {
-		asm volatile(RDRAND_LONG "\n\t"
+		asm volatile(RDRAND_LONG
 			     CC_SET(c)
 			     : CC_OUT(c) (ok), "=a" (*v));
 		if (ok)
@@ -59,7 +59,7 @@ static inline bool rdrand_int(unsigned int *v)
 	bool ok;
 	unsigned int retry = RDRAND_RETRY_LOOPS;
 	do {
-		asm volatile(RDRAND_INT "\n\t"
+		asm volatile(RDRAND_INT
 			     CC_SET(c)
 			     : CC_OUT(c) (ok), "=a" (*v));
 		if (ok)
@@ -71,7 +71,7 @@ static inline bool rdrand_int(unsigned int *v)
 static inline bool rdseed_long(unsigned long *v)
 {
 	bool ok;
-	asm volatile(RDSEED_LONG "\n\t"
+	asm volatile(RDSEED_LONG
 		     CC_SET(c)
 		     : CC_OUT(c) (ok), "=a" (*v));
 	return ok;
@@ -80,7 +80,7 @@ static inline bool rdseed_long(unsigned long *v)
 static inline bool rdseed_int(unsigned int *v)
 {
 	bool ok;
-	asm volatile(RDSEED_INT "\n\t"
+	asm volatile(RDSEED_INT
 		     CC_SET(c)
 		     : CC_OUT(c) (ok), "=a" (*v));
 	return ok;
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 2bcf473..3fa0398 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -143,7 +143,7 @@ static __always_inline void __clear_bit(long nr, volatile unsigned long *addr)
 static __always_inline bool clear_bit_unlock_is_negative_byte(long nr, volatile unsigned long *addr)
 {
 	bool negative;
-	asm volatile(LOCK_PREFIX "andb %2,%1\n\t"
+	asm volatile(LOCK_PREFIX "andb %2,%1"
 		CC_SET(s)
 		: CC_OUT(s) (negative), ADDR
 		: "ir" ((char) ~(1 << nr)) : "memory");
@@ -246,7 +246,7 @@ static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *
 {
 	bool oldbit;
 
-	asm("bts %2,%1\n\t"
+	asm("bts %2,%1"
 	    CC_SET(c)
 	    : CC_OUT(c) (oldbit), ADDR
 	    : "Ir" (nr));
@@ -286,7 +286,7 @@ static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long
 {
 	bool oldbit;
 
-	asm volatile("btr %2,%1\n\t"
+	asm volatile("btr %2,%1"
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit), ADDR
 		     : "Ir" (nr));
@@ -298,7 +298,7 @@ static __always_inline bool __test_and_change_bit(long nr, volatile unsigned lon
 {
 	bool oldbit;
 
-	asm volatile("btc %2,%1\n\t"
+	asm volatile("btc %2,%1"
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit), ADDR
 		     : "Ir" (nr) : "memory");
@@ -329,7 +329,7 @@ static __always_inline bool variable_test_bit(long nr, volatile const unsigned l
 {
 	bool oldbit;
 
-	asm volatile("bt %2,%1\n\t"
+	asm volatile("bt %2,%1"
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit)
 		     : "m" (*(unsigned long *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 9eef9cc..a600a6c 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -7,6 +7,7 @@
  */
 #include <linux/types.h>
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 #include <asm/processor.h>
 #include <asm/user32.h>
 #include <asm/unistd.h>
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 0dfa684..bf6a762 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -126,11 +126,10 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 #define boot_cpu_has(bit)	cpu_has(&boot_cpu_data, bit)
 
 #define set_cpu_cap(c, bit)	set_bit(bit, (unsigned long *)((c)->x86_capability))
-#define clear_cpu_cap(c, bit)	clear_bit(bit, (unsigned long *)((c)->x86_capability))
-#define setup_clear_cpu_cap(bit) do { \
-	clear_cpu_cap(&boot_cpu_data, bit);	\
-	set_bit(bit, (unsigned long *)cpu_caps_cleared); \
-} while (0)
+
+extern void setup_clear_cpu_cap(unsigned int bit);
+extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit);
+
 #define setup_force_cpu_cap(bit) do { \
 	set_cpu_cap(&boot_cpu_data, bit);	\
 	set_bit(bit, (unsigned long *)cpu_caps_set);	\
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 793690f..7437073 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -22,6 +22,11 @@
  * this feature bit is not displayed in /proc/cpuinfo at all.
  */
 
+/*
+ * When adding new features here that depend on other features,
+ * please update the table in kernel/cpu/cpuid-deps.c
+ */
+
 /* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
 #define X86_FEATURE_FPU		( 0*32+ 0) /* Onboard FPU */
 #define X86_FEATURE_VME		( 0*32+ 1) /* Virtual Mode Extensions */
@@ -295,6 +300,12 @@
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
 #define X86_FEATURE_PKU		(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE	(16*32+ 4) /* OS Protection Keys Enable */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_GFNI	(16*32+ 8) /* Galois Field New Instructions */
+#define X86_FEATURE_VAES	(16*32+ 9) /* Vector AES */
+#define X86_FEATURE_VPCLMULQDQ	(16*32+ 10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+ 11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
 #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57	(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID	(16*32+22) /* RDPID instruction */
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 8546faf..7948a17 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -6,7 +6,7 @@
 #include <asm/orc_types.h>
 
 struct mod_arch_specific {
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
 	unsigned int num_orcs;
 	int *orc_unwind_ip;
 	struct orc_entry *orc_unwind;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index fd81228..283efca 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -16,10 +16,9 @@
 #include <linux/cpumask.h>
 #include <asm/frame.h>
 
-static inline void load_sp0(struct tss_struct *tss,
-			     struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
 {
-	PVOP_VCALL2(pv_cpu_ops.load_sp0, tss, thread);
+	PVOP_VCALL1(pv_cpu_ops.load_sp0, sp0);
 }
 
 /* The paravirtualized CPUID instruction. */
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 10cc3b9..6ec54d0 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -134,7 +134,7 @@ struct pv_cpu_ops {
 	void (*alloc_ldt)(struct desc_struct *ldt, unsigned entries);
 	void (*free_ldt)(struct desc_struct *ldt, unsigned entries);
 
-	void (*load_sp0)(struct tss_struct *tss, struct thread_struct *t);
+	void (*load_sp0)(unsigned long sp0);
 
 	void (*set_iopl_mask)(unsigned mask);
 
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 377f1ff..ba3c523 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -526,7 +526,7 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
 {
 	bool oldbit;
 
-	asm volatile("bt "__percpu_arg(2)",%1\n\t"
+	asm volatile("bt "__percpu_arg(2)",%1"
 			CC_SET(c)
 			: CC_OUT(c) (oldbit)
 			: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 59df7b4..9e9b05f 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -200,10 +200,9 @@ enum page_cache_mode {
 
 #define _PAGE_ENC	(_AT(pteval_t, sme_me_mask))
 
-#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
-			 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
 			 _PAGE_DIRTY | _PAGE_ENC)
+#define _PAGE_TABLE	(_KERNPG_TABLE | _PAGE_USER)
 
 #define __PAGE_KERNEL_ENC	(__PAGE_KERNEL | _PAGE_ENC)
 #define __PAGE_KERNEL_ENC_WP	(__PAGE_KERNEL_WP | _PAGE_ENC)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index bdac19a..2db7cf7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -431,7 +431,9 @@ typedef struct {
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
+#ifdef CONFIG_X86_32
 	unsigned long		sp0;
+#endif
 	unsigned long		sp;
 #ifdef CONFIG_X86_32
 	unsigned long		sysenter_cs;
@@ -518,16 +520,9 @@ static inline void native_set_iopl_mask(unsigned mask)
 }
 
 static inline void
-native_load_sp0(struct tss_struct *tss, struct thread_struct *thread)
+native_load_sp0(unsigned long sp0)
 {
-	tss->x86_tss.sp0 = thread->sp0;
-#ifdef CONFIG_X86_32
-	/* Only happens when SEP is enabled, no need to test "SEP"arately: */
-	if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
-		tss->x86_tss.ss1 = thread->sysenter_cs;
-		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
-	}
-#endif
+	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
 }
 
 static inline void native_swapgs(void)
@@ -547,15 +542,20 @@ static inline unsigned long current_top_of_stack(void)
 #endif
 }
 
+static inline bool on_thread_stack(void)
+{
+	return (unsigned long)(current_top_of_stack() -
+			       current_stack_pointer) < THREAD_SIZE;
+}
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/paravirt.h>
 #else
 #define __cpuid			native_cpuid
 
-static inline void load_sp0(struct tss_struct *tss,
-			    struct thread_struct *thread)
+static inline void load_sp0(unsigned long sp0)
 {
-	native_load_sp0(tss, thread);
+	native_load_sp0(sp0);
 }
 
 #define set_iopl_mask native_set_iopl_mask
@@ -804,6 +804,15 @@ static inline void spin_lock_prefetch(const void *x)
 #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
 			   TOP_OF_KERNEL_STACK_PADDING)
 
+#define task_top_of_stack(task) ((unsigned long)(task_pt_regs(task) + 1))
+
+#define task_pt_regs(task) \
+({									\
+	unsigned long __ptr = (unsigned long)task_stack_page(task);	\
+	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;		\
+	((struct pt_regs *)__ptr) - 1;					\
+})
+
 #ifdef CONFIG_X86_32
 /*
  * User space process size: 3GB (default).
@@ -823,23 +832,6 @@ static inline void spin_lock_prefetch(const void *x)
 	.addr_limit		= KERNEL_DS,				  \
 }
 
-/*
- * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
- * This is necessary to guarantee that the entire "struct pt_regs"
- * is accessible even if the CPU haven't stored the SS/ESP registers
- * on the stack (interrupt gate does not save these registers
- * when switching to the same priv ring).
- * Therefore beware: accessing the ss/esp fields of the
- * "struct pt_regs" is possible, but they may contain the
- * completely wrong values.
- */
-#define task_pt_regs(task) \
-({									\
-	unsigned long __ptr = (unsigned long)task_stack_page(task);	\
-	__ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING;		\
-	((struct pt_regs *)__ptr) - 1;					\
-})
-
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
@@ -873,11 +865,9 @@ static inline void spin_lock_prefetch(const void *x)
 #define STACK_TOP_MAX		TASK_SIZE_MAX
 
 #define INIT_THREAD  {						\
-	.sp0			= TOP_OF_INIT_STACK,		\
 	.addr_limit		= KERNEL_DS,			\
 }
 
-#define task_pt_regs(tsk)	((struct pt_regs *)(tsk)->thread.sp0 - 1)
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
 #endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index c0e3c45..14131dd 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -136,9 +136,9 @@ static inline int v8086_mode(struct pt_regs *regs)
 #endif
 }
 
-#ifdef CONFIG_X86_64
 static inline bool user_64bit_mode(struct pt_regs *regs)
 {
+#ifdef CONFIG_X86_64
 #ifndef CONFIG_PARAVIRT
 	/*
 	 * On non-paravirt systems, this is the only long mode CPL 3
@@ -149,8 +149,12 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
 	/* Headers are too twisted for this to go in paravirt.h. */
 	return regs->cs == __USER_CS || regs->cs == pv_info.extra_user_64bit_cs;
 #endif
+#else /* !CONFIG_X86_64 */
+	return false;
+#endif
 }
 
+#ifdef CONFIG_X86_64
 #define current_user_stack_pointer()	current_pt_regs()->sp
 #define compat_user_stack_pointer()	current_pt_regs()->sp
 #endif
diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h
index d8f3a6a..f91c365 100644
--- a/arch/x86/include/asm/rmwcc.h
+++ b/arch/x86/include/asm/rmwcc.h
@@ -29,7 +29,7 @@ cc_label:								\
 #define __GEN_RMWcc(fullop, var, cc, clobbers, ...)			\
 do {									\
 	bool c;								\
-	asm volatile (fullop ";" CC_SET(cc)				\
+	asm volatile (fullop CC_SET(cc)					\
 			: [counter] "+m" (var), CC_OUT(cc) (c)		\
 			: __VA_ARGS__ : clobbers);			\
 	return c;							\
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 899084b..8c6bd68 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_X86_SWITCH_TO_H
 #define _ASM_X86_SWITCH_TO_H
 
+#include <linux/sched/task_stack.h>
+
 struct task_struct; /* one of the stranger aspects of C forward declarations */
 
 struct task_struct *__switch_to_asm(struct task_struct *prev,
@@ -73,4 +75,26 @@ do {									\
 	((last) = __switch_to_asm((prev), (next)));			\
 } while (0)
 
+#ifdef CONFIG_X86_32
+static inline void refresh_sysenter_cs(struct thread_struct *thread)
+{
+	/* Only happens when SEP is enabled, no need to test "SEP"arately: */
+	if (unlikely(this_cpu_read(cpu_tss.x86_tss.ss1) == thread->sysenter_cs))
+		return;
+
+	this_cpu_write(cpu_tss.x86_tss.ss1, thread->sysenter_cs);
+	wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
+}
+#endif
+
+/* This is used when switching tasks or entering/exiting vm86 mode. */
+static inline void update_sp0(struct task_struct *task)
+{
+#ifdef CONFIG_X86_32
+	load_sp0(task->thread.sp0);
+#else
+	load_sp0(task_top_of_stack(task));
+#endif
+}
+
 #endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 91dfcaf..bad25bb 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -21,7 +21,7 @@ asmlinkage long sys_ioperm(unsigned long, unsigned long, int);
 asmlinkage long sys_iopl(unsigned int);
 
 /* kernel/ldt.c */
-asmlinkage int sys_modify_ldt(int, void __user *, unsigned long);
+asmlinkage long sys_modify_ldt(int, void __user *, unsigned long);
 
 /* kernel/signal.c */
 asmlinkage long sys_rt_sigreturn(void);
diff --git a/arch/x86/include/asm/trace/fpu.h b/arch/x86/include/asm/trace/fpu.h
index fa60398..069c04b 100644
--- a/arch/x86/include/asm/trace/fpu.h
+++ b/arch/x86/include/asm/trace/fpu.h
@@ -34,11 +34,6 @@ DECLARE_EVENT_CLASS(x86_fpu,
 	)
 );
 
-DEFINE_EVENT(x86_fpu, x86_fpu_state,
-	TP_PROTO(struct fpu *fpu),
-	TP_ARGS(fpu)
-);
-
 DEFINE_EVENT(x86_fpu, x86_fpu_before_save,
 	TP_PROTO(struct fpu *fpu),
 	TP_ARGS(fpu)
@@ -74,11 +69,6 @@ DEFINE_EVENT(x86_fpu, x86_fpu_activate_state,
 	TP_ARGS(fpu)
 );
 
-DEFINE_EVENT(x86_fpu, x86_fpu_deactivate_state,
-	TP_PROTO(struct fpu *fpu),
-	TP_ARGS(fpu)
-);
-
 DEFINE_EVENT(x86_fpu, x86_fpu_init_state,
 	TP_PROTO(struct fpu *fpu),
 	TP_ARGS(fpu)
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index b0cced9..1fadd31 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -38,9 +38,9 @@ asmlinkage void simd_coprocessor_error(void);
 
 #if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
 asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xennmi(void);
 asmlinkage void xen_xendebug(void);
 asmlinkage void xen_xenint3(void);
-asmlinkage void xen_nmi(void);
 asmlinkage void xen_overflow(void);
 asmlinkage void xen_bounds(void);
 asmlinkage void xen_invalid_op(void);
@@ -145,4 +145,22 @@ enum {
 	X86_TRAP_IRET = 32,	/* 32, IRET Exception */
 };
 
+/*
+ * Page fault error code bits:
+ *
+ *   bit 0 ==	 0: no page found	1: protection fault
+ *   bit 1 ==	 0: read access		1: write access
+ *   bit 2 ==	 0: kernel-mode access	1: user-mode access
+ *   bit 3 ==				1: use of reserved bit detected
+ *   bit 4 ==				1: fault was an instruction fetch
+ *   bit 5 ==				1: protection keys block access
+ */
+enum x86_pf_error_code {
+	X86_PF_PROT	=		1 << 0,
+	X86_PF_WRITE	=		1 << 1,
+	X86_PF_USER	=		1 << 2,
+	X86_PF_RSVD	=		1 << 3,
+	X86_PF_INSTR	=		1 << 4,
+	X86_PF_PK	=		1 << 5,
+};
 #endif /* _ASM_X86_TRAPS_H */
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h
index 87adc0d..e9cc6fe 100644
--- a/arch/x86/include/asm/unwind.h
+++ b/arch/x86/include/asm/unwind.h
@@ -13,11 +13,11 @@ struct unwind_state {
 	struct task_struct *task;
 	int graph_idx;
 	bool error;
-#if defined(CONFIG_ORC_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC)
 	bool signal, full_regs;
 	unsigned long sp, bp, ip;
 	struct pt_regs *regs;
-#elif defined(CONFIG_FRAME_POINTER_UNWINDER)
+#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
 	bool got_irq;
 	unsigned long *bp, *orig_sp, ip;
 	struct pt_regs *regs;
@@ -51,7 +51,7 @@ void unwind_start(struct unwind_state *state, struct task_struct *task,
 	__unwind_start(state, task, regs, first_frame);
 }
 
-#if defined(CONFIG_ORC_UNWINDER) || defined(CONFIG_FRAME_POINTER_UNWINDER)
+#if defined(CONFIG_UNWINDER_ORC) || defined(CONFIG_UNWINDER_FRAME_POINTER)
 static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 {
 	if (unwind_done(state))
@@ -66,7 +66,7 @@ static inline struct pt_regs *unwind_get_entry_regs(struct unwind_state *state)
 }
 #endif
 
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
 void unwind_init(void);
 void unwind_module_init(struct module *mod, void *orc_ip, size_t orc_ip_size,
 			void *orc, size_t orc_size);
diff --git a/arch/x86/include/uapi/asm/processor-flags.h b/arch/x86/include/uapi/asm/processor-flags.h
index 6f33553..53b4ca5 100644
--- a/arch/x86/include/uapi/asm/processor-flags.h
+++ b/arch/x86/include/uapi/asm/processor-flags.h
@@ -152,5 +152,8 @@
 #define CX86_ARR_BASE	0xc4
 #define CX86_RCR_BASE	0xdc
 
+#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
+			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
+			 X86_CR0_PG)
 
 #endif /* _UAPI_ASM_X86_PROCESSOR_FLAGS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 5f70044..295abaa 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -25,9 +25,9 @@
 KASAN_SANITIZE_head$(BITS).o				:= n
 KASAN_SANITIZE_dumpstack.o				:= n
 KASAN_SANITIZE_dumpstack_$(BITS).o			:= n
-KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_stacktrace.o				:= n
+KASAN_SANITIZE_paravirt.o				:= n
 
-OBJECT_FILES_NON_STANDARD_head_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o	:= y
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o		:= y
 OBJECT_FILES_NON_STANDARD_test_nx.o			:= y
@@ -128,9 +128,9 @@
 obj-$(CONFIG_TRACING)			+= tracepoint.o
 obj-$(CONFIG_SCHED_MC_PRIO)		+= itmt.o
 
-obj-$(CONFIG_ORC_UNWINDER)		+= unwind_orc.o
-obj-$(CONFIG_FRAME_POINTER_UNWINDER)	+= unwind_frame.o
-obj-$(CONFIG_GUESS_UNWINDER)		+= unwind_guess.o
+obj-$(CONFIG_UNWINDER_ORC)		+= unwind_orc.o
+obj-$(CONFIG_UNWINDER_FRAME_POINTER)	+= unwind_frame.o
+obj-$(CONFIG_UNWINDER_GUESS)		+= unwind_guess.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 236999c..570e8bb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -23,6 +23,7 @@
 obj-y			+= match.o
 obj-y			+= bugs.o
 obj-y			+= aperfmperf.o
+obj-y			+= cpuid-deps.o
 
 obj-$(CONFIG_PROC_FS)	+= proc.o
 obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c9176ba..cdf79ab 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1301,18 +1301,16 @@ void print_cpu_info(struct cpuinfo_x86 *c)
 		pr_cont(")\n");
 }
 
-static __init int setup_disablecpuid(char *arg)
+/*
+ * clearcpuid= was already parsed in fpu__init_parse_early_param.
+ * But we need to keep a dummy __setup around otherwise it would
+ * show up as an environment variable for init.
+ */
+static __init int setup_clearcpuid(char *arg)
 {
-	int bit;
-
-	if (get_option(&arg, &bit) && bit >= 0 && bit < NCAPINTS * 32)
-		setup_clear_cpu_cap(bit);
-	else
-		return 0;
-
 	return 1;
 }
-__setup("clearcpuid=", setup_disablecpuid);
+__setup("clearcpuid=", setup_clearcpuid);
 
 #ifdef CONFIG_X86_64
 DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1572,9 +1570,13 @@ void cpu_init(void)
 	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, me);
 
-	load_sp0(t, &current->thread);
+	/*
+	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
+	 * task never enters user mode.
+	 */
 	set_tss_desc(cpu, t);
 	load_TR_desc();
+
 	load_mm_ldt(&init_mm);
 
 	clear_all_debug_regs();
@@ -1596,7 +1598,6 @@ void cpu_init(void)
 	int cpu = smp_processor_id();
 	struct task_struct *curr = current;
 	struct tss_struct *t = &per_cpu(cpu_tss, cpu);
-	struct thread_struct *thread = &curr->thread;
 
 	wait_for_master_cpu(cpu);
 
@@ -1627,9 +1628,13 @@ void cpu_init(void)
 	initialize_tlbstate_and_flush();
 	enter_lazy_tlb(&init_mm, curr);
 
-	load_sp0(t, thread);
+	/*
+	 * Initialize the TSS.  Don't bother initializing sp0, as the initial
+	 * task never enters user mode.
+	 */
 	set_tss_desc(cpu, t);
 	load_TR_desc();
+
 	load_mm_ldt(&init_mm);
 
 	t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c
new file mode 100644
index 0000000..904b0a3c4
--- /dev/null
+++ b/arch/x86/kernel/cpu/cpuid-deps.c
@@ -0,0 +1,121 @@
+/* Declare dependencies between CPUIDs */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <asm/cpufeature.h>
+
+struct cpuid_dep {
+	unsigned int	feature;
+	unsigned int	depends;
+};
+
+/*
+ * Table of CPUID features that depend on others.
+ *
+ * This only includes dependencies that can be usefully disabled, not
+ * features part of the base set (like FPU).
+ *
+ * Note this all is not __init / __initdata because it can be
+ * called from cpu hotplug. It shouldn't do anything in this case,
+ * but it's difficult to tell that to the init reference checker.
+ */
+const static struct cpuid_dep cpuid_deps[] = {
+	{ X86_FEATURE_XSAVEOPT,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_XSAVEC,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_XSAVES,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_AVX,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_PKU,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_MPX,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_XGETBV1,		X86_FEATURE_XSAVE     },
+	{ X86_FEATURE_FXSR_OPT,		X86_FEATURE_FXSR      },
+	{ X86_FEATURE_XMM,		X86_FEATURE_FXSR      },
+	{ X86_FEATURE_XMM2,		X86_FEATURE_XMM       },
+	{ X86_FEATURE_XMM3,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_XMM4_1,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_XMM4_2,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_XMM3,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_PCLMULQDQ,	X86_FEATURE_XMM2      },
+	{ X86_FEATURE_SSSE3,		X86_FEATURE_XMM2,     },
+	{ X86_FEATURE_F16C,		X86_FEATURE_XMM2,     },
+	{ X86_FEATURE_AES,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_SHA_NI,		X86_FEATURE_XMM2      },
+	{ X86_FEATURE_FMA,		X86_FEATURE_AVX       },
+	{ X86_FEATURE_AVX2,		X86_FEATURE_AVX,      },
+	{ X86_FEATURE_AVX512F,		X86_FEATURE_AVX,      },
+	{ X86_FEATURE_AVX512IFMA,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512PF,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512ER,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512CD,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512DQ,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512BW,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512VL,		X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512VBMI,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512_VBMI2,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_GFNI,		X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_VAES,		X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_VPCLMULQDQ,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_AVX512_VNNI,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_AVX512_BITALG,	X86_FEATURE_AVX512VL  },
+	{ X86_FEATURE_AVX512_4VNNIW,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512_4FMAPS,	X86_FEATURE_AVX512F   },
+	{ X86_FEATURE_AVX512_VPOPCNTDQ, X86_FEATURE_AVX512F   },
+	{}
+};
+
+static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
+{
+	/*
+	 * Note: This could use the non atomic __*_bit() variants, but the
+	 * rest of the cpufeature code uses atomics as well, so keep it for
+	 * consistency. Cleanup all of it separately.
+	 */
+	if (!c) {
+		clear_cpu_cap(&boot_cpu_data, feature);
+		set_bit(feature, (unsigned long *)cpu_caps_cleared);
+	} else {
+		clear_bit(feature, (unsigned long *)c->x86_capability);
+	}
+}
+
+/* Take the capabilities and the BUG bits into account */
+#define MAX_FEATURE_BITS ((NCAPINTS + NBUGINTS) * sizeof(u32) * 8)
+
+static void do_clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+	DECLARE_BITMAP(disable, MAX_FEATURE_BITS);
+	const struct cpuid_dep *d;
+	bool changed;
+
+	if (WARN_ON(feature >= MAX_FEATURE_BITS))
+		return;
+
+	clear_feature(c, feature);
+
+	/* Collect all features to disable, handling dependencies */
+	memset(disable, 0, sizeof(disable));
+	__set_bit(feature, disable);
+
+	/* Loop until we get a stable state. */
+	do {
+		changed = false;
+		for (d = cpuid_deps; d->feature; d++) {
+			if (!test_bit(d->depends, disable))
+				continue;
+			if (__test_and_set_bit(d->feature, disable))
+				continue;
+
+			changed = true;
+			clear_feature(c, d->feature);
+		}
+	} while (changed);
+}
+
+void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int feature)
+{
+	do_clear_cpu_cap(c, feature);
+}
+
+void setup_clear_cpu_cap(unsigned int feature)
+{
+	do_clear_cpu_cap(NULL, feature);
+}
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 7affb7e..6abd835 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -249,6 +249,10 @@ static void __init fpu__init_system_ctx_switch(void)
  */
 static void __init fpu__init_parse_early_param(void)
 {
+	char arg[32];
+	char *argptr = arg;
+	int bit;
+
 	if (cmdline_find_option_bool(boot_command_line, "no387"))
 		setup_clear_cpu_cap(X86_FEATURE_FPU);
 
@@ -266,6 +270,13 @@ static void __init fpu__init_parse_early_param(void)
 
 	if (cmdline_find_option_bool(boot_command_line, "noxsaves"))
 		setup_clear_cpu_cap(X86_FEATURE_XSAVES);
+
+	if (cmdline_find_option(boot_command_line, "clearcpuid", arg,
+				sizeof(arg)) &&
+	    get_option(&argptr, &bit) &&
+	    bit >= 0 &&
+	    bit < NCAPINTS * 32)
+		setup_clear_cpu_cap(bit);
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index f1d5476..87a57b7 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -15,6 +15,7 @@
 #include <asm/fpu/xstate.h>
 
 #include <asm/tlbflush.h>
+#include <asm/cpufeature.h>
 
 /*
  * Although we spell it out in here, the Processor Trace
@@ -36,6 +37,19 @@ static const char *xfeature_names[] =
 	"unknown xstate feature"	,
 };
 
+static short xsave_cpuid_features[] __initdata = {
+	X86_FEATURE_FPU,
+	X86_FEATURE_XMM,
+	X86_FEATURE_AVX,
+	X86_FEATURE_MPX,
+	X86_FEATURE_MPX,
+	X86_FEATURE_AVX512F,
+	X86_FEATURE_AVX512F,
+	X86_FEATURE_AVX512F,
+	X86_FEATURE_INTEL_PT,
+	X86_FEATURE_PKU,
+};
+
 /*
  * Mask of xstate features supported by the CPU and the kernel:
  */
@@ -59,26 +73,6 @@ unsigned int fpu_user_xstate_size;
 void fpu__xstate_clear_all_cpu_caps(void)
 {
 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
-	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
-	setup_clear_cpu_cap(X86_FEATURE_XSAVEC);
-	setup_clear_cpu_cap(X86_FEATURE_XSAVES);
-	setup_clear_cpu_cap(X86_FEATURE_AVX);
-	setup_clear_cpu_cap(X86_FEATURE_AVX2);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512F);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512IFMA);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512PF);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512ER);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512CD);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512DQ);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512BW);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512VL);
-	setup_clear_cpu_cap(X86_FEATURE_MPX);
-	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512VBMI);
-	setup_clear_cpu_cap(X86_FEATURE_PKU);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
-	setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ);
 }
 
 /*
@@ -726,6 +720,7 @@ void __init fpu__init_system_xstate(void)
 	unsigned int eax, ebx, ecx, edx;
 	static int on_boot_cpu __initdata = 1;
 	int err;
+	int i;
 
 	WARN_ON_FPU(!on_boot_cpu);
 	on_boot_cpu = 0;
@@ -759,6 +754,14 @@ void __init fpu__init_system_xstate(void)
 		goto out_disable;
 	}
 
+	/*
+	 * Clear XSAVE features that are disabled in the normal CPUID.
+	 */
+	for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) {
+		if (!boot_cpu_has(xsave_cpuid_features[i]))
+			xfeatures_mask &= ~BIT(i);
+	}
+
 	xfeatures_mask &= fpu__get_supported_xfeatures_mask();
 
 	/* Enable xstate instructions to be able to continue with initialization: */
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f1d528b..c290209 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -212,9 +212,6 @@
 #endif
 
 .Ldefault_entry:
-#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
-			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
-			 X86_CR0_PG)
 	movl $(CR0_STATE & ~X86_CR0_PG),%eax
 	movl %eax,%cr0
 
@@ -402,7 +399,7 @@
 	# 24(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
+	.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
 	pushl $0		# Dummy error code, to make stack frame uniform
 	.endif
 	pushl $i		# 20(%esp) Vector number
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6dde3f3..7dca675 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -38,11 +38,12 @@
  *
  */
 
-#define p4d_index(x)	(((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
 PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
 PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
+#endif
 L3_START_KERNEL = pud_index(__START_KERNEL_map)
 
 	.text
@@ -50,6 +51,7 @@
 	.code64
 	.globl startup_64
 startup_64:
+	UNWIND_HINT_EMPTY
 	/*
 	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
 	 * and someone has loaded an identity mapped page table
@@ -89,6 +91,7 @@
 	addq	$(early_top_pgt - __START_KERNEL_map), %rax
 	jmp 1f
 ENTRY(secondary_startup_64)
+	UNWIND_HINT_EMPTY
 	/*
 	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
 	 * and someone has loaded a mapped page table.
@@ -133,6 +136,7 @@
 	movq	$1f, %rax
 	jmp	*%rax
 1:
+	UNWIND_HINT_EMPTY
 
 	/* Check if nx is implemented */
 	movl	$0x80000001, %eax
@@ -150,9 +154,6 @@
 1:	wrmsr				/* Make changes effective */
 
 	/* Setup cr0 */
-#define CR0_STATE	(X86_CR0_PE | X86_CR0_MP | X86_CR0_ET | \
-			 X86_CR0_NE | X86_CR0_WP | X86_CR0_AM | \
-			 X86_CR0_PG)
 	movl	$CR0_STATE, %eax
 	/* Make changes effective */
 	movq	%rax, %cr0
@@ -235,7 +236,7 @@
 	pushq	%rax		# target address in negative space
 	lretq
 .Lafter_lret:
-ENDPROC(secondary_startup_64)
+END(secondary_startup_64)
 
 #include "verify_cpu.S"
 
@@ -247,6 +248,7 @@
  */
 ENTRY(start_cpu0)
 	movq	initial_stack(%rip), %rsp
+	UNWIND_HINT_EMPTY
 	jmp	.Ljump_to_C_code
 ENDPROC(start_cpu0)
 #endif
@@ -266,26 +268,24 @@
 	.quad  init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
 	__FINITDATA
 
-bad_address:
-	jmp bad_address
-
 	__INIT
 ENTRY(early_idt_handler_array)
-	# 104(%rsp) %rflags
-	#  96(%rsp) %cs
-	#  88(%rsp) %rip
-	#  80(%rsp) error code
 	i = 0
 	.rept NUM_EXCEPTION_VECTORS
-	.ifeq (EXCEPTION_ERRCODE_MASK >> i) & 1
-	pushq $0		# Dummy error code, to make stack frame uniform
+	.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
+		UNWIND_HINT_IRET_REGS
+		pushq $0	# Dummy error code, to make stack frame uniform
+	.else
+		UNWIND_HINT_IRET_REGS offset=8
 	.endif
 	pushq $i		# 72(%rsp) Vector number
 	jmp early_idt_handler_common
+	UNWIND_HINT_IRET_REGS
 	i = i + 1
 	.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
 	.endr
-ENDPROC(early_idt_handler_array)
+	UNWIND_HINT_IRET_REGS offset=16
+END(early_idt_handler_array)
 
 early_idt_handler_common:
 	/*
@@ -313,6 +313,7 @@
 	pushq %r13				/* pt_regs->r13 */
 	pushq %r14				/* pt_regs->r14 */
 	pushq %r15				/* pt_regs->r15 */
+	UNWIND_HINT_REGS
 
 	cmpq $14,%rsi		/* Page fault? */
 	jnz 10f
@@ -327,8 +328,8 @@
 
 20:
 	decl early_recursion_flag(%rip)
-	jmp restore_regs_and_iret
-ENDPROC(early_idt_handler_common)
+	jmp restore_regs_and_return_to_kernel
+END(early_idt_handler_common)
 
 	__INITDATA
 
@@ -362,10 +363,7 @@
 
 	.data
 
-#ifndef CONFIG_XEN
-NEXT_PAGE(init_top_pgt)
-	.fill	512,8,0
-#else
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
 NEXT_PAGE(init_top_pgt)
 	.quad   level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org    init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -382,6 +380,9 @@
 	 * Don't set NX because code runs from these pages.
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#else
+NEXT_PAGE(init_top_pgt)
+	.fill	512,8,0
 #endif
 
 #ifdef CONFIG_X86_5LEVEL
@@ -435,7 +436,7 @@
 EXPORT_SYMBOL(phys_base)
 
 #include "../../x86/xen/xen-head.S"
-	
+
 	__PAGE_ALIGNED_BSS
 NEXT_PAGE(empty_zero_page)
 	.skip PAGE_SIZE
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 4d17bac..ae5615b 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -13,6 +13,7 @@
 #include <linux/string.h>
 #include <linux/mm.h>
 #include <linux/smp.h>
+#include <linux/syscalls.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
@@ -295,8 +296,8 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 	return error;
 }
 
-asmlinkage int sys_modify_ldt(int func, void __user *ptr,
-			      unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+		unsigned long , bytecount)
 {
 	int ret = -ENOSYS;
 
@@ -314,5 +315,14 @@ asmlinkage int sys_modify_ldt(int func, void __user *ptr,
 		ret = write_ldt(ptr, bytecount, 0);
 		break;
 	}
-	return ret;
+	/*
+	 * The SYSCALL_DEFINE() macros give us an 'unsigned long'
+	 * return type, but tht ABI for sys_modify_ldt() expects
+	 * 'int'.  This cast gives us an int-sized value in %rax
+	 * for the return code.  The 'unsigned' is necessary so
+	 * the compiler does not try to sign-extend the negative
+	 * return codes into the high half of the register when
+	 * taking the value from int->long.
+	 */
+	return (unsigned int)ret;
 }
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index c676853..97fb3e5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -49,7 +49,13 @@
  */
 __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
 	.x86_tss = {
-		.sp0 = TOP_OF_INIT_STACK,
+		/*
+		 * .sp0 is only used when entering ring 0 from a lower
+		 * privilege level.  Since the init task never runs anything
+		 * but ring 0 code, there is no need for a valid value here.
+		 * Poison it.
+		 */
+		.sp0 = (1UL << (BITS_PER_LONG-1)) + 1,
 #ifdef CONFIG_X86_32
 		.ss0 = __KERNEL_DS,
 		.ss1 = __KERNEL_CS,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 1196625..45bf0c5 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -284,9 +284,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	/*
 	 * Reload esp0 and cpu_current_top_of_stack.  This changes
-	 * current_thread_info().
+	 * current_thread_info().  Refresh the SYSENTER configuration in
+	 * case prev or next is vm86.
 	 */
-	load_sp0(tss, next);
+	update_sp0(next_p);
+	refresh_sysenter_cs(next);
 	this_cpu_write(cpu_current_top_of_stack,
 		       (unsigned long)task_stack_page(next_p) +
 		       THREAD_SIZE);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 302e7b2..eeeb34f 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -274,7 +274,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
 	struct inactive_task_frame *frame;
 	struct task_struct *me = current;
 
-	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
 	childregs = task_pt_regs(p);
 	fork_frame = container_of(childregs, struct fork_frame, regs);
 	frame = &fork_frame->frame;
@@ -464,8 +463,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 */
 	this_cpu_write(current_task, next_p);
 
-	/* Reload esp0 and ss1.  This changes current_thread_info(). */
-	load_sp0(tss, next);
+	/* Reload sp0. */
+	update_sp0(next_p);
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ad59edd..06c18fe 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -961,8 +961,7 @@ void common_cpu_up(unsigned int cpu, struct task_struct *idle)
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
 	irq_ctx_init(cpu);
-	per_cpu(cpu_current_top_of_stack, cpu) =
-		(unsigned long)task_stack_page(idle) + THREAD_SIZE;
+	per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
 #else
 	initial_gs = per_cpu_offset(cpu);
 #endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 67db4f4..42a9c44 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -141,8 +141,7 @@ void ist_begin_non_atomic(struct pt_regs *regs)
 	 * will catch asm bugs and any attempt to use ist_preempt_enable
 	 * from double_fault.
 	 */
-	BUG_ON((unsigned long)(current_top_of_stack() -
-			       current_stack_pointer) >= THREAD_SIZE);
+	BUG_ON(!on_thread_stack());
 
 	preempt_enable_no_resched();
 }
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index 014ea59..3d3c2f7 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -33,7 +33,7 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
-verify_cpu:
+ENTRY(verify_cpu)
 	pushf				# Save caller passed flags
 	push	$0			# Kill any dangerous flags
 	popf
@@ -139,3 +139,4 @@
 	popf				# Restore caller passed flags
 	xorl %eax, %eax
 	ret
+ENDPROC(verify_cpu)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 6824474..5edb27f 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -55,6 +55,7 @@
 #include <asm/irq.h>
 #include <asm/traps.h>
 #include <asm/vm86.h>
+#include <asm/switch_to.h>
 
 /*
  * Known problems:
@@ -94,7 +95,6 @@
 
 void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 {
-	struct tss_struct *tss;
 	struct task_struct *tsk = current;
 	struct vm86plus_struct __user *user;
 	struct vm86 *vm86 = current->thread.vm86;
@@ -146,12 +146,13 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval)
 		do_exit(SIGSEGV);
 	}
 
-	tss = &per_cpu(cpu_tss, get_cpu());
+	preempt_disable();
 	tsk->thread.sp0 = vm86->saved_sp0;
 	tsk->thread.sysenter_cs = __KERNEL_CS;
-	load_sp0(tss, &tsk->thread);
+	update_sp0(tsk);
+	refresh_sysenter_cs(&tsk->thread);
 	vm86->saved_sp0 = 0;
-	put_cpu();
+	preempt_enable();
 
 	memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs));
 
@@ -237,7 +238,6 @@ SYSCALL_DEFINE2(vm86, unsigned long, cmd, unsigned long, arg)
 
 static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 {
-	struct tss_struct *tss;
 	struct task_struct *tsk = current;
 	struct vm86 *vm86 = tsk->thread.vm86;
 	struct kernel_vm86_regs vm86regs;
@@ -365,15 +365,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *user_vm86, bool plus)
 	vm86->saved_sp0 = tsk->thread.sp0;
 	lazy_save_gs(vm86->regs32.gs);
 
-	tss = &per_cpu(cpu_tss, get_cpu());
 	/* make room for real-mode segments */
+	preempt_disable();
 	tsk->thread.sp0 += 16;
 
-	if (static_cpu_has(X86_FEATURE_SEP))
+	if (static_cpu_has(X86_FEATURE_SEP)) {
 		tsk->thread.sysenter_cs = 0;
+		refresh_sysenter_cs(&tsk->thread);
+	}
 
-	load_sp0(tss, &tsk->thread);
-	put_cpu();
+	update_sp0(tsk);
+	preempt_enable();
 
 	if (vm86->flags & VM86_SCREEN_BITMAP)
 		mark_screen_rdonly(tsk->mm);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index b0ff3786..3109ba6 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -30,26 +30,6 @@
 #include <asm/trace/exceptions.h>
 
 /*
- * Page fault error code bits:
- *
- *   bit 0 ==	 0: no page found	1: protection fault
- *   bit 1 ==	 0: read access		1: write access
- *   bit 2 ==	 0: kernel-mode access	1: user-mode access
- *   bit 3 ==				1: use of reserved bit detected
- *   bit 4 ==				1: fault was an instruction fetch
- *   bit 5 ==				1: protection keys block access
- */
-enum x86_pf_error_code {
-
-	PF_PROT		=		1 << 0,
-	PF_WRITE	=		1 << 1,
-	PF_USER		=		1 << 2,
-	PF_RSVD		=		1 << 3,
-	PF_INSTR	=		1 << 4,
-	PF_PK		=		1 << 5,
-};
-
-/*
  * Returns 0 if mmiotrace is disabled, or if the fault is not
  * handled by mmiotrace:
  */
@@ -150,7 +130,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
 	 * If it was a exec (instruction fetch) fault on NX page, then
 	 * do not ignore the fault:
 	 */
-	if (error_code & PF_INSTR)
+	if (error_code & X86_PF_INSTR)
 		return 0;
 
 	instr = (void *)convert_ip_to_linear(current, regs);
@@ -180,7 +160,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
  * siginfo so userspace can discover which protection key was set
  * on the PTE.
  *
- * If we get here, we know that the hardware signaled a PF_PK
+ * If we get here, we know that the hardware signaled a X86_PF_PK
  * fault and that there was a VMA once we got in the fault
  * handler.  It does *not* guarantee that the VMA we find here
  * was the one that we faulted on.
@@ -205,7 +185,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
 	/*
 	 * force_sig_info_fault() is called from a number of
 	 * contexts, some of which have a VMA and some of which
-	 * do not.  The PF_PK handing happens after we have a
+	 * do not.  The X86_PF_PK handing happens after we have a
 	 * valid VMA, so we should never reach this without a
 	 * valid VMA.
 	 */
@@ -698,7 +678,7 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code,
 	if (!oops_may_print())
 		return;
 
-	if (error_code & PF_INSTR) {
+	if (error_code & X86_PF_INSTR) {
 		unsigned int level;
 		pgd_t *pgd;
 		pte_t *pte;
@@ -780,7 +760,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
 		 */
 		if (current->thread.sig_on_uaccess_err && signal) {
 			tsk->thread.trap_nr = X86_TRAP_PF;
-			tsk->thread.error_code = error_code | PF_USER;
+			tsk->thread.error_code = error_code | X86_PF_USER;
 			tsk->thread.cr2 = address;
 
 			/* XXX: hwpoison faults will set the wrong code. */
@@ -898,7 +878,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 	struct task_struct *tsk = current;
 
 	/* User mode accesses just cause a SIGSEGV */
-	if (error_code & PF_USER) {
+	if (error_code & X86_PF_USER) {
 		/*
 		 * It's possible to have interrupts off here:
 		 */
@@ -919,7 +899,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		 * Instruction fetch faults in the vsyscall page might need
 		 * emulation.
 		 */
-		if (unlikely((error_code & PF_INSTR) &&
+		if (unlikely((error_code & X86_PF_INSTR) &&
 			     ((address & ~0xfff) == VSYSCALL_ADDR))) {
 			if (emulate_vsyscall(regs, address))
 				return;
@@ -932,7 +912,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
 		 * are always protection faults.
 		 */
 		if (address >= TASK_SIZE_MAX)
-			error_code |= PF_PROT;
+			error_code |= X86_PF_PROT;
 
 		if (likely(show_unhandled_signals))
 			show_signal_msg(regs, error_code, address, tsk);
@@ -993,11 +973,11 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code,
 
 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
 		return false;
-	if (error_code & PF_PK)
+	if (error_code & X86_PF_PK)
 		return true;
 	/* this checks permission keys on the VMA: */
-	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-				(error_code & PF_INSTR), foreign))
+	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+				       (error_code & X86_PF_INSTR), foreign))
 		return true;
 	return false;
 }
@@ -1025,7 +1005,7 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
 	int code = BUS_ADRERR;
 
 	/* Kernel mode? Handle exceptions or die: */
-	if (!(error_code & PF_USER)) {
+	if (!(error_code & X86_PF_USER)) {
 		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
 		return;
 	}
@@ -1053,14 +1033,14 @@ static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 	       unsigned long address, u32 *pkey, unsigned int fault)
 {
-	if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+	if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
 		no_context(regs, error_code, address, 0, 0);
 		return;
 	}
 
 	if (fault & VM_FAULT_OOM) {
 		/* Kernel mode? Handle exceptions or die: */
-		if (!(error_code & PF_USER)) {
+		if (!(error_code & X86_PF_USER)) {
 			no_context(regs, error_code, address,
 				   SIGSEGV, SEGV_MAPERR);
 			return;
@@ -1085,16 +1065,16 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
 
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
 {
-	if ((error_code & PF_WRITE) && !pte_write(*pte))
+	if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
 		return 0;
 
-	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+	if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
 		return 0;
 	/*
 	 * Note: We do not do lazy flushing on protection key
-	 * changes, so no spurious fault will ever set PF_PK.
+	 * changes, so no spurious fault will ever set X86_PF_PK.
 	 */
-	if ((error_code & PF_PK))
+	if ((error_code & X86_PF_PK))
 		return 1;
 
 	return 1;
@@ -1140,8 +1120,8 @@ spurious_fault(unsigned long error_code, unsigned long address)
 	 * change, so user accesses are not expected to cause spurious
 	 * faults.
 	 */
-	if (error_code != (PF_WRITE | PF_PROT)
-	    && error_code != (PF_INSTR | PF_PROT))
+	if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
+	    error_code != (X86_PF_INSTR | X86_PF_PROT))
 		return 0;
 
 	pgd = init_mm.pgd + pgd_index(address);
@@ -1201,19 +1181,19 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	 * always an unconditional error and can never result in
 	 * a follow-up action to resolve the fault, like a COW.
 	 */
-	if (error_code & PF_PK)
+	if (error_code & X86_PF_PK)
 		return 1;
 
 	/*
 	 * Make sure to check the VMA so that we do not perform
-	 * faults just to hit a PF_PK as soon as we fill in a
+	 * faults just to hit a X86_PF_PK as soon as we fill in a
 	 * page.
 	 */
-	if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE),
-				(error_code & PF_INSTR), foreign))
+	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
+				       (error_code & X86_PF_INSTR), foreign))
 		return 1;
 
-	if (error_code & PF_WRITE) {
+	if (error_code & X86_PF_WRITE) {
 		/* write, present and write, not present: */
 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
 			return 1;
@@ -1221,7 +1201,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	}
 
 	/* read, present: */
-	if (unlikely(error_code & PF_PROT))
+	if (unlikely(error_code & X86_PF_PROT))
 		return 1;
 
 	/* read, not present: */
@@ -1244,7 +1224,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
 	if (!static_cpu_has(X86_FEATURE_SMAP))
 		return false;
 
-	if (error_code & PF_USER)
+	if (error_code & X86_PF_USER)
 		return false;
 
 	if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
@@ -1297,7 +1277,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 * protection error (error_code & 9) == 0.
 	 */
 	if (unlikely(fault_in_kernel_space(address))) {
-		if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+		if (!(error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
 			if (vmalloc_fault(address) >= 0)
 				return;
 
@@ -1325,7 +1305,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	if (unlikely(kprobes_fault(regs)))
 		return;
 
-	if (unlikely(error_code & PF_RSVD))
+	if (unlikely(error_code & X86_PF_RSVD))
 		pgtable_bad(regs, error_code, address);
 
 	if (unlikely(smap_violation(error_code, regs))) {
@@ -1351,7 +1331,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 */
 	if (user_mode(regs)) {
 		local_irq_enable();
-		error_code |= PF_USER;
+		error_code |= X86_PF_USER;
 		flags |= FAULT_FLAG_USER;
 	} else {
 		if (regs->flags & X86_EFLAGS_IF)
@@ -1360,9 +1340,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 
 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
 
-	if (error_code & PF_WRITE)
+	if (error_code & X86_PF_WRITE)
 		flags |= FAULT_FLAG_WRITE;
-	if (error_code & PF_INSTR)
+	if (error_code & X86_PF_INSTR)
 		flags |= FAULT_FLAG_INSTRUCTION;
 
 	/*
@@ -1382,7 +1362,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 	 * space check, thus avoiding the deadlock:
 	 */
 	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
-		if ((error_code & PF_USER) == 0 &&
+		if (!(error_code & X86_PF_USER) &&
 		    !search_exception_tables(regs->ip)) {
 			bad_area_nosemaphore(regs, error_code, address, NULL);
 			return;
@@ -1409,7 +1389,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
 		bad_area(regs, error_code, address);
 		return;
 	}
-	if (error_code & PF_USER) {
+	if (error_code & X86_PF_USER) {
 		/*
 		 * Accessing the stack below %sp is always a bug.
 		 * The large cushion allows instructions like enter
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 048fbe8..adcea90 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
 void register_page_bootmem_memmap(unsigned long section_nr,
-				  struct page *start_page, unsigned long size)
+				  struct page *start_page, unsigned long nr_pages)
 {
 	unsigned long addr = (unsigned long)start_page;
-	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long end = (unsigned long)(start_page + nr_pages);
 	unsigned long next;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
-	unsigned int nr_pages;
+	unsigned int nr_pmd_pages;
 	struct page *page;
 
 	for (; addr < end; addr = next) {
@@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 			if (pmd_none(*pmd))
 				continue;
 
-			nr_pages = 1 << (get_order(PMD_SIZE));
+			nr_pmd_pages = 1 << get_order(PMD_SIZE);
 			page = pmd_page(*pmd);
-			while (nr_pages--)
+			while (nr_pmd_pages--)
 				get_page_bootmem(section_nr, page++,
 						 SECTION_INFO);
 		}
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index 8f5be3e..2b60dc6 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -16,6 +16,8 @@
 
 extern struct range pfn_mapped[E820_MAX_ENTRIES];
 
+static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
 static int __init map_range(struct range *range)
 {
 	unsigned long start;
@@ -31,8 +33,10 @@ static void __init clear_pgds(unsigned long start,
 			unsigned long end)
 {
 	pgd_t *pgd;
+	/* See comment in kasan_init() */
+	unsigned long pgd_end = end & PGDIR_MASK;
 
-	for (; start < end; start += PGDIR_SIZE) {
+	for (; start < pgd_end; start += PGDIR_SIZE) {
 		pgd = pgd_offset_k(start);
 		/*
 		 * With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -43,29 +47,61 @@ static void __init clear_pgds(unsigned long start,
 		else
 			pgd_clear(pgd);
 	}
+
+	pgd = pgd_offset_k(start);
+	for (; start < end; start += P4D_SIZE)
+		p4d_clear(p4d_offset(pgd, start));
+}
+
+static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
+{
+	unsigned long p4d;
+
+	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+		return (p4d_t *)pgd;
+
+	p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+	p4d += __START_KERNEL_map - phys_base;
+	return (p4d_t *)p4d + p4d_index(addr);
+}
+
+static void __init kasan_early_p4d_populate(pgd_t *pgd,
+		unsigned long addr,
+		unsigned long end)
+{
+	pgd_t pgd_entry;
+	p4d_t *p4d, p4d_entry;
+	unsigned long next;
+
+	if (pgd_none(*pgd)) {
+		pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
+		set_pgd(pgd, pgd_entry);
+	}
+
+	p4d = early_p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+
+		if (!p4d_none(*p4d))
+			continue;
+
+		p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
+		set_p4d(p4d, p4d_entry);
+	} while (p4d++, addr = next, addr != end && p4d_none(*p4d));
 }
 
 static void __init kasan_map_early_shadow(pgd_t *pgd)
 {
-	int i;
-	unsigned long start = KASAN_SHADOW_START;
+	/* See comment in kasan_init() */
+	unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
 	unsigned long end = KASAN_SHADOW_END;
+	unsigned long next;
 
-	for (i = pgd_index(start); start < end; i++) {
-		switch (CONFIG_PGTABLE_LEVELS) {
-		case 4:
-			pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
-					_KERNPG_TABLE);
-			break;
-		case 5:
-			pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
-					_KERNPG_TABLE);
-			break;
-		default:
-			BUILD_BUG();
-		}
-		start += PGDIR_SIZE;
-	}
+	pgd += pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		kasan_early_p4d_populate(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
 }
 
 #ifdef CONFIG_KASAN_INLINE
@@ -102,7 +138,7 @@ void __init kasan_early_init(void)
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		kasan_zero_pud[i] = __pud(pud_val);
 
-	for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
+	for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
 		kasan_zero_p4d[i] = __p4d(p4d_val);
 
 	kasan_map_early_shadow(early_top_pgt);
@@ -118,12 +154,35 @@ void __init kasan_init(void)
 #endif
 
 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
+
+	/*
+	 * We use the same shadow offset for 4- and 5-level paging to
+	 * facilitate boot-time switching between paging modes.
+	 * As result in 5-level paging mode KASAN_SHADOW_START and
+	 * KASAN_SHADOW_END are not aligned to PGD boundary.
+	 *
+	 * KASAN_SHADOW_START doesn't share PGD with anything else.
+	 * We claim whole PGD entry to make things easier.
+	 *
+	 * KASAN_SHADOW_END lands in the last PGD entry and it collides with
+	 * bunch of things like kernel code, modules, EFI mapping, etc.
+	 * We need to take extra steps to not overwrite them.
+	 */
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		void *ptr;
+
+		ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+		memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
+		set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
+				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
+	}
+
 	load_cr3(early_top_pgt);
 	__flush_tlb_all();
 
-	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+	clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);
 
-	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+	kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
 			kasan_mem_to_shadow((void *)PAGE_OFFSET));
 
 	for (i = 0; i < E820_MAX_ENTRIES; i++) {
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 836a1eb..3ee234b6 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -6,6 +6,7 @@
 #include <linux/mm.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <linux/syscalls.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 #include <os.h>
@@ -369,7 +370,9 @@ void free_ldt(struct mm_context *mm)
 	mm->arch.ldt.entry_count = 0;
 }
 
-int sys_modify_ldt(int func, void __user *ptr, unsigned long bytecount)
+SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
+		unsigned long , bytecount)
 {
-	return do_modify_ldt_skas(func, ptr, bytecount);
+	/* See non-um modify_ldt() for why we do this cast */
+	return (unsigned int)do_modify_ldt_skas(func, ptr, bytecount);
 }
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index d4396e2..e55d276 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -601,7 +601,7 @@ static struct trap_array_entry trap_array[] = {
 #ifdef CONFIG_X86_MCE
 	{ machine_check,               xen_machine_check,               true },
 #endif
-	{ nmi,                         xen_nmi,                         true },
+	{ nmi,                         xen_xennmi,                      true },
 	{ overflow,                    xen_overflow,                    false },
 #ifdef CONFIG_IA32_EMULATION
 	{ entry_INT80_compat,          xen_entry_INT80_compat,          false },
@@ -811,15 +811,14 @@ static void __init xen_write_gdt_entry_boot(struct desc_struct *dt, int entry,
 	}
 }
 
-static void xen_load_sp0(struct tss_struct *tss,
-			 struct thread_struct *thread)
+static void xen_load_sp0(unsigned long sp0)
 {
 	struct multicall_space mcs;
 
 	mcs = xen_mc_entry(0);
-	MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
+	MULTI_stack_switch(mcs.mc, __KERNEL_DS, sp0);
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
-	tss->x86_tss.sp0 = thread->sp0;
+	this_cpu_write(cpu_tss.x86_tss.sp0, sp0);
 }
 
 void xen_set_iopl_mask(unsigned mask)
diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c
index 71495f1..2ccdaba 100644
--- a/arch/x86/xen/mmu_pv.c
+++ b/arch/x86/xen/mmu_pv.c
@@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);
 
-#if CONFIG_PGTABLE_LEVELS == 4
+#ifdef CONFIG_X86_64
 __visible pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
@@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)
 
 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
-#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
+#endif	/* CONFIG_X86_64 */
 
 static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
 		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
 		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
 		bool last, unsigned long limit)
 {
-	int i, nr, flush = 0;
+	int flush = 0;
+	pud_t *pud;
 
-	nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
-	for (i = 0; i < nr; i++) {
-		pud_t *pud;
 
-		if (p4d_none(p4d[i]))
-			continue;
+	if (p4d_none(*p4d))
+		return flush;
 
-		pud = pud_offset(&p4d[i], 0);
-		if (PTRS_PER_PUD > 1)
-			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-		flush |= xen_pud_walk(mm, pud, func,
-				last && i == nr - 1, limit);
-	}
+	pud = pud_offset(p4d, 0);
+	if (PTRS_PER_PUD > 1)
+		flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+	flush |= xen_pud_walk(mm, pud, func, last, limit);
 	return flush;
 }
 
@@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 			continue;
 
 		p4d = p4d_offset(&pgd[i], 0);
-		if (PTRS_PER_P4D > 1)
-			flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
 		flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
 	}
 
@@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
-	unsigned int i;
 	bool unpin;
 
 	unpin = (vaddr == 2 * PGDIR_SIZE);
 	vaddr &= PMD_MASK;
 	pgd = pgd_offset_k(vaddr);
 	p4d = p4d_offset(pgd, 0);
-	for (i = 0; i < PTRS_PER_P4D; i++) {
-		if (p4d_none(p4d[i]))
-			continue;
-		xen_cleanmfnmap_p4d(p4d + i, unpin);
-	}
-	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		set_pgd(pgd, __pgd(0));
-		xen_cleanmfnmap_free_pgtbl(p4d, unpin);
-	}
+	if (!p4d_none(*p4d))
+		xen_cleanmfnmap_p4d(p4d, unpin);
 }
 
 static void __init xen_pagetable_p2m_free(void)
@@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }
 
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
  */
 void __init xen_relocate_p2m(void)
 {
-	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
+	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
 	unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
-	int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
+	int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
 	pte_t *pt;
 	pmd_t *pmd;
 	pud_t *pud;
-	p4d_t *p4d = NULL;
 	pgd_t *pgd;
 	unsigned long *new_p2m;
 	int save_pud;
@@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void)
 	n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
 	n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
 	n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
-	if (PTRS_PER_P4D > 1)
-		n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
-	else
-		n_p4d = 0;
-	n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
+	n_frames = n_pte + n_pt + n_pmd + n_pud;
 
 	new_area = xen_find_free_area(PFN_PHYS(n_frames));
 	if (!new_area) {
@@ -2065,76 +2046,56 @@ void __init xen_relocate_p2m(void)
 	 * To avoid any possible virtual address collision, just use
 	 * 2 * PUD_SIZE for the new area.
 	 */
-	p4d_phys = new_area;
-	pud_phys = p4d_phys + PFN_PHYS(n_p4d);
+	pud_phys = new_area;
 	pmd_phys = pud_phys + PFN_PHYS(n_pud);
 	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
 	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
 
 	pgd = __va(read_cr3_pa());
 	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
-	idx_p4d = 0;
 	save_pud = n_pud;
-	do {
-		if (n_p4d > 0) {
-			p4d = early_memremap(p4d_phys, PAGE_SIZE);
-			clear_page(p4d);
-			n_pud = min(save_pud, PTRS_PER_P4D);
-		}
-		for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
-			pud = early_memremap(pud_phys, PAGE_SIZE);
-			clear_page(pud);
-			for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
-				 idx_pmd++) {
-				pmd = early_memremap(pmd_phys, PAGE_SIZE);
-				clear_page(pmd);
-				for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
-					 idx_pt++) {
-					pt = early_memremap(pt_phys, PAGE_SIZE);
-					clear_page(pt);
-					for (idx_pte = 0;
-						 idx_pte < min(n_pte, PTRS_PER_PTE);
-						 idx_pte++) {
-						set_pte(pt + idx_pte,
-								pfn_pte(p2m_pfn, PAGE_KERNEL));
-						p2m_pfn++;
-					}
-					n_pte -= PTRS_PER_PTE;
-					early_memunmap(pt, PAGE_SIZE);
-					make_lowmem_page_readonly(__va(pt_phys));
-					pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
-							PFN_DOWN(pt_phys));
-					set_pmd(pmd + idx_pt,
-							__pmd(_PAGE_TABLE | pt_phys));
-					pt_phys += PAGE_SIZE;
+	for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
+		pud = early_memremap(pud_phys, PAGE_SIZE);
+		clear_page(pud);
+		for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
+				idx_pmd++) {
+			pmd = early_memremap(pmd_phys, PAGE_SIZE);
+			clear_page(pmd);
+			for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
+					idx_pt++) {
+				pt = early_memremap(pt_phys, PAGE_SIZE);
+				clear_page(pt);
+				for (idx_pte = 0;
+						idx_pte < min(n_pte, PTRS_PER_PTE);
+						idx_pte++) {
+					set_pte(pt + idx_pte,
+							pfn_pte(p2m_pfn, PAGE_KERNEL));
+					p2m_pfn++;
 				}
-				n_pt -= PTRS_PER_PMD;
-				early_memunmap(pmd, PAGE_SIZE);
-				make_lowmem_page_readonly(__va(pmd_phys));
-				pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
-						PFN_DOWN(pmd_phys));
-				set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
-				pmd_phys += PAGE_SIZE;
+				n_pte -= PTRS_PER_PTE;
+				early_memunmap(pt, PAGE_SIZE);
+				make_lowmem_page_readonly(__va(pt_phys));
+				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
+						PFN_DOWN(pt_phys));
+				set_pmd(pmd + idx_pt,
+						__pmd(_PAGE_TABLE | pt_phys));
+				pt_phys += PAGE_SIZE;
 			}
-			n_pmd -= PTRS_PER_PUD;
-			early_memunmap(pud, PAGE_SIZE);
-			make_lowmem_page_readonly(__va(pud_phys));
-			pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
-			if (n_p4d > 0)
-				set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
-			else
-				set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
-			pud_phys += PAGE_SIZE;
+			n_pt -= PTRS_PER_PMD;
+			early_memunmap(pmd, PAGE_SIZE);
+			make_lowmem_page_readonly(__va(pmd_phys));
+			pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
+					PFN_DOWN(pmd_phys));
+			set_pud(pud + idx_pmd, __pud(_PAGE_TABLE | pmd_phys));
+			pmd_phys += PAGE_SIZE;
 		}
-		if (n_p4d > 0) {
-			save_pud -= PTRS_PER_P4D;
-			early_memunmap(p4d, PAGE_SIZE);
-			make_lowmem_page_readonly(__va(p4d_phys));
-			pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
-			set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
-			p4d_phys += PAGE_SIZE;
-		}
-	} while (++idx_p4d < n_p4d);
+		n_pmd -= PTRS_PER_PUD;
+		early_memunmap(pud, PAGE_SIZE);
+		make_lowmem_page_readonly(__va(pud_phys));
+		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
+		set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+		pud_phys += PAGE_SIZE;
+	}
 
 	/* Now copy the old p2m info to the new area. */
 	memcpy(new_p2m, xen_p2m_addr, size);
@@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	pv_mmu_ops.set_p4d = xen_set_p4d;
 #endif
 
@@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
 	pv_mmu_ops.release_pte = xen_release_pte;
 	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
@@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
 	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
 
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_p4d = xen_set_p4d_hyper,
 
 	.alloc_pud = xen_alloc_pmd_init,
 	.release_pud = xen_release_pmd_init,
-#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
+#endif	/* CONFIG_X86_64 */
 
 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c
index 05f91ce..c0c756c 100644
--- a/arch/x86/xen/smp_pv.c
+++ b/arch/x86/xen/smp_pv.c
@@ -14,6 +14,7 @@
  * single-threaded.
  */
 #include <linux/sched.h>
+#include <linux/sched/task_stack.h>
 #include <linux/err.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -294,12 +295,19 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 #endif
 	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
 
+	/*
+	 * Bring up the CPU in cpu_bringup_and_idle() with the stack
+	 * pointing just below where pt_regs would be if it were a normal
+	 * kernel entry.
+	 */
 	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
 	ctxt->flags = VGCF_IN_KERNEL;
 	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
 	ctxt->user_regs.ds = __USER_DS;
 	ctxt->user_regs.es = __USER_DS;
 	ctxt->user_regs.ss = __KERNEL_DS;
+	ctxt->user_regs.cs = __KERNEL_CS;
+	ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);
 
 	xen_copy_trap_info(ctxt->trap_ctxt);
 
@@ -314,8 +322,13 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 	ctxt->gdt_frames[0] = gdt_mfn;
 	ctxt->gdt_ents      = GDT_ENTRIES;
 
+	/*
+	 * Set SS:SP that Xen will use when entering guest kernel mode
+	 * from guest user mode.  Subsequent calls to load_sp0() can
+	 * change this value.
+	 */
 	ctxt->kernel_ss = __KERNEL_DS;
-	ctxt->kernel_sp = idle->thread.sp0;
+	ctxt->kernel_sp = task_top_of_stack(idle);
 
 #ifdef CONFIG_X86_32
 	ctxt->event_callback_cs     = __KERNEL_CS;
@@ -327,10 +340,8 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
 		(unsigned long)xen_hypervisor_callback;
 	ctxt->failsafe_callback_eip =
 		(unsigned long)xen_failsafe_callback;
-	ctxt->user_regs.cs = __KERNEL_CS;
 	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
 
-	ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
 	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
 	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
 		BUG();
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c98a48c..8a10c9a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -30,7 +30,7 @@
 xen_pv_trap xendebug
 xen_pv_trap int3
 xen_pv_trap xenint3
-xen_pv_trap nmi
+xen_pv_trap xennmi
 xen_pv_trap overflow
 xen_pv_trap bounds
 xen_pv_trap invalid_op
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index b5b8d7f..497cc55 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -10,6 +10,7 @@
 #include <asm/boot.h>
 #include <asm/asm.h>
 #include <asm/page_types.h>
+#include <asm/unwind_hints.h>
 
 #include <xen/interface/elfnote.h>
 #include <xen/interface/features.h>
@@ -20,6 +21,7 @@
 #ifdef CONFIG_XEN_PV
 	__INIT
 ENTRY(startup_xen)
+	UNWIND_HINT_EMPTY
 	cld
 
 	/* Clear .bss */
@@ -34,21 +36,24 @@
 	mov $init_thread_union+THREAD_SIZE, %_ASM_SP
 
 	jmp xen_start_kernel
-
+END(startup_xen)
 	__FINIT
 #endif
 
 .pushsection .text
 	.balign PAGE_SIZE
 ENTRY(hypercall_page)
-	.skip PAGE_SIZE
+	.rept (PAGE_SIZE / 32)
+		UNWIND_HINT_EMPTY
+		.skip 32
+	.endr
 
 #define HYPERCALL(n) \
 	.equ xen_hypercall_##n, hypercall_page + __HYPERVISOR_##n * 32; \
 	.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
 #include <asm/xen-hypercalls.h>
 #undef HYPERCALL
-
+END(hypercall_page)
 .popsection
 
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 8acfc1e..63e56f6 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -687,7 +687,7 @@
 #define BUG_TABLE
 #endif
 
-#ifdef CONFIG_ORC_UNWINDER
+#ifdef CONFIG_UNWINDER_ORC
 #define ORC_UNWIND_TABLE						\
 	. = ALIGN(4);							\
 	.orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {	\
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 43edf65..91b46f9 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2496,7 +2496,7 @@ void vmemmap_populate_print_last(void);
 void vmemmap_free(unsigned long start, unsigned long end);
 #endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
-				  unsigned long size);
+				  unsigned long nr_pages);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index c9c4a81..a507f43 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1151,13 +1151,17 @@ struct mem_section {
 #define SECTION_ROOT_MASK	(SECTIONS_PER_ROOT - 1)
 
 #ifdef CONFIG_SPARSEMEM_EXTREME
-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+extern struct mem_section **mem_section;
 #else
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif
 
 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
+#ifdef CONFIG_SPARSEMEM_EXTREME
+	if (!mem_section)
+		return NULL;
+#endif
 	if (!mem_section[SECTION_NR_TO_ROOT(nr)])
 		return NULL;
 	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index dfdad67..ff21b4d 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -376,7 +376,7 @@
 	  that runtime stack traces are more reliable.
 
 	  This is also a prerequisite for generation of ORC unwind data, which
-	  is needed for CONFIG_ORC_UNWINDER.
+	  is needed for CONFIG_UNWINDER_ORC.
 
 	  For more information, see
 	  tools/objtool/Documentation/stack-validation.txt.
diff --git a/mm/gup.c b/mm/gup.c
index b2b4d42..dfcde13 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1643,6 +1643,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }
 
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pgd_t *pgdp;
+
+	pgdp = pgd_offset(current->mm, addr);
+	do {
+		pgd_t pgd = READ_ONCE(*pgdp);
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			return;
+		if (unlikely(pgd_huge(pgd))) {
+			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+					  pages, nr))
+				return;
+		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+					 PGDIR_SHIFT, next, write, pages, nr))
+				return;
+		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+			return;
+	} while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+	unsigned long len, end;
+
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	return end >= start;
+}
+#endif
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1650,10 +1691,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
 {
-	struct mm_struct *mm = current->mm;
 	unsigned long addr, len, end;
-	unsigned long next, flags;
-	pgd_t *pgdp;
+	unsigned long flags;
 	int nr = 0;
 
 	start &= PAGE_MASK;
@@ -1677,45 +1716,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	 * block IPIs that come from THPs splitting.
 	 */
 
-	local_irq_save(flags);
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = READ_ONCE(*pgdp);
-
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			break;
-		if (unlikely(pgd_huge(pgd))) {
-			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-					  pages, &nr))
-				break;
-		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-					 PGDIR_SHIFT, next, write, pages, &nr))
-				break;
-		} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-			break;
-	} while (pgdp++, addr = next, addr != end);
-	local_irq_restore(flags);
+	if (gup_fast_permitted(start, nr_pages, write)) {
+		local_irq_save(flags);
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_restore(flags);
+	}
 
 	return nr;
 }
 
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-	unsigned long len, end;
-
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-	end = start + len;
-	return end >= start;
-}
-#endif
-
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
@@ -1735,12 +1744,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
+	unsigned long addr, len, end;
 	int nr = 0, ret = 0;
 
 	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;
 
 	if (gup_fast_permitted(start, nr_pages, write)) {
-		nr = __get_user_pages_fast(start, nr_pages, write, pages);
+		local_irq_disable();
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_enable();
 		ret = nr;
 	}
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 77e4d3c..8dfd13f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5646,6 +5646,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
 	unsigned long start_pfn, end_pfn;
 	int i, this_nid;
 
+#ifdef CONFIG_SPARSEMEM_EXTREME
+	if (!mem_section) {
+		unsigned long size, align;
+
+		size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+		align = 1 << (INTERNODE_CACHE_SHIFT);
+		mem_section = memblock_virt_alloc(size, align);
+	}
+#endif
+
 	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
 		memory_present(this_nid, start_pfn, end_pfn);
 }
diff --git a/mm/sparse.c b/mm/sparse.c
index 4900707..0441388 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -23,8 +23,7 @@
  * 1) mem_section	- memory sections, mem_map's for valid memory
  */
 #ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
-	____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
 #else
 struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
 	____cacheline_internodealigned_in_smp;
@@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
 int __section_nr(struct mem_section* ms)
 {
 	unsigned long root_nr;
-	struct mem_section* root;
+	struct mem_section *root = NULL;
 
 	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
 		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
 		     break;
 	}
 
-	VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+	VM_BUG_ON(!root);
 
 	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
 }
@@ -330,11 +329,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 {
 	unsigned long usemap_snr, pgdat_snr;
-	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
-	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+	static unsigned long old_usemap_snr;
+	static unsigned long old_pgdat_snr;
 	struct pglist_data *pgdat = NODE_DATA(nid);
 	int usemap_nid;
 
+	/* First call */
+	if (!old_usemap_snr) {
+		old_usemap_snr = NR_MEM_SECTIONS;
+		old_pgdat_snr = NR_MEM_SECTIONS;
+	}
+
 	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
 	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
 	if (usemap_snr == pgdat_snr)
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index bb831d4..e63af4e 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -259,7 +259,7 @@
 
 __objtool_obj := $(objtree)/tools/objtool/objtool
 
-objtool_args = $(if $(CONFIG_ORC_UNWINDER),orc generate,check)
+objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
 
 ifndef CONFIG_FRAME_POINTER
 objtool_args += --no-fp
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index c0e26ad..9b341584 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1757,11 +1757,14 @@ static int validate_branch(struct objtool_file *file, struct instruction *first,
 		if (insn->dead_end)
 			return 0;
 
-		insn = next_insn;
-		if (!insn) {
+		if (!next_insn) {
+			if (state.cfa.base == CFI_UNDEFINED)
+				return 0;
 			WARN("%s: unexpected end of section", sec->name);
 			return 1;
 		}
+
+		insn = next_insn;
 	}
 
 	return 0;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 31e0f91..07f3299 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -70,7 +70,7 @@ static void cmd_usage(void)
 
 	printf("\n");
 
-	exit(1);
+	exit(129);
 }
 
 static void handle_options(int *argc, const char ***argv)
@@ -86,9 +86,7 @@ static void handle_options(int *argc, const char ***argv)
 			break;
 		} else {
 			fprintf(stderr, "Unknown option: %s\n", cmd);
-			fprintf(stderr, "\n Usage: %s\n",
-				objtool_usage_string);
-			exit(1);
+			cmd_usage();
 		}
 
 		(*argv)++;