ARM: smp: store current pointer in TPIDRURO register if available

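Use the user space TLS register TPIDRURO to keep the 'current' pointer
while running in the kernel. This removes the need to access it via
thread_info, which is located at the base of the stack, but will be
moved out of there in a subsequent patch.

Since user space still relies on TPIDRURO for its TLS pointer, the
user's TLS value is written back to the register on return to user
space, and the 'current' pointer is reloaded from the per-CPU
__entry_task variable on entry to the kernel.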

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index b73b2a9..deead42 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1164,6 +1164,10 @@
 	def_bool y
 	depends on SMP && (CPU_V6K || CPU_V7)
 
+config CURRENT_POINTER_IN_TPIDRURO
+	def_bool y
+	depends on PCPU_OFFSET_IN_TPIDRPRW
+
 config ARM_CPU_TOPOLOGY
 	bool "Support cpu topology definition"
 	depends on SMP && CPU_V7
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index e2b1fd5..1b77b09 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -199,6 +199,21 @@
 	.endm
 	.endr
 
+	.macro	get_current, rd
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	mrc	p15, 0, \rd, c13, c0, 3		@ 'current' is kept in TPIDRURO
+#else
+	get_thread_info \rd			@ fall back to thread_info
+	ldr	\rd, [\rd, #TI_TASK]		@ and load its TI_TASK field
+#endif
+	.endm
+
+	.macro	set_current, rn
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	mcr	p15, 0, \rn, c13, c0, 3		@ TPIDRURO = new 'current' pointer
+#endif /* expands to nothing when 'current' is not kept in TPIDRURO */
+	.endm
+
 /*
  * Get current thread_info.
  */
diff --git a/arch/arm/include/asm/current.h b/arch/arm/include/asm/current.h
new file mode 100644
index 0000000..ca924a6
--- /dev/null
+++ b/arch/arm/include/asm/current.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_ARM_CURRENT_H
+#define _ASM_ARM_CURRENT_H
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+
+struct task_struct;
+
+/*
+ * get_current() - return the task_struct pointer of the running task.
+ *
+ * With CONFIG_CURRENT_POINTER_IN_TPIDRURO the pointer is kept in the
+ * TPIDRURO register, so it is obtained with a single MRC.
+ */
+static inline struct task_struct *get_current(void)
+{
+	struct task_struct *cur;
+
+	asm("mrc p15, 0, %0, c13, c0, 3" : "=r"(cur));	/* read TPIDRURO */
+	return cur;
+}
+
+#define current get_current()
+#else
+#include <asm-generic/current.h>
+#endif /* CONFIG_CURRENT_POINTER_IN_TPIDRURO */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ARM_CURRENT_H */
diff --git a/arch/arm/include/asm/smp.h b/arch/arm/include/asm/smp.h
index 05c0e99..4a01566 100644
--- a/arch/arm/include/asm/smp.h
+++ b/arch/arm/include/asm/smp.h
@@ -69,6 +69,9 @@
 	void *stack;
 #ifdef CONFIG_PCPU_OFFSET_IN_TPIDRPRW
 	unsigned long cpu;
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	struct task_struct *idle;
+#endif
 #endif
 };
 extern struct secondary_data secondary_data;
diff --git a/arch/arm/include/asm/switch_to.h b/arch/arm/include/asm/switch_to.h
index 007d8fe..62d2fa0 100644
--- a/arch/arm/include/asm/switch_to.h
+++ b/arch/arm/include/asm/switch_to.h
@@ -22,11 +22,20 @@
  * contains the memory barrier to tell GCC not to cache `current'.
  */
 extern struct task_struct *__switch_to(struct task_struct *, struct thread_info *, struct thread_info *);
+DECLARE_PER_CPU(struct task_struct *, __entry_task);
 
+/*
+ * __entry_task is what the kernel entry code reloads into TPIDRURO, so it
+ * must name the incoming task. Write it before calling __switch_to(): when
+ * that call returns we run on the stack of the resumed task, whose copy of
+ * 'next' is whatever it switched to when it was last scheduled out.
+ */
 #define switch_to(prev,next,last)					\
 do {									\
 	__complete_pending_tlbi();					\
+	if (IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO))		\
+		__this_cpu_write(__entry_task, next);			\
 	last = __switch_to(prev,task_thread_info(prev), task_thread_info(next));	\
 } while (0)
 
 #endif /* __ASM_ARM_SWITCH_TO_H */
diff --git a/arch/arm/include/asm/tls.h b/arch/arm/include/asm/tls.h
index 5a66c3b..6fe213a 100644
--- a/arch/arm/include/asm/tls.h
+++ b/arch/arm/include/asm/tls.h
@@ -12,7 +12,9 @@
 
 	.macro switch_tls_v6k, base, tp, tpuser, tmp1, tmp2
 	mrc	p15, 0, \tmp2, c13, c0, 2	@ get the user r/w register
+#ifndef CONFIG_CURRENT_POINTER_IN_TPIDRURO	/* else TPIDRURO is not written here */
 	mcr	p15, 0, \tp, c13, c0, 3		@ set TLS register
+#endif /* !CONFIG_CURRENT_POINTER_IN_TPIDRURO */
 	mcr	p15, 0, \tpuser, c13, c0, 2	@ and the user r/w register
 	str	\tmp2, [\base, #TI_TP_VALUE + 4] @ save it
 	.endm
@@ -77,7 +79,7 @@
 	 */
 	barrier();
 
-	if (!tls_emu) {
+	if (!tls_emu && !IS_ENABLED(CONFIG_CURRENT_POINTER_IN_TPIDRURO)) {
 		if (has_tls_reg) {
 			asm("mcr p15, 0, %0, c13, c0, 3"
 			    : : "r" (val));
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 241b73d..0d4fc82 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -385,6 +385,16 @@
  ATRAP( mcrne	p15, 0, r8, c1, c0, 0)
 
 	@
+	@ Reload the 'current' pointer into TPIDRURO if needed
+	@
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	adr_l	r7, __entry_task		@ r7 = address of per-CPU var __entry_task
+	mrc	p15, 0, r8, c13, c0, 4		@ r8 = per-CPU offset, read from TPIDRPRW
+	ldr	r7, [r7, r8]			@ r7 = __entry_task of this CPU
+	mcr	p15, 0, r7, c13, c0, 3		@ TPIDRURO = r7, the 'current' pointer
+#endif
+
+	@
 	@ Clear FP to mark the first stack frame
 	@
 	zero_fp
@@ -766,6 +776,10 @@
 #ifdef CONFIG_CPU_USE_DOMAINS
 	mcr	p15, 0, r6, c3, c0, 0		@ Set domain register
 #endif
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	ldr	r5, [r2, #TI_TASK]		@ r2 is a thread_info: fetch its task pointer
+	set_current r5				@ r5 is free, the next insn overwrites it
+#endif
 	mov	r5, r0
 	add	r4, r2, #TI_CPU_SAVE
 	ldr	r0, =thread_notify_head
diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S
index 40db0f9..d0e1a11 100644
--- a/arch/arm/kernel/entry-header.S
+++ b/arch/arm/kernel/entry-header.S
@@ -292,6 +292,12 @@
 
 
 	.macro	restore_user_regs, fast = 0, offset = 0
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	get_thread_info r1			@ r1 = thread_info of this task
+	ldr	r1, [r1, #TI_TP_VALUE]		@ r1 = value stored at TI_TP_VALUE
+	mcr	p15, 0, r1, c13, c0, 3		@ hand TPIDRURO back to user space TLS
+#endif
+
 	uaccess_enable r1, isb=0
 #ifndef CONFIG_THUMB2_KERNEL
 	@ ARM mode restore
diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S
index 29b2eda..da18e0a 100644
--- a/arch/arm/kernel/head-common.S
+++ b/arch/arm/kernel/head-common.S
@@ -105,6 +105,11 @@
 	mov	r1, #0
 	bl	__memset			@ clear .bss
 
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	adr_l	r0, init_task			@ r0 = &init_task (swapper task_struct)
+	set_current r0				@ make it 'current'; ldmia below reloads r0
+#endif
+
 	ldmia	r4, {r0, r1, r2, r3}
 	str	r9, [r0]			@ Save processor ID
 	str	r7, [r1]			@ Save machine type
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 553f640..ff131a4 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -426,7 +426,8 @@
 ENTRY(__secondary_switched)
 	adr_l	r7, secondary_data + 12		@ get secondary_data.stack
 #ifdef CONFIG_PCPU_OFFSET_IN_TPIDRPRW
-	ldr	r8, [r7, #4]			@ get secondary_data.cpu
+	ldrd	r8, r9, [r7, #4]		@ get secondary_data.cpu and .idle
+	set_current r9				@ set 'current' pointer
 	adr_l	r9, __per_cpu_offset		@ index the per-CPU offset array
 	ldr	r8, [r9, r8, lsl #2]		@ using the CPU number
 	mcr	p15, 0, r8, c13, c0, 4		@ set TPIDRPRW
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index fc9e8b3..5d56b02 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -38,6 +38,10 @@
 
 #include "signal.h"
 
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+DEFINE_PER_CPU(struct task_struct *, __entry_task);	/* task that entry code reloads into TPIDRURO */
+#endif
+
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c
index a3ec440..d4ac0d1 100644
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -160,6 +160,9 @@
 #endif
 #ifdef CONFIG_PCPU_OFFSET_IN_TPIDRPRW
 	secondary_data.cpu = cpu;
+#ifdef CONFIG_CURRENT_POINTER_IN_TPIDRURO
+	secondary_data.idle = idle;
+#endif
 #endif
 	sync_cache_w(&secondary_data);
 
diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S
index e2c743a..d48ba99 100644
--- a/arch/arm/mm/proc-macros.S
+++ b/arch/arm/mm/proc-macros.S
@@ -30,8 +30,7 @@
  * act_mm - get current->active_mm
  */
 	.macro	act_mm, rd
-	get_thread_info \rd
-	ldr	\rd, [\rd, #TI_TASK]
+	get_current \rd
 	.if (TSK_ACTIVE_MM > IMM12_MASK)
 	add	\rd, \rd, #TSK_ACTIVE_MM & ~IMM12_MASK
 	.endif