blob: 7387b68c745b21344370af4598c5d738953c3595 [file] [log] [blame]
Thomas Gleixnercaab2772019-06-03 07:44:50 +02001// SPDX-License-Identifier: GPL-2.0-only
Catalin Marinasb3901d52012-03-05 11:49:28 +00002/*
3 * Based on arch/arm/kernel/process.c
4 *
5 * Original Copyright (C) 1995 Linus Torvalds
6 * Copyright (C) 1996-2000 Russell King - Converted to ARM.
7 * Copyright (C) 2012 ARM Ltd.
Catalin Marinasb3901d52012-03-05 11:49:28 +00008 */
AKASHI Takahirofd92d4a2014-04-30 10:51:32 +01009#include <linux/compat.h>
Ard Biesheuvel60c0d452015-03-06 15:49:24 +010010#include <linux/efi.h>
Dave Martinab7876a2020-03-16 16:50:47 +000011#include <linux/elf.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000012#include <linux/export.h>
13#include <linux/sched.h>
Ingo Molnarb17b0152017-02-08 18:51:35 +010014#include <linux/sched/debug.h>
Ingo Molnar29930022017-02-08 18:51:36 +010015#include <linux/sched/task.h>
Ingo Molnar68db0cf2017-02-08 18:51:37 +010016#include <linux/sched/task_stack.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000017#include <linux/kernel.h>
Dave Martinab7876a2020-03-16 16:50:47 +000018#include <linux/mman.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000019#include <linux/mm.h>
Will Deacon780c0832020-09-28 14:03:00 +010020#include <linux/nospec.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000021#include <linux/stddef.h>
Catalin Marinas63f0c602019-07-23 19:58:39 +020022#include <linux/sysctl.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000023#include <linux/unistd.h>
24#include <linux/user.h>
25#include <linux/delay.h>
26#include <linux/reboot.h>
27#include <linux/interrupt.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000028#include <linux/init.h>
29#include <linux/cpu.h>
30#include <linux/elfcore.h>
31#include <linux/pm.h>
32#include <linux/tick.h>
33#include <linux/utsname.h>
34#include <linux/uaccess.h>
35#include <linux/random.h>
36#include <linux/hw_breakpoint.h>
37#include <linux/personality.h>
38#include <linux/notifier.h>
Jisheng Zhang096b3222015-09-16 22:23:21 +080039#include <trace/events/power.h>
Mark Rutlandc02433d2016-11-03 20:23:13 +000040#include <linux/percpu.h>
Dave Martinbc0ee472017-10-31 15:51:05 +000041#include <linux/thread_info.h>
Catalin Marinas63f0c602019-07-23 19:58:39 +020042#include <linux/prctl.h>
Madhavan T. Venkataraman4f62bb72021-11-29 14:28:45 +000043#include <linux/stacktrace.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000044
James Morse57f49592016-02-05 14:58:48 +000045#include <asm/alternative.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000046#include <asm/compat.h>
Julien Thierry19c95f22019-10-15 18:25:44 +010047#include <asm/cpufeature.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000048#include <asm/cacheflush.h>
James Morsed0854412016-10-18 11:27:48 +010049#include <asm/exec.h>
Will Deaconec45d1c2013-01-17 12:31:45 +000050#include <asm/fpsimd.h>
51#include <asm/mmu_context.h>
Vincenzo Frascino637ec832019-09-16 11:51:17 +010052#include <asm/mte.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000053#include <asm/processor.h>
Mark Rutland75031972018-12-07 18:39:25 +000054#include <asm/pointer_auth.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000055#include <asm/stacktrace.h>
Maninder Singhbaa96372021-03-24 12:24:58 +053056#include <asm/switch_to.h>
57#include <asm/system_misc.h>
Catalin Marinasb3901d52012-03-05 11:49:28 +000058
#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
#include <linux/stackprotector.h>
/* System-wide stack canary, used when no per-task canary is configured. */
unsigned long __stack_chk_guard __ro_after_init;
EXPORT_SYMBOL(__stack_chk_guard);
#endif
64
/*
 * Function pointers to optional machine specific functions
 */
/* Optional platform hook to remove system power; may be left NULL. */
void (*pm_power_off)(void);
EXPORT_SYMBOL_GPL(pm_power_off);
70
#ifdef CONFIG_HOTPLUG_CPU
/*
 * Entered from the idle loop once this CPU has been taken offline:
 * hand the CPU over to the hotplug die path.  Never returns.
 */
void __noreturn arch_cpu_idle_dead(void)
{
	cpu_die();
}
#endif
77
/*
 * Called by kexec, immediately prior to machine_kexec().
 *
 * This must completely disable all secondary CPUs; simply causing those CPUs
 * to execute e.g. a RAM-based pin loop is not sufficient. This allows the
 * kexec'd kernel to use any and all RAM as it sees fit, without having to
 * avoid any code or data used by any SW CPU pin loop. The CPU hotplug
 * functionality embodied in smp_shutdown_nonboot_cpus() is used to achieve
 * this.
 */
void machine_shutdown(void)
{
	smp_shutdown_nonboot_cpus(reboot_cpu);
}
91
/*
 * Halting simply requires that the secondary CPUs stop performing any
 * activity (executing tasks, handling interrupts). smp_send_stop()
 * achieves this.
 */
void machine_halt(void)
{
	/* Quiesce this CPU first, then park every other CPU. */
	local_irq_disable();
	smp_send_stop();

	/* Nothing left to do: spin here forever. */
	for (;;)
		;
}
103
/*
 * Power-off simply requires that the secondary CPUs stop performing any
 * activity (executing tasks, handling interrupts). smp_send_stop()
 * achieves this. When the system power is turned off, it will take all CPUs
 * with it.
 */
void machine_power_off(void)
{
	/* Quiesce this CPU, then park all secondaries. */
	local_irq_disable();
	smp_send_stop();
	/* Invoke any registered power-off handlers. */
	do_kernel_power_off();
}
116
/*
 * Restart requires that the secondary CPUs stop performing any activity
 * while the primary CPU resets the system. Systems with multiple CPUs must
 * provide a HW restart implementation, to ensure that all CPUs reset at once.
 * This is required so that any code running after reset on the primary CPU
 * doesn't have to co-ordinate with other CPUs to ensure they aren't still
 * executing pre-reset code, and using RAM that the primary CPU's code wishes
 * to use. Implementing such co-ordination would be essentially impossible.
 */
void machine_restart(char *cmd)
{
	/* Disable interrupts first */
	local_irq_disable();
	smp_send_stop();

	/*
	 * UpdateCapsule() depends on the system being reset via
	 * ResetSystem(), so prefer the EFI path when runtime services
	 * are available.
	 */
	if (efi_enabled(EFI_RUNTIME_SERVICES))
		efi_reboot(reboot_mode, NULL);

	/* Now call the architecture specific reboot code. */
	do_kernel_restart(cmd);

	/*
	 * Whoops - the architecture was unable to reboot.
	 */
	printk("Reboot failed -- System halted\n");
	while (1);
}
148
/*
 * Two-character names for each PSTATE.BTYPE value, indexed by the field
 * value itself; consumed by print_pstate() below.
 */
#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str
static const char *const btypes[] = {
	bstr(NONE, "--"),
	bstr(  JC, "jc"),
	bstr(   C, "-c"),
	bstr(  J , "j-")
};
#undef bstr
157
/*
 * Dump regs->pstate in human-readable form, decoding the AArch32 (compat)
 * or AArch64 view of the flag bits as appropriate.  Uppercase/'+' means a
 * bit is set, lowercase/'-' means clear.
 */
static void print_pstate(struct pt_regs *regs)
{
	u64 pstate = regs->pstate;

	if (compat_user_mode(regs)) {
		printk("pstate: %08llx (%c%c%c%c %c %s %s %c%c%c %cDIT %cSSBS)\n",
			pstate,
			pstate & PSR_AA32_N_BIT ? 'N' : 'n',
			pstate & PSR_AA32_Z_BIT ? 'Z' : 'z',
			pstate & PSR_AA32_C_BIT ? 'C' : 'c',
			pstate & PSR_AA32_V_BIT ? 'V' : 'v',
			pstate & PSR_AA32_Q_BIT ? 'Q' : 'q',
			pstate & PSR_AA32_T_BIT ? "T32" : "A32",
			pstate & PSR_AA32_E_BIT ? "BE" : "LE",
			pstate & PSR_AA32_A_BIT ? 'A' : 'a',
			pstate & PSR_AA32_I_BIT ? 'I' : 'i',
			pstate & PSR_AA32_F_BIT ? 'F' : 'f',
			pstate & PSR_AA32_DIT_BIT ? '+' : '-',
			pstate & PSR_AA32_SSBS_BIT ? '+' : '-');
	} else {
		/* Decode BTYPE via the lookup table defined above. */
		const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >>
					       PSR_BTYPE_SHIFT];

		printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO %cTCO %cDIT %cSSBS BTYPE=%s)\n",
			pstate,
			pstate & PSR_N_BIT ? 'N' : 'n',
			pstate & PSR_Z_BIT ? 'Z' : 'z',
			pstate & PSR_C_BIT ? 'C' : 'c',
			pstate & PSR_V_BIT ? 'V' : 'v',
			pstate & PSR_D_BIT ? 'D' : 'd',
			pstate & PSR_A_BIT ? 'A' : 'a',
			pstate & PSR_I_BIT ? 'I' : 'i',
			pstate & PSR_F_BIT ? 'F' : 'f',
			pstate & PSR_PAN_BIT ? '+' : '-',
			pstate & PSR_UAO_BIT ? '+' : '-',
			pstate & PSR_TCO_BIT ? '+' : '-',
			pstate & PSR_DIT_BIT ? '+' : '-',
			pstate & PSR_SSBS_BIT ? '+' : '-',
			btype_str);
	}
}
199
/*
 * Print the saved register state in @regs: pstate, pc, lr, sp and the
 * general-purpose registers.  For compat (AArch32) frames only r0-r12 plus
 * the banked lr/sp are shown; for native frames x0-x29 plus lr/sp.
 */
void __show_regs(struct pt_regs *regs)
{
	int i, top_reg;
	u64 lr, sp;

	if (compat_user_mode(regs)) {
		lr = regs->compat_lr;
		sp = regs->compat_sp;
		top_reg = 12;
	} else {
		lr = regs->regs[30];
		sp = regs->sp;
		top_reg = 29;
	}

	show_regs_print_info(KERN_DEFAULT);
	print_pstate(regs);

	if (!user_mode(regs)) {
		/* Kernel addresses: symbolise, and strip any PAC from lr. */
		printk("pc : %pS\n", (void *)regs->pc);
		printk("lr : %pS\n", (void *)ptrauth_strip_kernel_insn_pac(lr));
	} else {
		/* User addresses: raw hex only, never symbolised. */
		printk("pc : %016llx\n", regs->pc);
		printk("lr : %016llx\n", lr);
	}

	printk("sp : %016llx\n", sp);

	if (system_uses_irq_prio_masking())
		printk("pmr_save: %08llx\n", regs->pmr_save);

	i = top_reg;

	/* Print registers highest-first, three per output line. */
	while (i >= 0) {
		printk("x%-2d: %016llx", i, regs->regs[i]);

		/* Continue the line until i hits a multiple of 3 (or -1). */
		while (i-- % 3)
			pr_cont(" x%-2d: %016llx", i, regs->regs[i]);

		pr_cont("\n");
	}
}
242
/* Dump register state followed by a backtrace of the given frame. */
void show_regs(struct pt_regs *regs)
{
	__show_regs(regs);
	dump_backtrace(regs, NULL, KERN_DEFAULT);
}
248
/*
 * Reset the TLS registers (and the shadow TLS value for compat tasks) for
 * the current task, e.g. across exec().
 */
static void tls_thread_flush(void)
{
	write_sysreg(0, tpidr_el0);
	if (system_supports_tpidr2())
		write_sysreg_s(0, SYS_TPIDR2_EL0);

	if (is_compat_task()) {
		current->thread.uw.tp_value = 0;

		/*
		 * We need to ensure ordering between the shadow state and the
		 * hardware state, so that we don't corrupt the hardware state
		 * with a stale shadow state during context switch.
		 */
		barrier();
		write_sysreg(0, tpidrro_el0);
	}
}
267
Catalin Marinas63f0c602019-07-23 19:58:39 +0200268static void flush_tagged_addr_state(void)
269{
270 if (IS_ENABLED(CONFIG_ARM64_TAGGED_ADDR_ABI))
271 clear_thread_flag(TIF_TAGGED_ADDR);
272}
273
/*
 * Reset the current task's architectural state on exec(): FP/SIMD state,
 * TLS registers, hardware breakpoints and the tagged-address ABI opt-in.
 */
void flush_thread(void)
{
	fpsimd_flush_thread();
	tls_thread_flush();
	flush_ptrace_hw_breakpoint(current);
	flush_tagged_addr_state();
}
281
/* Release per-task FP/SIMD state when the task_struct is freed. */
void arch_release_task_struct(struct task_struct *tsk)
{
	fpsimd_release_task(tsk);
}
286
/*
 * Copy @src's task_struct into @dst at fork time, taking care of the
 * arch-specific state that must not be shared between the two tasks
 * (SVE/SME buffers, pending MTE faults).  Returns 0 or -ENOMEM.
 */
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
	if (current->mm)
		fpsimd_preserve_current_state();
	*dst = *src;

	/* We rely on the above assignment to initialize dst's thread_flags: */
	BUILD_BUG_ON(!IS_ENABLED(CONFIG_THREAD_INFO_IN_TASK));

	/*
	 * Detach src's sve_state (if any) from dst so that it does not
	 * get erroneously used or freed prematurely.  dst's copies
	 * will be allocated on demand later on if dst uses SVE.
	 * For consistency, also clear TIF_SVE here: this could be done
	 * later in copy_process(), but to avoid tripping up future
	 * maintainers it is best not to leave TIF flags and buffers in
	 * an inconsistent state, even temporarily.
	 */
	dst->thread.sve_state = NULL;
	clear_tsk_thread_flag(dst, TIF_SVE);

	/*
	 * In the unlikely event that we create a new thread with ZA
	 * enabled we should retain the ZA and ZT state so duplicate
	 * it here. This may be shortly freed if we exec() or if
	 * CLONE_SETTLS but it's simpler to do it here. To avoid
	 * confusing the rest of the code ensure that we have a
	 * sve_state allocated whenever sme_state is allocated.
	 */
	if (thread_za_enabled(&src->thread)) {
		dst->thread.sve_state = kzalloc(sve_state_size(src),
						GFP_KERNEL);
		if (!dst->thread.sve_state)
			return -ENOMEM;

		dst->thread.sme_state = kmemdup(src->thread.sme_state,
						sme_state_size(src),
						GFP_KERNEL);
		if (!dst->thread.sme_state) {
			/* Unwind the sve_state allocation on failure. */
			kfree(dst->thread.sve_state);
			dst->thread.sve_state = NULL;
			return -ENOMEM;
		}
	} else {
		dst->thread.sme_state = NULL;
		clear_tsk_thread_flag(dst, TIF_SME);
	}

	dst->thread.fp_type = FP_STATE_FPSIMD;

	/* clear any pending asynchronous tag fault raised by the parent */
	clear_tsk_thread_flag(dst, TIF_MTE_ASYNC_FAULT);

	return 0;
}
342
/* Assembly entry point that newly created threads first run through. */
asmlinkage void ret_from_fork(void) asm("ret_from_fork");
344
/*
 * Set up the new task @p's register and cpu_context state at clone/fork
 * time.  User threads inherit (a copy of) the parent's pt_regs; kernel
 * threads get a poisoned frame and start at args->fn.  Returns 0.
 */
int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
	unsigned long clone_flags = args->flags;
	unsigned long stack_start = args->stack;
	unsigned long tls = args->tls;
	struct pt_regs *childregs = task_pt_regs(p);

	memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));

	/*
	 * In case p was allocated the same task_struct pointer as some
	 * other recently-exited task, make sure p is disassociated from
	 * any cpu that may have run that now-exited task recently.
	 * Otherwise we could erroneously skip reloading the FPSIMD
	 * registers for p.
	 */
	fpsimd_flush_task_state(p);

	ptrauth_thread_init_kernel(p);

	if (likely(!args->fn)) {
		/* User thread: start from a copy of the parent's regs. */
		*childregs = *current_pt_regs();
		childregs->regs[0] = 0;	/* child sees 0 from fork()/clone() */

		/*
		 * Read the current TLS pointer from tpidr_el0 as it may be
		 * out-of-sync with the saved value.
		 */
		*task_user_tls(p) = read_sysreg(tpidr_el0);
		if (system_supports_tpidr2())
			p->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);

		if (stack_start) {
			if (is_compat_thread(task_thread_info(p)))
				childregs->compat_sp = stack_start;
			else
				childregs->sp = stack_start;
		}

		/*
		 * If a TLS pointer was passed to clone, use it for the new
		 * thread.  We also reset TPIDR2 if it's in use.
		 */
		if (clone_flags & CLONE_SETTLS) {
			p->thread.uw.tp_value = tls;
			p->thread.tpidr2_el0 = 0;
		}
	} else {
		/*
		 * A kthread has no context to ERET to, so ensure any buggy
		 * ERET is treated as an illegal exception return.
		 *
		 * When a user task is created from a kthread, childregs will
		 * be initialized by start_thread() or start_compat_thread().
		 */
		memset(childregs, 0, sizeof(struct pt_regs));
		childregs->pstate = PSR_MODE_EL1h | PSR_IL_BIT;

		/* ret_from_fork picks fn/fn_arg out of x19/x20. */
		p->thread.cpu_context.x19 = (unsigned long)args->fn;
		p->thread.cpu_context.x20 = (unsigned long)args->fn_arg;
	}
	p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
	p->thread.cpu_context.sp = (unsigned long)childregs;
	/*
	 * For the benefit of the unwinder, set up childregs->stackframe
	 * as the final frame for the new task.
	 */
	p->thread.cpu_context.fp = (unsigned long)childregs->stackframe;

	ptrace_hw_copy_thread(p);

	return 0;
}
418
/* Snapshot the live TLS registers into the current task's thread struct. */
void tls_preserve_current_state(void)
{
	*task_user_tls(current) = read_sysreg(tpidr_el0);
	/* TPIDR2 is only tracked for native (64-bit) tasks. */
	if (system_supports_tpidr2() && !is_compat_task())
		current->thread.tpidr2_el0 = read_sysreg_s(SYS_TPIDR2_EL0);
}
425
/*
 * Save the outgoing task's TLS registers and install @next's.  tpidrro_el0
 * carries the compat TLS value, and is also kept non-zero when the kernel
 * is unmapped at EL0 (KPTI trampoline requirement).
 */
static void tls_thread_switch(struct task_struct *next)
{
	tls_preserve_current_state();

	if (is_compat_thread(task_thread_info(next)))
		write_sysreg(next->thread.uw.tp_value, tpidrro_el0);
	else if (!arm64_kernel_unmapped_at_el0())
		write_sysreg(0, tpidrro_el0);

	write_sysreg(*task_user_tls(next), tpidr_el0);
	if (system_supports_tpidr2())
		write_sysreg_s(next->thread.tpidr2_el0, SYS_TPIDR2_EL0);
}
439
/*
 * Force SSBS state on context-switch, since it may be lost after migrating
 * from a CPU which treats the bit as RES0 in a heterogeneous system.
 */
static void ssbs_thread_switch(struct task_struct *next)
{
	/*
	 * Nothing to do for kernel threads, but 'regs' may be junk
	 * (e.g. idle task) so check the flags and bail early.
	 */
	if (unlikely(next->flags & PF_KTHREAD))
		return;

	/*
	 * If all CPUs implement the SSBS extension, then we just need to
	 * context-switch the PSTATE field.
	 */
	if (alternative_has_cap_unlikely(ARM64_SSBS))
		return;

	/* Otherwise fall back to the Spectre-v4 mitigation machinery. */
	spectre_v4_enable_task_mitigation(next);
}
462
/*
 * We store our current task in sp_el0, which is clobbered by userspace. Keep a
 * shadow copy so that we can restore this upon entry from userspace.
 *
 * This is *only* for exception entry from EL0, and is not valid until we
 * __switch_to() a user task.
 */
DEFINE_PER_CPU(struct task_struct *, __entry_task);

/* Publish the incoming task as this CPU's entry task. */
static void entry_task_switch(struct task_struct *next)
{
	__this_cpu_write(__entry_task, next);
}
476
477/*
Marc Zyngierd49f7d72020-07-31 18:38:23 +0100478 * ARM erratum 1418040 handling, affecting the 32bit view of CNTVCT.
D Scott Phillips38e02572021-12-20 15:41:14 -0800479 * Ensure access is disabled when switching to a 32bit task, ensure
480 * access is enabled when switching to a 64bit task.
Marc Zyngierd49f7d72020-07-31 18:38:23 +0100481 */
D Scott Phillips38e02572021-12-20 15:41:14 -0800482static void erratum_1418040_thread_switch(struct task_struct *next)
Marc Zyngierd49f7d72020-07-31 18:38:23 +0100483{
D Scott Phillips38e02572021-12-20 15:41:14 -0800484 if (!IS_ENABLED(CONFIG_ARM64_ERRATUM_1418040) ||
485 !this_cpu_has_cap(ARM64_WORKAROUND_1418040))
Marc Zyngierd49f7d72020-07-31 18:38:23 +0100486 return;
487
D Scott Phillips38e02572021-12-20 15:41:14 -0800488 if (is_compat_thread(task_thread_info(next)))
489 sysreg_clear_set(cntkctl_el1, ARCH_TIMER_USR_VCT_ACCESS_EN, 0);
Marc Zyngierd49f7d72020-07-31 18:38:23 +0100490 else
D Scott Phillips38e02572021-12-20 15:41:14 -0800491 sysreg_clear_set(cntkctl_el1, 0, ARCH_TIMER_USR_VCT_ACCESS_EN);
492}
Marc Zyngierd49f7d72020-07-31 18:38:23 +0100493
/*
 * Re-apply the erratum 1418040 workaround for the current task after exec,
 * since its native/compat personality may have changed.  Preemption is
 * disabled so the per-CPU capability check and sysreg update stay on one CPU.
 */
static void erratum_1418040_new_exec(void)
{
	preempt_disable();
	erratum_1418040_thread_switch(current);
	preempt_enable();
}
500
/*
 * __switch_to() checks current->thread.sctlr_user as an optimisation. Therefore
 * this function must be called with preemption disabled and the update to
 * sctlr_user must be made in the same preemption disabled block so that
 * __switch_to() does not see the variable update before the SCTLR_EL1 one.
 */
void update_sctlr_el1(u64 sctlr)
{
	/*
	 * EnIA must not be cleared while in the kernel as this is necessary for
	 * in-kernel PAC. It will be cleared on kernel exit if needed.
	 */
	sysreg_clear_set(sctlr_el1, SCTLR_USER_MASK & ~SCTLR_ELx_ENIA, sctlr);

	/* ISB required for the kernel uaccess routines when setting TCF0. */
	isb();
}
518
/*
 * Thread switching.
 *
 * Saves the outgoing task's per-thread architectural state and installs
 * @next's, then performs the low-level register switch.  The ordering of
 * the helpers below is significant (see the DSB and MTE comments).
 */
__notrace_funcgraph __sched
struct task_struct *__switch_to(struct task_struct *prev,
				struct task_struct *next)
{
	struct task_struct *last;

	fpsimd_thread_switch(next);
	tls_thread_switch(next);
	hw_breakpoint_thread_switch(next);
	contextidr_thread_switch(next);
	entry_task_switch(next);
	ssbs_thread_switch(next);
	erratum_1418040_thread_switch(next);
	ptrauth_thread_switch_user(next);

	/*
	 * Complete any pending TLB or cache maintenance on this CPU in case
	 * the thread migrates to a different CPU.
	 * This full barrier is also required by the membarrier system
	 * call.
	 */
	dsb(ish);

	/*
	 * MTE thread switching must happen after the DSB above to ensure that
	 * any asynchronous tag check faults have been logged in the TFSR*_EL1
	 * registers.
	 */
	mte_thread_switch(next);
	/* avoid expensive SCTLR_EL1 accesses if no change */
	if (prev->thread.sctlr_user != next->thread.sctlr_user)
		update_sctlr_el1(next->thread.sctlr_user);

	/* the actual thread switch */
	last = cpu_switch_to(prev, next);

	return last;
}
560
Madhavan T. Venkataraman4f62bb72021-11-29 14:28:45 +0000561struct wchan_info {
562 unsigned long pc;
563 int count;
564};
565
566static bool get_wchan_cb(void *arg, unsigned long pc)
567{
568 struct wchan_info *wchan_info = arg;
569
570 if (!in_sched_functions(pc)) {
571 wchan_info->pc = pc;
572 return false;
573 }
574 return wchan_info->count++ < 16;
575}
576
/*
 * Return the "wait channel" of @p: the first PC on its stack that is not a
 * scheduler function, or 0 if none could be found or the stack is gone.
 */
unsigned long __get_wchan(struct task_struct *p)
{
	struct wchan_info wchan_info = {
		.pc = 0,
		.count = 0,
	};

	/* Pin the task's stack so it cannot be freed while we walk it. */
	if (!try_get_task_stack(p))
		return 0;

	arch_stack_walk(get_wchan_cb, &wchan_info, p, NULL);

	put_task_stack(p);

	return wchan_info.pc;
}
593
594unsigned long arch_align_stack(unsigned long sp)
595{
596 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
Jason A. Donenfeld8032bf12022-10-09 20:44:02 -0600597 sp -= get_random_u32_below(PAGE_SIZE);
Catalin Marinasb3901d52012-03-05 11:49:28 +0000598 return sp & ~0xf;
599}
600
#ifdef CONFIG_COMPAT
/*
 * Decide whether a 32-bit ELF image may run on this system: the CPUs must
 * support 32-bit EL0 and the image must be an EABI ARM binary.
 */
int compat_elf_check_arch(const struct elf32_hdr *hdr)
{
	if (!system_supports_32bit_el0() ||
	    hdr->e_machine != EM_ARM ||
	    !(hdr->e_flags & EF_ARM_EABI_MASK))
		return false;

	/*
	 * Prevent execve() of a 32-bit program from a deadline task
	 * if the restricted affinity mask would be inadmissible on an
	 * asymmetric system.
	 */
	return !static_branch_unlikely(&arm64_mismatched_32bit_el0) ||
	       !dl_task_check_affinity(current, system_32bit_el0_cpumask());
}
#endif
622
/*
 * Called from setup_new_exec() after (COMPAT_)SET_PERSONALITY.
 *
 * Re-initialises the arch-specific per-mm and per-task state for the
 * freshly exec'd image: compat flag, CPU affinity on asymmetric systems,
 * pointer auth, MTE, erratum workarounds and SSB mitigation.
 */
void arch_setup_new_exec(void)
{
	unsigned long mmflags = 0;

	if (is_compat_task()) {
		mmflags = MMCF_AARCH32;

		/*
		 * Restrict the CPU affinity mask for a 32-bit task so that
		 * it contains only 32-bit-capable CPUs.
		 *
		 * From the perspective of the task, this looks similar to
		 * what would happen if the 64-bit-only CPUs were hot-unplugged
		 * at the point of execve(), although we try a bit harder to
		 * honour the cpuset hierarchy.
		 */
		if (static_branch_unlikely(&arm64_mismatched_32bit_el0))
			force_compatible_cpus_allowed_ptr(current);
	} else if (static_branch_unlikely(&arm64_mismatched_32bit_el0)) {
		/* Going 64-bit: undo any earlier affinity restriction. */
		relax_compatible_cpus_allowed_ptr(current);
	}

	current->mm->context.flags = mmflags;
	ptrauth_thread_init_user();
	mte_thread_init_user();
	erratum_1418040_new_exec();

	if (task_spec_ssb_noexec(current)) {
		arch_prctl_spec_ctrl_set(current, PR_SPEC_STORE_BYPASS,
					 PR_SPEC_ENABLE);
	}
}
Catalin Marinas63f0c602019-07-23 19:58:39 +0200658
#ifdef CONFIG_ARM64_TAGGED_ADDR_ABI
/*
 * Control the relaxed ABI allowing tagged user addresses into the kernel.
 */
/* Non-zero (set via sysctl abi.tagged_addr_disabled) forbids new opt-ins. */
static unsigned int tagged_addr_disabled;

/*
 * prctl(PR_SET_TAGGED_ADDR_CTRL) backend: validate @arg, forward the MTE
 * control bits, and record the tagged-address ABI opt-in in @task's thread
 * flags.  Returns 0 on success or -EINVAL.
 */
long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg)
{
	unsigned long valid_mask = PR_TAGGED_ADDR_ENABLE;
	struct thread_info *ti = task_thread_info(task);

	/* The tagged-address ABI is a native (64-bit) only feature. */
	if (is_compat_thread(ti))
		return -EINVAL;

	if (system_supports_mte())
		valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \
			| PR_MTE_TAG_MASK;

	if (arg & ~valid_mask)
		return -EINVAL;

	/*
	 * Do not allow the enabling of the tagged address ABI if globally
	 * disabled via sysctl abi.tagged_addr_disabled.
	 */
	if (arg & PR_TAGGED_ADDR_ENABLE && tagged_addr_disabled)
		return -EINVAL;

	if (set_mte_ctrl(task, arg) != 0)
		return -EINVAL;

	update_ti_thread_flag(ti, TIF_TAGGED_ADDR, arg & PR_TAGGED_ADDR_ENABLE);

	return 0;
}
694
Catalin Marinas93f067f2020-07-03 14:25:50 +0100695long get_tagged_addr_ctrl(struct task_struct *task)
Catalin Marinas63f0c602019-07-23 19:58:39 +0200696{
Catalin Marinas1c101da2019-11-27 10:30:15 +0000697 long ret = 0;
Catalin Marinas93f067f2020-07-03 14:25:50 +0100698 struct thread_info *ti = task_thread_info(task);
Catalin Marinas1c101da2019-11-27 10:30:15 +0000699
Catalin Marinas93f067f2020-07-03 14:25:50 +0100700 if (is_compat_thread(ti))
Catalin Marinas63f0c602019-07-23 19:58:39 +0200701 return -EINVAL;
702
Catalin Marinas93f067f2020-07-03 14:25:50 +0100703 if (test_ti_thread_flag(ti, TIF_TAGGED_ADDR))
Catalin Marinas1c101da2019-11-27 10:30:15 +0000704 ret = PR_TAGGED_ADDR_ENABLE;
Catalin Marinas63f0c602019-07-23 19:58:39 +0200705
Catalin Marinas93f067f2020-07-03 14:25:50 +0100706 ret |= get_mte_ctrl(task);
Catalin Marinas1c101da2019-11-27 10:30:15 +0000707
708 return ret;
Catalin Marinas63f0c602019-07-23 19:58:39 +0200709}
710
711/*
712 * Global sysctl to disable the tagged user addresses support. This control
713 * only prevents the tagged address ABI enabling via prctl() and does not
714 * disable it for tasks that already opted in to the relaxed ABI.
715 */
Catalin Marinas63f0c602019-07-23 19:58:39 +0200716
717static struct ctl_table tagged_addr_sysctl_table[] = {
718 {
Catalin Marinas413235f2019-08-15 16:44:01 +0100719 .procname = "tagged_addr_disabled",
Catalin Marinas63f0c602019-07-23 19:58:39 +0200720 .mode = 0644,
Catalin Marinas413235f2019-08-15 16:44:01 +0100721 .data = &tagged_addr_disabled,
Catalin Marinas63f0c602019-07-23 19:58:39 +0200722 .maxlen = sizeof(int),
723 .proc_handler = proc_dointvec_minmax,
Matteo Croce2c614c12020-01-24 16:51:27 +0100724 .extra1 = SYSCTL_ZERO,
725 .extra2 = SYSCTL_ONE,
Catalin Marinas63f0c602019-07-23 19:58:39 +0200726 },
Catalin Marinas63f0c602019-07-23 19:58:39 +0200727};
728
729static int __init tagged_addr_init(void)
730{
731 if (!register_sysctl("abi", tagged_addr_sysctl_table))
732 return -EINVAL;
733 return 0;
734}
735
736core_initcall(tagged_addr_init);
737#endif /* CONFIG_ARM64_TAGGED_ADDR_ABI */
Julien Thierry19c95f22019-10-15 18:25:44 +0100738
Dave Martinab7876a2020-03-16 16:50:47 +0000739#ifdef CONFIG_BINFMT_ELF
740int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state,
741 bool has_interp, bool is_interp)
742{
Mark Brown5d1b6312020-03-23 17:01:19 +0000743 /*
744 * For dynamically linked executables the interpreter is
745 * responsible for setting PROT_BTI on everything except
746 * itself.
747 */
Dave Martinab7876a2020-03-16 16:50:47 +0000748 if (is_interp != has_interp)
749 return prot;
750
751 if (!(state->flags & ARM64_ELF_BTI))
752 return prot;
753
754 if (prot & PROT_EXEC)
755 prot |= PROT_BTI;
756
757 return prot;
758}
759#endif