sh: switch to generic kernel_thread()/kernel_execve()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile
index 88571ff..f259b37 100644
--- a/arch/sh/kernel/Makefile
+++ b/arch/sh/kernel/Makefile
@@ -16,7 +16,7 @@
 	   machvec.o nmi_debug.o process.o				\
 	   process_$(BITS).o ptrace.o ptrace_$(BITS).o			\
 	   reboot.o return_address.o					\
-	   setup.o signal_$(BITS).o sys_sh.o sys_sh$(BITS).o		\
+	   setup.o signal_$(BITS).o sys_sh.o 				\
 	   syscalls_$(BITS).o time.o topology.o traps.o			\
 	   traps_$(BITS).o unwinder.o
 
@@ -25,6 +25,7 @@
 obj-$(CONFIG_HAS_IOPORT)	+= ioport.o
 endif
 
+obj-$(CONFIG_SUPERH32)		+= sys_sh32.o
 obj-y				+= cpu/
 obj-$(CONFIG_VSYSCALL)		+= vsyscall/
 obj-$(CONFIG_SMP)		+= smp.o
diff --git a/arch/sh/kernel/cpu/sh5/entry.S b/arch/sh/kernel/cpu/sh5/entry.S
index 7e605b9..0c8d037 100644
--- a/arch/sh/kernel/cpu/sh5/entry.S
+++ b/arch/sh/kernel/cpu/sh5/entry.S
@@ -1228,6 +1228,25 @@
 	pta	ret_from_syscall, tr0
 	blink	tr0, ZERO
 
+.global	ret_from_kernel_thread
+ret_from_kernel_thread:
+	/* Installed as thread.pc by copy_thread() for PF_KTHREAD tasks */
+	movi	schedule_tail,r5
+	ori	r5, 1, r5		/* set bit 0 of the target (presumably the SHmedia mode bit) */
+	ptabs	r5, tr0
+	blink	tr0, LINK		/* call schedule_tail */
+	/* copy_thread() stored arg in regs[2] and the thread function in regs[3] */
+	ld.q	SP, FRAME_R(2), r2
+	ld.q	SP, FRAME_R(3), r3
+	ptabs	r3, tr0
+	blink	tr0, LINK		/* call the function whose address is in r3 */
+	/* The function returned: advance the saved PC and take the syscall return path */
+	ld.q	SP, FRAME_S(FSPC), r2
+	addi	r2, 4, r2		/* Move PC, being pre-execution event */
+	st.q	SP, FRAME_S(FSPC), r2
+	pta	ret_from_syscall, tr0
+	blink	tr0, ZERO
+
 syscall_allowed:
 	/* Use LINK to deflect the exit point, default is syscall_ret */
 	pta	syscall_ret, tr0
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index b96489d..9b6e4be 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -297,6 +297,19 @@
 	 mov	r0, r4
 	bra	syscall_exit
 	 nop
+	! Installed as thread.pc by copy_thread() for PF_KTHREAD tasks.
+	.align	2
+	.globl	ret_from_kernel_thread
+ret_from_kernel_thread:
+	mov.l	1f, r8		! r8 = address of schedule_tail (literal at 1:)
+	jsr	@r8		! call schedule_tail
+	 mov	r0, r4		! delay slot: r0 is passed as the first argument
+	mov.l	@(OFF_R5,r15), r5   ! fn
+	jsr	@r5		! call fn(arg)
+	 mov.l	@(OFF_R4,r15), r4   ! arg
+	bra	syscall_exit	! fn returned: leave through the syscall exit path
+	 nop
+
 	.align	2
 1:	.long	schedule_tail
 
diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
index ba7345f..b55070b 100644
--- a/arch/sh/kernel/process_32.c
+++ b/arch/sh/kernel/process_32.c
@@ -68,38 +68,6 @@
 	show_code(regs);
 }
 
-/*
- * Create a kernel thread
- */
-__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
-{
-	do_exit(fn(arg));
-}
-
-/* Don't use this in BL=1(cli).  Or else, CPU resets! */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-	int pid;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.regs[4] = (unsigned long)arg;
-	regs.regs[5] = (unsigned long)fn;
-
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.sr = SR_MD;
-#if defined(CONFIG_SH_FPU)
-	regs.sr |= SR_FD;
-#endif
-
-	/* Ok, create the new process.. */
-	pid = do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		      &regs, 0, NULL, NULL);
-
-	return pid;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 void start_thread(struct pt_regs *regs, unsigned long new_pc,
 		  unsigned long new_sp)
 {
@@ -157,9 +125,10 @@
 EXPORT_SYMBOL(dump_fpu);
 
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
+		unsigned long arg,
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct thread_info *ti = task_thread_info(p);
@@ -177,29 +146,34 @@
 	}
 #endif
 
-	childregs = task_pt_regs(p);
-	*childregs = *regs;
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
 
-	if (user_mode(regs)) {
-		childregs->regs[15] = usp;
-		ti->addr_limit = USER_DS;
-	} else {
-		childregs->regs[15] = (unsigned long)childregs;
+	childregs = task_pt_regs(p);
+	p->thread.sp = (unsigned long) childregs;
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.pc = (unsigned long) ret_from_kernel_thread;
+		childregs->regs[4] = arg;
+		childregs->regs[5] = usp;
+		childregs->sr = SR_MD;
+#if defined(CONFIG_SH_FPU)
+		childregs->sr |= SR_FD;
+#endif
 		ti->addr_limit = KERNEL_DS;
 		ti->status &= ~TS_USEDFPU;
 		p->fpu_counter = 0;
+		return 0;
 	}
+	*childregs = *regs;
+
+	childregs->regs[15] = usp;
+	ti->addr_limit = USER_DS;
 
 	if (clone_flags & CLONE_SETTLS)
 		childregs->gbr = childregs->regs[0];
 
 	childregs->regs[0] = 0; /* Set return value for child */
-
-	p->thread.sp = (unsigned long) childregs;
 	p->thread.pc = (unsigned long) ret_from_fork;
-
-	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
 	return 0;
 }
 
diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c
index 98a709f..fd338b0 100644
--- a/arch/sh/kernel/process_64.c
+++ b/arch/sh/kernel/process_64.c
@@ -285,39 +285,6 @@
 }
 
 /*
- * Create a kernel thread
- */
-__noreturn void kernel_thread_helper(void *arg, int (*fn)(void *))
-{
-	do_exit(fn(arg));
-}
-
-/*
- * This is the mechanism for creating a new kernel thread.
- *
- * NOTE! Only a kernel-only process(ie the swapper or direct descendants
- * who haven't done an "execve()") should use this: it will work within
- * a system call from a "real" process, but the process memory space will
- * not be freed until both the parent and the child have exited.
- */
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-	regs.regs[2] = (unsigned long)arg;
-	regs.regs[3] = (unsigned long)fn;
-
-	regs.pc = (unsigned long)kernel_thread_helper;
-	regs.sr = (1 << 30);
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0,
-		      &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
-/*
  * Free current thread data structures etc..
  */
 void exit_thread(void)
@@ -401,15 +368,17 @@
 EXPORT_SYMBOL(dump_fpu);
 
 asmlinkage void ret_from_fork(void);
+asmlinkage void ret_from_kernel_thread(void);
 
 int copy_thread(unsigned long clone_flags, unsigned long usp,
-		unsigned long unused,
+		unsigned long arg,
 		struct task_struct *p, struct pt_regs *regs)
 {
 	struct pt_regs *childregs;
 
 #ifdef CONFIG_SH_FPU
-	if(last_task_used_math == current) {
+	/* can't happen for a kernel thread */
+	if (last_task_used_math == current) {
 		enable_fpu();
 		save_fpu(current);
 		disable_fpu();
@@ -419,7 +388,17 @@
 #endif
 	/* Copy from sh version */
 	childregs = (struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1;
+	p->thread.sp = (unsigned long) childregs;
 
+	if (unlikely(p->flags & PF_KTHREAD)) {
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->regs[2] = (unsigned long)arg; /* argument for fn */
+		childregs->regs[3] = (unsigned long)usp; /* fn, passed as usp */
+		childregs->sr = (1 << 30); /* not user_mode */
+		childregs->sr |= SR_FD; /* Invalidate FPU flag */
+		p->thread.pc = (unsigned long) ret_from_kernel_thread;
+		return 0;
+	}
 	*childregs = *regs;
 
 	/*
@@ -428,19 +407,12 @@
 	 * 32-bit wide and context switch must take care
 	 * of NEFF sign extension.
 	 */
-	if (user_mode(regs)) {
-		childregs->regs[15] = neff_sign_extend(usp);
-		p->thread.uregs = childregs;
-	} else {
-		childregs->regs[15] =
-			neff_sign_extend((unsigned long)task_stack_page(p) +
-					 THREAD_SIZE);
-	}
+	childregs->regs[15] = neff_sign_extend(usp);
+	p->thread.uregs = childregs;
 
 	childregs->regs[9] = 0; /* Set return value for child */
 	childregs->sr |= SR_FD; /* Invalidate FPU flag */
 
-	p->thread.sp = (unsigned long) childregs;
 	p->thread.pc = (unsigned long) ret_from_fork;
 
 	return 0;
diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c
index f56b6fe5..497bab3 100644
--- a/arch/sh/kernel/sys_sh32.c
+++ b/arch/sh/kernel/sys_sh32.c
@@ -60,27 +60,3 @@
 				(u64)len0 << 32 | len1,	advice);
 #endif
 }
-
-#if defined(CONFIG_CPU_SH2) || defined(CONFIG_CPU_SH2A)
-#define SYSCALL_ARG3	"trapa #0x23"
-#else
-#define SYSCALL_ARG3	"trapa #0x13"
-#endif
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register long __sc0 __asm__ ("r3") = __NR_execve;
-	register long __sc4 __asm__ ("r4") = (long) filename;
-	register long __sc5 __asm__ ("r5") = (long) argv;
-	register long __sc6 __asm__ ("r6") = (long) envp;
-	__asm__ __volatile__ (SYSCALL_ARG3 : "=z" (__sc0)
-			: "0" (__sc0), "r" (__sc4), "r" (__sc5), "r" (__sc6)
-			: "memory");
-	return __sc0;
-}
diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c
deleted file mode 100644
index c5a38c4..0000000
--- a/arch/sh/kernel/sys_sh64.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * arch/sh/kernel/sys_sh64.c
- *
- * Copyright (C) 2000, 2001  Paolo Alberelli
- *
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/SH5
- * platform.
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-#include <linux/errno.h>
-#include <linux/rwsem.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/syscalls.h>
-#include <linux/ipc.h>
-#include <asm/uaccess.h>
-#include <asm/ptrace.h>
-#include <asm/unistd.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	register unsigned long __sc0 __asm__ ("r9") = ((0x13 << 16) | __NR_execve);
-	register unsigned long __sc2 __asm__ ("r2") = (unsigned long) filename;
-	register unsigned long __sc3 __asm__ ("r3") = (unsigned long) argv;
-	register unsigned long __sc4 __asm__ ("r4") = (unsigned long) envp;
-	__asm__ __volatile__ ("trapa	%1 !\t\t\t execve(%2,%3,%4)"
-	: "=r" (__sc0)
-	: "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) );
-	__asm__ __volatile__ ("!dummy	%0 %1 %2 %3"
-	: : "r" (__sc0), "r" (__sc2), "r" (__sc3), "r" (__sc4) : "memory");
-	return __sc0;
-}