[PATCH] syscall entry/exit revamp

This cleanup patch speeds up the null syscall path on ppc64 by about 3%,
and brings the ppc32 and ppc64 code slightly closer together.

The ppc64 code was checking current_thread_info()->flags twice in the
syscall exit path; once for TIF_SYSCALL_T_OR_A before disabling
interrupts, and then again for TIF_SIGPENDING|TIF_NEED_RESCHED etc after
disabling interrupts. Now we do the same as ppc32 -- check the flags
only once in the fast path, and re-enable interrupts if necessary in the
ptrace case.

The patch abolishes the 'syscall_noerror' member of struct thread_info
and replaces it with a TIF_NOERROR bit in the flags, which is handled in
the slow path. This shortens the syscall entry code, which no longer
needs to clear syscall_noerror.

The patch adds a TIF_SAVE_NVGPRS flag which causes the syscall exit slow
path to save the non-volatile GPRs into a signal frame. This removes the
need for the assembly wrappers around sys_sigsuspend(),
sys_rt_sigsuspend(), et al which existed solely to save those registers
in advance. It also means I don't have to add new wrappers for ppoll()
and pselect(), which is what I was supposed to be doing when I got
distracted into this...

Finally, it unifies the ppc64 and ppc32 methods of handling syscall exit
directly into a signal handler (as required by sigsuspend et al) by
introducing a TIF_RESTOREALL flag which causes _all_ the registers to be
reloaded from the pt_regs by taking the ret_from_exception path, instead
of the normal syscall exit path which stomps on the callee-saved GPRs.

It appears to pass an LTP test run on ppc64, and passes basic testing on
ppc32 too. Brief tests of ptrace functionality with strace and gdb also
appear OK. I wouldn't send it to Linus for 2.6.15 just yet though :)

Signed-off-by: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 91538d2..3bf89d1 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -92,9 +92,9 @@
 
 	DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
 	DEFINE(TI_PREEMPT, offsetof(struct thread_info, preempt_count));
-	DEFINE(TI_SC_NOERR, offsetof(struct thread_info, syscall_noerror));
-#ifdef CONFIG_PPC32
+	DEFINE(TI_SIGFRAME, offsetof(struct thread_info, nvgprs_frame));
 	DEFINE(TI_TASK, offsetof(struct thread_info, task));
+#ifdef CONFIG_PPC32
 	DEFINE(TI_EXECDOMAIN, offsetof(struct thread_info, exec_domain));
 	DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
 #endif /* CONFIG_PPC32 */
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 2e99ae4..8fed953 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -200,8 +200,6 @@
 	bl	do_show_syscall
 #endif /* SHOW_SYSCALLS */
 	rlwinm	r10,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
-	li	r11,0
-	stb	r11,TI_SC_NOERR(r10)
 	lwz	r11,TI_FLAGS(r10)
 	andi.	r11,r11,_TIF_SYSCALL_T_OR_A
 	bne-	syscall_dotrace
@@ -222,25 +220,21 @@
 	bl	do_show_syscall_exit
 #endif
 	mr	r6,r3
-	li	r11,-_LAST_ERRNO
-	cmplw	0,r3,r11
 	rlwinm	r12,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
-	blt+	30f
-	lbz	r11,TI_SC_NOERR(r12)
-	cmpwi	r11,0
-	bne	30f
-	neg	r3,r3
-	lwz	r10,_CCR(r1)	/* Set SO bit in CR */
-	oris	r10,r10,0x1000
-	stw	r10,_CCR(r1)
-
 	/* disable interrupts so current_thread_info()->flags can't change */
-30:	LOAD_MSR_KERNEL(r10,MSR_KERNEL)	/* doesn't include MSR_EE */
+	LOAD_MSR_KERNEL(r10,MSR_KERNEL)	/* doesn't include MSR_EE */
 	SYNC
 	MTMSRD(r10)
 	lwz	r9,TI_FLAGS(r12)
-	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED)
+	li	r8,-_LAST_ERRNO
+	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL)
 	bne-	syscall_exit_work
+	cmplw	0,r3,r8
+	blt+	syscall_exit_cont
+	lwz	r11,_CCR(r1)			/* Load CR */
+	neg	r3,r3
+	oris	r11,r11,0x1000	/* Set SO bit in CR */
+	stw	r11,_CCR(r1)
 syscall_exit_cont:
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
 	/* If the process has its own DBCR0 value, load it up.  The single
@@ -292,46 +286,113 @@
 	b	syscall_dotrace_cont
 
 syscall_exit_work:
-	stw	r6,RESULT(r1)	/* Save result */
+	andi.	r0,r9,_TIF_RESTOREALL
+	bne-	2f
+	cmplw	0,r3,r8
+	blt+	1f
+	andi.	r0,r9,_TIF_NOERROR
+	bne-	1f
+	lwz	r11,_CCR(r1)			/* Load CR */
+	neg	r3,r3
+	oris	r11,r11,0x1000	/* Set SO bit in CR */
+	stw	r11,_CCR(r1)
+
+1:	stw	r6,RESULT(r1)	/* Save result */
 	stw	r3,GPR3(r1)	/* Update return value */
-	andi.	r0,r9,_TIF_SYSCALL_T_OR_A
-	beq	5f
-	ori	r10,r10,MSR_EE
-	SYNC
-	MTMSRD(r10)		/* re-enable interrupts */
+2:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
+	beq	4f
+
+	/* Clear per-syscall TIF flags if any are set, but _leave_
+	_TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that
+	yet.  */
+
+	li	r11,_TIF_PERSYSCALL_MASK
+	addi	r12,r12,TI_FLAGS
+3:	lwarx	r8,0,r12
+	andc	r8,r8,r11
+#ifdef CONFIG_IBM405_ERR77
+	dcbt	0,r12
+#endif
+	stwcx.	r8,0,r12
+	bne-	3b
+	subi	r12,r12,TI_FLAGS
+	
+4:	/* Anything which requires enabling interrupts? */
+	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS)
+	beq	7f
+
+	/* Save NVGPRS if they're not saved already */
 	lwz	r4,_TRAP(r1)
 	andi.	r4,r4,1
-	beq	4f
+	beq	5f
 	SAVE_NVGPRS(r1)
 	li	r4,0xc00
 	stw	r4,_TRAP(r1)
-4:
+
+	/* Re-enable interrupts */
+5:	ori	r10,r10,MSR_EE
+	SYNC
+	MTMSRD(r10)
+
+	andi.	r0,r9,_TIF_SAVE_NVGPRS
+	bne	save_user_nvgprs
+
+save_user_nvgprs_cont:
+	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
+	beq	7f
+
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	do_syscall_trace_leave
 	REST_NVGPRS(r1)
-2:
-	lwz	r3,GPR3(r1)
+
+6:	lwz	r3,GPR3(r1)
 	LOAD_MSR_KERNEL(r10,MSR_KERNEL)	/* doesn't include MSR_EE */
 	SYNC
 	MTMSRD(r10)		/* disable interrupts again */
 	rlwinm	r12,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
 	lwz	r9,TI_FLAGS(r12)
-5:
+7:
 	andi.	r0,r9,_TIF_NEED_RESCHED
-	bne	1f
+	bne	8f
 	lwz	r5,_MSR(r1)
 	andi.	r5,r5,MSR_PR
-	beq	syscall_exit_cont
+	beq	ret_from_except
 	andi.	r0,r9,_TIF_SIGPENDING
-	beq	syscall_exit_cont
+	beq	ret_from_except
 	b	do_user_signal
-1:
+8:
 	ori	r10,r10,MSR_EE
 	SYNC
 	MTMSRD(r10)		/* re-enable interrupts */
 	bl	schedule
-	b	2b
+	b	6b
 
+save_user_nvgprs:
+	ld	r8,TI_SIGFRAME(r12)
+
+.macro savewords start, end
+  1:	stw \start,4*(\start)(r8)
+	.section __ex_table,"a"
+	.align	2
+	.long	1b,save_user_nvgprs_fault
+	.previous
+	.if \end - \start
+	savewords "(\start+1)",\end
+	.endif
+.endm	
+	savewords 14,31
+	b	save_user_nvgprs_cont
+
+	
+save_user_nvgprs_fault:
+	li	r3,11		/* SIGSEGV */
+	ld	r4,TI_TASK(r12)
+	bl	force_sigsegv
+
+	rlwinm	r12,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
+	ld	r9,TI_FLAGS(r12)
+	b	save_user_nvgprs_cont
+	
 #ifdef SHOW_SYSCALLS
 do_show_syscall:
 #ifdef SHOW_SYSCALLS_TASK
@@ -401,28 +462,10 @@
 #endif /* SHOW_SYSCALLS */
 
 /*
- * The sigsuspend and rt_sigsuspend system calls can call do_signal
- * and thus put the process into the stopped state where we might
- * want to examine its user state with ptrace.  Therefore we need
- * to save all the nonvolatile registers (r13 - r31) before calling
- * the C code.
+ * The fork/clone functions need to copy the full register set into
+ * the child process. Therefore we need to save all the nonvolatile
+ * registers (r13 - r31) before calling the C code.
  */
-	.globl	ppc_sigsuspend
-ppc_sigsuspend:
-	SAVE_NVGPRS(r1)
-	lwz	r0,_TRAP(r1)
-	rlwinm	r0,r0,0,0,30		/* clear LSB to indicate full */
-	stw	r0,_TRAP(r1)		/* register set saved */
-	b	sys_sigsuspend
-
-	.globl	ppc_rt_sigsuspend
-ppc_rt_sigsuspend:
-	SAVE_NVGPRS(r1)
-	lwz	r0,_TRAP(r1)
-	rlwinm	r0,r0,0,0,30
-	stw	r0,_TRAP(r1)
-	b	sys_rt_sigsuspend
-
 	.globl	ppc_fork
 ppc_fork:
 	SAVE_NVGPRS(r1)
@@ -447,14 +490,6 @@
 	stw	r0,_TRAP(r1)		/* register set saved */
 	b	sys_clone
 
-	.globl	ppc_swapcontext
-ppc_swapcontext:
-	SAVE_NVGPRS(r1)
-	lwz	r0,_TRAP(r1)
-	rlwinm	r0,r0,0,0,30		/* clear LSB to indicate full */
-	stw	r0,_TRAP(r1)		/* register set saved */
-	b	sys_swapcontext
-
 /*
  * Top-level page fault handling.
  * This is in assembler because if do_page_fault tells us that
@@ -626,16 +661,6 @@
 	.long	ret_from_except
 #endif
 
-	.globl	sigreturn_exit
-sigreturn_exit:
-	subi	r1,r3,STACK_FRAME_OVERHEAD
-	rlwinm	r12,r1,0,0,(31-THREAD_SHIFT)	/* current_thread_info() */
-	lwz	r9,TI_FLAGS(r12)
-	andi.	r0,r9,_TIF_SYSCALL_T_OR_A
-	beq+	ret_from_except_full
-	bl	do_syscall_trace_leave
-	/* fall through */
-
 	.globl	ret_from_except_full
 ret_from_except_full:
 	REST_NVGPRS(r1)
@@ -658,7 +683,7 @@
 	/* Check current_thread_info()->flags */
 	rlwinm	r9,r1,0,0,(31-THREAD_SHIFT)
 	lwz	r9,TI_FLAGS(r9)
-	andi.	r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED)
+	andi.	r0,r9,(_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL)
 	bne	do_work
 
 restore_user:
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index bce33a3..0bff31f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -113,9 +113,7 @@
 	addi	r9,r1,STACK_FRAME_OVERHEAD
 #endif
 	clrrdi	r11,r1,THREAD_SHIFT
-	li	r12,0
 	ld	r10,TI_FLAGS(r11)
-	stb	r12,TI_SC_NOERR(r11)
 	andi.	r11,r10,_TIF_SYSCALL_T_OR_A
 	bne-	syscall_dotrace
 syscall_dotrace_cont:
@@ -144,24 +142,12 @@
 	bctrl			/* Call handler */
 
 syscall_exit:
-#ifdef SHOW_SYSCALLS
-	std	r3,GPR3(r1)
-	bl	.do_show_syscall_exit
-	ld	r3,GPR3(r1)
-#endif
 	std	r3,RESULT(r1)
-	ld	r5,_CCR(r1)
-	li	r10,-_LAST_ERRNO
-	cmpld	r3,r10
+#ifdef SHOW_SYSCALLS
+	bl	.do_show_syscall_exit
+	ld	r3,RESULT(r1)
+#endif
 	clrrdi	r12,r1,THREAD_SHIFT
-	bge-	syscall_error
-syscall_error_cont:
-
-	/* check for syscall tracing or audit */
-	ld	r9,TI_FLAGS(r12)
-	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
-	bne-	syscall_exit_trace
-syscall_exit_trace_cont:
 
 	/* disable interrupts so current_thread_info()->flags can't change,
 	   and so that we don't get interrupted after loading SRR0/1. */
@@ -173,8 +159,13 @@
 	rotldi	r10,r10,16
 	mtmsrd	r10,1
 	ld	r9,TI_FLAGS(r12)
-	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SIGPENDING|_TIF_NEED_RESCHED)
+	li	r11,-_LAST_ERRNO
+	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SIGPENDING|_TIF_NEED_RESCHED|_TIF_RESTOREALL|_TIF_SAVE_NVGPRS|_TIF_NOERROR)
 	bne-	syscall_exit_work
+	cmpld	r3,r11
+	ld	r5,_CCR(r1)
+	bge-	syscall_error
+syscall_error_cont:
 	ld	r7,_NIP(r1)
 	stdcx.	r0,0,r1			/* to clear the reservation */
 	andi.	r6,r8,MSR_PR
@@ -193,21 +184,12 @@
 	rfid
 	b	.	/* prevent speculative execution */
 
-syscall_enosys:
-	li	r3,-ENOSYS
-	std	r3,RESULT(r1)
-	clrrdi	r12,r1,THREAD_SHIFT
-	ld	r5,_CCR(r1)
-
-syscall_error:
-	lbz	r11,TI_SC_NOERR(r12)
-	cmpwi	0,r11,0
-	bne-	syscall_error_cont
-	neg	r3,r3
+syscall_error:	
 	oris	r5,r5,0x1000	/* Set SO bit in CR */
+	neg	r3,r3
 	std	r5,_CCR(r1)
 	b	syscall_error_cont
-        
+	
 /* Traced system call support */
 syscall_dotrace:
 	bl	.save_nvgprs
@@ -225,21 +207,69 @@
 	ld	r10,TI_FLAGS(r10)
 	b	syscall_dotrace_cont
 
-syscall_exit_trace:
-	std	r3,GPR3(r1)
-	bl	.save_nvgprs
+syscall_enosys:
+	li	r3,-ENOSYS
+	b	syscall_exit
+	
+syscall_exit_work:
+	/* If TIF_RESTOREALL is set, don't scribble on either r3 or ccr.
+	 If TIF_NOERROR is set, just save r3 as it is. */
+
+	andi.	r0,r9,_TIF_RESTOREALL
+	bne-	2f
+	cmpld	r3,r11		/* r10 is -LAST_ERRNO */
+	blt+	1f
+	andi.	r0,r9,_TIF_NOERROR
+	bne-	1f
+	ld	r5,_CCR(r1)
+	neg	r3,r3
+	oris	r5,r5,0x1000	/* Set SO bit in CR */
+	std	r5,_CCR(r1)
+1:	std	r3,GPR3(r1)
+2:	andi.	r0,r9,(_TIF_PERSYSCALL_MASK)
+	beq	4f
+
+	/* Clear per-syscall TIF flags if any are set, but _leave_
+	_TIF_SAVE_NVGPRS set in r9 since we haven't dealt with that
+	yet.  */
+
+	li	r11,_TIF_PERSYSCALL_MASK
+	addi	r12,r12,TI_FLAGS
+3:	ldarx	r10,0,r12
+	andc	r10,r10,r11
+	stdcx.	r10,0,r12
+	bne-	3b
+	subi	r12,r12,TI_FLAGS
+	
+4:	bl	save_nvgprs
+	/* Anything else left to do? */
+	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP|_TIF_SAVE_NVGPRS)
+	beq	.ret_from_except_lite
+
+	/* Re-enable interrupts */
+	mfmsr	r10
+	ori	r10,r10,MSR_EE
+	mtmsrd	r10,1
+
+	andi.	r0,r9,_TIF_SAVE_NVGPRS
+	bne	save_user_nvgprs
+
+	/* If tracing, re-enable interrupts and do it */
+save_user_nvgprs_cont:	
+	andi.	r0,r9,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
+	beq	5f
+	
 	addi	r3,r1,STACK_FRAME_OVERHEAD
 	bl	.do_syscall_trace_leave
 	REST_NVGPRS(r1)
-	ld	r3,GPR3(r1)
-	ld	r5,_CCR(r1)
 	clrrdi	r12,r1,THREAD_SHIFT
-	b	syscall_exit_trace_cont
 
-/* Stuff to do on exit from a system call. */
-syscall_exit_work:
-	std	r3,GPR3(r1)
-	std	r5,_CCR(r1)
+	/* Disable interrupts again and handle other work if any */
+5:	mfmsr	r10
+	rldicl	r10,r10,48,1
+	rotldi	r10,r10,16
+	mtmsrd	r10,1
+
 	b	.ret_from_except_lite
 
 /* Save non-volatile GPRs, if not already saved. */
@@ -252,6 +282,52 @@
 	std	r0,_TRAP(r1)
 	blr
 
+
+save_user_nvgprs:
+	ld	r10,TI_SIGFRAME(r12)
+	andi.	r0,r9,_TIF_32BIT
+	beq-	save_user_nvgprs_64
+
+	/* 32-bit save to userspace */
+
+.macro savewords start, end
+  1:	stw \start,4*(\start)(r10)
+	.section __ex_table,"a"
+	.align	3
+	.llong	1b,save_user_nvgprs_fault
+	.previous
+	.if \end - \start
+	savewords "(\start+1)",\end
+	.endif
+.endm	
+	savewords 14,31
+	b	save_user_nvgprs_cont
+
+save_user_nvgprs_64:
+	/* 64-bit save to userspace */
+
+.macro savelongs start, end
+  1:	std \start,8*(\start)(r10)
+	.section __ex_table,"a"
+	.align	3
+	.llong	1b,save_user_nvgprs_fault
+	.previous
+	.if \end - \start
+	savelongs "(\start+1)",\end
+	.endif
+.endm	
+	savelongs 14,31
+	b	save_user_nvgprs_cont
+
+save_user_nvgprs_fault:
+	li	r3,11		/* SIGSEGV */
+	ld	r4,TI_TASK(r12)
+	bl	.force_sigsegv
+
+	clrrdi	r12,r1,THREAD_SHIFT
+	ld	r9,TI_FLAGS(r12)
+	b	save_user_nvgprs_cont
+	
 /*
  * The sigsuspend and rt_sigsuspend system calls can call do_signal
  * and thus put the process into the stopped state where we might
@@ -260,35 +336,6 @@
  * the C code.  Similarly, fork, vfork and clone need the full
  * register state on the stack so that it can be copied to the child.
  */
-_GLOBAL(ppc32_sigsuspend)
-	bl	.save_nvgprs
-	bl	.compat_sys_sigsuspend
-	b	70f
-
-_GLOBAL(ppc64_rt_sigsuspend)
-	bl	.save_nvgprs
-	bl	.sys_rt_sigsuspend
-	b	70f
-
-_GLOBAL(ppc32_rt_sigsuspend)
-	bl	.save_nvgprs
-	bl	.compat_sys_rt_sigsuspend
-70:	cmpdi	0,r3,0
-	/* If it returned an error, we need to return via syscall_exit to set
-	   the SO bit in cr0 and potentially stop for ptrace. */
-	bne	syscall_exit
-	/* If sigsuspend() returns zero, we are going into a signal handler. We
-	   may need to call audit_syscall_exit() to mark the exit from sigsuspend() */
-#ifdef CONFIG_AUDITSYSCALL
-	ld	r3,PACACURRENT(r13)
-	ld	r4,AUDITCONTEXT(r3)
-	cmpdi	0,r4,0
-	beq	.ret_from_except	/* No audit_context: Leave immediately. */
-	li	r4, 2			/* AUDITSC_FAILURE */
-	li	r5,-4			/* It's always -EINTR */
-	bl	.audit_syscall_exit
-#endif
-	b	.ret_from_except
 
 _GLOBAL(ppc_fork)
 	bl	.save_nvgprs
@@ -305,37 +352,6 @@
 	bl	.sys_clone
 	b	syscall_exit
 
-_GLOBAL(ppc32_swapcontext)
-	bl	.save_nvgprs
-	bl	.compat_sys_swapcontext
-	b	80f
-	
-_GLOBAL(ppc64_swapcontext)
-	bl	.save_nvgprs
-	bl	.sys_swapcontext
-	b	80f
-
-_GLOBAL(ppc32_sigreturn)
-	bl	.compat_sys_sigreturn
-	b	80f
-
-_GLOBAL(ppc32_rt_sigreturn)
-	bl	.compat_sys_rt_sigreturn
-	b	80f
-
-_GLOBAL(ppc64_rt_sigreturn)
-	bl	.sys_rt_sigreturn
-
-80:	cmpdi	0,r3,0
-	blt	syscall_exit
-	clrrdi	r4,r1,THREAD_SHIFT
-	ld	r4,TI_FLAGS(r4)
-	andi.	r4,r4,(_TIF_SYSCALL_T_OR_A|_TIF_SINGLESTEP)
-	beq+	81f
-	addi	r3,r1,STACK_FRAME_OVERHEAD
-	bl	.do_syscall_trace_leave
-81:	b	.ret_from_except
-
 _GLOBAL(ret_from_fork)
 	bl	.schedule_tail
 	REST_NVGPRS(r1)
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 5a2eba6..c9d0275 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -76,7 +76,6 @@
  * registers from *regs.  This is what we need
  * to do when a signal has been delivered.
  */
-#define sigreturn_exit(regs)	return 0
 
 #define GP_REGS_SIZE	min(sizeof(elf_gregset_t32), sizeof(struct pt_regs32))
 #undef __SIGNAL_FRAMESIZE
@@ -156,9 +155,17 @@
 	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
 	int i;
 
-	for (i = 0; i <= PT_RESULT; i ++)
+	if (!FULL_REGS(regs)) {
+		set_thread_flag(TIF_SAVE_NVGPRS);
+		current_thread_info()->nvgprs_frame = frame->mc_gregs;
+	}
+
+	for (i = 0; i <= PT_RESULT; i ++) {
+		if (i == 14 && !FULL_REGS(regs))
+			i = 32;
 		if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
 			return -EFAULT;
+	}
 	return 0;
 }
 
@@ -179,8 +186,6 @@
 
 #else /* CONFIG_PPC64 */
 
-extern void sigreturn_exit(struct pt_regs *);
-
 #define GP_REGS_SIZE	min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
 
 static inline int put_sigset_t(sigset_t __user *uset, sigset_t *set)
@@ -256,8 +261,10 @@
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(&saveset, regs))
-			sigreturn_exit(regs);
+		if (do_signal(&saveset, regs)) {
+			set_thread_flag(TIF_RESTOREALL);
+			return 0;
+		}
 	}
 }
 
@@ -292,8 +299,10 @@
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(&saveset, regs))
-			sigreturn_exit(regs);
+		if (do_signal(&saveset, regs)) {
+			set_thread_flag(TIF_RESTOREALL);
+			return 0;
+		}
 	}
 }
 
@@ -391,9 +400,6 @@
 static int save_user_regs(struct pt_regs *regs, struct mcontext __user *frame,
 		int sigret)
 {
-#ifdef CONFIG_PPC32
-	CHECK_FULL_REGS(regs);
-#endif
 	/* Make sure floating point registers are stored in regs */
 	flush_fp_to_thread(current);
 
@@ -828,12 +834,6 @@
 	regs->gpr[6] = (unsigned long) rt_sf;
 	regs->nip = (unsigned long) ka->sa.sa_handler;
 	regs->trap = 0;
-#ifdef CONFIG_PPC64
-	regs->result = 0;
-
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-#endif
 	return 1;
 
 badframe:
@@ -911,8 +911,8 @@
 	 */
 	if (do_setcontext(new_ctx, regs, 0))
 		do_exit(SIGSEGV);
-	sigreturn_exit(regs);
-	/* doesn't actually return back to here */
+
+	set_thread_flag(TIF_RESTOREALL);
 	return 0;
 }
 
@@ -945,12 +945,11 @@
 	 * nobody does any...
 	 */
 	compat_sys_sigaltstack((u32)(u64)&rt_sf->uc.uc_stack, 0, 0, 0, 0, 0, regs);
-	return (int)regs->result;
 #else
 	do_sigaltstack(&rt_sf->uc.uc_stack, NULL, regs->gpr[1]);
-	sigreturn_exit(regs);		/* doesn't return here */
-	return 0;
 #endif
+	set_thread_flag(TIF_RESTOREALL);
+	return 0;
 
  bad:
 	force_sig(SIGSEGV, current);
@@ -1041,9 +1040,7 @@
 	 */
 	do_sigaltstack(&ctx->uc_stack, NULL, regs->gpr[1]);
 
-	sigreturn_exit(regs);
-	/* doesn't actually return back to here */
-
+	set_thread_flag(TIF_RESTOREALL);
  out:
 	return 0;
 }
@@ -1107,12 +1104,6 @@
 	regs->gpr[4] = (unsigned long) sc;
 	regs->nip = (unsigned long) ka->sa.sa_handler;
 	regs->trap = 0;
-#ifdef CONFIG_PPC64
-	regs->result = 0;
-
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-#endif
 
 	return 1;
 
@@ -1160,12 +1151,8 @@
 	    || restore_user_regs(regs, sr, 1))
 		goto badframe;
 
-#ifdef CONFIG_PPC64
-	return (int)regs->result;
-#else
-	sigreturn_exit(regs);		/* doesn't return */
+	set_thread_flag(TIF_RESTOREALL);
 	return 0;
-#endif
 
 badframe:
 	force_sig(SIGSEGV, current);
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 1decf27..5462bef 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -96,8 +96,10 @@
 	while (1) {
 		current->state = TASK_INTERRUPTIBLE;
 		schedule();
-		if (do_signal(&saveset, regs))
+		if (do_signal(&saveset, regs)) {
+			set_thread_flag(TIF_RESTOREALL);
 			return 0;
+		}
 	}
 }
 
@@ -152,6 +154,14 @@
 	err |= __put_user(0, &sc->v_regs);
 #endif /* CONFIG_ALTIVEC */
 	err |= __put_user(&sc->gp_regs, &sc->regs);
+	if (!FULL_REGS(regs)) {
+		/* Zero out the unsaved GPRs to avoid information
+		   leak, and set TIF_SAVE_NVGPRS to ensure that the
+		   registers do actually get saved later. */
+		memset(&regs->gpr[14], 0, 18 * sizeof(unsigned long));
+		set_thread_flag(TIF_SAVE_NVGPRS);
+		current_thread_info()->nvgprs_frame = &sc->gp_regs;
+	}
 	err |= __copy_to_user(&sc->gp_regs, regs, GP_REGS_SIZE);
 	err |= __copy_to_user(&sc->fp_regs, &current->thread.fpr, FP_REGS_SIZE);
 	err |= __put_user(signr, &sc->signal);
@@ -340,6 +350,7 @@
 		do_exit(SIGSEGV);
 
 	/* This returns like rt_sigreturn */
+	set_thread_flag(TIF_RESTOREALL);
 	return 0;
 }
 
@@ -372,7 +383,8 @@
 	 */
 	do_sigaltstack(&uc->uc_stack, NULL, regs->gpr[1]);
 
-	return regs->result;
+	set_thread_flag(TIF_RESTOREALL);
+	return 0;
 
 badframe:
 #if DEBUG_SIG
@@ -454,9 +466,6 @@
 	if (err)
 		goto badframe;
 
-	if (test_thread_flag(TIF_SINGLESTEP))
-		ptrace_notify(SIGTRAP);
-
 	return 1;
 
 badframe:
@@ -502,6 +511,8 @@
 		 * we only get here if there is a handler, we dont restart.
 		 */
 		regs->result = -EINTR;
+		regs->gpr[3] = EINTR;
+		regs->ccr |= 0x10000000;
 		break;
 	case -ERESTARTSYS:
 		/* ERESTARTSYS means to restart the syscall if there is no
@@ -509,6 +520,8 @@
 		 */
 		if (!(ka->sa.sa_flags & SA_RESTART)) {
 			regs->result = -EINTR;
+			regs->gpr[3] = EINTR;
+			regs->ccr |= 0x10000000;
 			break;
 		}
 		/* fallthrough */
diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 65eaea9..4bb3650 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -113,7 +113,7 @@
 COMPAT_SYS(ssetmask)
 SYSCALL(setreuid)
 SYSCALL(setregid)
-SYSX(sys_ni_syscall,ppc32_sigsuspend,ppc_sigsuspend)
+SYS32ONLY(sigsuspend)
 COMPAT_SYS(sigpending)
 COMPAT_SYS(sethostname)
 COMPAT_SYS(setrlimit)
@@ -160,7 +160,7 @@
 COMPAT_SYS(sysinfo)
 COMPAT_SYS(ipc)
 SYSCALL(fsync)
-SYSX(sys_ni_syscall,ppc32_sigreturn,sys_sigreturn)
+SYS32ONLY(sigreturn)
 PPC_SYS(clone)
 COMPAT_SYS(setdomainname)
 PPC_SYS(newuname)
@@ -213,13 +213,13 @@
 SYSCALL(setresgid)
 SYSCALL(getresgid)
 COMPAT_SYS(prctl)
-SYSX(ppc64_rt_sigreturn,ppc32_rt_sigreturn,sys_rt_sigreturn)
+COMPAT_SYS(rt_sigreturn)
 COMPAT_SYS(rt_sigaction)
 COMPAT_SYS(rt_sigprocmask)
 COMPAT_SYS(rt_sigpending)
 COMPAT_SYS(rt_sigtimedwait)
 COMPAT_SYS(rt_sigqueueinfo)
-SYSX(ppc64_rt_sigsuspend,ppc32_rt_sigsuspend,ppc_rt_sigsuspend)
+COMPAT_SYS(rt_sigsuspend)
 COMPAT_SYS(pread64)
 COMPAT_SYS(pwrite64)
 SYSCALL(chown)
@@ -290,7 +290,7 @@
 COMPAT_SYS(clock_gettime)
 COMPAT_SYS(clock_getres)
 COMPAT_SYS(clock_nanosleep)
-SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext)
+COMPAT_SYS(swapcontext)
 COMPAT_SYS(tgkill)
 COMPAT_SYS(utimes)
 COMPAT_SYS(statfs64)
diff --git a/include/asm-powerpc/ptrace.h b/include/asm-powerpc/ptrace.h
index 1f7ecdb..9c550b3 100644
--- a/include/asm-powerpc/ptrace.h
+++ b/include/asm-powerpc/ptrace.h
@@ -87,7 +87,7 @@
 
 #define force_successful_syscall_return()   \
 	do { \
-		current_thread_info()->syscall_noerror = 1; \
+		set_thread_flag(TIF_NOERROR); \
 	} while(0)
 
 /*
diff --git a/include/asm-powerpc/thread_info.h b/include/asm-powerpc/thread_info.h
index e525f49..ac1e80e 100644
--- a/include/asm-powerpc/thread_info.h
+++ b/include/asm-powerpc/thread_info.h
@@ -37,8 +37,7 @@
 	int		preempt_count;		/* 0 => preemptable,
 						   <0 => BUG */
 	struct restart_block restart_block;
-	/* set by force_successful_syscall_return */
-	unsigned char	syscall_noerror;
+	void *nvgprs_frame;
 	/* low level flags - has atomic operations done on it */
 	unsigned long	flags ____cacheline_aligned_in_smp;
 };
@@ -123,6 +122,9 @@
 #define TIF_SINGLESTEP		9	/* singlestepping active */
 #define TIF_MEMDIE		10
 #define TIF_SECCOMP		11	/* secure computing */
+#define TIF_RESTOREALL		12	/* Restore all regs (implies NOERROR) */
+#define TIF_SAVE_NVGPRS		13	/* Save r14-r31 in signal frame */
+#define TIF_NOERROR		14	/* Force successful syscall return */
 
 /* as above, but as bit values */
 #define _TIF_SYSCALL_TRACE	(1<<TIF_SYSCALL_TRACE)
@@ -136,10 +138,14 @@
 #define _TIF_SYSCALL_AUDIT	(1<<TIF_SYSCALL_AUDIT)
 #define _TIF_SINGLESTEP		(1<<TIF_SINGLESTEP)
 #define _TIF_SECCOMP		(1<<TIF_SECCOMP)
+#define _TIF_RESTOREALL		(1<<TIF_RESTOREALL)
+#define _TIF_SAVE_NVGPRS	(1<<TIF_SAVE_NVGPRS)
+#define _TIF_NOERROR		(1<<TIF_NOERROR)
 #define _TIF_SYSCALL_T_OR_A	(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP)
 
 #define _TIF_USER_WORK_MASK	(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | \
-				 _TIF_NEED_RESCHED)
+				 _TIF_NEED_RESCHED | _TIF_RESTOREALL)
+#define _TIF_PERSYSCALL_MASK	(_TIF_RESTOREALL|_TIF_NOERROR|_TIF_SAVE_NVGPRS)
 
 #endif /* __KERNEL__ */