ARM: 7626/1: arm/crypto: Make asm SHA-1 and AES code Thumb-2 compatible

This patch fixes aes-armv4.S and sha1-armv4-large.S to work
natively in Thumb.  This allows ARM/Thumb interworking workarounds
to be removed.

I also take the opportunity to convert some explicit assembler
directives for exported functions to the standard ENTRY()/ENDPROC()
macros.

For the code itself:

  * In sha1_block_data_order, use of TEQ with sp is deprecated in
    ARMv7 and not supported in Thumb.  For the branches back to
    .L_00_15 and .L_40_59, the TEQ is converted to a CMP, under the
    assumption that clobbering the C flag here will not cause
    incorrect behaviour.

    For the first branch back to .L_20_39_or_60_79 the C flag is
    important (the bcs to .L_done that follows the branch depends on
    it), so sp is moved temporarily into another register so that
    TEQ, which leaves C untouched, can still be used for the
    comparison.

  * In the AES code, most forms of register-indexed addressing with
    shifts and rotates are not permitted for loads and stores in
    Thumb, so the address calculation is done using a separate
    instruction for the Thumb case.
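
    For illustration (the register numbers here are made up, not the
    actual aes-armv4.S lines), an ARM-only addressing form such as

	ldrb	r7,[r10,r8,lsr#8]

    is rewritten for Thumb as an explicit address calculation
    followed by a plain load, selected with the same ARM()/THUMB()
    macros used in the SHA-1 hunks below:

 ARM(	ldrb	r7,[r10,r8,lsr#8]	)
 THUMB(	add	r7,r10,r8,lsr#8		)
 THUMB(	ldrb	r7,[r7]			)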

The resulting code is unlikely to be optimally scheduled, but the
difference should not have a large performance impact given the
overall size of the code.  I haven't run any benchmarks.

Signed-off-by: Dave Martin <dave.martin@linaro.org>
Tested-by: David McCullough <ucdevel@gmail.com> (ARM only)
Acked-by: David McCullough <ucdevel@gmail.com>
Acked-by: Nicolas Pitre <nico@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S
index 7050ab1..92c6eed 100644
--- a/arch/arm/crypto/sha1-armv4-large.S
+++ b/arch/arm/crypto/sha1-armv4-large.S
@@ -51,13 +51,12 @@
 @ Profiler-assisted and platform-specific optimization resulted in 10%
 @ improvement on Cortex A8 core and 12.2 cycles per byte.
 
+#include <linux/linkage.h>
+
 .text
 
-.global	sha1_block_data_order
-.type	sha1_block_data_order,%function
-
 .align	2
-sha1_block_data_order:
+ENTRY(sha1_block_data_order)
 	stmdb	sp!,{r4-r12,lr}
 	add	r2,r1,r2,lsl#6	@ r2 to point at the end of r1
 	ldmia	r0,{r3,r4,r5,r6,r7}
@@ -194,7 +193,7 @@
 	eor	r10,r10,r7,ror#2		@ F_00_19(B,C,D)
 	str	r9,[r14,#-4]!
 	add	r3,r3,r10			@ E+=F_00_19(B,C,D)
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_00_15		@ [((11+4)*5+2)*3]
 #if __ARM_ARCH__<7
 	ldrb	r10,[r1,#2]
@@ -374,7 +373,9 @@
 						@ F_xx_xx
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_20_39(B,C,D)
-	teq	r14,sp			@ preserve carry
+ ARM(	teq	r14,sp		)	@ preserve carry
+ THUMB(	mov	r11,sp		)
+ THUMB(	teq	r14,r11		)	@ preserve carry
 	bne	.L_20_39_or_60_79	@ [+((12+3)*5+2)*4]
 	bcs	.L_done			@ [+((12+3)*5+2)*4], spare 300 bytes
 
@@ -466,7 +467,7 @@
 	add	r3,r3,r9			@ E+=X[i]
 	add	r3,r3,r10			@ E+=F_40_59(B,C,D)
 	add	r3,r3,r11,ror#2
-	teq	r14,sp
+	cmp	r14,sp
 	bne	.L_40_59		@ [+((12+5)*5+2)*4]
 
 	ldr	r8,.LK_60_79
@@ -485,19 +486,12 @@
 	teq	r1,r2
 	bne	.Lloop			@ [+18], total 1307
 
-#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
-#else
-	ldmia	sp!,{r4-r12,lr}
-	tst	lr,#1
-	moveq	pc,lr			@ be binary compatible with V4, yet
-	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
-#endif
 .align	2
 .LK_00_19:	.word	0x5a827999
 .LK_20_39:	.word	0x6ed9eba1
 .LK_40_59:	.word	0x8f1bbcdc
 .LK_60_79:	.word	0xca62c1d6
-.size	sha1_block_data_order,.-sha1_block_data_order
+ENDPROC(sha1_block_data_order)
 .asciz	"SHA1 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>"
 .align	2