Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6 This updates the sha512 fix so that it doesn't cause excessive stack usage on i386. This is done by reverting to the original code, and avoiding the W duplication by moving its initialisation into the loop. As the underlying code is in fact the one that we have used for years, I'm pushing this now instead of postponing to the next cycle. * git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: crypto: sha512 - Avoid stack bloat on i386 crypto: sha512 - Use binary and instead of modulus

commit: ca81a62198e39ad9155f12725c269fcc2a9f1f8b [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Mon Feb 13 20:33:45 2012 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Mon Feb 13 20:33:45 2012 -0800
tree: 922decd817e170899c30eaa47335bec080f020da
parent: e3f89f4ae4ea0227d49ff1cc1276ef04b2749ed2 [diff]
parent: 3a92d687c8015860a19213e3c102cad6b722f83c [diff]
diff --git a/crypto/sha512_generic.c b/crypto/sha512_generic.c
index 88f160b..f04af93 100644
--- a/crypto/sha512_generic.c
+++ b/crypto/sha512_generic.c

@@ -78,7 +78,7 @@
 
 static inline void BLEND_OP(int I, u64 *W)
 {
-	W[I % 16] += s1(W[(I-2) % 16]) + W[(I-7) % 16] + s0(W[(I-15) % 16]);
+	W[I & 15] += s1(W[(I-2) & 15]) + W[(I-7) & 15] + s0(W[(I-15) & 15]);
 }
 
 static void
@@ -89,46 +89,42 @@
 	int i;
 	u64 W[16];
 
-	/* load the input */
-        for (i = 0; i < 16; i++)
-                LOAD_OP(i, W, input);
-
 	/* load the state into our registers */
 	a=state[0];   b=state[1];   c=state[2];   d=state[3];
 	e=state[4];   f=state[5];   g=state[6];   h=state[7];
 
-#define SHA512_0_15(i, a, b, c, d, e, f, g, h)			\
-	t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[i];	\
-	t2 = e0(a) + Maj(a, b, c);				\
-	d += t1;						\
-	h = t1 + t2
+	/* now iterate */
+	for (i=0; i<80; i+=8) {
+		if (!(i & 8)) {
+			int j;
 
-#define SHA512_16_79(i, a, b, c, d, e, f, g, h)			\
-	BLEND_OP(i, W);						\
-	t1 = h + e1(e) + Ch(e, f, g) + sha512_K[i] + W[(i)%16];	\
-	t2 = e0(a) + Maj(a, b, c);				\
-	d += t1;						\
-	h = t1 + t2
+			if (i < 16) {
+				/* load the input */
+				for (j = 0; j < 16; j++)
+					LOAD_OP(i + j, W, input);
+			} else {
+				for (j = 0; j < 16; j++) {
+					BLEND_OP(i + j, W);
+				}
+			}
+		}
 
-	for (i = 0; i < 16; i += 8) {
-		SHA512_0_15(i, a, b, c, d, e, f, g, h);
-		SHA512_0_15(i + 1, h, a, b, c, d, e, f, g);
-		SHA512_0_15(i + 2, g, h, a, b, c, d, e, f);
-		SHA512_0_15(i + 3, f, g, h, a, b, c, d, e);
-		SHA512_0_15(i + 4, e, f, g, h, a, b, c, d);
-		SHA512_0_15(i + 5, d, e, f, g, h, a, b, c);
-		SHA512_0_15(i + 6, c, d, e, f, g, h, a, b);
-		SHA512_0_15(i + 7, b, c, d, e, f, g, h, a);
-	}
-	for (i = 16; i < 80; i += 8) {
-		SHA512_16_79(i, a, b, c, d, e, f, g, h);
-		SHA512_16_79(i + 1, h, a, b, c, d, e, f, g);
-		SHA512_16_79(i + 2, g, h, a, b, c, d, e, f);
-		SHA512_16_79(i + 3, f, g, h, a, b, c, d, e);
-		SHA512_16_79(i + 4, e, f, g, h, a, b, c, d);
-		SHA512_16_79(i + 5, d, e, f, g, h, a, b, c);
-		SHA512_16_79(i + 6, c, d, e, f, g, h, a, b);
-		SHA512_16_79(i + 7, b, c, d, e, f, g, h, a);
+		t1 = h + e1(e) + Ch(e,f,g) + sha512_K[i  ] + W[(i & 15)];
+		t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
+		t1 = g + e1(d) + Ch(d,e,f) + sha512_K[i+1] + W[(i & 15) + 1];
+		t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
+		t1 = f + e1(c) + Ch(c,d,e) + sha512_K[i+2] + W[(i & 15) + 2];
+		t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
+		t1 = e + e1(b) + Ch(b,c,d) + sha512_K[i+3] + W[(i & 15) + 3];
+		t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
+		t1 = d + e1(a) + Ch(a,b,c) + sha512_K[i+4] + W[(i & 15) + 4];
+		t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
+		t1 = c + e1(h) + Ch(h,a,b) + sha512_K[i+5] + W[(i & 15) + 5];
+		t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
+		t1 = b + e1(g) + Ch(g,h,a) + sha512_K[i+6] + W[(i & 15) + 6];
+		t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
+		t1 = a + e1(f) + Ch(f,g,h) + sha512_K[i+7] + W[(i & 15) + 7];
+		t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
 	}
 
 	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
commit	ca81a62198e39ad9155f12725c269fcc2a9f1f8b	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Mon Feb 13 20:33:45 2012 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Mon Feb 13 20:33:45 2012 -0800
tree	922decd817e170899c30eaa47335bec080f020da
parent	e3f89f4ae4ea0227d49ff1cc1276ef04b2749ed2 [diff]
parent	3a92d687c8015860a19213e3c102cad6b722f83c [diff]