/*
 * sha1-ce-core.S - SHA-1 secure hash using ARMv8 Crypto Extensions
 *
 * Copyright (C) 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	.text
	.arch		armv8-a+crypto

	/* SHA-1 round constants, each replicated across all four lanes */
	k0		.req	v0
	k1		.req	v1
	k2		.req	v2
	k3		.req	v3

	/* scratch: schedule words + round constant, used alternately */
	t0		.req	v4
	t1		.req	v5

	/* hash state as held in memory: four words in dga, the fifth in dgb */
	dga		.req	q6
	dgav		.req	v6
	dgb		.req	s7
	dgbv		.req	v7

	/*
	 * Working registers for the block in flight: dg0 starts as a copy of
	 * dga; dg1/dg2 alternately receive the sha1h result of dg0s.
	 */
	dg0q		.req	q12
	dg0s		.req	s12
	dg0v		.req	v12
	dg1s		.req	s13
	dg1v		.req	v13
	dg2s		.req	s14

37 .macro add_only, op, ev, rc, s0, dg1
38 .ifc \ev, ev
39 add t1.4s, v\s0\().4s, \rc\().4s
40 sha1h dg2s, dg0s
41 .ifnb \dg1
42 sha1\op dg0q, \dg1, t0.4s
43 .else
44 sha1\op dg0q, dg1s, t0.4s
45 .endif
46 .else
47 .ifnb \s0
48 add t0.4s, v\s0\().4s, \rc\().4s
49 .endif
50 sha1h dg1s, dg0s
51 sha1\op dg0q, dg2s, t1.4s
52 .endif
53 .endm
54
55 .macro add_update, op, ev, rc, s0, s1, s2, s3, dg1
56 sha1su0 v\s0\().4s, v\s1\().4s, v\s2\().4s
57 add_only \op, \ev, \rc, \s1, \dg1
58 sha1su1 v\s0\().4s, v\s3\().4s
59 .endm
60
Ard Biesheuvel20b04c02018-01-10 12:11:42 +000061 .macro loadrc, k, val, tmp
62 movz \tmp, :abs_g0_nc:\val
63 movk \tmp, :abs_g1:\val
64 dup \k, \tmp
65 .endm
Ard Biesheuvel2c988332014-03-06 16:23:33 +080066
67 /*
Ard Biesheuvel07eb54d2015-04-09 12:55:44 +020068 * void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
69 * int blocks)
Ard Biesheuvel2c988332014-03-06 16:23:33 +080070 */
71ENTRY(sha1_ce_transform)
Ard Biesheuvel7df8d162018-04-30 18:18:21 +020072 frame_push 3
73
74 mov x19, x0
75 mov x20, x1
76 mov x21, x2
77
Ard Biesheuvel2c988332014-03-06 16:23:33 +080078 /* load round constants */
Ard Biesheuvel7df8d162018-04-30 18:18:21 +0200790: loadrc k0.4s, 0x5a827999, w6
Ard Biesheuvel20b04c02018-01-10 12:11:42 +000080 loadrc k1.4s, 0x6ed9eba1, w6
81 loadrc k2.4s, 0x8f1bbcdc, w6
82 loadrc k3.4s, 0xca62c1d6, w6
Ard Biesheuvel2c988332014-03-06 16:23:33 +080083
84 /* load state */
Ard Biesheuvel7df8d162018-04-30 18:18:21 +020085 ld1 {dgav.4s}, [x19]
86 ldr dgb, [x19, #16]
Ard Biesheuvel2c988332014-03-06 16:23:33 +080087
Ard Biesheuvel07eb54d2015-04-09 12:55:44 +020088 /* load sha1_ce_state::finalize */
Ard Biesheuvelf4857f42017-04-26 17:11:32 +010089 ldr_l w4, sha1_ce_offsetof_finalize, x4
Ard Biesheuvel7df8d162018-04-30 18:18:21 +020090 ldr w4, [x19, x4]
Ard Biesheuvel2c988332014-03-06 16:23:33 +080091
92 /* load input */
Ard Biesheuvel7df8d162018-04-30 18:18:21 +0200931: ld1 {v8.4s-v11.4s}, [x20], #64
94 sub w21, w21, #1
Ard Biesheuvel2c988332014-03-06 16:23:33 +080095
Ard Biesheuvel2c988332014-03-06 16:23:33 +080096CPU_LE( rev32 v8.16b, v8.16b )
97CPU_LE( rev32 v9.16b, v9.16b )
98CPU_LE( rev32 v10.16b, v10.16b )
99CPU_LE( rev32 v11.16b, v11.16b )
100
Ard Biesheuvel7df8d162018-04-30 18:18:21 +02001012: add t0.4s, v8.4s, k0.4s
Ard Biesheuvel2c988332014-03-06 16:23:33 +0800102 mov dg0v.16b, dgav.16b
103
104 add_update c, ev, k0, 8, 9, 10, 11, dgb
105 add_update c, od, k0, 9, 10, 11, 8
106 add_update c, ev, k0, 10, 11, 8, 9
107 add_update c, od, k0, 11, 8, 9, 10
108 add_update c, ev, k1, 8, 9, 10, 11
109
110 add_update p, od, k1, 9, 10, 11, 8
111 add_update p, ev, k1, 10, 11, 8, 9
112 add_update p, od, k1, 11, 8, 9, 10
113 add_update p, ev, k1, 8, 9, 10, 11
114 add_update p, od, k2, 9, 10, 11, 8
115
116 add_update m, ev, k2, 10, 11, 8, 9
117 add_update m, od, k2, 11, 8, 9, 10
118 add_update m, ev, k2, 8, 9, 10, 11
119 add_update m, od, k2, 9, 10, 11, 8
120 add_update m, ev, k3, 10, 11, 8, 9
121
122 add_update p, od, k3, 11, 8, 9, 10
123 add_only p, ev, k3, 9
124 add_only p, od, k3, 10
125 add_only p, ev, k3, 11
126 add_only p, od
127
128 /* update state */
129 add dgbv.2s, dgbv.2s, dg1v.2s
130 add dgav.4s, dgav.4s, dg0v.4s
131
Ard Biesheuvel7df8d162018-04-30 18:18:21 +0200132 cbz w21, 3f
133
134 if_will_cond_yield_neon
135 st1 {dgav.4s}, [x19]
136 str dgb, [x19, #16]
137 do_cond_yield_neon
138 b 0b
139 endif_yield_neon
140
141 b 1b
Ard Biesheuvel2c988332014-03-06 16:23:33 +0800142
143 /*
144 * Final block: add padding and total bit count.
Ard Biesheuvel07eb54d2015-04-09 12:55:44 +0200145 * Skip if the input size was not a round multiple of the block size,
146 * the padding is handled by the C code in that case.
Ard Biesheuvel2c988332014-03-06 16:23:33 +0800147 */
Ard Biesheuvel7df8d162018-04-30 18:18:21 +02001483: cbz x4, 4f
Ard Biesheuvelf4857f42017-04-26 17:11:32 +0100149 ldr_l w4, sha1_ce_offsetof_count, x4
Ard Biesheuvel7df8d162018-04-30 18:18:21 +0200150 ldr x4, [x19, x4]
Ard Biesheuvel2c988332014-03-06 16:23:33 +0800151 movi v9.2d, #0
152 mov x8, #0x80000000
153 movi v10.2d, #0
154 ror x7, x4, #29 // ror(lsl(x4, 3), 32)
155 fmov d8, x8
156 mov x4, #0
157 mov v11.d[0], xzr
158 mov v11.d[1], x7
Ard Biesheuvel7df8d162018-04-30 18:18:21 +0200159 b 2b
Ard Biesheuvel2c988332014-03-06 16:23:33 +0800160
161 /* store new state */
Ard Biesheuvel7df8d162018-04-30 18:18:21 +02001624: st1 {dgav.4s}, [x19]
163 str dgb, [x19, #16]
164 frame_pop
Ard Biesheuvel2c988332014-03-06 16:23:33 +0800165 ret
166ENDPROC(sha1_ce_transform)