blob: 99a028e298ede4034a2c3556df87bfc565d366d5 [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * aesce-ccm-core.S - AES-CCM transform for ARMv8 with Crypto Extensions
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

	/* All code below goes into .text and needs the AES instructions. */
	.text
	.arch	armv8-a+crypto

/*
 * void ce_aes_ccm_auth_data(u8 mac[], u8 const in[], u32 abytes,
 *			     u32 *macp, u8 const rk[], u32 rounds);
 *
 * Fold abytes bytes of input into the running CCM MAC.
 *
 * In:	x0 = mac	16-byte MAC state, read and written in place
 *	x1 = in		input bytes
 *	w2 = abytes	number of input bytes
 *	x3 = macp	in/out: number of bytes that have been XORed into
 *			the MAC block but not yet encrypted; 0 on return
 *			when the input ended on a block boundary
 *	x4 = rk		round keys, 16 bytes each
 *	w5 = rounds	number of AES rounds; compared against 12 to pick
 *			the entry point into the 3-rounds-per-pass loop
 *
 * The MAC kept in memory is always "state XOR data": encryption of a
 * block is deferred until more data (or the final call) arrives.
 *
 * Clobbers: x0, x1, x2, x6, x7, x8, v0, v1, v3-v5, condition flags.
 *
 * abytes == 0 returns immediately without touching mac[] or *macp.
 * Without that guard an empty input would either encrypt the MAC one
 * extra time (*macp == 0) or run the byte loop past the end of in[]
 * (*macp != 0).
 */
SYM_FUNC_START(ce_aes_ccm_auth_data)
	cbz	w2, 11f				/* nothing to do for empty input */
	ldr	w8, [x3]			/* leftover from prev round? */
	ld1	{v0.16b}, [x0]			/* load mac */
	cbz	w8, 1f
	sub	w8, w8, #16			/* w8 = -(bytes still missing) */
	eor	v1.16b, v1.16b, v1.16b		/* start from an all-zero block */
0:	ldrb	w7, [x1], #1			/* get 1 byte of input */
	subs	w2, w2, #1			/* Z is consumed by 'beq 8f' below */
	add	w8, w8, #1
	ins	v1.b[0], w7
	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate in the input bytes */
	beq	8f				/* out of input? */
	cbnz	w8, 0b
	eor	v0.16b, v0.16b, v1.16b		/* partial block is now complete */
1:	ld1	{v3.4s}, [x4]			/* load first round key */
	prfm	pldl1strm, [x1]
	cmp	w5, #12				/* which key size? */
	add	x6, x4, #16			/* x6 walks the remaining round keys */
	sub	w7, w5, #2			/* modified # of rounds */
	bmi	2f				/* rounds < 12 */
	bne	5f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	4f
2:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x6], #16		/* load 2nd round key */
3:	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
4:	ld1	{v3.4s}, [x6], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
5:	ld1	{v4.4s}, [x6], #16		/* load next round key */
	subs	w7, w7, #3			/* three rounds per loop pass */
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	ld1	{v5.4s}, [x6], #16		/* load next round key */
	bpl	3b
	aese	v0.16b, v4.16b
	subs	w2, w2, #16			/* last data? */
	eor	v0.16b, v0.16b, v5.16b		/* final round */
	bmi	6f				/* fewer than 16 bytes left */
	ld1	{v1.16b}, [x1], #16		/* load next input block */
	eor	v0.16b, v0.16b, v1.16b		/* xor with mac */
	bne	1b				/* flags still from 'subs w2' above */
6:	st1	{v0.16b}, [x0]			/* store mac */
	beq	10f				/* input ended exactly on a block */
	adds	w2, w2, #16			/* w2 = number of tail bytes */
	beq	10f
	mov	w8, w2				/* tail length becomes new *macp */
7:	ldrb	w7, [x1], #1
	umov	w6, v0.b[0]
	eor	w6, w6, w7
	strb	w6, [x0], #1			/* xor tail byte straight into mac[] */
	subs	w2, w2, #1
	beq	10f
	ext	v0.16b, v0.16b, v0.16b, #1	/* rotate out the mac bytes */
	b	7b
	/* input ran out while topping up the leftover block */
8:	cbz	w8, 91f				/* block happened to fill up exactly */
	mov	w7, w8
	add	w8, w8, #16			/* new leftover count for *macp */
9:	ext	v1.16b, v1.16b, v1.16b, #1	/* rotate bytes to their final lanes */
	adds	w7, w7, #1
	bne	9b
91:	eor	v0.16b, v0.16b, v1.16b
	st1	{v0.16b}, [x0]
10:	str	w8, [x3]
11:	ret
SYM_FUNC_END(ce_aes_ccm_auth_data)

/*
 * void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u8 const rk[],
 *			 u32 rounds);
 *
 * Encrypt the MAC block and the counter block with the same round keys
 * and store their XOR back to mac[].
 *
 * In:	x0 = mac	16-byte MAC, read and overwritten with the result
 *	x1 = ctr	16-byte counter block (only read)
 *	x2 = rk		round keys, 16 bytes each
 *	w3 = rounds	number of AES rounds; compared against 12 to pick
 *			the entry point into the 3-rounds-per-pass loop
 *
 * Both blocks go through identical rounds, so the last round key would
 * be XORed into each of them and drops out of the final XOR; it is
 * therefore never loaded.
 *
 * Clobbers: x2, x3, v0, v1, v3-v5, condition flags.
 */
SYM_FUNC_START(ce_aes_ccm_final)
	ld1	{v3.4s}, [x2], #16		/* load first round key */
	ld1	{v0.16b}, [x0]			/* load mac */
	cmp	w3, #12				/* which key size? */
	sub	w3, w3, #2			/* modified # of rounds */
	ld1	{v1.16b}, [x1]			/* load 1st ctriv */
	bmi	0f				/* rounds < 12 */
	bne	3f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	2f
0:	mov	v4.16b, v3.16b
1:	ld1	{v5.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
2:	ld1	{v3.4s}, [x2], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v4.4s}, [x2], #16		/* load next round key */
	subs	w3, w3, #3			/* three rounds per loop pass */
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	bpl	1b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	/* final round key cancels out */
	eor	v0.16b, v0.16b, v1.16b		/* en-/decrypt the mac */
	st1	{v0.16b}, [x0]			/* store result */
	ret
SYM_FUNC_END(ce_aes_ccm_final)

/*
 * aes_ccm_do_crypt enc
 *
 * Body shared by ce_aes_ccm_encrypt (enc == 1) and ce_aes_ccm_decrypt
 * (enc == 0).  The macro contains its own 'ret'.
 *
 * In:	x0 = out	output bytes
 *	x1 = in		input bytes
 *	w2 = cbytes	number of bytes to process
 *	x3 = rk		round keys, 16 bytes each
 *	w4 = rounds	number of AES rounds; compared against 12 to pick
 *			the entry point into the 3-rounds-per-pass loop
 *	x5 = mac	16-byte MAC state, updated in place
 *	x6 = ctr	16-byte counter block; the 8 bytes at offset 8 are
 *			handled as a big-endian counter and written back
 *			when the input ends on a block boundary
 *
 * Per 16-byte block the counter is incremented, then the MAC (v0) and
 * the counter block (v1) are run through the same AES rounds, two-way
 * interleaved.  A trailing partial block is handled byte by byte; on
 * that path the counter is not written back.
 *
 * Clobbers: x0, x1, x2, x5, x6, x7, x8, x9, x10, v0-v5, condition flags.
 *
 * cbytes == 0 returns immediately without touching out[], mac[] or
 * ctr[].  Without that guard the partial-block loop would be entered
 * with a zero count and wrap around.
 */
	.macro	aes_ccm_do_crypt,enc
	cbz	w2, 5f				/* nothing to do for empty input */
	ldr	x8, [x6, #8]			/* load lower ctr */
	ld1	{v0.16b}, [x5]			/* load mac */
CPU_LE(	rev	x8, x8			)	/* keep swabbed ctr in reg */
0:	/* outer loop */
	ld1	{v1.8b}, [x6]			/* load upper ctr */
	prfm	pldl1strm, [x1]
	add	x8, x8, #1
	rev	x9, x8
	cmp	w4, #12				/* which key size? */
	sub	w7, w4, #2			/* get modified # of rounds */
	ins	v1.d[1], x9			/* no carry in lower ctr */
	ld1	{v3.4s}, [x3]			/* load first round key */
	add	x10, x3, #16			/* x10 walks the remaining round keys */
	bmi	1f				/* rounds < 12 */
	bne	4f				/* rounds > 12 */
	mov	v5.16b, v3.16b			/* rounds == 12 */
	b	3f
1:	mov	v4.16b, v3.16b
	ld1	{v5.4s}, [x10], #16		/* load 2nd round key */
2:	/* inner loop: 3 rounds, 2x interleaved */
	aese	v0.16b, v4.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v4.16b
	aesmc	v1.16b, v1.16b
3:	ld1	{v3.4s}, [x10], #16		/* load next round key */
	aese	v0.16b, v5.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v5.16b
	aesmc	v1.16b, v1.16b
4:	ld1	{v4.4s}, [x10], #16		/* load next round key */
	subs	w7, w7, #3
	aese	v0.16b, v3.16b
	aesmc	v0.16b, v0.16b
	aese	v1.16b, v3.16b
	aesmc	v1.16b, v1.16b
	ld1	{v5.4s}, [x10], #16		/* load next round key */
	bpl	2b
	aese	v0.16b, v4.16b
	aese	v1.16b, v4.16b
	subs	w2, w2, #16			/* flags also feed 'bne 0b' below */
	bmi	6f				/* partial block? */
	ld1	{v2.16b}, [x1], #16		/* load next input block */
	.if	\enc == 1
	eor	v2.16b, v2.16b, v5.16b		/* final round enc+mac */
	eor	v1.16b, v1.16b, v2.16b		/* xor with crypted ctr */
	.else
	eor	v2.16b, v2.16b, v1.16b		/* xor with crypted ctr */
	eor	v1.16b, v2.16b, v5.16b		/* final round enc */
	.endif
	eor	v0.16b, v0.16b, v2.16b		/* xor mac with pt ^ rk[last] */
	st1	{v1.16b}, [x0], #16		/* write output block */
	bne	0b
CPU_LE(	rev	x8, x8			)
	st1	{v0.16b}, [x5]			/* store mac */
	str	x8, [x6, #8]			/* store lsb end of ctr (BE) */
5:	ret

6:	eor	v0.16b, v0.16b, v5.16b		/* final round mac */
	eor	v1.16b, v1.16b, v5.16b		/* final round enc */
	st1	{v0.16b}, [x5]			/* store mac */
	add	w2, w2, #16			/* process partial tail block */
7:	ldrb	w9, [x1], #1			/* get 1 byte of input */
	umov	w6, v1.b[0]			/* get top crypted ctr byte */
	umov	w7, v0.b[0]			/* get top mac byte */
	.if	\enc == 1
	eor	w7, w7, w9			/* mac ^= plaintext (the input) */
	eor	w9, w9, w6			/* output = input ^ ctr */
	.else
	eor	w9, w9, w6			/* output = input ^ ctr */
	eor	w7, w7, w9			/* mac ^= plaintext (the output) */
	.endif
	strb	w9, [x0], #1			/* store out byte */
	strb	w7, [x5], #1			/* store mac byte */
	subs	w2, w2, #1
	beq	5b
	ext	v0.16b, v0.16b, v0.16b, #1	/* shift out mac byte */
	ext	v1.16b, v1.16b, v1.16b, #1	/* shift out ctr byte */
	b	7b
	.endm

/*
 * void ce_aes_ccm_encrypt(u8 out[], u8 const in[], u32 cbytes,
 *			   u8 const rk[], u32 rounds, u8 mac[],
 *			   u8 ctr[]);
 * void ce_aes_ccm_decrypt(u8 out[], u8 const in[], u32 cbytes,
 *			   u8 const rk[], u32 rounds, u8 mac[],
 *			   u8 ctr[]);
 *
 * Both entry points expand aes_ccm_do_crypt; the arguments arrive in
 * x0-x6 in prototype order and the macro body supplies the 'ret'.
 */
SYM_FUNC_START(ce_aes_ccm_encrypt)
	aes_ccm_do_crypt	1		/* enc == 1: encrypt variant */
SYM_FUNC_END(ce_aes_ccm_encrypt)

/*
 * ce_aes_ccm_decrypt(out, in, cbytes, rk, rounds, mac, ctr):
 * expands aes_ccm_do_crypt with enc == 0; the macro body supplies the 'ret'.
 */
SYM_FUNC_START(ce_aes_ccm_decrypt)
	aes_ccm_do_crypt	0		/* enc == 0: decrypt variant */
SYM_FUNC_END(ce_aes_ccm_decrypt)
Mark Brown0e896402019-12-13 15:49:10 +0000221SYM_FUNC_END(ce_aes_ccm_decrypt)