/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

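/*
 * Overview: the do_crypt macro below implements a table-based AES
 * transform using the generic 1024-byte lookup tables crypto_ft_tab
 * (encryption) and crypto_it_tab (decryption); the final decryption
 * round additionally uses the 256-byte crypto_aes_inv_sbox table.
 */
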
        .text
        .align          5

        rk              .req    r0
        rounds          .req    r1
        in              .req    r2
        out             .req    r3
        ttab            .req    ip

        t0              .req    lr
        t1              .req    r2
        t2              .req    r3

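/*
 * __select - extract byte \idx of word \in into \out.
 *
 * On ARMv7+ this uses ubfx, which places the byte in bits [7:0]. On older
 * architectures the byte is only masked and stays at its original bit
 * position; __load compensates for that when forming the table index.
 */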
        .macro          __select, out, in, idx
        .if             __LINUX_ARM_ARCH__ < 7
        and             \out, \in, #0xff << (8 * \idx)
        .else
        ubfx            \out, \in, #(8 * \idx), #8
        .endif
        .endm

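/*
 * __load - load a table entry indexed by the byte extracted by __select.
 * \sz is the log2 of the entry size (2 for the 32-bit ft/it tables, 0 for
 * a byte table) and \op is an optional suffix ('b' for the byte loads in
 * the final round). On pre-v7 cores the index produced by __select is
 * still shifted left by 8 * \idx, so the scaling is folded into the
 * load's shift amount instead.
 */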
        .macro          __load, out, in, idx, sz, op
        .if             __LINUX_ARM_ARCH__ < 7 && \idx > 0
        ldr\op          \out, [ttab, \in, lsr #(8 * \idx) - \sz]
        .else
        ldr\op          \out, [ttab, \in, lsl #\sz]
        .endif
        .endm

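/*
 * __hround - half round: compute two output columns (\out0, \out1) of the
 * next state from the four input columns \in0..\in3. Each output column
 * is the XOR of four table lookups (one byte taken from each input
 * column, rotated into place with 'ror') and one round key word fetched
 * via 'ldm rk!'. \enc selects the byte schedule for encryption vs
 * decryption, and \t3/\t4 name two scratch registers the caller can spare.
 */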
        .macro          __hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
        __select        \out0, \in0, 0
        __select        t0, \in1, 1
        __load          \out0, \out0, 0, \sz, \op
        __load          t0, t0, 1, \sz, \op

        .if             \enc
        __select        \out1, \in1, 0
        __select        t1, \in2, 1
        .else
        __select        \out1, \in3, 0
        __select        t1, \in0, 1
        .endif
        __load          \out1, \out1, 0, \sz, \op
        __select        t2, \in2, 2
        __load          t1, t1, 1, \sz, \op
        __load          t2, t2, 2, \sz, \op

        eor             \out0, \out0, t0, ror #24

        __select        t0, \in3, 3
        .if             \enc
        __select        \t3, \in3, 2
        __select        \t4, \in0, 3
        .else
        __select        \t3, \in1, 2
        __select        \t4, \in2, 3
        .endif
        __load          \t3, \t3, 2, \sz, \op
        __load          t0, t0, 3, \sz, \op
        __load          \t4, \t4, 3, \sz, \op

        .ifnb           \oldcpsr
        /*
         * This is the final round and we're done with all data-dependent table
         * lookups, so we can safely re-enable interrupts.
         */
        restore_irqs    \oldcpsr
        .endif

        eor             \out1, \out1, t1, ror #24
        eor             \out0, \out0, t2, ror #16
        ldm             rk!, {t1, t2}
        eor             \out1, \out1, \t3, ror #16
        eor             \out0, \out0, t0, ror #8
        eor             \out1, \out1, \t4, ror #8
        eor             \out0, \out0, t1
        eor             \out1, \out1, t2
        .endm

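/*
 * fround/iround - one full forward/inverse round over all four columns,
 * implemented as two __hround invocations. The second invocation passes
 * input registers (\in1/\in2 resp. \in1/\in0) as its scratch registers,
 * which is safe because each of those columns is read for the last time
 * by the very __select that overwrites it. \oldcpsr is only passed for
 * the final round, so that __hround can re-enable interrupts once the
 * last table lookup has been issued.
 */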
        .macro          fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
        __hround        \out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
        __hround        \out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
        .endm

        .macro          iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
        __hround        \out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
        __hround        \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
        .endm

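/*
 * __rev - byte-swap a 32-bit word. ARMv6+ has the 'rev' instruction;
 * older cores open-code the swap using t0/t1/t2 as scratch.
 */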
        .macro          __rev, out, in
        .if             __LINUX_ARM_ARCH__ < 6
        lsl             t0, \in, #24
        and             t1, \in, #0xff00
        and             t2, \in, #0xff0000
        orr             \out, t0, \in, lsr #24
        orr             \out, \out, t1, lsl #8
        orr             \out, \out, t2, lsr #8
        .else
        rev             \out, \in
        .endif
        .endm

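/*
 * __adrl - load the address of symbol \sym into \out, optionally under
 * condition code \c. ARMv7+ uses a movw/movt pair; older architectures
 * fall back to a literal pool load (hence the .ltorg in do_crypt).
 */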
        .macro          __adrl, out, sym, c
        .if             __LINUX_ARM_ARCH__ < 7
        ldr\c           \out, =\sym
        .else
        movw\c          \out, #:lower16:\sym
        movt\c          \out, #:upper16:\sym
        .endif
        .endm

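/*
 * do_crypt - shared body of the encrypt and decrypt entry points.
 *
 *   \round  round macro to expand (fround or iround)
 *   \ttab   main lookup table used for all but the last round
 *   \ltab   table for the last round, or empty to reuse \ttab
 *   \bsz    log2 entry size of the last-round table (2 or 0)
 *
 * On entry, rk (r0) points to the round keys, rounds (r1) is the number
 * of rounds (10, 12 or 14), and in (r2) / out (r3) point to the 16-byte
 * blocks. Note that t1/t2 alias in/out, so 'out' is reloaded from the
 * stack before the final store.
 */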
        .macro          do_crypt, round, ttab, ltab, bsz
        push            {r3-r11, lr}

        // Load keys first, to reduce latency in case they're not cached yet.
        ldm             rk!, {r8-r11}

        ldr             r4, [in]
        ldr             r5, [in, #4]
        ldr             r6, [in, #8]
        ldr             r7, [in, #12]

#ifdef CONFIG_CPU_BIG_ENDIAN
        __rev           r4, r4
        __rev           r5, r5
        __rev           r6, r6
        __rev           r7, r7
#endif

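        // Round 0: XOR the first round key into the state (AddRoundKey).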
        eor             r4, r4, r8
        eor             r5, r5, r9
        eor             r6, r6, r10
        eor             r7, r7, r11

        __adrl          ttab, \ttab
        /*
         * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
         * L1 cache, assuming cacheline size >= 32. This is a hardening measure
         * intended to make cache-timing attacks more difficult. They may not
         * be fully prevented, however; see the paper
         * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
         * ("Cache-timing attacks on AES") for a discussion of the many
         * difficulties involved in writing truly constant-time AES software.
         */
        save_and_disable_irqs   t0
        .set            i, 0
        .rept           1024 / 128
        ldr             r8, [ttab, #i + 0]
        ldr             r9, [ttab, #i + 32]
        ldr             r10, [ttab, #i + 64]
        ldr             r11, [ttab, #i + 96]
        .set            i, i + 128
        .endr
        push            {t0}            // oldcpsr

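        /*
         * Round loop: execute rounds - 1 full rounds here; the last round
         * is done separately below with byte loads from the last-round
         * table. The 'tst rounds, #2' picks the loop entry point so that
         * the count works out for 10, 12 and 14 rounds alike.
         */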
        tst             rounds, #2
        bne             1f

0:      \round          r8, r9, r10, r11, r4, r5, r6, r7
        \round          r4, r5, r6, r7, r8, r9, r10, r11

1:      subs            rounds, rounds, #4
        \round          r8, r9, r10, r11, r4, r5, r6, r7
        bls             2f
        \round          r4, r5, r6, r7, r8, r9, r10, r11
        b               0b

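        /*
         * Select the table for the last round. When \ltab is empty (the
         * encryption path), the main table is reused: each 32-bit ft_tab
         * entry carries the plain S-box value at byte offset 1, so bumping
         * ttab by one byte lets the byte loads below pick it up directly.
         * Otherwise (decryption) the 256-byte inverse S-box is loaded and
         * prefetched.
         */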
2:      .ifb            \ltab
        add             ttab, ttab, #1
        .else
        __adrl          ttab, \ltab
        // Prefetch inverse S-box for final round; see explanation above
        .set            i, 0
        .rept           256 / 64
        ldr             t0, [ttab, #i + 0]
        ldr             t1, [ttab, #i + 32]
        .set            i, i + 64
        .endr
        .endif

        pop             {rounds}        // oldcpsr
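        /*
         * Last round: byte loads (op = 'b') from the last-round table,
         * i.e. SubBytes/ShiftRows/AddRoundKey without MixColumns. The
         * 'rounds' register is no longer needed, so it carries the saved
         * CPSR into the round macro, which restores interrupts after its
         * final table lookup.
         */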
        \round          r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds

#ifdef CONFIG_CPU_BIG_ENDIAN
        __rev           r4, r4
        __rev           r5, r5
        __rev           r6, r6
        __rev           r7, r7
#endif

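        // 'out' (r3) was clobbered via its t2 alias; reload it from the
        // stack slot saved by the push at the top of the macro.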
        ldr             out, [sp]

        str             r4, [out]
        str             r5, [out, #4]
        str             r6, [out, #8]
        str             r7, [out, #12]

        pop             {r3-r11, pc}

        .align          3
        .ltorg
        .endm

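/*
 * The C callers (the aes-cipher glue code) are expected to declare these
 * along the lines of:
 *
 *   asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds,
 *                                     const u8 *in, u8 *out);
 *   asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds,
 *                                     const u8 *in, u8 *out);
 */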
ENTRY(__aes_arm_encrypt)
        do_crypt        fround, crypto_ft_tab,, 2
ENDPROC(__aes_arm_encrypt)

        .align          5
ENTRY(__aes_arm_decrypt)
        do_crypt        iround, crypto_it_tab, crypto_aes_inv_sbox, 0
ENDPROC(__aes_arm_decrypt)