/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
//
// This file is dual-licensed, meaning that you can use it under your
// choice of either of the following two licenses:
//
// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License"). You can obtain
// a copy in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
//
// or
//
// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
// Copyright 2024 Google LLC
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The generated code of this file depends on the following RISC-V extensions:
// - RV64I
// - RISC-V Vector ('V') with VLEN >= 128
// - RISC-V Vector AES block cipher extension ('Zvkned')

#include <linux/linkage.h>

.text
.option arch, +zvkned

#include "aes-macros.S"

#define KEYP        a0
#define INP         a1
#define OUTP        a2
#define LEN         a3
#define IVP         a4
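
// KEYP, INP, OUTP, LEN, and IVP alias the first five integer argument
// registers of the standard RISC-V calling convention, so they correspond,
// in order, to the parameters of the C prototypes given before each
// function below.  LEN is always a byte count, and IVP (where used) points
// to a 16-byte IV buffer.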

.macro __aes_crypt_zvkned enc, keylen
        vle32.v         v16, (INP)
        aes_crypt       v16, \enc, \keylen
        vse32.v         v16, (OUTP)
        ret
.endm

.macro aes_crypt_zvkned enc
        aes_begin       KEYP, 128f, 192f
        __aes_crypt_zvkned \enc, 256
128:
        __aes_crypt_zvkned \enc, 128
192:
        __aes_crypt_zvkned \enc, 192
.endm
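
// aes_begin (defined in aes-macros.S) loads the round keys from KEYP and,
// as used throughout this file, dispatches on the key length: it branches
// to the first label (128f) for AES-128, to the second (192f) for AES-192,
// and falls through for AES-256.  Since every __aes_crypt_zvkned expansion
// ends in ret, exactly one key-length variant executes.  In C terms the
// dispatch is roughly the following sketch (not code from this file):
//
//        switch (key->key_length) {
//        case 16: return do_crypt_128();
//        case 24: return do_crypt_192();
//        default: return do_crypt_256();
//        }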

// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
//                         const u8 in[16], u8 out[16]);
SYM_FUNC_START(aes_encrypt_zvkned)
        aes_crypt_zvkned 1
SYM_FUNC_END(aes_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_decrypt_zvkned)
        aes_crypt_zvkned 0
SYM_FUNC_END(aes_decrypt_zvkned)

.macro __aes_ecb_crypt enc, keylen
        srli            t0, LEN, 2
        // t0 is the remaining length in 32-bit words.  It's a multiple of 4.
1:
        vsetvli         t1, t0, e32, m8, ta, ma
        sub             t0, t0, t1  // Subtract number of words processed
        slli            t1, t1, 2  // Words to bytes
        vle32.v         v16, (INP)
        aes_crypt       v16, \enc, \keylen
        vse32.v         v16, (OUTP)
        add             INP, INP, t1
        add             OUTP, OUTP, t1
        bnez            t0, 1b

        ret
.endm
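
// The loop above is a standard RVV strip-mining loop: vsetvli sets vl to
// at most VLMAX for e32/m8 and returns it in t1, and the pointers and the
// remaining word count advance by that amount each pass.  As a worked
// example, on an implementation with VLEN=128, VLMAX is 32 words here
// (eight AES blocks), so a 128-byte request (t0 = 32) completes in one
// pass and a 256-byte request (t0 = 64) in exactly two.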

.macro aes_ecb_crypt enc
        aes_begin       KEYP, 128f, 192f
        __aes_ecb_crypt \enc, 256
128:
        __aes_ecb_crypt \enc, 128
192:
        __aes_ecb_crypt \enc, 192
.endm

// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//                             const u8 *in, u8 *out, size_t len);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
        aes_ecb_crypt 1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
        aes_ecb_crypt 0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)

.macro aes_cbc_encrypt keylen
        vle32.v         v16, (IVP)  // Load IV
1:
        vle32.v         v17, (INP)  // Load plaintext block
        vxor.vv         v16, v16, v17  // XOR with IV or prev ciphertext block
        aes_encrypt     v16, \keylen  // Encrypt
        vse32.v         v16, (OUTP)  // Store ciphertext block
        addi            INP, INP, 16
        addi            OUTP, OUTP, 16
        addi            LEN, LEN, -16
        bnez            LEN, 1b

        vse32.v         v16, (IVP)  // Store next IV
        ret
.endm
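
// CBC encryption is inherently serial: each plaintext block is XORed with
// the previous ciphertext block before being encrypted, so the loop above
// handles one block per iteration and v16 carries the chaining value
// (which is also written back to IVP so the caller can continue the
// chain).  Illustrative C reference model (a sketch only, with
// hypothetical one-block helpers aes_encrypt_block() and xor_block()):
//
//        u8 chain[16];
//        size_t i;
//
//        memcpy(chain, iv, 16);
//        for (i = 0; i < len; i += 16) {
//                xor_block(chain, in + i);       /* chain ^= P[i] */
//                aes_encrypt_block(key, chain);  /* chain  = C[i] */
//                memcpy(out + i, chain, 16);
//        }
//        memcpy(iv, chain, 16);                  /* IV for the next call */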

.macro aes_cbc_decrypt keylen
        srli            LEN, LEN, 2  // Convert LEN from bytes to words
        vle32.v         v16, (IVP)  // Load IV
1:
        vsetvli         t0, LEN, e32, m4, ta, ma
        vle32.v         v20, (INP)  // Load ciphertext blocks
        vslideup.vi     v16, v20, 4  // Setup prev ciphertext blocks
        addi            t1, t0, -4
        vslidedown.vx   v24, v20, t1  // Save last ciphertext block
        aes_decrypt     v20, \keylen  // Decrypt the blocks
        vxor.vv         v20, v20, v16  // XOR with prev ciphertext blocks
        vse32.v         v20, (OUTP)  // Store plaintext blocks
        vmv.v.v         v16, v24  // Next "IV" is last ciphertext block
        slli            t1, t0, 2  // Words to bytes
        add             INP, INP, t1
        add             OUTP, OUTP, t1
        sub             LEN, LEN, t0
        bnez            LEN, 1b

        vsetivli        zero, 4, e32, m1, ta, ma
        vse32.v         v16, (IVP)  // Store next IV
        ret
.endm
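
// Unlike encryption, CBC decryption parallelizes across blocks, since each
// plaintext block is Decrypt(C[i]) ^ C[i-1] and all ciphertext blocks are
// already known.  The loop above therefore decrypts several blocks per
// pass.  Before the slide, v16 holds the chaining block (the IV, or the
// last ciphertext block of the previous pass) in elements 0-3;
// vslideup.vi v16, v20, 4 leaves those elements alone and shifts v20 up by
// one block, so v16 becomes [chain, C_j, C_j+1, ...], exactly the values
// the decrypted blocks must be XORed with.  vslidedown.vx copies the final
// ciphertext block of this pass into v24 before aes_decrypt overwrites
// v20, so it can serve as the next pass's chaining block and, at the end,
// as the IV written back to IVP.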

// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//                             const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// |len| must be nonzero and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
        aes_begin       KEYP, 128f, 192f
        aes_cbc_encrypt 256
128:
        aes_cbc_encrypt 128
192:
        aes_cbc_encrypt 192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)

// Same prototype and calling convention as the encryption function
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
        aes_begin       KEYP, 128f, 192f
        aes_cbc_decrypt 256
128:
        aes_cbc_decrypt 128
192:
        aes_cbc_decrypt 192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)

.macro aes_cbc_cts_encrypt keylen

        // CBC-encrypt all blocks except the last.  But don't store the
        // second-to-last block to the output buffer yet, since it will be
        // handled specially in the ciphertext stealing step.  Exception: if
        // the message is single-block, still encrypt the last (and only) block.
        li              t0, 16
        j               2f
1:
        vse32.v         v16, (OUTP)  // Store ciphertext block
        addi            OUTP, OUTP, 16
2:
        vle32.v         v17, (INP)  // Load plaintext block
        vxor.vv         v16, v16, v17  // XOR with IV or prev ciphertext block
        aes_encrypt     v16, \keylen  // Encrypt
        addi            INP, INP, 16
        addi            LEN, LEN, -16
        bgt             LEN, t0, 1b  // Repeat if more than one block remains

        // Special case: if the message is a single block, just do CBC.
        beqz            LEN, .Lcts_encrypt_done\@

        // Encrypt the last two blocks using ciphertext stealing as follows:
        //        C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
        //        C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
        //
        // C[i] denotes the i'th ciphertext block, and likewise P[i] the i'th
        // plaintext block.  Block n, the last block, may be partial; its length
        // is 1 <= LEN <= 16.  If there are only 2 blocks, C[n-2] means the IV.
        //
        // v16 already contains Encrypt(P[n-1] ^ C[n-2]).
        // INP points to P[n].  OUTP points to where C[n-1] should go.
        // To support in-place encryption, load P[n] before storing C[n].
        addi            t0, OUTP, 16  // Get pointer to where C[n] should go
        vsetvli         zero, LEN, e8, m1, tu, ma
        vle8.v          v17, (INP)  // Load P[n]
        vse8.v          v16, (t0)  // Store C[n]
        vxor.vv         v16, v16, v17  // v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
        vsetivli        zero, 4, e32, m1, ta, ma
        aes_encrypt     v16, \keylen
.Lcts_encrypt_done\@:
        vse32.v         v16, (OUTP)  // Store C[n-1] (or C[n] in single-block case)
        ret
.endm
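
// Illustrative C reference model of the stealing step above (a sketch
// only, not code from this file; aes_encrypt_block() and the buffer names
// are hypothetical, r is the 1..16-byte length of the final block, and e
// is the value left in v16 by the loop, i.e. Encrypt(P[n-1] ^ C[n-2])):
//
//        memcpy(c_n, e, r);              /* C[n] = e[0..r]      */
//        for (i = 0; i < r; i++)
//                e[i] ^= p_n[i];         /* P[n], padded with e */
//        aes_encrypt_block(key, e);
//        memcpy(c_n_minus_1, e, 16);     /* C[n-1]              */
//
// The sketch assumes non-overlapping buffers; the assembly additionally
// loads P[n] before storing C[n] so that in-place (in == out) calls work.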

#define LEN32       t4  // Length of remaining full blocks in 32-bit words
#define LEN_MOD16   t5  // Length of message in bytes mod 16

.macro aes_cbc_cts_decrypt keylen
        andi            LEN32, LEN, ~15
        srli            LEN32, LEN32, 2
        andi            LEN_MOD16, LEN, 15

        // Save C[n-2] in v28 so that it's available later during the ciphertext
        // stealing step.  If there are fewer than three blocks, C[n-2] means
        // the IV, otherwise it means the third-to-last ciphertext block.
        vmv.v.v         v28, v16  // IV
        add             t0, LEN, -33
        bltz            t0, .Lcts_decrypt_loop\@
        andi            t0, t0, ~15
        add             t0, t0, INP
        vle32.v         v28, (t0)
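        // The code above locates C[n-2] at byte offset (LEN - 33) & ~15
        // from INP.  Worked examples: a 33..48-byte message has n = 3
        // blocks and the offset is 0 (the first ciphertext block); a
        // 49..64-byte message has n = 4 and the offset is 16; in general
        // the offset is 16 * (n - 3).  For LEN <= 32 (at most two blocks),
        // LEN - 33 is negative and v28 keeps the IV instead.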

        // CBC-decrypt all full blocks.  For the last full block, or the last 2
        // full blocks if the message is block-aligned, this doesn't write the
        // correct output blocks (unless the message is only a single block),
        // because it XORs the wrong values with the raw AES plaintexts.  But we
        // fix this after this loop without redoing the AES decryptions.  This
        // approach allows more of the AES decryptions to be parallelized.
.Lcts_decrypt_loop\@:
        vsetvli         t0, LEN32, e32, m4, ta, ma
        addi            t1, t0, -4
        vle32.v         v20, (INP)  // Load next set of ciphertext blocks
        vmv.v.v         v24, v16  // Get IV or last ciphertext block of prev set
        vslideup.vi     v24, v20, 4  // Setup prev ciphertext blocks
        vslidedown.vx   v16, v20, t1  // Save last ciphertext block of this set
        aes_decrypt     v20, \keylen  // Decrypt this set of blocks
        vxor.vv         v24, v24, v20  // XOR prev ciphertext blocks with decrypted blocks
        vse32.v         v24, (OUTP)  // Store this set of plaintext blocks
        sub             LEN32, LEN32, t0
        slli            t0, t0, 2  // Words to bytes
        add             INP, INP, t0
        add             OUTP, OUTP, t0
        bnez            LEN32, .Lcts_decrypt_loop\@

        vsetivli        zero, 4, e32, m4, ta, ma
        vslidedown.vx   v20, v20, t1  // Extract raw plaintext of last full block
        addi            t0, OUTP, -16  // Get pointer to last full plaintext block
        bnez            LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

        // Special case: if the message is a single block, just do CBC.
        li              t1, 16
        beq             LEN, t1, .Lcts_decrypt_done\@

        // Block-aligned message.  Just fix up the last 2 blocks.  We need:
        //
        //        P[n-1] = Decrypt(C[n]) ^ C[n-2]
        //        P[n] = Decrypt(C[n-1]) ^ C[n]
        //
        // We have C[n] in v16, Decrypt(C[n]) in v20, and C[n-2] in v28.
        // Together with Decrypt(C[n-1]) ^ C[n-2] from the output buffer, this
        // is everything needed to fix the output without re-decrypting blocks.
        addi            t1, OUTP, -32  // Get pointer to where P[n-1] should go
        vxor.vv         v20, v20, v28  // Decrypt(C[n]) ^ C[n-2] == P[n-1]
        vle32.v         v24, (t1)  // Decrypt(C[n-1]) ^ C[n-2]
        vse32.v         v20, (t1)  // Store P[n-1]
        vxor.vv         v20, v24, v16  // Decrypt(C[n-1]) ^ C[n-2] ^ C[n] == P[n] ^ C[n-2]
        j               .Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
        // Decrypt the last two blocks using ciphertext stealing as follows:
        //
        //        P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
        //        P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
        //
        // We already have Decrypt(C[n-1]) in v20 and C[n-2] in v28.
        vmv.v.v         v16, v20  // v16 = Decrypt(C[n-1])
        vsetvli         zero, LEN_MOD16, e8, m1, tu, ma
        vle8.v          v20, (INP)  // v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
        vxor.vv         v16, v16, v20  // v16 = Decrypt(C[n-1]) ^ C[n]
        vse8.v          v16, (OUTP)  // Store P[n]
        vsetivli        zero, 4, e32, m1, ta, ma
        aes_decrypt     v20, \keylen  // v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
        vxor.vv         v20, v20, v28  // XOR with C[n-2]
        vse32.v         v20, (t0)  // Store last full plaintext block
.Lcts_decrypt_done\@:
        ret
.endm
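
// Illustrative C reference model of the non-block-aligned stealing step
// above (a sketch only, not code from this file; aes_decrypt_block() and
// the buffer names are hypothetical, r = LEN_MOD16, d is Decrypt(C[n-1])
// as produced by the loop, c_n is the final r ciphertext bytes, and
// c_n_minus_2 is the block saved in v28):
//
//        for (i = 0; i < r; i++)
//                p_n[i] = d[i] ^ c_n[i];         /* P[n]                 */
//        memcpy(d, c_n, r);                      /* d = C[n] || d[r..16] */
//        aes_decrypt_block(key, d);
//        for (i = 0; i < 16; i++)
//                p_n_minus_1[i] = d[i] ^ c_n_minus_2[i];
//
// The block-aligned path applies the same idea, but recovers
// Decrypt(C[n-1]) ^ C[n-2] from the output buffer rather than re-running
// any AES block decryptions.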

.macro aes_cbc_cts_crypt keylen
        vle32.v         v16, (IVP)  // Load IV
        beqz            a5, .Lcts_decrypt\@
        aes_cbc_cts_encrypt \keylen
.Lcts_decrypt\@:
        aes_cbc_cts_decrypt \keylen
.endm
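
// a5 holds the sixth argument, |enc|: nonzero selects encryption, zero
// selects decryption.  There is no fall-through from the encrypt path into
// the decrypt path, because aes_cbc_cts_encrypt always ends in ret.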

// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//                               const u8 *in, u8 *out, size_t len,
//                               const u8 iv[16], bool enc);
//
// Encrypts or decrypts a message with the CS3 variant of AES-CBC-CTS.
// This is the variant that unconditionally swaps the last two blocks.
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
        aes_begin       KEYP, 128f, 192f
        aes_cbc_cts_crypt 256
128:
        aes_cbc_cts_crypt 128
192:
        aes_cbc_cts_crypt 192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)
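
// Minimal sketch of a C caller, based on the prototype above (illustrative
// only; raw_key, src, and dst are placeholder buffers, and the real
// in-kernel glue code handling key expansion and vector-context
// management lives elsewhere):
//
//        struct crypto_aes_ctx ctx;
//        u8 iv[16] = { /* ... */ };
//
//        aes_expandkey(&ctx, raw_key, 32);       /* AES-256 key schedule */
//        kernel_vector_begin();
//        /* 40 bytes: two full blocks plus an 8-byte stolen final block */
//        aes_cbc_cts_crypt_zvkned(&ctx, src, dst, 40, iv, true);
//        kernel_vector_end();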