| // SPDX-License-Identifier: GPL-2.0-only |
| /* |
| * Bit sliced AES using NEON instructions |
| * |
| * Copyright (C) 2016 - 2017 Linaro Ltd <ard.biesheuvel@linaro.org> |
| */ |
| |
| #include <asm/neon.h> |
| #include <asm/simd.h> |
| #include <crypto/aes.h> |
| #include <crypto/ctr.h> |
| #include <crypto/internal/simd.h> |
| #include <crypto/internal/skcipher.h> |
| #include <crypto/scatterwalk.h> |
| #include <crypto/xts.h> |
| #include <linux/module.h> |
| |
| MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); |
| MODULE_LICENSE("GPL v2"); |
| |
| MODULE_ALIAS_CRYPTO("ecb(aes)"); |
| MODULE_ALIAS_CRYPTO("cbc(aes)"); |
| MODULE_ALIAS_CRYPTO("ctr(aes)"); |
| MODULE_ALIAS_CRYPTO("xts(aes)"); |
| |
| asmlinkage void aesbs_convert_key(u8 out[], u32 const rk[], int rounds); |
| |
| asmlinkage void aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks); |
| asmlinkage void aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks); |
| |
| asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks, u8 iv[]); |
| |
| asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks, u8 iv[], u8 final[]); |
| |
| asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks, u8 iv[]); |
| asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks, u8 iv[]); |
| |
| /* borrowed from aes-neon-blk.ko */ |
| asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], |
| int rounds, int blocks); |
| asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], |
| int rounds, int blocks, u8 iv[]); |
| asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[], |
| u32 const rk1[], int rounds, int bytes, |
| u32 const rk2[], u8 iv[], int first); |
| asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[], |
| u32 const rk1[], int rounds, int bytes, |
| u32 const rk2[], u8 iv[], int first); |
| |
| struct aesbs_ctx { |
| u8 rk[13 * (8 * AES_BLOCK_SIZE) + 32]; |
| int rounds; |
| } __aligned(AES_BLOCK_SIZE); |
| |
| struct aesbs_cbc_ctx { |
| struct aesbs_ctx key; |
| u32 enc[AES_MAX_KEYLENGTH_U32]; |
| }; |
| |
| struct aesbs_ctr_ctx { |
| struct aesbs_ctx key; /* must be first member */ |
| struct crypto_aes_ctx fallback; |
| }; |
| |
| struct aesbs_xts_ctx { |
| struct aesbs_ctx key; |
| u32 twkey[AES_MAX_KEYLENGTH_U32]; |
| struct crypto_aes_ctx cts; |
| }; |
| |
| static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key, |
| unsigned int key_len) |
| { |
| struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); |
| struct crypto_aes_ctx rk; |
| int err; |
| |
| err = aes_expandkey(&rk, in_key, key_len); |
| if (err) |
| return err; |
| |
| ctx->rounds = 6 + key_len / 4; |
| |
| kernel_neon_begin(); |
| aesbs_convert_key(ctx->rk, rk.key_enc, ctx->rounds); |
| kernel_neon_end(); |
| |
| return 0; |
| } |
| |
| static int __ecb_crypt(struct skcipher_request *req, |
| void (*fn)(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks)) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); |
| struct skcipher_walk walk; |
| int err; |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| |
| while (walk.nbytes >= AES_BLOCK_SIZE) { |
| unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; |
| |
| if (walk.nbytes < walk.total) |
| blocks = round_down(blocks, |
| walk.stride / AES_BLOCK_SIZE); |
| |
| kernel_neon_begin(); |
| fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk, |
| ctx->rounds, blocks); |
| kernel_neon_end(); |
| err = skcipher_walk_done(&walk, |
| walk.nbytes - blocks * AES_BLOCK_SIZE); |
| } |
| |
| return err; |
| } |
| |
| static int ecb_encrypt(struct skcipher_request *req) |
| { |
| return __ecb_crypt(req, aesbs_ecb_encrypt); |
| } |
| |
| static int ecb_decrypt(struct skcipher_request *req) |
| { |
| return __ecb_crypt(req, aesbs_ecb_decrypt); |
| } |
| |
| static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, |
| unsigned int key_len) |
| { |
| struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); |
| struct crypto_aes_ctx rk; |
| int err; |
| |
| err = aes_expandkey(&rk, in_key, key_len); |
| if (err) |
| return err; |
| |
| ctx->key.rounds = 6 + key_len / 4; |
| |
| memcpy(ctx->enc, rk.key_enc, sizeof(ctx->enc)); |
| |
| kernel_neon_begin(); |
| aesbs_convert_key(ctx->key.rk, rk.key_enc, ctx->key.rounds); |
| kernel_neon_end(); |
| |
| return 0; |
| } |
| |
| static int cbc_encrypt(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); |
| struct skcipher_walk walk; |
| int err; |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| |
| while (walk.nbytes >= AES_BLOCK_SIZE) { |
| unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; |
| |
| /* fall back to the non-bitsliced NEON implementation */ |
| kernel_neon_begin(); |
| neon_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr, |
| ctx->enc, ctx->key.rounds, blocks, |
| walk.iv); |
| kernel_neon_end(); |
| err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); |
| } |
| return err; |
| } |
| |
| static int cbc_decrypt(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); |
| struct skcipher_walk walk; |
| int err; |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| |
| while (walk.nbytes >= AES_BLOCK_SIZE) { |
| unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; |
| |
| if (walk.nbytes < walk.total) |
| blocks = round_down(blocks, |
| walk.stride / AES_BLOCK_SIZE); |
| |
| kernel_neon_begin(); |
| aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr, |
| ctx->key.rk, ctx->key.rounds, blocks, |
| walk.iv); |
| kernel_neon_end(); |
| err = skcipher_walk_done(&walk, |
| walk.nbytes - blocks * AES_BLOCK_SIZE); |
| } |
| |
| return err; |
| } |
| |
| static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key, |
| unsigned int key_len) |
| { |
| struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); |
| int err; |
| |
| err = aes_expandkey(&ctx->fallback, in_key, key_len); |
| if (err) |
| return err; |
| |
| ctx->key.rounds = 6 + key_len / 4; |
| |
| kernel_neon_begin(); |
| aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds); |
| kernel_neon_end(); |
| |
| return 0; |
| } |
| |
| static int ctr_encrypt(struct skcipher_request *req) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); |
| struct skcipher_walk walk; |
| u8 buf[AES_BLOCK_SIZE]; |
| int err; |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| |
| while (walk.nbytes > 0) { |
| unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; |
| u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL; |
| |
| if (walk.nbytes < walk.total) { |
| blocks = round_down(blocks, |
| walk.stride / AES_BLOCK_SIZE); |
| final = NULL; |
| } |
| |
| kernel_neon_begin(); |
| aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, |
| ctx->rk, ctx->rounds, blocks, walk.iv, final); |
| kernel_neon_end(); |
| |
| if (final) { |
| u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; |
| u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; |
| |
| crypto_xor_cpy(dst, src, final, |
| walk.total % AES_BLOCK_SIZE); |
| |
| err = skcipher_walk_done(&walk, 0); |
| break; |
| } |
| err = skcipher_walk_done(&walk, |
| walk.nbytes - blocks * AES_BLOCK_SIZE); |
| } |
| return err; |
| } |
| |
| static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key, |
| unsigned int key_len) |
| { |
| struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); |
| struct crypto_aes_ctx rk; |
| int err; |
| |
| err = xts_verify_key(tfm, in_key, key_len); |
| if (err) |
| return err; |
| |
| key_len /= 2; |
| err = aes_expandkey(&ctx->cts, in_key, key_len); |
| if (err) |
| return err; |
| |
| err = aes_expandkey(&rk, in_key + key_len, key_len); |
| if (err) |
| return err; |
| |
| memcpy(ctx->twkey, rk.key_enc, sizeof(ctx->twkey)); |
| |
| return aesbs_setkey(tfm, in_key, key_len); |
| } |
| |
| static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst) |
| { |
| struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); |
| unsigned long flags; |
| |
| /* |
| * Temporarily disable interrupts to avoid races where |
| * cachelines are evicted when the CPU is interrupted |
| * to do something else. |
| */ |
| local_irq_save(flags); |
| aes_encrypt(&ctx->fallback, dst, src); |
| local_irq_restore(flags); |
| } |
| |
| static int ctr_encrypt_sync(struct skcipher_request *req) |
| { |
| if (!crypto_simd_usable()) |
| return crypto_ctr_encrypt_walk(req, ctr_encrypt_one); |
| |
| return ctr_encrypt(req); |
| } |
| |
| static int __xts_crypt(struct skcipher_request *req, bool encrypt, |
| void (*fn)(u8 out[], u8 const in[], u8 const rk[], |
| int rounds, int blocks, u8 iv[])) |
| { |
| struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); |
| struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm); |
| int tail = req->cryptlen % (8 * AES_BLOCK_SIZE); |
| struct scatterlist sg_src[2], sg_dst[2]; |
| struct skcipher_request subreq; |
| struct scatterlist *src, *dst; |
| struct skcipher_walk walk; |
| int nbytes, err; |
| int first = 1; |
| u8 *out, *in; |
| |
| if (req->cryptlen < AES_BLOCK_SIZE) |
| return -EINVAL; |
| |
| /* ensure that the cts tail is covered by a single step */ |
| if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) { |
| int xts_blocks = DIV_ROUND_UP(req->cryptlen, |
| AES_BLOCK_SIZE) - 2; |
| |
| skcipher_request_set_tfm(&subreq, tfm); |
| skcipher_request_set_callback(&subreq, |
| skcipher_request_flags(req), |
| NULL, NULL); |
| skcipher_request_set_crypt(&subreq, req->src, req->dst, |
| xts_blocks * AES_BLOCK_SIZE, |
| req->iv); |
| req = &subreq; |
| } else { |
| tail = 0; |
| } |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| if (err) |
| return err; |
| |
| while (walk.nbytes >= AES_BLOCK_SIZE) { |
| unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; |
| |
| if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE) |
| blocks = round_down(blocks, |
| walk.stride / AES_BLOCK_SIZE); |
| |
| out = walk.dst.virt.addr; |
| in = walk.src.virt.addr; |
| nbytes = walk.nbytes; |
| |
| kernel_neon_begin(); |
| if (likely(blocks > 6)) { /* plain NEON is faster otherwise */ |
| if (first) |
| neon_aes_ecb_encrypt(walk.iv, walk.iv, |
| ctx->twkey, |
| ctx->key.rounds, 1); |
| first = 0; |
| |
| fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, |
| walk.iv); |
| |
| out += blocks * AES_BLOCK_SIZE; |
| in += blocks * AES_BLOCK_SIZE; |
| nbytes -= blocks * AES_BLOCK_SIZE; |
| } |
| |
| if (walk.nbytes == walk.total && nbytes > 0) |
| goto xts_tail; |
| |
| kernel_neon_end(); |
| err = skcipher_walk_done(&walk, nbytes); |
| } |
| |
| if (err || likely(!tail)) |
| return err; |
| |
| /* handle ciphertext stealing */ |
| dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen); |
| if (req->dst != req->src) |
| dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen); |
| |
| skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail, |
| req->iv); |
| |
| err = skcipher_walk_virt(&walk, req, false); |
| if (err) |
| return err; |
| |
| out = walk.dst.virt.addr; |
| in = walk.src.virt.addr; |
| nbytes = walk.nbytes; |
| |
| kernel_neon_begin(); |
| xts_tail: |
| if (encrypt) |
| neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, |
| nbytes, ctx->twkey, walk.iv, first ?: 2); |
| else |
| neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, |
| nbytes, ctx->twkey, walk.iv, first ?: 2); |
| kernel_neon_end(); |
| |
| return skcipher_walk_done(&walk, 0); |
| } |
| |
| static int xts_encrypt(struct skcipher_request *req) |
| { |
| return __xts_crypt(req, true, aesbs_xts_encrypt); |
| } |
| |
| static int xts_decrypt(struct skcipher_request *req) |
| { |
| return __xts_crypt(req, false, aesbs_xts_decrypt); |
| } |
| |
| static struct skcipher_alg aes_algs[] = { { |
| .base.cra_name = "__ecb(aes)", |
| .base.cra_driver_name = "__ecb-aes-neonbs", |
| .base.cra_priority = 250, |
| .base.cra_blocksize = AES_BLOCK_SIZE, |
| .base.cra_ctxsize = sizeof(struct aesbs_ctx), |
| .base.cra_module = THIS_MODULE, |
| .base.cra_flags = CRYPTO_ALG_INTERNAL, |
| |
| .min_keysize = AES_MIN_KEY_SIZE, |
| .max_keysize = AES_MAX_KEY_SIZE, |
| .walksize = 8 * AES_BLOCK_SIZE, |
| .setkey = aesbs_setkey, |
| .encrypt = ecb_encrypt, |
| .decrypt = ecb_decrypt, |
| }, { |
| .base.cra_name = "__cbc(aes)", |
| .base.cra_driver_name = "__cbc-aes-neonbs", |
| .base.cra_priority = 250, |
| .base.cra_blocksize = AES_BLOCK_SIZE, |
| .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), |
| .base.cra_module = THIS_MODULE, |
| .base.cra_flags = CRYPTO_ALG_INTERNAL, |
| |
| .min_keysize = AES_MIN_KEY_SIZE, |
| .max_keysize = AES_MAX_KEY_SIZE, |
| .walksize = 8 * AES_BLOCK_SIZE, |
| .ivsize = AES_BLOCK_SIZE, |
| .setkey = aesbs_cbc_setkey, |
| .encrypt = cbc_encrypt, |
| .decrypt = cbc_decrypt, |
| }, { |
| .base.cra_name = "__ctr(aes)", |
| .base.cra_driver_name = "__ctr-aes-neonbs", |
| .base.cra_priority = 250, |
| .base.cra_blocksize = 1, |
| .base.cra_ctxsize = sizeof(struct aesbs_ctx), |
| .base.cra_module = THIS_MODULE, |
| .base.cra_flags = CRYPTO_ALG_INTERNAL, |
| |
| .min_keysize = AES_MIN_KEY_SIZE, |
| .max_keysize = AES_MAX_KEY_SIZE, |
| .chunksize = AES_BLOCK_SIZE, |
| .walksize = 8 * AES_BLOCK_SIZE, |
| .ivsize = AES_BLOCK_SIZE, |
| .setkey = aesbs_setkey, |
| .encrypt = ctr_encrypt, |
| .decrypt = ctr_encrypt, |
| }, { |
| .base.cra_name = "ctr(aes)", |
| .base.cra_driver_name = "ctr-aes-neonbs", |
| .base.cra_priority = 250 - 1, |
| .base.cra_blocksize = 1, |
| .base.cra_ctxsize = sizeof(struct aesbs_ctr_ctx), |
| .base.cra_module = THIS_MODULE, |
| |
| .min_keysize = AES_MIN_KEY_SIZE, |
| .max_keysize = AES_MAX_KEY_SIZE, |
| .chunksize = AES_BLOCK_SIZE, |
| .walksize = 8 * AES_BLOCK_SIZE, |
| .ivsize = AES_BLOCK_SIZE, |
| .setkey = aesbs_ctr_setkey_sync, |
| .encrypt = ctr_encrypt_sync, |
| .decrypt = ctr_encrypt_sync, |
| }, { |
| .base.cra_name = "__xts(aes)", |
| .base.cra_driver_name = "__xts-aes-neonbs", |
| .base.cra_priority = 250, |
| .base.cra_blocksize = AES_BLOCK_SIZE, |
| .base.cra_ctxsize = sizeof(struct aesbs_xts_ctx), |
| .base.cra_module = THIS_MODULE, |
| .base.cra_flags = CRYPTO_ALG_INTERNAL, |
| |
| .min_keysize = 2 * AES_MIN_KEY_SIZE, |
| .max_keysize = 2 * AES_MAX_KEY_SIZE, |
| .walksize = 8 * AES_BLOCK_SIZE, |
| .ivsize = AES_BLOCK_SIZE, |
| .setkey = aesbs_xts_setkey, |
| .encrypt = xts_encrypt, |
| .decrypt = xts_decrypt, |
| } }; |
| |
| static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)]; |
| |
| static void aes_exit(void) |
| { |
| int i; |
| |
| for (i = 0; i < ARRAY_SIZE(aes_simd_algs); i++) |
| if (aes_simd_algs[i]) |
| simd_skcipher_free(aes_simd_algs[i]); |
| |
| crypto_unregister_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); |
| } |
| |
| static int __init aes_init(void) |
| { |
| struct simd_skcipher_alg *simd; |
| const char *basename; |
| const char *algname; |
| const char *drvname; |
| int err; |
| int i; |
| |
| if (!cpu_have_named_feature(ASIMD)) |
| return -ENODEV; |
| |
| err = crypto_register_skciphers(aes_algs, ARRAY_SIZE(aes_algs)); |
| if (err) |
| return err; |
| |
| for (i = 0; i < ARRAY_SIZE(aes_algs); i++) { |
| if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL)) |
| continue; |
| |
| algname = aes_algs[i].base.cra_name + 2; |
| drvname = aes_algs[i].base.cra_driver_name + 2; |
| basename = aes_algs[i].base.cra_driver_name; |
| simd = simd_skcipher_create_compat(algname, drvname, basename); |
| err = PTR_ERR(simd); |
| if (IS_ERR(simd)) |
| goto unregister_simds; |
| |
| aes_simd_algs[i] = simd; |
| } |
| return 0; |
| |
| unregister_simds: |
| aes_exit(); |
| return err; |
| } |
| |
| module_init(aes_init); |
| module_exit(aes_exit); |