| /* SPDX-License-Identifier: GPL-2.0-or-later */ |
| # |
| # Accelerated AES-GCM stitched implementation for ppc64le. |
| # |
| # Copyright 2022- IBM Inc. All rights reserved |
| # |
| #=================================================================================== |
| # Written by Danny Tsen <dtsen@linux.ibm.com> |
| # |
| # GHASH is based on the Karatsuba multiplication method. |
| # |
| # Xi xor X1 |
| # |
| # X1 * H^4 + X2 * H^3 + X3 * H^2 + X4 * H = |
| # (X1.h * H^4.h + X1.l * H^4.l + X1 * H^4) + |
| # (X2.h * H^3.h + X2.l * H^3.l + X2 * H^3) + |
| # (X3.h * H^2.h + X3.l * H^2.l + X3 * H^2) + |
| # (X4.h * H.h + X4.l * H.l + X4 * H) |
| # |
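| # Each vpmsumd performs two 64x64-bit carry-less multiplications and |
| # XORs the two partial products, so the low (L), middle (M), and high (H) |
| # Karatsuba terms above each take one vpmsumd per block; the three sums |
| # are then folded and reduced with the GHASH polynomial. |
| # |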
| # Xi = v0 |
| # H Poly = v2 |
| # Hash keys = v3 - v14 |
| # ( H.l, H, H.h) |
| # ( H^2.l, H^2, H^2.h) |
| # ( H^3.l, H^3, H^3.h) |
| # ( H^4.l, H^4, H^4.h) |
| # |
| # v30 is IV |
| # v31 - counter 1 |
| # |
| # AES used, |
| # vs0 - vs14 for round keys |
| # v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted) |
| # |
| # This implementation uses a stitched AES-GCM approach to improve overall performance. |
| # AES is implemented with 8x blocks and GHASH is computed with two 4x blocks. |
| # |
| # =================================================================================== |
| # |
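| # A rough C-style sketch of the stitched 8x main loop (illustrative |
| # only, not part of the build; names are hypothetical): |
| # |
| # while (len >= 128) { /* 8 x 16-byte blocks */ |
| # for (i = 0; i < 8; i++) /* AES-CTR keystream */ |
| # ks[i] = aes_encrypt(rk, ctr++); |
| # for (i = 0; i < 8; i++) |
| # out[i] = in[i] ^ ks[i]; |
| # Xi = ghash_4x(Xi, c[0], c[1], c[2], c[3]); /* c = ciphertext */ |
| # Xi = ghash_4x(Xi, c[4], c[5], c[6], c[7]); |
| # len -= 128; |
| # } |
| # |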
| |
| #include <asm/ppc_asm.h> |
| #include <linux/linkage.h> |
| |
| .machine "any" |
| .text |
| |
| # 4x loops |
| # v15 - v18 - input states |
| # vs1 - vs9 - round keys |
| # |
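| # The xxlor moves copy round keys from VSRs into VRs v19 - v23 since |
| # vcipher only operates on VRs; each vcipher below applies one full AES |
| # round (rounds 1 - 9) to each of the four states. |
| # |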
| .macro Loop_aes_middle4x |
| xxlor 19+32, 1, 1 |
| xxlor 20+32, 2, 2 |
| xxlor 21+32, 3, 3 |
| xxlor 22+32, 4, 4 |
| |
| vcipher 15, 15, 19 |
| vcipher 16, 16, 19 |
| vcipher 17, 17, 19 |
| vcipher 18, 18, 19 |
| |
| vcipher 15, 15, 20 |
| vcipher 16, 16, 20 |
| vcipher 17, 17, 20 |
| vcipher 18, 18, 20 |
| |
| vcipher 15, 15, 21 |
| vcipher 16, 16, 21 |
| vcipher 17, 17, 21 |
| vcipher 18, 18, 21 |
| |
| vcipher 15, 15, 22 |
| vcipher 16, 16, 22 |
| vcipher 17, 17, 22 |
| vcipher 18, 18, 22 |
| |
| xxlor 19+32, 5, 5 |
| xxlor 20+32, 6, 6 |
| xxlor 21+32, 7, 7 |
| xxlor 22+32, 8, 8 |
| |
| vcipher 15, 15, 19 |
| vcipher 16, 16, 19 |
| vcipher 17, 17, 19 |
| vcipher 18, 18, 19 |
| |
| vcipher 15, 15, 20 |
| vcipher 16, 16, 20 |
| vcipher 17, 17, 20 |
| vcipher 18, 18, 20 |
| |
| vcipher 15, 15, 21 |
| vcipher 16, 16, 21 |
| vcipher 17, 17, 21 |
| vcipher 18, 18, 21 |
| |
| vcipher 15, 15, 22 |
| vcipher 16, 16, 22 |
| vcipher 17, 17, 22 |
| vcipher 18, 18, 22 |
| |
| xxlor 23+32, 9, 9 |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| .endm |
| |
| # 8x loops |
| # v15 - v22 - input states |
| # vs1 - vs9 - round keys |
| # |
| .macro Loop_aes_middle8x |
| xxlor 23+32, 1, 1 |
| xxlor 24+32, 2, 2 |
| xxlor 25+32, 3, 3 |
| xxlor 26+32, 4, 4 |
| |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| vcipher 19, 19, 23 |
| vcipher 20, 20, 23 |
| vcipher 21, 21, 23 |
| vcipher 22, 22, 23 |
| |
| vcipher 15, 15, 24 |
| vcipher 16, 16, 24 |
| vcipher 17, 17, 24 |
| vcipher 18, 18, 24 |
| vcipher 19, 19, 24 |
| vcipher 20, 20, 24 |
| vcipher 21, 21, 24 |
| vcipher 22, 22, 24 |
| |
| vcipher 15, 15, 25 |
| vcipher 16, 16, 25 |
| vcipher 17, 17, 25 |
| vcipher 18, 18, 25 |
| vcipher 19, 19, 25 |
| vcipher 20, 20, 25 |
| vcipher 21, 21, 25 |
| vcipher 22, 22, 25 |
| |
| vcipher 15, 15, 26 |
| vcipher 16, 16, 26 |
| vcipher 17, 17, 26 |
| vcipher 18, 18, 26 |
| vcipher 19, 19, 26 |
| vcipher 20, 20, 26 |
| vcipher 21, 21, 26 |
| vcipher 22, 22, 26 |
| |
| xxlor 23+32, 5, 5 |
| xxlor 24+32, 6, 6 |
| xxlor 25+32, 7, 7 |
| xxlor 26+32, 8, 8 |
| |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| vcipher 19, 19, 23 |
| vcipher 20, 20, 23 |
| vcipher 21, 21, 23 |
| vcipher 22, 22, 23 |
| |
| vcipher 15, 15, 24 |
| vcipher 16, 16, 24 |
| vcipher 17, 17, 24 |
| vcipher 18, 18, 24 |
| vcipher 19, 19, 24 |
| vcipher 20, 20, 24 |
| vcipher 21, 21, 24 |
| vcipher 22, 22, 24 |
| |
| vcipher 15, 15, 25 |
| vcipher 16, 16, 25 |
| vcipher 17, 17, 25 |
| vcipher 18, 18, 25 |
| vcipher 19, 19, 25 |
| vcipher 20, 20, 25 |
| vcipher 21, 21, 25 |
| vcipher 22, 22, 25 |
| |
| vcipher 15, 15, 26 |
| vcipher 16, 16, 26 |
| vcipher 17, 17, 26 |
| vcipher 18, 18, 26 |
| vcipher 19, 19, 26 |
| vcipher 20, 20, 26 |
| vcipher 21, 21, 26 |
| vcipher 22, 22, 26 |
| |
| xxlor 23+32, 9, 9 |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| vcipher 19, 19, 23 |
| vcipher 20, 20, 23 |
| vcipher 21, 21, 23 |
| vcipher 22, 22, 23 |
| .endm |
| |
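| # 1x loop |
| # v15 - input state |
| # vs1 - vs9 - round keys |
| # |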
| .macro Loop_aes_middle_1x |
| xxlor 19+32, 1, 1 |
| xxlor 20+32, 2, 2 |
| xxlor 21+32, 3, 3 |
| xxlor 22+32, 4, 4 |
| |
| vcipher 15, 15, 19 |
| vcipher 15, 15, 20 |
| vcipher 15, 15, 21 |
| vcipher 15, 15, 22 |
| |
| xxlor 19+32, 5, 5 |
| xxlor 20+32, 6, 6 |
| xxlor 21+32, 7, 7 |
| xxlor 22+32, 8, 8 |
| |
| vcipher 15, 15, 19 |
| vcipher 15, 15, 20 |
| vcipher 15, 15, 21 |
| vcipher 15, 15, 22 |
| |
| xxlor 19+32, 9, 9 |
| vcipher 15, 15, 19 |
| .endm |
| |
| # |
| # Compute 4x hash values based on the Karatsuba method. |
| # |
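| # Hedged sketch (register roles per the inline comments; lo/mid/hi |
| # denote the vpmsumd partial products): |
| # |
| # L = lo(X1,H^4) ^ lo(X2,H^3) ^ lo(X3,H^2) ^ lo(X4,H) |
| # M = mid(X1,H^4) ^ mid(X2,H^3) ^ mid(X3,H^2) ^ mid(X4,H) |
| # H = hi(X1,H^4) ^ hi(X2,H^3) ^ hi(X3,H^2) ^ hi(X4,H) |
| # |
| # M is folded into L and H, then the 256-bit (H:L) result is reduced |
| # with the GHASH polynomial (v2) in two vpmsumd steps to form the new Xi. |
| # |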
| .macro ppc_aes_gcm_ghash |
| vxor 15, 15, 0 |
| |
| vpmsumd 23, 12, 15 # H4.L * X.L |
| vpmsumd 24, 9, 16 |
| vpmsumd 25, 6, 17 |
| vpmsumd 26, 3, 18 |
| |
| vxor 23, 23, 24 |
| vxor 23, 23, 25 |
| vxor 23, 23, 26 # L |
| |
| vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L |
| vpmsumd 25, 10, 16 # H3.L * X2.H + H3.H * X2.L |
| vpmsumd 26, 7, 17 |
| vpmsumd 27, 4, 18 |
| |
| vxor 24, 24, 25 |
| vxor 24, 24, 26 |
| vxor 24, 24, 27 # M |
| |
| # sum hash and reduction with H Poly |
| vpmsumd 28, 23, 2 # reduction |
| |
| vxor 29, 29, 29 |
| vsldoi 26, 24, 29, 8 # mL |
| vsldoi 29, 29, 24, 8 # mH |
| vxor 23, 23, 26 # mL + L |
| |
| vsldoi 23, 23, 23, 8 # swap |
| vxor 23, 23, 28 |
| |
| vpmsumd 24, 14, 15 # H4.H * X.H |
| vpmsumd 25, 11, 16 |
| vpmsumd 26, 8, 17 |
| vpmsumd 27, 5, 18 |
| |
| vxor 24, 24, 25 |
| vxor 24, 24, 26 |
| vxor 24, 24, 27 |
| |
| vxor 24, 24, 29 |
| |
| # sum hash and reduction with H Poly |
| vsldoi 27, 23, 23, 8 # swap |
| vpmsumd 23, 23, 2 |
| vxor 27, 27, 24 |
| vxor 23, 23, 27 |
| |
| xxlor 32, 23+32, 23+32 # update hash |
| |
| .endm |
| |
| # |
| # Combine two 4x GHASH computations |
| # v15 - v22 - input blocks |
| # |
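| # The first 4x result (v27) is folded into the fifth block (v19) as the |
| # running Xi before the second 4x pass, so eight blocks are hashed per |
| # invocation. |
| # |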
| .macro ppc_aes_gcm_ghash2_4x |
| # first 4x hash |
| vxor 15, 15, 0 # Xi + X |
| |
| vpmsumd 23, 12, 15 # H4.L * X.L |
| vpmsumd 24, 9, 16 |
| vpmsumd 25, 6, 17 |
| vpmsumd 26, 3, 18 |
| |
| vxor 23, 23, 24 |
| vxor 23, 23, 25 |
| vxor 23, 23, 26 # L |
| |
| vpmsumd 24, 13, 15 # H4.L * X.H + H4.H * X.L |
| vpmsumd 25, 10, 16 # H3.L * X2.H + H3.H * X2.L |
| vpmsumd 26, 7, 17 |
| vpmsumd 27, 4, 18 |
| |
| vxor 24, 24, 25 |
| vxor 24, 24, 26 |
| |
| # sum hash and reduction with H Poly |
| vpmsumd 28, 23, 2 # reduction |
| |
| vxor 29, 29, 29 |
| |
| vxor 24, 24, 27 # M |
| vsldoi 26, 24, 29, 8 # mL |
| vsldoi 29, 29, 24, 8 # mH |
| vxor 23, 23, 26 # mL + L |
| |
| vsldoi 23, 23, 23, 8 # swap |
| vxor 23, 23, 28 |
| |
| vpmsumd 24, 14, 15 # H4.H * X.H |
| vpmsumd 25, 11, 16 |
| vpmsumd 26, 8, 17 |
| vpmsumd 27, 5, 18 |
| |
| vxor 24, 24, 25 |
| vxor 24, 24, 26 |
| vxor 24, 24, 27 # H |
| |
| vxor 24, 24, 29 # H + mH |
| |
| # sum hash and reduction with H Poly |
| vsldoi 27, 23, 23, 8 # swap |
| vpmsumd 23, 23, 2 |
| vxor 27, 27, 24 |
| vxor 27, 23, 27 # 1st Xi |
| |
| # 2nd 4x hash |
| vpmsumd 24, 9, 20 |
| vpmsumd 25, 6, 21 |
| vpmsumd 26, 3, 22 |
| vxor 19, 19, 27 # Xi + X |
| vpmsumd 23, 12, 19 # H4.L * X.L |
| |
| vxor 23, 23, 24 |
| vxor 23, 23, 25 |
| vxor 23, 23, 26 # L |
| |
| vpmsumd 24, 13, 19 # H4.L * X.H + H4.H * X.L |
| vpmsumd 25, 10, 20 # H3.L * X2.H + H3.H * X2.L |
| vpmsumd 26, 7, 21 |
| vpmsumd 27, 4, 22 |
| |
| vxor 24, 24, 25 |
| vxor 24, 24, 26 |
| |
| # sum hash and reduction with H Poly |
| vpmsumd 28, 23, 2 # reduction |
| |
| vxor 29, 29, 29 |
| |
| vxor 24, 24, 27 # M |
| vsldoi 26, 24, 29, 8 # mL |
| vsldoi 29, 29, 24, 8 # mH |
| vxor 23, 23, 26 # mL + L |
| |
| vsldoi 23, 23, 23, 8 # swap |
| vxor 23, 23, 28 |
| |
| vpmsumd 24, 14, 19 # H4.H * X.H |
| vpmsumd 25, 11, 20 |
| vpmsumd 26, 8, 21 |
| vpmsumd 27, 5, 22 |
| |
| vxor 24, 24, 25 |
| vxor 24, 24, 26 |
| vxor 24, 24, 27 # H |
| |
| vxor 24, 24, 29 # H + mH |
| |
| # sum hash and reduction with H Poly |
| vsldoi 27, 23, 23, 8 # swap |
| vpmsumd 23, 23, 2 |
| vxor 27, 27, 24 |
| vxor 23, 23, 27 |
| |
| xxlor 32, 23+32, 23+32 # update hash |
| |
| .endm |
| |
| # |
| # Compute and update a single hash value |
| # |
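| # Single-block form of the Karatsuba multiply/reduce above: |
| # Xi = (Xi ^ X) * H mod g, with (H.l, H, H.h) in v3 - v5, the |
| # polynomial in v2, and the input block X in v28. |
| # |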
| .macro ppc_update_hash_1x |
| vxor 28, 28, 0 |
| |
| vxor 19, 19, 19 |
| |
| vpmsumd 22, 3, 28 # L |
| vpmsumd 23, 4, 28 # M |
| vpmsumd 24, 5, 28 # H |
| |
| vpmsumd 27, 22, 2 # reduction |
| |
| vsldoi 25, 23, 19, 8 # mL |
| vsldoi 26, 19, 23, 8 # mH |
| vxor 22, 22, 25 # L + mL |
| vxor 24, 24, 26 # H + mH |
| |
| vsldoi 22, 22, 22, 8 # swap |
| vxor 22, 22, 27 |
| |
| vsldoi 20, 22, 22, 8 # swap |
| vpmsumd 22, 22, 2 # reduction |
| vxor 20, 20, 24 |
| vxor 22, 22, 20 |
| |
| vmr 0, 22 # update hash |
| |
| .endm |
| |
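| # |
| # Stack frame layout (640 bytes, offsets from the new r1): |
| # 112 - 168 non-volatile GPRs r14 - r21 |
| # 192 - scratch area for partial-block masks and data |
| # 256 - 432 non-volatile VRs v20 - v31 |
| # 464 - 592 non-volatile VSRs vs14 - vs22 |
| # 656 LR, saved in the caller's LR save slot (ELFv2) |
| # |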
| .macro SAVE_REGS |
| stdu 1,-640(1) |
| mflr 0 |
| |
| std 14,112(1) |
| std 15,120(1) |
| std 16,128(1) |
| std 17,136(1) |
| std 18,144(1) |
| std 19,152(1) |
| std 20,160(1) |
| std 21,168(1) |
| li 9, 256 |
| stvx 20, 9, 1 |
| addi 9, 9, 16 |
| stvx 21, 9, 1 |
| addi 9, 9, 16 |
| stvx 22, 9, 1 |
| addi 9, 9, 16 |
| stvx 23, 9, 1 |
| addi 9, 9, 16 |
| stvx 24, 9, 1 |
| addi 9, 9, 16 |
| stvx 25, 9, 1 |
| addi 9, 9, 16 |
| stvx 26, 9, 1 |
| addi 9, 9, 16 |
| stvx 27, 9, 1 |
| addi 9, 9, 16 |
| stvx 28, 9, 1 |
| addi 9, 9, 16 |
| stvx 29, 9, 1 |
| addi 9, 9, 16 |
| stvx 30, 9, 1 |
| addi 9, 9, 16 |
| stvx 31, 9, 1 |
| stxv 14, 464(1) |
| stxv 15, 480(1) |
| stxv 16, 496(1) |
| stxv 17, 512(1) |
| stxv 18, 528(1) |
| stxv 19, 544(1) |
| stxv 20, 560(1) |
| stxv 21, 576(1) |
| stxv 22, 592(1) |
| std 0, 656(1) |
| .endm |
| |
| .macro RESTORE_REGS |
| lxv 14, 464(1) |
| lxv 15, 480(1) |
| lxv 16, 496(1) |
| lxv 17, 512(1) |
| lxv 18, 528(1) |
| lxv 19, 544(1) |
| lxv 20, 560(1) |
| lxv 21, 576(1) |
| lxv 22, 592(1) |
| li 9, 256 |
| lvx 20, 9, 1 |
| addi 9, 9, 16 |
| lvx 21, 9, 1 |
| addi 9, 9, 16 |
| lvx 22, 9, 1 |
| addi 9, 9, 16 |
| lvx 23, 9, 1 |
| addi 9, 9, 16 |
| lvx 24, 9, 1 |
| addi 9, 9, 16 |
| lvx 25, 9, 1 |
| addi 9, 9, 16 |
| lvx 26, 9, 1 |
| addi 9, 9, 16 |
| lvx 27, 9, 1 |
| addi 9, 9, 16 |
| lvx 28, 9, 1 |
| addi 9, 9, 16 |
| lvx 29, 9, 1 |
| addi 9, 9, 16 |
| lvx 30, 9, 1 |
| addi 9, 9, 16 |
| lvx 31, 9, 1 |
| |
| ld 0, 656(1) |
| ld 14,112(1) |
| ld 15,120(1) |
| ld 16,128(1) |
| ld 17,136(1) |
| ld 18,144(1) |
| ld 19,152(1) |
| ld 20,160(1) |
| ld 21,168(1) |
| |
| mtlr 0 |
| addi 1, 1, 640 |
| .endm |
| |
| .macro LOAD_HASH_TABLE |
| # Load Xi |
| lxvb16x 32, 0, 8 # load Xi |
| |
| # load Hash - h^4, h^3, h^2, h |
| li 10, 32 |
| lxvd2x 2+32, 10, 8 # H Poly |
| li 10, 48 |
| lxvd2x 3+32, 10, 8 # Hl |
| li 10, 64 |
| lxvd2x 4+32, 10, 8 # H |
| li 10, 80 |
| lxvd2x 5+32, 10, 8 # Hh |
| |
| li 10, 96 |
| lxvd2x 6+32, 10, 8 # H^2l |
| li 10, 112 |
| lxvd2x 7+32, 10, 8 # H^2 |
| li 10, 128 |
| lxvd2x 8+32, 10, 8 # H^2h |
| |
| li 10, 144 |
| lxvd2x 9+32, 10, 8 # H^3l |
| li 10, 160 |
| lxvd2x 10+32, 10, 8 # H^3 |
| li 10, 176 |
| lxvd2x 11+32, 10, 8 # H^3h |
| |
| li 10, 192 |
| lxvd2x 12+32, 10, 8 # H^4l |
| li 10, 208 |
| lxvd2x 13+32, 10, 8 # H^4 |
| li 10, 224 |
| lxvd2x 14+32, 10, 8 # H^4h |
| .endm |
| |
| # |
| # aes_p10_gcm_encrypt (const void *inp, void *out, size_t len, |
| # const char *rk, unsigned char iv[16], void *Xip); |
| # |
| # r3 - inp |
| # r4 - out |
| # r5 - len |
| # r6 - AES round keys |
| # r7 - iv and other data |
| # r8 - Xi, HPoly, hash keys |
| # |
| # rounds is at offset 240 in rk |
| # Xi is at 0 in gcm_table (Xip). |
| # |
| _GLOBAL(aes_p10_gcm_encrypt) |
| .align 5 |
| |
| SAVE_REGS |
| |
| LOAD_HASH_TABLE |
| |
| # initialize ICB: GHASH( IV ), IV - r7 |
| lxvb16x 30+32, 0, 7 # load IV - v30 |
| |
| mr 12, 5 # length |
| li 11, 0 # block index |
| |
| # counter 1 |
| vxor 31, 31, 31 |
| vspltisb 22, 1 |
| vsldoi 31, 31, 22, 1 # counter 1 |
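| # v31 now holds 0x00..01: vspltisb fills v22 with 0x01 bytes and vsldoi |
| # shifts a single 0x01 byte into zeroed v31, so each vaddudm on v30 |
| # bumps the counter field of the byte-reversed IV by one. |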
| |
| # load round key to VSR |
| lxv 0, 0(6) |
| lxv 1, 0x10(6) |
| lxv 2, 0x20(6) |
| lxv 3, 0x30(6) |
| lxv 4, 0x40(6) |
| lxv 5, 0x50(6) |
| lxv 6, 0x60(6) |
| lxv 7, 0x70(6) |
| lxv 8, 0x80(6) |
| lxv 9, 0x90(6) |
| lxv 10, 0xa0(6) |
| |
| # load rounds - 10 (128), 12 (192), 14 (256) |
| lwz 9,240(6) |
| |
| # |
| # vxor state, state, w # addroundkey |
| xxlor 32+29, 0, 0 |
| vxor 15, 30, 29 # IV + round key - add round key 0 |
| |
| cmpdi 9, 10 |
| beq Loop_aes_gcm_8x |
| |
| # load 2 more round keys (v11, v12) |
| lxv 11, 0xb0(6) |
| lxv 12, 0xc0(6) |
| |
| cmpdi 9, 12 |
| beq Loop_aes_gcm_8x |
| |
| # load 2 more round keys (v13, v14) |
| lxv 13, 0xd0(6) |
| lxv 14, 0xe0(6) |
| cmpdi 9, 14 |
| beq Loop_aes_gcm_8x |
| |
| b aes_gcm_out |
| |
| .align 5 |
| Loop_aes_gcm_8x: |
| mr 14, 3 |
| mr 9, 4 |
| |
| # |
| # check partial block |
| # |
| Continue_partial_check: |
| ld 15, 56(7) |
| cmpdi 15, 0 |
| beq Continue |
| bgt Final_block |
| cmpdi 15, 16 |
| blt Final_block |
| |
| Continue: |
| # n blocks |
| li 10, 128 |
| divdu 10, 12, 10 # n 128-byte blocks |
| cmpdi 10, 0 |
| beq Loop_last_block |
| |
| vaddudm 30, 30, 31 # IV + counter |
| vxor 16, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 17, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 18, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 19, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 20, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 21, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 22, 30, 29 |
| |
| mtctr 10 |
| |
| li 15, 16 |
| li 16, 32 |
| li 17, 48 |
| li 18, 64 |
| li 19, 80 |
| li 20, 96 |
| li 21, 112 |
| |
| lwz 10, 240(6) |
| |
| Loop_8x_block: |
| |
| lxvb16x 15, 0, 14 # load block |
| lxvb16x 16, 15, 14 # load block |
| lxvb16x 17, 16, 14 # load block |
| lxvb16x 18, 17, 14 # load block |
| lxvb16x 19, 18, 14 # load block |
| lxvb16x 20, 19, 14 # load block |
| lxvb16x 21, 20, 14 # load block |
| lxvb16x 22, 21, 14 # load block |
| addi 14, 14, 128 |
| |
| Loop_aes_middle8x |
| |
| xxlor 23+32, 10, 10 |
| |
| cmpdi 10, 10 |
| beq Do_next_ghash |
| |
| # 192 bits |
| xxlor 24+32, 11, 11 |
| |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| vcipher 19, 19, 23 |
| vcipher 20, 20, 23 |
| vcipher 21, 21, 23 |
| vcipher 22, 22, 23 |
| |
| vcipher 15, 15, 24 |
| vcipher 16, 16, 24 |
| vcipher 17, 17, 24 |
| vcipher 18, 18, 24 |
| vcipher 19, 19, 24 |
| vcipher 20, 20, 24 |
| vcipher 21, 21, 24 |
| vcipher 22, 22, 24 |
| |
| xxlor 23+32, 12, 12 |
| |
| cmpdi 10, 12 |
| beq Do_next_ghash |
| |
| # 256 bits |
| xxlor 24+32, 13, 13 |
| |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| vcipher 19, 19, 23 |
| vcipher 20, 20, 23 |
| vcipher 21, 21, 23 |
| vcipher 22, 22, 23 |
| |
| vcipher 15, 15, 24 |
| vcipher 16, 16, 24 |
| vcipher 17, 17, 24 |
| vcipher 18, 18, 24 |
| vcipher 19, 19, 24 |
| vcipher 20, 20, 24 |
| vcipher 21, 21, 24 |
| vcipher 22, 22, 24 |
| |
| xxlor 23+32, 14, 14 |
| |
| cmpdi 10, 14 |
| beq Do_next_ghash |
| b aes_gcm_out |
| |
| Do_next_ghash: |
| |
| # |
| # last round |
| vcipherlast 15, 15, 23 |
| vcipherlast 16, 16, 23 |
| |
| xxlxor 47, 47, 15 |
| stxvb16x 47, 0, 9 # store output |
| xxlxor 48, 48, 16 |
| stxvb16x 48, 15, 9 # store output |
| |
| vcipherlast 17, 17, 23 |
| vcipherlast 18, 18, 23 |
| |
| xxlxor 49, 49, 17 |
| stxvb16x 49, 16, 9 # store output |
| xxlxor 50, 50, 18 |
| stxvb16x 50, 17, 9 # store output |
| |
| vcipherlast 19, 19, 23 |
| vcipherlast 20, 20, 23 |
| |
| xxlxor 51, 51, 19 |
| stxvb16x 51, 18, 9 # store output |
| xxlxor 52, 52, 20 |
| stxvb16x 52, 19, 9 # store output |
| |
| vcipherlast 21, 21, 23 |
| vcipherlast 22, 22, 23 |
| |
| xxlxor 53, 53, 21 |
| stxvb16x 53, 20, 9 # store output |
| xxlxor 54, 54, 22 |
| stxvb16x 54, 21, 9 # store output |
| |
| addi 9, 9, 128 |
| |
| # ghash here |
| ppc_aes_gcm_ghash2_4x |
| |
| xxlor 27+32, 0, 0 |
| vaddudm 30, 30, 31 # IV + counter |
| vmr 29, 30 |
| vxor 15, 30, 27 # add round key |
| vaddudm 30, 30, 31 |
| vxor 16, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 17, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 18, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 19, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 20, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 21, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 22, 30, 27 |
| |
| addi 12, 12, -128 |
| addi 11, 11, 128 |
| |
| bdnz Loop_8x_block |
| |
| vmr 30, 29 |
| stxvb16x 30+32, 0, 7 # update IV |
| |
| Loop_last_block: |
| cmpdi 12, 0 |
| beq aes_gcm_out |
| |
| # loop last few blocks |
| li 10, 16 |
| divdu 10, 12, 10 |
| |
| mtctr 10 |
| |
| lwz 10, 240(6) |
| |
| cmpdi 12, 16 |
| blt Final_block |
| |
| Next_rem_block: |
| lxvb16x 15, 0, 14 # load block |
| |
| Loop_aes_middle_1x |
| |
| xxlor 23+32, 10, 10 |
| |
| cmpdi 10, 10 |
| beq Do_next_1x |
| |
| # 192 bits |
| xxlor 24+32, 11, 11 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 12, 12 |
| |
| cmpdi 10, 12 |
| beq Do_next_1x |
| |
| # 256 bits |
| xxlor 24+32, 13, 13 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 14, 14 |
| |
| cmpdi 10, 14 |
| beq Do_next_1x |
| |
| Do_next_1x: |
| vcipherlast 15, 15, 23 |
| |
| xxlxor 47, 47, 15 |
| stxvb16x 47, 0, 9 # store output |
| addi 14, 14, 16 |
| addi 9, 9, 16 |
| |
| vmr 28, 15 |
| ppc_update_hash_1x |
| |
| addi 12, 12, -16 |
| addi 11, 11, 16 |
| xxlor 19+32, 0, 0 |
| vaddudm 30, 30, 31 # IV + counter |
| vxor 15, 30, 19 # add round key |
| |
| bdnz Next_rem_block |
| |
| li 15, 0 |
| std 15, 56(7) # clear partial count |
| stxvb16x 30+32, 0, 7 # update IV |
| cmpdi 12, 0 |
| beq aes_gcm_out |
| |
| Final_block: |
| lwz 10, 240(6) |
| Loop_aes_middle_1x |
| |
| xxlor 23+32, 10, 10 |
| |
| cmpdi 10, 10 |
| beq Do_final_1x |
| |
| # 192 bits |
| xxlor 24+32, 11, 11 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 12, 12 |
| |
| cmpdi 10, 12 |
| beq Do_final_1x |
| |
| # 256 bits |
| xxlor 24+32, 13, 13 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 14, 14 |
| |
| cmpdi 10, 14 |
| beq Do_final_1x |
| |
| Do_final_1x: |
| vcipherlast 15, 15, 23 |
| |
| # check partial block |
| li 21, 0 # encrypt |
| ld 15, 56(7) # partial? |
| cmpdi 15, 0 |
| beq Normal_block |
| bl Do_partial_block |
| |
| cmpdi 12, 0 |
| ble aes_gcm_out |
| |
| b Continue_partial_check |
| |
| Normal_block: |
| lxvb16x 15, 0, 14 # load last block |
| xxlxor 47, 47, 15 |
| |
| # create partial block mask |
| li 15, 16 |
| sub 15, 15, 12 # index to the mask |
| |
| vspltisb 16, -1 # first 16 bytes - 0xffff...ff |
| vspltisb 17, 0 # second 16 bytes - 0x0000...00 |
| li 10, 192 |
| stvx 16, 10, 1 |
| addi 10, 10, 16 |
| stvx 17, 10, 1 |
| |
| addi 10, 1, 192 |
| lxvb16x 16, 15, 10 # load partial block mask |
| xxland 47, 47, 16 |
| |
| vmr 28, 15 |
| ppc_update_hash_1x |
| |
| # store only the remaining bytes |
| bl Write_partial_block |
| |
| stxvb16x 30+32, 0, 7 # update IV |
| std 12, 56(7) # update partial count |
| li 16, 16 |
| |
| stxvb16x 32, 0, 8 # write out Xi |
| stxvb16x 32, 16, 8 # write out Xi |
| b aes_gcm_out |
| |
| # |
| # Compute data mask |
| # |
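| # Builds a 16-byte mask in \_mask: \_start leading zero bytes, then |
| # \_end 0xff bytes, zeroes after that, via a scratch store at 192(1); |
| # used to isolate the valid bytes of a partial block. |
| # |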
| .macro GEN_MASK _mask _start _end |
| vspltisb 16, -1 # first 16 bytes - 0xffff...ff |
| vspltisb 17, 0 # second 16 bytes - 0x0000...00 |
| li 10, 192 |
| stxvb16x 17+32, 10, 1 |
| add 10, 10, \_start |
| stxvb16x 16+32, 10, 1 |
| add 10, 10, \_end |
| stxvb16x 17+32, 10, 1 |
| |
| addi 10, 1, 192 |
| lxvb16x \_mask, 0, 10 # load partial block mask |
| .endm |
| |
| # |
| # Handle multiple partial blocks for encrypt and decrypt |
| # operations. |
| # |
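| # Hedged sketch: r15 = bytes already buffered, r5 = bytes in this call. |
| # If (buffered + new) > 16 the mask spans the rest of the block, else |
| # only the new bytes; the shifted input is XORed with the keystream, |
| # masked, hashed, and written out byte by byte, then either the block |
| # completes (bump IV, clear the partial count) or the new count is saved. |
| # |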
| SYM_FUNC_START_LOCAL(Do_partial_block) |
| add 17, 15, 5 |
| cmpdi 17, 16 |
| bgt Big_block |
| GEN_MASK 18, 15, 5 |
| b _Partial |
| SYM_FUNC_END(Do_partial_block) |
| Big_block: |
| li 16, 16 |
| GEN_MASK 18, 15, 16 |
| |
| _Partial: |
| lxvb16x 17+32, 0, 14 # load last block |
| sldi 16, 15, 3 |
| mtvsrdd 32+16, 0, 16 |
| vsro 17, 17, 16 |
| xxlxor 47, 47, 17+32 |
| xxland 47, 47, 18 |
| |
| vxor 0, 0, 0 # clear Xi |
| vmr 28, 15 |
| |
| cmpdi 21, 0 # encrypt/decrypt ops? |
| beq Skip_decrypt |
| xxland 32+28, 32+17, 18 |
| |
| Skip_decrypt: |
| |
| ppc_update_hash_1x |
| |
| li 16, 16 |
| lxvb16x 32+29, 16, 8 |
| vxor 0, 0, 29 |
| stxvb16x 32, 0, 8 # save Xi |
| stxvb16x 32, 16, 8 # save Xi |
| |
| # store partial block |
| # loop the rest of the stream if any |
| sldi 16, 15, 3 |
| mtvsrdd 32+16, 0, 16 |
| vslo 15, 15, 16 |
| #stxvb16x 15+32, 0, 9 # last block |
| |
| li 16, 16 |
| sub 17, 16, 15 # 16 - partial |
| |
| add 16, 15, 5 |
| cmpdi 16, 16 |
| bgt Larger_16 |
| mr 17, 5 |
| Larger_16: |
| |
| # write partial |
| li 10, 192 |
| stxvb16x 15+32, 10, 1 # save current block |
| |
| addi 10, 9, -1 |
| addi 16, 1, 191 |
| mtctr 17 # move partial byte count |
| |
| Write_last_partial: |
| lbzu 18, 1(16) |
| stbu 18, 1(10) |
| bdnz Write_last_partial |
| # partial block loop complete |
| |
| add 14, 14, 17 |
| add 9, 9, 17 |
| sub 12, 12, 17 |
| add 11, 11, 17 |
| |
| add 15, 15, 5 |
| cmpdi 15, 16 |
| blt Save_partial |
| |
| vaddudm 30, 30, 31 |
| stxvb16x 30+32, 0, 7 # update IV |
| xxlor 32+29, 0, 0 |
| vxor 15, 30, 29 # IV + round key - add round key 0 |
| li 15, 0 |
| std 15, 56(7) # partial done - clear |
| b Partial_done |
| Save_partial: |
| std 15, 56(7) # partial |
| |
| Partial_done: |
| blr |
| |
| # |
| # Write partial block |
| # r9 - output |
| # r12 - remaining bytes |
| # v15 - partial input data |
| # |
| SYM_FUNC_START_LOCAL(Write_partial_block) |
| li 10, 192 |
| stxvb16x 15+32, 10, 1 # last block |
| |
| addi 10, 9, -1 |
| addi 16, 1, 191 |
| |
| mtctr 12 # remaining bytes |
| li 15, 0 |
| |
| Write_last_byte: |
| lbzu 14, 1(16) |
| stbu 14, 1(10) |
| bdnz Write_last_byte |
| blr |
| SYM_FUNC_END(Write_partial_block) |
| |
| aes_gcm_out: |
| # out = state |
| stxvb16x 32, 0, 8 # write out Xi |
| add 3, 11, 12 # return count |
| |
| RESTORE_REGS |
| blr |
| |
| # |
| # 8x Decrypt |
| # |
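| # Mirrors aes_p10_gcm_encrypt except that GHASH is computed over the |
| # ciphertext, i.e. the loaded input blocks rather than the vcipherlast |
| # output (see the xxlor reloads ahead of ppc_aes_gcm_ghash2_4x below). |
| # |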
| _GLOBAL(aes_p10_gcm_decrypt) |
| .align 5 |
| |
| SAVE_REGS |
| |
| LOAD_HASH_TABLE |
| |
| # initialize ICB: GHASH( IV ), IV - r7 |
| lxvb16x 30+32, 0, 7 # load IV - v30 |
| |
| mr 12, 5 # length |
| li 11, 0 # block index |
| |
| # counter 1 |
| vxor 31, 31, 31 |
| vspltisb 22, 1 |
| vsldoi 31, 31, 22, 1 # counter 1 |
| |
| # load round key to VSR |
| lxv 0, 0(6) |
| lxv 1, 0x10(6) |
| lxv 2, 0x20(6) |
| lxv 3, 0x30(6) |
| lxv 4, 0x40(6) |
| lxv 5, 0x50(6) |
| lxv 6, 0x60(6) |
| lxv 7, 0x70(6) |
| lxv 8, 0x80(6) |
| lxv 9, 0x90(6) |
| lxv 10, 0xa0(6) |
| |
| # load rounds - 10 (128), 12 (192), 14 (256) |
| lwz 9,240(6) |
| |
| # |
| # vxor state, state, w # addroundkey |
| xxlor 32+29, 0, 0 |
| vxor 15, 30, 29 # IV + round key - add round key 0 |
| |
| cmpdi 9, 10 |
| beq Loop_aes_gcm_8x_dec |
| |
| # load 2 more round keys (v11, v12) |
| lxv 11, 0xb0(6) |
| lxv 12, 0xc0(6) |
| |
| cmpdi 9, 12 |
| beq Loop_aes_gcm_8x_dec |
| |
| # load 2 more round keys (v13, v14) |
| lxv 13, 0xd0(6) |
| lxv 14, 0xe0(6) |
| cmpdi 9, 14 |
| beq Loop_aes_gcm_8x_dec |
| |
| b aes_gcm_out |
| |
| .align 5 |
| Loop_aes_gcm_8x_dec: |
| mr 14, 3 |
| mr 9, 4 |
| |
| # |
| # check partial block |
| # |
| Continue_partial_check_dec: |
| ld 15, 56(7) |
| cmpdi 15, 0 |
| beq Continue_dec |
| bgt Final_block_dec |
| cmpdi 15, 16 |
| blt Final_block_dec |
| |
| Continue_dec: |
| # n blocks |
| li 10, 128 |
| divdu 10, 12, 10 # n 128-byte blocks |
| cmpdi 10, 0 |
| beq Loop_last_block_dec |
| |
| vaddudm 30, 30, 31 # IV + counter |
| vxor 16, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 17, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 18, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 19, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 20, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 21, 30, 29 |
| vaddudm 30, 30, 31 |
| vxor 22, 30, 29 |
| |
| mtctr 10 |
| |
| li 15, 16 |
| li 16, 32 |
| li 17, 48 |
| li 18, 64 |
| li 19, 80 |
| li 20, 96 |
| li 21, 112 |
| |
| lwz 10, 240(6) |
| |
| Loop_8x_block_dec: |
| |
| lxvb16x 15, 0, 14 # load block |
| lxvb16x 16, 15, 14 # load block |
| lxvb16x 17, 16, 14 # load block |
| lxvb16x 18, 17, 14 # load block |
| lxvb16x 19, 18, 14 # load block |
| lxvb16x 20, 19, 14 # load block |
| lxvb16x 21, 20, 14 # load block |
| lxvb16x 22, 21, 14 # load block |
| addi 14, 14, 128 |
| |
| Loop_aes_middle8x |
| |
| xxlor 23+32, 10, 10 |
| |
| cmpdi 10, 10 |
| beq Do_next_ghash_dec |
| |
| # 192 bits |
| xxlor 24+32, 11, 11 |
| |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| vcipher 19, 19, 23 |
| vcipher 20, 20, 23 |
| vcipher 21, 21, 23 |
| vcipher 22, 22, 23 |
| |
| vcipher 15, 15, 24 |
| vcipher 16, 16, 24 |
| vcipher 17, 17, 24 |
| vcipher 18, 18, 24 |
| vcipher 19, 19, 24 |
| vcipher 20, 20, 24 |
| vcipher 21, 21, 24 |
| vcipher 22, 22, 24 |
| |
| xxlor 23+32, 12, 12 |
| |
| cmpdi 10, 12 |
| beq Do_next_ghash_dec |
| |
| # 256 bits |
| xxlor 24+32, 13, 13 |
| |
| vcipher 15, 15, 23 |
| vcipher 16, 16, 23 |
| vcipher 17, 17, 23 |
| vcipher 18, 18, 23 |
| vcipher 19, 19, 23 |
| vcipher 20, 20, 23 |
| vcipher 21, 21, 23 |
| vcipher 22, 22, 23 |
| |
| vcipher 15, 15, 24 |
| vcipher 16, 16, 24 |
| vcipher 17, 17, 24 |
| vcipher 18, 18, 24 |
| vcipher 19, 19, 24 |
| vcipher 20, 20, 24 |
| vcipher 21, 21, 24 |
| vcipher 22, 22, 24 |
| |
| xxlor 23+32, 14, 14 |
| |
| cmpdi 10, 14 |
| beq Do_next_ghash_dec |
| b aes_gcm_out |
| |
| Do_next_ghash_dec: |
| |
| # |
| # last round |
| vcipherlast 15, 15, 23 |
| vcipherlast 16, 16, 23 |
| |
| xxlxor 47, 47, 15 |
| stxvb16x 47, 0, 9 # store output |
| xxlxor 48, 48, 16 |
| stxvb16x 48, 15, 9 # store output |
| |
| vcipherlast 17, 17, 23 |
| vcipherlast 18, 18, 23 |
| |
| xxlxor 49, 49, 17 |
| stxvb16x 49, 16, 9 # store output |
| xxlxor 50, 50, 18 |
| stxvb16x 50, 17, 9 # store output |
| |
| vcipherlast 19, 19, 23 |
| vcipherlast 20, 20, 23 |
| |
| xxlxor 51, 51, 19 |
| stxvb16x 51, 18, 9 # store output |
| xxlxor 52, 52, 20 |
| stxvb16x 52, 19, 9 # store output |
| |
| vcipherlast 21, 21, 23 |
| vcipherlast 22, 22, 23 |
| |
| xxlxor 53, 53, 21 |
| stxvb16x 53, 20, 9 # store output |
| xxlxor 54, 54, 22 |
| stxvb16x 54, 21, 9 # store output |
| |
| addi 9, 9, 128 |
| |
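| # decrypt: restore the ciphertext blocks (still in vs15 - vs22 from the |
| # loads above) into v15 - v22 so GHASH runs over ciphertext |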
| xxlor 15+32, 15, 15 |
| xxlor 16+32, 16, 16 |
| xxlor 17+32, 17, 17 |
| xxlor 18+32, 18, 18 |
| xxlor 19+32, 19, 19 |
| xxlor 20+32, 20, 20 |
| xxlor 21+32, 21, 21 |
| xxlor 22+32, 22, 22 |
| |
| # ghash here |
| ppc_aes_gcm_ghash2_4x |
| |
| xxlor 27+32, 0, 0 |
| vaddudm 30, 30, 31 # IV + counter |
| vmr 29, 30 |
| vxor 15, 30, 27 # add round key |
| vaddudm 30, 30, 31 |
| vxor 16, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 17, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 18, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 19, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 20, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 21, 30, 27 |
| vaddudm 30, 30, 31 |
| vxor 22, 30, 27 |
| |
| addi 12, 12, -128 |
| addi 11, 11, 128 |
| |
| bdnz Loop_8x_block_dec |
| |
| vmr 30, 29 |
| stxvb16x 30+32, 0, 7 # update IV |
| |
| Loop_last_block_dec: |
| cmpdi 12, 0 |
| beq aes_gcm_out |
| |
| # loop last few blocks |
| li 10, 16 |
| divdu 10, 12, 10 |
| |
| mtctr 10 |
| |
| lwz 10, 240(6) |
| |
| cmpdi 12, 16 |
| blt Final_block_dec |
| |
| Next_rem_block_dec: |
| lxvb16x 15, 0, 14 # load block |
| |
| Loop_aes_middle_1x |
| |
| xxlor 23+32, 10, 10 |
| |
| cmpdi 10, 10 |
| beq Do_next_1x_dec |
| |
| # 192 bits |
| xxlor 24+32, 11, 11 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 12, 12 |
| |
| cmpdi 10, 12 |
| beq Do_next_1x_dec |
| |
| # 256 bits |
| xxlor 24+32, 13, 13 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 14, 14 |
| |
| cmpdi 10, 14 |
| beq Do_next_1x_dec |
| |
| Do_next_1x_dec: |
| vcipherlast 15, 15, 23 |
| |
| xxlxor 47, 47, 15 |
| stxvb16x 47, 0, 9 # store output |
| addi 14, 14, 16 |
| addi 9, 9, 16 |
| |
| xxlor 28+32, 15, 15 |
| #vmr 28, 15 |
| ppc_update_hash_1x |
| |
| addi 12, 12, -16 |
| addi 11, 11, 16 |
| xxlor 19+32, 0, 0 |
| vaddudm 30, 30, 31 # IV + counter |
| vxor 15, 30, 19 # add round key |
| |
| bdnz Next_rem_block_dec |
| |
| li 15, 0 |
| std 15, 56(7) # clear partial count |
| stxvb16x 30+32, 0, 7 # update IV |
| cmpdi 12, 0 |
| beq aes_gcm_out |
| |
| Final_block_dec: |
| lwz 10, 240(6) |
| Loop_aes_middle_1x |
| |
| xxlor 23+32, 10, 10 |
| |
| cmpdi 10, 10 |
| beq Do_final_1x_dec |
| |
| # 192 bits |
| xxlor 24+32, 11, 11 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 12, 12 |
| |
| cmpdi 10, 12 |
| beq Do_final_1x_dec |
| |
| # 256 bits |
| xxlor 24+32, 13, 13 |
| |
| vcipher 15, 15, 23 |
| vcipher 15, 15, 24 |
| |
| xxlor 23+32, 14, 14 |
| |
| cmpdi 10, 14 |
| beq Do_final_1x_dec |
| |
| Do_final_1x_dec: |
| vcipherlast 15, 15, 23 |
| |
| # check partial block |
| li 21, 1 # decrypt |
| ld 15, 56(7) # partial? |
| cmpdi 15, 0 |
| beq Normal_block_dec |
| bl Do_partial_block |
| cmpdi 12, 0 |
| ble aes_gcm_out |
| |
| b Continue_partial_check_dec |
| |
| Normal_block_dec: |
| lxvb16x 15, 0, 14 # load last block |
| xxlxor 47, 47, 15 |
| |
| # create partial block mask |
| li 15, 16 |
| sub 15, 15, 12 # index to the mask |
| |
| vspltisb 16, -1 # first 16 bytes - 0xffff...ff |
| vspltisb 17, 0 # second 16 bytes - 0x0000...00 |
| li 10, 192 |
| stvx 16, 10, 1 |
| addi 10, 10, 16 |
| stvx 17, 10, 1 |
| |
| addi 10, 1, 192 |
| lxvb16x 16, 15, 10 # load partial block mask |
| xxland 47, 47, 16 |
| |
| xxland 32+28, 15, 16 |
| #vmr 28, 15 |
| ppc_update_hash_1x |
| |
| # store only the remaining bytes |
| bl Write_partial_block |
| |
| stxvb16x 30+32, 0, 7 # update IV |
| std 12, 56(7) # update partial count |
| li 16, 16 |
| |
| stxvb16x 32, 0, 8 # write out Xi |
| stxvb16x 32, 16, 8 # write out Xi |
| b aes_gcm_out |