| /* SPDX-License-Identifier: GPL-2.0 */ |
| /* |
| * Copyright (C) 2024 Christophe Leroy <christophe.leroy@csgroup.eu>, CS GROUP France |
| */ |
| |
| #include <linux/linkage.h> |
| |
| #include <asm/ppc_asm.h> |
| |
| /* Function arguments, in the registers they arrive in on entry */ |
| #define dst_bytes r3 |
| #define key r4 |
| #define counter r5 |
| #define nblocks r6 |
| |
| /* |
| * Registers reused once the arguments have been consumed: r0 takes over the |
| * block count from nblocks, and r4 holds the constant 4 once the key has |
| * been loaded through it. |
| */ |
| #define idx_r0 r0 |
| #define val4 r4 |
| |
| /* ChaCha constant words: "expa", "nd 3", "2-by", "te k" as little-endian ASCII */ |
| #define const0 0x61707865 |
| #define const1 0x3320646e |
| #define const2 0x79622d32 |
| #define const3 0x6b206574 |
| |
| /* |
| * The eight 32-bit key words. key0 and key1 alias counter (r5) and nblocks |
| * (r6), so both arguments must be consumed before the key is loaded. |
| */ |
| #define key0 r5 |
| #define key1 r6 |
| #define key2 r7 |
| #define key3 r8 |
| #define key4 r9 |
| #define key5 r10 |
| #define key6 r11 |
| #define key7 r12 |
| |
| /* 64-bit block counter: low word in counter0, high word in counter1 */ |
| #define counter0 r14 |
| #define counter1 r15 |
| |
| /* The sixteen 32-bit words of the ChaCha state (state15 shares r31 with the round-count scratch) */ |
| #define state0 r16 |
| #define state1 r17 |
| #define state2 r18 |
| #define state3 r19 |
| #define state4 r20 |
| #define state5 r21 |
| #define state6 r22 |
| #define state7 r23 |
| #define state8 r24 |
| #define state9 r25 |
| #define state10 r26 |
| #define state11 r27 |
| #define state12 r28 |
| #define state13 r29 |
| #define state14 r30 |
| #define state15 r31 |
| |
| /* |
| * One add/xor/rotate step applied to four independent lanes: |
| * acc += src; tgt ^= acc; tgt = rotl32(tgt, rot) |
| * The same instruction is emitted for all four lanes before moving on to |
| * the next one, so adjacent instructions always work on different lanes. |
| */ |
| .macro chacha_axr4 acc1 src1 tgt1 acc2 src2 tgt2 acc3 src3 tgt3 acc4 src4 tgt4 rot |
| add \acc1, \acc1, \src1 |
| add \acc2, \acc2, \src2 |
| add \acc3, \acc3, \src3 |
| add \acc4, \acc4, \src4 |
| xor \tgt1, \tgt1, \acc1 |
| xor \tgt2, \tgt2, \acc2 |
| xor \tgt3, \tgt3, \acc3 |
| xor \tgt4, \tgt4, \acc4 |
| rotlwi \tgt1, \tgt1, \rot |
| rotlwi \tgt2, \tgt2, \rot |
| rotlwi \tgt3, \tgt3, \rot |
| rotlwi \tgt4, \tgt4, \rot |
| .endm |
| |
| /* |
| * Four interleaved ChaCha quarter rounds, one per (a, b, c, d) register set: |
| * a += b; d ^= a; d <<<= 16 |
| * c += d; b ^= c; b <<<= 12 |
| * a += b; d ^= a; d <<<= 8 |
| * c += d; b ^= c; b <<<= 7 |
| * Only the twelve registers passed in are modified. |
| */ |
| .macro quarterround4 a1 b1 c1 d1 a2 b2 c2 d2 a3 b3 c3 d3 a4 b4 c4 d4 |
| chacha_axr4 \a1, \b1, \d1, \a2, \b2, \d2, \a3, \b3, \d3, \a4, \b4, \d4, 16 |
| chacha_axr4 \c1, \d1, \b1, \c2, \d2, \b2, \c3, \d3, \b3, \c4, \d4, \b4, 12 |
| chacha_axr4 \a1, \b1, \d1, \a2, \b2, \d2, \a3, \b3, \d3, \a4, \b4, \d4, 8 |
| chacha_axr4 \c1, \d1, \b1, \c2, \d2, \b2, \c3, \d3, \b3, \c4, \d4, \b4, 7 |
| .endm |
| |
| /* |
| * Invoke quarterround4 on state words selected by index: each argument N is |
| * pasted onto "state" to form the stateN register alias. |
| */ |
| #define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4) \ |
| quarterround4 state##a1 state##b1 state##c1 state##d1 \ |
| state##a2 state##b2 state##c2 state##d2 \ |
| state##a3 state##b3 state##c3 state##d3 \ |
| state##a4 state##b4 state##c4 state##d4 |
| |
| /* |
| * Very basic 32 bits implementation of ChaCha20. Produces a given positive number |
| * of blocks of output with a nonce of 0, taking an input key and 8-byte |
| * counter. Importantly, no key or cipher state is spilled to the stack: the |
| * only values stored there are the caller's r14-r31 and the counter pointer. |
| * Its arguments are: |
| * |
| * r3: output bytes |
| * r4: 32-byte key input |
| * r5: 8-byte counter input/output (pointer saved on stack) |
| * r6: number of 64-byte blocks to write to output (must not be 0: the block |
| * count is decremented and tested only after a block has been written) |
| * |
| * Register usage once the arguments have been consumed: |
| * |
| * r0: counter of blocks (initialised with r6) |
| * r4: Value '4' after key has been read. |
| * r5-r12: key |
| * r14-r15: counter |
| * r16-r31: state |
| */ |
| SYM_FUNC_START(__arch_chacha20_blocks_nostack) |
| #ifdef __powerpc64__ |
| /* |
| * 64-bit: no frame is allocated. The counter pointer and r14-r31 are stored |
| * at negative offsets from r1 (presumably relying on the ABI's protected |
| * area below the stack pointer - confirm against the ABI in use). |
| */ |
| std counter, -216(r1) |
| |
| std r14, -144(r1) |
| std r15, -136(r1) |
| std r16, -128(r1) |
| std r17, -120(r1) |
| std r18, -112(r1) |
| std r19, -104(r1) |
| std r20, -96(r1) |
| std r21, -88(r1) |
| std r22, -80(r1) |
| std r23, -72(r1) |
| std r24, -64(r1) |
| std r25, -56(r1) |
| std r26, -48(r1) |
| std r27, -40(r1) |
| std r28, -32(r1) |
| std r29, -24(r1) |
| std r30, -16(r1) |
| std r31, -8(r1) |
| #else |
| /* 32-bit: allocate a 96-byte frame; counter pointer at 20(r1), r14-r31 at 24(r1)-92(r1) */ |
| stwu r1, -96(r1) |
| stw counter, 20(r1) |
| #ifdef __BIG_ENDIAN__ |
| /* stmw stores r14 through r31 to consecutive words in one instruction */ |
| stmw r14, 24(r1) |
| #else |
| /* Little endian build: store each register individually instead of using stmw */ |
| stw r14, 24(r1) |
| stw r15, 28(r1) |
| stw r16, 32(r1) |
| stw r17, 36(r1) |
| stw r18, 40(r1) |
| stw r19, 44(r1) |
| stw r20, 48(r1) |
| stw r21, 52(r1) |
| stw r22, 56(r1) |
| stw r23, 60(r1) |
| stw r24, 64(r1) |
| stw r25, 68(r1) |
| stw r26, 72(r1) |
| stw r27, 76(r1) |
| stw r28, 80(r1) |
| stw r29, 84(r1) |
| stw r30, 88(r1) |
| stw r31, 92(r1) |
| #endif |
| #endif /* __powerpc64__ */ |
| |
| /* Read the counter as two 32-bit words: low word at offset 0, high word at offset 4 */ |
| lwz counter0, 0(counter) |
| lwz counter1, 4(counter) |
| #ifdef __powerpc64__ |
| /* Also keep the high word in the upper half of counter0 so one addi carries into it */ |
| rldimi counter0, counter1, 32, 0 |
| #endif |
| /* nblocks (r6) is about to be overwritten by key1: move the block count to r0 */ |
| mr idx_r0, nblocks |
| /* Bias the output pointer by -4; the stores below compensate with +4 based offsets */ |
| subi dst_bytes, dst_bytes, 4 |
| |
| /* |
| * Load the eight key words. key0 is r5, so this overwrites the counter |
| * pointer, which has already been used above and saved on the stack. |
| */ |
| lwz key0, 0(key) |
| lwz key1, 4(key) |
| lwz key2, 8(key) |
| lwz key3, 12(key) |
| lwz key4, 16(key) |
| lwz key5, 20(key) |
| lwz key6, 24(key) |
| lwz key7, 28(key) |
| |
| /* The key pointer is no longer needed: r4 now holds the constant 4 used as stwbrx index */ |
| li val4, 4 |
| /* Each pass through .Lblock produces one 64-byte output block */ |
| .Lblock: |
| /* r31 is state15: use it as scratch for the loop count before state15 is initialised */ |
| li r31, 10 |
| |
| /* state0-3 = const0-3, each built from its high-adjusted (@ha) and low (@l) halves */ |
| lis state0, const0@ha |
| lis state1, const1@ha |
| lis state2, const2@ha |
| lis state3, const3@ha |
| addi state0, state0, const0@l |
| addi state1, state1, const1@l |
| addi state2, state2, const2@l |
| addi state3, state3, const3@l |
| |
| /* CTR = 10 iterations of .Lpermute, two rounds each */ |
| mtctr r31 |
| |
| /* state4-11 = key */ |
| mr state4, key0 |
| mr state5, key1 |
| mr state6, key2 |
| mr state7, key3 |
| mr state8, key4 |
| mr state9, key5 |
| mr state10, key6 |
| mr state11, key7 |
| |
| /* state12-13 = block counter (only the low 32 bits of each register end up in the output) */ |
| mr state12, counter0 |
| mr state13, counter1 |
| |
| /* state14-15 = nonce, which is always 0 */ |
| li state14, 0 |
| li state15, 0 |
| |
| .Lpermute: |
| /* Column round on (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15), then diagonal round */ |
| QUARTERROUND4( 0, 4, 8,12, 1, 5, 9,13, 2, 6,10,14, 3, 7,11,15) |
| QUARTERROUND4( 0, 5,10,15, 1, 6,11,12, 2, 7, 8,13, 3, 4, 9,14) |
| |
| bdnz .Lpermute |
| |
| /* |
| * Add the initial state back into the permuted state: constants, key and |
| * counter. state14 and state15 started as 0, so they need no addition. |
| */ |
| addis state0, state0, const0@ha |
| addis state1, state1, const1@ha |
| addis state2, state2, const2@ha |
| addis state3, state3, const3@ha |
| addi state0, state0, const0@l |
| addi state1, state1, const1@l |
| addi state2, state2, const2@l |
| addi state3, state3, const3@l |
| |
| add state4, state4, key0 |
| add state5, state5, key1 |
| add state6, state6, key2 |
| add state7, state7, key3 |
| add state8, state8, key4 |
| add state9, state9, key5 |
| add state10, state10, key6 |
| add state11, state11, key7 |
| |
| add state12, state12, counter0 |
| add state13, state13, counter1 |
| |
| #ifdef __BIG_ENDIAN__ |
| /* |
| * Big endian: store each word byte-reversed. stwbrx is used in indexed |
| * form only, so words are written in pairs at dst_bytes + val4 and at |
| * dst_bytes + 0, with dst_bytes advanced by 8 in between (64 bytes in total). |
| */ |
| stwbrx state0, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state1, 0, dst_bytes |
| stwbrx state2, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state3, 0, dst_bytes |
| stwbrx state4, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state5, 0, dst_bytes |
| stwbrx state6, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state7, 0, dst_bytes |
| stwbrx state8, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state9, 0, dst_bytes |
| stwbrx state10, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state11, 0, dst_bytes |
| stwbrx state12, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state13, 0, dst_bytes |
| stwbrx state14, val4, dst_bytes |
| addi dst_bytes, dst_bytes, 8 |
| stwbrx state15, 0, dst_bytes |
| #else |
| /* Little endian: plain word stores; the final stwu also advances dst_bytes by 64 */ |
| stw state0, 4(dst_bytes) |
| stw state1, 8(dst_bytes) |
| stw state2, 12(dst_bytes) |
| stw state3, 16(dst_bytes) |
| stw state4, 20(dst_bytes) |
| stw state5, 24(dst_bytes) |
| stw state6, 28(dst_bytes) |
| stw state7, 32(dst_bytes) |
| stw state8, 36(dst_bytes) |
| stw state9, 40(dst_bytes) |
| stw state10, 44(dst_bytes) |
| stw state11, 48(dst_bytes) |
| stw state12, 52(dst_bytes) |
| stw state13, 56(dst_bytes) |
| stw state14, 60(dst_bytes) |
| stwu state15, 64(dst_bytes) |
| #endif |
| |
| /* Decrement the block count; CR0 set here is consumed by the bne further down */ |
| subic. idx_r0, idx_r0, 1 /* subi. can't use r0 as source */ |
| |
| #ifdef __powerpc64__ |
| /* counter0 holds the whole 64-bit counter: increment it, then refresh the high word */ |
| addi counter0, counter0, 1 |
| srdi counter1, counter0, 32 |
| #else |
| /* Increment the low word and propagate the carry into the high word */ |
| addic counter0, counter0, 1 |
| addze counter1, counter1 |
| #endif |
| |
| /* Loop while blocks remain; none of the counter updates above use a record form */ |
| bne .Lblock |
| |
| /* Reload the counter pointer saved on entry and write the updated counter back */ |
| #ifdef __powerpc64__ |
| ld counter, -216(r1) |
| #else |
| lwz counter, 20(r1) |
| #endif |
| stw counter0, 0(counter) |
| stw counter1, 4(counter) |
| |
| /* |
| * Wipe key1-key7 (r6-r12) before returning. key0 (r5) has already been |
| * overwritten by the counter pointer reload above. |
| */ |
| li r6, 0 |
| li r7, 0 |
| li r8, 0 |
| li r9, 0 |
| li r10, 0 |
| li r11, 0 |
| li r12, 0 |
| |
| /* Restore the caller's r14-r31, replacing the counter and state they held */ |
| #ifdef __powerpc64__ |
| ld r14, -144(r1) |
| ld r15, -136(r1) |
| ld r16, -128(r1) |
| ld r17, -120(r1) |
| ld r18, -112(r1) |
| ld r19, -104(r1) |
| ld r20, -96(r1) |
| ld r21, -88(r1) |
| ld r22, -80(r1) |
| ld r23, -72(r1) |
| ld r24, -64(r1) |
| ld r25, -56(r1) |
| ld r26, -48(r1) |
| ld r27, -40(r1) |
| ld r28, -32(r1) |
| ld r29, -24(r1) |
| ld r30, -16(r1) |
| ld r31, -8(r1) |
| #else |
| #ifdef __BIG_ENDIAN__ |
| lmw r14, 24(r1) |
| #else |
| lwz r14, 24(r1) |
| lwz r15, 28(r1) |
| lwz r16, 32(r1) |
| lwz r17, 36(r1) |
| lwz r18, 40(r1) |
| lwz r19, 44(r1) |
| lwz r20, 48(r1) |
| lwz r21, 52(r1) |
| lwz r22, 56(r1) |
| lwz r23, 60(r1) |
| lwz r24, 64(r1) |
| lwz r25, 68(r1) |
| lwz r26, 72(r1) |
| lwz r27, 76(r1) |
| lwz r28, 80(r1) |
| lwz r29, 84(r1) |
| lwz r30, 88(r1) |
| lwz r31, 92(r1) |
| #endif |
| /* Release the 96-byte frame allocated on entry */ |
| addi r1, r1, 96 |
| #endif /* __powerpc64__ */ |
| blr |
| SYM_FUNC_END(__arch_chacha20_blocks_nostack) |