| /* |
| * Copyright 2002, 2003 Andi Kleen, SuSE Labs. |
| * |
| * This file is subject to the terms and conditions of the GNU General Public |
| * License. See the file COPYING in the main directory of this archive |
| * for more details. No warranty for anything given at all. |
| */ |
| #include <linux/linkage.h> |
| #include <asm/errno.h> |
| #include <asm/asm.h> |
| |
| /* |
| * Checksum copy with exception handling. |
| * If a load from the source or a store to the destination faults, the |
| * function returns 0. A successfully computed checksum is never 0 |
| * (the sum is seeded with -1 below), so callers can use 0 as the |
| * error indication. |
| * |
| * Input |
| * rdi source |
| * rsi destination |
| * edx len (32bit) |
| * |
| * Output |
| * eax 32-bit folded sum; 0 if an exception was hit. |
| * |
| * Wrappers need to perform any required access checks and treat a |
| * return value of 0 as a fault. They should also align the source |
| * or destination to 8 bytes. |
| */ |
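| |
| /* |
| * Seen from C, the wrappers call this as (roughly; argument names are |
| * illustrative): |
| * |
| * __wsum csum_partial_copy_generic(const void *src, void *dst, int len); |
| * |
| * i.e. src in %rdi, dst in %rsi, len in %edx per the SysV calling |
| * convention. |
| */ |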
| |
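| /* |
| * Each use of the source/dest macros below attaches an exception-table |
| * entry to the instruction that immediately follows it, so a fault in |
| * that load from the source (or store to the destination) jumps to |
| * .Lfault instead of oopsing. |
| */ |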
| .macro source |
| 10: |
| _ASM_EXTABLE_UA(10b, .Lfault) |
| .endm |
| |
| .macro dest |
| 20: |
| _ASM_EXTABLE_UA(20b, .Lfault) |
| .endm |
| |
| SYM_FUNC_START(csum_partial_copy_generic) |
| subq $5*8, %rsp |
| movq %rbx, 0*8(%rsp) |
| movq %r12, 1*8(%rsp) |
| movq %r14, 2*8(%rsp) |
| movq %r13, 3*8(%rsp) |
| movq %r15, 4*8(%rsp) |
| |
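| /* |
| * Seed the sum with -1 (all ones): a ones' complement sum started from |
| * a non-zero value never folds to zero, which leaves 0 free to act as |
| * the fault indicator (see .Lfault). %r9 stays zero throughout and is |
| * only used to add the final carry back in via adc. |
| */ |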
| movl $-1, %eax |
| xorl %r9d, %r9d |
| movl %edx, %ecx |
| cmpl $8, %ecx |
| jb .Lshort |
| |
| testb $7, %sil /* is the destination 8-byte aligned? */ |
| jne .Lunaligned |
| .Laligned: |
| movl %ecx, %r12d |
| |
| shrq $6, %r12 |
| jz .Lhandle_tail /* < 64 */ |
| |
| clc |
| |
| /* main loop: copy and checksum in 64-byte blocks */ |
| /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ |
| /* r11: temp3, rdx: temp4, r12: loopcnt */ |
| /* r10: temp5, r15: temp6, r14: temp7, r13: temp8 */ |
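| /* |
| * The checksum is a ones' complement sum: every carry out of bit 63 |
| * must be added back into the total. The adc chain below does that |
| * from one addition to the next, and the adcq %r9, %rax after the |
| * loop folds in the carry left over from the last one (r9 is 0). |
| */ |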
| .p2align 4 |
| .Lloop: |
| source |
| movq (%rdi), %rbx |
| source |
| movq 8(%rdi), %r8 |
| source |
| movq 16(%rdi), %r11 |
| source |
| movq 24(%rdi), %rdx |
| |
| source |
| movq 32(%rdi), %r10 |
| source |
| movq 40(%rdi), %r15 |
| source |
| movq 48(%rdi), %r14 |
| source |
| movq 56(%rdi), %r13 |
| |
| 30: |
| /* |
| * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a |
| * potentially unmapped kernel address. |
| */ |
| _ASM_EXTABLE(30b, 2f) |
| prefetcht0 5*64(%rdi) |
| 2: |
| adcq %rbx, %rax |
| adcq %r8, %rax |
| adcq %r11, %rax |
| adcq %rdx, %rax |
| adcq %r10, %rax |
| adcq %r15, %rax |
| adcq %r14, %rax |
| adcq %r13, %rax |
| |
| decl %r12d /* decl leaves CF alone, so the carry chain stays intact */ |
| |
| dest |
| movq %rbx, (%rsi) |
| dest |
| movq %r8, 8(%rsi) |
| dest |
| movq %r11, 16(%rsi) |
| dest |
| movq %rdx, 24(%rsi) |
| |
| dest |
| movq %r10, 32(%rsi) |
| dest |
| movq %r15, 40(%rsi) |
| dest |
| movq %r14, 48(%rsi) |
| dest |
| movq %r13, 56(%rsi) |
| |
| leaq 64(%rdi), %rdi |
| leaq 64(%rsi), %rsi |
| |
| jnz .Lloop |
| |
| adcq %r9, %rax |
| |
| /* do last up to 56 bytes */ |
| .Lhandle_tail: |
| /* ecx: remaining count; rcx bit 63 set: the final result needs a rol8 (see .Lwas_odd) */ |
| movq %rcx, %r10 |
| andl $63, %ecx |
| shrl $3, %ecx |
| jz .Lfold |
| clc |
| .p2align 4 |
| .Lloop_8: |
| source |
| movq (%rdi), %rbx |
| adcq %rbx, %rax |
| decl %ecx |
| dest |
| movq %rbx, (%rsi) |
| leaq 8(%rsi), %rsi /* preserve carry */ |
| leaq 8(%rdi), %rdi |
| jnz .Lloop_8 |
| adcq %r9, %rax /* add in carry */ |
| |
| .Lfold: |
| /* reduce the 64-bit checksum to 32 bits: add both halves, wrap the carry around */ |
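| /* |
| * For example, a 64-bit sum of 0x00000002ffffffff folds as |
| * 0xffffffff + 0x00000002 = 0x100000001 -> 0x00000001 + CF = 0x00000002. |
| */ |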
| movl %eax, %ebx |
| shrq $32, %rax |
| addl %ebx, %eax |
| adcl %r9d, %eax |
| |
| /* handle the last 1..7 bytes: 16-bit words first, then a trailing odd byte */ |
| .Lhandle_7: |
| movl %r10d, %ecx |
| andl $7, %ecx |
| .L1: /* .Lshort rejoins the common path here */ |
| shrl $1, %ecx |
| jz .Lhandle_1 |
| movl $2, %edx |
| xorl %ebx, %ebx |
| clc |
| .p2align 4 |
| .Lloop_1: |
| source |
| movw (%rdi), %bx |
| adcl %ebx, %eax |
| decl %ecx |
| dest |
| movw %bx, (%rsi) |
| leaq 2(%rdi), %rdi |
| leaq 2(%rsi), %rsi |
| jnz .Lloop_1 |
| adcl %r9d, %eax /* add in carry */ |
| |
| /* handle last odd byte */ |
| .Lhandle_1: |
| testb $1, %r10b |
| jz .Lende |
| xorl %ebx, %ebx |
| source |
| movb (%rdi), %bl |
| dest |
| movb %bl, (%rsi) |
| addl %ebx, %eax |
| adcl %r9d, %eax /* carry */ |
| |
| .Lende: |
| testq %r10, %r10 |
| js .Lwas_odd |
| .Lout: |
| movq 0*8(%rsp), %rbx |
| movq 1*8(%rsp), %r12 |
| movq 2*8(%rsp), %r14 |
| movq 3*8(%rsp), %r13 |
| movq 4*8(%rsp), %r15 |
| addq $5*8, %rsp |
| ret |
| .Lshort: |
| movl %ecx, %r10d |
| jmp .L1 |
| .Lunaligned: |
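| /* |
| * Align the destination to 8 bytes by consuming up to 1+2+4 head |
| * bytes. The 2- and 4-byte pieces are simply added to the sum; only |
| * a leading odd byte needs extra care (see .Lodd). |
| */ |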
| xorl %ebx, %ebx |
| testb $1, %sil |
| jne .Lodd |
| 1: testb $2, %sil |
| je 2f |
| source |
| movw (%rdi), %bx |
| dest |
| movw %bx, (%rsi) |
| leaq 2(%rdi), %rdi |
| subq $2, %rcx |
| leaq 2(%rsi), %rsi |
| addq %rbx, %rax |
| 2: testb $4, %sil |
| je .Laligned |
| source |
| movl (%rdi), %ebx |
| dest |
| movl %ebx, (%rsi) |
| leaq 4(%rdi), %rdi |
| subq $4, %rcx |
| leaq 4(%rsi), %rsi |
| addq %rbx, %rax |
| jmp .Laligned |
| |
| .Lodd: |
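| /* |
| * The destination starts on an odd byte. Copy that byte by hand; since |
| * everything that follows is now shifted by one position, the rest of |
| * the buffer is summed with its bytes in the opposite halves of each |
| * 16-bit word. The first byte is therefore added into the high half |
| * here (shll $8), bit 63 of %rcx is used as a flag (len fits in 32 |
| * bits, so the upper bits are spare), and .Lwas_odd rotates the final |
| * sum left by 8 to swap the bytes back. |
| */ |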
| source |
| movb (%rdi), %bl |
| dest |
| movb %bl, (%rsi) |
| leaq 1(%rdi), %rdi |
| leaq 1(%rsi), %rsi |
| /* decrement, set MSB */ |
| leaq -1(%rcx, %rcx), %rcx |
| rorq $1, %rcx |
| shll $8, %ebx |
| addq %rbx, %rax |
| jmp 1b |
| |
| .Lwas_odd: |
| roll $8, %eax |
| jmp .Lout |
| |
| /* Exception: return 0, which callers treat as the fault indicator */ |
| .Lfault: |
| xorl %eax, %eax |
| jmp .Lout |
| SYM_FUNC_END(csum_partial_copy_generic) |