| /* |
| * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> |
| * Copyright 2002 Andi Kleen, SuSE Labs. |
| * Subject to the GNU Public License v2. |
| * |
| * Functions to copy from and to user space. |
| */ |
| |
| #include <linux/linkage.h> |
| #include <asm/current.h> |
| #include <asm/asm-offsets.h> |
| #include <asm/thread_info.h> |
| #include <asm/cpufeatures.h> |
| #include <asm/alternative-asm.h> |
| #include <asm/asm.h> |
| #include <asm/smap.h> |
| #include <asm/export.h> |
| |
| /* |
| * copy_user_generic_unrolled - memory copy with exception handling. |
| * This version is for CPUs like P4 that don't have efficient micro |
| * code for rep movsq |
| * |
| * Strategy: a count below 8 goes straight to the byte loop. Otherwise |
| * ALIGN_DESTINATION runs first, then the count is split into unrolled |
| * 64-byte blocks, then 8-byte words, then single bytes. |
| * Only the low 32 bits of the count (edx) are examined. |
| * |
| * Input: |
| * rdi destination |
| * rsi source |
| * rdx count |
| * |
| * Output: |
| * eax uncopied bytes or 0 if successful. |
| * |
| * Registers written by the code below: rax, rcx, rdx, rsi, rdi, r8-r11. |
| * NOTE(review): ASM_STAC, ASM_CLAC and ALIGN_DESTINATION are macros that |
| * are not defined in this file. Presumably STAC/CLAC bracket the window in |
| * which user memory may be touched and ALIGN_DESTINATION byte-copies until |
| * rdi is 8-byte aligned - confirm against their definitions. |
| * |
| * Fault handling: every load and store carries a numeric label that an |
| * _ASM_EXTABLE line pairs with a fixup label (30, 40 or 50). The fixup |
| * code rebuilds the number of outstanding bytes in edx and jumps to |
| * copy_user_handle_tail, which is defined outside this file. |
| */ |
| ENTRY(copy_user_generic_unrolled) |
| ASM_STAC |
| cmpl $8,%edx |
| jb 20f /* less than 8 bytes, go to byte copy loop */ |
| ALIGN_DESTINATION |
| movl %edx,%ecx |
| andl $63,%edx /* edx = bytes left over after the 64-byte blocks */ |
| shrl $6,%ecx /* ecx = number of whole 64-byte blocks */ |
| jz 17f /* no whole block: set up the 8-byte loop */ |
| 1: movq (%rsi),%r8 /* block loop: first 32 bytes go through r8-r11 */ |
| 2: movq 1*8(%rsi),%r9 |
| 3: movq 2*8(%rsi),%r10 |
| 4: movq 3*8(%rsi),%r11 |
| 5: movq %r8,(%rdi) |
| 6: movq %r9,1*8(%rdi) |
| 7: movq %r10,2*8(%rdi) |
| 8: movq %r11,3*8(%rdi) |
| 9: movq 4*8(%rsi),%r8 /* second 32 bytes reuse the same registers */ |
| 10: movq 5*8(%rsi),%r9 |
| 11: movq 6*8(%rsi),%r10 |
| 12: movq 7*8(%rsi),%r11 |
| 13: movq %r8,4*8(%rdi) |
| 14: movq %r9,5*8(%rdi) |
| 15: movq %r10,6*8(%rdi) |
| 16: movq %r11,7*8(%rdi) |
| leaq 64(%rsi),%rsi /* pointers and ecx only move once the block is done */ |
| leaq 64(%rdi),%rdi |
| decl %ecx |
| jnz 1b |
| 17: movl %edx,%ecx |
| andl $7,%edx /* edx = trailing bytes (0-7) */ |
| shrl $3,%ecx /* ecx = number of whole 8-byte words */ |
| jz 20f |
| 18: movq (%rsi),%r8 |
| 19: movq %r8,(%rdi) |
| leaq 8(%rsi),%rsi |
| leaq 8(%rdi),%rdi |
| decl %ecx |
| jnz 18b |
| 20: andl %edx,%edx /* leaves edx unchanged, sets ZF when no bytes remain */ |
| jz 23f |
| movl %edx,%ecx /* ecx = byte loop counter */ |
| 21: movb (%rsi),%al |
| 22: movb %al,(%rdi) |
| incq %rsi |
| incq %rdi |
| decl %ecx |
| jnz 21b |
| 23: xor %eax,%eax /* success: report 0 uncopied bytes */ |
| ASM_CLAC |
| ret |
| |
| .section .fixup,"ax" |
| 30: shll $6,%ecx /* fault in block loop: ecx still counts the current block */ |
| addl %ecx,%edx /* edx = blocks left * 64 + leftover bytes */ |
| jmp 60f |
| 40: leal (%rdx,%rcx,8),%edx /* fault in 8-byte loop: edx = words left * 8 + trailing bytes */ |
| jmp 60f |
| 50: movl %ecx,%edx /* fault in byte loop: edx = bytes left */ |
| 60: jmp copy_user_handle_tail /* ecx is zerorest also */ |
| .previous |
| |
| _ASM_EXTABLE(1b,30b) /* labels 1-16 (block loop) resume at fixup 30 */ |
| _ASM_EXTABLE(2b,30b) |
| _ASM_EXTABLE(3b,30b) |
| _ASM_EXTABLE(4b,30b) |
| _ASM_EXTABLE(5b,30b) |
| _ASM_EXTABLE(6b,30b) |
| _ASM_EXTABLE(7b,30b) |
| _ASM_EXTABLE(8b,30b) |
| _ASM_EXTABLE(9b,30b) |
| _ASM_EXTABLE(10b,30b) |
| _ASM_EXTABLE(11b,30b) |
| _ASM_EXTABLE(12b,30b) |
| _ASM_EXTABLE(13b,30b) |
| _ASM_EXTABLE(14b,30b) |
| _ASM_EXTABLE(15b,30b) |
| _ASM_EXTABLE(16b,30b) |
| _ASM_EXTABLE(18b,40b) /* 8-byte loop resumes at fixup 40 */ |
| _ASM_EXTABLE(19b,40b) |
| _ASM_EXTABLE(21b,50b) /* byte loop resumes at fixup 50 */ |
| _ASM_EXTABLE(22b,50b) |
| ENDPROC(copy_user_generic_unrolled) |
| EXPORT_SYMBOL(copy_user_generic_unrolled) |
| |
| /* Some CPUs run faster using the string copy instructions. |
| * This is also a lot simpler. Use them when possible. |
| * |
| * copy_user_generic_string: a count below 8 is copied with rep movsb |
| * alone. Otherwise ALIGN_DESTINATION runs first, whole 8-byte words are |
| * copied with rep movsq and the trailing 0-7 bytes with rep movsb. |
| * |
| * Only 4GB of copy is supported. This shouldn't be a problem |
| * because the kernel normally only writes from/to page sized chunks |
| * even if user space passed a longer buffer. |
| * And more would be dangerous because both Intel and AMD have |
| * errata with rep movsq > 4GB. If someone feels the need to fix |
| * this please consider this. |
| * |
| * Input: |
| * rdi destination |
| * rsi source |
| * rdx count |
| * |
| * Output: |
| * eax uncopied bytes or 0 if successful. |
| * |
| * Fault handling: a fault in rep movsq resumes at fixup 11 and a fault in |
| * rep movsb at fixup 12. Both end in a jump to copy_user_handle_tail |
| * (defined outside this file) with the outstanding byte count in edx. |
| * NOTE(review): ASM_STAC, ASM_CLAC and ALIGN_DESTINATION are macros that |
| * are not defined in this file - confirm their effects there. |
| */ |
| ENTRY(copy_user_generic_string) |
| ASM_STAC |
| cmpl $8,%edx |
| jb 2f /* less than 8 bytes, go to byte copy loop */ |
| ALIGN_DESTINATION |
| movl %edx,%ecx |
| shrl $3,%ecx /* ecx = number of whole 8-byte words */ |
| andl $7,%edx /* edx = trailing bytes (0-7) */ |
| 1: rep |
| movsq |
| 2: movl %edx,%ecx /* ecx = byte count for the final rep movsb */ |
| 3: rep |
| movsb |
| xorl %eax,%eax /* success: report 0 uncopied bytes */ |
| ASM_CLAC |
| ret |
| |
| .section .fixup,"ax" |
| 11: leal (%rdx,%rcx,8),%ecx /* ecx = words left * 8 + trailing bytes; falls through to 12 */ |
| 12: movl %ecx,%edx /* ecx is zerorest also */ |
| jmp copy_user_handle_tail |
| .previous |
| |
| _ASM_EXTABLE(1b,11b) /* fault in rep movsq */ |
| _ASM_EXTABLE(3b,12b) /* fault in rep movsb */ |
| ENDPROC(copy_user_generic_string) |
| EXPORT_SYMBOL(copy_user_generic_string) |
| |
| /* |
| * Some CPUs are adding enhanced REP MOVSB/STOSB instructions. |
| * It's recommended to use enhanced REP MOVSB/STOSB if it's enabled. |
| * |
| * copy_user_enhanced_fast_string copies the whole count with a single |
| * rep movsb: there is no alignment step and no split by size. |
| * Only the low 32 bits of the count (edx) are used. |
| * |
| * Input: |
| * rdi destination |
| * rsi source |
| * rdx count |
| * |
| * Output: |
| * eax uncopied bytes or 0 if successful. |
| * |
| * Fault handling: a fault in rep movsb resumes at fixup 12, which passes |
| * the count left in ecx to copy_user_handle_tail (defined outside this |
| * file) via edx. |
| * NOTE(review): ASM_STAC and ASM_CLAC are macros from <asm/smap.h>, not |
| * defined in this file - confirm their effects there. |
| */ |
| ENTRY(copy_user_enhanced_fast_string) |
| ASM_STAC |
| movl %edx,%ecx /* ecx = byte count consumed by rep movsb */ |
| 1: rep |
| movsb |
| xorl %eax,%eax /* success: report 0 uncopied bytes */ |
| ASM_CLAC |
| ret |
| |
| .section .fixup,"ax" |
| 12: movl %ecx,%edx /* ecx is zerorest also */ |
| jmp copy_user_handle_tail |
| .previous |
| |
| _ASM_EXTABLE(1b,12b) /* fault in rep movsb */ |
| ENDPROC(copy_user_enhanced_fast_string) |
| EXPORT_SYMBOL(copy_user_enhanced_fast_string) |
| |
| /* |
| * __copy_user_nocache - Uncached memory copy with exception handling |
| * This will force destination out of cache for more performance. |
| * |
| * Note: Cached memory copy is used when destination or size is not |
| * naturally aligned. That is: |
| * - Require 8-byte alignment when size is 8 bytes or larger. |
| * - Require 4-byte alignment when size is 4 bytes. |
| * |
| * Input (as used by the code below): |
| * rdi destination |
| * rsi source |
| * edx count (only the low 32 bits are examined) |
| * |
| * Output: |
| * eax 0 when the copy completes. On a fault the fixup code jumps to |
| * copy_user_handle_tail (defined outside this file) with the outstanding |
| * byte count in edx; presumably that returns the uncopied bytes as for |
| * the sibling routines - confirm. |
| * |
| * Stages: 64-byte blocks and 8-byte words are stored with movnti |
| * (non-temporal store); a single 4-byte movnti is used only if the |
| * destination is 4-byte aligned at that point; any remaining bytes are |
| * copied with ordinary byte moves. An sfence is issued before returning |
| * and before handing over to the tail handler. |
| * |
| * Registers written by the code below: rax, rcx, rdx, rsi, rdi, r8-r11. |
| */ |
| ENTRY(__copy_user_nocache) |
| ASM_STAC |
| |
| /* If size is less than 8 bytes, go to 4-byte copy */ |
| cmpl $8,%edx |
| jb .L_4b_nocache_copy_entry |
| |
| /* If destination is not 8-byte aligned, "cache" copy to align it */ |
| ALIGN_DESTINATION |
| |
| /* |
| * Set 64-byte block count (ecx) and remainder (edx). The "4x8b" in the |
| * label names refers to the two 4x8-byte groups moved per iteration. |
| */ |
| movl %edx,%ecx |
| andl $63,%edx |
| shrl $6,%ecx |
| jz .L_8b_nocache_copy_entry /* jump if count is 0 */ |
| |
| /* Perform nocache loop-copy: 64 bytes per iteration, as two 4x8-byte groups */ |
| .L_4x8b_nocache_copy_loop: |
| 1: movq (%rsi),%r8 |
| 2: movq 1*8(%rsi),%r9 |
| 3: movq 2*8(%rsi),%r10 |
| 4: movq 3*8(%rsi),%r11 |
| 5: movnti %r8,(%rdi) |
| 6: movnti %r9,1*8(%rdi) |
| 7: movnti %r10,2*8(%rdi) |
| 8: movnti %r11,3*8(%rdi) |
| 9: movq 4*8(%rsi),%r8 |
| 10: movq 5*8(%rsi),%r9 |
| 11: movq 6*8(%rsi),%r10 |
| 12: movq 7*8(%rsi),%r11 |
| 13: movnti %r8,4*8(%rdi) |
| 14: movnti %r9,5*8(%rdi) |
| 15: movnti %r10,6*8(%rdi) |
| 16: movnti %r11,7*8(%rdi) |
| leaq 64(%rsi),%rsi /* pointers and ecx only move once the block is done */ |
| leaq 64(%rdi),%rdi |
| decl %ecx |
| jnz .L_4x8b_nocache_copy_loop |
| |
| /* Set 8-byte copy count (ecx) and remainder (edx, 0-7) */ |
| .L_8b_nocache_copy_entry: |
| movl %edx,%ecx |
| andl $7,%edx |
| shrl $3,%ecx |
| jz .L_4b_nocache_copy_entry /* jump if count is 0 */ |
| |
| /* Perform 8-byte nocache loop-copy */ |
| .L_8b_nocache_copy_loop: |
| 20: movq (%rsi),%r8 |
| 21: movnti %r8,(%rdi) |
| leaq 8(%rsi),%rsi |
| leaq 8(%rdi),%rdi |
| decl %ecx |
| jnz .L_8b_nocache_copy_loop |
| |
| /* If no byte left, we're done (edx is below 8 whenever this is reached) */ |
| .L_4b_nocache_copy_entry: |
| andl %edx,%edx |
| jz .L_finish_copy |
| |
| /* If destination is not 4-byte aligned, go to byte copy: */ |
| movl %edi,%ecx |
| andl $3,%ecx |
| jnz .L_1b_cache_copy_entry |
| |
| /* Set 4-byte copy count (1 or 0) and remainder */ |
| movl %edx,%ecx |
| andl $3,%edx |
| shrl $2,%ecx |
| jz .L_1b_cache_copy_entry /* jump if count is 0 */ |
| |
| /* Perform 4-byte nocache copy: */ |
| 30: movl (%rsi),%r8d |
| 31: movnti %r8d,(%rdi) |
| leaq 4(%rsi),%rsi |
| leaq 4(%rdi),%rdi |
| |
| /* If no bytes left, we're done: */ |
| andl %edx,%edx |
| jz .L_finish_copy |
| |
| /* Perform byte "cache" loop-copy for the remainder (edx is non-zero here) */ |
| .L_1b_cache_copy_entry: |
| movl %edx,%ecx |
| .L_1b_cache_copy_loop: |
| 40: movb (%rsi),%al |
| 41: movb %al,(%rdi) |
| incq %rsi |
| incq %rdi |
| decl %ecx |
| jnz .L_1b_cache_copy_loop |
| |
| /* Finished copying; fence the prior stores */ |
| .L_finish_copy: |
| xorl %eax,%eax |
| ASM_CLAC |
| sfence |
| ret |
| |
| .section .fixup,"ax" |
| .L_fixup_4x8b_copy: |
| shll $6,%ecx /* ecx still counts the block that faulted */ |
| addl %ecx,%edx /* edx = blocks left * 64 + remainder */ |
| jmp .L_fixup_handle_tail |
| .L_fixup_8b_copy: |
| lea (%rdx,%rcx,8),%rdx /* rdx = words left * 8 + remainder */ |
| jmp .L_fixup_handle_tail |
| .L_fixup_4b_copy: |
| lea (%rdx,%rcx,4),%rdx /* rdx = 4-byte count * 4 + remainder */ |
| jmp .L_fixup_handle_tail |
| .L_fixup_1b_copy: |
| movl %ecx,%edx /* edx = bytes left in the byte loop */ |
| .L_fixup_handle_tail: |
| sfence /* fence the stores already issued before handing over */ |
| jmp copy_user_handle_tail |
| .previous |
| |
| _ASM_EXTABLE(1b,.L_fixup_4x8b_copy) /* labels 1-16: 64-byte block loop */ |
| _ASM_EXTABLE(2b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(3b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(4b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(5b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(6b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(7b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(8b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(9b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(10b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(11b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(12b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(13b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(14b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(15b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(16b,.L_fixup_4x8b_copy) |
| _ASM_EXTABLE(20b,.L_fixup_8b_copy) /* labels 20-21: 8-byte loop */ |
| _ASM_EXTABLE(21b,.L_fixup_8b_copy) |
| _ASM_EXTABLE(30b,.L_fixup_4b_copy) /* labels 30-31: single 4-byte copy */ |
| _ASM_EXTABLE(31b,.L_fixup_4b_copy) |
| _ASM_EXTABLE(40b,.L_fixup_1b_copy) /* labels 40-41: byte loop */ |
| _ASM_EXTABLE(41b,.L_fixup_1b_copy) |
| ENDPROC(__copy_user_nocache) |
| EXPORT_SYMBOL(__copy_user_nocache) |