Greg Kroah-Hartman | b244131 | 2017-11-01 15:07:57 +0100 | [diff] [blame] | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 2 | /* |
| 3 | * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) |
| 4 | * |
| 5 | * Finds length of a 0-terminated string. Optimized for the |
| 6 | * Alpha architecture: |
| 7 | * |
| 8 | * - memory accessed as aligned quadwords only |
| 9 | * - uses cmpbge to compare 8 bytes in parallel |
| 10 | * - does binary search to find 0 byte in last |
| 11 | * quadword (HAKMEM needed 12 instructions to |
| 12 | * do this instead of the 9 instructions that |
| 13 | * binary search needs). |
| 14 | */ |
Al Viro | 00fc0e0 | 2016-01-11 09:51:29 -0500 | [diff] [blame] | 15 | #include <asm/export.h> |
/* Assembler setup: keep the instruction order as written, leave $at alone. */
	.set noreorder
	.set noat

	.align 3

	.globl strlen
	.ent strlen

/*
 * strlen - length of a 0-terminated string
 *
 * In:       $16 = address of the string (any alignment)
 * Out:      $0  = number of bytes in front of the first 0 byte
 * Clobbers: $1, $2, $3
 * Returns through $26; the stack is not touched.
 *
 * Memory is only read as aligned quadwords, starting with the quadword
 * that contains the first byte of the string.
 *
 * The branch targets inside the function carry the "$" prefix so that
 * the assembler treats them as local labels instead of emitting generic
 * names (loop, found, done) into the symbol table; "strlen" stays the
 * only symbol describing this code.
 */
strlen:
	ldq_u	$1, 0($16)	# $1 = aligned quadword holding the first string byte
	lda	$2, -1($31)	# $2 = all ones
	insqh	$2, $16, $2	# $2 = 0xff in every byte lane below ($16 & 7)
	andnot	$16, 7, $0	# $0 = $16 rounded down to a quadword boundary
	or	$2, $1, $1	# bytes in front of the string can no longer read as 0
	cmpbge	$31, $1, $2	# $2 <- bitmask: bit i == 1 <==> i-th byte == 0
	bne	$2, $found	# terminator is already in the first quadword

$loop:	ldq	$1, 8($0)	# fetch the following quadword
	addq	$0, 8, $0	# addr += 8
	nop			# helps dual issue last two insns
	cmpbge	$31, $1, $2	# same zero-byte bitmask as above
	beq	$2, $loop	# no 0 byte in this quadword, keep scanning

	/* Here: $0 = address of the quadword with the 0 byte, $2 = its bitmask. */
$found:	blbs	$2, $done	# lane 0 is the terminator: $0 already points at it
	negq	$2, $3
	and	$2, $3, $2	# keep only the lowest set bit of the bitmask

	/* Binary search for the lane of that bit: test halves, pairs, singles. */
	and	$2, 0x0f, $1	# non-zero if the bit is in lanes 0-3
	addq	$0, 4, $3
	cmoveq	$1, $3, $0	# otherwise addr += 4

	and	$2, 0x33, $1	# non-zero if the bit is in lanes 0, 1, 4 or 5
	addq	$0, 2, $3
	cmoveq	$1, $3, $0	# otherwise addr += 2

	and	$2, 0x55, $1	# non-zero if the bit is in an even lane
	addq	$0, 1, $3
	cmoveq	$1, $3, $0	# otherwise addr += 1

$done:	subq	$0, $16, $0	# length = address of terminator - start of string
	ret	$31, ($26)

	.end strlen
	EXPORT_SYMBOL(strlen)