/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/cache.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

	.text

/*
 * computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit)
 *
 * __csum_partial(buff, len, sum)
 */
_GLOBAL(__csum_partial)
	/*
	 * On entry: r3 = buff, r4 = len, r5 = sum.
	 * r3 is biased by -4 so the 4(r3)/lwzu addressing below walks the
	 * buffer; every accumulation uses adde so the hardware carry folds
	 * back into the 32-bit one's-complement sum.  Result returned in r3.
	 */
	subi	r3,r3,4
	srawi.	r6,r4,2		/* Divide len by 4 and also clear carry */
	beq	3f		/* if we're doing < 4 bytes */
	andi.	r0,r3,2		/* Align buffer to longword boundary */
	beq+	1f
	lhz	r0,4(r3)	/* do 2 bytes to get aligned */
	subi	r4,r4,2
	addi	r3,r3,2
	srwi.	r6,r4,2		/* # words to do */
	adde	r5,r5,r0
	beq	3f
1:	andi.	r6,r6,3		/* Prepare to handle words 4 by 4 */
	beq	21f
	mtctr	r6
2:	lwzu	r0,4(r3)
	adde	r5,r5,r0
	bdnz	2b
	/*
	 * Main loop: 16 bytes (4 words) per iteration.  Loads are issued
	 * one word ahead of the matching adde to hide load latency.
	 */
21:	srwi.	r6,r4,4		/* # blocks of 4 words to do */
	beq	3f
	lwz	r0,4(r3)
	mtctr	r6
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdz	23f
22:	lwz	r0,4(r3)
	adde	r5,r5,r8
	lwz	r6,8(r3)
	adde	r5,r5,r0
	lwz	r7,12(r3)
	adde	r5,r5,r6
	lwzu	r8,16(r3)
	adde	r5,r5,r7
	bdnz	22b
23:	adde	r5,r5,r8	/* fold in the last pipelined word */
	/* Handle the trailing 2-byte and 1-byte residues of len */
3:	andi.	r0,r4,2
	beq+	4f
	lhz	r0,4(r3)
	addi	r3,r3,2
	adde	r5,r5,r0
4:	andi.	r0,r4,1
	beq+	5f
	lbz	r0,4(r3)
	slwi	r0,r0,8		/* Upper byte of word */
	adde	r5,r5,r0
5:	addze	r3,r5		/* add in final carry */
	blr
EXPORT_SYMBOL(__csum_partial)
Paul Mackerras | 14cf11a | 2005-09-26 16:04:21 +1000 | [diff] [blame] | 78 | |
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
/*
 * Copy-and-checksum 16 bytes (4 words): load from 4..16(r4) with update,
 * store to 4..16(r6) with update, folding each word into the running
 * sum in r12 via adde.  Each faultable load/store carries a numeric
 * label 8<n><0-7> so CSUM_COPY_16_BYTES_EXCODE(n) can attach matching
 * exception-table entries.
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10
| 109 | |
/*
 * Exception-table entries for the labels emitted by
 * CSUM_COPY_16_BYTES_WITHEX(n): faults on the loads (labels 0-3)
 * branch to src_error, faults on the stores (labels 4-7) to dst_error.
 */
#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, src_error);	\
	EX_TABLE(8 ## n ## 1b, src_error);	\
	EX_TABLE(8 ## n ## 2b, src_error);	\
	EX_TABLE(8 ## n ## 3b, src_error);	\
	EX_TABLE(8 ## n ## 4b, dst_error);	\
	EX_TABLE(8 ## n ## 5b, dst_error);	\
	EX_TABLE(8 ## n ## 6b, dst_error);	\
	EX_TABLE(8 ## n ## 7b, dst_error);

	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

_GLOBAL(csum_partial_copy_generic)
	/*
	 * r3 = src, r4 = dst, r5 = len, r6 = sum,
	 * r7 = src_err pointer, r8 = dst_err pointer (either may be NULL).
	 * The error pointers are spilled to a 16-byte stack frame so the
	 * fault handlers below can retrieve them.  The running checksum
	 * lives in r12; src/dst are biased by -4 for lwzu/stwu addressing.
	 */
	stwu	r1,-16(r1)
	stw	r7,12(r1)
	stw	r8,8(r1)

	addic	r12,r6,0	/* addic also clears the carry bit */
	addi	r6,r4,-4
	neg	r0,r4
	addi	r4,r3,-4
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq	/* default: treat dst as even (no final rotate) */
	beq	58f

	cmplw	0,r5,r0		/* is this more than total to do? */
	blt	63f		/* if not much to do */
	rlwinm	r7,r6,3,0x8	/* r7 = 8 if dst is odd, else 0 */
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even ? */
	andi.	r8,r0,3		/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0
70:	lbz	r9,4(r4)	/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31	/* pack copied bytes into r3 for the sum */
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5
	srwi.	r0,r0,2
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)	/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES
	li	r11,4
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4
	cmpwi	r0,1
	li	r7,0
	ble	114f
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0
	mr	r0,r7
	mtctr	r8

53:	dcbt	r3,r4		/* prefetch source ahead */
54:	dcbz	r11,r6		/* pre-zero dst cacheline to avoid the read */
	/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0
	li	r3,4
	li	r7,0
	bne	114b		/* run the final prefetched cachelines */

	/* remaining full words, then the 2-byte and 1-byte tails */
63:	srwi.	r0,r5,2
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8
	adde	r12,r12,r0
66:	addze	r3,r12		/* add in final carry */
	addi	r1,r1,16	/* pop the stack frame */
	beqlr+	cr7		/* even dst: done */
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

	/* read fault: store -EFAULT to *src_err if the pointer is non-NULL */
src_error:
	lwz	r7,12(r1)
	addi	r1,r1,16
	cmpwi	cr0,r7,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r7)
	blr
	/* write fault: store -EFAULT to *dst_err if the pointer is non-NULL */
dst_error:
	lwz	r8,8(r1)
	addi	r1,r1,16
	cmpwi	cr0,r8,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r8)
	blr

	EX_TABLE(70b, src_error);
	EX_TABLE(71b, dst_error);
	EX_TABLE(72b, src_error);
	EX_TABLE(73b, dst_error);
	EX_TABLE(54b, dst_error);

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * src_error (if in read part) or dst_error (if in write part)
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, src_error);
	EX_TABLE(31b, dst_error);
	EX_TABLE(40b, src_error);
	EX_TABLE(41b, dst_error);
	EX_TABLE(50b, src_error);
	EX_TABLE(51b, dst_error);

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 */

_GLOBAL(csum_ipv6_magic)
	/*
	 * r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum.
	 * Accumulate the 4+4 words of both IPv6 addresses plus
	 * len+proto+sum through the carry chain, fold the 32-bit total
	 * to 16 bits, and return the complemented result in r3.
	 */
	lwz	r8, 0(r3)
	lwz	r9, 4(r3)
	addc	r0, r7, r8	/* first add sets up the carry chain */
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0		/* fold in the final carry */
	rotlwi	r3, r0, 16	/* fold 32 bits to 16: add the two halves... */
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31	/* ...and keep the complemented high half */
	blr
EXPORT_SYMBOL(csum_ipv6_magic)