blob: ecd150dc3ed9e9c6c500ba3ac75d063761c92a4a [file] [log] [blame]
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */
10
11#include <linux/sys.h>
12#include <asm/processor.h>
Christophe Leroy7aef41362015-09-22 16:34:27 +020013#include <asm/cache.h>
Paul Mackerras14cf11a2005-09-26 16:04:21 +100014#include <asm/errno.h>
15#include <asm/ppc_asm.h>
Al Viro9445aa12016-01-13 23:33:46 -050016#include <asm/export.h>
Paul Mackerras14cf11a2005-09-26 16:04:21 +100017
18 .text
19
20/*
Paul Mackerras14cf11a2005-09-26 16:04:21 +100021 * computes the checksum of a memory block at buff, length len,
22 * and adds in "sum" (32-bit)
23 *
Christophe Leroy7e393222016-03-07 18:44:37 +010024 * __csum_partial(buff, len, sum)
Paul Mackerras14cf11a2005-09-26 16:04:21 +100025 */
Christophe Leroy7e393222016-03-07 18:44:37 +010026_GLOBAL(__csum_partial)
Paul Mackerras14cf11a2005-09-26 16:04:21 +100027 subi r3,r3,4
Christophe Leroy48821a32015-09-22 16:34:29 +020028 srawi. r6,r4,2 /* Divide len by 4 and also clear carry */
Paul Mackerras14cf11a2005-09-26 16:04:21 +100029 beq 3f /* if we're doing < 4 bytes */
Christophe Leroy48821a32015-09-22 16:34:29 +020030 andi. r0,r3,2 /* Align buffer to longword boundary */
Paul Mackerras14cf11a2005-09-26 16:04:21 +100031 beq+ 1f
Christophe Leroy48821a32015-09-22 16:34:29 +020032 lhz r0,4(r3) /* do 2 bytes to get aligned */
Paul Mackerras14cf11a2005-09-26 16:04:21 +100033 subi r4,r4,2
Christophe Leroy48821a32015-09-22 16:34:29 +020034 addi r3,r3,2
Paul Mackerras14cf11a2005-09-26 16:04:21 +100035 srwi. r6,r4,2 /* # words to do */
Christophe Leroy48821a32015-09-22 16:34:29 +020036 adde r5,r5,r0
Paul Mackerras14cf11a2005-09-26 16:04:21 +100037 beq 3f
Christophe Leroyf867d552015-09-22 16:34:32 +0200381: andi. r6,r6,3 /* Prepare to handle words 4 by 4 */
39 beq 21f
40 mtctr r6
Christophe Leroy48821a32015-09-22 16:34:29 +0200412: lwzu r0,4(r3)
42 adde r5,r5,r0
Paul Mackerras14cf11a2005-09-26 16:04:21 +100043 bdnz 2b
Christophe Leroyf867d552015-09-22 16:34:32 +02004421: srwi. r6,r4,4 /* # blocks of 4 words to do */
45 beq 3f
Christophe Leroy373e0982018-05-24 11:22:27 +000046 lwz r0,4(r3)
Christophe Leroyf867d552015-09-22 16:34:32 +020047 mtctr r6
Christophe Leroyf867d552015-09-22 16:34:32 +020048 lwz r6,8(r3)
Christophe Leroyf867d552015-09-22 16:34:32 +020049 adde r5,r5,r0
Christophe Leroy373e0982018-05-24 11:22:27 +000050 lwz r7,12(r3)
Christophe Leroyf867d552015-09-22 16:34:32 +020051 adde r5,r5,r6
Christophe Leroy373e0982018-05-24 11:22:27 +000052 lwzu r8,16(r3)
Christophe Leroyf867d552015-09-22 16:34:32 +020053 adde r5,r5,r7
Christophe Leroy373e0982018-05-24 11:22:27 +000054 bdz 23f
5522: lwz r0,4(r3)
Christophe Leroyf867d552015-09-22 16:34:32 +020056 adde r5,r5,r8
Christophe Leroy373e0982018-05-24 11:22:27 +000057 lwz r6,8(r3)
58 adde r5,r5,r0
59 lwz r7,12(r3)
60 adde r5,r5,r6
61 lwzu r8,16(r3)
62 adde r5,r5,r7
Christophe Leroyf867d552015-09-22 16:34:32 +020063 bdnz 22b
Christophe Leroy373e0982018-05-24 11:22:27 +00006423: adde r5,r5,r8
Christophe Leroy48821a32015-09-22 16:34:29 +0200653: andi. r0,r4,2
66 beq+ 4f
67 lhz r0,4(r3)
Paul Mackerras14cf11a2005-09-26 16:04:21 +100068 addi r3,r3,2
Christophe Leroy48821a32015-09-22 16:34:29 +020069 adde r5,r5,r0
704: andi. r0,r4,1
71 beq+ 5f
72 lbz r0,4(r3)
73 slwi r0,r0,8 /* Upper byte of word */
74 adde r5,r5,r0
755: addze r3,r5 /* add in final carry */
Paul Mackerras14cf11a2005-09-26 16:04:21 +100076 blr
Al Viro9445aa12016-01-13 23:33:46 -050077EXPORT_SYMBOL(__csum_partial)
Paul Mackerras14cf11a2005-09-26 16:04:21 +100078
/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in "sum" (32-bit), while copying the block to dst.
 * If an access exception occurs on src or dst, it stores -EFAULT
 * to *src_err or *dst_err respectively, and (for an error on
 * src) zeroes the rest of dst.
 *
 * NOTE(review): the src_error path below only stores -EFAULT and
 * returns -- it does not itself zero the remainder of dst.
 * Presumably a C-level caller performs that zeroing; confirm
 * before relying on the behavior described above.
 *
 * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err)
 */
/*
 * CSUM_COPY_16_BYTES_WITHEX(n): copy and checksum 16 bytes (4 words)
 * from src (r4) to dst (r6); both pointers are pre-biased by -4, so
 * accesses use offsets 4..16 and the final lwzu/stwu advance each
 * pointer by 16.  The running sum is accumulated into r12 with adde
 * (carry chain must already be live).
 *
 * Every load/store gets a numeric local label 8<n><0..7> so that
 * CSUM_COPY_16_BYTES_EXCODE(n) can emit matching exception-table
 * entries: a fault on a load (labels 0-3) lands in src_error, a
 * fault on a store (labels 4-7) lands in dst_error.
 */
#define CSUM_COPY_16_BYTES_WITHEX(n)	\
8 ## n ## 0:			\
	lwz	r7,4(r4);	\
8 ## n ## 1:			\
	lwz	r8,8(r4);	\
8 ## n ## 2:			\
	lwz	r9,12(r4);	\
8 ## n ## 3:			\
	lwzu	r10,16(r4);	\
8 ## n ## 4:			\
	stw	r7,4(r6);	\
	adde	r12,r12,r7;	\
8 ## n ## 5:			\
	stw	r8,8(r6);	\
	adde	r12,r12,r8;	\
8 ## n ## 6:			\
	stw	r9,12(r6);	\
	adde	r12,r12,r9;	\
8 ## n ## 7:			\
	stwu	r10,16(r6);	\
	adde	r12,r12,r10

/* Exception-table entries matching CSUM_COPY_16_BYTES_WITHEX(n). */
#define CSUM_COPY_16_BYTES_EXCODE(n)		\
	EX_TABLE(8 ## n ## 0b, src_error);	\
	EX_TABLE(8 ## n ## 1b, src_error);	\
	EX_TABLE(8 ## n ## 2b, src_error);	\
	EX_TABLE(8 ## n ## 3b, src_error);	\
	EX_TABLE(8 ## n ## 4b, dst_error);	\
	EX_TABLE(8 ## n ## 5b, dst_error);	\
	EX_TABLE(8 ## n ## 6b, dst_error);	\
	EX_TABLE(8 ## n ## 7b, dst_error);
Christophe Leroy7aef41362015-09-22 16:34:27 +0200119
	.text
	.stabs	"arch/powerpc/lib/",N_SO,0,0,0f
	.stabs	"checksum_32.S",N_SO,0,0,0f
0:

CACHELINE_BYTES = L1_CACHE_BYTES
LG_CACHELINE_BYTES = L1_CACHE_SHIFT
CACHELINE_MASK = (L1_CACHE_BYTES-1)

/*
 * In:   r3 = src, r4 = dst, r5 = len, r6 = sum,
 *       r7 = src_err ptr, r8 = dst_err ptr
 * Out:  r3 = 32-bit checksum of the copied data (plus sum)
 *
 * Register use inside: r12 = running sum (adde carry chain, CA
 * cleared by the addic below); r4/r6 = src-4/dst-4 (lwzu/stwu
 * biased); r7/r8 are spilled to the 16-byte stack frame so the
 * fault handlers can retrieve the error pointers; cr7.eq records
 * whether dst is even (odd dst: checksum is byte-rotated on entry
 * and rotated back on exit).  The main loop works a cacheline at a
 * time, dcbt-prefetching the source and dcbz-clearing the
 * destination line before filling it.
 */
_GLOBAL(csum_partial_copy_generic)
	stwu	r1,-16(r1)		/* small frame to hold the err ptrs */
	stw	r7,12(r1)		/* src_err, read back by src_error */
	stw	r8,8(r1)		/* dst_err, read back by dst_error */

	addic	r12,r6,0		/* sum -> r12, and clear CA for adde */
	addi	r6,r4,-4		/* bias dst for stwu */
	neg	r0,r4			/* (r4 still = dst here) */
	addi	r4,r3,-4		/* bias src for lwzu */
	andi.	r0,r0,CACHELINE_MASK	/* # bytes to start of cache line */
	crset	4*cr7+eq		/* default: treat dst as even */
	beq	58f			/* dst already cacheline aligned */

	cmplw	0,r5,r0			/* is this more than total to do? */
	blt	63f			/* if not much to do */
	rlwinm	r7,r6,3,0x8		/* r7 = 8 iff dst is odd */
	rlwnm	r12,r12,r7,0,31	/* odd destination address: rotate one byte */
	cmplwi	cr7,r7,0	/* is destination address even ? */
	andi.	r8,r0,3			/* get it word-aligned first */
	mtctr	r8
	beq+	61f
	li	r3,0			/* r3 accumulates the odd bytes */
70:	lbz	r9,4(r4)		/* do some bytes */
	addi	r4,r4,1
	slwi	r3,r3,8
	rlwimi	r3,r9,0,24,31		/* pack byte into low end of r3 */
71:	stb	r9,4(r6)
	addi	r6,r6,1
	bdnz	70b
	adde	r12,r12,r3
61:	subf	r5,r0,r5		/* len -= alignment bytes */
	srwi.	r0,r0,2			/* # words to reach cacheline boundary */
	mtctr	r0
	beq	58f
72:	lwzu	r9,4(r4)		/* do some words */
	adde	r12,r12,r9
73:	stwu	r9,4(r6)
	bdnz	72b

58:	srwi.	r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
	clrlwi	r5,r5,32-LG_CACHELINE_BYTES /* r5 = leftover bytes */
	li	r11,4			/* dcbz offset (pointers are biased -4) */
	beq	63f

	/* Here we decide how far ahead to prefetch the source */
	li	r3,4			/* dcbt offset for the current line */
	cmpwi	r0,1
	li	r7,0			/* r7 = # lines prefetched ahead */
	ble	114f			/* single line: no read-ahead */
	li	r7,1
#if MAX_COPY_PREFETCH > 1
	/* Heuristically, for large transfers we prefetch
	   MAX_COPY_PREFETCH cachelines ahead.  For small transfers
	   we prefetch 1 cacheline ahead. */
	cmpwi	r0,MAX_COPY_PREFETCH
	ble	112f
	li	r7,MAX_COPY_PREFETCH
112:	mtctr	r7
111:	dcbt	r3,r4			/* warm up the read-ahead window */
	addi	r3,r3,CACHELINE_BYTES
	bdnz	111b
#else
	dcbt	r3,r4
	addi	r3,r3,CACHELINE_BYTES
#endif /* MAX_COPY_PREFETCH > 1 */

114:	subf	r8,r7,r0		/* lines to copy this round */
	mr	r0,r7			/* lines left for the drain round */
	mtctr	r8

53:	dcbt	r3,r4			/* prefetch source r3 bytes ahead */
54:	dcbz	r11,r6			/* zero dst line: avoids its RFO read */
/* the main body of the cacheline loop */
	CSUM_COPY_16_BYTES_WITHEX(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_WITHEX(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_WITHEX(2)
	CSUM_COPY_16_BYTES_WITHEX(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_WITHEX(4)
	CSUM_COPY_16_BYTES_WITHEX(5)
	CSUM_COPY_16_BYTES_WITHEX(6)
	CSUM_COPY_16_BYTES_WITHEX(7)
#endif
#endif
#endif
	bdnz	53b
	cmpwi	r0,0			/* drain round pending? */
	li	r3,4
	li	r7,0			/* no further prefetch when draining */
	bne	114b

63:	srwi.	r0,r5,2			/* leftover whole words */
	mtctr	r0
	beq	64f
30:	lwzu	r0,4(r4)
	adde	r12,r12,r0
31:	stwu	r0,4(r6)
	bdnz	30b

64:	andi.	r0,r5,2			/* trailing halfword? */
	beq+	65f
40:	lhz	r0,4(r4)
	addi	r4,r4,2
41:	sth	r0,4(r6)
	adde	r12,r12,r0
	addi	r6,r6,2
65:	andi.	r0,r5,1			/* trailing byte? */
	beq+	66f
50:	lbz	r0,4(r4)
51:	stb	r0,4(r6)
	slwi	r0,r0,8			/* high byte of the (big-endian) word */
	adde	r12,r12,r0
66:	addze	r3,r12			/* fold in the final carry */
	addi	r1,r1,16		/* pop frame */
	beqlr+	cr7			/* even dst: done */
	rlwinm	r3,r3,8,0,31	/* odd destination address: rotate one byte */
	blr

/* read fault */
src_error:
	lwz	r7,12(r1)		/* recover src_err pointer */
	addi	r1,r1,16
	cmpwi	cr0,r7,0		/* NULL means caller doesn't care */
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r7)
	blr
/* write fault */
dst_error:
	lwz	r8,8(r1)		/* recover dst_err pointer */
	addi	r1,r1,16
	cmpwi	cr0,r8,0
	beqlr
	li	r0,-EFAULT
	stw	r0,0(r8)
	blr

	EX_TABLE(70b, src_error);
	EX_TABLE(71b, dst_error);
	EX_TABLE(72b, src_error);
	EX_TABLE(73b, dst_error);
	EX_TABLE(54b, dst_error);

/*
 * this stuff handles faults in the cacheline loop and branches to either
 * src_error (if in read part) or dst_error (if in write part)
 */
	CSUM_COPY_16_BYTES_EXCODE(0)
#if L1_CACHE_BYTES >= 32
	CSUM_COPY_16_BYTES_EXCODE(1)
#if L1_CACHE_BYTES >= 64
	CSUM_COPY_16_BYTES_EXCODE(2)
	CSUM_COPY_16_BYTES_EXCODE(3)
#if L1_CACHE_BYTES >= 128
	CSUM_COPY_16_BYTES_EXCODE(4)
	CSUM_COPY_16_BYTES_EXCODE(5)
	CSUM_COPY_16_BYTES_EXCODE(6)
	CSUM_COPY_16_BYTES_EXCODE(7)
#endif
#endif
#endif

	EX_TABLE(30b, src_error);
	EX_TABLE(31b, dst_error);
	EX_TABLE(40b, src_error);
	EX_TABLE(41b, dst_error);
	EX_TABLE(50b, src_error);
	EX_TABLE(51b, dst_error);

EXPORT_SYMBOL(csum_partial_copy_generic)
Christophe Leroye9c49432018-05-24 11:33:18 +0000301
/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			   const struct in6_addr *daddr,
 *			   __u32 len, __u8 proto, __wsum sum)
 *
 * In:   r3 = saddr, r4 = daddr, r5 = len, r6 = proto, r7 = sum
 * Out:  r3 = folded 16-bit 1's complement checksum
 *
 * Loads of the two 16-byte addresses are interleaved with the adds
 * to hide load latency; addc starts the carry chain and every
 * subsequent adde keeps it alive.
 */
_GLOBAL(csum_ipv6_magic)
	lwz	r8, 0(r3)		/* sum the 4 words of saddr ... */
	lwz	r9, 4(r3)
	addc	r0, r7, r8		/* start carry chain: sum + saddr[0] */
	lwz	r10, 8(r3)
	adde	r0, r0, r9
	lwz	r11, 12(r3)
	adde	r0, r0, r10
	lwz	r8, 0(r4)		/* ... then the 4 words of daddr */
	adde	r0, r0, r11
	lwz	r9, 4(r4)
	adde	r0, r0, r8
	lwz	r10, 8(r4)
	adde	r0, r0, r9
	lwz	r11, 12(r4)
	adde	r0, r0, r10
	add	r5, r5, r6	/* assumption: len + proto doesn't carry */
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0			/* fold in the final carry */
	rotlwi	r3, r0, 16		/* fold 32 bits to 16: add halves ... */
	add	r3, r0, r3
	not	r3, r3			/* ... complement ... */
	rlwinm	r3, r3, 16, 16, 31	/* ... and keep the carried-into half */
	blr
EXPORT_SYMBOL(csum_ipv6_magic)