| ; |
| ; linux/arch/c6x/lib/csum_64plus.s |
| ; |
| ; Port on Texas Instruments TMS320C6x architecture |
| ; |
| ; Copyright (C) 2006, 2009, 2010, 2011 Texas Instruments Incorporated |
| ; Author: Aurelien Jacquiot (aurelien.jacquiot@jaluna.com) |
| ; |
| ; This program is free software; you can redistribute it and/or modify |
| ; it under the terms of the GNU General Public License version 2 as |
| ; published by the Free Software Foundation. |
| ; |
| #include <linux/linkage.h> |
| |
| ; |
| ;unsigned int csum_partial_copy(const char *src, char * dst, |
| ; int len, int sum) |
| ; |
| ; A4: src |
| ; B4: dst |
| ; A6: len |
| ; B6: sum |
| ; return csum in A4 |
| ; |
| |
| .text |
| ENTRY(csum_partial_copy) |
| ; Copy len (A6) bytes from src (A4) to dst (B4) while summing the data as |
| ; 16-bit halfwords. The sum is folded to 16 bits, the caller-supplied sum |
| ; (B6) is added, and the result is returned in A4. |
| ; |
| ; Register use inside this routine: |
| ; A9, B9 partial sums (B9 is merged into A9 after the aligned loop) |
| ; A31 copy of the caller-supplied sum |
| ; B30 saved ILC, restored just before returning |
| ; A1 number of 32-bit words (len / 4) |
| ; B0 nonzero when src or dst is not 4-byte aligned |
| ; |
| ; Save ILC: it is reloaded with the word count below. |
| MVC .S2 ILC,B30 |
| |
| MV .D1X B6,A31 ; given csum |
| ZERO .D1 A9 ; csum (a side) |
| || ZERO .D2 B9 ; csum (b side) |
| || SHRU .S2X A6,2,B5 ; len / 4 |
| |
| ;; Check alignment and size |
| ; A1 = src & 3 and B0 = dst & 3. The next packet ORs them into B0, loads |
| ; the word count into ILC and A1, and sets B2 = 1 (the multiplier used by |
| ; MPYU in the aligned loop). |
| AND .S1 3,A4,A1 |
| || AND .S2 3,B4,B0 |
| OR .L2X B0,A1,B0 ; non aligned condition |
| || MVC .S2 B5,ILC |
| || MVK .D2 1,B2 |
| || MV .D1X B5,A1 ; words condition |
| ; No full word to process: go to the tail handling at L8. |
| [!A1] B .S1 L8 |
| ; Misaligned src or dst: use the LDNW/STNW loop at L6 instead. |
| [B0] BNOP .S1 L6,5 |
| |
| SPLOOP 1 |
| |
| ;; Main loop for aligned words |
| ; Each word is loaded into A7, copied to B7 and stored to dst. |
| ; The upper halfword (EXTU 0,16) is added to A9. The lower halfword is |
| ; obtained as the unsigned 16x16 product B7 * B2 (B2 = 1) and added to B9. |
| LDW .D1T1 *A4++,A7 |
| NOP 4 |
| MV .S2X A7,B7 |
| || EXTU .S1 A7,0,16,A16 |
| STW .D2T2 B7,*B4++ |
| || MPYU .M2 B7,B2,B8 |
| || ADD .L1 A16,A9,A9 |
| NOP |
| SPKERNEL 8,0 |
| || ADD .L2 B8,B9,B9 |
| |
| ; Clearing A1 makes the test at L6 skip the non-aligned loop. |
| ZERO .D1 A1 |
| || ADD .L1X A9,B9,A9 ; add csum from a and b sides |
| |
| L6: |
| [!A1] BNOP .S1 L8,5 |
| |
| ;; Main loop for non-aligned words |
| ; Same work as the aligned loop, using LDNW/STNW for the memory accesses. |
| ; Both halfwords are accumulated in A9; A2 = 1 is the MPYU multiplier |
| ; that extracts the lower halfword into A8. |
| SPLOOP 2 |
| || MVK .L1 1,A2 |
| |
| LDNW .D1T1 *A4++,A7 |
| NOP 3 |
| |
| NOP |
| MV .S2X A7,B7 |
| || EXTU .S1 A7,0,16,A16 |
| || MPYU .M1 A7,A2,A8 |
| |
| ADD .L1 A16,A9,A9 |
| SPKERNEL 6,0 |
| || STNW .D2T2 B7,*B4++ |
| || ADD .L1 A8,A9,A9 |
| |
| ; B0 = ((len & 2) > 0): is there a remaining halfword? |
| ; The BNOP supplies 4 NOP cycles, so the ZERO packet that follows is |
| ; expected to run in the last branch delay slot on both paths. |
| L8: AND .S2X 2,A6,B5 |
| CMPGT .L2 B5,0,B0 |
| [!B0] BNOP .S1 L82,4 |
| |
| ;; Manage half-word |
| ZERO .L1 A7 |
| || ZERO .D1 A8 |
| |
| #ifdef CONFIG_CPU_BIG_ENDIAN |
| |
| ; Big endian: first byte is the high byte, A9 += (A7 << 8) + A8. |
| ; Both bytes are also stored to dst. |
| LDBU .D1T1 *A4++,A7 |
| LDBU .D1T1 *A4++,A8 |
| NOP 3 |
| SHL .S1 A7,8,A0 |
| ADD .S1 A8,A9,A9 |
| STB .D2T1 A7,*B4++ |
| || ADD .S1 A0,A9,A9 |
| STB .D2T1 A8,*B4++ |
| |
| #else |
| |
| ; Little endian: first byte is the low byte, A9 += A7 + (A8 << 8). |
| ; Both bytes are also stored to dst. |
| LDBU .D1T1 *A4++,A7 |
| LDBU .D1T1 *A4++,A8 |
| NOP 3 |
| ADD .S1 A7,A9,A9 |
| SHL .S1 A8,8,A0 |
| |
| STB .D2T1 A7,*B4++ |
| || ADD .S1 A0,A9,A9 |
| STB .D2T1 A8,*B4++ |
| |
| #endif |
| |
| ;; Handle the last byte, if any |
| ; B0 = len & 1. With no odd byte, skip to the fold at L9. |
| L82: AND .S2X 1,A6,B0 |
| [!B0] BNOP .S1 L9,5 |
| |
| || ZERO .L1 A7 |
| |
| ; Load the final byte, copy it to dst and add it to A9 |
| ; (shifted left by 8 first in the big-endian build). |
| L83: LDBU .D1T1 *A4++,A7 |
| NOP 4 |
| |
| MV .L2X A7,B7 |
| |
| #ifdef CONFIG_CPU_BIG_ENDIAN |
| |
| STB .D2T2 B7,*B4++ |
| || SHL .S1 A7,8,A7 |
| ADD .S1 A7,A9,A9 |
| |
| #else |
| |
| STB .D2T2 B7,*B4++ |
| || ADD .S1 A7,A9,A9 |
| |
| #endif |
| |
| ;; Fold the csum |
| ; Nothing to fold when the upper 16 bits of A9 are already zero. |
| L9: SHRU .S2X A9,16,B0 |
| [!B0] BNOP .S1 L10,5 |
| |
| ; A9 = (A9 & 0xffff) + (A9 >> 16), repeated while the upper half is nonzero. |
| L91: SHRU .S2X A9,16,B4 |
| || EXTU .S1 A9,16,16,A3 |
| ADD .D1X A3,B4,A9 |
| |
| SHRU .S1 A9,16,A0 |
| [A0] BNOP .S1 L91,5 |
| |
| ; Add the caller-supplied sum and return the result in A4. |
| ; NOTE(review): this is a plain 32-bit ADD performed after the fold, so a |
| ; carry out of bit 31 would be dropped - confirm callers tolerate this. |
| L10: ADD .D1 A31,A9,A9 |
| MV .D1 A9,A4 |
| |
| ; Return through B3; the MVC restoring ILC follows the 4 NOP cycles of the |
| ; BNOP and is expected to execute in the final branch delay slot. |
| BNOP .S2 B3,4 |
| MVC .S2 B30,ILC |
| ENDPROC(csum_partial_copy) |
| |
| ; |
| ;unsigned short |
| ;ip_fast_csum(unsigned char *iph, unsigned int ihl) |
| ;{ |
| ; unsigned int checksum = 0; |
| ; unsigned short *tosum = (unsigned short *) iph; |
| ; int len; |
| ; |
| ; len = ihl*4; |
| ; |
| ; if (len <= 0) |
| ; return 0; |
| ; |
| ; while(len) { |
| ; len -= 2; |
| ; checksum += *tosum++; |
| ; } |
| ; if (len & 1) |
| ; checksum += *(unsigned char*) tosum; |
| ; |
| ; while(checksum >> 16) |
| ; checksum = (checksum & 0xffff) + (checksum >> 16); |
| ; |
| ; return ~checksum; |
| ;} |
| ; |
| ; A4: iph |
| ; B4: ihl |
| ; return checksum in A4 |
| ; |
| .text |
| |
| ENTRY(ip_fast_csum) |
| ; Sum ihl (B4) 32-bit words starting at iph (A4) as 16-bit halfwords, |
| ; fold the sum to 16 bits and return its ones-complement in A4. |
| ; Returns 0 when ihl * 4 is not greater than zero (signed compare). |
| ; |
| ; A5 is the accumulator; ILC is saved in B30 and restored on return. |
| ZERO .D1 A5 |
| || MVC .S2 ILC,B30 |
| ; B0 = ihl * 4 (length in bytes), B1 = (B0 > 0). |
| SHL .S2 B4,2,B0 |
| CMPGT .L2 B0,0,B1 |
| ; Early out: the ZERO of A3 follows the 4 NOP cycles of the BNOP, and |
| ; L15 then returns A3 (zero). |
| [!B1] BNOP .S1 L15,4 |
| [!B1] ZERO .D1 A3 |
| |
| ; NOTE(review): B0 > 0 is already established at this point, so this |
| ; branch appears unreachable - confirm before relying on it. |
| [!B0] B .S1 L12 |
| ; B0 = number of halfwords, used as the loop count. |
| SHRU .S2 B0,1,B0 |
| MVC .S2 B0,ILC |
| NOP 3 |
| |
| ; Software-pipelined loop: load one halfword per iteration, add to A5. |
| SPLOOP 1 |
| LDHU .D1T1 *A4++,A3 |
| NOP 3 |
| NOP |
| SPKERNEL 5,0 |
| || ADD .L1 A3,A5,A5 |
| |
| ; Skip the fold when the upper 16 bits of A5 are zero. |
| L12: SHRU .S1 A5,16,A0 |
| [!A0] BNOP .S1 L14,5 |
| |
| ; A5 = (A5 & 0xffff) + (A5 >> 16), repeated while the upper half is nonzero. |
| L13: SHRU .S2X A5,16,B4 |
| EXTU .S1 A5,16,16,A3 |
| ADD .D1X A3,B4,A5 |
| SHRU .S1 A5,16,A0 |
| [A0] BNOP .S1 L13,5 |
| |
| ; A3 = (~A5) & 0xffff |
| L14: NOT .D1 A5,A3 |
| EXTU .S1 A3,16,16,A3 |
| |
| ; Return through B3. The ILC restore and the move of the result into A4 |
| ; follow the 3 NOP cycles of the BNOP (remaining branch delay slots). |
| L15: BNOP .S2 B3,3 |
| MVC .S2 B30,ILC |
| MV .D1 A3,A4 |
| ENDPROC(ip_fast_csum) |
| |
| ; |
| ;unsigned short |
| ;do_csum(unsigned char *buff, unsigned int len) |
| ;{ |
| ; int odd, count; |
| ; unsigned int result = 0; |
| ; |
| ; if (len <= 0) |
| ; goto out; |
| ; odd = 1 & (unsigned long) buff; |
| ; if (odd) { |
| ;#ifdef __LITTLE_ENDIAN |
| ; result += (*buff << 8); |
| ;#else |
| ; result = *buff; |
| ;#endif |
| ; len--; |
| ; buff++; |
| ; } |
| ; count = len >> 1; /* nr of 16-bit words.. */ |
| ; if (count) { |
| ; if (2 & (unsigned long) buff) { |
| ; result += *(unsigned short *) buff; |
| ; count--; |
| ; len -= 2; |
| ; buff += 2; |
| ; } |
| ; count >>= 1; /* nr of 32-bit words.. */ |
| ; if (count) { |
| ; unsigned int carry = 0; |
| ; do { |
| ; unsigned int w = *(unsigned int *) buff; |
| ; count--; |
| ; buff += 4; |
| ; result += carry; |
| ; result += w; |
| ; carry = (w > result); |
| ; } while (count); |
| ; result += carry; |
| ; result = (result & 0xffff) + (result >> 16); |
| ; } |
| ; if (len & 2) { |
| ; result += *(unsigned short *) buff; |
| ; buff += 2; |
| ; } |
| ; } |
| ; if (len & 1) |
| ;#ifdef __LITTLE_ENDIAN |
| ; result += *buff; |
| ;#else |
| ; result += (*buff << 8); |
| ;#endif |
| ; result = (result & 0xffff) + (result >> 16); |
| ; /* add up carry.. */ |
| ; result = (result & 0xffff) + (result >> 16); |
| ; if (odd) |
| ; result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); |
| ;out: |
| ; return result; |
| ;} |
| ; |
| ; A4: buff |
| ; B4: len |
| ; return checksum in A4 |
| ; |
| |
| ENTRY(do_csum) |
| ; Assembly version of the C reference given in the comment above: |
| ; 16-bit ones-complement style sum of len (B4) bytes at buff (A4), |
| ; returned in A4. Returns 0 when len is not greater than zero. |
| ; |
| ; Register use inside this routine: |
| ; A1 running result |
| ; A3 odd flag (bit 0 of the original buff address) |
| ; A5 saved return address (B3 is reused to hold len) |
| ; B3 remaining length in bytes |
| ; B0 scratch: counts and conditions |
| ; |
| ; B0 = (len > 0), signed. On the early-out path the EXTU and the packet |
| ; after it follow the 3 NOP cycles of the BNOP, so they are expected to |
| ; execute in the branch delay slots: A1 is zero and A5 holds the return |
| ; address when L26 is reached. |
| CMPGT .L2 B4,0,B0 |
| [!B0] BNOP .S1 L26,3 |
| ; A0 = buff & 1 |
| EXTU .S1 A4,31,31,A0 |
| |
| MV .L1 A0,A3 |
| || MV .S1X B3,A5 |
| || MV .L2 B4,B3 |
| || ZERO .D1 A1 |
| |
| #ifdef CONFIG_CPU_BIG_ENDIAN |
| ; Odd start address, big endian: result = first byte, len--. |
| [A0] SUB .L2 B3,1,B3 |
| || [A0] LDBU .D1T1 *A4++,A1 |
| #else |
| ; Odd start address, little endian: result = first byte << 8, len--. |
| ; An even address skips this via L21. |
| [!A0] BNOP .S1 L21,5 |
| || [A0] LDBU .D1T1 *A4++,A0 |
| SUB .L2 B3,1,B3 |
| || SHL .S1 A0,8,A1 |
| L21: |
| #endif |
| ; B0 = count of 16-bit halfwords (len >> 1); none left: go to L24. |
| ; The MVK/AND pair computes A0 = buff & 2 after the 3 NOP cycles. |
| SHR .S2 B3,1,B0 |
| [!B0] BNOP .S1 L24,3 |
| MVK .L1 2,A0 |
| AND .L1 A4,A0,A0 |
| |
| ; Address not 4-byte aligned: consume one halfword first |
| ; (result += halfword, count--, len -= 2). |
| [!A0] BNOP .S1 L22,5 |
| || [A0] LDHU .D1T1 *A4++,A0 |
| SUB .L2 B0,1,B0 |
| || SUB .S2 B3,2,B3 |
| || ADD .L1 A0,A1,A1 |
| L22: |
| ; B0 = count of 32-bit words; A0 = carry, initially zero. |
| SHR .S2 B0,1,B0 |
| || ZERO .L1 A0 |
| |
| ; No full word: skip the loop. Otherwise the word count goes into ILC. |
| [!B0] BNOP .S1 L23,5 |
| || [B0] MVC .S2 B0,ILC |
| |
| ; Word loop, matching the do/while of the C reference: |
| ; w = *buff++; result += carry + w; carry = (w > result) |
| ; with w in A1, result in A2 and carry in A0. |
| ; NOTE(review): SPMASK L1 presumably keeps the MV (seeding A2 with the |
| ; running result) out of the loop buffer so it runs once - confirm against |
| ; the SPLOOP documentation. |
| SPLOOP 3 |
| SPMASK L1 |
| || MV .L1 A1,A2 |
| || LDW .D1T1 *A4++,A1 |
| |
| NOP 4 |
| ADD .L1 A0,A1,A0 |
| ADD .L1 A2,A0,A2 |
| |
| SPKERNEL 1,2 |
| || CMPGTU .L1 A1,A2,A0 |
| |
| ; result += carry, then result = (result & 0xffff) + (result >> 16) into A1. |
| ADD .L1 A0,A2,A6 |
| EXTU .S1 A6,16,16,A7 |
| SHRU .S2X A6,16,B0 |
| NOP 1 |
| ADD .L1X A7,B0,A1 |
| L23: |
| ; if (len & 2) result += next halfword |
| MVK .L2 2,B0 |
| AND .L2 B3,B0,B0 |
| [B0] LDHU .D1T1 *A4++,A0 |
| NOP 4 |
| [B0] ADD .L1 A0,A1,A1 |
| L24: |
| ; B0 = len & 1: is there a trailing byte? |
| EXTU .S2 B3,31,31,B0 |
| #ifdef CONFIG_CPU_BIG_ENDIAN |
| ; Big endian: result += trailing byte << 8. |
| [!B0] BNOP .S1 L25,4 |
| || [B0] LDBU .D1T1 *A4,A0 |
| SHL .S1 A0,8,A0 |
| ADD .L1 A0,A1,A1 |
| L25: |
| #else |
| ; Little endian: result += trailing byte. |
| [B0] LDBU .D1T1 *A4,A0 |
| NOP 4 |
| [B0] ADD .L1 A0,A1,A1 |
| #endif |
| ; Fold twice: result = (result & 0xffff) + (result >> 16), then add the |
| ; carry of that sum and keep the low 16 bits in A1. |
| EXTU .S1 A1,16,16,A0 |
| SHRU .S2X A1,16,B0 |
| NOP 1 |
| ADD .L1X A0,B0,A0 |
| SHRU .S1 A0,16,A1 |
| ADD .L1 A0,A1,A0 |
| EXTU .S1 A0,16,16,A1 |
| ; Byte-swapped form of the 16-bit result: |
| ; A2 = (result >> 8) & 0xff, A0 = (result & 0xff) << 8. |
| EXTU .S1 A1,16,24,A2 |
| |
| EXTU .S1 A1,24,16,A0 |
| || MV .L2X A3,B0 |
| |
| ; The swapped value replaces the result only when buff was odd. |
| [B0] OR .L1 A0,A2,A1 |
| L26: |
| ; Return through the saved address in A5; the move of the result into A4 |
| ; follows the 4 NOP cycles of the BNOP (final branch delay slot). |
| NOP 1 |
| BNOP .S2X A5,4 |
| MV .L1 A1,A4 |
| ENDPROC(do_csum) |
| |
| ;__wsum csum_partial(const void *buff, int len, __wsum wsum) |
| ;{ |
| ; unsigned int sum = (__force unsigned int)wsum; |
| ; unsigned int result = do_csum(buff, len); |
| ; |
| ; /* add in old sum, and carry.. */ |
| ; result += sum; |
| ; if (sum > result) |
| ; result += 1; |
| ; return (__force __wsum)result; |
| ;} |
| ; |
| ENTRY(csum_partial) |
| ; Call do_csum on (buff, len), which are already in A4 and B4, then add |
| ; wsum (A6) to its result with an end-around carry. Result in A4. |
| ; |
| ; CALLP is given B3 as its return register, so the caller return address |
| ; is kept in A9 and wsum is kept in A8 across the call. |
| ; NOTE(review): this relies on do_csum leaving A8 and A9 untouched; the |
| ; do_csum in this file only writes A0-A7, B0, B3 and ILC. |
| MV .L1X B3,A9 |
| || CALLP .S2 do_csum,B3 |
| || MV .S1 A6,A8 |
| ; Return to the caller. The three instructions below follow the 2 NOP |
| ; cycles of the BNOP (remaining branch delay slots): |
| ; A1 = wsum + result; A0 = (wsum > A1), unsigned; A4 = A1 + A0 |
| BNOP .S2X A9,2 |
| ADD .L1 A8,A4,A1 |
| CMPGTU .L1 A8,A1,A0 |
| ADD .L1 A1,A0,A4 |
| ENDPROC(csum_partial) |
| |
| ;unsigned short |
| ;ip_compute_csum(unsigned char *buff, unsigned int len) |
| ; |
| ; A4: buff |
| ; B4: len |
| ; return checksum in A4 |
| |
| ENTRY(ip_compute_csum) |
| ; Call do_csum on (buff, len), which are already in A4 and B4, and return |
| ; the bitwise complement of its result, limited to 16 bits, in A4. |
| ; |
| ; CALLP is given B3 as its return register, so the caller return address |
| ; is kept in A9 across the call. |
| MV .L1X B3,A9 |
| || CALLP .S2 do_csum,B3 |
| ; Return to the caller. The NOT and CLR follow the 3 NOP cycles of the |
| ; BNOP (remaining branch delay slots): A4 = ~A4, then bits 16..31 cleared. |
| BNOP .S2X A9,3 |
| NOT .S1 A4,A4 |
| CLR .S1 A4,16,31,A4 |
| ENDPROC(ip_compute_csum) |