!   Copyright (C) 2008-2012 Imagination Technologies Ltd.

!
! _memset - fill a block of memory with a single byte value
!
! In:
!	D1Ar1	dst - start of the area to fill
!	D0Ar2	c   - fill value (only the low 8 bits are used)
!	D1Ar3	cnt - number of bytes to fill
! Out:
!	D0Re0	dst - the destination pointer as passed in
! Written:
!	D0Ar2, D0Ar4, D1Ar1, D1Ar3, D1Ar5, A0.2, A1.2, TXRPT, condition flags
!
! Strategy:
!   1. If dst is not 8-byte aligned, $LByteStub stores single bytes up to
!      the next 8-byte boundary (or all of cnt, if that is smaller).
!   2. $LLongLoop stores 32 bytes per iteration while >= 32 bytes remain.
!   3. $LLongishLoop stores 8 bytes per iteration while >= 8 bytes remain.
!   4. $LByteStub stores the final 0-7 bytes.
!
! Flags are frequently set several instructions ahead of the branch that
! tests them.  The code relies on the instructions in between (the forms
! without an S suffix, the stores and BR) leaving the flags untouched, so
! do not reorder instructions or add flag-setting ones without checking
! every later conditional branch.
!
	.text
	.global	_memset
	.type	_memset,function
_memset:
	AND	D0Ar2,D0Ar2,#0xFF	! Ensure a byte input value
	MULW	D0Ar2,D0Ar2,#0x0101	! Duplicate byte value into bits 0-15
	ANDS	D0Ar4,D1Ar1,#7		! D0Ar4 = dst & 7; flags feed the BZ below
	LSL	D0Re0,D0Ar2,#16		! Duplicate byte value into bits 16-31
	ADD	A0.2,D0Ar2,D0Re0	! A0.2 = byte replicated into all 4 bytes
	MOV	D0Re0,D1Ar1		! Return value is the original dst
	BZ	$LLongStub		! dst already 8-byte aligned: go bulk fill
	! Start address is not aligned on an 8 byte boundary, so we need
	! the number of bytes up to the next 8 byte boundary, or cnt if
	! that is smaller, in D1Ar5:  D1Ar5 = min(8 - (dst & 7), cnt)
	MOV	D0Ar2,#8		! Need 8 - N in D1Ar5 ...
	SUB	D1Ar5,D0Ar2,D0Ar4	!            ... subtract N
	CMP	D1Ar3,D1Ar5		! Flags from cnt - D1Ar5
	! NOTE(review): MI tests the sign of cnt - D1Ar5, so this clamp
	! presumably assumes cnt < 2^31 - confirm against callers.
	MOVMI	D1Ar5,D1Ar3		! cnt is smaller: burst is just cnt bytes
	B	$LByteStub		! dst is mis-aligned, do $LByteStub

!
! Preamble to LongLoop which generates 4*8 bytes per iteration (5 cycles)
!
! On entry D1Ar1 is the (8-byte aligned) write pointer and D1Ar3 the
! number of bytes still to fill.
!
$LLongStub:
	LSRS	D0Ar2,D1Ar3,#5		! D0Ar2 = number of 32-byte blocks
	AND	D1Ar3,D1Ar3,#0x1F	! D1Ar3 = bytes left after those blocks
	MOV	A1.2,A0.2		! (Re)load second half of the 8-byte
					! pattern; $LByteStub reuses A1.2 as
					! its jump offset
	BEQ	$LLongishStub		! No 32-byte blocks (LSRS gave zero)
	SUB	TXRPT,D0Ar2,#1		! Repeat counter = blocks - 1
	CMP	D1Ar3,#0		! Flags for the BZ after the loop
$LLongLoop:
	SETL	[D1Ar1++],A0.2,A1.2	! Four 8-byte stores = 32 bytes
	SETL	[D1Ar1++],A0.2,A1.2
	SETL	[D1Ar1++],A0.2,A1.2
	SETL	[D1Ar1++],A0.2,A1.2
	BR	$LLongLoop		! Repeat branch, counted by TXRPT
	BZ	$Lexit			! Nothing left over: done
!
! Preamble to LongishLoop which generates 1*8 bytes per iteration (2 cycles)
!
! Reached with fewer than 32 bytes remaining in D1Ar3.
!
$LLongishStub:
	LSRS	D0Ar2,D1Ar3,#3		! D0Ar2 = number of 8-byte blocks
	AND	D1Ar3,D1Ar3,#0x7	! D1Ar3 = trailing bytes (0-7)
	MOV	D1Ar5,D1Ar3		! Burst size for $LByteStub = the tail
	BEQ	$LByteStub		! No 8-byte blocks (LSRS gave zero)
	SUB	TXRPT,D0Ar2,#1		! Repeat counter = blocks - 1
	CMP	D1Ar3,#0		! Flags for the BZ after the loop
$LLongishLoop:
	SETL	[D1Ar1++],A0.2,A1.2	! One 8-byte store per iteration
	BR	$LLongishLoop		! Repeat branch, counted by TXRPT
	BZ	$Lexit			! No trailing bytes: done
!
! This does a byte structured burst of up to 7 bytes
!
!	D1Ar1 should point to the location required
!	D1Ar3 should be the remaining total byte count
!	D1Ar5 should be burst size (<= D1Ar3)
!
! The pointer is first advanced past the burst, then execution jumps
! into the SETB table so that only the last D1Ar5 entries run, writing
! offsets -D1Ar5 .. -1.  The jump target is CPC1 + (8 - D1Ar5)*4, which
! assumes each table entry occupies 4 bytes and that CPC1 reads as the
! address of the SUB PC instruction itself; a burst of 0 lands directly
! on the BNZ after the table.
!
$LByteStub:
	SUBS	D1Ar3,D1Ar3,D1Ar5	! Reduce count; flags feed the BNZ below
	ADD	D1Ar1,D1Ar1,D1Ar5	! Advance pointer to end of area
	MULW	D1Ar5,D1Ar5,#4		! Scale to (1*4), (2*4), (3*4)
	SUB	D1Ar5,D1Ar5,#(8*4)	! Rebase to -(7*4), -(6*4), -(5*4), ...
	MOV	A1.2,D1Ar5		! A1.2 = jump offset (pattern is lost)
	SUB	PC,CPC1,A1.2		! Jump into table below
	SETB	[D1Ar1+#(-7)],A0.2
	SETB	[D1Ar1+#(-6)],A0.2
	SETB	[D1Ar1+#(-5)],A0.2
	SETB	[D1Ar1+#(-4)],A0.2
	SETB	[D1Ar1+#(-3)],A0.2
	SETB	[D1Ar1+#(-2)],A0.2
	SETB	[D1Ar1+#(-1)],A0.2
!
! Return if all data has been output, otherwise do $LLongStub
! (tests the flags from the SUBS at the top of $LByteStub)
!
	BNZ	$LLongStub
$Lexit:
	MOV	PC,D1RtP		! Return to caller via D1RtP
	.size	_memset,.-_memset