Blame - arch/metag/lib/memcpy.S - linux

blob: 46b7a2b9479eaad4592e02bfa94cd46a1597d734 [file] [log] [blame]

James Hogan	086e9dc	2012-10-05 17:02:09 +0100	[diff] [blame]	1	! Copyright (C) 2008-2012 Imagination Technologies Ltd.
				2
				3	.text
				4	.global _memcpy
				5	.type _memcpy,function
					! _memcpy: copy cnt bytes from src to dst and return the original dst.
					!
					! Arguments and result:
				6	! D1Ar1 dst (in)
				7	! D0Ar2 src (in)
				8	! D1Ar3 cnt (in) - number of bytes to copy
				9	! D0Re0 dst (out) - reloaded from A0.3 at $Lend
					!
					! Working registers:
					!   A1.2 = running source pointer
					!   A0.2 = running destination pointer
					!   A0.3 = copy of the destination pointer kept for the return value
					!   D1Ar1 and D0Ar2 are reused as scratch once they have been copied
					!   into A0.2 / A1.2; D0Ar4, D1Ar5, D0Ar6, D0FrT, D0Re0 and D1Re0 are
					!   scratch as well.
					!
					! Strategy:
					!   - cnt < 16: copy one byte at a time.
					!   - otherwise copy single bytes until the destination is 8 byte
					!     aligned, then
					!       * source also 8 byte aligned: copy 32 byte blocks with
					!         GETL/SETL pairs;
					!       * source not aligned: read aligned 8 byte blocks from the
					!         source and shift/merge them into aligned 8 byte stores.
					!     Whatever is left over is finished by the byte loop.
					!
					! The loops are closed with BR after loading TXRPT with
					! (iterations - 1); presumably this is the hardware repeat counter and
					! BR branches until it is exhausted - confirm against the Meta ISA
					! reference.
				10	_memcpy:
				11	CMP D1Ar3, #16
				12	MOV A1.2, D0Ar2 ! source pointer
				13	MOV A0.2, D1Ar1 ! destination pointer
				14	MOV A0.3, D1Ar1 ! for return value
				15	! If there are fewer than 16 bytes to copy use the byte copy loop
					! (the MOVs above sit between the CMP and this branch, so the branch
					! relies on them leaving the CMP result intact).
					! NOTE(review): BGE looks like a signed comparison; a count with the
					! top bit set would then take the byte path and leave via BLT without
					! copying anything - confirm whether that can matter for callers.
				16	BGE $Llong_copy
				17
				18	$Lbyte_copy:
				19	! Simply copy a byte at a time
					! Entered with D1Ar3 = bytes left; also used as the tail of the
					! block-copy paths below.
				20	SUBS TXRPT, D1Ar3, #1 ! repeat count = bytes - 1, flags set for BLT
				21	BLT $Lend ! nothing to copy when the count was zero
				22	$Lloop_byte:
				23	GETB D1Re0, [A1.2++]
				24	SETB [A0.2++], D1Re0
				25	BR $Lloop_byte
				26
				27	$Lend:
				28	! Finally set return value and return
				29	MOV D0Re0, A0.3
				30	MOV PC, D1RtP
				31
				32	$Llong_copy:
				33	ANDS D1Ar5, D1Ar1, #7 ! test destination alignment
				34	BZ $Laligned_dst
				35
				36	! The destination address is not 8 byte aligned. We will copy bytes from
				37	! the source to the destination until the remaining data has an 8 byte
				38	! destination address alignment (i.e. we should never copy more than 7
				39	! bytes here).
					! D1Ar5 starts as (dst & 7) and counts up to 8.
				40	$Lalign_dst:
				41	GETB D0Re0, [A1.2++]
				42	ADD D1Ar5, D1Ar5, #1 ! dest is aligned when D1Ar5 reaches #8
				43	SUB D1Ar3, D1Ar3, #1 ! decrement count of remaining bytes
				44	SETB [A0.2++], D0Re0
				45	CMP D1Ar5, #8
				46	BNE $Lalign_dst
				47
				48	! We have at least (16 - 7) = 9 bytes to copy - calculate the number of 8 byte
				49	! blocks, then jump to the unaligned copy loop or fall through to the aligned
				50	! copy loop as appropriate.
				51	$Laligned_dst:
				52	MOV D0Ar4, A1.2
				53	LSR D1Ar5, D1Ar3, #3 ! D1Ar5 = number of 8 byte blocks
				54	ANDS D0Ar4, D0Ar4, #7 ! test source alignment
				55	BNZ $Lunaligned_copy ! if unaligned, use unaligned copy loop
				56
				57	! Both source and destination are 8 byte aligned - the easy case.
					! D1Ar5 is recomputed here as a 32 byte block count; fewer than 32
					! bytes go straight to the byte loop.
				58	$Laligned_copy:
				59	LSRS D1Ar5, D1Ar3, #5 ! D1Ar5 = number of 32 byte blocks
				60	BZ $Lbyte_copy
				61	SUB TXRPT, D1Ar5, #1
				62
					! Each iteration moves 32 bytes as four 8 byte GETL/SETL transfers,
					! two loads followed by two stores at a time. D1Ar5 is reused as a
					! data register here, after its block count was moved into TXRPT.
				63	$Laligned_32:
				64	GETL D0Re0, D1Re0, [A1.2++]
				65	GETL D0Ar6, D1Ar5, [A1.2++]
				66	SETL [A0.2++], D0Re0, D1Re0
				67	SETL [A0.2++], D0Ar6, D1Ar5
				68	GETL D0Re0, D1Re0, [A1.2++]
				69	GETL D0Ar6, D1Ar5, [A1.2++]
				70	SETL [A0.2++], D0Re0, D1Re0
				71	SETL [A0.2++], D0Ar6, D1Ar5
				72	BR $Laligned_32
				73
				74	! If there are any remaining bytes use the byte copy loop, otherwise we are done
				75	ANDS D1Ar3, D1Ar3, #0x1f ! D1Ar3 = bytes left after the 32 byte blocks
				76	BNZ $Lbyte_copy
				77	B $Lend
				78
				79	! The destination is 8 byte aligned but the source is not, and there are 8
				80	! or more bytes to be copied.
					! On entry D1Ar5 = number of 8 byte blocks (set at $Laligned_dst).
				81	$Lunaligned_copy:
				82	! Adjust the source pointer (A1.2) to the 8 byte boundary before its
				83	! current value
				84	MOV D0Ar4, A1.2
				85	MOV D0Ar6, A1.2
				86	ANDMB D0Ar4, D0Ar4, #0xfff8 ! presumably clears only the low 3 bits - confirm ANDMB semantics
				87	MOV A1.2, D0Ar4
				88	! Save the number of bytes of mis-alignment in D0Ar4 for use later
				89	SUBS D0Ar6, D0Ar6, D0Ar4 ! D0Ar6 = original address - aligned address
				90	MOV D0Ar4, D0Ar6 ! D0Ar4 keeps the byte offset for $Lunaligned_end
				91	! if there is no mis-alignment after all, use the aligned copy loop
					! NOTE(review): this path is only reached when (A1.2 & 7) was non-zero,
					! so the branch looks purely defensive - confirm.
				92	BZ $Laligned_copy
				93
				94	! prefetch 8 bytes
				95	GETL D0Re0, D1Re0, [A1.2]
				96
				97	SUB TXRPT, D1Ar5, #1 ! repeat count = 8 byte blocks - 1
				98
				99	! There are 3 mis-alignment cases to be considered. Less than 4 bytes, exactly
				100	! 4 bytes, and more than 4 bytes.
				101	CMP D0Ar6, #4
				102	BLT $Lunaligned_1_2_3 ! use 1-3 byte mis-alignment loop
				103	BZ $Lunaligned_4 ! use 4 byte mis-alignment loop
				104
				105	! The mis-alignment is more than 4 bytes
				106	$Lunaligned_5_6_7:
				107	SUB D0Ar6, D0Ar6, #4 ! offset within the upper word of the prefetched pair
				108	! Calculate the bit offsets required for the shift operations necessary
				109	! to align the data.
				110	! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
				111	MULW D0Ar6, D0Ar6, #8
				112	MOV D1Ar5, #32
				113	SUB D1Ar5, D1Ar5, D0Ar6
				114	! Move data 4 bytes before we enter the main loop
				115	MOV D0Re0, D1Re0
				116
					! Loop invariant: D0Re0 holds the last word loaded that still contains
					! unconsumed source bytes. Each pass loads the next aligned 8 bytes
					! into D0Ar2 (first word) and D1Ar1 (second word), then builds:
					!   low  output word = (D0Re0 >> bit offset) + (D0Ar2 << (32 - bit offset))
					!   high output word = (D0Ar2 >> bit offset) + (D1Ar1 << (32 - bit offset))
					! ADD is used to combine the two shifted halves.
				117	$Lloop_5_6_7:
				118	GETL D0Ar2, D1Ar1, [++A1.2]
				119	! form 64-bit data in D0Re0, D1Re0
				120	LSR D0Re0, D0Re0, D0Ar6
				121	MOV D1Re0, D0Ar2
				122	LSL D1Re0, D1Re0, D1Ar5
				123	ADD D0Re0, D0Re0, D1Re0
				124
				125	LSR D0Ar2, D0Ar2, D0Ar6
				126	LSL D1Re0, D1Ar1, D1Ar5
				127	ADD D1Re0, D1Re0, D0Ar2
				128
				129	SETL [A0.2++], D0Re0, D1Re0
				130	MOV D0Re0, D1Ar1 ! carry the second loaded word into the next pass
				131	BR $Lloop_5_6_7
				132
				133	B $Lunaligned_end
				134
				135	$Lunaligned_1_2_3:
				136	! Calculate the bit offsets required for the shift operations necessary
				137	! to align the data.
				138	! D0Ar6 = bit offset, D1Ar5 = (32 - bit offset)
				139	MULW D0Ar6, D0Ar6, #8
				140	MOV D1Ar5, #32
				141	SUB D1Ar5, D1Ar5, D0Ar6
				142
					! Loop invariant: D0Re0/D1Re0 hold the most recently loaded aligned
					! 8 bytes. Each pass builds:
					!   low  output word = (D0Re0 >> bit offset) + (D1Re0 << (32 - bit offset))
					!   high output word = (D1Re0 >> bit offset) + (next first word << (32 - bit offset))
					! D0FrT keeps the shifted D1Re0 while the next 8 bytes are loaded.
				143	$Lloop_1_2_3:
				144	! form 64-bit data in D0Re0,D1Re0
				145	LSR D0Re0, D0Re0, D0Ar6
				146	LSL D1Ar1, D1Re0, D1Ar5
				147	ADD D0Re0, D0Re0, D1Ar1
				148	MOV D0Ar2, D1Re0
				149	LSR D0FrT, D0Ar2, D0Ar6
				150	GETL D0Ar2, D1Ar1, [++A1.2]
				151
				152	MOV D1Re0, D0Ar2
				153	LSL D1Re0, D1Re0, D1Ar5
				154	ADD D1Re0, D1Re0, D0FrT
				155
				156	SETL [A0.2++], D0Re0, D1Re0
				157	MOV D0Re0, D0Ar2 ! the freshly loaded pair becomes the current pair
				158	MOV D1Re0, D1Ar1
				159	BR $Lloop_1_2_3
				160
				161	B $Lunaligned_end
				162
				163	! The 4 byte mis-alignment case - this does not require any shifting, just a
				164	! shuffling of registers.
					! D0Re0 always holds the second word of the previous load; it is stored
					! together with the first word of the next load.
				165	$Lunaligned_4:
				166	MOV D0Re0, D1Re0
				167	$Lloop_4:
				168	GETL D0Ar2, D1Ar1, [++A1.2]
				169	MOV D1Re0, D0Ar2
				170	SETL [A0.2++], D0Re0, D1Re0
				171	MOV D0Re0, D1Ar1
				172	BR $Lloop_4
				173
				174	$Lunaligned_end:
				175	! If there are no remaining bytes to copy, we are done.
				176	ANDS D1Ar3, D1Ar3, #7 ! D1Ar3 = bytes left after the 8 byte blocks
				177	BZ $Lend
				178	! Re-adjust the source pointer (A1.2) back to the actual (unaligned) byte
				179	! address of the remaining bytes, and fall through to the byte copy loop.
					! A1.2 is the aligned address of the last block loaded; adding the
					! saved byte offset (D0Ar4) gives the first byte not yet copied.
				180	MOV D0Ar6, A1.2
				181	ADD D1Ar5, D0Ar4, D0Ar6
				182	MOV A1.2, D1Ar5
				183	B $Lbyte_copy
				184
				185	.size _memcpy,.-_memcpy