/*-
 * Copyright (c) 2004 Olivier Houchard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright 2003 Wasabi Systems, Inc.
 * All rights reserved.
 *
 * Written by Steve C. Woodford for Wasabi Systems, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed for the NetBSD Project by
 *      Wasabi Systems, Inc.
 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
 *    or promote products derived from this software without specific prior
 *    written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * Copyright (c) 1997 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Neil A. Carson and Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#include "assym.inc"

	.syntax	unified

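/*
 * Literal pool: addresses of the optional bulk-copy/zero hooks.  If
 * platform code installs non-NULL _arm_memcpy/_arm_bzero pointers,
 * memcpy() and bzero() below hand off requests of at least
 * _min_memcpy_size/_min_bzero_size bytes to them, and only fall back
 * to the generic code when the hook is absent or declines the request.
 */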
.L_arm_memcpy:
	.word	_C_LABEL(_arm_memcpy)
.L_arm_bzero:
	.word	_C_LABEL(_arm_bzero)
.L_min_memcpy_size:
	.word	_C_LABEL(_min_memcpy_size)
.L_min_bzero_size:
	.word	_C_LABEL(_min_bzero_size)
/*
 * memset: Sets a block of memory to the specified value
 *
 * On entry:
 *   r0 - dest address
 *   r1 - byte to write
 *   r2 - number of bytes to write
 *
 * On exit:
 *   r0 - dest address
 */
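/*
 * Illustrative C-level sketch (not part of the build) of what the two
 * entry points below compute; bzero() simply falls through to the shared
 * do_memset code with a zero fill byte and the length moved into r1:
 *
 *	void bzero(void *dst, size_t len)  { memset(dst, 0, len); }
 *	void *memset(void *dst, int c, size_t len)
 *	{
 *		unsigned char *p = dst;
 *
 *		while (len-- > 0)
 *			*p++ = (unsigned char)c;
 *		return (dst);
 *	}
 */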
/* LINTSTUB: Func: void bzero(void *, size_t) */
ENTRY(bzero)
	ldr	r3, .L_arm_bzero	/* is a platform bzero hook installed? */
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal0
	ldr	r2, .L_min_bzero_size	/* only worth calling above this size */
	ldr	r2, [r2]
	cmp	r1, r2
	blt	.Lnormal0
	stmfd	sp!, {r0, r1, lr}
	mov	r2, #0			/* third argument to the hook */
	mov	lr, pc			/* indirect call to the hook */
	mov	pc, r3
	cmp	r0, #0			/* hook returns 0 when it handled it */
	ldmfd	sp!, {r0, r1, lr}
	RETeq				/* done, otherwise fall back below */
.Lnormal0:
	mov	r3, #0x00
	b	do_memset
END(bzero)
/* LINTSTUB: Func: void *memset(void *, int, size_t) */
ENTRY(memset)
	and	r3, r1, #0xff		/* We deal with bytes */
	mov	r1, r2
do_memset:
	cmp	r1, #0x04		/* Do we have less than 4 bytes */
	mov	ip, r0
	blt	.Lmemset_lessthanfour

	/* Ok first we will word align the address */
	ands	r2, ip, #0x03		/* Get the bottom two bits */
	bne	.Lmemset_wordunaligned	/* The address is not word aligned */

	/* We are now word aligned */
.Lmemset_wordaligned:
	orr	r3, r3, r3, lsl #8	/* Extend value to 16-bits */
#ifdef _ARM_ARCH_5E
	tst	ip, #0x04		/* Quad-align for armv5e */
#else
	cmp	r1, #0x10
#endif
	orr	r3, r3, r3, lsl #16	/* Extend value to 32-bits */
#ifdef _ARM_ARCH_5E
	subne	r1, r1, #0x04		/* Quad-align if necessary */
	strne	r3, [ip], #0x04
	cmp	r1, #0x10
#endif
	blt	.Lmemset_loop4		/* If less than 16 then use words */
	mov	r2, r3			/* Duplicate data */
	cmp	r1, #0x80		/* If < 128 then skip the big loop */
	blt	.Lmemset_loop32

	/* Do 128 bytes at a time */
.Lmemset_loop128:
	subs	r1, r1, #0x80
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop128
	RETeq			/* Zero length so just exit */

	add	r1, r1, #0x80		/* Adjust for extra sub */

	/* Do 32 bytes at a time */
.Lmemset_loop32:
	subs	r1, r1, #0x20
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	bgt	.Lmemset_loop32
	RETeq			/* Zero length so just exit */

	adds	r1, r1, #0x10		/* Partially adjust for extra sub */

	/* Deal with 16 bytes or more */
#ifdef _ARM_ARCH_5E
	strdge	r2, [ip], #0x08
	strdge	r2, [ip], #0x08
#else
	stmiage	ip!, {r2-r3}
	stmiage	ip!, {r2-r3}
#endif
	RETeq			/* Zero length so just exit */

	addlt	r1, r1, #0x10		/* Possibly adjust for extra sub */

	/* We have at least 4 bytes so copy as words */
.Lmemset_loop4:
	subs	r1, r1, #0x04
	strge	r3, [ip], #0x04
	bgt	.Lmemset_loop4
	RETeq			/* Zero length so just exit */

#ifdef _ARM_ARCH_5E
	/* Compensate for 64-bit alignment check */
	adds	r1, r1, #0x04
	RETeq
	cmp	r1, #2
#else
	cmp	r1, #-2
#endif

	strb	r3, [ip], #0x01		/* Set 1 byte */
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */

.Lmemset_wordunaligned:
	rsb	r2, r2, #0x004
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r2, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	sub	r1, r1, r2
	strbgt	r3, [ip], #0x01		/* and a third */
	cmp	r1, #0x04		/* More than 4 bytes left? */
	bge	.Lmemset_wordaligned	/* Yup */

.Lmemset_lessthanfour:
	cmp	r1, #0x00
	RETeq			/* Zero length so exit */
	strb	r3, [ip], #0x01		/* Set 1 byte */
	cmp	r1, #0x02
	strbge	r3, [ip], #0x01		/* Set another byte */
	strbgt	r3, [ip]		/* and a third */
	RET			/* Exit */
EEND(memset)
END(bzero)

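/*
 * bcmp: compare two blocks of memory (no ordering implied)
 *
 * On entry:
 *   r0 - address of first block
 *   r1 - address of second block
 *   r2 - number of bytes to compare
 *
 * On exit:
 *   r0 - zero if the blocks are identical, non-zero otherwise
 */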
ENTRY(bcmp)
	mov	ip, r0
	cmp	r2, #0x06
	beq	.Lmemcmp_6bytes
	mov	r0, #0x00

	/* Are both addresses aligned the same way? */
	cmp	r2, #0x00
	eorsne	r3, ip, r1
	RETeq			/* len == 0, or same addresses! */
	tst	r3, #0x03
	subne	r2, r2, #0x01
	bne	.Lmemcmp_bytewise2	/* Badly aligned. Do it the slow way */

	/*
	 * Word-align the addresses, if necessary.  Each of the three
	 * byte-compare blocks below is six instructions (24 bytes) long.
	 * (r1 - 5) & 3 is the number of whole blocks to skip so that only
	 * the (4 - (r1 & 3)) & 3 bytes needed for alignment are compared;
	 * the computed branch (pc reads as '.' + 8 here) jumps over them.
	 */
	sub	r3, r1, #0x05
	ands	r3, r3, #0x03		/* r3 = blocks to skip (0-3) */
	add	r3, r3, r3, lsl #1	/* r3 *= 3 */
	addne	pc, pc, r3, lsl #3	/* pc += blocks * 24 */
	nop

	/* Compare up to 3 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare up to 2 bytes */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 1 byte */
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r0, r0, r3
	RETne
	subs	r2, r2, #0x01
	RETeq

	/* Compare 4 bytes at a time, if possible */
	subs	r2, r2, #0x04
	bcc	.Lmemcmp_bytewise
.Lmemcmp_word_aligned:
	ldr	r0, [ip], #0x04
	ldr	r3, [r1], #0x04
	subs	r2, r2, #0x04
	cmpcs	r0, r3
	beq	.Lmemcmp_word_aligned
	sub	r0, r0, r3

	/* Correct for extra subtraction, and check if done */
	adds	r2, r2, #0x04
	cmpeq	r0, #0x00		/* If done, did all bytes match? */
	RETeq			/* Yup. Just return */

	/* Re-do the final word byte-wise */
	sub	ip, ip, #0x04
	sub	r1, r1, #0x04

.Lmemcmp_bytewise:
	add	r2, r2, #0x03
.Lmemcmp_bytewise2:
	ldrb	r0, [ip], #0x01
	ldrb	r3, [r1], #0x01
	subs	r2, r2, #0x01
	cmpcs	r0, r3
	beq	.Lmemcmp_bytewise2
	sub	r0, r0, r3
	RET

	/*
	 * 6 byte compares are very common, thanks to the network stack.
	 * This code is hand-scheduled to reduce the number of stalls for
	 * load results. Everything else being equal, this will be ~32%
	 * faster than a byte-wise memcmp.
	 */
	.align	5
.Lmemcmp_6bytes:
	ldrb	r3, [r1, #0x00]		/* r3 = b2#0 */
	ldrb	r0, [ip, #0x00]		/* r0 = b1#0 */
	ldrb	r2, [r1, #0x01]		/* r2 = b2#1 */
	subs	r0, r0, r3		/* r0 = b1#0 - b2#0 */
	ldrbeq	r3, [ip, #0x01]		/* r3 = b1#1 */
	RETne			/* Return if mismatch on #0 */
	subs	r0, r3, r2		/* r0 = b1#1 - b2#1 */
	ldrbeq	r3, [r1, #0x02]		/* r3 = b2#2 */
	ldrbeq	r0, [ip, #0x02]		/* r0 = b1#2 */
	RETne			/* Return if mismatch on #1 */
	ldrb	r2, [r1, #0x03]		/* r2 = b2#3 */
	subs	r0, r0, r3		/* r0 = b1#2 - b2#2 */
	ldrbeq	r3, [ip, #0x03]		/* r3 = b1#3 */
	RETne			/* Return if mismatch on #2 */
	subs	r0, r3, r2		/* r0 = b1#3 - b2#3 */
	ldrbeq	r3, [r1, #0x04]		/* r3 = b2#4 */
	ldrbeq	r0, [ip, #0x04]		/* r0 = b1#4 */
	RETne			/* Return if mismatch on #3 */
	ldrb	r2, [r1, #0x05]		/* r2 = b2#5 */
	subs	r0, r0, r3		/* r0 = b1#4 - b2#4 */
	ldrbeq	r3, [ip, #0x05]		/* r3 = b1#5 */
	RETne			/* Return if mismatch on #4 */
	sub	r0, r3, r2		/* r0 = b1#5 - b2#5 */
	RET
END(bcmp)

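/*
 * bcopy(src, dst, len) and memmove(dst, src, len): copy a block of
 * memory, correctly handling overlapping source and destination
 * regions.  bcopy() swaps its first two arguments and falls through
 * to memmove(), which leaves the destination address in r0 on return.
 */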
ENTRY(bcopy)
	/* switch the source and destination registers */
	eor     r0, r1, r0	/* three-eor swap of r0 and r1, */
	eor     r1, r0, r1	/* exchanging src and dst without */
	eor     r0, r1, r0	/* needing a scratch register */
EENTRY(memmove)
	/* Do the buffers overlap? */
	cmp	r0, r1
	RETeq		/* Bail now if src/dst are the same */
	subcc	r3, r0, r1	/* if (dst > src) r3 = dst - src */
	subcs	r3, r1, r0	/* if (src > dst) r3 = src - dst */
	cmp	r3, r2		/* if (r3 < len) we have an overlap */
	bcc	PIC_SYM(_C_LABEL(memcpy), PLT)
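	/*
	 * Intended C-level outline of the dispatch (illustrative only):
	 *
	 *	if (dst == src)
	 *		return (dst);		// nothing to do
	 *	if (no overlap between [src, src+len) and [dst, dst+len))
	 *		return (memcpy(dst, src, len));
	 *	if (dst < src)
	 *		copy forwards;		// .Lmemmove_ft8 path
	 *	else
	 *		copy backwards;		// .Lmemmove_backwards path
	 */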

	/* Determine copy direction */
	cmp	r1, r0
	bcc	.Lmemmove_backwards

	moveq	r0, #0			/* Quick abort for len=0 */
	RETeq

	stmdb	sp!, {r0, lr}		/* memmove() returns dest addr */
	subs	r2, r2, #4
	blt	.Lmemmove_fl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_fdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_fsrcul		/* oh unaligned source addr */

.Lmemmove_ft8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_fl12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemmove_fl32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_floop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_floop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemmove_fl32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemmove_floop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemmove_floop12

.Lmemmove_fl12:
	adds	r2, r2, #8
	blt	.Lmemmove_fl4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_fl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	ldmiaeq	sp!, {r0, pc}		/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemmove_fdestul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemmove_fl4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemmove_ft8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
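	/*
	 * The source is 1, 2 or 3 bytes off word alignment while the
	 * destination is now word-aligned.  The loops below round the
	 * source down to a word boundary, keep the previously loaded
	 * word in a register, and build each output word by combining
	 * the tail of the previous source word with the head of the
	 * next one using lsr/lsl and orr (shift directions swap under
	 * __ARMEB__).  For example, with a little-endian source offset
	 * of one byte, each stored word is
	 *	(prev >> 8) | (next << 24)
	 * so every destination store remains word-aligned.
	 */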
.Lmemmove_fsrcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemmove_fsrcul3
	beq	.Lmemmove_fsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul1loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #8
#else
	mov	r3, lr, lsr #8
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, lr, lsr #24
#else
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul1l4

.Lmemmove_fsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #8
#else
	mov	r12, lr, lsr #8
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #24
#else
	orr	r12, r12, lr, lsl #24
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul1loop4

.Lmemmove_fsrcul1l4:
	sub	r1, r1, #3
	b	.Lmemmove_fl4

.Lmemmove_fsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul2loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #16
#else
	mov	r3, lr, lsr #16
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, lr, lsr #16
#else
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul2l4

.Lmemmove_fsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #16
#else
	mov	r12, lr, lsr #16
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #16
#else
	orr	r12, r12, lr, lsl #16
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul2loop4

.Lmemmove_fsrcul2l4:
	sub	r1, r1, #2
	b	.Lmemmove_fl4

.Lmemmove_fsrcul3:
	cmp	r2, #0x0c
	blt	.Lmemmove_fsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemmove_fsrcul3loop16:
#ifdef __ARMEB__
	mov	r3, lr, lsl #24
#else
	mov	r3, lr, lsr #24
#endif
	ldmia	r1!, {r4, r5, r12, lr}
#ifdef __ARMEB__
	orr	r3, r3, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, lr, lsr #8
#else
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
#endif
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemmove_fsrcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_fsrcul3l4

.Lmemmove_fsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, lr, lsl #24
#else
	mov	r12, lr, lsr #24
#endif
	ldr	lr, [r1], #4
#ifdef __ARMEB__
	orr	r12, r12, lr, lsr #8
#else
	orr	r12, r12, lr, lsl #8
#endif
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemmove_fsrcul3loop4

.Lmemmove_fsrcul3l4:
	sub	r1, r1, #1
	b	.Lmemmove_fl4

.Lmemmove_backwards:
	/* start at the ends of both buffers and work backwards */
	add	r1, r1, r2
	add	r0, r0, r2
	subs	r2, r2, #4
	blt	.Lmemmove_bl4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemmove_bdestul	/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemmove_bsrcul		/* oh unaligned source addr */

.Lmemmove_bt8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemmove_bl12		/* less than 12 bytes (4 from above) */
	stmdb	sp!, {r4, lr}
	subs	r2, r2, #0x14		/* less than 32 bytes (12 from above) */
	blt	.Lmemmove_bl32

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemmove_bloop32:
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	ldmdb	r1!, {r3, r4, r12, lr}
	stmdb	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemmove_bloop32

.Lmemmove_bl32:
	cmn	r2, #0x10
	ldmdbge	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmdbge	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	adds	r2, r2, #0x14
	ldmdbge	r1!, {r3, r12, lr}	/* blat a remaining 12 bytes */
	stmdbge	r0!, {r3, r12, lr}
	subge	r2, r2, #0x0c
	ldmia	sp!, {r4, lr}

.Lmemmove_bl12:
	adds	r2, r2, #8
	blt	.Lmemmove_bl4
	subs	r2, r2, #4
	ldrlt	r3, [r1, #-4]!
	strlt	r3, [r0, #-4]!
	ldmdbge	r1!, {r3, r12}
	stmdbge	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemmove_bl4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
	RETeq			/* done */

	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	RET

	/* erg - unaligned destination */
.Lmemmove_bdestul:
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1, #-1]!
	strb	r3, [r0, #-1]!
	ldrbge	r3, [r1, #-1]!
	strbge	r3, [r0, #-1]!
	ldrbgt	r3, [r1, #-1]!
	strbgt	r3, [r0, #-1]!
	subs	r2, r2, r12
	blt	.Lmemmove_bl4		/* less than 4 bytes to go */
	ands	r12, r1, #3
	beq	.Lmemmove_bt8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemmove_bsrcul:
	bic	r1, r1, #3
	ldr	r3, [r1, #0]
	cmp	r12, #2
	blt	.Lmemmove_bsrcul1
	beq	.Lmemmove_bsrcul2
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul3loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #8
#else
	mov	lr, r3, lsl #8
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r3, lsl #24
#else
	orr	lr, lr, r12, lsr #24
	mov	r12, r12, lsl #8
	orr	r12, r12, r5, lsr #24
	mov	r5, r5, lsl #8
	orr	r5, r5, r4, lsr #24
	mov	r4, r4, lsl #8
	orr	r4, r4, r3, lsr #24
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul3loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul3l4

.Lmemmove_bsrcul3loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #8
#else
	mov	r12, r3, lsl #8
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #24
#else
	orr	r12, r12, r3, lsr #24
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul3loop4

.Lmemmove_bsrcul3l4:
	add	r1, r1, #3
	b	.Lmemmove_bl4

.Lmemmove_bsrcul2:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul2loop16:
#ifdef __ARMEB__
	mov	lr, r3, lsr #16
#else
	mov	lr, r3, lsl #16
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r3, lsl #16
#else
	orr	lr, lr, r12, lsr #16
	mov	r12, r12, lsl #16
	orr	r12, r12, r5, lsr #16
	mov	r5, r5, lsl #16
	orr	r5, r5, r4, lsr #16
	mov	r4, r4, lsl #16
	orr	r4, r4, r3, lsr #16
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul2loop16
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul2l4

.Lmemmove_bsrcul2loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #16
#else
	mov	r12, r3, lsl #16
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #16
#else
	orr	r12, r12, r3, lsr #16
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul2loop4

.Lmemmove_bsrcul2l4:
	add	r1, r1, #2
	b	.Lmemmove_bl4

.Lmemmove_bsrcul1:
	cmp	r2, #0x0c
	blt	.Lmemmove_bsrcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5, lr}

.Lmemmove_bsrcul1loop32:
#ifdef __ARMEB__
	mov	lr, r3, lsr #24
#else
	mov	lr, r3, lsl #24
#endif
	ldmdb	r1!, {r3-r5, r12}
#ifdef __ARMEB__
	orr	lr, lr, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r3, lsl #8
#else
	orr	lr, lr, r12, lsr #8
	mov	r12, r12, lsl #24
	orr	r12, r12, r5, lsr #8
	mov	r5, r5, lsl #24
	orr	r5, r5, r4, lsr #8
	mov	r4, r4, lsl #24
	orr	r4, r4, r3, lsr #8
#endif
	stmdb	r0!, {r4, r5, r12, lr}
	subs	r2, r2, #0x10
	bge	.Lmemmove_bsrcul1loop32
	ldmia	sp!, {r4, r5, lr}
	adds	r2, r2, #0x0c
	blt	.Lmemmove_bsrcul1l4

.Lmemmove_bsrcul1loop4:
#ifdef __ARMEB__
	mov	r12, r3, lsr #24
#else
	mov	r12, r3, lsl #24
#endif
	ldr	r3, [r1, #-4]!
#ifdef __ARMEB__
	orr	r12, r12, r3, lsl #8
#else
	orr	r12, r12, r3, lsr #8
#endif
	str	r12, [r0, #-4]!
	subs	r2, r2, #4
	bge	.Lmemmove_bsrcul1loop4

.Lmemmove_bsrcul1l4:
	add	r1, r1, #1
	b	.Lmemmove_bl4
EEND(memmove)
END(bcopy)

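/*
 * memcpy: copy a block of memory (buffers are assumed not to overlap)
 *
 * On entry:
 *   r0 - destination address
 *   r1 - source address
 *   r2 - number of bytes to copy
 *
 * On exit:
 *   r0 - destination address
 *
 * Only built when _ARM_ARCH_5E is not defined (see the #if below);
 * like bzero() above, it hands large copies to the optional
 * _arm_memcpy hook when one is installed.
 */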
#if !defined(_ARM_ARCH_5E)
ENTRY(memcpy)
	/* save leaf functions having to store this away */
	/* Do not check arm_memcpy if we're running from flash */
#if defined(FLASHADDR) && defined(PHYSADDR)
#if FLASHADDR > PHYSADDR
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bls	.Lnormal
#else
	ldr	r3, =FLASHADDR
	cmp	r3, pc
	bhi	.Lnormal
#endif
#endif
	ldr	r3, .L_arm_memcpy	/* is a platform memcpy hook installed? */
	ldr	r3, [r3]
	cmp	r3, #0
	beq	.Lnormal
	ldr	r3, .L_min_memcpy_size	/* only worth calling above this size */
	ldr	r3, [r3]
	cmp	r2, r3
	blt	.Lnormal
	stmfd	sp!, {r0-r2, r4, lr}
	mov	r3, #0			/* fourth argument to the hook */
	ldr	r4, .L_arm_memcpy
	mov	lr, pc			/* indirect call to the hook */
	ldr	pc, [r4]
	cmp	r0, #0			/* hook returns 0 when it handled it */
	ldmfd	sp!, {r0-r2, r4, lr}
	RETeq				/* done, otherwise fall back below */

.Lnormal:
	stmdb	sp!, {r0, lr}		/* memcpy() returns dest addr */

	subs	r2, r2, #4
	blt	.Lmemcpy_l4		/* less than 4 bytes */
	ands	r12, r0, #3
	bne	.Lmemcpy_destul		/* oh unaligned destination addr */
	ands	r12, r1, #3
	bne	.Lmemcpy_srcul		/* oh unaligned source addr */

.Lmemcpy_t8:
	/* We have aligned source and destination */
	subs	r2, r2, #8
	blt	.Lmemcpy_l12		/* less than 12 bytes (4 from above) */
	subs	r2, r2, #0x14
	blt	.Lmemcpy_l32		/* less than 32 bytes (12 from above) */
	stmdb	sp!, {r4}		/* borrow r4 */

	/* blat 32 bytes at a time */
	/* XXX for really big copies perhaps we should use more registers */
.Lmemcpy_loop32:
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	ldmia	r1!, {r3, r4, r12, lr}
	stmia	r0!, {r3, r4, r12, lr}
	subs	r2, r2, #0x20
	bge	.Lmemcpy_loop32

	cmn	r2, #0x10
	ldmiage	r1!, {r3, r4, r12, lr}	/* blat a remaining 16 bytes */
	stmiage	r0!, {r3, r4, r12, lr}
	subge	r2, r2, #0x10
	ldmia	sp!, {r4}		/* return r4 */

.Lmemcpy_l32:
	adds	r2, r2, #0x14

	/* blat 12 bytes at a time */
.Lmemcpy_loop12:
	ldmiage	r1!, {r3, r12, lr}
	stmiage	r0!, {r3, r12, lr}
	subsge	r2, r2, #0x0c
	bge	.Lmemcpy_loop12

.Lmemcpy_l12:
	adds	r2, r2, #8
	blt	.Lmemcpy_l4

	subs	r2, r2, #4
	ldrlt	r3, [r1], #4
	strlt	r3, [r0], #4
	ldmiage	r1!, {r3, r12}
	stmiage	r0!, {r3, r12}
	subge	r2, r2, #4

.Lmemcpy_l4:
	/* less than 4 bytes to go */
	adds	r2, r2, #4
#ifdef __APCS_26__
	ldmiaeq sp!, {r0, pc}^		/* done */
#else
	ldmiaeq	sp!, {r0, pc}		/* done */
#endif
	/* copy the crud byte at a time */
	cmp	r2, #2
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	ldmia	sp!, {r0, pc}

	/* erg - unaligned destination */
.Lmemcpy_destul:
	rsb	r12, r12, #4
	cmp	r12, #2

	/* align destination with byte copies */
	ldrb	r3, [r1], #1
	strb	r3, [r0], #1
	ldrbge	r3, [r1], #1
	strbge	r3, [r0], #1
	ldrbgt	r3, [r1], #1
	strbgt	r3, [r0], #1
	subs	r2, r2, r12
	blt	.Lmemcpy_l4		/* less than 4 bytes */

	ands	r12, r1, #3
	beq	.Lmemcpy_t8		/* we have an aligned source */

	/* erg - unaligned source */
	/* This is where it gets nasty ... */
.Lmemcpy_srcul:
	bic	r1, r1, #3
	ldr	lr, [r1], #4
	cmp	r12, #2
	bgt	.Lmemcpy_srcul3
	beq	.Lmemcpy_srcul2
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul1loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul1loop16:
	mov	r3, lr, lsr #8
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #24
	mov	r4, r4, lsr #8
	orr	r4, r4, r5, lsl #24
	mov	r5, r5, lsr #8
	orr	r5, r5, r12, lsl #24
	mov	r12, r12, lsr #8
	orr	r12, r12, lr, lsl #24
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul1loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul1l4

.Lmemcpy_srcul1loop4:
	mov	r12, lr, lsr #8
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #24
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul1loop4

.Lmemcpy_srcul1l4:
	sub	r1, r1, #3
	b	.Lmemcpy_l4

.Lmemcpy_srcul2:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul2loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul2loop16:
	mov	r3, lr, lsr #16
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #16
	mov	r4, r4, lsr #16
	orr	r4, r4, r5, lsl #16
	mov	r5, r5, lsr #16
	orr	r5, r5, r12, lsl #16
	mov	r12, r12, lsr #16
	orr	r12, r12, lr, lsl #16
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul2loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul2l4

.Lmemcpy_srcul2loop4:
	mov	r12, lr, lsr #16
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #16
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul2loop4

.Lmemcpy_srcul2l4:
	sub	r1, r1, #2
	b	.Lmemcpy_l4

.Lmemcpy_srcul3:
	cmp	r2, #0x0c
	blt	.Lmemcpy_srcul3loop4
	sub	r2, r2, #0x0c
	stmdb	sp!, {r4, r5}

.Lmemcpy_srcul3loop16:
	mov	r3, lr, lsr #24
	ldmia	r1!, {r4, r5, r12, lr}
	orr	r3, r3, r4, lsl #8
	mov	r4, r4, lsr #24
	orr	r4, r4, r5, lsl #8
	mov	r5, r5, lsr #24
	orr	r5, r5, r12, lsl #8
	mov	r12, r12, lsr #24
	orr	r12, r12, lr, lsl #8
	stmia	r0!, {r3-r5, r12}
	subs	r2, r2, #0x10
	bge	.Lmemcpy_srcul3loop16
	ldmia	sp!, {r4, r5}
	adds	r2, r2, #0x0c
	blt	.Lmemcpy_srcul3l4

.Lmemcpy_srcul3loop4:
	mov	r12, lr, lsr #24
	ldr	lr, [r1], #4
	orr	r12, r12, lr, lsl #8
	str	r12, [r0], #4
	subs	r2, r2, #4
	bge	.Lmemcpy_srcul3loop4

.Lmemcpy_srcul3l4:
	sub	r1, r1, #1