xref: /illumos-gate/usr/src/lib/libc/sparcv9/gen/memcpy.s (revision 9a70fc3b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"memcpy.s"
28
29/*
30 * memcpy(s1, s2, len)
31 *
32 * Copy s2 to s1, always copying exactly len bytes.
33 * Note: this does not work for overlapping copies; bcopy() does.
34 *
35 * Added entry __align_cpy_1 is generally for use of the compilers.
36 *
37 *
38 * Fast assembler language version of the following C-program for memcpy
39 * which represents the `standard' for the C-library.
40 *
41 *	void *
42 *	memcpy(void *s, const void *s0, size_t n)
43 *	{
44 *		if (n != 0) {
45 *			char *s1 = s;
46 *			const char *s2 = s0;
47 *			do {
48 *				*s1++ = *s2++;
49 *			} while (--n != 0);
50 *		}
51 *		return (s);
52 *	}
53 */
54
55#include <sys/asm_linkage.h>
56
57	ANSI_PRAGMA_WEAK(memcpy,function)
58
59	ENTRY(memcpy)
60	ENTRY(__align_cpy_1)
	!
	! memcpy and __align_cpy_1 are two entry labels on the same body.
	!
	! Register roles as used below:
	!   %o0  dst (s1) on entry; never written here, so it still holds
	!        s1 when the routine returns
	!   %o1  src (s2) pointer; later rewritten as the difference
	!        (src - dst) so a single pointer (%g5) indexes both buffers
	!   %o2  number of bytes still to copy
	!   %g5  working dst pointer
	!   %o3  scratch / count of bytes to move in the word loops
	!   %o4, %o5, %g1  scratch (loaded data, shifted pieces, merged word)
	!
	! Outline: counts of 17 or less use the byte loop at 4:/7:.  Larger
	! counts first copy 1-3 bytes so src is word aligned, then copy 0-3
	! bytes so dst is word aligned, then move whole words (shifting and
	! merging when src and dst disagree in alignment), and finally fall
	! into the byte loop for the tail.
	!
	! Most branches below carry a live instruction in their delay slot,
	! and several branches consume condition codes set a few
	! instructions earlier; statement order is significant.
	!
61	mov	%o0, %g5		! g5 = working dst pointer; o0 is left intact as the return value
62	cmp	%o2, 17			! for small counts copy bytes
63	bleu,pn	%xcc, .dbytecp		! count <= 17 (unsigned): byte loop only
64	andcc	%o1, 3, %o5		! (delay slot) o5 = src & 3; is src word aligned
65	bz,pn	%icc, .aldst		! src already word aligned, go align dst
66	cmp	%o5, 2			! (delay slot) is src half-word aligned
67	be,pt	%xcc, .s2algn		! src offset is 2: copy one half-word
68	cmp	%o5, 3			! (delay slot) src is byte aligned; flags tested by bne below
69.s1algn:ldub	[%o1], %o3		! move 1 or 3 bytes to align it
70	inc	1, %o1
71	stb	%o3, [%g5]		! move a byte to align src
72	inc	1, %g5
73	bne,pt	%icc, .s2algn		! offset was 1 (not 3): src is now half-word aligned
74	dec	%o2			! (delay slot) one byte copied
75	b	.ald			! now go align dest
76	andcc	%g5, 3, %o5		! (delay slot) o5 = dst & 3, flags for .ald
77
78.s2algn:lduh	[%o1], %o3		! know src is 2 byte aligned
79	inc	2, %o1
80	srl	%o3, 8, %o4		! o4 = high-order byte of the half-word
81	stb	%o4, [%g5]		! have to do bytes,
82	stb	%o3, [%g5 + 1]		! don't know dst alignment
83	inc	2, %g5
84	dec	2, %o2			! two bytes copied; src is now word aligned
85
86.aldst:	andcc	%g5, 3, %o5		! align the destination address
87.ald:	bz,pn	%icc, .w4cp		! dst & 3 == 0: plain word copy
88	cmp	%o5, 2			! (delay slot)
89	bz,pn	%icc, .w2cp		! dst & 3 == 2: half-word shifted copy
90	cmp	%o5, 3			! (delay slot) flags tested by bne below
91.w3cp:	lduw	[%o1], %o4		! dst & 3 is 1 or 3: fetch first src word
92	inc	4, %o1
93	srl	%o4, 24, %o5		! o5 = first (most significant) byte of the word
94	stb	%o5, [%g5]		! one byte stored; srl/stb leave the flags alone
95	bne,pt	%icc, .w1cp		! dst & 3 was 1: two more bytes needed to align
96	inc	%g5			! (delay slot) advance dst past the stored byte
97	dec	1, %o2			! dst & 3 was 3: dst is now word aligned
98	andn	%o2, 3, %o3		! o3 is aligned word count
99	dec	4, %o3			! avoid reading beyond tail of src
100	sub	%o1, %g5, %o1		! o1 gets the difference
101
	! Loop 1: each dst word = 3 leftover bytes of the previous src word
	! followed by the first byte of the next src word.
1021:	sll	%o4, 8, %g1		! save residual bytes
103	lduw	[%o1+%g5], %o4		! next src word (o1 + g5 = current src address)
104	deccc	4, %o3			! count down by one word; sets flags for bnz
105	srl	%o4, 24, %o5		! merge with residual
106	or	%o5, %g1, %g1
107	st	%g1, [%g5]		! store the merged word (low 32 bits of g1)
108	bnz,pt	%xcc, 1b		! loop until o3 reaches zero
109	inc	4, %g5			! (delay slot) advance dst
110	sub	%o1, 3, %o1		! used one byte of last word read
111	and	%o2, 3, %o2		! leftover bytes beyond the last whole word
112	b	7f			! finish with the byte loop
113	inc	4, %o2			! (delay slot) plus the word held back by the dec 4 above
114
115.w1cp:	srl	%o4, 8, %o5		! low 16 bits of o5 = 2nd and 3rd bytes of the word
116	sth	%o5, [%g5]		! store them as a half-word
117	inc	2, %g5
118	dec	3, %o2			! three bytes (1 + 2) stored to align dst
119	andn	%o2, 3, %o3		! o3 is aligned word count
120	dec	4, %o3			! avoid reading beyond tail of src
121	sub	%o1, %g5, %o1		! o1 gets the difference
122
	! Loop 2: each dst word = 1 leftover byte of the previous src word
	! followed by the first three bytes of the next src word.
1232:	sll	%o4, 24, %g1		! save residual bytes
124	lduw	[%o1+%g5], %o4		! next src word
125	deccc	4, %o3			! count down by one word; sets flags for bnz
126	srl	%o4, 8, %o5		! merge with residual
127	or	%o5, %g1, %g1
128	st	%g1, [%g5]		! store the merged word (low 32 bits of g1)
129	bnz,pt	%xcc, 2b		! loop until o3 reaches zero
130	inc	4, %g5			! (delay slot) advance dst
131	sub	%o1, 1, %o1		! used three bytes of last word read
132	and	%o2, 3, %o2		! leftover bytes beyond the last whole word
133	b	7f			! finish with the byte loop
134	inc	4, %o2			! (delay slot) plus the word held back by the dec 4 above
135
136.w2cp:	lduw	[%o1], %o4		! dst & 3 == 2: fetch first src word
137	inc	4, %o1
138	srl	%o4, 16, %o5		! o5 = first two bytes of the word
139	sth	%o5, [%g5]		! store them as a half-word
140	inc	2, %g5
141	dec	2, %o2			! two bytes stored; dst is now word aligned
142	andn	%o2, 3, %o3		! o3 is aligned word count
143	dec	4, %o3			! avoid reading beyond tail of src
144	sub	%o1, %g5, %o1		! o1 gets the difference
145
	! Loop 3: each dst word = 2 leftover bytes of the previous src word
	! followed by the first two bytes of the next src word.
1463:	sll	%o4, 16, %g1		! save residual bytes
147	lduw	[%o1+%g5], %o4		! next src word
148	deccc	4, %o3			! count down by one word; sets flags for bnz
149	srl	%o4, 16, %o5		! merge with residual
150	or	%o5, %g1, %g1
151	st	%g1, [%g5]		! store the merged word (low 32 bits of g1)
152	bnz,pt	%xcc, 3b		! loop until o3 reaches zero
153	inc	4, %g5			! (delay slot) advance dst
154	sub	%o1, 2, %o1		! used two bytes of last word read
155	and	%o2, 3, %o2		! leftover bytes beyond the last whole word
156	b	7f			! finish with the byte loop
157	inc	4, %o2			! (delay slot) plus the word held back by the dec 4 above
158
	! src and dst are both word aligned here: straight word copy.
159.w4cp:	andn	%o2, 3, %o3		! o3 is aligned word count
160	sub	%o1, %g5, %o1		! o1 gets the difference
161
1621:	lduw	[%o1+%g5], %o4		! read from address
163	deccc	4, %o3			! decrement count
164	st	%o4, [%g5]		! write at destination address
165	bgu,pt	%xcc, 1b		! loop while the count is still above zero
166	inc	4, %g5			! increment to address
167	b	7f			! finish with the byte loop
168	and	%o2, 3, %o2		! number of leftover bytes, if any
169
170	!
171	! differenced byte copy, works with any alignment
172	!
	! Every path ends here with o1 = (src - dst), g5 = dst pointer and
	! o2 = bytes left.  The loop is entered at 7: so a zero count
	! copies nothing.
173.dbytecp:
174	b	7f
175	sub	%o1, %g5, %o1		! o1 gets the difference
176
1774:	stb	%o4, [%g5]		! write to address
178	inc	%g5			! inc to address
1797:	deccc	%o2			! decrement count
180	bgeu,a,pt %xcc,4b		! loop till done
181	ldub	[%o1+%g5], %o4		! read from address
182	retl				! leaf return; o0 still holds the original dst
183	nop				! (delay slot)
184
185	SET_SIZE(memcpy)
186	SET_SIZE(__align_cpy_1)
187