1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"strncpy.s"
28
29/*
30 * strncpy(s1, s2, n)
31 *
32 * Copy string s2 to s1, truncating or null-padding so that exactly n
33 * bytes are written; return s1.
34 *
35 * Fast assembler language version of the following C-program for strncpy
36 * which represents the `standard' for the C-library.
37 *
38 *	char *
39 *	strncpy(char *s1, const char *s2, size_t n)
40 *	{
41 *		char *os1 = s1;
42 *
43 *		n++;
44 *		while ((--n != 0) &&  ((*s1++ = *s2++) != '\0'))
45 *			;
46 *		if (n != 0)
47 *			while (--n != 0)
48 *				*s1++ = '\0';
49 *		return (os1);
50 *	}
51 */
52
53#include <sys/asm_linkage.h>
54
55	! strncpy writes exactly n bytes to s1: the bytes of s2 up to and
56	! including its null terminator, followed by null bytes for whatever
57	! remains of n.  If no null byte occurs within the first n bytes of
58	! s2, the result is not null-terminated.
59	!
60	! This implementation works by first aligning the src ptr and
61	! performing small copies until it is aligned.  Then, the string
62	! is copied based upon destination alignment.  (byte, half-word,
63	! word, etc.)
	!
	! Indexing scheme: the byte count is kept negated (-n) and both
	! pointers are advanced by n up front, so [ptr + count] addresses
	! the current byte, and the count reaching zero (or carrying out
	! of an add) signals that n bytes have been written.
	!
	! Register use after the save (n >= 7 paths):
	!	%i0	original dst, returned by the restore at .done
	!	%i1	byte or word most recently loaded from src
	!	%i2	dst + n (dst + n - 4 inside the word loops);
	!		reused as scratch by the fill code
	!	%i3	src + n
	!	%i4	negated count of bytes still to write
	!	%i5	src alignment counter, later magic2 (0x80808080)
	!	%l0	scratch / flat dst pointer for the fill code
	!	%l1	magic1 (0x01010101), later a fill counter
	!	%g1	scratch
	! The n < 7 path stays in the caller's window and uses %o2-%o5.
	!
	! Branch delay slots are used throughout: the instruction after a
	! branch executes before the target, and ",a" annuls it when a
	! conditional branch is not taken.  Several ".empty" directives
	! mark delay slots that are the first instruction of the next label.
	!
	! NOTE(review): all counter tests use the plain (icc) branch forms;
	! presumably this file is the 32-bit build -- confirm.
64
65	ENTRY(strncpy)
66
67	.align 32
68	subcc	%g0, %o2, %o4		! %o4 = -n, negated count that runs up to 0
69	bz	.doneshort		! if n == 0, done
70	cmp	%o2, 7			! n < 7 ? (delay slot)
71	add	%o1, %o2, %o3		! %o3 = src + n, so [%o3 + %o4] is src[0]
72	blu	.shortcpy		! n < 7, use byte-wise copy
73	add	%o0, %o2, %o2		! %o2 = dst + n (delay slot)
74	andcc	%o1, 3, %o5		! src word aligned ?
75	bz	.wordaligned		! yup
76	save	%sp, -0x40, %sp		! create new register window (delay slot, runs on both paths)
77	sub	%i5, 4, %i5		! %i5 = (src & 3) - 4 = -(bytes until src aligned)
78	nop				! align loop on 16-byte boundary
79	nop				! align loop on 16-byte boundary
80
81.alignsrc:
82	ldub	[%i3 + %i4], %i1	! src[]
83	stb	%i1, [%i2 + %i4]	! dst[] = src[]
84	inccc	%i4			! src++, dst++, n--
85	bz	.done			! n == 0, done
86	tst     %i1			! end of src reached (null byte) ?
87	bz,a	.bytepad		! yes, at least one byte to pad here
88	add 	%i2, %i4, %l0		! need single dest pointer for fill
89	inccc	%i5			! src aligned now?
90	bnz	.alignsrc		! no, copy another byte
91	.empty				! delay slot is the first instruction of .wordaligned
92
93.wordaligned:
94	add	%i2, %i4, %l0		! %l0 = current dst in pointer form
95	sethi	%hi(0x01010101), %l1	! Alan Mycroft's magic1
96	sub	%i2, 4, %i2		! adjust for dest pre-incr in cpy loops
97	or	%l1, %lo(0x01010101),%l1!  finish loading magic1
98	andcc	%l0, 3, %g1		! destination word aligned ?
99	bnz	.dstnotaligned		! nope
100	sll	%l1, 7, %i5		! create Alan Mycroft's magic2 (0x80808080)
101
102.storeword:
103	lduw	[%i3 + %i4], %i1	! src word
104	addcc	%i4, 4, %i4		! n += 4, src += 4, dst += 4
105	bcs	.lastword		! if counter wraps (<= 4 bytes left), last word
106	andn	%i5, %i1, %g1		! ~word & 0x80808080
107	sub	%i1, %l1, %l0		! word - 0x01010101
108	andcc	%l0, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
109	bz,a	.storeword		! no zero byte if magic expression == 0
110	stw	%i1, [%i2 + %i4]	! store word to dst (address pre-incremented)
111
112	! n has not expired, but src is at the end. we need to push out the
113	! remaining src bytes and then start padding with null bytes
114
115.zerobyte:
116	add	%i2, %i4, %l0		! pointer to dest string
117	srl	%i1, 24, %g1		! first byte
118	stb	%g1, [%l0]		! store it
119	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
120	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
121	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
122	srl	%i1, 16, %g1		! second byte
123	stb	%g1, [%l0 + 1]		! store it
124	and	%g1, 0xff, %g1		! isolate byte
125	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
126	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
127	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
128	srl	%i1, 8, %g1		! third byte
129	stb	%g1, [%l0 + 2]		! store it
130	and	%g1, 0xff, %g1		! isolate byte
131	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
132	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
133	andn	%i1, %g1, %i1		! if byte == 0, start padding with null bytes
134	stb	%i1, [%l0 + 3]		! store fourth byte
135	addcc	%i4, 8, %g0		! number of pad bytes <= 8 ? (carry set if so)
136	bcs	.bytepad		! yes, do simple byte wise fill
137	add	%l0, 4, %l0		! dst += 4
138	andcc	%l0, 3, %l1		! dst offset relative to word boundary
139	bz	.fillaligned		! dst already word aligned
140
141	! here there is at least one more byte to zero out: otherwise we would
142	! have exited through label .lastword
143
144	sub	%l1, 4, %l1		! -(bytes to align dst to word boundary); delay slot of bz above
145.makealigned:
146	stb	%g0, [%l0]		! dst[] = 0
147	addcc	%i4, 1, %i4		! n--
148	bz	.done			! n == 0, we are done
149	addcc	%l1, 1, %l1		! any more byte needed to align
150	bnz	.makealigned		! yup, pad another byte
151	add	%l0, 1, %l0		! dst++
152	nop				! pad to align copy loop below
153
154	! here we know that there are at least another 4 bytes to pad, since
155	! we don't get here unless there were more than 8 bytes to pad to
156	! begin with, and we have padded at most 3 bytes during dst aligning
157
158.fillaligned:
159	add	%i4, 3, %i2		! bias count so the mask below keeps whole words only
160	and	%i2, -4, %l1		! %l1 = -(bytes in whole words left to pad)
161	and	%i2, 4, %i2		! word count odd ? 4 : 0
162	stw	%g0, [%l0]		! store first word
163	addcc	%l1, %i2, %l1		! only a single word to fill ?
164	add	%i4, %i2, %i4		! if word count odd, n -= 4
165	bz	.bytepad		! if word count == 1, pad bytes left
166	add	%l0, %i2, %l0		! bump dst if word count odd
167
168.fillword:
169	addcc	%l1, 8, %l1		! count -= 8
170	stw	%g0, [%l0]		! dst[n] = 0
171	stw	%g0, [%l0 + 4]		! dst[n+4] = 0
172	add	%l0, 8, %l0		! dst += 8
173	bcc	.fillword		! fill words until count == 0
174	addcc	%i4, 8, %i4		! n -= 8 (delay slot; sets cc for the bz below)
175	bz	.done			! if n == 0, we are done
176	.empty				! delay slot is the first instruction of .bytepad
177
178.bytepad:
179	and	%i4, 1, %i2		! byte count odd ? 1 : 0
180	stb	%g0, [%l0]		! store first byte
181	addcc	%i4, %i2, %i4		! byte count == 1 ?
182	bz	.done			! yup, we are done
183	add	%l0, %i2, %l0		! bump pointer if odd
184
185.fillbyte:
186	addcc	%i4, 2, %i4		! n -= 2
187	stb	%g0, [%l0]		! dst[n] = 0
188	stb	%g0, [%l0 + 1]		! dst[n+1] = 0
189	bnz	.fillbyte		! fill until n == 0
190	add	%l0, 2, %l0		! dst += 2
191
192.done:
193	ret				! done
194	restore	%i0, %g0, %o0		! restore reg window, return dst
195
196	! this is the last word. It may contain null bytes. store bytes
197	! until n == 0. if null byte encountered, continue
198
199.lastword:
200	sub	%i4, 4, %i4		! undo counter pre-increment
201	add	%i2, 4, %i2		! adjust dst for counter un-bumping
202
203	srl	%i1, 24, %g1		! first byte
204	stb	%g1, [%i2 + %i4]	! store it
205	inccc	%i4			! n--
206	bz	.done			! if n == 0, we're done
207	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
208	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
209	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
210	srl	%i1, 16, %g1		! second byte
211	stb	%g1, [%i2 + %i4]	! store it
212	inccc	%i4			! n--
213	bz	.done			! if n == 0, we're done
214	and	%g1, 0xff, %g1		! isolate byte
215	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
216	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
217	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
218	srl	%i1, 8, %g1		! third byte
219	stb	%g1, [%i2 + %i4]	! store it
220	inccc	%i4			! n--
221	bz	.done			! if n == 0, we're done
222	and	%g1, 0xff, %g1		! isolate byte
223	sub	%g1, 1, %g1		! byte == 0 ? -1 : byte - 1
224	sra	%g1, 31, %g1		! byte == 0 ? -1 : 0
225	andn	%i1, %g1, %i1		! if byte == 0, start padding with null
226	ba	.done			! this store is the n-th byte, we are done
227	stb	%i1, [%i2 + %i4]	! store fourth byte
228
229.dstnotaligned:
230	cmp	%g1, 2			! dst half word aligned?
231	be	.storehalfword2		! yup, store half word at a time
232	.empty				! delay slot is the lduw at .storebyte
233.storebyte:
234	lduw	[%i3 + %i4], %i1	! x = src[] (also loads x for .storehalfword2)
235	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
236	bcs	.lastword		! if counter wraps, last word
237	andn	%i5, %i1, %g1		! ~x & 0x80808080
238	sub	%i1, %l1, %l0		! x - 0x01010101
239	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
240	bnz	.zerobyte		! end of src found, may need to pad
241	add	%i2, %i4, %l0		! dst (in pointer form)
242	srl	%i1, 24, %g1		! %g1<7:0> = 1st byte; half-word aligned now
243	stb	%g1, [%l0]		! store first byte
244	srl	%i1, 8, %g1		! %g1<15:0> = bytes 2, 3
245	sth	%g1, [%l0 + 1]		! store bytes 2, 3
246	ba	.storebyte		! next word
247	stb	%i1, [%l0 + 3]		! store fourth byte
248	nop
249	nop
250
251.storehalfword:
252	lduw	[%i3 + %i4], %i1	! x = src[]
253.storehalfword2:
254	addcc	%i4, 4, %i4		! src += 4, dst += 4, n -= 4
255	bcs	.lastword		! if counter wraps, last word
256	andn	%i5, %i1, %g1		! ~x & 0x80808080
257	sub	%i1, %l1, %l0		! x - 0x01010101
258	andcc	%l0, %g1, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
259	bnz	.zerobyte		! x has zero byte, handle end cases
260	add	%i2, %i4, %l0		! dst (in pointer form)
261	srl	%i1, 16, %g1		! %g1<15:0> = bytes 1, 2
262	sth	%g1, [%l0]		! store bytes 1, 2
263	ba	.storehalfword		! next word
264	sth	%i1, [%l0 + 2]		! store bytes 3, 4
265
	! n < 7: plain byte loop in the caller's register window (leaf code,
	! no save), using %o3 = src + n, %o2 = dst + n, %o4 = -n.

266.shortcpy:
267	ldub	[%o3 + %o4], %o5	! src[]
268	stb	%o5, [%o2 + %o4]	! dst[] = src[]
269	inccc	%o4			! src++, dst++, n--
270	bz	.doneshort		! if n == 0, done
271	tst	%o5			! src[] == 0 ?
272	bnz,a	.shortcpy		! nope, next byte
273	nop				! empty delay slot
274
275.padbyte:
276	stb	%g0, [%o2 + %o4]	! dst[] = 0
277.padbyte2:
278	addcc	%o4, 1, %o4		! dst++, n--
279	bnz,a	.padbyte2		! if n != 0, next byte
280	stb	%g0, [%o2 + %o4]	! dst[] = 0
281	nop				! align label below to 16-byte boundary
282
283.doneshort:
284	retl				! return from leaf, %o0 still holds dst
285	nop				! empty delay slot
286	SET_SIZE(strncpy)
287