xref: /illumos-gate/usr/src/lib/libc/sparcv9/gen/strlcpy.S (revision 55fea89d)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27	.file	"strlcpy.s"
28
29/*
30 * The strlcpy() function copies at most dstsize-1 characters
31 * (dstsize being the size of the string buffer dst) from src
32 * to dst, truncating src if necessary. The result is always
33 * null-terminated.  The function returns strlen(src). Buffer
34 * overflow can be checked as follows:
35 *
36 *   if (strlcpy(dst, src, dstsize) >= dstsize)
37 *           return -1;
38 */
39
40#include <sys/asm_linkage.h>
41
42	! strlcpy implementation is similar to that of strcpy, except
43	! in this case, the maximum size of the destination must be
44	! tracked since it bounds our maximum copy size.  However,
45	! we must still continue to check for zero since the routine
46	! is expected to null-terminate any string that is within
47	! the dest size bound.
48	!
49	! This method starts by checking for and arranging source alignment.
50	! Once this has occurred, we copy based upon destination alignment.
51	! This is either by xword, word, halfword, or byte.  As this occurs, we
52	! check for a zero-byte.  If one is found, we branch to a method
53	! which checks for the exact location of a zero-byte within a
54	! larger xword/word/half-word quantity.
55
56
57	ENTRY(strlcpy)
58
	! strlcpy(dst, src, dstsize): copy src to dst, storing at most
	! dstsize bytes including the terminating null byte, and return
	! strlen(src). n below denotes dstsize.
	!
	! Register usage after the save:
	!   %i0  dst start; also receives the return value
	!   %i1  src start
	!   %i2  n on entry; then dst + n (minus 8 in the dword loops),
	!        used as a base with %g4; a plain dst pointer in tail code
	!   %i3  src + n, used as a base with %g4; a plain src pointer in
	!        the tail code and in the strlen scan
	!   %g4  -n, a negative index counting up towards zero; n has
	!        expired once it reaches (or wraps past) zero
	!   %i4  alignment countdown first, then magic1 0x0101010101010101
	!   %i5  magic2 0x8080808080808080; later reused as a byte mask
	!   %l1  most recently loaded src byte or dword
	!   %l0, %g1  scratch
59	.align 32
60
61	save	%sp, -SA(WINDOWSIZE), %sp
62	subcc	%g0, %i2, %g4		! %g4 = -n (negative index), n == 0 ?
63	bz,pn	%ncc, .getstrlen	! n == 0, must determine strlen
64	add	%i1, %i2, %i3		! %i3 = src + n (base for %g4 indexing)
65	andcc	%i1, 7, %i4		! src dword aligned ?
66	bz,pn	%ncc, .dwordaligned	! yup
67	add	%i0, %i2, %i2		! %i2 = dst + n (base for %g4 indexing)
68	sub	%i4, 8, %i4		! %i4 = -(bytes until src aligned)
69
70.alignsrc:
71	ldub	[%i3 + %g4], %l1	! src[]
72	andcc	%l1, 0xff, %g0		! end of src reached (null byte) ?
73	stub	%l1, [%i2 + %g4]	! dst[] = src[] (null byte is stored too)
74	bz,a	%ncc, .done		! yes, done
75	add 	%i2, %g4, %i2		! need single dest pointer for strlen
76	addcc	%g4, 1, %g4		! src++, dst++, n--
77	bz,pn	%ncc, .forcenullunalign	! n == 0, force null byte, compute len
78	addcc	%i4, 1, %i4		! src aligned now?
79	bnz,a	%ncc, .alignsrc		! no, copy another byte
80	nop				! pad
81
82.dwordaligned:
83	sethi	%hi(0x01010101), %i4	! Alan Mycroft's magic1
84	add	%i2, %g4, %l0		! dst
85	or	%i4, %lo(0x01010101),%i4!  finish loading magic1
86	and	%l0, 3, %g1		! dst<1:0> to examine offset
87	sllx	%i4, 32, %l1		! spread magic1
88	cmp	%g1, 1			! dst offset of 1 or 5
89	or	%i4, %l1, %i4		!   to all 64 bits
90	sub	%i2, 8, %i2		! adjust for dest pre-incr in cpy loops
91	be,pn	%ncc, .storebyte1241	! store 1, 2, 4, 1 bytes
92	sllx	%i4, 7, %i5		!  Alan Mycroft's magic2
93	cmp	%g1, 3			! dst offset of 3 or 7
94	be,pn	%ncc, .storebyte1421	! store 1, 4, 2, 1 bytes
95	cmp	%g1, 2			! dst halfword aligned ?
96	be,pn	%ncc, .storehalfword	! yup, store half-word wise
97	andcc	%l0, 7, %g0		! dst dword aligned ? (dst<1:0> is 0 here)
98	bnz,pn	%ncc, .storeword2	! no, only word aligned; store word wise
99	nop				! ensure loop is 16-byte aligned
100	nop				! ensure loop is 16-byte aligned
101
102.storedword:
103	ldx	[%i3 + %g4], %l1	! src dword
104	addcc	%g4, 8, %g4		! n += 8, src += 8, dst += 8
105	bcs,pn	%ncc, .lastword		! if counter wraps, last word
106	andn	%i5, %l1, %g1		! ~dword & 0x8080808080808080
107	sub	%l1, %i4, %l0		! dword - 0x0101010101010101
108	andcc	%l0, %g1, %g0		! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080)
109	bz,a,pt	%ncc, .storedword	! no zero byte if magic expression == 0
110	stx	%l1, [%i2 + %g4]	! store dword to dst (address pre-incremented)
111
112	! n has not expired, but src is at the end. we need to push out the
113	! remaining src bytes. Since strlen(dst) == strlen(src), we can
114	! compute the return value as the difference of final dst pointer
115	! and the pointer to the start of dst
116
117.zerobyte:
118	add	%i2, %g4, %i2		! pointer to dest string
119	srlx	%l1, 56, %g1		! first byte
120	andcc	%g1, 0xff, %g0		! end of string ?
121	bz,pn	%ncc, .done		! yup, copy done, return length
122	stb	%g1, [%i2]		! store it
123	add	%i2, 1, %i2		! dst++
124	srlx	%l1, 48, %g1		! second byte
125	andcc	%g1, 0xff, %g0		! end of string ?
126	bz,pn	%ncc, .done		! yup, copy done, return length
127	stb	%g1, [%i2]		! store it
128	add	%i2, 1, %i2		! dst++
129	srlx	%l1, 40, %g1		! third byte
130	andcc	%g1, 0xff, %g0		! end of string ?
131	bz,pn	%ncc, .done		! yup, copy done, return length
132	stb	%g1, [%i2]		! store it
133	add	%i2, 1, %i2		! dst++
134	srlx	%l1, 32, %g1		! fourth byte
135	andcc	%g1, 0xff, %g0		! end of string ?
136	bz,pn	%ncc, .done		! yup, copy done, return length
137	stb	%g1, [%i2]		! store it
138	add	%i2, 1, %i2		! dst++
139	srlx	%l1, 24, %g1		! fifth byte
140	andcc	%g1, 0xff, %g0		! end of string ?
141	bz,pn	%ncc, .done		! yup, copy done, return length
142	stb	%g1, [%i2]		! store it
143	add	%i2, 1, %i2		! dst++
144	srlx	%l1, 16, %g1		! sixth byte
145	andcc	%g1, 0xff, %g0		! end of string ?
146	bz,pn	%ncc, .done		! yup, copy done, return length
147	stb	%g1, [%i2]		! store it
148	add	%i2, 1, %i2		! dst++
149	srlx	%l1, 8, %g1		! seventh byte
150	andcc	%g1, 0xff, %g0		! end of string ?
151	bz,pn	%ncc, .done		! yup, copy done, return length
152	stb	%g1, [%i2]		! store it
153	stb	%l1, [%i2 + 1]		! store eighth byte (the null byte)
154	add	%i2, 1, %i2		! dst++, now points at the null byte
155
156.done:
157	sub	%i2, %i0, %i0		! len = dst - orig dst
158	ret				! subroutine done
159	restore	%i0, %g0, %o0		! restore register window, return len
160
161	! n expired, so this is the last word. It may contain null bytes.
162	! Store bytes until n == 0. If a null byte is encountered during
163	! processing of this last src word, we are done. Otherwise continue
164	! to scan src until we hit the end, and compute strlen from the
165	! difference between the pointer past the last byte of src and the
166	! original pointer to the start of src
	!
	! NOTE(review): the bz branches to .forcenull below name no
	! condition-code register, unlike the other branches in this
	! routine. Presumably they test the 32-bit condition codes, which
	! would suffice because %g4 is within [-8, 0] here -- confirm.
167
168.lastword:
169	add	%i2, %g4, %i2		! we want a single dst pointer here
170	sub	%g4, 8, %g4		! undo counter pre-increment
171	add	%i3, %g4, %i3		! we want a single src pointer here
172
173	srlx	%l1, 56, %g1		! first byte
174	andcc	%g1, 0xff, %g0		! end of src reached ?
175	bz,pn	%ncc, .done		! yup
176	stb	%g1, [%i2]		! store it
177	inccc	%g4			! n--
178	bz	.forcenull		! if n == 0, force null byte, compute len
179	srlx	%l1, 48, %g1		! second byte
180	add	%i2, 1, %i2		! dst++
181	andcc	%g1, 0xff, %g0		! end of src reached ?
182	bz,pn	%ncc, .done		! yup
183	stb	%g1, [%i2]		! store it
184	inccc	%g4			! n--
185	bz	.forcenull		! if n == 0, force null byte, compute len
186	srlx	%l1, 40, %g1		! third byte
187	add	%i2, 1, %i2		! dst++
188	andcc	%g1, 0xff, %g0		! end of src reached ?
189	bz,pn	%ncc, .done		! yup
190	stb	%g1, [%i2]		! store it
191	inccc	%g4			! n--
192	bz	.forcenull		! if n == 0, force null byte, compute strlen
193	srlx	%l1, 32, %g1		! fourth byte
194	add	%i2, 1, %i2		! dst++
195	andcc	%g1, 0xff, %g0		! end of src reached ?
196	bz,pn	%ncc, .done		! yup
197	stb	%g1, [%i2]		! store it
198	inccc	%g4			! n--
199	bz	.forcenull		! if n == 0, force null byte, compute strlen
200	srlx	%l1, 24, %g1		! fifth byte
201	add	%i2, 1, %i2		! dst++
202	andcc	%g1, 0xff, %g0		! end of src reached ?
203	bz,pn	%ncc, .done		! yup
204	stb	%g1, [%i2]		! store it
205	inccc	%g4			! n--
206	bz	.forcenull		! if n == 0, force null byte, compute strlen
207	srlx	%l1, 16, %g1		! sixth byte
208	add	%i2, 1, %i2		! dst++
209	andcc	%g1, 0xff, %g0		! end of src reached ?
210	bz,pn	%ncc, .done		! yup
211	stb	%g1, [%i2]		! store it
212	inccc	%g4			! n--
213	bz	.forcenull		! if n == 0, force null byte, compute strlen
214	srlx	%l1, 8, %g1		! seventh byte
215	add	%i2, 1, %i2		! dst++
216	andcc	%g1, 0xff, %g0		! end of src reached ?
217	bz,pn	%ncc, .done		! yup
218	stb	%g1, [%i2]		! store it
219	inccc	%g4			! n--
220	bz	.forcenull		! if n == 0, force null byte, compute strlen
221	andcc	%l1, 0xff, %g0		! end of src reached (eighth byte) ?
222	add	%i2, 1, %i2		! dst++
223	bz,pn	%ncc, .done		! yup
224	stb	%l1, [%i2]		! store eighth byte
225
226	! we need to force a null byte in the last position of dst
227	! %i2 points to the location
228
229.forcenull:
230	stb	%g0, [%i2]		! force string terminating null byte
231
232	! here: %i1 points to src start
233	!	%i3 is the current src ptr (8-byte aligned)
234
235.searchword:
236	ldx	[%i3], %l1		! src dword
237.searchword2:
238	andn	%i5, %l1, %g1		! ~dword & 0x8080808080808080
239	sub	%l1, %i4, %l0		! dword - 0x0101010101010101
240	andcc	%l0, %g1, %g0		! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080)
241	bz,a,pt	%ncc, .searchword	! no null byte if expression is 0
242	add	%i3, 8, %i3		! src += 8
243
244	mov	0xff, %i5		! create byte mask for null byte scanning
245	sllx	%i5, 56, %i5		! mask for 1st byte = 0xff00000000000000
246.searchbyte:
247	andcc	%l1, %i5, %g0		! current byte zero?
248	srlx	%i5, 8, %i5		! byte mask for next byte
249	bnz,a	%ncc, .searchbyte	! current byte != zero, continue search
250	add	%i3, 1, %i3		! src++
251
252.endfound:
253	sub	%i3, %i1, %i0		! len = src - orig src
254	ret				! done
255	restore	%i0, %g0, %o0		! restore register window, return len
256	nop				! align next loop on 16-byte boundary
257
258.storebyte1421:
259	ldx	[%i3 + %g4], %l1	! x = src[]
260	addcc	%g4, 8, %g4		! src += 8, dst += 8
261	bcs,pn	%ncc, .lastword		! if counter wraps, last word
262	andn	%i5, %l1, %g1		! ~x & 0x8080808080808080
263	sub	%l1, %i4, %l0		! x - 0x0101010101010101
264	andcc	%l0, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
265	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
266	add	%i2, %g4, %l0		! dst (in pointer form)
267	srlx	%l1, 56, %g1		! %g1<7:0> = first byte; word aligned now
268	stb	%g1, [%l0]		! store first byte
269	srlx	%l1, 24, %g1		! %g1<31:0> = bytes 2, 3, 4, 5
270	stw	%g1, [%l0 + 1]		! store bytes 2, 3, 4, 5
271	srlx	%l1, 8, %g1		! %g1<15:0> = bytes 6, 7
272	sth	%g1, [%l0 + 5]		! store bytes 6, 7
273	ba	.storebyte1421		! next dword
274	stb	%l1, [%l0 + 7]		! store eighth byte
275
276.storebyte1241:
277	ldx	[%i3 + %g4], %l1	! x = src[]
278	addcc	%g4, 8, %g4		! src += 8, dst += 8
279	bcs,pn	%ncc, .lastword		! if counter wraps, last word
280	andn	%i5, %l1, %g1		! ~x & 0x8080808080808080
281	sub	%l1, %i4, %l0		! x - 0x0101010101010101
282	andcc	%l0, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
283	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
284	add	%i2, %g4, %l0		! dst (in pointer form)
285	srlx	%l1, 56, %g1		! %g1<7:0> = first byte; half-word aligned now
286	stb	%g1, [%l0]		! store first byte
287	srlx	%l1, 40, %g1		! %g1<15:0> = bytes 2, 3
288	sth	%g1, [%l0 + 1]		! store bytes 2, 3
289	srlx	%l1, 8, %g1		! %g1<31:0> = bytes 4, 5, 6, 7
290	stw	%g1, [%l0 + 3]		! store bytes 4, 5, 6, 7
291	ba	.storebyte1241		! next dword
292	stb	%l1, [%l0 + 7]		! store eighth byte
293
294.storehalfword:
295	ldx	[%i3 + %g4], %l1	! x = src[]
296	addcc	%g4, 8, %g4		! src += 8, dst += 8
297	bcs,pn	%ncc, .lastword		! if counter wraps, last word
298	andn	%i5, %l1, %g1		! ~x & 0x8080808080808080
299	sub	%l1, %i4, %l0		! x - 0x0101010101010101
300	andcc	%l0, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
301	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
302	add	%i2, %g4, %l0		! dst (in pointer form)
303	srlx	%l1, 48, %g1		! %g1<15:0> = bytes 1, 2; word aligned now
304	sth	%g1, [%l0]		! store bytes 1, 2
305	srlx	%l1, 16, %g1		! %g1<31:0> = bytes 3, 4, 5, 6
306	stw	%g1, [%l0 + 2]		! store bytes 3, 4, 5, 6
307	ba	.storehalfword		! next dword
308	sth	%l1, [%l0 + 6]		! store bytes 7, 8
309	nop				! align next loop to 16-byte boundary
310	nop				! align next loop to 16-byte boundary
311
312.storeword2:
313	ldx	[%i3 + %g4], %l1	! x = src[]
314	addcc	%g4, 8, %g4		! src += 8, dst += 8
315	bcs,pn	%ncc, .lastword		! if counter wraps, last word
316	andn	%i5, %l1, %g1		! ~x & 0x8080808080808080
317	sub	%l1, %i4, %l0		! x - 0x0101010101010101
318	andcc	%l0, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
319	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
320	add	%i2, %g4, %l0		! dst (in pointer form)
321	srlx	%l1, 32, %g1		! %g1<31:0> = bytes 1, 2, 3, 4
322	stw	%g1, [%l0]		! store bytes 1, 2, 3, 4
323	ba	.storeword2		! next dword
324	stw	%l1, [%l0 + 4]		! store bytes 5, 6, 7, 8
325
326	! n expired, i.e. end of destination buffer reached. Force
327	! null termination of dst, then scan src until end found for
328	! determination of strlen(src)
329	!
330	! here: %i3 points to current src byte
331	!       %i2 points one byte past end of dst
332	! magic constants not loaded
333
334.forcenullunalign:
335	add	%i2, %g4, %i2		! we need a single dst ptr
336	stb	%g0, [%i2 - 1]		! force string terminating null byte
337
338.getstrlen:
339	sethi	%hi(0x01010101), %i4	! Alan Mycroft's magic1
340	or	%i4, %lo(0x01010101),%i4!  finish loading magic1
341	sllx	%i4, 32, %i2		! spread magic1 (%i2 is free for scratch now)
342	or	%i4, %i2, %i4		!   to all 64 bits
343	sllx	%i4, 7, %i5		!  Alan Mycroft's magic2
344	nop				! align loop to 16-byte boundary
345
346.getstrlenloop:
347	andcc	%i3, 7, %g0		! src dword aligned?
348	bz,a,pn	%ncc, .searchword2	! yup, now search a dword at a time
349	ldx	[%i3], %l1		! src dword
350	ldub	[%i3], %l1		! load src byte
351	andcc	%l1, 0xff, %g0		! end of src reached?
352	bnz,a	%ncc, .getstrlenloop	! no, keep scanning byte-wise
353	add	%i3, 1, %i3		! src++
354	sub	%i3, %i1, %i0		! len = src - orig src
355	ret				! done
356	restore	%i0, %g0, %o0		! restore register window, return len
357
358	nop				! pad to 16-byte boundary
359	nop				! pad to 16-byte boundary
360	SET_SIZE(strlcpy)
361