/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

279a70fc3Mark J. Nelson	.file	"strcpy.s"
287c478bdstevel@tonic-gate
297c478bdstevel@tonic-gate/*
307c478bdstevel@tonic-gate * strcpy(s1, s2)
317c478bdstevel@tonic-gate *
327c478bdstevel@tonic-gate * Copy string s2 to s1.  s1 must be large enough. Return s1.
337c478bdstevel@tonic-gate *
347c478bdstevel@tonic-gate * Fast assembler language version of the following C-program strcpy
357c478bdstevel@tonic-gate * which represents the `standard' for the C-library.
367c478bdstevel@tonic-gate *
377c478bdstevel@tonic-gate *	char *
387c478bdstevel@tonic-gate *	strcpy(s1, s2)
397c478bdstevel@tonic-gate *	register char *s1;
407c478bdstevel@tonic-gate *	register const char *s2;
417c478bdstevel@tonic-gate *	{
427c478bdstevel@tonic-gate *		char *os1 = s1;
437c478bdstevel@tonic-gate *
447c478bdstevel@tonic-gate *		while(*s1++ = *s2++)
457c478bdstevel@tonic-gate *			;
467c478bdstevel@tonic-gate *		return(os1);
477c478bdstevel@tonic-gate *	}
487c478bdstevel@tonic-gate *
497c478bdstevel@tonic-gate */
507c478bdstevel@tonic-gate
517c478bdstevel@tonic-gate#include <sys/asm_linkage.h>
527c478bdstevel@tonic-gate
537c478bdstevel@tonic-gate	! This is a 32-bit implementation of strcpy.  It works by
547c478bdstevel@tonic-gate	! first checking the alignment of its source pointer. And,
557c478bdstevel@tonic-gate	! if it is not aligned, attempts to copy bytes until it is.
567c478bdstevel@tonic-gate	! once this has occurred, the copy takes place, while checking
577c478bdstevel@tonic-gate	! for zero bytes, based upon destination alignment.
587c478bdstevel@tonic-gate	! Methods exist to handle per-byte, half-word, and word sized
597c478bdstevel@tonic-gate	! copies.
607c478bdstevel@tonic-gate
617c478bdstevel@tonic-gate	ENTRY(strcpy)
627c478bdstevel@tonic-gate
637c478bdstevel@tonic-gate	.align 32
647c478bdstevel@tonic-gate
657c478bdstevel@tonic-gate	sub	%o1, %o0, %o3		! src - dst
667c478bdstevel@tonic-gate	andcc	%o1, 3, %o4		! src word aligned ?
677c478bdstevel@tonic-gate	bz	.srcaligned		! yup
687c478bdstevel@tonic-gate	mov	%o0, %o2		! save dst
697c478bdstevel@tonic-gate
707c478bdstevel@tonic-gate	cmp	%o4, 2			! src halfword aligned
717c478bdstevel@tonic-gate	be	.s2aligned		! yup
727c478bdstevel@tonic-gate	ldub	[%o2 + %o3], %o1	! src[0]
737c478bdstevel@tonic-gate	tst	%o1			! byte zero?
747c478bdstevel@tonic-gate	stb	%o1, [%o2]		! store first byte
757c478bdstevel@tonic-gate	bz	.done			! yup, done
767c478bdstevel@tonic-gate	cmp	%o4, 3			! only one byte needed to align?
777c478bdstevel@tonic-gate	bz	.srcaligned		! yup
787c478bdstevel@tonic-gate	inc	%o2			! src++, dst++
797c478bdstevel@tonic-gate
807c478bdstevel@tonic-gate.s2aligned:
817c478bdstevel@tonic-gate	lduh	[%o2 + %o3], %o1	! src[]
827c478bdstevel@tonic-gate	srl	%o1, 8, %o4		! %o4<7:0> = first byte
837c478bdstevel@tonic-gate	tst	%o4			! first byte zero ?
847c478bdstevel@tonic-gate	bz	.done			! yup, done
857c478bdstevel@tonic-gate	stb	%o4, [%o2]		! store first byte
867c478bdstevel@tonic-gate	andcc	%o1, 0xff, %g0		! second byte zero ?
877c478bdstevel@tonic-gate	bz	.done			! yup, done
887c478bdstevel@tonic-gate	stb	%o1, [%o2 + 1]		! store second byte
897c478bdstevel@tonic-gate	add	%o2, 2, %o2		! src += 2, dst += 2
907c478bdstevel@tonic-gate
917c478bdstevel@tonic-gate.srcaligned:
927c478bdstevel@tonic-gate	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
937c478bdstevel@tonic-gate	sethi	%hi(0x80808080), %o5	! Alan Mycroft's magic2
947c478bdstevel@tonic-gate	or	%o4, %lo(0x01010101), %o4
957c478bdstevel@tonic-gate	andcc	%o2, 3, %o1		! destination word aligned?
967c478bdstevel@tonic-gate	bnz	.dstnotaligned		! nope
977c478bdstevel@tonic-gate	or	%o5, %lo(0x80808080), %o5
987c478bdstevel@tonic-gate
997c478bdstevel@tonic-gate.copyword:
1007c478bdstevel@tonic-gate	lduw	[%o2 + %o3], %o1	! src word
1017c478bdstevel@tonic-gate	add	%o2, 4, %o2		! src += 4, dst += 4
1027c478bdstevel@tonic-gate	andn	%o5, %o1, %g1		! ~word & 0x80808080
1037c478bdstevel@tonic-gate	sub	%o1, %o4, %o1		! word - 0x01010101
1047c478bdstevel@tonic-gate	andcc	%o1, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
1057c478bdstevel@tonic-gate	add	%o1, %o4, %o1		! restore word
1067c478bdstevel@tonic-gate	bz,a	.copyword		! no zero byte if magic expression == 0
1077c478bdstevel@tonic-gate	st	%o1, [%o2 - 4]		! store word to dst (address pre-incremented)
1087c478bdstevel@tonic-gate
1097c478bdstevel@tonic-gate.zerobyte:
1107c478bdstevel@tonic-gate	set	0xff000000, %o4		! mask for 1st byte
1117c478bdstevel@tonic-gate	srl	%o1, 24, %o3		! %o3<7:0> = first byte
1127c478bdstevel@tonic-gate	andcc	%o1, %o4, %g0		! first byte zero?
1137c478bdstevel@tonic-gate	bz	.done			! yup, done
1147c478bdstevel@tonic-gate	stb	%o3, [%o2 - 4]		! store first byte
1157c478bdstevel@tonic-gate	set	0x00ff0000, %o5		! mask for 2nd byte
1167c478bdstevel@tonic-gate	srl	%o1, 16, %o3		! %o3<7:0> = second byte
1177c478bdstevel@tonic-gate	andcc	%o1, %o5, %g0		! second byte zero?
1187c478bdstevel@tonic-gate	bz	.done			! yup, done
1197c478bdstevel@tonic-gate	stb	%o3, [%o2 - 3]		! store second byte
1207c478bdstevel@tonic-gate	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
1217c478bdstevel@tonic-gate	andcc	%o1, %o4, %g0		! third byte zero?
1227c478bdstevel@tonic-gate	srl	%o1, 8, %o3		! %o3<7:0> = third byte
1237c478bdstevel@tonic-gate	bz	.done			! yup, done
1247c478bdstevel@tonic-gate	stb	%o3, [%o2 - 2]		! store third byte
1257c478bdstevel@tonic-gate	stb	%o1, [%o2 - 1]		! store fourth byte
1267c478bdstevel@tonic-gate
1277c478bdstevel@tonic-gate.done:
1287c478bdstevel@tonic-gate	retl				! done with leaf function
1297c478bdstevel@tonic-gate	.empty
1307c478bdstevel@tonic-gate
1317c478bdstevel@tonic-gate.dstnotaligned:
1327c478bdstevel@tonic-gate	cmp	%o1, 2			! dst half word aligned?
1337c478bdstevel@tonic-gate	be,a	.storehalfword2		! yup, store half word at a time
1347c478bdstevel@tonic-gate	lduw	[%o2 + %o3], %o1	! src word
1357c478bdstevel@tonic-gate
1367c478bdstevel@tonic-gate.storebyte:
1377c478bdstevel@tonic-gate	lduw	[%o2 + %o3], %o1	! src word
1387c478bdstevel@tonic-gate	add	%o2, 4, %o2		! src += 4, dst += 4
1397c478bdstevel@tonic-gate	sub	%o1, %o4, %g1		! x - 0x01010101
1407c478bdstevel@tonic-gate	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
1417c478bdstevel@tonic-gate	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
1427c478bdstevel@tonic-gate	bnz	.zerobyte		! word has zero byte, handle end cases
1437c478bdstevel@tonic-gate	srl	%o1, 24, %g1		! %g1<7:0> = first byte
1447c478bdstevel@tonic-gate	stb	%g1, [%o2 - 4]		! store first byte; half-word aligned now
1457c478bdstevel@tonic-gate	srl	%o1, 8, %g1		! %g1<15:0> = byte 2, 3
1467c478bdstevel@tonic-gate	sth	%g1, [%o2 - 3]		! store bytes 2, 3
1477c478bdstevel@tonic-gate	ba	.storebyte		! next word
1487c478bdstevel@tonic-gate	stb	%o1, [%o2 - 1]		! store fourth byte
1497c478bdstevel@tonic-gate
1507c478bdstevel@tonic-gate.storehalfword:
1517c478bdstevel@tonic-gate	lduw	[%o2 + %o3], %o1	! src word
1527c478bdstevel@tonic-gate.storehalfword2:
1537c478bdstevel@tonic-gate	add	%o2, 4, %o2		! src += 4, dst += 4
1547c478bdstevel@tonic-gate	sub	%o1, %o4, %g1		! x - 0x01010101
1557c478bdstevel@tonic-gate	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
1567c478bdstevel@tonic-gate	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
1577c478bdstevel@tonic-gate	bnz	.zerobyte		! word has zero byte, handle end cases
1587c478bdstevel@tonic-gate	srl	%o1, 16, %g1		! get first and second byte
1597c478bdstevel@tonic-gate	sth	%g1, [%o2 - 4]		! store first and second byte
1607c478bdstevel@tonic-gate	ba	.storehalfword		! next word
1617c478bdstevel@tonic-gate	sth	%o1, [%o2 - 2]		! store third and fourth byte
1627c478bdstevel@tonic-gate
1637c478bdstevel@tonic-gate	! DO NOT remove these NOPs. It will slow down the halfword loop by 15%
1647c478bdstevel@tonic-gate
1657c478bdstevel@tonic-gate	nop				! padding
1667c478bdstevel@tonic-gate	nop				! padding
1677c478bdstevel@tonic-gate
1687c478bdstevel@tonic-gate	SET_SIZE(strcpy)
1697c478bdstevel@tonic-gate
170