/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/* Source file name, given to the assembler through the .file directive. */
	.file	"memset.s"

/*
 * void *memset(void *sp, int c, size_t n)
 *
 * Set an array of n chars starting at sp to the character c.
 * Return sp.
 *
 * Fast assembler language version of the following C-program for memset
 * which represents the `standard' for the C-library.
 *
 *	void *
 *	memset(void *sp1, int c, size_t n)
 *	{
 *	    if (n != 0) {
 *		char *sp = sp1;
 *		do {
 *		    *sp++ = (char)c;
 *		} while (--n != 0);
 *	    }
 *	    return (sp1);
 *	}
 */

#include <sys/asm_linkage.h>
#include <sys/sun4asi.h>

/*
 * NOTE(review): ANSI_PRAGMA_WEAK is defined outside this file; presumably
 * it makes memset a weak function symbol -- confirm against the header.
 */
	ANSI_PRAGMA_WEAK(memset,function)

/* Round X up to the next multiple of 8. */
#define	ALIGN8(X)	(((X) + 7) & ~7)

	.section	".text"
	.align 32

/*
 * void *memset(void *sp1, int c, size_t n)
 *
 * Store (char)c into the n bytes starting at sp1 and return sp1.
 * Leaf routine: no register window is allocated and every exit is a retl.
 *
 * Register usage:
 *	%o0		sp1.  Never written, so it is still the return
 *			value at every retl.
 *	%o1		c.  On the .wrbig path it is masked to 8 bits and
 *			then replicated into all 8 bytes of the register.
 *	%o2		number of bytes still to be set.
 *	%o3, %o4	scratch counters.
 *	%o5		running destination pointer.
 *	%g1		fef bit of %fprs as found on entry (block path only).
 *	%d0 - %d14	64 bytes of fill pattern (block path only).
 *
 * Paths:
 *	n < 12		byte loop (.wrchar).
 *	n >= 12		bytes up to 8-byte alignment, then, depending on
 *			the count that is left:
 *	    <= 4095	stx double-word stores plus a byte tail;
 *	    >  4095	double words up to 64-byte alignment, block stores
 *			through ASI_BLK_P, then double and byte tails.
 *
 * Only the block path touches %fprs, the FP registers and the stack, and
 * only that path issues the membar before returning.
 *
 * %ncc is not defined in this file (NOTE(review): presumably it selects
 * the condition-code set matching the size of size_t -- confirm).
 *
 * Do not add or remove instructions without re-checking the alignment
 * remarks below: counting from the entry point, the bz in front of the
 * 32-byte loop sits at byte offset 128 and the stda of the block loop at
 * byte offset 320, both multiples of 32.
 */
	ENTRY(memset)
	cmp	%o2, 12			! if small counts, just write bytes
	bgeu,pn	%ncc, .wrbig
	mov	%o0, %o5		! delay slot: copy sp1 before using it

	! Byte loop for short counts.  deccc sets carry once the count
	! drops below zero; bgeu,a then falls through and annuls the stb,
	! so exactly n bytes are written (none when n is 0).
.wrchar:
	deccc	%o2			! byte clearing loop
	inc	%o5
	bgeu,a,pt %ncc, .wrchar
	stb	%o1, [%o5 + -1]		! address was already incremented

	! The delay slot of this retl is the andcc at .wrbig, which only
	! writes %o3 and the condition codes.
	retl
	.empty	! next instruction is safe, %o0 still good

.wrbig:
	andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
	bz,pt	%ncc, .blkchk		! already double aligned
	and	%o1, 0xff, %o1		! delay slot: o1 is (char)c
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o2, %o3, %o2		! update o2 with new count

	! Set -(%o3) bytes till sp1 double aligned
1:	stb	%o1, [%o5]		! there is at least 1 byte to set
	inccc	%o3			! byte clearing loop, counts up to 0
	bl,pt	%ncc, 1b
	inc	%o5			! delay slot: runs on every pass


	! Now sp1 is double aligned (sp1 is found in %o5)
.blkchk:
	sll	%o1, 8, %o3
	or	%o1, %o3, %o1		! now o1 has 2 bytes of c

	sll	%o1, 16, %o3
	or	%o1, %o3, %o1		! now o1 has 4 bytes of c

	! The condition codes set here are consumed by the bgu,a below;
	! the sllx/or in between do not modify them.
	cmp	%o2, 4095		! if large count use Block ld/st

	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1		! now o1 has 8 bytes of c

	bgu,a,pn %ncc, .blkwr		! Do block write for large count
	andcc	%o5, 63, %o3		! is sp1 block aligned? (taken only)

	and	%o2, 24, %o3		! o3 is {0, 8, 16, 24}

	! Store (%o2 & 24) bytes as double words.  The add runs once more
	! than the stx, so the loop leaves %o5 8 bytes past the next byte
	! to set.  The stores that follow compensate with offsets biased
	! by -8 (and by -9 in the byte tail, which increments first).
1:	subcc	%o3, 8, %o3		! double-word loop
	add	%o5, 8, %o5
	bgeu,a,pt %ncc, 1b
	stx	%o1, [%o5 - 8]		! already incremented the address

	! The delay slot of the bz is the subcc at 2: below.  When the
	! branch is taken %o4 is not used again, so the extra subtraction
	! is harmless.
	andncc	%o2, 31, %o4		! o4 has 32 byte aligned count
	bz,pn	%ncc, 3f		! First instruction of icache line
2:
	subcc	%o4, 32, %o4		! main loop, 32 bytes per iteration
	stx	%o1, [%o5 - 8]
	stx	%o1, [%o5]
	stx	%o1, [%o5 + 8]
	stx	%o1, [%o5 + 16]
	bnz,pt	%ncc, 2b
	add	%o5, 32, %o5

3:
	and	%o2, 7, %o2		! o2 has the remaining bytes (<8)

4:
	deccc	%o2			! byte clearing loop
	inc	%o5
	bgeu,a,pt %ncc, 4b
	stb	%o1, [%o5 - 9]		! already incremented the address

	retl
	nop				! %o0 still preserved

	! Entered with the condition codes of "andcc %o5, 63, %o3" from
	! the annulled delay slot of the bgu,a above.  The sub in the delay
	! slot of the bz runs either way; when the branch is taken %o3 is
	! recomputed at .blalign before it is used.
.blkwr:
	bz,pn	%ncc, .blalign		! now block aligned
	sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
	add	%o2, %o3, %o2		! o2 is the remainder

	! Store -(%o3) bytes till dst is block (64 byte) aligned.
	! Use double word stores.
	! Recall that dst is already double word aligned
1:
	stx	%o1, [%o5]
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o5, 8, %o5

	! sp1 is block aligned
.blalign:
	rd	%fprs, %g1		! g1 = fprs

	and	%o2, 63, %o3		! calc bytes left after blk store.

	! Only the fef bit is kept in %g1, so the value written back at
	! .exit has fprs.du = fprs.dl = 0.
	andcc	%g1, 0x4, %g1		! fprs.du = fprs.dl = 0
	bz,a	%ncc, 2f		! Is fprs.fef == 0
	wr	%g0, 0x4, %fprs		! fprs.fef = 1 (annulled if fef set)
2:
	brnz,pn	%o1, 3f			! %o1 is safe to check all 64-bits
	andn	%o2, 63, %o4		! calc size of blocks in bytes

	! c == 0: build the zero pattern without touching memory.
	! 0.0 * 0.0 is also 0.0.  NOTE(review): the fmuld instructions are
	! presumably used in place of fzero for instruction scheduling --
	! confirm before changing.
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fmuld	%d0, %d0, %d8
	fzero	%d10
	ba	4f
	fmuld	%d0, %d0, %d12		! delay slot

3:
	! c != 0: move the 8 pattern bytes from %o1 to %d0 through a
	! temporary SA(16)-byte stack area, then replicate %d0.
	add	%sp, -SA(16), %sp
	stx	%o1, [%sp + STACK_BIAS + ALIGN8(MINFRAME)]  ! move %o1 to %d0
	ldd	[%sp + STACK_BIAS + ALIGN8(MINFRAME)], %d0

	fmovd	%d0, %d2
	add	%sp, SA(16), %sp	! deallocate the scratch space
	fmovd	%d0, %d4
	fmovd	%d0, %d6
	fmovd	%d0, %d8
	fmovd	%d0, %d10
	fmovd	%d0, %d12

	! Label 4 is both the join point of the two set-up paths and the
	! head of the block-store loop, so this fmovd is repeated on every
	! iteration.
4:
	fmovd	%d0, %d14

	! 1st quadrant has 64 bytes of c
	! instructions 32-byte aligned here

	stda	%d0, [%o5]ASI_BLK_P
	subcc	%o4, 64, %o4
	bgu,pt	%ncc, 4b
	add	%o5, 64, %o5

	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, 6f
	and	%o2, 7, %o2		! calc bytes left after doubles

5:
	std	%d0, [%o5]		! store the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, 5b
	add	%o5, 8, %o5
6:
	! Set the remaining bytes.  The delay slot of the brz is the deccc
	! at 7: below; when the branch is taken the decremented %o2 is not
	! used again.
	brz	%o2, .exit		! safe to check all 64-bits

#if 0
	! Terminate the copy with a partial store. (bug 1200071 does not apply)
	! The data should be at d0
	dec	%o2			! needed to get the mask right
	edge8n	%g0, %o2, %o4
	stda	%d0, [%o5]%o4, ASI_PST8_P
#else
7:
	deccc	%o2
	stb	%o1, [%o5]
	bgu,pt	%ncc, 7b
	inc	%o5
#endif

.exit:
	membar	#StoreLoad|#StoreStore
	retl				! %o0 was preserved
	wr	%g1, %g0, %fprs		! fprs = g1  restore fprs

	SET_SIZE(memset)