17c478bd9Sstevel@tonic-gate/*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate * with the License.
87c478bd9Sstevel@tonic-gate *
97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate * and limitations under the License.
137c478bd9Sstevel@tonic-gate *
147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate *
207c478bd9Sstevel@tonic-gate * CDDL HEADER END
217c478bd9Sstevel@tonic-gate */
227c478bd9Sstevel@tonic-gate/*
237c478bd9Sstevel@tonic-gate * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate */
267c478bd9Sstevel@tonic-gate
277c478bd9Sstevel@tonic-gate#include <sys/param.h>
287c478bd9Sstevel@tonic-gate#include <sys/errno.h>
297c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h>
307c478bd9Sstevel@tonic-gate#include <sys/vtrace.h>
317c478bd9Sstevel@tonic-gate#include <sys/machthread.h>
327c478bd9Sstevel@tonic-gate#include <sys/clock.h>
337c478bd9Sstevel@tonic-gate#include <sys/asi.h>
347c478bd9Sstevel@tonic-gate#include <sys/fsr.h>
357c478bd9Sstevel@tonic-gate#include <sys/privregs.h>
367c478bd9Sstevel@tonic-gate#include <sys/fpras_impl.h>
377c478bd9Sstevel@tonic-gate
387c478bd9Sstevel@tonic-gate#include "assym.h"
397c478bd9Sstevel@tonic-gate
407c478bd9Sstevel@tonic-gate/*
417c478bd9Sstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the
427c478bd9Sstevel@tonic-gate * bcopy/copyin/copyout routines.
437c478bd9Sstevel@tonic-gate *
447c478bd9Sstevel@tonic-gate * On entry:
457c478bd9Sstevel@tonic-gate *
467c478bd9Sstevel@tonic-gate * 	! Determine whether to use the FP register version
477c478bd9Sstevel@tonic-gate * 	! or the leaf routine version depending on size
487c478bd9Sstevel@tonic-gate * 	! of copy and flags.  Set up error handling accordingly.
497c478bd9Sstevel@tonic-gate *	! The transition point depends on whether the src and
507c478bd9Sstevel@tonic-gate * 	! dst addresses can be aligned to long word, word,
517c478bd9Sstevel@tonic-gate * 	! half word, or byte boundaries.
527c478bd9Sstevel@tonic-gate *	!
537c478bd9Sstevel@tonic-gate *	! WARNING: <Register usage convention>
547c478bd9Sstevel@tonic-gate *	! For FP version, %l6 holds previous error handling and
557c478bd9Sstevel@tonic-gate *	! a flag: TRAMP_FLAG (low bits)
567c478bd9Sstevel@tonic-gate *	! for leaf routine version, %o4 holds those values.
577c478bd9Sstevel@tonic-gate *	! So either %l6 or %o4 is reserved and not available for
587c478bd9Sstevel@tonic-gate *	! any other use.
597c478bd9Sstevel@tonic-gate *
607c478bd9Sstevel@tonic-gate * 	if (length <= VIS_COPY_THRESHOLD) 	! start with a quick test
617c478bd9Sstevel@tonic-gate * 		go to small_copy;		! to speed short copies
62*5d9d9091SRichard Lowe *
637c478bd9Sstevel@tonic-gate * 	if (src,dst long word alignable) {
647c478bd9Sstevel@tonic-gate * 		if (hw_copy_limit_8 == 0) 	! hw_copy disabled
657c478bd9Sstevel@tonic-gate * 			go to small_copy;
667c478bd9Sstevel@tonic-gate *		if (length <= hw_copy_limit_8)
677c478bd9Sstevel@tonic-gate * 			go to small_copy;
687c478bd9Sstevel@tonic-gate * 		go to FPBLK_copy;
697c478bd9Sstevel@tonic-gate * 	}
707c478bd9Sstevel@tonic-gate * 	if (src,dst not alignable) {
717c478bd9Sstevel@tonic-gate * 		if (hw_copy_limit_1 == 0) 	! hw_copy disabled
727c478bd9Sstevel@tonic-gate * 			go to small_copy;
737c478bd9Sstevel@tonic-gate *		if (length <= hw_copy_limit_1)
747c478bd9Sstevel@tonic-gate * 			go to small_copy;
757c478bd9Sstevel@tonic-gate * 		go to FPBLK_copy;
767c478bd9Sstevel@tonic-gate * 	}
777c478bd9Sstevel@tonic-gate * 	if (src,dst halfword alignable) {
787c478bd9Sstevel@tonic-gate * 		if (hw_copy_limit_2 == 0) 	! hw_copy disabled
797c478bd9Sstevel@tonic-gate * 			go to small_copy;
807c478bd9Sstevel@tonic-gate *		if (length <= hw_copy_limit_2)
817c478bd9Sstevel@tonic-gate * 			go to small_copy;
827c478bd9Sstevel@tonic-gate * 		go to FPBLK_copy;
837c478bd9Sstevel@tonic-gate * 	}
847c478bd9Sstevel@tonic-gate * 	if (src,dst word alignable) {
857c478bd9Sstevel@tonic-gate * 		if (hw_copy_limit_4 == 0) 	! hw_copy disabled
867c478bd9Sstevel@tonic-gate * 			go to small_copy;
877c478bd9Sstevel@tonic-gate *		if (length <= hw_copy_limit_4)
887c478bd9Sstevel@tonic-gate * 			go to small_copy;
897c478bd9Sstevel@tonic-gate * 		go to FPBLK_copy;
907c478bd9Sstevel@tonic-gate * 	}
917c478bd9Sstevel@tonic-gate *
927c478bd9Sstevel@tonic-gate * small_copy:
937c478bd9Sstevel@tonic-gate *	Setup_leaf_rtn_error_handler; 		! diffs for each entry point
94*5d9d9091SRichard Lowe *
957c478bd9Sstevel@tonic-gate *	if (count <= 3)				! fast path for tiny copies
967c478bd9Sstevel@tonic-gate *		go to sm_left;			! special finish up code
977c478bd9Sstevel@tonic-gate *	else
987c478bd9Sstevel@tonic-gate *		if (count > CHKSIZE)		! medium sized copies
997c478bd9Sstevel@tonic-gate *			go to sm_med		! tuned by alignment
1007c478bd9Sstevel@tonic-gate *		if(src&dst not both word aligned) {
1017c478bd9Sstevel@tonic-gate *	sm_movebytes:
1027c478bd9Sstevel@tonic-gate *			move byte by byte in 4-way unrolled loop
1037c478bd9Sstevel@tonic-gate *			fall into sm_left;
1047c478bd9Sstevel@tonic-gate *	sm_left:
1057c478bd9Sstevel@tonic-gate *			move 0-3 bytes byte at a time as needed.
1067c478bd9Sstevel@tonic-gate *			restore error handler and exit.
1077c478bd9Sstevel@tonic-gate *
1087c478bd9Sstevel@tonic-gate * 		} else {	! src&dst are word aligned
1097c478bd9Sstevel@tonic-gate *			check for at least 8 bytes left,
1107c478bd9Sstevel@tonic-gate *			move word at a time, unrolled by 2
1117c478bd9Sstevel@tonic-gate *			when fewer than 8 bytes left,
1127c478bd9Sstevel@tonic-gate *	sm_half:	move half word at a time while 2 or more bytes left
1137c478bd9Sstevel@tonic-gate *	sm_byte:	move final byte if necessary
1147c478bd9Sstevel@tonic-gate *	sm_exit:
1157c478bd9Sstevel@tonic-gate *			restore error handler and exit.
1167c478bd9Sstevel@tonic-gate *		}
1177c478bd9Sstevel@tonic-gate *
1187c478bd9Sstevel@tonic-gate * ! Medium length cases with at least CHKSIZE bytes available
1197c478bd9Sstevel@tonic-gate * ! method: line up src and dst as best possible, then
1207c478bd9Sstevel@tonic-gate * ! move data in 4-way unrolled loops.
1217c478bd9Sstevel@tonic-gate *
1227c478bd9Sstevel@tonic-gate * sm_med:
1237c478bd9Sstevel@tonic-gate *	if(src&dst unalignable)
1247c478bd9Sstevel@tonic-gate * 		go to sm_movebytes
1257c478bd9Sstevel@tonic-gate *	if(src&dst halfword alignable)
1267c478bd9Sstevel@tonic-gate *		go to sm_movehalf
1277c478bd9Sstevel@tonic-gate *	if(src&dst word alignable)
1287c478bd9Sstevel@tonic-gate *		go to sm_moveword
1297c478bd9Sstevel@tonic-gate * ! fall into long word movement
1307c478bd9Sstevel@tonic-gate *	move bytes until src is word aligned
1317c478bd9Sstevel@tonic-gate *	if not long word aligned, move a word
1327c478bd9Sstevel@tonic-gate *	move long words in 4-way unrolled loop until < 32 bytes left
1337c478bd9Sstevel@tonic-gate *      move long words in 1-way unrolled loop until < 8 bytes left
1347c478bd9Sstevel@tonic-gate *	if zero bytes left, goto sm_exit
1357c478bd9Sstevel@tonic-gate *	if one byte left, go to sm_byte
1367c478bd9Sstevel@tonic-gate *	else go to sm_half
1377c478bd9Sstevel@tonic-gate *
1387c478bd9Sstevel@tonic-gate * sm_moveword:
1397c478bd9Sstevel@tonic-gate *	move bytes until src is word aligned
1407c478bd9Sstevel@tonic-gate *	move words in 4-way unrolled loop until < 16 bytes left
1417c478bd9Sstevel@tonic-gate *      move words in 1-way unrolled loop until < 4 bytes left
1427c478bd9Sstevel@tonic-gate *	if zero bytes left, goto sm_exit
1437c478bd9Sstevel@tonic-gate *	if one byte left, go to sm_byte
1447c478bd9Sstevel@tonic-gate *	else go to sm_half
1457c478bd9Sstevel@tonic-gate *
1467c478bd9Sstevel@tonic-gate * sm_movehalf:
1477c478bd9Sstevel@tonic-gate *	move a byte if needed to align src on halfword
1487c478bd9Sstevel@tonic-gate *	move halfwords in 4-way unrolled loop until < 8 bytes left
1497c478bd9Sstevel@tonic-gate *	if zero bytes left, goto sm_exit
1507c478bd9Sstevel@tonic-gate *	if one byte left, go to sm_byte
1517c478bd9Sstevel@tonic-gate *	else go to sm_half
1527c478bd9Sstevel@tonic-gate *
1537c478bd9Sstevel@tonic-gate *
1547c478bd9Sstevel@tonic-gate * FPBLK_copy:
1557c478bd9Sstevel@tonic-gate * 	%l6 = curthread->t_lofault;
1567c478bd9Sstevel@tonic-gate * 	if (%l6 != NULL) {
1577c478bd9Sstevel@tonic-gate * 		membar #Sync
1587c478bd9Sstevel@tonic-gate * 		curthread->t_lofault = .copyerr;
1597c478bd9Sstevel@tonic-gate * 		caller_error_handler = TRUE             ! %l6 |= 2
1607c478bd9Sstevel@tonic-gate * 	}
1617c478bd9Sstevel@tonic-gate *
1627c478bd9Sstevel@tonic-gate *	! for FPU testing we must not migrate cpus
1637c478bd9Sstevel@tonic-gate * 	if (curthread->t_lwp == NULL) {
1647c478bd9Sstevel@tonic-gate *		! Kernel threads do not have pcb's in which to store
1657c478bd9Sstevel@tonic-gate *		! the floating point state, so disallow preemption during
1667c478bd9Sstevel@tonic-gate *		! the copy.  This also prevents cpu migration.
1677c478bd9Sstevel@tonic-gate * 		kpreempt_disable(curthread);
1687c478bd9Sstevel@tonic-gate *	} else {
1697c478bd9Sstevel@tonic-gate *		thread_nomigrate();
1707c478bd9Sstevel@tonic-gate *	}
1717c478bd9Sstevel@tonic-gate *
1727c478bd9Sstevel@tonic-gate * 	old_fprs = %fprs;
1737c478bd9Sstevel@tonic-gate * 	old_gsr = %gsr;
1747c478bd9Sstevel@tonic-gate * 	if (%fprs.fef) {
1757c478bd9Sstevel@tonic-gate * 		%fprs.fef = 1;
1767c478bd9Sstevel@tonic-gate * 		save current fpregs on stack using blockstore
1777c478bd9Sstevel@tonic-gate * 	} else {
1787c478bd9Sstevel@tonic-gate * 		%fprs.fef = 1;
1797c478bd9Sstevel@tonic-gate * 	}
1807c478bd9Sstevel@tonic-gate *
1817c478bd9Sstevel@tonic-gate *
1827c478bd9Sstevel@tonic-gate * 	do_blockcopy_here;
1837c478bd9Sstevel@tonic-gate *
1847c478bd9Sstevel@tonic-gate * In lofault handler:
1857c478bd9Sstevel@tonic-gate *	curthread->t_lofault = .copyerr2;
1867c478bd9Sstevel@tonic-gate *	Continue on with the normal exit handler
1877c478bd9Sstevel@tonic-gate *
1887c478bd9Sstevel@tonic-gate * On normal exit:
1897c478bd9Sstevel@tonic-gate * 	%gsr = old_gsr;
1907c478bd9Sstevel@tonic-gate * 	if (old_fprs & FPRS_FEF)
1917c478bd9Sstevel@tonic-gate * 		restore fpregs from stack using blockload
1927c478bd9Sstevel@tonic-gate *	else
1937c478bd9Sstevel@tonic-gate *		zero fpregs
1947c478bd9Sstevel@tonic-gate * 	%fprs = old_fprs;
1957c478bd9Sstevel@tonic-gate * 	membar #Sync
1967c478bd9Sstevel@tonic-gate * 	curthread->t_lofault = (%l6 & ~3);
1977c478bd9Sstevel@tonic-gate *	! following test omitted from copyin/copyout as they
1987c478bd9Sstevel@tonic-gate *	! will always have a current thread
1997c478bd9Sstevel@tonic-gate * 	if (curthread->t_lwp == NULL)
2007c478bd9Sstevel@tonic-gate *		kpreempt_enable(curthread);
2017c478bd9Sstevel@tonic-gate *	else
2027c478bd9Sstevel@tonic-gate *		thread_allowmigrate();
2037c478bd9Sstevel@tonic-gate * 	return (0)
2047c478bd9Sstevel@tonic-gate *
2057c478bd9Sstevel@tonic-gate * In second lofault handler (.copyerr2):
2067c478bd9Sstevel@tonic-gate *	We've tried to restore fp state from the stack and failed.  To
2077c478bd9Sstevel@tonic-gate *	avoid returning with a corrupted fp state, we will panic.
2087c478bd9Sstevel@tonic-gate */
2097c478bd9Sstevel@tonic-gate
2107c478bd9Sstevel@tonic-gate/*
2117c478bd9Sstevel@tonic-gate * Comments about optimization choices
2127c478bd9Sstevel@tonic-gate *
2137c478bd9Sstevel@tonic-gate * The initial optimization decision in this code is to determine
2147c478bd9Sstevel@tonic-gate * whether to use the FP registers for a copy or not.  If we don't
2157c478bd9Sstevel@tonic-gate * use the FP registers, we can execute the copy as a leaf routine,
2167c478bd9Sstevel@tonic-gate * saving a register save and restore.  Also, less elaborate setup
2177c478bd9Sstevel@tonic-gate * is required, allowing short copies to be completed more quickly.
2187c478bd9Sstevel@tonic-gate * For longer copies, especially unaligned ones (where the src and
2197c478bd9Sstevel@tonic-gate * dst do not align to allow simple ldx,stx operation), the FP
2207c478bd9Sstevel@tonic-gate * registers allow much faster copy operations.
2217c478bd9Sstevel@tonic-gate *
2227c478bd9Sstevel@tonic-gate * The estimated extra cost of the FP path will vary depending on
2237c478bd9Sstevel@tonic-gate * src/dst alignment, dst offset from the next 64 byte FPblock store
2247c478bd9Sstevel@tonic-gate * boundary, remaining src data after the last full dst cache line is
2257c478bd9Sstevel@tonic-gate * moved, whether the FP registers need to be saved, and some other
2267c478bd9Sstevel@tonic-gate * minor issues.  The average additional overhead is estimated to be
2277c478bd9Sstevel@tonic-gate * 400 clocks.  Since each non-repeated/predicted tst and branch costs
228*5d9d9091SRichard Lowe * around 10 clocks, elaborate calculation would slow down all
2297c478bd9Sstevel@tonic-gate * longer copies and only benefit a small portion of medium sized
2307c478bd9Sstevel@tonic-gate * copies.  Rather than incur such cost, we chose fixed transition
2317c478bd9Sstevel@tonic-gate * points for each of the alignment choices.
2327c478bd9Sstevel@tonic-gate *
2337c478bd9Sstevel@tonic-gate * For the inner loop, here is a comparison of the per cache line
2347c478bd9Sstevel@tonic-gate * costs for each alignment when src&dst are in cache:
235*5d9d9091SRichard Lowe *
2367c478bd9Sstevel@tonic-gate * byte aligned:  108 clocks slower for non-FPBLK
2377c478bd9Sstevel@tonic-gate * half aligned:   44 clocks slower for non-FPBLK
2387c478bd9Sstevel@tonic-gate * word aligned:   12 clocks slower for non-FPBLK
2397c478bd9Sstevel@tonic-gate * long aligned:    4 clocks >>faster<< for non-FPBLK
2407c478bd9Sstevel@tonic-gate *
2417c478bd9Sstevel@tonic-gate * The long aligned loop runs faster because it does no prefetching.
2427c478bd9Sstevel@tonic-gate * That wins if the data is already in cache or there is too little
2437c478bd9Sstevel@tonic-gate * data to gain much benefit from prefetching.  But when there
2447c478bd9Sstevel@tonic-gate * is more data and that data is not in cache, failing to prefetch
2457c478bd9Sstevel@tonic-gate * can run much slower.  In addition, there is a 2 Kbyte store queue
2467c478bd9Sstevel@tonic-gate * which will cause the non-FPBLK inner loop to slow for larger copies.
2477c478bd9Sstevel@tonic-gate * The exact tradeoff is strongly load and application dependent, with
2487c478bd9Sstevel@tonic-gate * increasing risk of a customer visible performance regression if the
2497c478bd9Sstevel@tonic-gate * non-FPBLK code is used for larger copies. Studies of synthetic in-cache
2507c478bd9Sstevel@tonic-gate * vs out-of-cache copy tests in user space suggest 1024 bytes as a safe
2517c478bd9Sstevel@tonic-gate * upper limit for the non-FPBLK code.  To minimize performance regression
252*5d9d9091SRichard Lowe * risk while still gaining the primary benefits of the improvements to
2537c478bd9Sstevel@tonic-gate * the non-FPBLK code, we set an upper bound of 1024 bytes for the various
254*5d9d9091SRichard Lowe * hw_copy_limit_*.  Later experimental studies using different values
255*5d9d9091SRichard Lowe * of hw_copy_limit_* can be used to make further adjustments if
2567c478bd9Sstevel@tonic-gate * appropriate.
2577c478bd9Sstevel@tonic-gate *
2587c478bd9Sstevel@tonic-gate * hw_copy_limit_1 = src and dst are byte aligned but not halfword aligned
2597c478bd9Sstevel@tonic-gate * hw_copy_limit_2 = src and dst are halfword aligned but not word aligned
2607c478bd9Sstevel@tonic-gate * hw_copy_limit_4 = src and dst are word aligned but not longword aligned
2617c478bd9Sstevel@tonic-gate * hw_copy_limit_8 = src and dst are longword aligned
2627c478bd9Sstevel@tonic-gate *
2637c478bd9Sstevel@tonic-gate * To say that src and dst are word aligned means that after
2647c478bd9Sstevel@tonic-gate * some initial alignment activity of moving 0 to 3 bytes,
2657c478bd9Sstevel@tonic-gate * both the src and dst will be on word boundaries so that
2667c478bd9Sstevel@tonic-gate * word loads and stores may be used.
2677c478bd9Sstevel@tonic-gate *
2687c478bd9Sstevel@tonic-gate * Recommended initial values as of Mar 2004, includes testing
2697c478bd9Sstevel@tonic-gate * on Cheetah+ (900MHz), Cheetah++ (1200MHz), and Jaguar(1050MHz):
2707c478bd9Sstevel@tonic-gate * hw_copy_limit_1 =  256
2717c478bd9Sstevel@tonic-gate * hw_copy_limit_2 =  512
2727c478bd9Sstevel@tonic-gate * hw_copy_limit_4 = 1024
2737c478bd9Sstevel@tonic-gate * hw_copy_limit_8 = 1024 (or 1536 on some systems)
2747c478bd9Sstevel@tonic-gate *
2757c478bd9Sstevel@tonic-gate *
2767c478bd9Sstevel@tonic-gate * If hw_copy_limit_? is set to zero, then use of FPBLK copy is
2777c478bd9Sstevel@tonic-gate * disabled for that alignment choice.
2787c478bd9Sstevel@tonic-gate * If hw_copy_limit_? is set to a value between 1 and VIS_COPY_THRESHOLD (256),
2797c478bd9Sstevel@tonic-gate * the value of VIS_COPY_THRESHOLD is used.
2807c478bd9Sstevel@tonic-gate * It is not envisioned that hw_copy_limit_? will be changed in the field.
2817c478bd9Sstevel@tonic-gate * It is provided to allow for disabling FPBLK copies and to allow
2827c478bd9Sstevel@tonic-gate * easy testing of alternate values on future HW implementations
2837c478bd9Sstevel@tonic-gate * that might have different cache sizes, clock rates or instruction
2847c478bd9Sstevel@tonic-gate * timing rules.
2857c478bd9Sstevel@tonic-gate *
2867c478bd9Sstevel@tonic-gate * Our first test for FPBLK copies vs non-FPBLK copies checks a minimum
2877c478bd9Sstevel@tonic-gate * threshold to speed up all shorter copies (less than 256).  That
2887c478bd9Sstevel@tonic-gate * saves an alignment test, memory reference, and enabling test
2897c478bd9Sstevel@tonic-gate * for all short copies, or an estimated 24 clocks.
2907c478bd9Sstevel@tonic-gate *
2917c478bd9Sstevel@tonic-gate * The order in which these limits are checked does matter since each
2927c478bd9Sstevel@tonic-gate * non-predicted tst and branch costs around 10 clocks.
2937c478bd9Sstevel@tonic-gate * If src and dst are randomly selected addresses,
2947c478bd9Sstevel@tonic-gate * 4 of 8 will not be alignable.
2957c478bd9Sstevel@tonic-gate * 2 of 8 will be half word alignable.
2967c478bd9Sstevel@tonic-gate * 1 of 8 will be word alignable.
2977c478bd9Sstevel@tonic-gate * 1 of 8 will be long word alignable.
2987c478bd9Sstevel@tonic-gate * But, tests on running kernels show that src and dst to copy code
2997c478bd9Sstevel@tonic-gate * are typically not on random alignments.  Structure copies and
3007c478bd9Sstevel@tonic-gate * copies of larger data sizes are often on long word boundaries.
3017c478bd9Sstevel@tonic-gate * So we test the long word alignment case first, then
3027c478bd9Sstevel@tonic-gate * the byte alignment, then halfword, then word alignment.
3037c478bd9Sstevel@tonic-gate *
3047c478bd9Sstevel@tonic-gate * Several times, tests for length are made to split the code
3057c478bd9Sstevel@tonic-gate * into subcases.  These tests often allow later tests to be
306*5d9d9091SRichard Lowe * avoided.  For example, within the non-FPBLK copy, we first
3077c478bd9Sstevel@tonic-gate * check for tiny copies of 3 bytes or less.  That allows us
3087c478bd9Sstevel@tonic-gate * to use a 4-way unrolled loop for the general byte copy case
3097c478bd9Sstevel@tonic-gate * without a test on loop entry.
3107c478bd9Sstevel@tonic-gate * We subdivide the non-FPBLK case further into CHKSIZE bytes and less
3117c478bd9Sstevel@tonic-gate * vs longer cases.  For the really short case, we don't attempt to
3127c478bd9Sstevel@tonic-gate * align src and dst.  We try to minimize special case tests in
3137c478bd9Sstevel@tonic-gate * the shortest loops as each test adds a significant percentage
3147c478bd9Sstevel@tonic-gate * to the total time.
3157c478bd9Sstevel@tonic-gate *
3167c478bd9Sstevel@tonic-gate * For the medium sized cases, we allow ourselves to adjust the
3177c478bd9Sstevel@tonic-gate * src and dst alignment and provide special cases for each of
3187c478bd9Sstevel@tonic-gate * the four adjusted alignment cases. The CHKSIZE that was used
3197c478bd9Sstevel@tonic-gate * to decide between short and medium size was chosen to be 39
3207c478bd9Sstevel@tonic-gate * as that allows for the worst case of 7 bytes of alignment
3217c478bd9Sstevel@tonic-gate * shift and 4 times 8 bytes for the first long word unrolling.
3227c478bd9Sstevel@tonic-gate * That knowledge saves an initial test for length on entry into
3237c478bd9Sstevel@tonic-gate * the medium cases.  If the general loop unrolling factor were
3247c478bd9Sstevel@tonic-gate * to be increased, this number would also need to be adjusted.
3257c478bd9Sstevel@tonic-gate *
3267c478bd9Sstevel@tonic-gate * For all cases in the non-FPBLK code where it is known that at
3277c478bd9Sstevel@tonic-gate * least 4 chunks of data are available for movement, the
3287c478bd9Sstevel@tonic-gate * loop is unrolled by four.  This 4-way loop runs in 8 clocks
3297c478bd9Sstevel@tonic-gate * or 2 clocks per data element.  Due to limitations of the
3307c478bd9Sstevel@tonic-gate * branch instruction on Cheetah, Jaguar, and Panther, the
3317c478bd9Sstevel@tonic-gate * minimum time for a small, tight loop is 3 clocks.  So
3327c478bd9Sstevel@tonic-gate * the 4-way loop runs 50% faster than the fastest non-unrolled
3337c478bd9Sstevel@tonic-gate * loop.
3347c478bd9Sstevel@tonic-gate *
3357c478bd9Sstevel@tonic-gate * Instruction alignment is forced by use of .align 16 directives
3367c478bd9Sstevel@tonic-gate * and nops which are not executed in the code.  This
3377c478bd9Sstevel@tonic-gate * combination of operations shifts the alignment of following
3387c478bd9Sstevel@tonic-gate * loops to ensure that loops are aligned so that their instructions
339*5d9d9091SRichard Lowe * fall within the minimum number of 4-instruction fetch groups.
340*5d9d9091SRichard Lowe * If instructions are inserted or removed between the .align
3417c478bd9Sstevel@tonic-gate * instruction and the unrolled loops, then the alignment needs
3427c478bd9Sstevel@tonic-gate * to be readjusted.  Misaligned loops can add a clock per loop
3437c478bd9Sstevel@tonic-gate * iteration to the loop timing.
3447c478bd9Sstevel@tonic-gate *
3457c478bd9Sstevel@tonic-gate * In a few cases, code is duplicated to avoid a branch.  Since
3467c478bd9Sstevel@tonic-gate * a non-predicted tst and branch takes 10 clocks, this savings
3477c478bd9Sstevel@tonic-gate * is judged an appropriate time-space tradeoff.
3487c478bd9Sstevel@tonic-gate *
3497c478bd9Sstevel@tonic-gate * Within the FPBLK-code, the prefetch method in the inner
350*5d9d9091SRichard Lowe * loop needs to be explained as it is not standard.  Two
3517c478bd9Sstevel@tonic-gate * prefetches are issued for each cache line instead of one.
3527c478bd9Sstevel@tonic-gate * The primary one is at the maximum reach of 8 cache lines.
3537c478bd9Sstevel@tonic-gate * Most of the time, that maximum prefetch reach gives the
3547c478bd9Sstevel@tonic-gate * cache line more time to reach the processor for systems with
3557c478bd9Sstevel@tonic-gate * higher processor clocks.  But, sometimes memory interference
3567c478bd9Sstevel@tonic-gate * can cause that prefetch to be dropped.  Putting a second
3577c478bd9Sstevel@tonic-gate * prefetch at a reach of 5 cache lines catches the drops
3587c478bd9Sstevel@tonic-gate * three iterations later and shows a measured improvement
3597c478bd9Sstevel@tonic-gate * in performance over any similar loop with a single prefetch.
360*5d9d9091SRichard Lowe * The prefetches are placed in the loop so they overlap with
361*5d9d9091SRichard Lowe * non-memory instructions, so that there is no extra cost
3627c478bd9Sstevel@tonic-gate * when the data is already in-cache.
3637c478bd9Sstevel@tonic-gate *
3647c478bd9Sstevel@tonic-gate */
3657c478bd9Sstevel@tonic-gate
3667c478bd9Sstevel@tonic-gate/*
3677c478bd9Sstevel@tonic-gate * Notes on preserving existing fp state and on membars.
3687c478bd9Sstevel@tonic-gate *
3697c478bd9Sstevel@tonic-gate * When a copyOP decides to use fp we may have to preserve existing
3707c478bd9Sstevel@tonic-gate * floating point state.  It is not the caller's state that we need to
3717c478bd9Sstevel@tonic-gate * preserve - the rest of the kernel does not use fp and, anyway, fp
3727c478bd9Sstevel@tonic-gate * registers are volatile across a call.  Some examples:
3737c478bd9Sstevel@tonic-gate *
374*5d9d9091SRichard Lowe *	- userland has fp state and is interrupted (device interrupt
3757c478bd9Sstevel@tonic-gate *	  or trap) and within the interrupt/trap handling we use
3767c478bd9Sstevel@tonic-gate *	  bcopy()
3777c478bd9Sstevel@tonic-gate *	- another (higher level) interrupt or trap handler uses bcopy
3787c478bd9Sstevel@tonic-gate *	  while a bcopy from an earlier interrupt is still active
3797c478bd9Sstevel@tonic-gate *	- an asynchronous error trap occurs while fp state exists (in
3807c478bd9Sstevel@tonic-gate *	  userland or in kernel copy) and the tl0 component of the handling
3817c478bd9Sstevel@tonic-gate *	  uses bcopy
3827c478bd9Sstevel@tonic-gate *	- a user process with fp state incurs a copy-on-write fault and
3837c478bd9Sstevel@tonic-gate *	  hwblkpagecopy always uses fp
3847c478bd9Sstevel@tonic-gate *
3857c478bd9Sstevel@tonic-gate * We therefore need a per-call place in which to preserve fp state -
3867c478bd9Sstevel@tonic-gate * using our stack is ideal (and since fp copy cannot be leaf optimized
3877c478bd9Sstevel@tonic-gate * because of calls it makes, this is no hardship).
3887c478bd9Sstevel@tonic-gate *
3897c478bd9Sstevel@tonic-gate * The following membar BLD/BST discussion is Cheetah pipeline specific.
3907c478bd9Sstevel@tonic-gate * In Cheetah BLD is blocking, #LoadLoad/#LoadStore/#StoreStore are
3917c478bd9Sstevel@tonic-gate * nops (those semantics always apply) and #StoreLoad is implemented
3927c478bd9Sstevel@tonic-gate * as a membar #Sync.
3937c478bd9Sstevel@tonic-gate *
3947c478bd9Sstevel@tonic-gate * It is possible that the owner of the fp state has a block load or
3957c478bd9Sstevel@tonic-gate * block store still "in flight" at the time we come to preserve that
3967c478bd9Sstevel@tonic-gate * state.  Block loads are blocking in Cheetah pipelines so we do not
3977c478bd9Sstevel@tonic-gate * need to sync with them.  In preserving fp regs we will use block stores
3987c478bd9Sstevel@tonic-gate * (which are not blocking in Cheetah pipelines) so we require a membar #Sync
3997c478bd9Sstevel@tonic-gate * after storing state (so that our subsequent use of those registers
4007c478bd9Sstevel@tonic-gate * does not modify them before the block stores complete);  this membar
4017c478bd9Sstevel@tonic-gate * also serves to sync with block stores the owner of the fp state has
4027c478bd9Sstevel@tonic-gate * initiated.
4037c478bd9Sstevel@tonic-gate *
4047c478bd9Sstevel@tonic-gate * When we have finished fp copy (with its repeated block stores)
4057c478bd9Sstevel@tonic-gate * we must membar #Sync so that our block stores may complete before
4067c478bd9Sstevel@tonic-gate * we either restore the original fp state into the fp registers or
4077c478bd9Sstevel@tonic-gate * return to a caller which may initiate other fp operations that could
4087c478bd9Sstevel@tonic-gate * modify the fp regs we used before the block stores complete.
4097c478bd9Sstevel@tonic-gate *
4107c478bd9Sstevel@tonic-gate * Synchronous faults (eg, unresolvable DMMU miss) that occur while
4117c478bd9Sstevel@tonic-gate * t_lofault is not NULL will not panic but will instead trampoline
4127c478bd9Sstevel@tonic-gate * to the registered lofault handler.  There is no need for any
4137c478bd9Sstevel@tonic-gate * membars for these - eg, our store to t_lofault will always be visible to
4147c478bd9Sstevel@tonic-gate * ourselves and it is our cpu which will take any trap.
4157c478bd9Sstevel@tonic-gate *
4167c478bd9Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) that occur
4177c478bd9Sstevel@tonic-gate * while t_lofault is not NULL will also not panic.  Since we're copying
4187c478bd9Sstevel@tonic-gate * to or from userland the extent of the damage is known - the destination
4197c478bd9Sstevel@tonic-gate * buffer is incomplete.  So trap handlers will trampoline to the lofault
4207c478bd9Sstevel@tonic-gate * handler in this case which should take some form of error action to
4217c478bd9Sstevel@tonic-gate * avoid using the incomplete buffer.  The trap handler also flags the
4227c478bd9Sstevel@tonic-gate * fault so that later return-from-trap handling (for the trap that brought
4237c478bd9Sstevel@tonic-gate * this thread into the kernel in the first place) can notify the process
4247c478bd9Sstevel@tonic-gate * and reboot the system (or restart the service with Greenline/Contracts).
4257c478bd9Sstevel@tonic-gate *
4267c478bd9Sstevel@tonic-gate * Asynchronous faults (eg, uncorrectable ECC error from memory) can
4277c478bd9Sstevel@tonic-gate * result in deferred error traps - the trap is taken sometime after
4287c478bd9Sstevel@tonic-gate * the event and the trap PC may not be the PC of the faulting access.
4297c478bd9Sstevel@tonic-gate * Delivery of such pending traps can be forced by a membar #Sync, acting
4307c478bd9Sstevel@tonic-gate * as an "error barrier" in this role.  To accurately apply the user/kernel
4317c478bd9Sstevel@tonic-gate * separation described in the preceding paragraph we must force delivery
4327c478bd9Sstevel@tonic-gate * of deferred traps affecting kernel state before we install a lofault
4337c478bd9Sstevel@tonic-gate * handler (if we interpose a new lofault handler on an existing one there
4347c478bd9Sstevel@tonic-gate * is no need to repeat this), and we must force delivery of deferred
4357c478bd9Sstevel@tonic-gate * errors affecting the lofault-protected region before we clear t_lofault.
4367c478bd9Sstevel@tonic-gate * Failure to do so results in lost kernel state being interpreted as
4377c478bd9Sstevel@tonic-gate * affecting a copyin/copyout only, or of an error that really only
4387c478bd9Sstevel@tonic-gate * affects copy data being interpreted as losing kernel state.
4397c478bd9Sstevel@tonic-gate *
4407c478bd9Sstevel@tonic-gate * Since the copy operations may preserve and later restore floating
4417c478bd9Sstevel@tonic-gate * point state that does not belong to the caller (see examples above),
4427c478bd9Sstevel@tonic-gate * we must be careful in how we do this in order to prevent corruption
4437c478bd9Sstevel@tonic-gate * of another program.
4447c478bd9Sstevel@tonic-gate *
4457c478bd9Sstevel@tonic-gate * To make sure that floating point state is always saved and restored
4467c478bd9Sstevel@tonic-gate * correctly, the following "big rules" must be followed when the floating
4477c478bd9Sstevel@tonic-gate * point registers will be used:
4487c478bd9Sstevel@tonic-gate *
4497c478bd9Sstevel@tonic-gate * 1. %l6 always holds the caller's lofault handler.  Also in this register,
4507c478bd9Sstevel@tonic-gate *    FPUSED_FLAG (value 1, the low bit) indicates that the floating point
4517c478bd9Sstevel@tonic-gate *    registers are in use.  TRAMP_FLAG (value 2, the next bit) indicates that
4527c478bd9Sstevel@tonic-gate *    the call was to bcopy, and a lofault handler was set coming in.
4537c478bd9Sstevel@tonic-gate *
4547c478bd9Sstevel@tonic-gate * 2. The FPUSED flag indicates that all FP state has been successfully stored
4557c478bd9Sstevel@tonic-gate *    on the stack.  It should not be set until this save has been completed.
4567c478bd9Sstevel@tonic-gate *
4577c478bd9Sstevel@tonic-gate * 3. The FPUSED flag should not be cleared on exit until all FP state has
4587c478bd9Sstevel@tonic-gate *    been restored from the stack.  If an error occurs while restoring
4597c478bd9Sstevel@tonic-gate *    data from the stack, the error handler can check this flag to see if
4607c478bd9Sstevel@tonic-gate *    a restore is necessary.
4617c478bd9Sstevel@tonic-gate *
4627c478bd9Sstevel@tonic-gate * 4. Code run under the new lofault handler must be kept to a minimum.  In
4637c478bd9Sstevel@tonic-gate *    particular, any calls to FP_ALLOWMIGRATE, which could result in a call
4647c478bd9Sstevel@tonic-gate *    to kpreempt(), should not be made until after the lofault handler has
4657c478bd9Sstevel@tonic-gate *    been restored.
4667c478bd9Sstevel@tonic-gate */
4677c478bd9Sstevel@tonic-gate
4687c478bd9Sstevel@tonic-gate/*
4697c478bd9Sstevel@tonic-gate * VIS_COPY_THRESHOLD indicates the minimum number of bytes needed
4707c478bd9Sstevel@tonic-gate * to "break even" using FP/VIS-accelerated memory operations.
4717c478bd9Sstevel@tonic-gate * The FPBLK code assumes a minimum number of bytes are available
472*5d9d9091SRichard Lowe * to be moved on entry.  Check that code carefully before
4737c478bd9Sstevel@tonic-gate * reducing VIS_COPY_THRESHOLD below 256.
 *
 * The kcopy and bcopy entry points compare the byte count against this
 * value before anything else; a count less than or equal to it always
 * takes the small (non-FP) copy path, regardless of the hw_copy_limit_N
 * settings.
4747c478bd9Sstevel@tonic-gate */
4757c478bd9Sstevel@tonic-gate/*
4767c478bd9Sstevel@tonic-gate * This shadows sys/machsystm.h which can't be included due to the lack of
4777c478bd9Sstevel@tonic-gate * _ASM guards in include files it references. Change it here, change it there.
4787c478bd9Sstevel@tonic-gate */
4797c478bd9Sstevel@tonic-gate#define VIS_COPY_THRESHOLD 256
4807c478bd9Sstevel@tonic-gate
4817c478bd9Sstevel@tonic-gate/*
4827c478bd9Sstevel@tonic-gate * TEST for very short copies
4837c478bd9Sstevel@tonic-gate * Be aware that the maximum unroll for the short unaligned case
4847c478bd9Sstevel@tonic-gate * is SHORTCOPY+1
 *
 * Both values are byte counts tested at .sm_do_copy:
 *   count <= SHORTCOPY  goes directly to the leftover-bytes code
 *                       (.bc_sm_left);
 *   count >  CHKSIZE    is handed to the medium-length code (.bc_med);
 *   anything in between falls through to the alignment test that picks
 *   the word-aligned case (.bc_sm_word) or the byte loop.
4857c478bd9Sstevel@tonic-gate */
4867c478bd9Sstevel@tonic-gate#define SHORTCOPY 3
4877c478bd9Sstevel@tonic-gate#define CHKSIZE  39
4887c478bd9Sstevel@tonic-gate
4897c478bd9Sstevel@tonic-gate/*
 * Flag bits carried in the low bits of the saved t_lofault value (%l6 in
 * the FP copy paths; the small copy paths keep TRAMP_FLAG in %o4).
 *
 * FPUSED_FLAG: FP state has been saved on the stack; the error handler
 * tests it to decide whether FP state must be put back (see the "big
 * rules" earlier in this file).
 *
 * TRAMP_FLAG:
4907c478bd9Sstevel@tonic-gate * Indicates that we're to trampoline to the error handler.
4917c478bd9Sstevel@tonic-gate * Entry points bcopy, copyin_noerr, and copyout_noerr use this flag.
4927c478bd9Sstevel@tonic-gate * kcopy, copyout, xcopyout, copyin, and xcopyin do not set this flag.
 *
 * MASK_FLAGS: both flag bits together; cleared from %l6 (andn) to recover
 * the plain handler address before it is written back to t_lofault.
4937c478bd9Sstevel@tonic-gate */
4947c478bd9Sstevel@tonic-gate#define	FPUSED_FLAG	1
4957c478bd9Sstevel@tonic-gate#define	TRAMP_FLAG	2
4967c478bd9Sstevel@tonic-gate#define	MASK_FLAGS	3
4977c478bd9Sstevel@tonic-gate
4987c478bd9Sstevel@tonic-gate/*
4997c478bd9Sstevel@tonic-gate * Number of outstanding prefetches.
5007c478bd9Sstevel@tonic-gate * Testing with 1200 MHz Cheetah+ and Jaguar gives best results with
5017c478bd9Sstevel@tonic-gate * two prefetches, one with a reach of 8*BLOCK_SIZE+8 and one with a
5027c478bd9Sstevel@tonic-gate * reach of 5*BLOCK_SIZE.  The double prefetch gives a typical improvement
5037c478bd9Sstevel@tonic-gate * of 5% for large copies as compared to a single prefetch.  The reason
5047c478bd9Sstevel@tonic-gate * for the improvement is that with Cheetah and Jaguar, some prefetches
5057c478bd9Sstevel@tonic-gate * are dropped due to the prefetch queue being full.  The second prefetch
506*5d9d9091SRichard Lowe * reduces the number of cache lines that are dropped.
5077c478bd9Sstevel@tonic-gate * Do not remove the double prefetch or change either CHEETAH_PREFETCH
5087c478bd9Sstevel@tonic-gate * or CHEETAH_2ND_PREFETCH without extensive performance tests to prove
5097c478bd9Sstevel@tonic-gate * there is no loss of performance.
 *
 * Both values are expressed in units of VIS_BLOCKSIZE-byte blocks.
5107c478bd9Sstevel@tonic-gate */
5117c478bd9Sstevel@tonic-gate#define	CHEETAH_PREFETCH	8
5127c478bd9Sstevel@tonic-gate#define	CHEETAH_2ND_PREFETCH	5
5137c478bd9Sstevel@tonic-gate
/*
 * Size in bytes of one block store/load, and the alignment the FP save
 * area is rounded to by the BST and BLD stack macros in this file.
 */
5147c478bd9Sstevel@tonic-gate#define	VIS_BLOCKSIZE		64
5157c478bd9Sstevel@tonic-gate
5167c478bd9Sstevel@tonic-gate/*
5177c478bd9Sstevel@tonic-gate * Size of stack frame in order to accommodate a 64-byte aligned
5187c478bd9Sstevel@tonic-gate * floating-point register save area and 2 64-bit temp locations.
5197c478bd9Sstevel@tonic-gate * All copy functions use two quadrants of fp registers; to assure a
5207c478bd9Sstevel@tonic-gate * block-aligned two block buffer in which to save we must reserve
5217c478bd9Sstevel@tonic-gate * three blocks on stack.  Not all functions preserve %fprs on stack
5227c478bd9Sstevel@tonic-gate * or need to preserve %gsr but we use HWCOPYFRAMESIZE for all.
5237c478bd9Sstevel@tonic-gate *
5247c478bd9Sstevel@tonic-gate *    _______________________________________ <-- %fp + STACK_BIAS
5257c478bd9Sstevel@tonic-gate *    | We may need to preserve 2 quadrants |
5267c478bd9Sstevel@tonic-gate *    | of fp regs, but since we do so with |
5277c478bd9Sstevel@tonic-gate *    | BST/BLD we need room in which to    |
5287c478bd9Sstevel@tonic-gate *    | align to VIS_BLOCKSIZE bytes.  So   |
5297c478bd9Sstevel@tonic-gate *    | this area is 3 * VIS_BLOCKSIZE.     | <--  - SAVED_FPREGS_OFFSET
5307c478bd9Sstevel@tonic-gate *    |-------------------------------------|
5317c478bd9Sstevel@tonic-gate *    | 8 bytes to save %fprs               | <--  - SAVED_FPRS_OFFSET
5327c478bd9Sstevel@tonic-gate *    |-------------------------------------|
5337c478bd9Sstevel@tonic-gate *    | 8 bytes to save %gsr                | <--  - SAVED_GSR_OFFSET
5347c478bd9Sstevel@tonic-gate *    ---------------------------------------
 *
 * The three OFFSET values are distances below %fp + STACK_BIAS.
 * SAVED_FPREGS_ADJUST is different: the BST and BLD stack macros subtract
 * it from %fp + STACK_BIAS and then round the result down to a
 * VIS_BLOCKSIZE boundary to obtain the address of the two-block save
 * buffer inside the three-block area.
5357c478bd9Sstevel@tonic-gate */
5367c478bd9Sstevel@tonic-gate#define	HWCOPYFRAMESIZE		((VIS_BLOCKSIZE * (2 + 1)) + (2 * 8))
5377c478bd9Sstevel@tonic-gate#define SAVED_FPREGS_OFFSET	(VIS_BLOCKSIZE * 3)
5387c478bd9Sstevel@tonic-gate#define SAVED_FPREGS_ADJUST	((VIS_BLOCKSIZE * 2) - 1)
5397c478bd9Sstevel@tonic-gate#define	SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 8)
5407c478bd9Sstevel@tonic-gate#define	SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 8)
5417c478bd9Sstevel@tonic-gate
5427c478bd9Sstevel@tonic-gate/*
5437c478bd9Sstevel@tonic-gate * Common macros used by the various versions of the block copy
5447c478bd9Sstevel@tonic-gate * routines in this file.
5457c478bd9Sstevel@tonic-gate */
5467c478bd9Sstevel@tonic-gate
5477c478bd9Sstevel@tonic-gate/*
5487c478bd9Sstevel@tonic-gate * In FP copies if we do not have preserved data to restore over
5497c478bd9Sstevel@tonic-gate * the fp regs we used then we must zero those regs to avoid
5507c478bd9Sstevel@tonic-gate * exposing portions of the data to later threads (data security).
5517c478bd9Sstevel@tonic-gate *
5527c478bd9Sstevel@tonic-gate * Copy functions use either quadrants 1 and 3 or 2 and 4.
5537c478bd9Sstevel@tonic-gate *
5547c478bd9Sstevel@tonic-gate * FZEROQ1Q3: Zero quadrants 1 and 3, ie %f0 - %f15 and %f32 - %f47
5557c478bd9Sstevel@tonic-gate * FZEROQ2Q4: Zero quadrants 2 and 4, ie %f16 - %f31 and %f48 - %f63
5567c478bd9Sstevel@tonic-gate *
5577c478bd9Sstevel@tonic-gate * The instructions below are quicker than repeated fzero instructions
5587c478bd9Sstevel@tonic-gate * since they can dispatch down two fp pipelines.
5597c478bd9Sstevel@tonic-gate */
/*
 * FZEROQ1Q3 takes no arguments.  %f0 and %f2 are cleared with fzero; each
 * remaining double register of the two quadrants is then written with the
 * result of %f0 + %f2 (faddd) or %f0 * %f2 (fmuld), alternating between
 * the two operations.
 */
5607c478bd9Sstevel@tonic-gate#define	FZEROQ1Q3			\
5617c478bd9Sstevel@tonic-gate	fzero	%f0			;\
5627c478bd9Sstevel@tonic-gate	fzero	%f2			;\
5637c478bd9Sstevel@tonic-gate	faddd	%f0, %f2, %f4		;\
5647c478bd9Sstevel@tonic-gate	fmuld	%f0, %f2, %f6		;\
5657c478bd9Sstevel@tonic-gate	faddd	%f0, %f2, %f8		;\
5667c478bd9Sstevel@tonic-gate	fmuld	%f0, %f2, %f10		;\
5677c478bd9Sstevel@tonic-gate	faddd	%f0, %f2, %f12		;\
5687c478bd9Sstevel@tonic-gate	fmuld	%f0, %f2, %f14		;\
5697c478bd9Sstevel@tonic-gate	faddd	%f0, %f2, %f32		;\
5707c478bd9Sstevel@tonic-gate	fmuld	%f0, %f2, %f34		;\
5717c478bd9Sstevel@tonic-gate	faddd	%f0, %f2, %f36		;\
5727c478bd9Sstevel@tonic-gate	fmuld	%f0, %f2, %f38		;\
5737c478bd9Sstevel@tonic-gate	faddd	%f0, %f2, %f40		;\
5747c478bd9Sstevel@tonic-gate	fmuld	%f0, %f2, %f42		;\
5757c478bd9Sstevel@tonic-gate	faddd	%f0, %f2, %f44		;\
5767c478bd9Sstevel@tonic-gate	fmuld	%f0, %f2, %f46
5777c478bd9Sstevel@tonic-gate
/*
 * FZEROQ2Q4 takes no arguments.  %f16 and %f18 are cleared with fzero; each
 * remaining double register of the two quadrants is then written with the
 * result of %f16 + %f18 (faddd) or %f16 * %f18 (fmuld), alternating
 * between the two operations.
 */
5787c478bd9Sstevel@tonic-gate#define	FZEROQ2Q4			\
5797c478bd9Sstevel@tonic-gate	fzero	%f16			;\
5807c478bd9Sstevel@tonic-gate	fzero	%f18			;\
5817c478bd9Sstevel@tonic-gate	faddd	%f16, %f18, %f20	;\
5827c478bd9Sstevel@tonic-gate	fmuld	%f16, %f18, %f22	;\
5837c478bd9Sstevel@tonic-gate	faddd	%f16, %f18, %f24	;\
5847c478bd9Sstevel@tonic-gate	fmuld	%f16, %f18, %f26	;\
5857c478bd9Sstevel@tonic-gate	faddd	%f16, %f18, %f28	;\
5867c478bd9Sstevel@tonic-gate	fmuld	%f16, %f18, %f30	;\
5877c478bd9Sstevel@tonic-gate	faddd	%f16, %f18, %f48	;\
5887c478bd9Sstevel@tonic-gate	fmuld	%f16, %f18, %f50	;\
5897c478bd9Sstevel@tonic-gate	faddd	%f16, %f18, %f52	;\
5907c478bd9Sstevel@tonic-gate	fmuld	%f16, %f18, %f54	;\
5917c478bd9Sstevel@tonic-gate	faddd	%f16, %f18, %f56	;\
5927c478bd9Sstevel@tonic-gate	fmuld	%f16, %f18, %f58	;\
5937c478bd9Sstevel@tonic-gate	faddd	%f16, %f18, %f60	;\
5947c478bd9Sstevel@tonic-gate	fmuld	%f16, %f18, %f62
5957c478bd9Sstevel@tonic-gate
5967c478bd9Sstevel@tonic-gate/*
5977c478bd9Sstevel@tonic-gate * Macros to save and restore quadrants 1 and 3 or 2 and 4 to/from the stack.
5987c478bd9Sstevel@tonic-gate * Used to save and restore in-use fp registers when we want to use FP
5997c478bd9Sstevel@tonic-gate * and find fp already in use and copy size still large enough to justify
6007c478bd9Sstevel@tonic-gate * the additional overhead of this save and restore.
6017c478bd9Sstevel@tonic-gate *
6027c478bd9Sstevel@tonic-gate * A membar #Sync is needed before save to sync fp ops initiated before
6037c478bd9Sstevel@tonic-gate * the call to the copy function (by whoever has fp in use); for example
6047c478bd9Sstevel@tonic-gate * an earlier block load to the quadrant we are about to save may still be
6057c478bd9Sstevel@tonic-gate * "in flight".  A membar #Sync is required at the end of the save to
6067c478bd9Sstevel@tonic-gate * sync our block store (the copy code is about to begin ldd's to the
6077c478bd9Sstevel@tonic-gate * first quadrant).  Note, however, that since Cheetah pipeline block load
6087c478bd9Sstevel@tonic-gate * is blocking we can omit the initial membar before saving fp state (they're
6097c478bd9Sstevel@tonic-gate * commented below in case of future porting to a chip that does not block
6107c478bd9Sstevel@tonic-gate * on block load).
6117c478bd9Sstevel@tonic-gate *
6127c478bd9Sstevel@tonic-gate * Similarly: a membar #Sync before restore allows the block stores of
6137c478bd9Sstevel@tonic-gate * the copy operation to complete before we fill the quadrants with their
6147c478bd9Sstevel@tonic-gate * original data, and a membar #Sync after restore lets the block loads
6157c478bd9Sstevel@tonic-gate * of the restore complete before we return to whoever has the fp regs
6167c478bd9Sstevel@tonic-gate * in use.  To avoid repeated membar #Sync we make it the responsibility
6177c478bd9Sstevel@tonic-gate * of the copy code to membar #Sync immediately after copy is complete
6187c478bd9Sstevel@tonic-gate * and before using the BLD_*_FROMSTACK macro.
6197c478bd9Sstevel@tonic-gate */
/*
 * BST_FPQ1Q3_TOSTACK(tmp1): block-store the quadrant starting at %f0 and
 * then the quadrant starting at %f32 into two consecutive VIS_BLOCKSIZE
 * blocks of the stack save area.  tmp1 is a scratch integer register; on
 * exit it holds the address of the second block.  The leading membar is
 * deliberately left as a comment; the trailing membar #Sync is real.
 */
6207c478bd9Sstevel@tonic-gate#define BST_FPQ1Q3_TOSTACK(tmp1)				\
6217c478bd9Sstevel@tonic-gate	/* membar #Sync	*/					;\
6227c478bd9Sstevel@tonic-gate	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
6237c478bd9Sstevel@tonic-gate	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
6247c478bd9Sstevel@tonic-gate	stda	%f0, [tmp1]ASI_BLK_P				;\
6257c478bd9Sstevel@tonic-gate	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
6267c478bd9Sstevel@tonic-gate	stda	%f32, [tmp1]ASI_BLK_P				;\
6277c478bd9Sstevel@tonic-gate	membar	#Sync
6287c478bd9Sstevel@tonic-gate
/*
 * BLD_FPQ1Q3_FROMSTACK(tmp1): block-load the quadrants starting at %f0 and
 * %f32 back from the same two blocks that BST_FPQ1Q3_TOSTACK wrote (the
 * address is recomputed from %fp the same way).  tmp1 is a scratch integer
 * register; on exit it holds the address of the second block.  The caller
 * must already have issued a membar #Sync after its copy; this macro only
 * supplies the trailing one.
 */
6297c478bd9Sstevel@tonic-gate#define	BLD_FPQ1Q3_FROMSTACK(tmp1)				\
6307c478bd9Sstevel@tonic-gate	/* membar #Sync - provided at copy completion */	;\
6317c478bd9Sstevel@tonic-gate	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
6327c478bd9Sstevel@tonic-gate	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
6337c478bd9Sstevel@tonic-gate	ldda	[tmp1]ASI_BLK_P, %f0				;\
6347c478bd9Sstevel@tonic-gate	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
6357c478bd9Sstevel@tonic-gate	ldda	[tmp1]ASI_BLK_P, %f32				;\
6367c478bd9Sstevel@tonic-gate	membar	#Sync
6377c478bd9Sstevel@tonic-gate
/*
 * BST_FPQ2Q4_TOSTACK(tmp1): block-store the quadrant starting at %f16 and
 * then the quadrant starting at %f48 into two consecutive VIS_BLOCKSIZE
 * blocks of the stack save area.  tmp1 is a scratch integer register; on
 * exit it holds the address of the second block.  The leading membar is
 * deliberately left as a comment; the trailing membar #Sync is real.
 */
6387c478bd9Sstevel@tonic-gate#define BST_FPQ2Q4_TOSTACK(tmp1)				\
6397c478bd9Sstevel@tonic-gate	/* membar #Sync */					;\
6407c478bd9Sstevel@tonic-gate	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
6417c478bd9Sstevel@tonic-gate	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
6427c478bd9Sstevel@tonic-gate	stda	%f16, [tmp1]ASI_BLK_P				;\
6437c478bd9Sstevel@tonic-gate	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
6447c478bd9Sstevel@tonic-gate	stda	%f48, [tmp1]ASI_BLK_P				;\
6457c478bd9Sstevel@tonic-gate	membar	#Sync
6467c478bd9Sstevel@tonic-gate
/*
 * BLD_FPQ2Q4_FROMSTACK(tmp1): block-load the quadrants starting at %f16 and
 * %f48 back from the same two blocks that BST_FPQ2Q4_TOSTACK wrote (the
 * address is recomputed from %fp the same way).  tmp1 is a scratch integer
 * register; on exit it holds the address of the second block.  The caller
 * must already have issued a membar #Sync after its copy; this macro only
 * supplies the trailing one.
 */
6477c478bd9Sstevel@tonic-gate#define	BLD_FPQ2Q4_FROMSTACK(tmp1)				\
6487c478bd9Sstevel@tonic-gate	/* membar #Sync - provided at copy completion */	;\
6497c478bd9Sstevel@tonic-gate	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
6507c478bd9Sstevel@tonic-gate	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
6517c478bd9Sstevel@tonic-gate	ldda	[tmp1]ASI_BLK_P, %f16				;\
6527c478bd9Sstevel@tonic-gate	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
6537c478bd9Sstevel@tonic-gate	ldda	[tmp1]ASI_BLK_P, %f48				;\
6547c478bd9Sstevel@tonic-gate	membar	#Sync
6557c478bd9Sstevel@tonic-gate
6567c478bd9Sstevel@tonic-gate/*
6577c478bd9Sstevel@tonic-gate * FP_NOMIGRATE and FP_ALLOWMIGRATE.  Prevent migration (or, stronger,
6587c478bd9Sstevel@tonic-gate * prevent preemption if there is no t_lwp to save FP state to on context
6597c478bd9Sstevel@tonic-gate * switch) before commencing a FP copy, and reallow it on completion or
6607c478bd9Sstevel@tonic-gate * in error trampoline paths when we were using FP copy.
6617c478bd9Sstevel@tonic-gate *
6627c478bd9Sstevel@tonic-gate * Both macros may call other functions, so be aware that all outputs are
6637c478bd9Sstevel@tonic-gate * forfeit after using these macros.  For this reason we do not pass registers
6647c478bd9Sstevel@tonic-gate * to use - we just use any outputs we want.
6657c478bd9Sstevel@tonic-gate *
6667c478bd9Sstevel@tonic-gate * For fpRAS we need to perform the fpRAS mechanism test on the same
6677c478bd9Sstevel@tonic-gate * CPU as we use for the copy operation, both so that we validate the
6687c478bd9Sstevel@tonic-gate * CPU we perform the copy on and so that we know which CPU failed
6697c478bd9Sstevel@tonic-gate * if a failure is detected.  Hence we need to be bound to "our" CPU.
6707c478bd9Sstevel@tonic-gate * This could be achieved through disabling preemption (and we have to do it that
6717c478bd9Sstevel@tonic-gate * way for threads with no t_lwp) but for larger copies this may hold
6727c478bd9Sstevel@tonic-gate * higher priority threads off of cpu for too long (eg, realtime).  So we
6737c478bd9Sstevel@tonic-gate * make use of the lightweight t_nomigrate mechanism where we can (ie, when
6747c478bd9Sstevel@tonic-gate * we have a t_lwp).
6757c478bd9Sstevel@tonic-gate *
6767c478bd9Sstevel@tonic-gate * Pseudo code:
6777c478bd9Sstevel@tonic-gate *
6787c478bd9Sstevel@tonic-gate * FP_NOMIGRATE:
6797c478bd9Sstevel@tonic-gate *
6807c478bd9Sstevel@tonic-gate * if (curthread->t_lwp) {
6817c478bd9Sstevel@tonic-gate *	thread_nomigrate();
6827c478bd9Sstevel@tonic-gate * } else {
6837c478bd9Sstevel@tonic-gate *	kpreempt_disable();
6847c478bd9Sstevel@tonic-gate * }
6857c478bd9Sstevel@tonic-gate *
6867c478bd9Sstevel@tonic-gate * FP_ALLOWMIGRATE:
6877c478bd9Sstevel@tonic-gate *
6887c478bd9Sstevel@tonic-gate * if (curthread->t_lwp) {
6897c478bd9Sstevel@tonic-gate *	thread_allowmigrate();
6907c478bd9Sstevel@tonic-gate * } else {
6917c478bd9Sstevel@tonic-gate *	kpreempt_enable();
6927c478bd9Sstevel@tonic-gate * }
6937c478bd9Sstevel@tonic-gate */
6947c478bd9Sstevel@tonic-gate
/*
 * FP_NOMIGRATE(label1, label2)
 *
 * label1 and label2 are expected to be numeric local labels (the macro
 * defines them and refers to them as forward references, label##f).
 *
 * If curthread->t_lwp is non-NULL, thread_nomigrate() is called.  If it is
 * NULL, t_preempt is incremented in place.  The ldsb of t_preempt sits in
 * an annulled delay slot, so it only executes when the t_lwp == NULL
 * branch is taken.
 *
 * Uses %o0 and %o1 directly and may make a call, so treat every %o
 * register as clobbered.
 */
6957c478bd9Sstevel@tonic-gate#define	FP_NOMIGRATE(label1, label2)				\
6967c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LWP], %o0			;\
697*5d9d9091SRichard Lowe	brz,a,pn %o0, label1##f					;\
6987c478bd9Sstevel@tonic-gate	  ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
6997c478bd9Sstevel@tonic-gate	call	thread_nomigrate				;\
7007c478bd9Sstevel@tonic-gate	  nop							;\
701*5d9d9091SRichard Lowe	ba	label2##f					;\
7027c478bd9Sstevel@tonic-gate	  nop							;\
7037c478bd9Sstevel@tonic-gatelabel1:								;\
7047c478bd9Sstevel@tonic-gate	inc	%o1						;\
7057c478bd9Sstevel@tonic-gate	stb	%o1, [THREAD_REG + T_PREEMPT]			;\
7067c478bd9Sstevel@tonic-gatelabel2:
7077c478bd9Sstevel@tonic-gate
/*
 * FP_ALLOWMIGRATE(label1, label2)
 *
 * label1 and label2 are expected to be numeric local labels (the macro
 * defines them and refers to them as forward references, label##f).
 *
 * If curthread->t_lwp is non-NULL, thread_allowmigrate() is called.  If it
 * is NULL, t_preempt is decremented and stored back; the stb is in a
 * non-annulled delay slot, so the store happens whether or not the brnz is
 * taken.  Only when the count reached zero is cpu_kprunrun of the current
 * CPU examined, and if that is non-zero kpreempt() is called with the
 * current %pil (read in the call's delay slot) as its argument.
 *
 * Uses %o0 and %o1 directly and may make a call, so treat every %o
 * register as clobbered.
 */
708*5d9d9091SRichard Lowe#define	FP_ALLOWMIGRATE(label1, label2)				\
7097c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LWP], %o0			;\
710*5d9d9091SRichard Lowe	brz,a,pn %o0, label1##f					;\
7117c478bd9Sstevel@tonic-gate	  ldsb	[THREAD_REG + T_PREEMPT], %o1			;\
7127c478bd9Sstevel@tonic-gate	call thread_allowmigrate				;\
7137c478bd9Sstevel@tonic-gate	  nop							;\
714*5d9d9091SRichard Lowe	ba	label2##f					;\
7157c478bd9Sstevel@tonic-gate	  nop							;\
7167c478bd9Sstevel@tonic-gatelabel1:								;\
7177c478bd9Sstevel@tonic-gate	dec	%o1						;\
718*5d9d9091SRichard Lowe	brnz,pn	%o1, label2##f					;\
7197c478bd9Sstevel@tonic-gate	  stb	%o1, [THREAD_REG + T_PREEMPT]			;\
7207c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_CPU], %o0			;\
7217c478bd9Sstevel@tonic-gate	ldub	[%o0 + CPU_KPRUNRUN], %o0			;\
722*5d9d9091SRichard Lowe	brz,pt	%o0, label2##f					;\
7237c478bd9Sstevel@tonic-gate	  nop							;\
7247c478bd9Sstevel@tonic-gate	call	kpreempt					;\
7257c478bd9Sstevel@tonic-gate	  rdpr	%pil, %o0					;\
7267c478bd9Sstevel@tonic-gatelabel2:
7277c478bd9Sstevel@tonic-gate
7287c478bd9Sstevel@tonic-gate/*
7297c478bd9Sstevel@tonic-gate * Copy a block of storage, returning an error code if `from' or
7307c478bd9Sstevel@tonic-gate * `to' takes a kernel pagefault which cannot be resolved.
7317c478bd9Sstevel@tonic-gate * Returns errno value on pagefault error, 0 if all ok
 *
 * On entry: %o0 = from, %o1 = to, %o2 = byte count.
 *
 * Dispatch:
 *   - count <= VIS_COPY_THRESHOLD always takes the small path.
 *   - otherwise from ^ to gives the relative alignment of the two
 *     buffers, which selects one of hw_copy_limit_8, _4, _2 or _1 (the
 *     widest alignment the pair can share).  A limit of zero, or a count
 *     <= the limit, takes the small path; anything larger takes the FP
 *     path (.kcopy_more).
 *
 * Unlike bcopy, both paths always install a lofault handler, and neither
 * sets TRAMP_FLAG in the saved handler value.
7327c478bd9Sstevel@tonic-gate */
7337c478bd9Sstevel@tonic-gate
7347c478bd9Sstevel@tonic-gate	.seg	".text"
7357c478bd9Sstevel@tonic-gate	.align	4
7367c478bd9Sstevel@tonic-gate
7377c478bd9Sstevel@tonic-gate	ENTRY(kcopy)
7387c478bd9Sstevel@tonic-gate
7397c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
7407c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .kcopy_small		! count <= threshold: small copy
7417c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
7427c478bd9Sstevel@tonic-gate	btst	7, %o3				!
7437c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .kcopy_8			! check for longword alignment
7447c478bd9Sstevel@tonic-gate	  nop
745*5d9d9091SRichard Lowe	btst	1, %o3				!
7467c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .kcopy_2			! check for half-word
7477c478bd9Sstevel@tonic-gate	  nop
	/* Only byte alignment is possible: use the byte-aligned limit. */
7487c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
7497c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
7507c478bd9Sstevel@tonic-gate	tst	%o3
7517c478bd9Sstevel@tonic-gate	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
7527c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
7537c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .kcopy_small		! go to small copy
7547c478bd9Sstevel@tonic-gate	  nop
7557c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
7567c478bd9Sstevel@tonic-gate	  nop
7577c478bd9Sstevel@tonic-gate.kcopy_2:
7587c478bd9Sstevel@tonic-gate	btst	3, %o3				!
7597c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .kcopy_4			! check for word alignment
7607c478bd9Sstevel@tonic-gate	  nop
7617c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
7627c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
7637c478bd9Sstevel@tonic-gate	tst	%o3
7647c478bd9Sstevel@tonic-gate	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
7657c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
7667c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .kcopy_small		! go to small copy
7677c478bd9Sstevel@tonic-gate	  nop
7687c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
7697c478bd9Sstevel@tonic-gate	  nop
7707c478bd9Sstevel@tonic-gate.kcopy_4:
7717c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
7727c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
7737c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
7747c478bd9Sstevel@tonic-gate	tst	%o3
7757c478bd9Sstevel@tonic-gate	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
7767c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
7777c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .kcopy_small		! go to small copy
7787c478bd9Sstevel@tonic-gate	  nop
7797c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
7807c478bd9Sstevel@tonic-gate	  nop
7817c478bd9Sstevel@tonic-gate.kcopy_8:
7827c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
7837c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
7847c478bd9Sstevel@tonic-gate	tst	%o3
7857c478bd9Sstevel@tonic-gate	bz,pn	%icc, .kcopy_small		! if zero, disable HW copy
7867c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
7877c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .kcopy_small		! go to small copy
7887c478bd9Sstevel@tonic-gate	  nop
7897c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .kcopy_more		! otherwise go to large copy
7907c478bd9Sstevel@tonic-gate	  nop
7917c478bd9Sstevel@tonic-gate
	/*
	 * Small path: stays in the caller's register window.  The previous
	 * t_lofault is kept in %o4 and .sm_copyerr is installed (in the
	 * branch delay slot) before joining the shared small-copy code.
	 */
7927c478bd9Sstevel@tonic-gate.kcopy_small:
7937c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_copyerr), %o5		! sm_copyerr is lofault value
7947c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_copyerr), %o5
7957c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
7967c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
7977c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .sm_do_copy		! common code
7987c478bd9Sstevel@tonic-gate	 stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault
7997c478bd9Sstevel@tonic-gate
	/*
	 * FP path: take a new register window with HWCOPYFRAMESIZE bytes of
	 * extra frame.  The previous t_lofault is kept in %l6 and .copyerr
	 * is installed (in the branch delay slot) before joining .do_copy.
	 */
8007c478bd9Sstevel@tonic-gate.kcopy_more:
8017c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
8027c478bd9Sstevel@tonic-gate	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
8037c478bd9Sstevel@tonic-gate	or	%l7, %lo(.copyerr), %l7
8047c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
8057c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
8067c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .do_copy			! common code
8077c478bd9Sstevel@tonic-gate	  stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
8087c478bd9Sstevel@tonic-gate
8097c478bd9Sstevel@tonic-gate
8107c478bd9Sstevel@tonic-gate/*
8117c478bd9Sstevel@tonic-gate * We got here because of a fault during bcopy_more, called from kcopy or bcopy.
8127c478bd9Sstevel@tonic-gate * Errno value is in %g1.  bcopy_more uses fp quadrants 1 and 3.
 *
 * %l6 holds the caller's saved t_lofault value with FPUSED_FLAG and
 * TRAMP_FLAG in its low bits.  While FP state is being put back, t_lofault
 * points at .copyerr2, so a further fault in this handler panics rather
 * than re-entering it.
 *
 * Exit is either a normal return of the errno in %g1 (TRAMP_FLAG clear,
 * the kcopy case) or a jump to the caller's original handler with %o0 = 0
 * (TRAMP_FLAG set, the bcopy case).
8137c478bd9Sstevel@tonic-gate */
8147c478bd9Sstevel@tonic-gate.copyerr:
8157c478bd9Sstevel@tonic-gate	set	.copyerr2, %l0
8167c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
8177c478bd9Sstevel@tonic-gate	stn	%l0, [THREAD_REG + T_LOFAULT]	! set t_lofault
8187c478bd9Sstevel@tonic-gate	btst	FPUSED_FLAG, %l6
8197c478bd9Sstevel@tonic-gate	bz	%ncc, 1f
8207c478bd9Sstevel@tonic-gate	  and	%l6, TRAMP_FLAG, %l0		! copy trampoline flag to %l0
8217c478bd9Sstevel@tonic-gate
	/*
	 * FP state was saved by the copy code: put %gsr back, then either
	 * reload quadrants 1 and 3 from the stack (saved %fprs had FPRS_FEF
	 * set) or zero them (it did not), and finally put %fprs back.
	 */
8227c478bd9Sstevel@tonic-gate	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
8237c478bd9Sstevel@tonic-gate	wr	%o2, 0, %gsr
8247c478bd9Sstevel@tonic-gate
	/*
	 * NOTE(review): this is a 32-bit load from an 8-byte slot; it
	 * presumably matches a 32-bit store on the save side - confirm
	 * against the code that saves %fprs.
	 */
8257c478bd9Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
8267c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o3
8277c478bd9Sstevel@tonic-gate	bz,pt	%icc, 4f
8287c478bd9Sstevel@tonic-gate	  nop
8297c478bd9Sstevel@tonic-gate
8307c478bd9Sstevel@tonic-gate	BLD_FPQ1Q3_FROMSTACK(%o2)
8317c478bd9Sstevel@tonic-gate
8327c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 1f
8337c478bd9Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
8347c478bd9Sstevel@tonic-gate
8357c478bd9Sstevel@tonic-gate4:
8367c478bd9Sstevel@tonic-gate	FZEROQ1Q3
8377c478bd9Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
8387c478bd9Sstevel@tonic-gate
8397c478bd9Sstevel@tonic-gate	!
8407c478bd9Sstevel@tonic-gate	! Need to cater for the different expectations of kcopy
8417c478bd9Sstevel@tonic-gate	! and bcopy. kcopy will *always* set a t_lofault handler
8427c478bd9Sstevel@tonic-gate	! If it fires, we're expected to just return the error code
8437c478bd9Sstevel@tonic-gate	! and *not* to invoke any existing error handler. As far as
8447c478bd9Sstevel@tonic-gate	! bcopy is concerned, we only set t_lofault if there was an
8457c478bd9Sstevel@tonic-gate	! existing lofault handler. In that case we're expected to
8467c478bd9Sstevel@tonic-gate	! invoke the previously existing handler after resetting the
8477c478bd9Sstevel@tonic-gate	! t_lofault value.
8487c478bd9Sstevel@tonic-gate	!
8497c478bd9Sstevel@tonic-gate1:
8507c478bd9Sstevel@tonic-gate	andn	%l6, MASK_FLAGS, %l6		! turn trampoline flag off
8517c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
8527c478bd9Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
	/*
	 * Migration is re-allowed only after t_lofault has been restored,
	 * because the macro may end up calling kpreempt().  It clobbers the
	 * %o registers; %l0 and %l6 are window-local and are used below.
	 */
8537c478bd9Sstevel@tonic-gate	FP_ALLOWMIGRATE(5, 6)
8547c478bd9Sstevel@tonic-gate
8557c478bd9Sstevel@tonic-gate	btst	TRAMP_FLAG, %l0
8567c478bd9Sstevel@tonic-gate	bnz,pn	%ncc, 3f
8577c478bd9Sstevel@tonic-gate	  nop
8587c478bd9Sstevel@tonic-gate	ret
8597c478bd9Sstevel@tonic-gate	  restore	%g1, 0, %o0
8607c478bd9Sstevel@tonic-gate
8617c478bd9Sstevel@tonic-gate3:
8627c478bd9Sstevel@tonic-gate	!
8637c478bd9Sstevel@tonic-gate	! We're here via bcopy. There *must* have been an error handler
8647c478bd9Sstevel@tonic-gate	! in place otherwise we would have died a nasty death already.
8657c478bd9Sstevel@tonic-gate	!
8667c478bd9Sstevel@tonic-gate	jmp	%l6				! goto real handler
8677c478bd9Sstevel@tonic-gate	  restore	%g0, 0, %o0		! dispose of copy window
8687c478bd9Sstevel@tonic-gate
8697c478bd9Sstevel@tonic-gate/*
8707c478bd9Sstevel@tonic-gate * We got here because of a fault in .copyerr.  We can't safely restore fp
8717c478bd9Sstevel@tonic-gate * state, so we panic.
 *
 * The message string is emitted inline in the instruction stream, so the
 * .align 4 after it is needed to put .copyerr2 back on an instruction
 * boundary.  Nothing follows the call to panic and its delay slot; the
 * code relies on panic not returning.
8727c478bd9Sstevel@tonic-gate */
8737c478bd9Sstevel@tonic-gatefp_panic_msg:
8747c478bd9Sstevel@tonic-gate	.asciz	"Unable to restore fp state after copy operation"
8757c478bd9Sstevel@tonic-gate
8767c478bd9Sstevel@tonic-gate	.align	4
8777c478bd9Sstevel@tonic-gate.copyerr2:
8787c478bd9Sstevel@tonic-gate	set	fp_panic_msg, %o0
8797c478bd9Sstevel@tonic-gate	call	panic
8807c478bd9Sstevel@tonic-gate	  nop
8817c478bd9Sstevel@tonic-gate
8827c478bd9Sstevel@tonic-gate/*
8837c478bd9Sstevel@tonic-gate * We got here because of a fault during a small kcopy or bcopy.
8847c478bd9Sstevel@tonic-gate * No floating point registers are used by the small copies.
8857c478bd9Sstevel@tonic-gate * Errno value is in %g1.
 *
 * %o4 holds the caller's saved t_lofault value, with TRAMP_FLAG set when
 * the small bcopy path interposed on an existing handler.  The handler is
 * written back to t_lofault with the flag stripped.  Then either the errno
 * is returned in %o0 (flag clear) or control jumps to the original
 * handler with %o0 = 0 (flag set).
 *
 * The btst result is consumed by the bnz three instructions later; the
 * intervening membar and andn do not modify the condition codes.
8867c478bd9Sstevel@tonic-gate */
8877c478bd9Sstevel@tonic-gate.sm_copyerr:
8887c478bd9Sstevel@tonic-gate1:
8897c478bd9Sstevel@tonic-gate	btst	TRAMP_FLAG, %o4
8907c478bd9Sstevel@tonic-gate	membar	#Sync
8917c478bd9Sstevel@tonic-gate	andn	%o4, TRAMP_FLAG, %o4
8927c478bd9Sstevel@tonic-gate	bnz,pn	%ncc, 3f
8937c478bd9Sstevel@tonic-gate	  stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
8947c478bd9Sstevel@tonic-gate	retl
8957c478bd9Sstevel@tonic-gate	  mov	%g1, %o0
8967c478bd9Sstevel@tonic-gate3:
8977c478bd9Sstevel@tonic-gate	jmp	%o4				! goto real handler
898*5d9d9091SRichard Lowe	  mov	%g0, %o0			!
8997c478bd9Sstevel@tonic-gate
9007c478bd9Sstevel@tonic-gate	SET_SIZE(kcopy)
9017c478bd9Sstevel@tonic-gate
9027c478bd9Sstevel@tonic-gate
9037c478bd9Sstevel@tonic-gate/*
9047c478bd9Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
9057c478bd9Sstevel@tonic-gate * Registers: l6 - saved t_lofault
9067c478bd9Sstevel@tonic-gate * (for short copies, o4 - saved t_lofault)
9077c478bd9Sstevel@tonic-gate *
9087c478bd9Sstevel@tonic-gate * Copy a page of memory.
9097c478bd9Sstevel@tonic-gate * Assumes double word alignment and a count >= 256.
9107c478bd9Sstevel@tonic-gate */
9117c478bd9Sstevel@tonic-gate
9127c478bd9Sstevel@tonic-gate	ENTRY(bcopy)
9137c478bd9Sstevel@tonic-gate
9147c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
9157c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .bcopy_small		! go to larger cases
9167c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
9177c478bd9Sstevel@tonic-gate	btst	7, %o3				!
9187c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bcopy_8			! check for longword alignment
9197c478bd9Sstevel@tonic-gate	  nop
920*5d9d9091SRichard Lowe	btst	1, %o3				!
9217c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bcopy_2			! check for half-word
9227c478bd9Sstevel@tonic-gate	  nop
9237c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
9247c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
9257c478bd9Sstevel@tonic-gate	tst	%o3
9267c478bd9Sstevel@tonic-gate	bz,pn	%icc, .bcopy_small		! if zero, disable HW copy
9277c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
9287c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .bcopy_small		! go to small copy
9297c478bd9Sstevel@tonic-gate	  nop
9307c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .bcopy_more		! otherwise go to large copy
9317c478bd9Sstevel@tonic-gate	  nop
9327c478bd9Sstevel@tonic-gate.bcopy_2:
9337c478bd9Sstevel@tonic-gate	btst	3, %o3				!
9347c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bcopy_4			! check for word alignment
9357c478bd9Sstevel@tonic-gate	  nop
9367c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
9377c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
9387c478bd9Sstevel@tonic-gate	tst	%o3
9397c478bd9Sstevel@tonic-gate	bz,pn	%icc, .bcopy_small		! if zero, disable HW copy
9407c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
9417c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .bcopy_small		! go to small copy
9427c478bd9Sstevel@tonic-gate	  nop
9437c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .bcopy_more		! otherwise go to large copy
9447c478bd9Sstevel@tonic-gate	  nop
9457c478bd9Sstevel@tonic-gate.bcopy_4:
9467c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
9477c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
9487c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
9497c478bd9Sstevel@tonic-gate	tst	%o3
9507c478bd9Sstevel@tonic-gate	bz,pn	%icc, .bcopy_small		! if zero, disable HW copy
9517c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
9527c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .bcopy_small		! go to small copy
9537c478bd9Sstevel@tonic-gate	  nop
9547c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .bcopy_more		! otherwise go to large copy
9557c478bd9Sstevel@tonic-gate	  nop
9567c478bd9Sstevel@tonic-gate.bcopy_8:
9577c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
9587c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
9597c478bd9Sstevel@tonic-gate	tst	%o3
9607c478bd9Sstevel@tonic-gate	bz,pn	%icc, .bcopy_small		! if zero, disable HW copy
9617c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
9627c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .bcopy_small		! go to small copy
9637c478bd9Sstevel@tonic-gate	  nop
9647c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .bcopy_more		! otherwise go to large copy
9657c478bd9Sstevel@tonic-gate	  nop
9667c478bd9Sstevel@tonic-gate
9677c478bd9Sstevel@tonic-gate	.align	16
/*
 * .bcopy_small: entry for copies that do not use the FP block-copy path.
 *
 * The current t_lofault is kept in %o4 for restoration on exit.  If it
 * is zero the copy starts immediately.  Otherwise .sm_copyerr is
 * installed as the fault handler and TRAMP_FLAG is set in the saved
 * value.
 *
 * t_lofault is read with ldn (pointer width), so the zero test must look
 * at the full-width condition codes.  %ncc is used here, as in the
 * equivalent test at the top of bcopy_more; %icc would examine only the
 * low 32 bits of the handler address.
 */
9687c478bd9Sstevel@tonic-gate.bcopy_small:
9697c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4	! save t_lofault
9707c478bd9Sstevel@tonic-gate	tst	%o4
9717c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .sm_do_copy
9727c478bd9Sstevel@tonic-gate	  nop
9737c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_copyerr), %o5
9747c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_copyerr), %o5
9757c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
9767c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! install new vector
9777c478bd9Sstevel@tonic-gate	or	%o4, TRAMP_FLAG, %o4		! error should trampoline
/*
 * Small copy dispatch and byte-at-a-time paths.
 * %o0 = source, %o1 = destination, %o2 = byte count,
 * %o4 = t_lofault value to put back on exit.
 *
 *   count <= SHORTCOPY		-> .bc_sm_left (trailing bytes only)
 *   count >  CHKSIZE		-> .bc_med
 *   (src | dst) & 3 == 0	-> .bc_sm_word
 *   otherwise			-> byte loop below
 *
 * Each test is issued in the delay slot of the previous branch.  The
 * "sub %o2, 3" at .bc_sm_movebytes is also the non-annulled delay slot
 * of the branch to .bc_sm_word, so that path starts with the count
 * already reduced by 3.
 *
 * .bc_sm_notalign4 moves four bytes per pass.  With the count biased by
 * -3, bgt repeats the pass while four or more bytes would still remain
 * afterwards; the last stb rides in the branch delay slot.
 *
 * .bc_sm_left moves the remaining bytes (at most three).  The deccc in
 * the first delay slot runs on both paths and feeds the next bz.  The
 * three-byte case ends with its own inline copy of the exit sequence:
 * clear TRAMP_FLAG from %o4, restore t_lofault, return 0.
 */
9787c478bd9Sstevel@tonic-gate.sm_do_copy:
9797c478bd9Sstevel@tonic-gate	cmp	%o2, SHORTCOPY		! check for really short case
9807c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .bc_sm_left	!
9817c478bd9Sstevel@tonic-gate	  cmp	%o2, CHKSIZE		! check for medium length cases
9827c478bd9Sstevel@tonic-gate	bgu,pn	%ncc, .bc_med		!
9837c478bd9Sstevel@tonic-gate	  or	%o0, %o1, %o3		! prepare alignment check
9847c478bd9Sstevel@tonic-gate	andcc	%o3, 0x3, %g0		! test for alignment
9857c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_word	! branch to word aligned case
9867c478bd9Sstevel@tonic-gate.bc_sm_movebytes:
9877c478bd9Sstevel@tonic-gate	  sub	%o2, 3, %o2		! adjust count to allow cc zero test
9887c478bd9Sstevel@tonic-gate.bc_sm_notalign4:
9897c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! read byte
9907c478bd9Sstevel@tonic-gate	stb	%o3, [%o1]		! write byte
9917c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
9927c478bd9Sstevel@tonic-gate	ldub	[%o0 + 1], %o3		! repeat for a total of 4 bytes
9937c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
9947c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 + 1]
9957c478bd9Sstevel@tonic-gate	ldub	[%o0 - 2], %o3
9967c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
9977c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 - 2]
9987c478bd9Sstevel@tonic-gate	ldub	[%o0 - 1], %o3
9997c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_sm_notalign4	! loop til 3 or fewer bytes remain
10007c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 - 1]
10017c478bd9Sstevel@tonic-gate	add	%o2, 3, %o2		! restore count
10027c478bd9Sstevel@tonic-gate.bc_sm_left:
10037c478bd9Sstevel@tonic-gate	tst	%o2
10047c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit	! check for zero length
10057c478bd9Sstevel@tonic-gate	  deccc	%o2			! reduce count for cc test
10067c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! move one byte
10077c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
10087c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1]
10097c478bd9Sstevel@tonic-gate	ldub	[%o0 + 1], %o3		! move another byte
10107c478bd9Sstevel@tonic-gate	deccc	%o2			! check for more
10117c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
10127c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 + 1]
10137c478bd9Sstevel@tonic-gate	ldub	[%o0 + 2], %o3		! move final byte
10147c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 + 2]
10157c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
10167c478bd9Sstevel@tonic-gate	andn	%o4, TRAMP_FLAG, %o4
10177c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
10187c478bd9Sstevel@tonic-gate	retl
10197c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
/*
 * Word-aligned small copy, two words (8 bytes) per pass.
 *
 * .bc_sm_wordx is entered from .bc_sm_word with the first word already
 * in %o3 and the count biased by -7; later passes re-enter at
 * .bc_sm_words, which reloads %o3.  The loop repeats while 8 or more
 * bytes remain.  "addcc %o2, 7" then recovers the leftover byte count
 * (0-7):  0 -> .bc_sm_exit, 1 -> .bc_sm_byte, otherwise the halfword
 * loop.
 *
 * .bc_sm_half is also the tail used by the .bc_med_* paths; it expects
 * %o2 = (bytes left - 1).  Its first instruction doubles as the delay
 * slot of the preceding bz, which is harmless on the .bc_sm_byte path
 * because that path does not read %o2 again.
 *
 * .bc_sm_byte moves the final byte, clears TRAMP_FLAG from %o4, restores
 * t_lofault and returns 0.
 */
10207c478bd9Sstevel@tonic-gate	.align	16
10217c478bd9Sstevel@tonic-gate	nop				! instruction alignment
10227c478bd9Sstevel@tonic-gate					! see discussion at start of file
10237c478bd9Sstevel@tonic-gate.bc_sm_words:
10247c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
10257c478bd9Sstevel@tonic-gate.bc_sm_wordx:
10267c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! update count
10277c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! write word
10287c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! update SRC
10297c478bd9Sstevel@tonic-gate	lduw	[%o0 - 4], %o3		! read word
10307c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! update DST
10317c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_sm_words	! loop til done
10327c478bd9Sstevel@tonic-gate	  stw	%o3, [%o1 - 4]		! write word
10337c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
10347c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
10357c478bd9Sstevel@tonic-gate	  deccc	%o2
10367c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_byte
10377c478bd9Sstevel@tonic-gate.bc_sm_half:
10387c478bd9Sstevel@tonic-gate	  subcc	%o2, 2, %o2		! reduce count by 2
10397c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
10407c478bd9Sstevel@tonic-gate	lduh	[%o0 - 2], %o3		! read half word
10417c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
10427c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_sm_half	! loop til done
10437c478bd9Sstevel@tonic-gate	  sth	%o3, [%o1 - 2]		! write half word
10447c478bd9Sstevel@tonic-gate	addcc	%o2, 1, %o2		! restore count
10457c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
10467c478bd9Sstevel@tonic-gate	  nop
10477c478bd9Sstevel@tonic-gate.bc_sm_byte:
10487c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3
10497c478bd9Sstevel@tonic-gate	stb	%o3, [%o1]
10507c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
10517c478bd9Sstevel@tonic-gate	andn	%o4, TRAMP_FLAG, %o4
10527c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
10537c478bd9Sstevel@tonic-gate	retl
10547c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
10557c478bd9Sstevel@tonic-gate
/*
 * .bc_sm_word: entry for the word-aligned small copy.  When reached from
 * the dispatch at .sm_do_copy, %o2 has already been reduced by 3 in that
 * branch's delay slot.  With 8 or more bytes to move, control joins the
 * two-word loop at .bc_sm_wordx, the first word having been loaded in the
 * delay slot.  Otherwise that single word is stored and up to three
 * trailing bytes follow, each store sitting in the delay slot of the
 * test that decides whether it was the last one.
 *
 * .bc_sm_exit is the shared successful exit for the small and medium
 * paths: clear TRAMP_FLAG from the saved value in %o4, restore
 * t_lofault and return 0.
 */
10567c478bd9Sstevel@tonic-gate.bc_sm_word:
10577c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! update count
10587c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_sm_wordx
10597c478bd9Sstevel@tonic-gate	  lduw	[%o0], %o3		! read word
10607c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore count
10617c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
10627c478bd9Sstevel@tonic-gate	  stw	%o3, [%o1]		! write word
10637c478bd9Sstevel@tonic-gate	deccc	%o2			! reduce count for cc test
10647c478bd9Sstevel@tonic-gate	ldub	[%o0 + 4], %o3		! load one byte
10657c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
10667c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 + 4]		! store one byte
10677c478bd9Sstevel@tonic-gate	ldub	[%o0 + 5], %o3		! load second byte
10687c478bd9Sstevel@tonic-gate	deccc	%o2
10697c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
10707c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 + 5]		! store second byte
10717c478bd9Sstevel@tonic-gate	ldub	[%o0 + 6], %o3		! load third byte
10727c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 + 6]		! store third byte
10737c478bd9Sstevel@tonic-gate.bc_sm_exit:
10747c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
10757c478bd9Sstevel@tonic-gate	andn	%o4, TRAMP_FLAG, %o4
10767c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
10777c478bd9Sstevel@tonic-gate	retl
10787c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
10797c478bd9Sstevel@tonic-gate
/*
 * Medium copy (reached when count > CHKSIZE).  The xor of the two
 * addresses shows the widest unit at which source and destination can
 * be aligned together:
 *   bit 0 set			-> byte loop (.bc_sm_movebytes)
 *   any of bits 0-1 set	-> .bc_med_half
 *   any of bits 0-2 set	-> .bc_med_word
 *   otherwise			-> .bc_med_long (falls through)
 *
 * .bc_med_long: single bytes are moved until the source is word aligned
 * (the "dec %o2" in the loop's delay slot counts each one), then one
 * word if the source is not yet 8-byte aligned.  The main loop moves 32
 * bytes per pass with the count biased by -31.  "addcc %o2, 24" leaves a
 * bias of -7 for the 8-byte loop at .bc_med_lword, and "addcc %o2, 7" at
 * .bc_med_lextra recovers the leftover byte count:
 *   0 -> .bc_sm_exit, 1 -> .bc_sm_byte, otherwise .bc_sm_half
 * (entered with %o2 = bytes left - 1, after the deccc).
 */
10807c478bd9Sstevel@tonic-gate	.align 16
10817c478bd9Sstevel@tonic-gate.bc_med:
10827c478bd9Sstevel@tonic-gate	xor	%o0, %o1, %o3		! setup alignment check
10837c478bd9Sstevel@tonic-gate	btst	1, %o3
10847c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .bc_sm_movebytes	! unaligned
10857c478bd9Sstevel@tonic-gate	  nop
10867c478bd9Sstevel@tonic-gate	btst	3, %o3
10877c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .bc_med_half	! halfword aligned
10887c478bd9Sstevel@tonic-gate	  nop
10897c478bd9Sstevel@tonic-gate	btst	7, %o3
10907c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .bc_med_word	! word aligned
10917c478bd9Sstevel@tonic-gate	  nop
10927c478bd9Sstevel@tonic-gate.bc_med_long:
10937c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
10947c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_med_long1	! word alignment
10957c478bd9Sstevel@tonic-gate	  nop
10967c478bd9Sstevel@tonic-gate.bc_med_long0:
10977c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
10987c478bd9Sstevel@tonic-gate	inc	%o0
10997c478bd9Sstevel@tonic-gate	stb	%o3,[%o1]		! store byte
11007c478bd9Sstevel@tonic-gate	inc	%o1
11017c478bd9Sstevel@tonic-gate	btst	3, %o0
11027c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .bc_med_long0
11037c478bd9Sstevel@tonic-gate	  dec	%o2
11047c478bd9Sstevel@tonic-gate.bc_med_long1:			! word aligned
11057c478bd9Sstevel@tonic-gate	btst	7, %o0			! check for long word
11067c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_med_long2
11077c478bd9Sstevel@tonic-gate	  nop
11087c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! load word
11097c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
11107c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! store word
11117c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
11127c478bd9Sstevel@tonic-gate	sub	%o2, 4, %o2		! reduce count by 4
11137c478bd9Sstevel@tonic-gate!
11147c478bd9Sstevel@tonic-gate!  Now long word aligned and have at least 32 bytes to move
11157c478bd9Sstevel@tonic-gate!
11167c478bd9Sstevel@tonic-gate.bc_med_long2:
11177c478bd9Sstevel@tonic-gate	sub	%o2, 31, %o2		! adjust count to allow cc zero test
11187c478bd9Sstevel@tonic-gate.bc_med_lmove:
11197c478bd9Sstevel@tonic-gate	ldx	[%o0], %o3		! read long word
11207c478bd9Sstevel@tonic-gate	stx	%o3, [%o1]		! write long word
11217c478bd9Sstevel@tonic-gate	subcc	%o2, 32, %o2		! reduce count by 32
11227c478bd9Sstevel@tonic-gate	ldx	[%o0 + 8], %o3		! repeat for a total for 4 long words
11237c478bd9Sstevel@tonic-gate	add	%o0, 32, %o0		! advance SRC by 32
11247c478bd9Sstevel@tonic-gate	stx	%o3, [%o1 + 8]
11257c478bd9Sstevel@tonic-gate	ldx	[%o0 - 16], %o3
11267c478bd9Sstevel@tonic-gate	add	%o1, 32, %o1		! advance DST by 32
11277c478bd9Sstevel@tonic-gate	stx	%o3, [%o1 - 16]
11287c478bd9Sstevel@tonic-gate	ldx	[%o0 - 8], %o3
11297c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_med_lmove	! loop til 31 or fewer bytes left
11307c478bd9Sstevel@tonic-gate	  stx	%o3, [%o1 - 8]
11317c478bd9Sstevel@tonic-gate	addcc	%o2, 24, %o2		! restore count to long word offset
11327c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .bc_med_lextra	! check for more long words to move
11337c478bd9Sstevel@tonic-gate	  nop
11347c478bd9Sstevel@tonic-gate.bc_med_lword:
11357c478bd9Sstevel@tonic-gate	ldx	[%o0], %o3		! read long word
11367c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
11377c478bd9Sstevel@tonic-gate	stx	%o3, [%o1]		! write long word
11387c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
11397c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_med_lword	! loop til 7 or fewer bytes left
11407c478bd9Sstevel@tonic-gate	  add	%o1, 8, %o1		! advance DST by 8
11417c478bd9Sstevel@tonic-gate.bc_med_lextra:
11427c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore rest of count
11437c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit	! if zero, then done
11447c478bd9Sstevel@tonic-gate	  deccc	%o2
11457c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_byte
11467c478bd9Sstevel@tonic-gate	  nop
11477c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .bc_sm_half
11487c478bd9Sstevel@tonic-gate	  nop
11497c478bd9Sstevel@tonic-gate
/*
 * Medium copy where source and destination agree only to word (4-byte)
 * alignment.  Single bytes are moved until the source is word aligned,
 * then 16 bytes per pass as four word load/store pairs with the count
 * biased by -15.  "addcc %o2, 12" leaves a bias of -3 for the one-word
 * loop at .bc_med_word2, and "addcc %o2, 3" at .bc_med_wextra recovers
 * the leftover byte count:
 *   0 -> .bc_sm_exit, 1 -> .bc_sm_byte, otherwise .bc_sm_half
 * (entered with %o2 = bytes left - 1, after the deccc).
 */
11507c478bd9Sstevel@tonic-gate	.align 16
11517c478bd9Sstevel@tonic-gate.bc_med_word:
11527c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
11537c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_med_word1	! word alignment
11547c478bd9Sstevel@tonic-gate	  nop
11557c478bd9Sstevel@tonic-gate.bc_med_word0:
11567c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
11577c478bd9Sstevel@tonic-gate	inc	%o0
11587c478bd9Sstevel@tonic-gate	stb	%o3,[%o1]		! store byte
11597c478bd9Sstevel@tonic-gate	inc	%o1
11607c478bd9Sstevel@tonic-gate	btst	3, %o0
11617c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .bc_med_word0
11627c478bd9Sstevel@tonic-gate	  dec	%o2
11637c478bd9Sstevel@tonic-gate!
11647c478bd9Sstevel@tonic-gate!  Now word aligned and have at least 36 bytes to move
11657c478bd9Sstevel@tonic-gate!
11667c478bd9Sstevel@tonic-gate.bc_med_word1:
11677c478bd9Sstevel@tonic-gate	sub	%o2, 15, %o2		! adjust count to allow cc zero test
11687c478bd9Sstevel@tonic-gate.bc_med_wmove:
11697c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
11707c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! write word
11717c478bd9Sstevel@tonic-gate	subcc	%o2, 16, %o2		! reduce count by 16
11727c478bd9Sstevel@tonic-gate	lduw	[%o0 + 4], %o3		! repeat for a total for 4 words
11737c478bd9Sstevel@tonic-gate	add	%o0, 16, %o0		! advance SRC by 16
11747c478bd9Sstevel@tonic-gate	stw	%o3, [%o1 + 4]
11757c478bd9Sstevel@tonic-gate	lduw	[%o0 - 8], %o3
11767c478bd9Sstevel@tonic-gate	add	%o1, 16, %o1		! advance DST by 16
11777c478bd9Sstevel@tonic-gate	stw	%o3, [%o1 - 8]
11787c478bd9Sstevel@tonic-gate	lduw	[%o0 - 4], %o3
11797c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_med_wmove	! loop til 15 or fewer bytes left
11807c478bd9Sstevel@tonic-gate	  stw	%o3, [%o1 - 4]
11817c478bd9Sstevel@tonic-gate	addcc	%o2, 12, %o2		! restore count to word offset
11827c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .bc_med_wextra	! check for more words to move
11837c478bd9Sstevel@tonic-gate	  nop
11847c478bd9Sstevel@tonic-gate.bc_med_word2:
11857c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
11867c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
11877c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! write word
11887c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
11897c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_med_word2	! loop til 3 or fewer bytes left
11907c478bd9Sstevel@tonic-gate	  add	%o1, 4, %o1		! advance DST by 4
11917c478bd9Sstevel@tonic-gate.bc_med_wextra:
11927c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore rest of count
11937c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit	! if zero, then done
11947c478bd9Sstevel@tonic-gate	  deccc	%o2
11957c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_byte
11967c478bd9Sstevel@tonic-gate	  nop
11977c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .bc_sm_half
11987c478bd9Sstevel@tonic-gate	  nop
11997c478bd9Sstevel@tonic-gate
/*
 * Medium copy where source and destination agree only to halfword
 * (2-byte) alignment.  One byte is moved first if the source address is
 * odd.  The main loop then moves 8 bytes per pass as four halfword
 * load/store pairs with the count biased by -7.  "addcc %o2, 7" recovers
 * the leftover byte count:
 *   0 -> .bc_sm_exit, 1 -> .bc_sm_byte, otherwise .bc_sm_half
 * (entered with %o2 = bytes left - 1, after the deccc).
 */
12007c478bd9Sstevel@tonic-gate	.align 16
12017c478bd9Sstevel@tonic-gate.bc_med_half:
12027c478bd9Sstevel@tonic-gate	btst	1, %o0			! check for
12037c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_med_half1	! half word alignment
12047c478bd9Sstevel@tonic-gate	  nop
12057c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
12067c478bd9Sstevel@tonic-gate	inc	%o0
12077c478bd9Sstevel@tonic-gate	stb	%o3,[%o1]		! store byte
12087c478bd9Sstevel@tonic-gate	inc	%o1
12097c478bd9Sstevel@tonic-gate	dec	%o2
12107c478bd9Sstevel@tonic-gate!
12117c478bd9Sstevel@tonic-gate!  Now half word aligned and have at least 38 bytes to move
12127c478bd9Sstevel@tonic-gate!
12137c478bd9Sstevel@tonic-gate.bc_med_half1:
12147c478bd9Sstevel@tonic-gate	sub	%o2, 7, %o2		! adjust count to allow cc zero test
12157c478bd9Sstevel@tonic-gate.bc_med_hmove:
12167c478bd9Sstevel@tonic-gate	lduh	[%o0], %o3		! read half word
12177c478bd9Sstevel@tonic-gate	sth	%o3, [%o1]		! write half word
12187c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
12197c478bd9Sstevel@tonic-gate	lduh	[%o0 + 2], %o3		! repeat for a total for 4 halfwords
12207c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
12217c478bd9Sstevel@tonic-gate	sth	%o3, [%o1 + 2]
12227c478bd9Sstevel@tonic-gate	lduh	[%o0 - 4], %o3
12237c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
12247c478bd9Sstevel@tonic-gate	sth	%o3, [%o1 - 4]
12257c478bd9Sstevel@tonic-gate	lduh	[%o0 - 2], %o3
12267c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .bc_med_hmove	! loop til 7 or fewer bytes left
12277c478bd9Sstevel@tonic-gate	  sth	%o3, [%o1 - 2]
12287c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
12297c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_exit
12307c478bd9Sstevel@tonic-gate	  deccc	%o2
12317c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .bc_sm_byte
12327c478bd9Sstevel@tonic-gate	  nop
12337c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .bc_sm_half
12347c478bd9Sstevel@tonic-gate	  nop
12357c478bd9Sstevel@tonic-gate
12367c478bd9Sstevel@tonic-gate	SET_SIZE(bcopy)
12377c478bd9Sstevel@tonic-gate
12387c478bd9Sstevel@tonic-gate/*
12397c478bd9Sstevel@tonic-gate * The _more entry points are not intended to be used directly by
12407c478bd9Sstevel@tonic-gate * any caller from outside this file.  They are provided to allow
12417c478bd9Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses
12427c478bd9Sstevel@tonic-gate * the floating point registers.
12437c478bd9Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of
12447c478bd9Sstevel@tonic-gate * 4/2004) does not support leaf functions.
12457c478bd9Sstevel@tonic-gate */
12467c478bd9Sstevel@tonic-gate
/*
 * bcopy_more: large-copy entry, reached from bcopy via .bcopy_more.
 *
 * Takes a new register window whose frame includes HWCOPYFRAMESIZE bytes
 * of save area, so the arguments are now in %i0-%i2.  The current
 * t_lofault is kept in %l6.  If it is non-zero, .copyerr is installed as
 * the fault handler and TRAMP_FLAG is set in the saved value; if it is
 * zero, no handler is installed and control goes straight to .do_copy.
 */
12477c478bd9Sstevel@tonic-gate	ENTRY(bcopy_more)
1248*5d9d9091SRichard Lowe.bcopy_more:
12497c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
12507c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %l6	! save t_lofault
12517c478bd9Sstevel@tonic-gate	tst	%l6
12527c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .do_copy
12537c478bd9Sstevel@tonic-gate	  nop
12547c478bd9Sstevel@tonic-gate	sethi	%hi(.copyerr), %o2
12557c478bd9Sstevel@tonic-gate	or	%o2, %lo(.copyerr), %o2
12567c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
12577c478bd9Sstevel@tonic-gate	stn	%o2, [THREAD_REG + T_LOFAULT]	! install new vector
12587c478bd9Sstevel@tonic-gate	!
12597c478bd9Sstevel@tonic-gate	! We've already captured whether t_lofault was zero on entry.
12607c478bd9Sstevel@tonic-gate	! We need to mark ourselves as being from bcopy since both
12617c478bd9Sstevel@tonic-gate	! kcopy and bcopy use the same code path. If TRAMP_FLAG is set
12627c478bd9Sstevel@tonic-gate	! and the saved lofault was zero, we won't reset lofault on
12637c478bd9Sstevel@tonic-gate	! returning.
12647c478bd9Sstevel@tonic-gate	!
12657c478bd9Sstevel@tonic-gate	or	%l6, TRAMP_FLAG, %l6
12667c478bd9Sstevel@tonic-gate
12677c478bd9Sstevel@tonic-gate/*
12687c478bd9Sstevel@tonic-gate * Copies that reach here are larger than VIS_COPY_THRESHOLD bytes.
12697c478bd9Sstevel@tonic-gate * Also, use of the FP registers has already been checked and found enabled.
12707c478bd9Sstevel@tonic-gate */
/*
 * Prepare the FPU for the block copy.
 *
 * FP_NOMIGRATE and BST_FPQ1Q3_TOSTACK are macros defined outside this
 * section (presumably: pin the thread while FP state is live, and spill
 * the FP registers this routine uses to the stack frame -- confirm
 * against their definitions).
 *
 * The entry value of %fprs is stored in the frame.  bz,a annuls its
 * delay slot when the branch is not taken, so:
 *   FEF clear -> branch taken, the delay slot enables the FPU, and the
 *		  register save is skipped;
 *   FEF set   -> delay slot annulled, BST_FPQ1Q3_TOSTACK runs.
 * %gsr is saved as well, and FPUSED_FLAG is recorded in %l6 alongside
 * the saved t_lofault.
 */
12717c478bd9Sstevel@tonic-gate.do_copy:
12727c478bd9Sstevel@tonic-gate	FP_NOMIGRATE(6, 7)
12737c478bd9Sstevel@tonic-gate
12747c478bd9Sstevel@tonic-gate	rd	%fprs, %o2		! check for unused fp
12757c478bd9Sstevel@tonic-gate	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
12767c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o2
12777c478bd9Sstevel@tonic-gate	bz,a,pt	%icc, .do_blockcopy
12787c478bd9Sstevel@tonic-gate	  wr	%g0, FPRS_FEF, %fprs
12797c478bd9Sstevel@tonic-gate
12807c478bd9Sstevel@tonic-gate	BST_FPQ1Q3_TOSTACK(%o2)
12817c478bd9Sstevel@tonic-gate
12827c478bd9Sstevel@tonic-gate.do_blockcopy:
12837c478bd9Sstevel@tonic-gate	rd	%gsr, %o2
12847c478bd9Sstevel@tonic-gate	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
12857c478bd9Sstevel@tonic-gate	or	%l6, FPUSED_FLAG, %l6
12867c478bd9Sstevel@tonic-gate
12877c478bd9Sstevel@tonic-gate#define	REALSRC	%i0
12887c478bd9Sstevel@tonic-gate#define	DST	%i1
12897c478bd9Sstevel@tonic-gate#define	CNT	%i2
12907c478bd9Sstevel@tonic-gate#define	SRC	%i3
12917c478bd9Sstevel@tonic-gate#define	TMP	%i5
12927c478bd9Sstevel@tonic-gate
/*
 * Move leading bytes until DST is VIS_BLOCKSIZE aligned.
 *
 * TMP = DST & (VIS_BLOCKSIZE - 1); if that is zero the whole step is
 * skipped (branch to 2f; the neg in the delay slot is then a no-op).
 * Otherwise TMP becomes VIS_BLOCKSIZE - TMP, the number of bytes needed,
 * and is subtracted from CNT in the delay slot of the next branch.
 *
 * With more than 3 bytes to move, .bc_blkalign copies four per pass
 * (TMP biased by -3, unsigned bgu test); whatever is left, or the whole
 * amount when it was 3 or fewer, is moved one byte at a time at 1:.
 * SRC serves only as a scratch data register in this section.
 */
12937c478bd9Sstevel@tonic-gate	andcc	DST, VIS_BLOCKSIZE - 1, TMP
12947c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f
12957c478bd9Sstevel@tonic-gate	  neg	TMP
12967c478bd9Sstevel@tonic-gate	add	TMP, VIS_BLOCKSIZE, TMP
12977c478bd9Sstevel@tonic-gate
12987c478bd9Sstevel@tonic-gate	! TMP = bytes required to align DST on FP_BLOCK boundary
12997c478bd9Sstevel@tonic-gate	! Using SRC as a tmp here
13007c478bd9Sstevel@tonic-gate	cmp	TMP, 3
13017c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, 1f
13027c478bd9Sstevel@tonic-gate	  sub	CNT,TMP,CNT		! adjust main count
13037c478bd9Sstevel@tonic-gate	sub	TMP, 3, TMP		! adjust for end of loop test
13047c478bd9Sstevel@tonic-gate.bc_blkalign:
13057c478bd9Sstevel@tonic-gate	ldub	[REALSRC], SRC		! move 4 bytes per loop iteration
13067c478bd9Sstevel@tonic-gate	stb	SRC, [DST]
13077c478bd9Sstevel@tonic-gate	subcc	TMP, 4, TMP
13087c478bd9Sstevel@tonic-gate	ldub	[REALSRC + 1], SRC
13097c478bd9Sstevel@tonic-gate	add	REALSRC, 4, REALSRC
13107c478bd9Sstevel@tonic-gate	stb	SRC, [DST + 1]
13117c478bd9Sstevel@tonic-gate	ldub	[REALSRC - 2], SRC
13127c478bd9Sstevel@tonic-gate	add	DST, 4, DST
13137c478bd9Sstevel@tonic-gate	stb	SRC, [DST - 2]
13147c478bd9Sstevel@tonic-gate	ldub	[REALSRC - 1], SRC
13157c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, .bc_blkalign
13167c478bd9Sstevel@tonic-gate	  stb	SRC, [DST - 1]
13177c478bd9Sstevel@tonic-gate
13187c478bd9Sstevel@tonic-gate	addcc	TMP, 3, TMP		! restore count adjustment
13197c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f		! no bytes left?
13207c478bd9Sstevel@tonic-gate	  nop
13217c478bd9Sstevel@tonic-gate1:	ldub	[REALSRC], SRC
13227c478bd9Sstevel@tonic-gate	inc	REALSRC
13237c478bd9Sstevel@tonic-gate	inc	DST
13247c478bd9Sstevel@tonic-gate	deccc	TMP
13257c478bd9Sstevel@tonic-gate	bgu	%ncc, 1b
13267c478bd9Sstevel@tonic-gate	  stb	SRC, [DST - 1]
13277c478bd9Sstevel@tonic-gate
/*
 * Main VIS block-copy loop.
 *
 * SRC is REALSRC rounded down to 8 bytes; alignaddr is issued with %g0
 * as destination, i.e. only for its effect on the %gsr alignment field
 * that faligndata uses.  Each block is read as eight doublewords into
 * %f0-%f14, realigned into %f32-%f46, and written to DST with a block
 * store (ASI_BLK_P).  Loads for the next block are interleaved with the
 * faligndata of the current one, so on leaving the loop one block is
 * still pending in the registers.
 *
 * The loop at the first 1: repeats while CNT > VIS_BLOCKSIZE + 8.
 * Afterwards:
 *   - CNT == VIS_BLOCKSIZE and REALSRC 8-byte aligned: the code at 2:
 *     stores the pending block, then moves one more full block using
 *     plain register copies (fsrc1) and goes to .bcb_exit.
 *   - otherwise: the code at the first 3: finishes and stores the
 *     pending block, and the remaining CNT bytes (if any) are copied one
 *     at a time at 5:.
 * The andcc in the delay slot of the bne is not annulled, so it runs on
 * both paths; its result is only used when the bne falls through.
 */
13287c478bd9Sstevel@tonic-gate2:
13297c478bd9Sstevel@tonic-gate	andn	REALSRC, 0x7, SRC
13307c478bd9Sstevel@tonic-gate	alignaddr REALSRC, %g0, %g0
13317c478bd9Sstevel@tonic-gate
13327c478bd9Sstevel@tonic-gate	! SRC - 8-byte aligned
13337c478bd9Sstevel@tonic-gate	! DST - 64-byte aligned
13347c478bd9Sstevel@tonic-gate	prefetch [SRC], #one_read
13357c478bd9Sstevel@tonic-gate	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
13367c478bd9Sstevel@tonic-gate	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
13377c478bd9Sstevel@tonic-gate	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
13387c478bd9Sstevel@tonic-gate	ldd	[SRC], %f0
13397c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4
13407c478bd9Sstevel@tonic-gate	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
13417c478bd9Sstevel@tonic-gate#endif
13427c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
13437c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5
13447c478bd9Sstevel@tonic-gate	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
13457c478bd9Sstevel@tonic-gate#endif
13467c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
13477c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6
13487c478bd9Sstevel@tonic-gate	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
13497c478bd9Sstevel@tonic-gate#endif
13507c478bd9Sstevel@tonic-gate	faligndata %f0, %f2, %f32
13517c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
13527c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7
13537c478bd9Sstevel@tonic-gate	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
13547c478bd9Sstevel@tonic-gate#endif
13557c478bd9Sstevel@tonic-gate	faligndata %f2, %f4, %f34
13567c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
13577c478bd9Sstevel@tonic-gate	faligndata %f4, %f6, %f36
13587c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
13597c478bd9Sstevel@tonic-gate	faligndata %f6, %f8, %f38
13607c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
13617c478bd9Sstevel@tonic-gate	faligndata %f8, %f10, %f40
13627c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
13637c478bd9Sstevel@tonic-gate	faligndata %f10, %f12, %f42
13647c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f0
13657c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
13667c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
13677c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
13687c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, 1f
13697c478bd9Sstevel@tonic-gate	  nop
13707c478bd9Sstevel@tonic-gate	.align	16
13717c478bd9Sstevel@tonic-gate1:
13727c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
13737c478bd9Sstevel@tonic-gate	faligndata %f12, %f14, %f44
13747c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
13757c478bd9Sstevel@tonic-gate	faligndata %f14, %f0, %f46
13767c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P
13777c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
13787c478bd9Sstevel@tonic-gate	faligndata %f0, %f2, %f32
13797c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
13807c478bd9Sstevel@tonic-gate	faligndata %f2, %f4, %f34
13817c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
13827c478bd9Sstevel@tonic-gate	faligndata %f4, %f6, %f36
13837c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
13847c478bd9Sstevel@tonic-gate	faligndata %f6, %f8, %f38
13857c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
13867c478bd9Sstevel@tonic-gate	faligndata %f8, %f10, %f40
13877c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
13887c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f0
13897c478bd9Sstevel@tonic-gate	faligndata %f10, %f12, %f42
13907c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
13917c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
13927c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
13937c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
13947c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE + 8
13957c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 1b
13967c478bd9Sstevel@tonic-gate	  add	SRC, VIS_BLOCKSIZE, SRC
13977c478bd9Sstevel@tonic-gate
13987c478bd9Sstevel@tonic-gate	! only if REALSRC & 0x7 is 0
13997c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE
14007c478bd9Sstevel@tonic-gate	bne	%ncc, 3f
14017c478bd9Sstevel@tonic-gate	  andcc	REALSRC, 0x7, %g0
14027c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f
14037c478bd9Sstevel@tonic-gate	  nop
1404*5d9d9091SRichard Lowe3:
14057c478bd9Sstevel@tonic-gate	faligndata %f12, %f14, %f44
14067c478bd9Sstevel@tonic-gate	faligndata %f14, %f0, %f46
14077c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P
14087c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
14097c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 3f
14107c478bd9Sstevel@tonic-gate	  nop
14117c478bd9Sstevel@tonic-gate2:
14127c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
14137c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44
14147c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
14157c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
14167c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P
14177c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
14187c478bd9Sstevel@tonic-gate	fsrc1	%f0, %f32
14197c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
14207c478bd9Sstevel@tonic-gate	fsrc1	%f2, %f34
14217c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
14227c478bd9Sstevel@tonic-gate	fsrc1	%f4, %f36
14237c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
14247c478bd9Sstevel@tonic-gate	fsrc1	%f6, %f38
14257c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
14267c478bd9Sstevel@tonic-gate	fsrc1	%f8, %f40
14277c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
14287c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
14297c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
14307c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
14317c478bd9Sstevel@tonic-gate	fsrc1	%f10, %f42
14327c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44
14337c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
14347c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P
14357c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
14367c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, .bcb_exit
14377c478bd9Sstevel@tonic-gate	  nop
14387c478bd9Sstevel@tonic-gate
14397c478bd9Sstevel@tonic-gate3:	tst	CNT
14407c478bd9Sstevel@tonic-gate	bz,a,pt	%ncc, .bcb_exit
14417c478bd9Sstevel@tonic-gate	  nop
14427c478bd9Sstevel@tonic-gate
14437c478bd9Sstevel@tonic-gate5:	ldub	[REALSRC], TMP
14447c478bd9Sstevel@tonic-gate	inc	REALSRC
14457c478bd9Sstevel@tonic-gate	inc	DST
14467c478bd9Sstevel@tonic-gate	deccc	CNT
14477c478bd9Sstevel@tonic-gate	bgu	%ncc, 5b
14487c478bd9Sstevel@tonic-gate	  stb	TMP, [DST - 1]
/*
 * .bcb_exit: common successful exit from the block-copy path.
 *
 * After a membar #Sync the FPRAS_* macros run (defined outside this
 * section).  %gsr is then restored from the frame.  The %fprs value
 * saved on entry decides how the FP registers are handed back:
 *   FEF was set   -> BLD_FPQ1Q3_FROMSTACK runs (presumably reloading the
 *		      registers spilled on entry), and %fprs is restored
 *		      in the delay slot of the branch to 2:;
 *   FEF was clear -> FZEROQ1Q3 runs (presumably zeroing the registers
 *		      used here) before %fprs is restored.
 * Finally the bits in MASK_FLAGS are stripped from %l6, t_lofault is
 * restored from it, and the routine returns 0 via ret/restore.
 */
14497c478bd9Sstevel@tonic-gate.bcb_exit:
14507c478bd9Sstevel@tonic-gate	membar	#Sync
14517c478bd9Sstevel@tonic-gate
14527c478bd9Sstevel@tonic-gate	FPRAS_INTERVAL(FPRAS_BCOPY, 0, %l5, %o2, %o3, %o4, %o5, 8)
14537c478bd9Sstevel@tonic-gate	FPRAS_REWRITE_TYPE2Q1(0, %l5, %o2, %o3, 8, 9)
14547c478bd9Sstevel@tonic-gate	FPRAS_CHECK(FPRAS_BCOPY, %l5, 9)	! outputs lost
14557c478bd9Sstevel@tonic-gate
14567c478bd9Sstevel@tonic-gate	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
14577c478bd9Sstevel@tonic-gate	wr	%o2, 0, %gsr
14587c478bd9Sstevel@tonic-gate
14597c478bd9Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
14607c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o3
14617c478bd9Sstevel@tonic-gate	bz,pt	%icc, 4f
14627c478bd9Sstevel@tonic-gate	  nop
14637c478bd9Sstevel@tonic-gate
14647c478bd9Sstevel@tonic-gate	BLD_FPQ1Q3_FROMSTACK(%o2)
14657c478bd9Sstevel@tonic-gate
1466*5d9d9091SRichard Lowe	ba,pt	%ncc, 2f
14677c478bd9Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
14687c478bd9Sstevel@tonic-gate4:
14697c478bd9Sstevel@tonic-gate	FZEROQ1Q3
14707c478bd9Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
14717c478bd9Sstevel@tonic-gate2:
14727c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
14737c478bd9Sstevel@tonic-gate	andn	%l6, MASK_FLAGS, %l6
14747c478bd9Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
14757c478bd9Sstevel@tonic-gate	FP_ALLOWMIGRATE(5, 6)
14767c478bd9Sstevel@tonic-gate	ret
14777c478bd9Sstevel@tonic-gate	  restore	%g0, 0, %o0
14787c478bd9Sstevel@tonic-gate
14797c478bd9Sstevel@tonic-gate	SET_SIZE(bcopy_more)
14807c478bd9Sstevel@tonic-gate
14817c478bd9Sstevel@tonic-gate/*
14827c478bd9Sstevel@tonic-gate * Block copy with possibly overlapped operands.
14837c478bd9Sstevel@tonic-gate */
14847c478bd9Sstevel@tonic-gate
/*
 * ovbcopy: %o0 = from, %o1 = to, %o2 = byte count.  Leaf routine; %o3 is
 * scratch.
 *
 * A zero count returns immediately (the bgu after tst is taken for any
 * non-zero count; its annulled delay slot computes from - to only on
 * that path).  |from - to| is then formed in %o3.  If count <= |from -
 * to| the regions cannot overlap, and control branches to bcopy with
 * %o0-%o2 intact, so bcopy returns directly to this routine's caller.
 * The cmp of the two addresses sits in that branch's delay slot.
 *
 * Overlapping regions are copied a byte at a time:
 *   from >= to -> .ov_fwd, ascending addresses;
 *   from <  to -> .ov_bkwd, descending, using the decremented count as
 *		   the index so the highest byte moves first.
 * Either direction reads each source byte before it can be overwritten.
 */
14857c478bd9Sstevel@tonic-gate	ENTRY(ovbcopy)
14867c478bd9Sstevel@tonic-gate	tst	%o2			! check count
14877c478bd9Sstevel@tonic-gate	bgu,a	%ncc, 1f		! nothing to do or bad arguments
14887c478bd9Sstevel@tonic-gate	  subcc	%o0, %o1, %o3		! difference of from and to address
14897c478bd9Sstevel@tonic-gate
14907c478bd9Sstevel@tonic-gate	retl				! return
14917c478bd9Sstevel@tonic-gate	  nop
14927c478bd9Sstevel@tonic-gate1:
14937c478bd9Sstevel@tonic-gate	bneg,a	%ncc, 2f
14947c478bd9Sstevel@tonic-gate	  neg	%o3			! if < 0, make it positive
14957c478bd9Sstevel@tonic-gate2:	cmp	%o2, %o3		! cmp size and abs(from - to)
14967c478bd9Sstevel@tonic-gate	bleu	%ncc, bcopy		! if size <= abs(diff): use bcopy,
14977c478bd9Sstevel@tonic-gate	  .empty				!   no overlap
14987c478bd9Sstevel@tonic-gate	  cmp	%o0, %o1		! compare from and to addresses
14997c478bd9Sstevel@tonic-gate	blu	%ncc, .ov_bkwd		! if from < to, copy backwards
15007c478bd9Sstevel@tonic-gate	  nop
15017c478bd9Sstevel@tonic-gate	!
15027c478bd9Sstevel@tonic-gate	! Copy forwards.
15037c478bd9Sstevel@tonic-gate	!
15047c478bd9Sstevel@tonic-gate.ov_fwd:
15057c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! read from address
15067c478bd9Sstevel@tonic-gate	inc	%o0			! inc from address
15077c478bd9Sstevel@tonic-gate	stb	%o3, [%o1]		! write to address
15087c478bd9Sstevel@tonic-gate	deccc	%o2			! dec count
15097c478bd9Sstevel@tonic-gate	bgu	%ncc, .ov_fwd		! loop till done
15107c478bd9Sstevel@tonic-gate	  inc	%o1			! inc to address
15117c478bd9Sstevel@tonic-gate
15127c478bd9Sstevel@tonic-gate	retl				! return
15137c478bd9Sstevel@tonic-gate	  nop
15147c478bd9Sstevel@tonic-gate	!
15157c478bd9Sstevel@tonic-gate	! Copy backwards.
15167c478bd9Sstevel@tonic-gate	!
15177c478bd9Sstevel@tonic-gate.ov_bkwd:
15187c478bd9Sstevel@tonic-gate	deccc	%o2			! dec count
15197c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o2], %o3	! get byte at end of src
15207c478bd9Sstevel@tonic-gate	bgu	%ncc, .ov_bkwd		! loop till done
15217c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 + %o2]	! delay slot, store at end of dst
15227c478bd9Sstevel@tonic-gate
15237c478bd9Sstevel@tonic-gate	retl				! return
15247c478bd9Sstevel@tonic-gate	  nop
15257c478bd9Sstevel@tonic-gate
15267c478bd9Sstevel@tonic-gate	SET_SIZE(ovbcopy)
15277c478bd9Sstevel@tonic-gate
15287c478bd9Sstevel@tonic-gate
15297c478bd9Sstevel@tonic-gate/*
15307c478bd9Sstevel@tonic-gate * hwblkpagecopy() - copy one page through the fp registers; returns 0
15317c478bd9Sstevel@tonic-gate *
15327c478bd9Sstevel@tonic-gate * Copies exactly one page.  This routine assumes the caller (ppcopy)
15337c478bd9Sstevel@tonic-gate * has already disabled kernel preemption and has checked
15347c478bd9Sstevel@tonic-gate * use_hw_bcopy.  Preventing preemption also prevents cpu migration.
15357c478bd9Sstevel@tonic-gate */
15367c478bd9Sstevel@tonic-gate	ENTRY(hwblkpagecopy)
15377c478bd9Sstevel@tonic-gate	! get another window w/space for three aligned blocks of saved fpregs
15387c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
15397c478bd9Sstevel@tonic-gate
15407c478bd9Sstevel@tonic-gate	! %i0 - source address (arg)
15417c478bd9Sstevel@tonic-gate	! %i1 - destination address (arg)
15427c478bd9Sstevel@tonic-gate	! %i2 - length of region (not arg)
15437c478bd9Sstevel@tonic-gate	! %l0 - saved fprs
15447c478bd9Sstevel@tonic-gate	! %l1 - pointer to saved fpregs
15457c478bd9Sstevel@tonic-gate
15467c478bd9Sstevel@tonic-gate	rd	%fprs, %l0		! %l0 = fprs on entry; check for unused fp
15477c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %l0		! was the fpu enabled on entry?
15487c478bd9Sstevel@tonic-gate	bz,a,pt	%icc, 1f		! no: nothing to save, skip ahead
15497c478bd9Sstevel@tonic-gate	  wr	%g0, FPRS_FEF, %fprs	! annulled slot: runs only if branch taken
15507c478bd9Sstevel@tonic-gate
15517c478bd9Sstevel@tonic-gate	BST_FPQ1Q3_TOSTACK(%l1)		! fp was live: presumably spills the fp regs used below to the frame - TODO confirm
15527c478bd9Sstevel@tonic-gate
15537c478bd9Sstevel@tonic-gate1:	set	PAGESIZE, CNT		! byte count = one page
15547c478bd9Sstevel@tonic-gate	mov	REALSRC, SRC		! working copy of the source pointer
15557c478bd9Sstevel@tonic-gate
15567c478bd9Sstevel@tonic-gate	prefetch [SRC], #one_read	! start prefetching the leading source blocks
15577c478bd9Sstevel@tonic-gate	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
15587c478bd9Sstevel@tonic-gate	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
15597c478bd9Sstevel@tonic-gate	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
15607c478bd9Sstevel@tonic-gate	ldd	[SRC], %f0		! load first block into %f0-%f14, 8 bytes at a time
15617c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4
15627c478bd9Sstevel@tonic-gate	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
15637c478bd9Sstevel@tonic-gate#endif
15647c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
15657c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5
15667c478bd9Sstevel@tonic-gate	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
15677c478bd9Sstevel@tonic-gate#endif
15687c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
15697c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6
15707c478bd9Sstevel@tonic-gate	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
15717c478bd9Sstevel@tonic-gate#endif
15727c478bd9Sstevel@tonic-gate	fsrc1	%f0, %f32		! copy loaded data to staging regs %f32-%f46
15737c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
15747c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7
15757c478bd9Sstevel@tonic-gate	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
15767c478bd9Sstevel@tonic-gate#endif
15777c478bd9Sstevel@tonic-gate	fsrc1	%f2, %f34
15787c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
15797c478bd9Sstevel@tonic-gate	fsrc1	%f4, %f36
15807c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
15817c478bd9Sstevel@tonic-gate	fsrc1	%f6, %f38
15827c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
15837c478bd9Sstevel@tonic-gate	fsrc1	%f8, %f40
15847c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
15857c478bd9Sstevel@tonic-gate	fsrc1	%f10, %f42
15867c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f0	! first 8 bytes of the next block
15877c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT	! account for the block just loaded
15887c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC	! advance source to the next block
15897c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, 2f		! annulled branch: the nop below is not executed
15907c478bd9Sstevel@tonic-gate	  nop
15917c478bd9Sstevel@tonic-gate	.align	16
15927c478bd9Sstevel@tonic-gate2:	! main loop: finish staging previous block, store it, load the next
15937c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
15947c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44		! last two dwords of the previous block
15957c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
15967c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
15977c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P	! block store of staged data to destination
15987c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
15997c478bd9Sstevel@tonic-gate	fsrc1	%f0, %f32		! begin staging the current block
16007c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
16017c478bd9Sstevel@tonic-gate	fsrc1	%f2, %f34
16027c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
16037c478bd9Sstevel@tonic-gate	fsrc1	%f4, %f36
16047c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
16057c478bd9Sstevel@tonic-gate	fsrc1	%f6, %f38
16067c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14
16077c478bd9Sstevel@tonic-gate	fsrc1	%f8, %f40
16087c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f0	! first 8 bytes of the next block
16097c478bd9Sstevel@tonic-gate	fsrc1	%f10, %f42
16107c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
16117c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT	! one more block consumed
16127c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST	! advance destination past the stored block
16137c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE + 8	! loop while count > one block + 8
16147c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
16157c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 2b
16167c478bd9Sstevel@tonic-gate	  add	SRC, VIS_BLOCKSIZE, SRC	! delay slot: advance source (runs either way)
16177c478bd9Sstevel@tonic-gate
16187c478bd9Sstevel@tonic-gate	! trailing block
16197c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f2
16207c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44
16217c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f4
16227c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
16237c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P	! store the second-to-last block
16247c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f6
16257c478bd9Sstevel@tonic-gate	fsrc1	%f0, %f32
16267c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f8
16277c478bd9Sstevel@tonic-gate	fsrc1	%f2, %f34
16287c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f10
16297c478bd9Sstevel@tonic-gate	fsrc1	%f4, %f36
16307c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f12
16317c478bd9Sstevel@tonic-gate	fsrc1	%f6, %f38
16327c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f14	! last load: no next block to start on
16337c478bd9Sstevel@tonic-gate	fsrc1	%f8, %f40
16347c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
16357c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
16367c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
16377c478bd9Sstevel@tonic-gate	fsrc1	%f10, %f42
16387c478bd9Sstevel@tonic-gate	fsrc1	%f12, %f44
16397c478bd9Sstevel@tonic-gate	fsrc1	%f14, %f46
16407c478bd9Sstevel@tonic-gate	stda	%f32, [DST]ASI_BLK_P	! store the final block
16417c478bd9Sstevel@tonic-gate
16427c478bd9Sstevel@tonic-gate	membar	#Sync			! block stores must complete before continuing
16437c478bd9Sstevel@tonic-gate
16447c478bd9Sstevel@tonic-gate	FPRAS_INTERVAL(FPRAS_PGCOPY, 1, %l5, %o2, %o3, %o4, %o5, 8)	! FPRAS_* macros defined outside this chunk
16457c478bd9Sstevel@tonic-gate	FPRAS_REWRITE_TYPE1(1, %l5, %f32, %o2, 9)	! NOTE(review): looks like an fp copy self-check - confirm
16467c478bd9Sstevel@tonic-gate	FPRAS_CHECK(FPRAS_PGCOPY, %l5, 9)	! lose outputs
16477c478bd9Sstevel@tonic-gate
16487c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %l0		! was the fpu enabled on entry?
16497c478bd9Sstevel@tonic-gate	bz,pt	%icc, 2f		! no: nothing was saved, nothing to reload
16507c478bd9Sstevel@tonic-gate	  nop
16517c478bd9Sstevel@tonic-gate
16527c478bd9Sstevel@tonic-gate	BLD_FPQ1Q3_FROMSTACK(%l3)	! presumably reloads the regs spilled at entry; %l3 looks like scratch - TODO confirm
16537c478bd9Sstevel@tonic-gate	ba	3f
16547c478bd9Sstevel@tonic-gate	  nop
16557c478bd9Sstevel@tonic-gate
16567c478bd9Sstevel@tonic-gate2:	FZEROQ1Q3			! presumably zeroes the fp regs used above - TODO confirm
16577c478bd9Sstevel@tonic-gate
16587c478bd9Sstevel@tonic-gate3:	wr	%l0, 0, %fprs		! restore fprs
16597c478bd9Sstevel@tonic-gate	ret
16607c478bd9Sstevel@tonic-gate	  restore	%g0, 0, %o0	! delay slot: pop window, return 0 to caller
16617c478bd9Sstevel@tonic-gate
16627c478bd9Sstevel@tonic-gate	SET_SIZE(hwblkpagecopy)
16637c478bd9Sstevel@tonic-gate
16647c478bd9Sstevel@tonic-gate
16657c478bd9Sstevel@tonic-gate/*
16667c478bd9Sstevel@tonic-gate * Transfer data to and from user space -
16677c478bd9Sstevel@tonic-gate * Note that these routines can cause faults
16687c478bd9Sstevel@tonic-gate * It is assumed that the kernel has nothing at
16697c478bd9Sstevel@tonic-gate * less than KERNELBASE in the virtual address space.
16707c478bd9Sstevel@tonic-gate *
16717c478bd9Sstevel@tonic-gate * Note that copyin(9F) and copyout(9F) are part of the
16727c478bd9Sstevel@tonic-gate * DDI/DKI which specifies that they return '-1' on "errors."
16737c478bd9Sstevel@tonic-gate *
16747c478bd9Sstevel@tonic-gate * Sigh.
16757c478bd9Sstevel@tonic-gate *
16767c478bd9Sstevel@tonic-gate * So there are two extremely similar routines - xcopyin() and xcopyout()
16777c478bd9Sstevel@tonic-gate * which return the errno that we've faithfully computed.  This
16787c478bd9Sstevel@tonic-gate * allows other callers (e.g. uiomove(9F)) to work correctly.
16797c478bd9Sstevel@tonic-gate * Given that these are used pretty heavily, we expand the calling
16807c478bd9Sstevel@tonic-gate * sequences inline for all flavours (rather than making wrappers).
16817c478bd9Sstevel@tonic-gate *
16827c478bd9Sstevel@tonic-gate * There are also stub routines for xcopyout_little and xcopyin_little,
16837c478bd9Sstevel@tonic-gate * which currently are intended to handle requests of <= 16 bytes from
16847c478bd9Sstevel@tonic-gate * do_unaligned. Future enhancement to make them handle 8k pages efficiently
16857c478bd9Sstevel@tonic-gate * is left as an exercise...
16867c478bd9Sstevel@tonic-gate */
16877c478bd9Sstevel@tonic-gate
16887c478bd9Sstevel@tonic-gate/*
16897c478bd9Sstevel@tonic-gate * Copy data between user and kernel space (copyOP/xcopyOP/copyOP_noerr)
1690*5d9d9091SRichard Lowe *
16917c478bd9Sstevel@tonic-gate * General theory of operation:
16927c478bd9Sstevel@tonic-gate *
16937c478bd9Sstevel@tonic-gate * The only difference between copy{in,out} and
16947c478bd9Sstevel@tonic-gate * xcopy{in,out} is in the error handling routine they invoke
16957c478bd9Sstevel@tonic-gate * when a memory access error occurs. xcopyOP returns the errno
16967c478bd9Sstevel@tonic-gate * while copyOP returns -1 (see above). copy{in,out}_noerr set
16977c478bd9Sstevel@tonic-gate * a special flag (by oring the TRAMP_FLAG into the fault handler address)
16987c478bd9Sstevel@tonic-gate * if they are called with a fault handler already in place. That flag
16997c478bd9Sstevel@tonic-gate * causes the default handlers to trampoline to the previous handler
17007c478bd9Sstevel@tonic-gate * upon an error.
17017c478bd9Sstevel@tonic-gate *
17027c478bd9Sstevel@tonic-gate * None of the copyops routines grab a window until it's decided that
17037c478bd9Sstevel@tonic-gate * we need to do a HW block copy operation. This saves a window
17047c478bd9Sstevel@tonic-gate * spill/fill when we're called during socket ops. The typical IO
17057c478bd9Sstevel@tonic-gate * path won't cause spill/fill traps.
17067c478bd9Sstevel@tonic-gate *
17077c478bd9Sstevel@tonic-gate * This code uses a set of 4 limits for the maximum size that will
17087c478bd9Sstevel@tonic-gate * be copied given a particular input/output address alignment.
17097c478bd9Sstevel@tonic-gate * If the value for a particular limit is zero, the copy will be performed
17107c478bd9Sstevel@tonic-gate * by the plain copy loops rather than FPBLK.
17117c478bd9Sstevel@tonic-gate *
17127c478bd9Sstevel@tonic-gate * See the description of bcopy above for more details of the
17137c478bd9Sstevel@tonic-gate * data copying algorithm and the default limits.
17147c478bd9Sstevel@tonic-gate *
17157c478bd9Sstevel@tonic-gate */
17167c478bd9Sstevel@tonic-gate
17177c478bd9Sstevel@tonic-gate/*
17187c478bd9Sstevel@tonic-gate * Copy kernel data to user space (copyout/xcopyout/xcopyout_little).
17197c478bd9Sstevel@tonic-gate */
17207c478bd9Sstevel@tonic-gate
17217c478bd9Sstevel@tonic-gate/*
17227c478bd9Sstevel@tonic-gate * We save the arguments in the following registers in case of a fault:
17237c478bd9Sstevel@tonic-gate *	kaddr - %l1 (SAVE_SRC);   %g4 (SM_SAVE_SRC) on the small-copy path
17247c478bd9Sstevel@tonic-gate *	uaddr - %l2 (SAVE_DST);   %g5 (SM_SAVE_DST) on the small-copy path
17257c478bd9Sstevel@tonic-gate *	count - %l3 (SAVE_COUNT); %o5 (SM_SAVE_COUNT) on the small-copy path
17267c478bd9Sstevel@tonic-gate */
17277c478bd9Sstevel@tonic-gate#define SAVE_SRC	%l1
17287c478bd9Sstevel@tonic-gate#define SAVE_DST	%l2
17297c478bd9Sstevel@tonic-gate#define SAVE_COUNT	%l3
17307c478bd9Sstevel@tonic-gate
17317c478bd9Sstevel@tonic-gate#define SM_SAVE_SRC		%g4
17327c478bd9Sstevel@tonic-gate#define SM_SAVE_DST		%g5
17337c478bd9Sstevel@tonic-gate#define SM_SAVE_COUNT		%o5
17347c478bd9Sstevel@tonic-gate#define ERRNO		%l5
17357c478bd9Sstevel@tonic-gate
17367c478bd9Sstevel@tonic-gate
17377c478bd9Sstevel@tonic-gate#define REAL_LOFAULT	%l4
17387c478bd9Sstevel@tonic-gate/*
17397c478bd9Sstevel@tonic-gate * Generic copyio fault handler.  This is the first line of defense when a
17407c478bd9Sstevel@tonic-gate * fault occurs in (x)copyin/(x)copyout.  In order for this to function
17417c478bd9Sstevel@tonic-gate * properly, the value of the 'real' lofault handler should be in REAL_LOFAULT.
17427c478bd9Sstevel@tonic-gate * This allows us to share common code for all the flavors of the copy
17437c478bd9Sstevel@tonic-gate * operations, including the _noerr versions.
17447c478bd9Sstevel@tonic-gate * On entry: %g1 = errno, %l6 = old t_lofault (maybe tagged FPUSED_FLAG).
17457c478bd9Sstevel@tonic-gate * Note that this function will restore the original input parameters before
17467c478bd9Sstevel@tonic-gate * calling REAL_LOFAULT.  So the real handler can vector to the appropriate
17477c478bd9Sstevel@tonic-gate * member of the t_copyop structure, if needed.
17487c478bd9Sstevel@tonic-gate */
17497c478bd9Sstevel@tonic-gate	ENTRY(copyio_fault)
17507c478bd9Sstevel@tonic-gate	membar	#Sync			! sync error barrier
17517c478bd9Sstevel@tonic-gate	mov	%g1,ERRNO			! save errno in ERRNO
17527c478bd9Sstevel@tonic-gate	btst	FPUSED_FLAG, %l6	! did the faulting copy use the fp regs?
17537c478bd9Sstevel@tonic-gate	bz	%ncc, 1f		! no: skip the fp state restore
17547c478bd9Sstevel@tonic-gate	  nop
17557c478bd9Sstevel@tonic-gate
17567c478bd9Sstevel@tonic-gate	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! gsr value kept in the frame
17577c478bd9Sstevel@tonic-gate	wr	%o2, 0, %gsr    	! restore gsr
17587c478bd9Sstevel@tonic-gate
17597c478bd9Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3	! fprs value kept in the frame
17607c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o3		! was fp enabled in the saved fprs?
17617c478bd9Sstevel@tonic-gate	bz,pt	%icc, 4f		! no: go to the zeroing path instead
17627c478bd9Sstevel@tonic-gate	  nop
17637c478bd9Sstevel@tonic-gate
17647c478bd9Sstevel@tonic-gate	BLD_FPQ2Q4_FROMSTACK(%o2)	! presumably reloads saved fp regs from the frame; %o2 looks like scratch - TODO confirm
17657c478bd9Sstevel@tonic-gate
17667c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 1f
17677c478bd9Sstevel@tonic-gate	  wr	%o3, 0, %fprs   	! restore fprs
17687c478bd9Sstevel@tonic-gate
17697c478bd9Sstevel@tonic-gate4:
17707c478bd9Sstevel@tonic-gate	FZEROQ2Q4			! presumably zeroes the fp regs the copy used - TODO confirm
17717c478bd9Sstevel@tonic-gate	wr	%o3, 0, %fprs   	! restore fprs
17727c478bd9Sstevel@tonic-gate
17737c478bd9Sstevel@tonic-gate1:
17747c478bd9Sstevel@tonic-gate	andn	%l6, FPUSED_FLAG, %l6	! strip the flag to recover the old handler
17757c478bd9Sstevel@tonic-gate	membar	#Sync			! sync error barrier
17767c478bd9Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
17777c478bd9Sstevel@tonic-gate	FP_ALLOWMIGRATE(5, 6)		! macro defined outside this chunk; presumably re-allows cpu migration
17787c478bd9Sstevel@tonic-gate
17797c478bd9Sstevel@tonic-gate	mov	SAVE_SRC, %i0		! put the original arguments back in %i0-%i2
17807c478bd9Sstevel@tonic-gate	mov	SAVE_DST, %i1
17817c478bd9Sstevel@tonic-gate	jmp	REAL_LOFAULT		! hand off to the real lofault handler
17827c478bd9Sstevel@tonic-gate	  mov	SAVE_COUNT, %i2		! delay slot: third argument
17837c478bd9Sstevel@tonic-gate
17847c478bd9Sstevel@tonic-gate	SET_SIZE(copyio_fault)
17857c478bd9Sstevel@tonic-gate
17867c478bd9Sstevel@tonic-gate
17877c478bd9Sstevel@tonic-gate	ENTRY(copyout)
17887c478bd9Sstevel@tonic-gate
17897c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
17907c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to larger cases
17917c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
17927c478bd9Sstevel@tonic-gate	btst	7, %o3				!
17937c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_8		! check for longword alignment
17947c478bd9Sstevel@tonic-gate	  nop
1795*5d9d9091SRichard Lowe	btst	1, %o3				!
17967c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_2		! check for half-word
17977c478bd9Sstevel@tonic-gate	  nop
17987c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
17997c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
18007c478bd9Sstevel@tonic-gate	tst	%o3
18017c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
18027c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
18037c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
18047c478bd9Sstevel@tonic-gate	  nop
18057c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
18067c478bd9Sstevel@tonic-gate	  nop
18077c478bd9Sstevel@tonic-gate.copyout_2:
18087c478bd9Sstevel@tonic-gate	btst	3, %o3				!
18097c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_4		! check for word alignment
18107c478bd9Sstevel@tonic-gate	  nop
18117c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
18127c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
18137c478bd9Sstevel@tonic-gate	tst	%o3
18147c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
18157c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
18167c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
18177c478bd9Sstevel@tonic-gate	  nop
18187c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
18197c478bd9Sstevel@tonic-gate	  nop
18207c478bd9Sstevel@tonic-gate.copyout_4:
18217c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
18227c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
18237c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
18247c478bd9Sstevel@tonic-gate	tst	%o3
18257c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
18267c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
18277c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
18287c478bd9Sstevel@tonic-gate	  nop
18297c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
18307c478bd9Sstevel@tonic-gate	  nop
18317c478bd9Sstevel@tonic-gate.copyout_8:
18327c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
18337c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
18347c478bd9Sstevel@tonic-gate	tst	%o3
18357c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_small		! if zero, disable HW copy
18367c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
18377c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_small		! go to small copy
18387c478bd9Sstevel@tonic-gate	  nop
18397c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_more		! otherwise go to large copy
18407c478bd9Sstevel@tonic-gate	  nop
18417c478bd9Sstevel@tonic-gate
18427c478bd9Sstevel@tonic-gate	.align	16
18437c478bd9Sstevel@tonic-gate	nop				! instruction alignment
18447c478bd9Sstevel@tonic-gate					! see discussion at start of file
18457c478bd9Sstevel@tonic-gate.copyout_small:
18467c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_copyout_err), %o5	! .sm_copyout_err is lofault
18477c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_copyout_err), %o5
18487c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
18497c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
18507c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault
18517c478bd9Sstevel@tonic-gate.sm_do_copyout:
18527c478bd9Sstevel@tonic-gate	mov	%o0, SM_SAVE_SRC
18537c478bd9Sstevel@tonic-gate	mov	%o1, SM_SAVE_DST
18547c478bd9Sstevel@tonic-gate	cmp	%o2, SHORTCOPY		! check for really short case
18557c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .co_sm_left	!
18567c478bd9Sstevel@tonic-gate	  mov	%o2, SM_SAVE_COUNT
18577c478bd9Sstevel@tonic-gate	cmp	%o2, CHKSIZE		! check for medium length cases
18587c478bd9Sstevel@tonic-gate	bgu,pn	%ncc, .co_med		!
18597c478bd9Sstevel@tonic-gate	  or	%o0, %o1, %o3		! prepare alignment check
18607c478bd9Sstevel@tonic-gate	andcc	%o3, 0x3, %g0		! test for alignment
18617c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_word	! branch to word aligned case
18627c478bd9Sstevel@tonic-gate.co_sm_movebytes:
18637c478bd9Sstevel@tonic-gate	  sub	%o2, 3, %o2		! adjust count to allow cc zero test
18647c478bd9Sstevel@tonic-gate.co_sm_notalign4:
18657c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! read byte
18667c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
18677c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER	! write byte
18687c478bd9Sstevel@tonic-gate	inc	%o1			! advance DST by 1
18697c478bd9Sstevel@tonic-gate	ldub	[%o0 + 1], %o3		! repeat for a total of 4 bytes
18707c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
18717c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
18727c478bd9Sstevel@tonic-gate	inc	%o1			! advance DST by 1
18737c478bd9Sstevel@tonic-gate	ldub	[%o0 - 2], %o3
18747c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
18757c478bd9Sstevel@tonic-gate	inc	%o1			! advance DST by 1
18767c478bd9Sstevel@tonic-gate	ldub	[%o0 - 1], %o3
18777c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
18787c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_notalign4	! loop til 3 or fewer bytes remain
18797c478bd9Sstevel@tonic-gate	  inc	%o1			! advance DST by 1
18807c478bd9Sstevel@tonic-gate	add	%o2, 3, %o2		! restore count
18817c478bd9Sstevel@tonic-gate.co_sm_left:
18827c478bd9Sstevel@tonic-gate	tst	%o2
18837c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit	! check for zero length
18847c478bd9Sstevel@tonic-gate	  nop
18857c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
18867c478bd9Sstevel@tonic-gate	deccc	%o2			! reduce count for cc test
18877c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
18887c478bd9Sstevel@tonic-gate	  stba	%o3,[%o1]ASI_USER	! store one byte
18897c478bd9Sstevel@tonic-gate	ldub	[%o0 + 1], %o3		! load second byte
18907c478bd9Sstevel@tonic-gate	deccc	%o2
18917c478bd9Sstevel@tonic-gate	inc	%o1
18927c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
18937c478bd9Sstevel@tonic-gate	  stba	%o3,[%o1]ASI_USER	! store second byte
18947c478bd9Sstevel@tonic-gate	ldub	[%o0 + 2], %o3		! load third byte
18957c478bd9Sstevel@tonic-gate	inc	%o1
18967c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store third byte
18977c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
18987c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
18997c478bd9Sstevel@tonic-gate	retl
19007c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
19017c478bd9Sstevel@tonic-gate	.align	16
19027c478bd9Sstevel@tonic-gate.co_sm_words:
19037c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
19047c478bd9Sstevel@tonic-gate.co_sm_wordx:
19057c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! update count
19067c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
19077c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! update SRC
19087c478bd9Sstevel@tonic-gate	lduw	[%o0 - 4], %o3		! read word
19097c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! update DST
19107c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
19117c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_words	! loop til done
19127c478bd9Sstevel@tonic-gate	  add	%o1, 4, %o1		! update DST
19137c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
19147c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
19157c478bd9Sstevel@tonic-gate	  nop
19167c478bd9Sstevel@tonic-gate	deccc	%o2
19177c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
19187c478bd9Sstevel@tonic-gate.co_sm_half:
19197c478bd9Sstevel@tonic-gate	  subcc	%o2, 2, %o2		! reduce count by 2
19207c478bd9Sstevel@tonic-gate	lduh	[%o0], %o3		! read half word
19217c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
19227c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER	! write half word
19237c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_half	! loop til done
19247c478bd9Sstevel@tonic-gate	  add	%o1, 2, %o1		! advance DST by 2
19257c478bd9Sstevel@tonic-gate	addcc	%o2, 1, %o2		! restore count
19267c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
19277c478bd9Sstevel@tonic-gate	  nop
19287c478bd9Sstevel@tonic-gate.co_sm_byte:
19297c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3
19307c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER
19317c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
19327c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
19337c478bd9Sstevel@tonic-gate	retl
19347c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
19357c478bd9Sstevel@tonic-gate	.align 16
19367c478bd9Sstevel@tonic-gate.co_sm_word:
19377c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! update count
19387c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_sm_wordx
19397c478bd9Sstevel@tonic-gate	  lduw	[%o0], %o3		! read word
19407c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore count
19417c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
19427c478bd9Sstevel@tonic-gate	  stwa	%o3, [%o1]ASI_USER	! write word
19437c478bd9Sstevel@tonic-gate	deccc	%o2			! reduce count for cc test
19447c478bd9Sstevel@tonic-gate	ldub	[%o0 + 4], %o3		! load one byte
19457c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1
19467c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
19477c478bd9Sstevel@tonic-gate	  stba	%o3, [%o1]ASI_USER	! store one byte
19487c478bd9Sstevel@tonic-gate	ldub	[%o0 + 5], %o3		! load second byte
19497c478bd9Sstevel@tonic-gate	deccc	%o2
19507c478bd9Sstevel@tonic-gate	inc	%o1
19517c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
19527c478bd9Sstevel@tonic-gate	  stba	%o3, [%o1]ASI_USER	! store second byte
19537c478bd9Sstevel@tonic-gate	ldub	[%o0 + 6], %o3		! load third byte
19547c478bd9Sstevel@tonic-gate	inc	%o1
19557c478bd9Sstevel@tonic-gate	stba	%o3, [%o1]ASI_USER	! store third byte
19567c478bd9Sstevel@tonic-gate.co_sm_exit:
19577c478bd9Sstevel@tonic-gate	  membar	#Sync				! sync error barrier
19587c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
19597c478bd9Sstevel@tonic-gate	retl
19607c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
19617c478bd9Sstevel@tonic-gate
19627c478bd9Sstevel@tonic-gate	.align 16
19637c478bd9Sstevel@tonic-gate.co_med:
19647c478bd9Sstevel@tonic-gate	xor	%o0, %o1, %o3		! setup alignment check
19657c478bd9Sstevel@tonic-gate	btst	1, %o3
19667c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_sm_movebytes	! unaligned
19677c478bd9Sstevel@tonic-gate	  nop
19687c478bd9Sstevel@tonic-gate	btst	3, %o3
19697c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_half	! halfword aligned
19707c478bd9Sstevel@tonic-gate	  nop
19717c478bd9Sstevel@tonic-gate	btst	7, %o3
19727c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_word	! word aligned
19737c478bd9Sstevel@tonic-gate	  nop
19747c478bd9Sstevel@tonic-gate.co_med_long:
19757c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
19767c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_long1	! word alignment
19777c478bd9Sstevel@tonic-gate	  nop
19787c478bd9Sstevel@tonic-gate.co_med_long0:
19797c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
19807c478bd9Sstevel@tonic-gate	inc	%o0
19817c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store byte
19827c478bd9Sstevel@tonic-gate	inc	%o1
19837c478bd9Sstevel@tonic-gate	btst	3, %o0
19847c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_long0
19857c478bd9Sstevel@tonic-gate	  dec	%o2
19867c478bd9Sstevel@tonic-gate.co_med_long1:			! word aligned
19877c478bd9Sstevel@tonic-gate	btst	7, %o0			! check for long word
19887c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_long2
19897c478bd9Sstevel@tonic-gate	  nop
19907c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! load word
19917c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
19927c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! store word
19937c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
19947c478bd9Sstevel@tonic-gate	sub	%o2, 4, %o2		! reduce count by 4
19957c478bd9Sstevel@tonic-gate!
19967c478bd9Sstevel@tonic-gate!  Now long word aligned and have at least 32 bytes to move
19977c478bd9Sstevel@tonic-gate!
19987c478bd9Sstevel@tonic-gate.co_med_long2:
19997c478bd9Sstevel@tonic-gate	sub	%o2, 31, %o2		! adjust count to allow cc zero test
20007c478bd9Sstevel@tonic-gate	sub	%o1, 8, %o1		! adjust pointer to allow store in
20017c478bd9Sstevel@tonic-gate					! branch delay slot instead of add
20027c478bd9Sstevel@tonic-gate.co_med_lmove:
20037c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
20047c478bd9Sstevel@tonic-gate	ldx	[%o0], %o3		! read long word
20057c478bd9Sstevel@tonic-gate	subcc	%o2, 32, %o2		! reduce count by 32
20067c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER	! write long word
20077c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
20087c478bd9Sstevel@tonic-gate	ldx	[%o0 + 8], %o3		! repeat for a total for 4 long words
20097c478bd9Sstevel@tonic-gate	add	%o0, 32, %o0		! advance SRC by 32
20107c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER
20117c478bd9Sstevel@tonic-gate	ldx	[%o0 - 16], %o3
20127c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
20137c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER
20147c478bd9Sstevel@tonic-gate	ldx	[%o0 - 8], %o3
20157c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
20167c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_lmove	! loop til 31 or fewer bytes left
20177c478bd9Sstevel@tonic-gate	  stxa	%o3, [%o1]ASI_USER
20187c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
20197c478bd9Sstevel@tonic-gate	addcc	%o2, 24, %o2		! restore count to long word offset
20207c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .co_med_lextra	! check for more long words to move
20217c478bd9Sstevel@tonic-gate	  nop
20227c478bd9Sstevel@tonic-gate.co_med_lword:
20237c478bd9Sstevel@tonic-gate	ldx	[%o0], %o3		! read long word
20247c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
20257c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_USER	! write long word
20267c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
20277c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_lword	! loop til 7 or fewer bytes left
20287c478bd9Sstevel@tonic-gate	  add	%o1, 8, %o1		! advance DST by 8
20297c478bd9Sstevel@tonic-gate.co_med_lextra:
20307c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore rest of count
20317c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit	! if zero, then done
20327c478bd9Sstevel@tonic-gate	  deccc	%o2
20337c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
20347c478bd9Sstevel@tonic-gate	  nop
20357c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .co_sm_half
20367c478bd9Sstevel@tonic-gate	  nop
20377c478bd9Sstevel@tonic-gate
20387c478bd9Sstevel@tonic-gate	.align 16
20397c478bd9Sstevel@tonic-gate	nop				! instruction alignment
20407c478bd9Sstevel@tonic-gate					! see discussion at start of file
20417c478bd9Sstevel@tonic-gate.co_med_word:
20427c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
20437c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_word1	! word alignment
20447c478bd9Sstevel@tonic-gate	  nop
20457c478bd9Sstevel@tonic-gate.co_med_word0:
20467c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
20477c478bd9Sstevel@tonic-gate	inc	%o0
20487c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store byte
20497c478bd9Sstevel@tonic-gate	inc	%o1
20507c478bd9Sstevel@tonic-gate	btst	3, %o0
20517c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .co_med_word0
20527c478bd9Sstevel@tonic-gate	  dec	%o2
20537c478bd9Sstevel@tonic-gate!
20547c478bd9Sstevel@tonic-gate!  Now word aligned and have at least 36 bytes to move
20557c478bd9Sstevel@tonic-gate!
20567c478bd9Sstevel@tonic-gate.co_med_word1:
20577c478bd9Sstevel@tonic-gate	sub	%o2, 15, %o2		! adjust count to allow cc zero test
20587c478bd9Sstevel@tonic-gate.co_med_wmove:
20597c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
20607c478bd9Sstevel@tonic-gate	subcc	%o2, 16, %o2		! reduce count by 16
20617c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
20627c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
20637c478bd9Sstevel@tonic-gate	lduw	[%o0 + 4], %o3		! repeat for a total for 4 words
20647c478bd9Sstevel@tonic-gate	add	%o0, 16, %o0		! advance SRC by 16
20657c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER
20667c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
20677c478bd9Sstevel@tonic-gate	lduw	[%o0 - 8], %o3
20687c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER
20697c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
20707c478bd9Sstevel@tonic-gate	lduw	[%o0 - 4], %o3
20717c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER
20727c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_wmove	! loop til 15 or fewer bytes left
20737c478bd9Sstevel@tonic-gate	  add	%o1, 4, %o1		! advance DST by 4
20747c478bd9Sstevel@tonic-gate	addcc	%o2, 12, %o2		! restore count to word offset
20757c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .co_med_wextra	! check for more words to move
20767c478bd9Sstevel@tonic-gate	  nop
20777c478bd9Sstevel@tonic-gate.co_med_word2:
20787c478bd9Sstevel@tonic-gate	lduw	[%o0], %o3		! read word
20797c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
20807c478bd9Sstevel@tonic-gate	stwa	%o3, [%o1]ASI_USER	! write word
20817c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
20827c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_word2	! loop til 3 or fewer bytes left
20837c478bd9Sstevel@tonic-gate	  add	%o1, 4, %o1		! advance DST by 4
20847c478bd9Sstevel@tonic-gate.co_med_wextra:
20857c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore rest of count
20867c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit	! if zero, then done
20877c478bd9Sstevel@tonic-gate	  deccc	%o2
20887c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
20897c478bd9Sstevel@tonic-gate	  nop
20907c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .co_sm_half
20917c478bd9Sstevel@tonic-gate	  nop
20927c478bd9Sstevel@tonic-gate
20937c478bd9Sstevel@tonic-gate	.align 16
20947c478bd9Sstevel@tonic-gate	nop				! instruction alignment
20957c478bd9Sstevel@tonic-gate	nop				! see discussion at start of file
20967c478bd9Sstevel@tonic-gate	nop
20977c478bd9Sstevel@tonic-gate.co_med_half:
20987c478bd9Sstevel@tonic-gate	btst	1, %o0			! check for
20997c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_med_half1	! half word alignment
21007c478bd9Sstevel@tonic-gate	  nop
21017c478bd9Sstevel@tonic-gate	ldub	[%o0], %o3		! load one byte
21027c478bd9Sstevel@tonic-gate	inc	%o0
21037c478bd9Sstevel@tonic-gate	stba	%o3,[%o1]ASI_USER	! store byte
21047c478bd9Sstevel@tonic-gate	inc	%o1
21057c478bd9Sstevel@tonic-gate	dec	%o2
21067c478bd9Sstevel@tonic-gate!
21077c478bd9Sstevel@tonic-gate!  Now half word aligned and have at least 38 bytes to move
21087c478bd9Sstevel@tonic-gate!
21097c478bd9Sstevel@tonic-gate.co_med_half1:
21107c478bd9Sstevel@tonic-gate	sub	%o2, 7, %o2		! adjust count to allow cc zero test
21117c478bd9Sstevel@tonic-gate.co_med_hmove:
21127c478bd9Sstevel@tonic-gate	lduh	[%o0], %o3		! read half word
21137c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
21147c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER	! write half word
21157c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
21167c478bd9Sstevel@tonic-gate	lduh	[%o0 + 2], %o3		! repeat for a total for 4 halfwords
21177c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
21187c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER
21197c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
21207c478bd9Sstevel@tonic-gate	lduh	[%o0 - 4], %o3
21217c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER
21227c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
21237c478bd9Sstevel@tonic-gate	lduh	[%o0 - 2], %o3
21247c478bd9Sstevel@tonic-gate	stha	%o3, [%o1]ASI_USER
21257c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .co_med_hmove	! loop til 7 or fewer bytes left
21267c478bd9Sstevel@tonic-gate	  add	%o1, 2, %o1		! advance DST by 2
21277c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
21287c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_exit
21297c478bd9Sstevel@tonic-gate	  deccc	%o2
21307c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .co_sm_byte
21317c478bd9Sstevel@tonic-gate	  nop
21327c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .co_sm_half
21337c478bd9Sstevel@tonic-gate	  nop
21347c478bd9Sstevel@tonic-gate
21357c478bd9Sstevel@tonic-gate/*
21367c478bd9Sstevel@tonic-gate * We got here because of a fault during short copyout.
21377c478bd9Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
21387c478bd9Sstevel@tonic-gate */
21397c478bd9Sstevel@tonic-gate.sm_copyout_err:
21407c478bd9Sstevel@tonic-gate	membar	#Sync
21417c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
21427c478bd9Sstevel@tonic-gate	mov	SM_SAVE_SRC, %o0
21437c478bd9Sstevel@tonic-gate	mov	SM_SAVE_DST, %o1
21447c478bd9Sstevel@tonic-gate	mov	SM_SAVE_COUNT, %o2
21457c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
21467c478bd9Sstevel@tonic-gate	tst	%o3
21477c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 3f			! if not, return error
21487c478bd9Sstevel@tonic-gate	  nop
21497c478bd9Sstevel@tonic-gate	ldn	[%o3 + CP_COPYOUT], %o5		! if handler, invoke it with
21507c478bd9Sstevel@tonic-gate	jmp	%o5				! original arguments
21517c478bd9Sstevel@tonic-gate	  nop
21527c478bd9Sstevel@tonic-gate3:
21537c478bd9Sstevel@tonic-gate	retl
21547c478bd9Sstevel@tonic-gate	  or	%g0, -1, %o0		! return error value
21557c478bd9Sstevel@tonic-gate
21567c478bd9Sstevel@tonic-gate	SET_SIZE(copyout)
21577c478bd9Sstevel@tonic-gate
21587c478bd9Sstevel@tonic-gate/*
21597c478bd9Sstevel@tonic-gate * The _more entry points are not intended to be used directly by
21607c478bd9Sstevel@tonic-gate * any caller from outside this file.  They are provided to allow
21617c478bd9Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses
21627c478bd9Sstevel@tonic-gate * the floating point registers.
21637c478bd9Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of
21647c478bd9Sstevel@tonic-gate * 4/2004) does not support leaf functions.
21657c478bd9Sstevel@tonic-gate */
21667c478bd9Sstevel@tonic-gate
21677c478bd9Sstevel@tonic-gate	ENTRY(copyout_more)	! large copyout: fp regs and block stores
21687c478bd9Sstevel@tonic-gate.copyout_more:
21697c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp	! new window and frame
21707c478bd9Sstevel@tonic-gate	set	.copyout_err, REAL_LOFAULT	! fault label for this entry point
21717c478bd9Sstevel@tonic-gate
21727c478bd9Sstevel@tonic-gate/*
21737c478bd9Sstevel@tonic-gate * Copy outs that reach here are larger than VIS_COPY_THRESHOLD bytes
21747c478bd9Sstevel@tonic-gate */
21757c478bd9Sstevel@tonic-gate.do_copyout:	! also entered from .xcopyout_more with its own fault label
21767c478bd9Sstevel@tonic-gate        set     copyio_fault, %l7		! copyio_fault is lofault val
21777c478bd9Sstevel@tonic-gate
21787c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
21797c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
21807c478bd9Sstevel@tonic-gate	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
21817c478bd9Sstevel@tonic-gate
21827c478bd9Sstevel@tonic-gate	mov	%i0, SAVE_SRC		! stash the three incoming arguments
21837c478bd9Sstevel@tonic-gate	mov	%i1, SAVE_DST
21847c478bd9Sstevel@tonic-gate	mov	%i2, SAVE_COUNT
21857c478bd9Sstevel@tonic-gate
21867c478bd9Sstevel@tonic-gate	FP_NOMIGRATE(6, 7)	! NOTE(review): name suggests thread is kept on this CPU -- confirm
21877c478bd9Sstevel@tonic-gate
21887c478bd9Sstevel@tonic-gate	rd	%fprs, %o2		! check for unused fp
21897c478bd9Sstevel@tonic-gate	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
21907c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o2
21917c478bd9Sstevel@tonic-gate	bz,a,pt	%icc, .do_blockcopyout	! fp unused: nothing to save
21927c478bd9Sstevel@tonic-gate	  wr	%g0, FPRS_FEF, %fprs	! (annulled slot: only if taken) enable fp
21937c478bd9Sstevel@tonic-gate
21947c478bd9Sstevel@tonic-gate	BST_FPQ2Q4_TOSTACK(%o2)	! NOTE(review): presumably saves live fp regs to the frame -- confirm
21957c478bd9Sstevel@tonic-gate
21967c478bd9Sstevel@tonic-gate.do_blockcopyout:
21977c478bd9Sstevel@tonic-gate	rd	%gsr, %o2
21987c478bd9Sstevel@tonic-gate	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
21997c478bd9Sstevel@tonic-gate	or	%l6, FPUSED_FLAG, %l6	! flag the saved lofault; cleared at exit
22007c478bd9Sstevel@tonic-gate
22017c478bd9Sstevel@tonic-gate	andcc	DST, VIS_BLOCKSIZE - 1, TMP	! TMP = DST offset within a block
22027c478bd9Sstevel@tonic-gate	mov	ASI_USER, %asi		! stores via %asi below use ASI_USER
22037c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f		! DST already block aligned
22047c478bd9Sstevel@tonic-gate	  neg	TMP			! (delay slot) TMP = -offset
22057c478bd9Sstevel@tonic-gate	add	TMP, VIS_BLOCKSIZE, TMP
22067c478bd9Sstevel@tonic-gate
22077c478bd9Sstevel@tonic-gate	! TMP = bytes required to align DST on FP_BLOCK boundary
22087c478bd9Sstevel@tonic-gate	! Using SRC as a tmp here
22097c478bd9Sstevel@tonic-gate	cmp	TMP, 3
22107c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, 1f		! 3 or fewer: byte loop only
22117c478bd9Sstevel@tonic-gate	  sub	CNT,TMP,CNT		! adjust main count
22127c478bd9Sstevel@tonic-gate	sub	TMP, 3, TMP		! adjust for end of loop test
22137c478bd9Sstevel@tonic-gate.co_blkalign:
22147c478bd9Sstevel@tonic-gate	ldub	[REALSRC], SRC		! move 4 bytes per loop iteration
22157c478bd9Sstevel@tonic-gate	stba	SRC, [DST]%asi
22167c478bd9Sstevel@tonic-gate	subcc	TMP, 4, TMP
22177c478bd9Sstevel@tonic-gate	ldub	[REALSRC + 1], SRC
22187c478bd9Sstevel@tonic-gate	add	REALSRC, 4, REALSRC
22197c478bd9Sstevel@tonic-gate	stba	SRC, [DST + 1]%asi
22207c478bd9Sstevel@tonic-gate	ldub	[REALSRC - 2], SRC
22217c478bd9Sstevel@tonic-gate	add	DST, 4, DST
22227c478bd9Sstevel@tonic-gate	stba	SRC, [DST - 2]%asi
22237c478bd9Sstevel@tonic-gate	ldub	[REALSRC - 1], SRC
22247c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, .co_blkalign	! more groups of 4 to move
22257c478bd9Sstevel@tonic-gate	  stba	SRC, [DST - 1]%asi
22267c478bd9Sstevel@tonic-gate
22277c478bd9Sstevel@tonic-gate	addcc	TMP, 3, TMP		! restore count adjustment
22287c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f		! no bytes left?
22297c478bd9Sstevel@tonic-gate	  nop
22307c478bd9Sstevel@tonic-gate1:	ldub	[REALSRC], SRC		! finish alignment one byte at a time
22317c478bd9Sstevel@tonic-gate	inc	REALSRC
22327c478bd9Sstevel@tonic-gate	inc	DST
22337c478bd9Sstevel@tonic-gate	deccc	TMP
22347c478bd9Sstevel@tonic-gate	bgu	%ncc, 1b
22357c478bd9Sstevel@tonic-gate	  stba	SRC, [DST - 1]%asi
22367c478bd9Sstevel@tonic-gate
22377c478bd9Sstevel@tonic-gate2:
22387c478bd9Sstevel@tonic-gate	andn	REALSRC, 0x7, SRC	! SRC = REALSRC rounded down to 8 bytes
22397c478bd9Sstevel@tonic-gate	alignaddr REALSRC, %g0, %g0	! GSR align = low 3 bits of REALSRC; result discarded
22407c478bd9Sstevel@tonic-gate
22417c478bd9Sstevel@tonic-gate	! SRC - 8-byte aligned
22427c478bd9Sstevel@tonic-gate	! DST - 64-byte aligned
22437c478bd9Sstevel@tonic-gate	prefetch [SRC], #one_read
22447c478bd9Sstevel@tonic-gate	prefetch [SRC + (1 * VIS_BLOCKSIZE)], #one_read
22457c478bd9Sstevel@tonic-gate	prefetch [SRC + (2 * VIS_BLOCKSIZE)], #one_read
22467c478bd9Sstevel@tonic-gate	prefetch [SRC + (3 * VIS_BLOCKSIZE)], #one_read
22477c478bd9Sstevel@tonic-gate	ldd	[SRC], %f16		! preload 8 doublewords (offsets 0x00-0x38)
22487c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4
22497c478bd9Sstevel@tonic-gate	prefetch [SRC + (4 * VIS_BLOCKSIZE)], #one_read
22507c478bd9Sstevel@tonic-gate#endif
22517c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f18
22527c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5
22537c478bd9Sstevel@tonic-gate	prefetch [SRC + (5 * VIS_BLOCKSIZE)], #one_read
22547c478bd9Sstevel@tonic-gate#endif
22557c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f20
22567c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6
22577c478bd9Sstevel@tonic-gate	prefetch [SRC + (6 * VIS_BLOCKSIZE)], #one_read
22587c478bd9Sstevel@tonic-gate#endif
22597c478bd9Sstevel@tonic-gate	faligndata %f16, %f18, %f48	! realigned output is staged in %f48 and up
22607c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f22
22617c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7
22627c478bd9Sstevel@tonic-gate	prefetch [SRC + (7 * VIS_BLOCKSIZE)], #one_read
22637c478bd9Sstevel@tonic-gate#endif
22647c478bd9Sstevel@tonic-gate	faligndata %f18, %f20, %f50
22657c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f24
22667c478bd9Sstevel@tonic-gate	faligndata %f20, %f22, %f52
22677c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f26
22687c478bd9Sstevel@tonic-gate	faligndata %f22, %f24, %f54
22697c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f28
22707c478bd9Sstevel@tonic-gate	faligndata %f24, %f26, %f56
22717c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f30
22727c478bd9Sstevel@tonic-gate	faligndata %f26, %f28, %f58
22737c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f16	! first doubleword of next block
22747c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
22757c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
22767c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
22777c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, 1f		! annulled: nop below never executes
22787c478bd9Sstevel@tonic-gate	  nop
22797c478bd9Sstevel@tonic-gate	.align	16
22807c478bd9Sstevel@tonic-gate1:				! main loop: one block store per pass
22817c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f18
22827c478bd9Sstevel@tonic-gate	faligndata %f28, %f30, %f60
22837c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f20
22847c478bd9Sstevel@tonic-gate	faligndata %f30, %f16, %f62
22857c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_AIUS	! store the block staged in %f48-%f62
22867c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f22
22877c478bd9Sstevel@tonic-gate	faligndata %f16, %f18, %f48
22887c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f24
22897c478bd9Sstevel@tonic-gate	faligndata %f18, %f20, %f50
22907c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f26
22917c478bd9Sstevel@tonic-gate	faligndata %f20, %f22, %f52
22927c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f28
22937c478bd9Sstevel@tonic-gate	faligndata %f22, %f24, %f54
22947c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f30
22957c478bd9Sstevel@tonic-gate	faligndata %f24, %f26, %f56
22967c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
22977c478bd9Sstevel@tonic-gate	ldd	[SRC + VIS_BLOCKSIZE], %f16
22987c478bd9Sstevel@tonic-gate	faligndata %f26, %f28, %f58
22997c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8], #one_read
23007c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
23017c478bd9Sstevel@tonic-gate	prefetch [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)], #one_read
23027c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
23037c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE + 8
23047c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 1b		! loop while more than a block + 8 remains
23057c478bd9Sstevel@tonic-gate	  add	SRC, VIS_BLOCKSIZE, SRC
23067c478bd9Sstevel@tonic-gate
23077c478bd9Sstevel@tonic-gate	! take the path at 2: only if exactly one block is left and REALSRC & 0x7 is 0
23087c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE
23097c478bd9Sstevel@tonic-gate	bne	%ncc, 3f
23107c478bd9Sstevel@tonic-gate	  andcc	REALSRC, 0x7, %g0
23117c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f
23127c478bd9Sstevel@tonic-gate	  nop
2313*5d9d9091SRichard Lowe3:				! finish and store the staged block
23147c478bd9Sstevel@tonic-gate	faligndata %f28, %f30, %f60
23157c478bd9Sstevel@tonic-gate	faligndata %f30, %f16, %f62
23167c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_AIUS
23177c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
23187c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 3f		! go copy any trailing bytes
23197c478bd9Sstevel@tonic-gate	  nop
23207c478bd9Sstevel@tonic-gate2:				! source 8-byte aligned: fsrc1 copies, no realignment
23217c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x08], %f18
23227c478bd9Sstevel@tonic-gate	fsrc1	%f28, %f60
23237c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x10], %f20
23247c478bd9Sstevel@tonic-gate	fsrc1	%f30, %f62
23257c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_AIUS
23267c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x18], %f22
23277c478bd9Sstevel@tonic-gate	fsrc1	%f16, %f48
23287c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x20], %f24
23297c478bd9Sstevel@tonic-gate	fsrc1	%f18, %f50
23307c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x28], %f26
23317c478bd9Sstevel@tonic-gate	fsrc1	%f20, %f52
23327c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x30], %f28
23337c478bd9Sstevel@tonic-gate	fsrc1	%f22, %f54
23347c478bd9Sstevel@tonic-gate	ldd	[SRC + 0x38], %f30
23357c478bd9Sstevel@tonic-gate	fsrc1	%f24, %f56
23367c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT	! CNT was exactly one block, so now 0
23377c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
23387c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
23397c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
23407c478bd9Sstevel@tonic-gate	fsrc1	%f26, %f58
23417c478bd9Sstevel@tonic-gate	fsrc1	%f28, %f60
23427c478bd9Sstevel@tonic-gate	fsrc1	%f30, %f62
23437c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_AIUS	! store the final block
23447c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
23457c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, 4f		! annulled: nop below never executes
23467c478bd9Sstevel@tonic-gate	  nop
23477c478bd9Sstevel@tonic-gate
23487c478bd9Sstevel@tonic-gate3:	tst	CNT			! any trailing bytes?
23497c478bd9Sstevel@tonic-gate	bz,a	%ncc, 4f
23507c478bd9Sstevel@tonic-gate	  nop
23517c478bd9Sstevel@tonic-gate
23527c478bd9Sstevel@tonic-gate5:	ldub	[REALSRC], TMP		! copy the tail byte by byte
23537c478bd9Sstevel@tonic-gate	inc	REALSRC
23547c478bd9Sstevel@tonic-gate	inc	DST
23557c478bd9Sstevel@tonic-gate	deccc	CNT
23567c478bd9Sstevel@tonic-gate	bgu	%ncc, 5b
23577c478bd9Sstevel@tonic-gate	  stba	TMP, [DST - 1]%asi
23587c478bd9Sstevel@tonic-gate4:
23597c478bd9Sstevel@tonic-gate
23607c478bd9Sstevel@tonic-gate.copyout_exit:
23617c478bd9Sstevel@tonic-gate	membar	#Sync
23627c478bd9Sstevel@tonic-gate
23637c478bd9Sstevel@tonic-gate	FPRAS_INTERVAL(FPRAS_COPYOUT, 0, %l5, %o2, %o3, %o4, %o5, 8)
23647c478bd9Sstevel@tonic-gate	FPRAS_REWRITE_TYPE2Q2(0, %l5, %o2, %o3, 8, 9)
23657c478bd9Sstevel@tonic-gate	FPRAS_CHECK(FPRAS_COPYOUT, %l5, 9)	! lose outputs
23667c478bd9Sstevel@tonic-gate
23677c478bd9Sstevel@tonic-gate	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2
23687c478bd9Sstevel@tonic-gate	wr	%o2, 0, %gsr		! restore gsr
23697c478bd9Sstevel@tonic-gate
23707c478bd9Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
23717c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o3
23727c478bd9Sstevel@tonic-gate	bz,pt	%icc, 4f		! fp was not enabled on entry
23737c478bd9Sstevel@tonic-gate	  nop
23747c478bd9Sstevel@tonic-gate
23757c478bd9Sstevel@tonic-gate	BLD_FPQ2Q4_FROMSTACK(%o2)	! NOTE(review): presumably reloads what BST_FPQ2Q4_TOSTACK saved -- confirm
23767c478bd9Sstevel@tonic-gate
23777c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 1f
23787c478bd9Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
23797c478bd9Sstevel@tonic-gate
23807c478bd9Sstevel@tonic-gate4:
23817c478bd9Sstevel@tonic-gate	FZEROQ2Q4		! NOTE(review): presumably clears the fp regs the copy used -- confirm
23827c478bd9Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
23837c478bd9Sstevel@tonic-gate
23847c478bd9Sstevel@tonic-gate1:
23857c478bd9Sstevel@tonic-gate	membar	#Sync
23867c478bd9Sstevel@tonic-gate	andn	%l6, FPUSED_FLAG, %l6	! drop the flag set at .do_blockcopyout
23877c478bd9Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
23887c478bd9Sstevel@tonic-gate	FP_ALLOWMIGRATE(5, 6)	! counterpart of FP_NOMIGRATE above
23897c478bd9Sstevel@tonic-gate	ret
23907c478bd9Sstevel@tonic-gate	  restore	%g0, 0, %o0	! (delay slot) return 0
23917c478bd9Sstevel@tonic-gate
23927c478bd9Sstevel@tonic-gate/*
23937c478bd9Sstevel@tonic-gate * We got here because of a fault during copyout.
23947c478bd9Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
23957c478bd9Sstevel@tonic-gate */
23967c478bd9Sstevel@tonic-gate.copyout_err:
23977c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4	! check for copyop handler
23987c478bd9Sstevel@tonic-gate	tst	%o4
23997c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f			! if not, return error
24007c478bd9Sstevel@tonic-gate	  nop
24017c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_COPYOUT], %g2		! if handler, invoke it with
24027c478bd9Sstevel@tonic-gate	jmp	%g2				! original arguments
24037c478bd9Sstevel@tonic-gate	  restore %g0, 0, %g0			! dispose of copy window
24047c478bd9Sstevel@tonic-gate2:
24057c478bd9Sstevel@tonic-gate        ret
24067c478bd9Sstevel@tonic-gate	  restore %g0, -1, %o0			! return error value
24077c478bd9Sstevel@tonic-gate
24087c478bd9Sstevel@tonic-gate
24097c478bd9Sstevel@tonic-gate	SET_SIZE(copyout_more)
24107c478bd9Sstevel@tonic-gate
24117c478bd9Sstevel@tonic-gate
24127c478bd9Sstevel@tonic-gate	ENTRY(xcopyout)	! shares the copyout code paths; a fault returns errno, not -1
24137c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
24147c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyout_small		! small leaf case; else go to larger cases
24157c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
24167c478bd9Sstevel@tonic-gate	btst	7, %o3				! do the low 3 bits of src, dst agree?
24177c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .xcopyout_8		! check for longword alignment
24187c478bd9Sstevel@tonic-gate	  nop
2419*5d9d9091SRichard Lowe	btst	1, %o3				! does the low bit agree?
24207c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .xcopyout_2		! check for half-word
24217c478bd9Sstevel@tonic-gate	  nop
24227c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
24237c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
24247c478bd9Sstevel@tonic-gate	tst	%o3
24257c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyout_small		! if zero, disable HW copy
24267c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
24277c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyout_small		! go to small copy
24287c478bd9Sstevel@tonic-gate	  nop
24297c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyout_more		! otherwise go to large copy
24307c478bd9Sstevel@tonic-gate	  nop
24317c478bd9Sstevel@tonic-gate.xcopyout_2:
24327c478bd9Sstevel@tonic-gate	btst	3, %o3				! do the low 2 bits agree?
24337c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .xcopyout_4		! check for word alignment
24347c478bd9Sstevel@tonic-gate	  nop
24357c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
24367c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
24377c478bd9Sstevel@tonic-gate	tst	%o3
24387c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyout_small		! if zero, disable HW copy
24397c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
24407c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyout_small		! go to small copy
24417c478bd9Sstevel@tonic-gate	  nop
24427c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyout_more		! otherwise go to large copy
24437c478bd9Sstevel@tonic-gate	  nop
24447c478bd9Sstevel@tonic-gate.xcopyout_4:
24457c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
24467c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
24477c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
24487c478bd9Sstevel@tonic-gate	tst	%o3
24497c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyout_small		! if zero, disable HW copy
24507c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
24517c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyout_small		! go to small copy
24527c478bd9Sstevel@tonic-gate	  nop
24537c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyout_more		! otherwise go to large copy
24547c478bd9Sstevel@tonic-gate	  nop
24557c478bd9Sstevel@tonic-gate.xcopyout_8:
24567c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
24577c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
24587c478bd9Sstevel@tonic-gate	tst	%o3
24597c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyout_small		! if zero, disable HW copy
24607c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
24617c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyout_small		! go to small copy
24627c478bd9Sstevel@tonic-gate	  nop
24637c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyout_more		! otherwise go to large copy
24647c478bd9Sstevel@tonic-gate	  nop
24657c478bd9Sstevel@tonic-gate
24667c478bd9Sstevel@tonic-gate.xcopyout_small:
24677c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_xcopyout_err), %o5	! .sm_xcopyout_err is lofault
24687c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_xcopyout_err), %o5
24697c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4	! save existing handler
24707c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
24717c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .sm_do_copyout		! common code
24727c478bd9Sstevel@tonic-gate	  stn	%o5, [THREAD_REG + T_LOFAULT]	! set t_lofault
24737c478bd9Sstevel@tonic-gate
24747c478bd9Sstevel@tonic-gate.xcopyout_more:
24757c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp	! new window and frame
24767c478bd9Sstevel@tonic-gate	sethi	%hi(.xcopyout_err), REAL_LOFAULT	! fault label for xcopyout
24777c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .do_copyout		! common code
24787c478bd9Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.xcopyout_err), REAL_LOFAULT
24797c478bd9Sstevel@tonic-gate
24807c478bd9Sstevel@tonic-gate/*
24817c478bd9Sstevel@tonic-gate * We got here because of fault during xcopyout
24827c478bd9Sstevel@tonic-gate * Errno value is in ERRNO
24837c478bd9Sstevel@tonic-gate */
24847c478bd9Sstevel@tonic-gate.xcopyout_err:
24857c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4	! check for copyop handler
24867c478bd9Sstevel@tonic-gate	tst	%o4
24877c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f			! if not, return error
24887c478bd9Sstevel@tonic-gate	  nop
24897c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_XCOPYOUT], %g2	! if handler, invoke it with
24907c478bd9Sstevel@tonic-gate	jmp	%g2				! original arguments
24917c478bd9Sstevel@tonic-gate	  restore %g0, 0, %g0			! dispose of copy window
24927c478bd9Sstevel@tonic-gate2:
24937c478bd9Sstevel@tonic-gate        ret
24947c478bd9Sstevel@tonic-gate	  restore ERRNO, 0, %o0			! return errno value
24957c478bd9Sstevel@tonic-gate
24967c478bd9Sstevel@tonic-gate.sm_xcopyout_err:	! fault handler installed by .xcopyout_small (leaf, no window)
24977c478bd9Sstevel@tonic-gate
24987c478bd9Sstevel@tonic-gate	membar	#Sync
24997c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
25007c478bd9Sstevel@tonic-gate	mov	SM_SAVE_SRC, %o0	! reload the saved src, dst and count
25017c478bd9Sstevel@tonic-gate	mov	SM_SAVE_DST, %o1
25027c478bd9Sstevel@tonic-gate	mov	SM_SAVE_COUNT, %o2
25037c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
25047c478bd9Sstevel@tonic-gate	tst	%o3
25057c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 3f			! if not, return error
25067c478bd9Sstevel@tonic-gate	  nop
25077c478bd9Sstevel@tonic-gate	ldn	[%o3 + CP_XCOPYOUT], %o5	! if handler, invoke it with
25087c478bd9Sstevel@tonic-gate	jmp	%o5				! original arguments
25097c478bd9Sstevel@tonic-gate	  nop
25107c478bd9Sstevel@tonic-gate3:
25117c478bd9Sstevel@tonic-gate	retl
25127c478bd9Sstevel@tonic-gate	  or	%g1, 0, %o0		! return errno value; NOTE(review): reads %g1 directly where .xcopyout_err uses the macro -- confirm they match
25137c478bd9Sstevel@tonic-gate
25147c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyout)
25157c478bd9Sstevel@tonic-gate
25167c478bd9Sstevel@tonic-gate	ENTRY(xcopyout_little)	! byte loop via ASI_AIUSL: dst[i] = src[count-1-i]; returns 0
25177c478bd9Sstevel@tonic-gate	sethi	%hi(.xcopyio_err), %o5	! fault label (defined outside this section)
25187c478bd9Sstevel@tonic-gate	or	%o5, %lo(.xcopyio_err), %o5
25197c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4
25207c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
25217c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]
25227c478bd9Sstevel@tonic-gate	mov	%o4, %o5		! keep old t_lofault in %o5; %o4 is reused below
25237c478bd9Sstevel@tonic-gate
25247c478bd9Sstevel@tonic-gate	subcc	%g0, %o2, %o3		! %o3 = -count, the loop index (Z set if count is 0)
25257c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0		! %o0 = src + count
25267c478bd9Sstevel@tonic-gate	bz,pn	%ncc, 2f		! check for zero bytes
25277c478bd9Sstevel@tonic-gate	  sub	%o2, 1, %o4		! (delay slot) %o4 = count - 1
25287c478bd9Sstevel@tonic-gate	add	%o0, %o4, %o0		! start w/last byte
25297c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1		! %o1 = dst + count, so %o1 + %o3 starts at dst
25307c478bd9Sstevel@tonic-gate	ldub	[%o0 + %o3], %o4	! load src[count - 1]
25317c478bd9Sstevel@tonic-gate
25327c478bd9Sstevel@tonic-gate1:	stba	%o4, [%o1 + %o3]ASI_AIUSL	! store to dst in ascending order
25337c478bd9Sstevel@tonic-gate	inccc	%o3			! step index toward 0; carry is set when it reaches 0
25347c478bd9Sstevel@tonic-gate	sub	%o0, 2, %o0		! get next byte
25357c478bd9Sstevel@tonic-gate	bcc,a,pt %ncc, 1b		! loop until the index wraps to 0
25367c478bd9Sstevel@tonic-gate	  ldub	[%o0 + %o3], %o4	! (annulled slot: only if taken) net src step is -1
25377c478bd9Sstevel@tonic-gate
25387c478bd9Sstevel@tonic-gate2:
25397c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
25407c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
25417c478bd9Sstevel@tonic-gate	retl
25427c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return (0)
25437c478bd9Sstevel@tonic-gate
25447c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyout_little)
25457c478bd9Sstevel@tonic-gate
25467c478bd9Sstevel@tonic-gate/*
25477c478bd9Sstevel@tonic-gate * Copy user data to kernel space (copyin/xcopyin/xcopyin_little)
25487c478bd9Sstevel@tonic-gate */
25497c478bd9Sstevel@tonic-gate
25507c478bd9Sstevel@tonic-gate	ENTRY(copyin)
25517c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
25527c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_small		! go to larger cases
25537c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
25547c478bd9Sstevel@tonic-gate	btst	7, %o3				!
25557c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyin_8			! check for longword alignment
25567c478bd9Sstevel@tonic-gate	  nop
2557*5d9d9091SRichard Lowe	btst	1, %o3				!
25587c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyin_2			! check for half-word
25597c478bd9Sstevel@tonic-gate	  nop
25607c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
25617c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
25627c478bd9Sstevel@tonic-gate	tst	%o3
25637c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_small		! if zero, disable HW copy
25647c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
25657c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_small		! go to small copy
25667c478bd9Sstevel@tonic-gate	  nop
25677c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_more		! otherwise go to large copy
25687c478bd9Sstevel@tonic-gate	  nop
25697c478bd9Sstevel@tonic-gate.copyin_2:
25707c478bd9Sstevel@tonic-gate	btst	3, %o3				!
25717c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyin_4			! check for word alignment
25727c478bd9Sstevel@tonic-gate	  nop
25737c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
25747c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
25757c478bd9Sstevel@tonic-gate	tst	%o3
25767c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_small		! if zero, disable HW copy
25777c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
25787c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_small		! go to small copy
25797c478bd9Sstevel@tonic-gate	  nop
25807c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_more		! otherwise go to large copy
25817c478bd9Sstevel@tonic-gate	  nop
25827c478bd9Sstevel@tonic-gate.copyin_4:
25837c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
25847c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
25857c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
25867c478bd9Sstevel@tonic-gate	tst	%o3
25877c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_small		! if zero, disable HW copy
25887c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
25897c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_small		! go to small copy
25907c478bd9Sstevel@tonic-gate	  nop
25917c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_more		! otherwise go to large copy
25927c478bd9Sstevel@tonic-gate	  nop
25937c478bd9Sstevel@tonic-gate.copyin_8:
25947c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
25957c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
25967c478bd9Sstevel@tonic-gate	tst	%o3
25977c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_small		! if zero, disable HW copy
25987c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
25997c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_small		! go to small copy
26007c478bd9Sstevel@tonic-gate	  nop
26017c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_more		! otherwise go to large copy
26027c478bd9Sstevel@tonic-gate	  nop
26037c478bd9Sstevel@tonic-gate
26047c478bd9Sstevel@tonic-gate	.align	16
26057c478bd9Sstevel@tonic-gate	nop				! instruction alignment
26067c478bd9Sstevel@tonic-gate					! see discussion at start of file
26077c478bd9Sstevel@tonic-gate.copyin_small:
2608*5d9d9091SRichard Lowe	sethi	%hi(.sm_copyin_err), %o5	! .sm_copyin_err is lofault
26097c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_copyin_err), %o5
26107c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4	! set/save t_lofault, no tramp
26117c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
26127c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]
26137c478bd9Sstevel@tonic-gate.sm_do_copyin:
26147c478bd9Sstevel@tonic-gate	mov	%o0, SM_SAVE_SRC
26157c478bd9Sstevel@tonic-gate	mov	%o1, SM_SAVE_DST
26167c478bd9Sstevel@tonic-gate	cmp	%o2, SHORTCOPY		! check for really short case
26177c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .ci_sm_left	!
26187c478bd9Sstevel@tonic-gate	  mov	%o2, SM_SAVE_COUNT
26197c478bd9Sstevel@tonic-gate	cmp	%o2, CHKSIZE		! check for medium length cases
26207c478bd9Sstevel@tonic-gate	bgu,pn	%ncc, .ci_med		!
26217c478bd9Sstevel@tonic-gate	  or	%o0, %o1, %o3		! prepare alignment check
26227c478bd9Sstevel@tonic-gate	andcc	%o3, 0x3, %g0		! test for alignment
26237c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_word	! branch to word aligned case
26247c478bd9Sstevel@tonic-gate.ci_sm_movebytes:
26257c478bd9Sstevel@tonic-gate	  sub	%o2, 3, %o2		! adjust count to allow cc zero test
26267c478bd9Sstevel@tonic-gate.ci_sm_notalign4:
26277c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! read byte
26287c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
26297c478bd9Sstevel@tonic-gate	stb	%o3, [%o1]		! write byte
26307c478bd9Sstevel@tonic-gate	add	%o0, 1, %o0		! advance SRC by 1
26317c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! repeat for a total of 4 bytes
26327c478bd9Sstevel@tonic-gate	add	%o0, 1, %o0		! advance SRC by 1
26337c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 + 1]
26347c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
26357c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3
26367c478bd9Sstevel@tonic-gate	add	%o0, 1, %o0		! advance SRC by 1
26377c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 - 2]
26387c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3
26397c478bd9Sstevel@tonic-gate	add	%o0, 1, %o0		! advance SRC by 1
26407c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_sm_notalign4	! loop til 3 or fewer bytes remain
26417c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 - 1]
26427c478bd9Sstevel@tonic-gate	add	%o2, 3, %o2		! restore count
26437c478bd9Sstevel@tonic-gate.ci_sm_left:
26447c478bd9Sstevel@tonic-gate	tst	%o2
26457c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
26467c478bd9Sstevel@tonic-gate	  nop
26477c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3		! load one byte
26487c478bd9Sstevel@tonic-gate	deccc	%o2			! reduce count for cc test
26497c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
26507c478bd9Sstevel@tonic-gate	  stb	%o3,[%o1]		! store one byte
26517c478bd9Sstevel@tonic-gate	inc	%o0
26527c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! load second byte
26537c478bd9Sstevel@tonic-gate	deccc	%o2
26547c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
26557c478bd9Sstevel@tonic-gate	  stb	%o3,[%o1 + 1]		! store second byte
26567c478bd9Sstevel@tonic-gate	inc	%o0
26577c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! load third byte
26587c478bd9Sstevel@tonic-gate	stb	%o3,[%o1 + 2]		! store third byte
26597c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
26607c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
26617c478bd9Sstevel@tonic-gate	retl
26627c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
26637c478bd9Sstevel@tonic-gate	.align	16
26647c478bd9Sstevel@tonic-gate.ci_sm_words:
26657c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3		! read word
26667c478bd9Sstevel@tonic-gate.ci_sm_wordx:
26677c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! update count
26687c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! write word
26697c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! update SRC
26707c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! update DST
26717c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3	! read word
26727c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! update SRC
26737c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_sm_words	! loop til done
26747c478bd9Sstevel@tonic-gate	  stw	%o3, [%o1 - 4]		! write word
26757c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
26767c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
26777c478bd9Sstevel@tonic-gate	  nop
26787c478bd9Sstevel@tonic-gate	deccc	%o2
26797c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_byte
26807c478bd9Sstevel@tonic-gate.ci_sm_half:
26817c478bd9Sstevel@tonic-gate	  subcc	%o2, 2, %o2		! reduce count by 2
26827c478bd9Sstevel@tonic-gate	lduha	[%o0]ASI_USER, %o3	! read half word
26837c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
26847c478bd9Sstevel@tonic-gate	add	%o1, 2, %o1		! advance DST by 2
26857c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_sm_half	! loop til done
26867c478bd9Sstevel@tonic-gate	  sth	%o3, [%o1 - 2]		! write half word
26877c478bd9Sstevel@tonic-gate	addcc	%o2, 1, %o2		! restore count
26887c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
26897c478bd9Sstevel@tonic-gate	  nop
26907c478bd9Sstevel@tonic-gate.ci_sm_byte:
26917c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3
26927c478bd9Sstevel@tonic-gate	stb	%o3, [%o1]
26937c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
26947c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
26957c478bd9Sstevel@tonic-gate	retl
26967c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
26977c478bd9Sstevel@tonic-gate	.align	16
26987c478bd9Sstevel@tonic-gate.ci_sm_word:
26997c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! update count
27007c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_sm_wordx
27017c478bd9Sstevel@tonic-gate	  lduwa	[%o0]ASI_USER, %o3		! read word
27027c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore count
27037c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
27047c478bd9Sstevel@tonic-gate	  stw	%o3, [%o1]		! write word
27057c478bd9Sstevel@tonic-gate	deccc	%o2			! reduce count for cc test
27067c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0
27077c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! load one byte
27087c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
27097c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 + 4]		! store one byte
27107c478bd9Sstevel@tonic-gate	inc	%o0
27117c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! load second byte
27127c478bd9Sstevel@tonic-gate	deccc	%o2
27137c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
27147c478bd9Sstevel@tonic-gate	  stb	%o3, [%o1 + 5]		! store second byte
27157c478bd9Sstevel@tonic-gate	inc	%o0
27167c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! load third byte
27177c478bd9Sstevel@tonic-gate	stb	%o3, [%o1 + 6]		! store third byte
27187c478bd9Sstevel@tonic-gate.ci_sm_exit:
27197c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
27207c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
27217c478bd9Sstevel@tonic-gate	retl
27227c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return 0
27237c478bd9Sstevel@tonic-gate
27247c478bd9Sstevel@tonic-gate	.align 16
27257c478bd9Sstevel@tonic-gate.ci_med:
27267c478bd9Sstevel@tonic-gate	xor	%o0, %o1, %o3		! setup alignment check
27277c478bd9Sstevel@tonic-gate	btst	1, %o3
27287c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .ci_sm_movebytes	! unaligned
27297c478bd9Sstevel@tonic-gate	  nop
27307c478bd9Sstevel@tonic-gate	btst	3, %o3
27317c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .ci_med_half	! halfword aligned
27327c478bd9Sstevel@tonic-gate	  nop
27337c478bd9Sstevel@tonic-gate	btst	7, %o3
27347c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .ci_med_word	! word aligned
27357c478bd9Sstevel@tonic-gate	  nop
27367c478bd9Sstevel@tonic-gate.ci_med_long:
27377c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
27387c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_med_long1	! word alignment
27397c478bd9Sstevel@tonic-gate	  nop
27407c478bd9Sstevel@tonic-gate.ci_med_long0:
27417c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3		! load one byte
27427c478bd9Sstevel@tonic-gate	inc	%o0
27437c478bd9Sstevel@tonic-gate	stb	%o3,[%o1]		! store byte
27447c478bd9Sstevel@tonic-gate	inc	%o1
27457c478bd9Sstevel@tonic-gate	btst	3, %o0
27467c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .ci_med_long0
27477c478bd9Sstevel@tonic-gate	  dec	%o2
27487c478bd9Sstevel@tonic-gate.ci_med_long1:			! word aligned
27497c478bd9Sstevel@tonic-gate	btst	7, %o0			! check for long word
27507c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_med_long2
27517c478bd9Sstevel@tonic-gate	  nop
27527c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3	! load word
27537c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
27547c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! store word
27557c478bd9Sstevel@tonic-gate	add	%o1, 4, %o1		! advance DST by 4
27567c478bd9Sstevel@tonic-gate	sub	%o2, 4, %o2		! reduce count by 4
27577c478bd9Sstevel@tonic-gate!
27587c478bd9Sstevel@tonic-gate!  Now long word aligned and have at least 32 bytes to move
27597c478bd9Sstevel@tonic-gate!
27607c478bd9Sstevel@tonic-gate.ci_med_long2:
27617c478bd9Sstevel@tonic-gate	sub	%o2, 31, %o2		! adjust count to allow cc zero test
27627c478bd9Sstevel@tonic-gate.ci_med_lmove:
27637c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_USER, %o3	! read long word
27647c478bd9Sstevel@tonic-gate	subcc	%o2, 32, %o2		! reduce count by 32
27657c478bd9Sstevel@tonic-gate	stx	%o3, [%o1]		! write long word
27667c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
27677c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_USER, %o3	! repeat for a total for 4 long words
27687c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
27697c478bd9Sstevel@tonic-gate	stx	%o3, [%o1 + 8]
27707c478bd9Sstevel@tonic-gate	add	%o1, 32, %o1		! advance DST by 32
27717c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_USER, %o3
27727c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
27737c478bd9Sstevel@tonic-gate	stx	%o3, [%o1 - 16]
27747c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_USER, %o3
27757c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
27767c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_med_lmove	! loop til 31 or fewer bytes left
27777c478bd9Sstevel@tonic-gate	  stx	%o3, [%o1 - 8]
27787c478bd9Sstevel@tonic-gate	addcc	%o2, 24, %o2		! restore count to long word offset
27797c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .ci_med_lextra	! check for more long words to move
27807c478bd9Sstevel@tonic-gate	  nop
27817c478bd9Sstevel@tonic-gate.ci_med_lword:
27827c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_USER, %o3	! read long word
27837c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
27847c478bd9Sstevel@tonic-gate	stx	%o3, [%o1]		! write long word
27857c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0		! advance SRC by 8
27867c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_med_lword	! loop til 7 or fewer bytes left
27877c478bd9Sstevel@tonic-gate	  add	%o1, 8, %o1		! advance DST by 8
27887c478bd9Sstevel@tonic-gate.ci_med_lextra:
27897c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore rest of count
27907c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit	! if zero, then done
27917c478bd9Sstevel@tonic-gate	  deccc	%o2
27927c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_byte
27937c478bd9Sstevel@tonic-gate	  nop
27947c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .ci_sm_half
27957c478bd9Sstevel@tonic-gate	  nop
27967c478bd9Sstevel@tonic-gate
27977c478bd9Sstevel@tonic-gate	.align 16
27987c478bd9Sstevel@tonic-gate	nop				! instruction alignment
27997c478bd9Sstevel@tonic-gate					! see discussion at start of file
28007c478bd9Sstevel@tonic-gate.ci_med_word:
28017c478bd9Sstevel@tonic-gate	btst	3, %o0			! check for
28027c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_med_word1	! word alignment
28037c478bd9Sstevel@tonic-gate	  nop
28047c478bd9Sstevel@tonic-gate.ci_med_word0:
28057c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! load one byte
28067c478bd9Sstevel@tonic-gate	inc	%o0
28077c478bd9Sstevel@tonic-gate	stb	%o3,[%o1]		! store byte
28087c478bd9Sstevel@tonic-gate	inc	%o1
28097c478bd9Sstevel@tonic-gate	btst	3, %o0
28107c478bd9Sstevel@tonic-gate	bnz,pt	%ncc, .ci_med_word0
28117c478bd9Sstevel@tonic-gate	  dec	%o2
28127c478bd9Sstevel@tonic-gate!
28137c478bd9Sstevel@tonic-gate!  Now word aligned and have at least 36 bytes to move
28147c478bd9Sstevel@tonic-gate!
28157c478bd9Sstevel@tonic-gate.ci_med_word1:
28167c478bd9Sstevel@tonic-gate	sub	%o2, 15, %o2		! adjust count to allow cc zero test
28177c478bd9Sstevel@tonic-gate.ci_med_wmove:
28187c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3	! read word
28197c478bd9Sstevel@tonic-gate	subcc	%o2, 16, %o2		! reduce count by 16
28207c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! write word
28217c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
28227c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3	! repeat for a total for 4 words
28237c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
28247c478bd9Sstevel@tonic-gate	stw	%o3, [%o1 + 4]
28257c478bd9Sstevel@tonic-gate	add	%o1, 16, %o1		! advance DST by 16
28267c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3
28277c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
28287c478bd9Sstevel@tonic-gate	stw	%o3, [%o1 - 8]
28297c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3
28307c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
28317c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_med_wmove	! loop til 15 or fewer bytes left
28327c478bd9Sstevel@tonic-gate	  stw	%o3, [%o1 - 4]
28337c478bd9Sstevel@tonic-gate	addcc	%o2, 12, %o2		! restore count to word offset
28347c478bd9Sstevel@tonic-gate	ble,pt	%ncc, .ci_med_wextra	! check for more words to move
28357c478bd9Sstevel@tonic-gate	  nop
28367c478bd9Sstevel@tonic-gate.ci_med_word2:
28377c478bd9Sstevel@tonic-gate	lduwa	[%o0]ASI_USER, %o3	! read word
28387c478bd9Sstevel@tonic-gate	subcc	%o2, 4, %o2		! reduce count by 4
28397c478bd9Sstevel@tonic-gate	stw	%o3, [%o1]		! write word
28407c478bd9Sstevel@tonic-gate	add	%o0, 4, %o0		! advance SRC by 4
28417c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_med_word2	! loop til 3 or fewer bytes left
28427c478bd9Sstevel@tonic-gate	  add	%o1, 4, %o1		! advance DST by 4
28437c478bd9Sstevel@tonic-gate.ci_med_wextra:
28447c478bd9Sstevel@tonic-gate	addcc	%o2, 3, %o2		! restore rest of count
28457c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit	! if zero, then done
28467c478bd9Sstevel@tonic-gate	  deccc	%o2
28477c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_byte
28487c478bd9Sstevel@tonic-gate	  nop
28497c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .ci_sm_half
28507c478bd9Sstevel@tonic-gate	  nop
28517c478bd9Sstevel@tonic-gate
28527c478bd9Sstevel@tonic-gate	.align 16
28537c478bd9Sstevel@tonic-gate	nop				! instruction alignment
28547c478bd9Sstevel@tonic-gate					! see discussion at start of file
28557c478bd9Sstevel@tonic-gate.ci_med_half:
28567c478bd9Sstevel@tonic-gate	btst	1, %o0			! check for
28577c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_med_half1	! half word alignment
28587c478bd9Sstevel@tonic-gate	  nop
28597c478bd9Sstevel@tonic-gate	lduba	[%o0]ASI_USER, %o3	! load one byte
28607c478bd9Sstevel@tonic-gate	inc	%o0
28617c478bd9Sstevel@tonic-gate	stb	%o3,[%o1]		! store byte
28627c478bd9Sstevel@tonic-gate	inc	%o1
28637c478bd9Sstevel@tonic-gate	dec	%o2
28647c478bd9Sstevel@tonic-gate!
28657c478bd9Sstevel@tonic-gate!  Now half word aligned and have at least 38 bytes to move
28667c478bd9Sstevel@tonic-gate!
28677c478bd9Sstevel@tonic-gate.ci_med_half1:
28687c478bd9Sstevel@tonic-gate	sub	%o2, 7, %o2		! adjust count to allow cc zero test
28697c478bd9Sstevel@tonic-gate.ci_med_hmove:
28707c478bd9Sstevel@tonic-gate	lduha	[%o0]ASI_USER, %o3	! read half word
28717c478bd9Sstevel@tonic-gate	subcc	%o2, 8, %o2		! reduce count by 8
28727c478bd9Sstevel@tonic-gate	sth	%o3, [%o1]		! write half word
28737c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
28747c478bd9Sstevel@tonic-gate	lduha	[%o0]ASI_USER, %o3	! repeat for a total for 4 halfwords
28757c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
28767c478bd9Sstevel@tonic-gate	sth	%o3, [%o1 + 2]
28777c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1		! advance DST by 8
28787c478bd9Sstevel@tonic-gate	lduha	[%o0]ASI_USER, %o3
28797c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
28807c478bd9Sstevel@tonic-gate	sth	%o3, [%o1 - 4]
28817c478bd9Sstevel@tonic-gate	lduha	[%o0]ASI_USER, %o3
28827c478bd9Sstevel@tonic-gate	add	%o0, 2, %o0		! advance SRC by 2
28837c478bd9Sstevel@tonic-gate	bgt,pt	%ncc, .ci_med_hmove	! loop til 7 or fewer bytes left
28847c478bd9Sstevel@tonic-gate	  sth	%o3, [%o1 - 2]
28857c478bd9Sstevel@tonic-gate	addcc	%o2, 7, %o2		! restore count
28867c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_exit
28877c478bd9Sstevel@tonic-gate	  deccc	%o2
28887c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .ci_sm_byte
28897c478bd9Sstevel@tonic-gate	  nop
28907c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .ci_sm_half
28917c478bd9Sstevel@tonic-gate	  nop
28927c478bd9Sstevel@tonic-gate
28937c478bd9Sstevel@tonic-gate.sm_copyin_err:
28947c478bd9Sstevel@tonic-gate	membar	#Sync
28957c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
28967c478bd9Sstevel@tonic-gate	mov	SM_SAVE_SRC, %o0
28977c478bd9Sstevel@tonic-gate	mov	SM_SAVE_DST, %o1
28987c478bd9Sstevel@tonic-gate	mov	SM_SAVE_COUNT, %o2
28997c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
29007c478bd9Sstevel@tonic-gate	tst	%o3
29017c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 3f			! if not, return error
29027c478bd9Sstevel@tonic-gate	  nop
29037c478bd9Sstevel@tonic-gate	ldn	[%o3 + CP_COPYIN], %o5		! if handler, invoke it with
29047c478bd9Sstevel@tonic-gate	jmp	%o5				! original arguments
29057c478bd9Sstevel@tonic-gate	  nop
29067c478bd9Sstevel@tonic-gate3:
29077c478bd9Sstevel@tonic-gate	retl
29087c478bd9Sstevel@tonic-gate	  or	%g0, -1, %o0		! return errno value
29097c478bd9Sstevel@tonic-gate
29107c478bd9Sstevel@tonic-gate	SET_SIZE(copyin)
29117c478bd9Sstevel@tonic-gate
29127c478bd9Sstevel@tonic-gate
29137c478bd9Sstevel@tonic-gate/*
29147c478bd9Sstevel@tonic-gate * The _more entry points are not intended to be used directly by
29157c478bd9Sstevel@tonic-gate * any caller from outside this file.  They are provided to allow
29167c478bd9Sstevel@tonic-gate * profiling and dtrace of the portions of the copy code that uses
29177c478bd9Sstevel@tonic-gate * the floating point registers.
29187c478bd9Sstevel@tonic-gate * This entry is particularly important as DTRACE (at least as of
29197c478bd9Sstevel@tonic-gate * 4/2004) does not support leaf functions.
29207c478bd9Sstevel@tonic-gate */
29217c478bd9Sstevel@tonic-gate
/*
 * copyin_more: register-windowed copyin path that moves data with the
 * floating point registers and block stores.
 *
 * The body at .do_copyin is shared: .xcopyin_more also branches here after
 * doing its own save and loading a different value into REAL_LOFAULT.
 *
 * Outline of the code below:
 *   1. Install copyio_fault in t_lofault, keeping the previous handler
 *      in %l6, and stash the three incoming arguments in SAVE_SRC,
 *      SAVE_DST and SAVE_COUNT.
 *   2. Save the caller's %fprs and %gsr in the stack frame and make sure
 *      FPRS_FEF is set.
 *   3. Copy single bytes until DST is VIS_BLOCKSIZE aligned.
 *   4. Run the block loop: ldda loads through %asi (set to ASI_USER),
 *      faligndata realignment, stda stores through ASI_BLK_P.
 *   5. Copy any leftover bytes one at a time.
 *   6. Restore %gsr, %fprs and t_lofault, then return 0.
 *
 * .copyin_err hands the request to the thread's CP_COPYIN copyops handler
 * if one is installed, otherwise it returns -1.
 *
 * NOTE(review): SRC, DST, CNT, REALSRC, TMP, SAVE_*, REAL_LOFAULT and the
 * FP_*, BST_*, BLD_*, FZERO* and FPRAS_* macros are defined outside this
 * part of the file; what is said about them here is limited to how they
 * are used below and should be confirmed against their definitions.
 */
29227c478bd9Sstevel@tonic-gate	ENTRY(copyin_more)
29237c478bd9Sstevel@tonic-gate.copyin_more:
29247c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
29257c478bd9Sstevel@tonic-gate	set	.copyin_err, REAL_LOFAULT
29267c478bd9Sstevel@tonic-gate
29277c478bd9Sstevel@tonic-gate/*
29287c478bd9Sstevel@tonic-gate * Copy ins that reach here are larger than VIS_COPY_THRESHOLD bytes
29297c478bd9Sstevel@tonic-gate */
29307c478bd9Sstevel@tonic-gate.do_copyin:
29317c478bd9Sstevel@tonic-gate	set	copyio_fault, %l7		! .copyio_fault is lofault val
29327c478bd9Sstevel@tonic-gate
29337c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %l6	! save existing handler
29347c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
29357c478bd9Sstevel@tonic-gate	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
29367c478bd9Sstevel@tonic-gate
29377c478bd9Sstevel@tonic-gate	mov	%i0, SAVE_SRC
29387c478bd9Sstevel@tonic-gate	mov	%i1, SAVE_DST
29397c478bd9Sstevel@tonic-gate	mov	%i2, SAVE_COUNT
29407c478bd9Sstevel@tonic-gate
29417c478bd9Sstevel@tonic-gate	FP_NOMIGRATE(6, 7)
29427c478bd9Sstevel@tonic-gate
/*
 * Record the caller's %fprs in the frame.  When FPRS_FEF is clear the
 * branch is taken and its annulled delay slot sets FPRS_FEF, skipping the
 * BST_FPQ2Q4_TOSTACK step.  When FPRS_FEF is already set the delay slot is
 * annulled and BST_FPQ2Q4_TOSTACK runs (presumably spilling the FP
 * registers used below to the stack -- confirm against the macro).
 */
29437c478bd9Sstevel@tonic-gate	rd	%fprs, %o2		! check for unused fp
29447c478bd9Sstevel@tonic-gate	st	%o2, [%fp + STACK_BIAS - SAVED_FPRS_OFFSET] ! save orig %fprs
29457c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o2
29467c478bd9Sstevel@tonic-gate	bz,a,pt	%icc, .do_blockcopyin
29477c478bd9Sstevel@tonic-gate	  wr	%g0, FPRS_FEF, %fprs
29487c478bd9Sstevel@tonic-gate
29497c478bd9Sstevel@tonic-gate	BST_FPQ2Q4_TOSTACK(%o2)
29507c478bd9Sstevel@tonic-gate
29517c478bd9Sstevel@tonic-gate.do_blockcopyin:
29527c478bd9Sstevel@tonic-gate	rd	%gsr, %o2
29537c478bd9Sstevel@tonic-gate	stx	%o2, [%fp + STACK_BIAS - SAVED_GSR_OFFSET]	! save gsr
/*
 * Tag the saved handler value in %l6 with FPUSED_FLAG.  The normal exit
 * path strips the flag again before writing %l6 back to t_lofault.
 */
29547c478bd9Sstevel@tonic-gate	or	%l6, FPUSED_FLAG, %l6
29557c478bd9Sstevel@tonic-gate
/*
 * TMP = DST modulo VIS_BLOCKSIZE.  Zero means DST is already block
 * aligned, so skip ahead to label 2.  The neg in the delay slot is not
 * annulled and runs on both paths; on the fall-through path the add that
 * follows turns it into VIS_BLOCKSIZE - (DST modulo VIS_BLOCKSIZE).
 */
29567c478bd9Sstevel@tonic-gate	andcc	DST, VIS_BLOCKSIZE - 1, TMP
29577c478bd9Sstevel@tonic-gate	mov	ASI_USER, %asi
29587c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f
29597c478bd9Sstevel@tonic-gate	  neg	TMP
29607c478bd9Sstevel@tonic-gate	add	TMP, VIS_BLOCKSIZE, TMP
29617c478bd9Sstevel@tonic-gate
29627c478bd9Sstevel@tonic-gate	! TMP = bytes required to align DST on FP_BLOCK boundary
29637c478bd9Sstevel@tonic-gate	! Using SRC as a tmp here
/*
 * Three or fewer alignment bytes go straight to the single-byte loop at
 * label 1.  CNT is reduced by TMP in the delay slot on either path.
 */
29647c478bd9Sstevel@tonic-gate	cmp	TMP, 3
29657c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, 1f
29667c478bd9Sstevel@tonic-gate	  sub	CNT,TMP,CNT		! adjust main count
29677c478bd9Sstevel@tonic-gate	sub	TMP, 3, TMP		! adjust for end of loop test
29687c478bd9Sstevel@tonic-gate.ci_blkalign:
29697c478bd9Sstevel@tonic-gate	lduba	[REALSRC]%asi, SRC	! move 4 bytes per loop iteration
29707c478bd9Sstevel@tonic-gate	stb	SRC, [DST]
29717c478bd9Sstevel@tonic-gate	subcc	TMP, 4, TMP
29727c478bd9Sstevel@tonic-gate	lduba	[REALSRC + 1]%asi, SRC
29737c478bd9Sstevel@tonic-gate	add	REALSRC, 4, REALSRC
29747c478bd9Sstevel@tonic-gate	stb	SRC, [DST + 1]
29757c478bd9Sstevel@tonic-gate	lduba	[REALSRC - 2]%asi, SRC
29767c478bd9Sstevel@tonic-gate	add	DST, 4, DST
29777c478bd9Sstevel@tonic-gate	stb	SRC, [DST - 2]
29787c478bd9Sstevel@tonic-gate	lduba	[REALSRC - 1]%asi, SRC
29797c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, .ci_blkalign
29807c478bd9Sstevel@tonic-gate	  stb	SRC, [DST - 1]
29817c478bd9Sstevel@tonic-gate
29827c478bd9Sstevel@tonic-gate	addcc	TMP, 3, TMP		! restore count adjustment
29837c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f		! no bytes left?
29847c478bd9Sstevel@tonic-gate	  nop
/*
 * Single-byte loop for the remaining alignment bytes; the store sits in
 * the branch delay slot, after DST has already been advanced.
 */
29857c478bd9Sstevel@tonic-gate1:	lduba	[REALSRC]%asi, SRC
29867c478bd9Sstevel@tonic-gate	inc	REALSRC
29877c478bd9Sstevel@tonic-gate	inc	DST
29887c478bd9Sstevel@tonic-gate	deccc	TMP
29897c478bd9Sstevel@tonic-gate	bgu	%ncc, 1b
29907c478bd9Sstevel@tonic-gate	  stb	SRC, [DST - 1]
29917c478bd9Sstevel@tonic-gate
/*
 * SRC becomes REALSRC rounded down to an 8-byte boundary.  The alignaddr
 * writes its address result to %g0, i.e. discards it; it is issued for
 * its effect on the alignment field of %gsr, which the faligndata
 * instructions below consume.
 */
29927c478bd9Sstevel@tonic-gate2:
29937c478bd9Sstevel@tonic-gate	andn	REALSRC, 0x7, SRC
29947c478bd9Sstevel@tonic-gate	alignaddr REALSRC, %g0, %g0
29957c478bd9Sstevel@tonic-gate
29967c478bd9Sstevel@tonic-gate	! SRC - 8-byte aligned
29977c478bd9Sstevel@tonic-gate	! DST - 64-byte aligned
/*
 * Prime the pipeline: issue the initial prefetches, load the first eight
 * source doublewords into %f16-%f30, start realigning them into
 * %f48-%f58, and preload the first doubleword of the following block
 * into %f16.  CNT, SRC and REALSRC are then moved on by one block.
 */
29987c478bd9Sstevel@tonic-gate	prefetcha [SRC]%asi, #one_read
29997c478bd9Sstevel@tonic-gate	prefetcha [SRC + (1 * VIS_BLOCKSIZE)]%asi, #one_read
30007c478bd9Sstevel@tonic-gate	prefetcha [SRC + (2 * VIS_BLOCKSIZE)]%asi, #one_read
30017c478bd9Sstevel@tonic-gate	prefetcha [SRC + (3 * VIS_BLOCKSIZE)]%asi, #one_read
30027c478bd9Sstevel@tonic-gate	ldda	[SRC]%asi, %f16
30037c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 4
30047c478bd9Sstevel@tonic-gate	prefetcha [SRC + (4 * VIS_BLOCKSIZE)]%asi, #one_read
30057c478bd9Sstevel@tonic-gate#endif
30067c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x08]%asi, %f18
30077c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 5
30087c478bd9Sstevel@tonic-gate	prefetcha [SRC + (5 * VIS_BLOCKSIZE)]%asi, #one_read
30097c478bd9Sstevel@tonic-gate#endif
30107c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x10]%asi, %f20
30117c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 6
30127c478bd9Sstevel@tonic-gate	prefetcha [SRC + (6 * VIS_BLOCKSIZE)]%asi, #one_read
30137c478bd9Sstevel@tonic-gate#endif
30147c478bd9Sstevel@tonic-gate	faligndata %f16, %f18, %f48
30157c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x18]%asi, %f22
30167c478bd9Sstevel@tonic-gate#if CHEETAH_PREFETCH > 7
30177c478bd9Sstevel@tonic-gate	prefetcha [SRC + (7 * VIS_BLOCKSIZE)]%asi, #one_read
30187c478bd9Sstevel@tonic-gate#endif
30197c478bd9Sstevel@tonic-gate	faligndata %f18, %f20, %f50
30207c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x20]%asi, %f24
30217c478bd9Sstevel@tonic-gate	faligndata %f20, %f22, %f52
30227c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x28]%asi, %f26
30237c478bd9Sstevel@tonic-gate	faligndata %f22, %f24, %f54
30247c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x30]%asi, %f28
30257c478bd9Sstevel@tonic-gate	faligndata %f24, %f26, %f56
30267c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x38]%asi, %f30
30277c478bd9Sstevel@tonic-gate	faligndata %f26, %f28, %f58
30287c478bd9Sstevel@tonic-gate	ldda	[SRC + VIS_BLOCKSIZE]%asi, %f16
30297c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
30307c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
30317c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
30327c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, 1f
30337c478bd9Sstevel@tonic-gate	  nop
30347c478bd9Sstevel@tonic-gate	.align	16
/*
 * Main block loop.  Each pass completes the block being assembled in
 * %f48-%f62, writes it out with one block store, and loads and realigns
 * the next source block.  The loop repeats while CNT is greater than
 * VIS_BLOCKSIZE + 8 (unsigned compare); SRC is advanced in the delay
 * slot.
 */
30357c478bd9Sstevel@tonic-gate1:
30367c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x08]%asi, %f18
30377c478bd9Sstevel@tonic-gate	faligndata %f28, %f30, %f60
30387c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x10]%asi, %f20
30397c478bd9Sstevel@tonic-gate	faligndata %f30, %f16, %f62
30407c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_P
30417c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x18]%asi, %f22
30427c478bd9Sstevel@tonic-gate	faligndata %f16, %f18, %f48
30437c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x20]%asi, %f24
30447c478bd9Sstevel@tonic-gate	faligndata %f18, %f20, %f50
30457c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x28]%asi, %f26
30467c478bd9Sstevel@tonic-gate	faligndata %f20, %f22, %f52
30477c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x30]%asi, %f28
30487c478bd9Sstevel@tonic-gate	faligndata %f22, %f24, %f54
30497c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x38]%asi, %f30
30507c478bd9Sstevel@tonic-gate	faligndata %f24, %f26, %f56
30517c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
30527c478bd9Sstevel@tonic-gate	ldda	[SRC + VIS_BLOCKSIZE]%asi, %f16
30537c478bd9Sstevel@tonic-gate	faligndata %f26, %f28, %f58
30547c478bd9Sstevel@tonic-gate	prefetcha [SRC + ((CHEETAH_PREFETCH) * VIS_BLOCKSIZE) + 8]%asi, #one_read
30557c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
30567c478bd9Sstevel@tonic-gate	prefetcha [SRC + ((CHEETAH_2ND_PREFETCH) * VIS_BLOCKSIZE)]%asi, #one_read
30577c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
30587c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE + 8
30597c478bd9Sstevel@tonic-gate	bgu,pt	%ncc, 1b
30607c478bd9Sstevel@tonic-gate	  add	SRC, VIS_BLOCKSIZE, SRC
30617c478bd9Sstevel@tonic-gate
/*
 * Loop exit.  If exactly VIS_BLOCKSIZE bytes remain and REALSRC is 8-byte
 * aligned, branch to label 2 below, which copies that last block without
 * faligndata.  In every other case control reaches the first label 3,
 * which stores the pending block and then goes to the byte tail.  The
 * andcc in the bne delay slot is not annulled, so it runs either way.
 */
30627c478bd9Sstevel@tonic-gate	! only if REALSRC & 0x7 is 0
30637c478bd9Sstevel@tonic-gate	cmp	CNT, VIS_BLOCKSIZE
30647c478bd9Sstevel@tonic-gate	bne	%ncc, 3f
30657c478bd9Sstevel@tonic-gate	  andcc	REALSRC, 0x7, %g0
30667c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f
30677c478bd9Sstevel@tonic-gate	  nop
3068*5d9d9091SRichard Lowe3:
30697c478bd9Sstevel@tonic-gate	faligndata %f28, %f30, %f60
30707c478bd9Sstevel@tonic-gate	faligndata %f30, %f16, %f62
30717c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_P
30727c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
30737c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 3f
30747c478bd9Sstevel@tonic-gate	  nop
/*
 * Aligned source with one whole block left: finish and store the pending
 * block, move the last block from %f16-%f30 into %f48-%f62 with fsrc1
 * register copies, store it, and jump past the byte tail to label 4.
 */
30757c478bd9Sstevel@tonic-gate2:
30767c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x08]%asi, %f18
30777c478bd9Sstevel@tonic-gate	fsrc1	%f28, %f60
30787c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x10]%asi, %f20
30797c478bd9Sstevel@tonic-gate	fsrc1	%f30, %f62
30807c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_P
30817c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x18]%asi, %f22
30827c478bd9Sstevel@tonic-gate	fsrc1	%f16, %f48
30837c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x20]%asi, %f24
30847c478bd9Sstevel@tonic-gate	fsrc1	%f18, %f50
30857c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x28]%asi, %f26
30867c478bd9Sstevel@tonic-gate	fsrc1	%f20, %f52
30877c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x30]%asi, %f28
30887c478bd9Sstevel@tonic-gate	fsrc1	%f22, %f54
30897c478bd9Sstevel@tonic-gate	ldda	[SRC + 0x38]%asi, %f30
30907c478bd9Sstevel@tonic-gate	fsrc1	%f24, %f56
30917c478bd9Sstevel@tonic-gate	sub	CNT, VIS_BLOCKSIZE, CNT
30927c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
30937c478bd9Sstevel@tonic-gate	add	SRC, VIS_BLOCKSIZE, SRC
30947c478bd9Sstevel@tonic-gate	add	REALSRC, VIS_BLOCKSIZE, REALSRC
30957c478bd9Sstevel@tonic-gate	fsrc1	%f26, %f58
30967c478bd9Sstevel@tonic-gate	fsrc1	%f28, %f60
30977c478bd9Sstevel@tonic-gate	fsrc1	%f30, %f62
30987c478bd9Sstevel@tonic-gate	stda	%f48, [DST]ASI_BLK_P
30997c478bd9Sstevel@tonic-gate	add	DST, VIS_BLOCKSIZE, DST
31007c478bd9Sstevel@tonic-gate	ba,a,pt	%ncc, 4f
31017c478bd9Sstevel@tonic-gate	  nop
31027c478bd9Sstevel@tonic-gate
/*
 * Byte tail: copy the remaining CNT bytes one at a time from the user
 * address space, or go straight to label 4 when CNT is zero.
 */
31037c478bd9Sstevel@tonic-gate3:	tst	CNT
31047c478bd9Sstevel@tonic-gate	bz,a	%ncc, 4f
31057c478bd9Sstevel@tonic-gate	  nop
31067c478bd9Sstevel@tonic-gate
31077c478bd9Sstevel@tonic-gate5:	lduba	[REALSRC]ASI_USER, TMP
31087c478bd9Sstevel@tonic-gate	inc	REALSRC
31097c478bd9Sstevel@tonic-gate	inc	DST
31107c478bd9Sstevel@tonic-gate	deccc	CNT
31117c478bd9Sstevel@tonic-gate	bgu	%ncc, 5b
31127c478bd9Sstevel@tonic-gate	  stb	TMP, [DST - 1]
31137c478bd9Sstevel@tonic-gate4:
31147c478bd9Sstevel@tonic-gate
/*
 * Successful completion.  NOTE(review): the FPRAS_* macros are defined
 * elsewhere; presumably they implement a periodic floating point sanity
 * check for this copy routine -- confirm against their definitions.
 */
31157c478bd9Sstevel@tonic-gate.copyin_exit:
31167c478bd9Sstevel@tonic-gate	membar	#Sync
31177c478bd9Sstevel@tonic-gate
31187c478bd9Sstevel@tonic-gate	FPRAS_INTERVAL(FPRAS_COPYIN, 1, %l5, %o2, %o3, %o4, %o5, 8)
31197c478bd9Sstevel@tonic-gate	FPRAS_REWRITE_TYPE1(1, %l5, %f48, %o2, 9)
31207c478bd9Sstevel@tonic-gate	FPRAS_CHECK(FPRAS_COPYIN, %l5, 9)	! lose outputs
31217c478bd9Sstevel@tonic-gate
31227c478bd9Sstevel@tonic-gate	ldx	[%fp + STACK_BIAS - SAVED_GSR_OFFSET], %o2	! restore gsr
31237c478bd9Sstevel@tonic-gate	wr	%o2, 0, %gsr
31247c478bd9Sstevel@tonic-gate
/*
 * Reload the %fprs value saved on entry.  If FPRS_FEF was clear then,
 * branch to label 4, which runs FZEROQ2Q4 before putting %fprs back.
 * Otherwise run BLD_FPQ2Q4_FROMSTACK (presumably the counterpart of
 * BST_FPQ2Q4_TOSTACK -- confirm) and restore %fprs in the delay slot of
 * the branch to label 1.
 */
31257c478bd9Sstevel@tonic-gate	ld	[%fp + STACK_BIAS - SAVED_FPRS_OFFSET], %o3
31267c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %o3
31277c478bd9Sstevel@tonic-gate	bz,pt	%icc, 4f
31287c478bd9Sstevel@tonic-gate	  nop
31297c478bd9Sstevel@tonic-gate
31307c478bd9Sstevel@tonic-gate	BLD_FPQ2Q4_FROMSTACK(%o2)
31317c478bd9Sstevel@tonic-gate
31327c478bd9Sstevel@tonic-gate	ba,pt	%ncc, 1f
31337c478bd9Sstevel@tonic-gate	  wr	%o3, 0, %fprs		! restore fprs
31347c478bd9Sstevel@tonic-gate
31357c478bd9Sstevel@tonic-gate4:
31367c478bd9Sstevel@tonic-gate	FZEROQ2Q4
31377c478bd9Sstevel@tonic-gate	wr	%o3, 0, %fprs		! restore fprs
31387c478bd9Sstevel@tonic-gate
/*
 * Common exit: strip FPUSED_FLAG from the saved handler, write it back
 * to t_lofault, and return 0 to the caller through the restore in the
 * ret delay slot.
 */
31397c478bd9Sstevel@tonic-gate1:
31407c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
31417c478bd9Sstevel@tonic-gate	andn	%l6, FPUSED_FLAG, %l6
31427c478bd9Sstevel@tonic-gate	stn	%l6, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
31437c478bd9Sstevel@tonic-gate	FP_ALLOWMIGRATE(5, 6)
31447c478bd9Sstevel@tonic-gate	ret
31457c478bd9Sstevel@tonic-gate	  restore	%g0, 0, %o0
31467c478bd9Sstevel@tonic-gate/*
31477c478bd9Sstevel@tonic-gate * We got here because of a fault during copyin
31487c478bd9Sstevel@tonic-gate * Errno value is in ERRNO, but DDI/DKI says return -1 (sigh).
31497c478bd9Sstevel@tonic-gate */
/*
 * This code does not itself restore t_lofault or the FP state.
 * NOTE(review): presumably copyio_fault has already done both, and has
 * put the original arguments back in place, before transferring here
 * through REAL_LOFAULT -- confirm against copyio_fault.
 * The restore in each delay slot below releases the register window
 * taken by the save at entry.
 */
31507c478bd9Sstevel@tonic-gate.copyin_err:
31517c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4	! check for copyop handler
31527c478bd9Sstevel@tonic-gate	tst	%o4
31537c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f			! if not, return error
31547c478bd9Sstevel@tonic-gate	nop
31557c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_COPYIN], %g2		! if handler, invoke it with
31567c478bd9Sstevel@tonic-gate	jmp	%g2				! original arguments
31577c478bd9Sstevel@tonic-gate	restore %g0, 0, %g0			! dispose of copy window
31587c478bd9Sstevel@tonic-gate2:
31597c478bd9Sstevel@tonic-gate	ret
31607c478bd9Sstevel@tonic-gate	restore %g0, -1, %o0			! return error value
31617c478bd9Sstevel@tonic-gate
31627c478bd9Sstevel@tonic-gate
31637c478bd9Sstevel@tonic-gate	SET_SIZE(copyin_more)
31647c478bd9Sstevel@tonic-gate
31657c478bd9Sstevel@tonic-gate	ENTRY(xcopyin)
31667c478bd9Sstevel@tonic-gate
31677c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
31687c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyin_small		! go to larger cases
31697c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
31707c478bd9Sstevel@tonic-gate	btst	7, %o3				!
31717c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .xcopyin_8		! check for longword alignment
31727c478bd9Sstevel@tonic-gate	  nop
3173*5d9d9091SRichard Lowe	btst	1, %o3				!
31747c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .xcopyin_2		! check for half-word
31757c478bd9Sstevel@tonic-gate	  nop
31767c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
31777c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
31787c478bd9Sstevel@tonic-gate	tst	%o3
31797c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyin_small		! if zero, disable HW copy
31807c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
31817c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyin_small		! go to small copy
31827c478bd9Sstevel@tonic-gate	  nop
31837c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyin_more		! otherwise go to large copy
31847c478bd9Sstevel@tonic-gate	  nop
31857c478bd9Sstevel@tonic-gate.xcopyin_2:
31867c478bd9Sstevel@tonic-gate	btst	3, %o3				!
31877c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .xcopyin_4		! check for word alignment
31887c478bd9Sstevel@tonic-gate	  nop
31897c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
31907c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
31917c478bd9Sstevel@tonic-gate	tst	%o3
31927c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyin_small		! if zero, disable HW copy
31937c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
31947c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyin_small		! go to small copy
31957c478bd9Sstevel@tonic-gate	  nop
31967c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyin_more		! otherwise go to large copy
31977c478bd9Sstevel@tonic-gate	  nop
31987c478bd9Sstevel@tonic-gate.xcopyin_4:
31997c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
32007c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
32017c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
32027c478bd9Sstevel@tonic-gate	tst	%o3
32037c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyin_small		! if zero, disable HW copy
32047c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
32057c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyin_small		! go to small copy
32067c478bd9Sstevel@tonic-gate	  nop
32077c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyin_more		! otherwise go to large copy
32087c478bd9Sstevel@tonic-gate	  nop
32097c478bd9Sstevel@tonic-gate.xcopyin_8:
32107c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
32117c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
32127c478bd9Sstevel@tonic-gate	tst	%o3
32137c478bd9Sstevel@tonic-gate	bz,pn	%icc, .xcopyin_small		! if zero, disable HW copy
32147c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
32157c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .xcopyin_small		! go to small copy
32167c478bd9Sstevel@tonic-gate	  nop
32177c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .xcopyin_more		! otherwise go to large copy
32187c478bd9Sstevel@tonic-gate	  nop
32197c478bd9Sstevel@tonic-gate
32207c478bd9Sstevel@tonic-gate.xcopyin_small:
32217c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_xcopyin_err), %o5  ! .sm_xcopyin_err is lofault value
32227c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_xcopyin_err), %o5
32237c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4	! set/save t_lofaul
32247c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
32257c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .sm_do_copyin		! common code
32267c478bd9Sstevel@tonic-gate	  stn	%o5, [THREAD_REG + T_LOFAULT]
3227*5d9d9091SRichard Lowe
32287c478bd9Sstevel@tonic-gate.xcopyin_more:
32297c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
32307c478bd9Sstevel@tonic-gate	sethi	%hi(.xcopyin_err), REAL_LOFAULT	! .xcopyin_err is lofault value
32317c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .do_copyin
32327c478bd9Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.xcopyin_err), REAL_LOFAULT
32337c478bd9Sstevel@tonic-gate
32347c478bd9Sstevel@tonic-gate/*
32357c478bd9Sstevel@tonic-gate * We got here because of fault during xcopyin
32367c478bd9Sstevel@tonic-gate * Errno value is in ERRNO
32377c478bd9Sstevel@tonic-gate */
32387c478bd9Sstevel@tonic-gate.xcopyin_err:
32397c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o4	! check for copyop handler
32407c478bd9Sstevel@tonic-gate	tst	%o4
32417c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 2f			! if not, return error
32427c478bd9Sstevel@tonic-gate	  nop
32437c478bd9Sstevel@tonic-gate	ldn	[%o4 + CP_XCOPYIN], %g2		! if handler, invoke it with
32447c478bd9Sstevel@tonic-gate	jmp	%g2				! original arguments
32457c478bd9Sstevel@tonic-gate	  restore %g0, 0, %g0			! dispose of copy window
32467c478bd9Sstevel@tonic-gate2:
32477c478bd9Sstevel@tonic-gate        ret
32487c478bd9Sstevel@tonic-gate	  restore ERRNO, 0, %o0			! return errno value
32497c478bd9Sstevel@tonic-gate
32507c478bd9Sstevel@tonic-gate.sm_xcopyin_err:
32517c478bd9Sstevel@tonic-gate
32527c478bd9Sstevel@tonic-gate	membar	#Sync
32537c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]   ! restore old t_lofault
32547c478bd9Sstevel@tonic-gate	mov	SM_SAVE_SRC, %o0
32557c478bd9Sstevel@tonic-gate	mov	SM_SAVE_DST, %o1
32567c478bd9Sstevel@tonic-gate	mov	SM_SAVE_COUNT, %o2
32577c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_COPYOPS], %o3	! check for copyop handler
32587c478bd9Sstevel@tonic-gate	tst	%o3
32597c478bd9Sstevel@tonic-gate	bz,pt	%ncc, 3f			! if not, return error
32607c478bd9Sstevel@tonic-gate	  nop
32617c478bd9Sstevel@tonic-gate	ldn	[%o3 + CP_XCOPYIN], %o5		! if handler, invoke it with
32627c478bd9Sstevel@tonic-gate	jmp	%o5				! original arguments
32637c478bd9Sstevel@tonic-gate	  nop
32647c478bd9Sstevel@tonic-gate3:
32657c478bd9Sstevel@tonic-gate	retl
32667c478bd9Sstevel@tonic-gate	  or	%g1, 0, %o0		! return errno value
32677c478bd9Sstevel@tonic-gate
32687c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyin)
32697c478bd9Sstevel@tonic-gate
/*
 * xcopyin_little(src = %o0, dst = %o1, count = %o2)
 *
 * Byte-at-a-time copy.  The source is read through ASI_AIUSL starting at
 * its last byte and walking down; the destination is written with plain
 * stores starting at its first byte and walking up, so the net effect is
 * dst[i] = src[count - 1 - i].
 * NOTE(review): ASI_AIUSL is defined outside this chunk; presumably it is
 * the as-if-user, secondary, little-endian ASI -- confirm in <sys/asi.h>.
 *
 * While the copy runs t_lofault points at .xcopyio_err.  The previous
 * t_lofault value is kept in %o5 and written back on both exit paths.
 *
 * Returns 0 in %o0 on success.  On a fault .xcopyio_err returns whatever
 * is in %g1 (the small-copy error path of xcopyin, earlier in this file,
 * labels the same register "errno value").
 *
 * Leaf routine: only %o0-%o5 are modified and no register window is
 * allocated.
 */
32707c478bd9Sstevel@tonic-gate	ENTRY(xcopyin_little)
32717c478bd9Sstevel@tonic-gate	sethi	%hi(.xcopyio_err), %o5
32727c478bd9Sstevel@tonic-gate	or	%o5, %lo(.xcopyio_err), %o5
32737c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4
32747c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
3275*5d9d9091SRichard Lowe	stn	%o5, [THREAD_REG + T_LOFAULT]
32767c478bd9Sstevel@tonic-gate	mov	%o4, %o5
32777c478bd9Sstevel@tonic-gate
	/*
	 * Index setup.  %o3 = -count is the running (negative) index; the
	 * subcc also produces the condition codes tested by the bz below
	 * (the add between them does not modify the condition codes).
	 * The sub in the bz delay slot is not annulled, so it runs on both
	 * paths.  Once the adjustments are done:
	 *   %o0 = src + 2*count - 1, so [%o0 + %o3] = src + count - 1
	 *   %o1 = dst + count,       so [%o1 + %o3] = dst
	 */
32787c478bd9Sstevel@tonic-gate	subcc	%g0, %o2, %o3
32797c478bd9Sstevel@tonic-gate	add	%o0, %o2, %o0
32807c478bd9Sstevel@tonic-gate	bz,pn	%ncc, 2f		! check for zero bytes
32817c478bd9Sstevel@tonic-gate	  sub	%o2, 1, %o4
3282*5d9d9091SRichard Lowe	add	%o0, %o4, %o0		! start w/last byte
32837c478bd9Sstevel@tonic-gate	add	%o1, %o2, %o1
32847c478bd9Sstevel@tonic-gate	lduba	[%o0 + %o3]ASI_AIUSL, %o4
32857c478bd9Sstevel@tonic-gate
	/*
	 * Each pass stores the byte loaded on the previous pass, then bumps
	 * %o3 by one and drops %o0 by two: the load address therefore moves
	 * back one byte while the store address moves forward one byte.
	 * inccc sets carry when %o3 wraps from -1 to 0, which makes the bcc
	 * fall through; because the branch is annulled, the delay-slot load
	 * is skipped on that final pass.
	 */
32867c478bd9Sstevel@tonic-gate1:	stb	%o4, [%o1 + %o3]
32877c478bd9Sstevel@tonic-gate	inccc	%o3
32887c478bd9Sstevel@tonic-gate	sub	%o0, 2, %o0		! get next byte
32897c478bd9Sstevel@tonic-gate	bcc,a,pt %ncc, 1b
32907c478bd9Sstevel@tonic-gate	  lduba	[%o0 + %o3]ASI_AIUSL, %o4
32917c478bd9Sstevel@tonic-gate
	/* Normal exit: put the caller's t_lofault back and return 0. */
32927c478bd9Sstevel@tonic-gate2:
32937c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
32947c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
32957c478bd9Sstevel@tonic-gate	retl
32967c478bd9Sstevel@tonic-gate	  mov	%g0, %o0		! return (0)
32977c478bd9Sstevel@tonic-gate
	/*
	 * Fault exit (installed as t_lofault above): put the caller's
	 * t_lofault back and return the value found in %g1.
	 */
32987c478bd9Sstevel@tonic-gate.xcopyio_err:
32997c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
33007c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
33017c478bd9Sstevel@tonic-gate	retl
33027c478bd9Sstevel@tonic-gate	  mov	%g1, %o0
33037c478bd9Sstevel@tonic-gate
33047c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyin_little)
33057c478bd9Sstevel@tonic-gate
33067c478bd9Sstevel@tonic-gate
33077c478bd9Sstevel@tonic-gate/*
33087c478bd9Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
33097c478bd9Sstevel@tonic-gate * No fault handler installed (to be called under on_fault())
33107c478bd9Sstevel@tonic-gate */
/*
 * copyin_noerr(src = %o0, dst = %o1, count = %o2)
 *
 * This entry point only dispatches; the copy itself is done by
 * .sm_do_copyin (small, leaf) or .do_copyin (large), both defined
 * outside this chunk.
 *
 *   count <= VIS_COPY_THRESHOLD              -> small copy
 *   otherwise choose a limit from the low bits of (src ^ dst):
 *     low 3 bits clear -> hw_copy_limit_8
 *     low 2 bits clear -> hw_copy_limit_4
 *     bit 0 clear      -> hw_copy_limit_2
 *     anything else    -> hw_copy_limit_1
 *   limit == 0 or count <= limit             -> small copy
 *   otherwise                                -> large copy
 *
 * In every limit check the cmp sits in the delay slot of a non-annulled
 * bz, so it executes on both paths and supplies the condition codes for
 * the bleu that follows.  The limits are loaded with a 32-bit ld and the
 * zero test uses %icc; the length comparisons use %ncc.
 *
 * Fault handling:
 *   small copy - if t_lofault is zero nothing is installed.  Otherwise
 *     t_lofault is replaced by .sm_copyio_noerr and the old value stays
 *     in %o4; on a fault the trampoline restores t_lofault from %o4 and
 *     jumps to that old handler.
 *   large copy - a register window is pushed and REAL_LOFAULT is set to
 *     .copyio_noerr, which on a fault jumps through %l6 and pops the
 *     window.
 *     NOTE(review): %l6 is presumably where .do_copyin keeps the
 *     caller's t_lofault -- confirm against the register defines.
 *
 * The "! go to larger cases" note on the first bleu is stale: that
 * branch is the one taken for the small (leaf) case.
 */
33117c478bd9Sstevel@tonic-gate	ENTRY(copyin_noerr)
33127c478bd9Sstevel@tonic-gate
33137c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
33147c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_ne_small		! go to larger cases
33157c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
33167c478bd9Sstevel@tonic-gate	btst	7, %o3				!
33177c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyin_ne_8		! check for longword alignment
33187c478bd9Sstevel@tonic-gate	  nop
3319*5d9d9091SRichard Lowe	btst	1, %o3				!
33207c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyin_ne_2		! check for half-word
33217c478bd9Sstevel@tonic-gate	  nop
	/* src ^ dst has bit 0 set: use the byte-alignment limit */
33227c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
33237c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
33247c478bd9Sstevel@tonic-gate	tst	%o3
33257c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
33267c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
33277c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
33287c478bd9Sstevel@tonic-gate	  nop
33297c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
33307c478bd9Sstevel@tonic-gate	  nop
33317c478bd9Sstevel@tonic-gate.copyin_ne_2:
33327c478bd9Sstevel@tonic-gate	btst	3, %o3				!
33337c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyin_ne_4		! check for word alignment
33347c478bd9Sstevel@tonic-gate	  nop
33357c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
33367c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
33377c478bd9Sstevel@tonic-gate	tst	%o3
33387c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
33397c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
33407c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
33417c478bd9Sstevel@tonic-gate	  nop
33427c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
33437c478bd9Sstevel@tonic-gate	  nop
33447c478bd9Sstevel@tonic-gate.copyin_ne_4:
33457c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
33467c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
33477c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
33487c478bd9Sstevel@tonic-gate	tst	%o3
33497c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
33507c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
33517c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
33527c478bd9Sstevel@tonic-gate	  nop
33537c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
33547c478bd9Sstevel@tonic-gate	  nop
33557c478bd9Sstevel@tonic-gate.copyin_ne_8:
33567c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
33577c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
33587c478bd9Sstevel@tonic-gate	tst	%o3
33597c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyin_ne_small		! if zero, disable HW copy
33607c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
33617c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyin_ne_small		! go to small copy
33627c478bd9Sstevel@tonic-gate	  nop
33637c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyin_noerr_more	! otherwise go to large copy
33647c478bd9Sstevel@tonic-gate	  nop
33657c478bd9Sstevel@tonic-gate
	/*
	 * Small copy.  %o4 = current t_lofault.  When it is zero the copy
	 * is entered with t_lofault left untouched; otherwise the
	 * .sm_copyio_noerr trampoline is installed by the stn in the delay
	 * slot of the ba.
	 */
33667c478bd9Sstevel@tonic-gate.copyin_ne_small:
33677c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4
33687c478bd9Sstevel@tonic-gate	tst	%o4
33697c478bd9Sstevel@tonic-gate	bz,pn	%ncc, .sm_do_copyin
33707c478bd9Sstevel@tonic-gate	  nop
33717c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_copyio_noerr), %o5
33727c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_copyio_noerr), %o5
33737c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
33747c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .sm_do_copyin
33757c478bd9Sstevel@tonic-gate	  stn	%o5, [THREAD_REG + T_LOFAULT]	! set/save t_lofault
33767c478bd9Sstevel@tonic-gate
	/*
	 * Large copy.  Push a window sized for the HW copy frame and hand
	 * .copyio_noerr to the common code in REAL_LOFAULT; the low half of
	 * the address is filled in by the or in the delay slot of the ba.
	 */
33777c478bd9Sstevel@tonic-gate.copyin_noerr_more:
33787c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
33797c478bd9Sstevel@tonic-gate	sethi	%hi(.copyio_noerr), REAL_LOFAULT
33807c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .do_copyin
33817c478bd9Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
33827c478bd9Sstevel@tonic-gate
	/*
	 * Large-copy fault trampoline, also used by copyout_noerr: jump to
	 * the address held in %l6 and pop the copy window in the delay slot.
	 */
33837c478bd9Sstevel@tonic-gate.copyio_noerr:
33847c478bd9Sstevel@tonic-gate	jmp	%l6
33857c478bd9Sstevel@tonic-gate	  restore %g0,0,%g0
33867c478bd9Sstevel@tonic-gate
	/*
	 * Small-copy fault trampoline, also used by copyout_noerr: put the
	 * handler saved in %o4 back into t_lofault, then jump to it.
	 */
33877c478bd9Sstevel@tonic-gate.sm_copyio_noerr:
33887c478bd9Sstevel@tonic-gate	membar	#Sync
33897c478bd9Sstevel@tonic-gate	stn	%o4, [THREAD_REG + T_LOFAULT]	! restore t_lofault
33907c478bd9Sstevel@tonic-gate	jmp	%o4
33917c478bd9Sstevel@tonic-gate	  nop
33927c478bd9Sstevel@tonic-gate
33937c478bd9Sstevel@tonic-gate	SET_SIZE(copyin_noerr)
33947c478bd9Sstevel@tonic-gate
33957c478bd9Sstevel@tonic-gate/*
33967c478bd9Sstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
33977c478bd9Sstevel@tonic-gate * No fault handler installed (to be called under on_fault())
33987c478bd9Sstevel@tonic-gate */
33997c478bd9Sstevel@tonic-gate
/*
 * copyout_noerr(src = %o0, dst = %o1, count = %o2)
 *
 * Dispatch-only entry point with the same decision tree as copyin_noerr;
 * the copy itself is done by .sm_do_copyout (small, leaf) or .do_copyout
 * (large), both defined outside this chunk.
 *
 *   count <= VIS_COPY_THRESHOLD              -> small copy
 *   otherwise choose a limit from the low bits of (src ^ dst):
 *     low 3 bits clear -> hw_copy_limit_8
 *     low 2 bits clear -> hw_copy_limit_4
 *     bit 0 clear      -> hw_copy_limit_2
 *     anything else    -> hw_copy_limit_1
 *   limit == 0 or count <= limit             -> small copy
 *   otherwise                                -> large copy
 *
 * In every limit check the cmp sits in the delay slot of a non-annulled
 * bz, so it executes on both paths and supplies the condition codes for
 * the bleu that follows.
 *
 * The fault trampolines .sm_copyio_noerr (small) and .copyio_noerr
 * (large) are the labels defined inside copyin_noerr; this routine
 * defines none of its own.
 *
 * The "! go to larger cases" note on the first bleu is stale: that
 * branch is the one taken for the small (leaf) case.
 */
34007c478bd9Sstevel@tonic-gate	ENTRY(copyout_noerr)
34017c478bd9Sstevel@tonic-gate
34027c478bd9Sstevel@tonic-gate	cmp	%o2, VIS_COPY_THRESHOLD		! check for leaf rtn case
34037c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_ne_small		! go to larger cases
34047c478bd9Sstevel@tonic-gate	  xor	%o0, %o1, %o3			! are src, dst alignable?
34057c478bd9Sstevel@tonic-gate	btst	7, %o3				!
34067c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_ne_8		! check for longword alignment
34077c478bd9Sstevel@tonic-gate	  nop
3408*5d9d9091SRichard Lowe	btst	1, %o3				!
34097c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_ne_2		! check for half-word
34107c478bd9Sstevel@tonic-gate	  nop
	/* src ^ dst has bit 0 set: use the byte-alignment limit */
34117c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_1), %o3	! Check copy limit
34127c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_1)], %o3
34137c478bd9Sstevel@tonic-gate	tst	%o3
34147c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
34157c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
34167c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
34177c478bd9Sstevel@tonic-gate	  nop
34187c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
34197c478bd9Sstevel@tonic-gate	  nop
34207c478bd9Sstevel@tonic-gate.copyout_ne_2:
34217c478bd9Sstevel@tonic-gate	btst	3, %o3				!
34227c478bd9Sstevel@tonic-gate	bz,pt	%ncc, .copyout_ne_4		! check for word alignment
34237c478bd9Sstevel@tonic-gate	  nop
34247c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_2), %o3	! Check copy limit
34257c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_2)], %o3
34267c478bd9Sstevel@tonic-gate	tst	%o3
34277c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
34287c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
34297c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
34307c478bd9Sstevel@tonic-gate	  nop
34317c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
34327c478bd9Sstevel@tonic-gate	  nop
34337c478bd9Sstevel@tonic-gate.copyout_ne_4:
34347c478bd9Sstevel@tonic-gate	! already checked longword, must be word aligned
34357c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_4), %o3	! Check copy limit
34367c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_4)], %o3
34377c478bd9Sstevel@tonic-gate	tst	%o3
34387c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
34397c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
34407c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
34417c478bd9Sstevel@tonic-gate	  nop
34427c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
34437c478bd9Sstevel@tonic-gate	  nop
34447c478bd9Sstevel@tonic-gate.copyout_ne_8:
34457c478bd9Sstevel@tonic-gate	sethi	%hi(hw_copy_limit_8), %o3	! Check copy limit
34467c478bd9Sstevel@tonic-gate	ld	[%o3 + %lo(hw_copy_limit_8)], %o3
34477c478bd9Sstevel@tonic-gate	tst	%o3
34487c478bd9Sstevel@tonic-gate	bz,pn	%icc, .copyout_ne_small		! if zero, disable HW copy
34497c478bd9Sstevel@tonic-gate	  cmp	%o2, %o3			! if length <= limit
34507c478bd9Sstevel@tonic-gate	bleu,pt	%ncc, .copyout_ne_small		! go to small copy
34517c478bd9Sstevel@tonic-gate	  nop
34527c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .copyout_noerr_more	! otherwise go to large copy
34537c478bd9Sstevel@tonic-gate	  nop
34547c478bd9Sstevel@tonic-gate
	/*
	 * Small copy.  %o4 = current t_lofault.  When it is zero the copy
	 * is entered with t_lofault left untouched; otherwise the
	 * .sm_copyio_noerr trampoline is installed.  The stn that follows
	 * the ba is its delay slot even though it lacks the usual extra
	 * two-space indent.
	 */
34557c478bd9Sstevel@tonic-gate.copyout_ne_small:
34567c478bd9Sstevel@tonic-gate	ldn	[THREAD_REG + T_LOFAULT], %o4
34577c478bd9Sstevel@tonic-gate	tst	%o4
34587c478bd9Sstevel@tonic-gate	bz,pn	%ncc, .sm_do_copyout
34597c478bd9Sstevel@tonic-gate	  nop
34607c478bd9Sstevel@tonic-gate	sethi	%hi(.sm_copyio_noerr), %o5
34617c478bd9Sstevel@tonic-gate	or	%o5, %lo(.sm_copyio_noerr), %o5
34627c478bd9Sstevel@tonic-gate	membar	#Sync				! sync error barrier
34637c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .sm_do_copyout
34647c478bd9Sstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! set/save t_lofault
34657c478bd9Sstevel@tonic-gate
	/*
	 * Large copy.  Push a window sized for the HW copy frame and hand
	 * .copyio_noerr to the common code in REAL_LOFAULT; the low half of
	 * the address is filled in by the or in the delay slot of the ba.
	 */
34667c478bd9Sstevel@tonic-gate.copyout_noerr_more:
34677c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
34687c478bd9Sstevel@tonic-gate	sethi	%hi(.copyio_noerr), REAL_LOFAULT
34697c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .do_copyout
34707c478bd9Sstevel@tonic-gate	  or	REAL_LOFAULT, %lo(.copyio_noerr), REAL_LOFAULT
34717c478bd9Sstevel@tonic-gate
34727c478bd9Sstevel@tonic-gate	SET_SIZE(copyout_noerr)
34737c478bd9Sstevel@tonic-gate
34747c478bd9Sstevel@tonic-gate
34757c478bd9Sstevel@tonic-gate/*
34767c478bd9Sstevel@tonic-gate * hwblkclr - clears block-aligned, block-multiple-sized regions that are
34777c478bd9Sstevel@tonic-gate * longer than 256 bytes in length using spitfire's block stores.  If
34787c478bd9Sstevel@tonic-gate * the criteria for using this routine are not met then it calls bzero
34797c478bd9Sstevel@tonic-gate * and returns 1.  Otherwise 0 is returned indicating success.
34807c478bd9Sstevel@tonic-gate * Caller is responsible for ensuring use_hw_bzero is true and that
34817c478bd9Sstevel@tonic-gate * kpreempt_disable() has been called.
34827c478bd9Sstevel@tonic-gate */
34837c478bd9Sstevel@tonic-gate	! %i0 - start address
34847c478bd9Sstevel@tonic-gate	! %i1 - length of region (multiple of 64)
34857c478bd9Sstevel@tonic-gate	! %l0 - saved fprs
34867c478bd9Sstevel@tonic-gate	! %l1 - pointer to saved %d0 block
34877c478bd9Sstevel@tonic-gate	! %l2 - saved curthread->t_lwp
34887c478bd9Sstevel@tonic-gate
/*
 * hwblkclr(addr = %i0 after save, len = %i1 after save)
 *
 * Control flow summary:
 *   1. Validate: addr must be VIS_BLOCKSIZE aligned, len must be >= 256
 *      and a multiple of VIS_BLOCKSIZE.  If any check fails, call
 *      bzero(addr, len) and return 1.
 *   2. If the FPRS_FEF bit is set on entry, block-store the registers
 *      starting at %d0 into an aligned slot in this frame so they can be
 *      reloaded at the end; the original %fprs is kept in %l0.
 *   3. Zero %d0-%d14 and clear memory with block stores through %asi
 *      (set to ASI_BLK_P): 256 bytes per pass while len >= 256, then the
 *      remaining 64/128/192 bytes with a computed jump into the tail of
 *      the same store sequence.
 *   4. Reload the saved registers if step 2 saved them, restore %fprs
 *      and return 0.
 *
 * The numeric label "1:" appears twice; the "1f" references in the
 * validation code resolve to the bzero punt path, and the "1f" after the
 * FPRS_FEF test resolves to the later "1:" at the membar.
 *
 * NOTE(review): the loop is written in terms of the literals 64 and 256
 * while the checks use VIS_BLOCKSIZE; this presumably relies on
 * VIS_BLOCKSIZE being 64 -- confirm.
 */
34897c478bd9Sstevel@tonic-gate	ENTRY(hwblkclr)
34907c478bd9Sstevel@tonic-gate	! get another window w/space for one aligned block of saved fpregs
34917c478bd9Sstevel@tonic-gate	save	%sp, -SA(MINFRAME + 2*VIS_BLOCKSIZE), %sp
34927c478bd9Sstevel@tonic-gate
34937c478bd9Sstevel@tonic-gate	! Must be block-aligned
34947c478bd9Sstevel@tonic-gate	andcc	%i0, (VIS_BLOCKSIZE-1), %g0
34957c478bd9Sstevel@tonic-gate	bnz,pn	%ncc, 1f
34967c478bd9Sstevel@tonic-gate	  nop
34977c478bd9Sstevel@tonic-gate
34987c478bd9Sstevel@tonic-gate	! ... and must be 256 bytes or more
34997c478bd9Sstevel@tonic-gate	cmp	%i1, 256
35007c478bd9Sstevel@tonic-gate	blu,pn	%ncc, 1f
35017c478bd9Sstevel@tonic-gate	  nop
35027c478bd9Sstevel@tonic-gate
35037c478bd9Sstevel@tonic-gate	! ... and length must be a multiple of VIS_BLOCKSIZE
35047c478bd9Sstevel@tonic-gate	andcc	%i1, (VIS_BLOCKSIZE-1), %g0
35057c478bd9Sstevel@tonic-gate	bz,pn	%ncc, 2f
35067c478bd9Sstevel@tonic-gate	  nop
35077c478bd9Sstevel@tonic-gate
	/* the mov after the call is its delay slot (second bzero argument) */
35087c478bd9Sstevel@tonic-gate1:	! punt, call bzero but notify the caller that bzero was used
35097c478bd9Sstevel@tonic-gate	mov	%i0, %o0
35107c478bd9Sstevel@tonic-gate	call	bzero
35117c478bd9Sstevel@tonic-gate	mov	%i1, %o1
35127c478bd9Sstevel@tonic-gate	ret
35137c478bd9Sstevel@tonic-gate	  restore	%g0, 1, %o0 ! return (1) - did not use block operations
35147c478bd9Sstevel@tonic-gate
35157c478bd9Sstevel@tonic-gate2:	rd	%fprs, %l0		! check for unused fp
35167c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %l0
35177c478bd9Sstevel@tonic-gate	bz,pt	%icc, 1f
35187c478bd9Sstevel@tonic-gate	  nop
35197c478bd9Sstevel@tonic-gate
	/*
	 * %l1 = (%fp + STACK_BIAS - 65) rounded down to a VIS_BLOCKSIZE
	 * boundary; the frame was allocated with 2*VIS_BLOCKSIZE of extra
	 * room so that an aligned save slot can be carved out of it.
	 */
35207c478bd9Sstevel@tonic-gate	! save in-use fpregs on stack
35217c478bd9Sstevel@tonic-gate	membar	#Sync
35227c478bd9Sstevel@tonic-gate	add	%fp, STACK_BIAS - 65, %l1
35237c478bd9Sstevel@tonic-gate	and	%l1, -VIS_BLOCKSIZE, %l1
35247c478bd9Sstevel@tonic-gate	stda	%d0, [%l1]ASI_BLK_P
35257c478bd9Sstevel@tonic-gate
	/* set FPRS_FEF and make ASI_BLK_P the implicit %asi for the stores */
35267c478bd9Sstevel@tonic-gate1:	membar	#StoreStore|#StoreLoad|#LoadStore
35277c478bd9Sstevel@tonic-gate	wr	%g0, FPRS_FEF, %fprs
35287c478bd9Sstevel@tonic-gate	wr	%g0, ASI_BLK_P, %asi
35297c478bd9Sstevel@tonic-gate
35307c478bd9Sstevel@tonic-gate	! Clear block
35317c478bd9Sstevel@tonic-gate	fzero	%d0
35327c478bd9Sstevel@tonic-gate	fzero	%d2
35337c478bd9Sstevel@tonic-gate	fzero	%d4
35347c478bd9Sstevel@tonic-gate	fzero	%d6
35357c478bd9Sstevel@tonic-gate	fzero	%d8
35367c478bd9Sstevel@tonic-gate	fzero	%d10
35377c478bd9Sstevel@tonic-gate	fzero	%d12
35387c478bd9Sstevel@tonic-gate	fzero	%d14
35397c478bd9Sstevel@tonic-gate
	/* %i3 = bytes consumed per pass; 256 for the full-size passes */
35407c478bd9Sstevel@tonic-gate	mov	256, %i3
35417c478bd9Sstevel@tonic-gate	ba,pt	%ncc, .pz_doblock
35427c478bd9Sstevel@tonic-gate	  nop
35437c478bd9Sstevel@tonic-gate
	/*
	 * Store sequence.  The three stda instructions directly before
	 * .pz_zinst are also entered part-way by the computed jump further
	 * down, which subtracts 4 bytes from the address of .pz_zinst for
	 * every 64-byte block still to clear.  Do not add, remove or reorder
	 * instructions between .pz_blkstart and .pz_zinst.
	 */
3544*5d9d9091SRichard Lowe.pz_blkstart:
35457c478bd9Sstevel@tonic-gate      ! stda	%d0, [%i0 + 192]%asi  ! in dly slot of branch that got us here
35467c478bd9Sstevel@tonic-gate	stda	%d0, [%i0 + 128]%asi
35477c478bd9Sstevel@tonic-gate	stda	%d0, [%i0 + 64]%asi
35487c478bd9Sstevel@tonic-gate	stda	%d0, [%i0]%asi
35497c478bd9Sstevel@tonic-gate.pz_zinst:
35507c478bd9Sstevel@tonic-gate	add	%i0, %i3, %i0
35517c478bd9Sstevel@tonic-gate	sub	%i1, %i3, %i1
	/*
	 * While at least 256 bytes remain, loop back with the +192 store in
	 * the annulled delay slot (executed only when the branch is taken).
	 */
35527c478bd9Sstevel@tonic-gate.pz_doblock:
35537c478bd9Sstevel@tonic-gate	cmp	%i1, 256
35547c478bd9Sstevel@tonic-gate	bgeu,a	%ncc, .pz_blkstart
35557c478bd9Sstevel@tonic-gate	  stda	%d0, [%i0 + 192]%asi
35567c478bd9Sstevel@tonic-gate
	/*
	 * Fewer than 256 bytes remain.  The andn after the blank line is the
	 * (non-annulled) delay slot of the blu, so %i3 is recomputed on both
	 * paths.  %i3 = remaining bytes rounded down to a multiple of 64;
	 * %i2 = %i3 >> 4 = 4 bytes of instruction per 64-byte block.  The
	 * jmp lands that many bytes before .pz_zinst so that exactly %i3/64
	 * block stores run before %i0 and %i1 are adjusted by %i3.
	 */
35577c478bd9Sstevel@tonic-gate	cmp	%i1, 64
35587c478bd9Sstevel@tonic-gate	blu	%ncc, .pz_finish
3559*5d9d9091SRichard Lowe
35607c478bd9Sstevel@tonic-gate	  andn	%i1, (64-1), %i3
35617c478bd9Sstevel@tonic-gate	srl	%i3, 4, %i2		! using blocks, 1 instr / 16 words
35627c478bd9Sstevel@tonic-gate	set	.pz_zinst, %i4
35637c478bd9Sstevel@tonic-gate	sub	%i4, %i2, %i4
35647c478bd9Sstevel@tonic-gate	jmp	%i4
35657c478bd9Sstevel@tonic-gate	  nop
35667c478bd9Sstevel@tonic-gate
	/*
	 * Done clearing.  If FPRS_FEF was clear on entry, nothing was saved:
	 * just restore %fprs in the annulled delay slot and leave.
	 * Otherwise reload the saved block from %l1 first.
	 */
35677c478bd9Sstevel@tonic-gate.pz_finish:
35687c478bd9Sstevel@tonic-gate	membar	#Sync
35697c478bd9Sstevel@tonic-gate	btst	FPRS_FEF, %l0
35707c478bd9Sstevel@tonic-gate	bz,a	.pz_finished
35717c478bd9Sstevel@tonic-gate	  wr	%l0, 0, %fprs		! restore fprs
35727c478bd9Sstevel@tonic-gate
35737c478bd9Sstevel@tonic-gate	! restore fpregs from stack
35747c478bd9Sstevel@tonic-gate	ldda	[%l1]ASI_BLK_P, %d0
35757c478bd9Sstevel@tonic-gate	membar	#Sync
35767c478bd9Sstevel@tonic-gate	wr	%l0, 0, %fprs		! restore fprs
35777c478bd9Sstevel@tonic-gate
	/* block stores were used: the caller sees 0 in %o0 */
35787c478bd9Sstevel@tonic-gate.pz_finished:
35797c478bd9Sstevel@tonic-gate	ret
35807c478bd9Sstevel@tonic-gate	  restore	%g0, 0, %o0		! return (bzero or not)
35817c478bd9Sstevel@tonic-gate
35827c478bd9Sstevel@tonic-gate	SET_SIZE(hwblkclr)
35839b0bb795SJohn Levon
35847c478bd9Sstevel@tonic-gate	/*
35857c478bd9Sstevel@tonic-gate	 * Copy 32 bytes of data from src (%o0) to dst (%o1)
35867c478bd9Sstevel@tonic-gate	 * using physical addresses.
35877c478bd9Sstevel@tonic-gate	 */
/*
 * hw_pa_bcopy32(src_pa = %o0, dst_pa = %o1)
 *
 * Sequence:
 *   1. Save %pstate in %g1 and write it back with PSTATE_IE cleared, so
 *      the copy runs with interrupts disabled.
 *   2. Load four 64-bit words from src through ASI_MEM into %o2-%o5.
 *   3. Issue a store to dst through ASI_DC_INVAL followed by
 *      membar #Sync.
 *      NOTE(review): presumably this invalidates the D-cache line for
 *      dst before it is written by physical address -- confirm against
 *      the CPU manual.
 *   4. Store the four words to dst through ASI_MEM.
 *   5. Return, restoring the saved %pstate in the delay slot of retl.
 *
 * Leaf routine; modifies %o0-%o5, %g1 and %g2.  On return %o0 and %o1
 * have each been advanced by 24.
 * NOTE(review): both addresses are accessed with 64-bit ldxa/stxa, so
 * they presumably must be 8-byte aligned -- confirm with callers.
 */
35887c478bd9Sstevel@tonic-gate	ENTRY_NP(hw_pa_bcopy32)
35897c478bd9Sstevel@tonic-gate	rdpr	%pstate, %g1
35907c478bd9Sstevel@tonic-gate	andn	%g1, PSTATE_IE, %g2
35917c478bd9Sstevel@tonic-gate	wrpr	%g0, %g2, %pstate
35927c478bd9Sstevel@tonic-gate
	/*
	 * The value read by this rdpr is discarded (%g0 destination).
	 * NOTE(review): presumably here to make sure the wrpr above has
	 * taken effect before the loads -- confirm.
	 */
35937c478bd9Sstevel@tonic-gate	rdpr	%pstate, %g0
35947c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o2
35957c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
35967c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o3
35977c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
35987c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o4
35997c478bd9Sstevel@tonic-gate	add	%o0, 8, %o0
36007c478bd9Sstevel@tonic-gate	ldxa	[%o0]ASI_MEM, %o5
36017c478bd9Sstevel@tonic-gate
36027c478bd9Sstevel@tonic-gate    	stxa	%g0, [%o1]ASI_DC_INVAL
36037c478bd9Sstevel@tonic-gate	membar	#Sync
36047c478bd9Sstevel@tonic-gate
36057c478bd9Sstevel@tonic-gate	stxa	%o2, [%o1]ASI_MEM
36067c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1
36077c478bd9Sstevel@tonic-gate	stxa	%o3, [%o1]ASI_MEM
36087c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1
36097c478bd9Sstevel@tonic-gate	stxa	%o4, [%o1]ASI_MEM
36107c478bd9Sstevel@tonic-gate	add	%o1, 8, %o1
36117c478bd9Sstevel@tonic-gate	stxa	%o5, [%o1]ASI_MEM
36127c478bd9Sstevel@tonic-gate
	/* the wrpr in the delay slot restores the caller's %pstate */
36137c478bd9Sstevel@tonic-gate	retl
36147c478bd9Sstevel@tonic-gate	  wrpr	  %g0, %g1, %pstate
36157c478bd9Sstevel@tonic-gate
36167c478bd9Sstevel@tonic-gate	SET_SIZE(hw_pa_bcopy32)
36177c478bd9Sstevel@tonic-gate
/*
 * Tunables, each a single 32-bit .word (the dispatch code in this file
 * reads the limits with a 32-bit ld).
 * NOTE(review): DGDEF is defined outside this chunk; presumably it
 * declares a global data symbol -- confirm in <sys/asm_linkage.h>.
 *
 *   use_hw_bcopy, use_hw_bzero  - initial value 1.  The hwblkclr header
 *     comment says its caller must ensure use_hw_bzero is true.
 *   hw_copy_limit_{1,2,4,8}     - initial value 0.  The suffix is the
 *     relative src/dst alignment class chosen by the dispatch code.  A
 *     value of 0 makes that code take the small-copy path; otherwise
 *     only lengths greater than the limit take the large-copy path.
 */
36187c478bd9Sstevel@tonic-gate	DGDEF(use_hw_bcopy)
36197c478bd9Sstevel@tonic-gate	.word	1
36207c478bd9Sstevel@tonic-gate	DGDEF(use_hw_bzero)
36217c478bd9Sstevel@tonic-gate	.word	1
36227c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_1)
36237c478bd9Sstevel@tonic-gate	.word	0
36247c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_2)
36257c478bd9Sstevel@tonic-gate	.word	0
36267c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_4)
36277c478bd9Sstevel@tonic-gate	.word	0
36287c478bd9Sstevel@tonic-gate	DGDEF(hw_copy_limit_8)
36297c478bd9Sstevel@tonic-gate	.word	0
36307c478bd9Sstevel@tonic-gate
	/* align to 64 bytes and switch back to the text section */
36317c478bd9Sstevel@tonic-gate	.align	64
36327c478bd9Sstevel@tonic-gate	.section ".text"
3633