27c478bdstevel@tonic-gate * CDDL HEADER START
37c478bdstevel@tonic-gate *
47c478bdstevel@tonic-gate * The contents of this file are subject to the terms of the
5340af27wh * Common Development and Distribution License (the "License").
6340af27wh * You may not use this file except in compliance with the License.
77c478bdstevel@tonic-gate *
87c478bdstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bdstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bdstevel@tonic-gate * See the License for the specific language governing permissions
117c478bdstevel@tonic-gate * and limitations under the License.
127c478bdstevel@tonic-gate *
137c478bdstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bdstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bdstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bdstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bdstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bdstevel@tonic-gate *
197c478bdstevel@tonic-gate * CDDL HEADER END
207c478bdstevel@tonic-gate */
22280575bPatrick McGehearty * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
237c478bdstevel@tonic-gate */
267c478bdstevel@tonic-gate#include <sys/param.h>
277c478bdstevel@tonic-gate#include <sys/errno.h>
287c478bdstevel@tonic-gate#include <sys/asm_linkage.h>
297c478bdstevel@tonic-gate#include <sys/vtrace.h>
307c478bdstevel@tonic-gate#include <sys/machthread.h>
317c478bdstevel@tonic-gate#include <sys/clock.h>
327c478bdstevel@tonic-gate#include <sys/asi.h>
337c478bdstevel@tonic-gate#include <sys/fsr.h>
347c478bdstevel@tonic-gate#include <sys/privregs.h>
357c478bdstevel@tonic-gate#include <sys/machasi.h>
367c478bdstevel@tonic-gate#include <sys/niagaraasi.h>
387c478bdstevel@tonic-gate#if !defined(lint)
397c478bdstevel@tonic-gate#include "assym.h"
407c478bdstevel@tonic-gate#endif	/* lint */
447c478bdstevel@tonic-gate * Pseudo-code to aid in understanding the control flow of the
457c478bdstevel@tonic-gate * bcopy/kcopy routine.
467c478bdstevel@tonic-gate *
47473b13dae *	! WARNING : <Register usage convention>
48473b13dae *	! In kcopy() the %o5, holds previous error handler and a flag
49473b13dae *	! LOFAULT_SET (low bits). The %o5 is null in bcopy().
50473b13dae *	! The %o5 is not available for any other use.
51473b13dae *
52280575bPatrick McGehearty * On entry:
 *	! Determine whether to use the FP register version or the
 *	! leaf routine version depending on the size of the copy.
55280575bPatrick McGehearty *	! Set up error handling accordingly.
56280575bPatrick McGehearty *	! The transition point depends on FP_COPY
57280575bPatrick McGehearty *	! For both versions %o5 is reserved
58280575bPatrick McGehearty *
59473b13dae * kcopy():
60280575bPatrick McGehearty *	if(length > FP_COPY)
61280575bPatrick McGehearty *		go to regular_kcopy
62280575bPatrick McGehearty *
63280575bPatrick McGehearty *	! Setup_leaf_rtn_error_handler
64280575bPatrick McGehearty *	%o5 = curthread->t_lofault;		! save existing handler in %o5
65280575bPatrick McGehearty *	%o5 |= LOFAULT_SET;			! ORed with LOFAULT_SET flag
66280575bPatrick McGehearty *	curthread->t_lofault = .sm_copyerr;
67280575bPatrick McGehearty *	goto small_bcopy();
68280575bPatrick McGehearty *
69280575bPatrick McGehearty * regular_kcopy:
70280575bPatrick McGehearty *	save_registers()
71473b13dae *	%o5 = curthread->t_lofault;		! save existing handler in %o5
72473b13dae *	%o5 |= LOFAULT_SET;			! ORed with LOFAULT_SET flag
737c478bdstevel@tonic-gate *	curthread->t_lofault = .copyerr;
74280575bPatrick McGehearty *	goto do_copy();
757c478bdstevel@tonic-gate *
76473b13dae * bcopy():
77280575bPatrick McGehearty *	if(length > FP_COPY)
78280575bPatrick McGehearty *		go to regular_bcopy
79280575bPatrick McGehearty *
80280575bPatrick McGehearty *	! Setup_leaf_rtn_error_handler
81280575bPatrick McGehearty *	%o5 = curthread->t_lofault;		! save existing handler in %o5
82280575bPatrick McGehearty *	curthread->t_lofault = .sm_copyerr;
83280575bPatrick McGehearty *	goto small_bcopy();
84280575bPatrick McGehearty *
85280575bPatrick McGehearty * regular_bcopy:
86280575bPatrick McGehearty *	%o5 = curthread->t_lofault;		! save existing handler in %o5
87280575bPatrick McGehearty *	curthread->t_lofault = .copyerr;
88280575bPatrick McGehearty *	goto do_copy();
89473b13dae *
90280575bPatrick McGehearty * small_bcopy:
91280575bPatrick McGehearty *	! handle copies smaller than FP_COPY
92280575bPatrick McGehearty *	restore t_lofault handler
93280575bPatrick McGehearty *	exit
947c478bdstevel@tonic-gate *
95280575bPatrick McGehearty * do_copy:
96280575bPatrick McGehearty *	! handle copies larger than FP_COPY
97280575bPatrick McGehearty *	save fp_regs
98473b13dae * 	blockcopy;
99280575bPatrick McGehearty *	restore fp_regs
100473b13dae *	restore t_lofault handler if came from kcopy();
1017c478bdstevel@tonic-gate *
102280575bPatrick McGehearty *
103280575bPatrick McGehearty * In leaf lofault handler:
104280575bPatrick McGehearty *	curthread->t_lofault = (%o5 & ~LOFAULT_SET);	! restore old t_lofault
105280575bPatrick McGehearty *	return (errno)
1067c478bdstevel@tonic-gate *
1077c478bdstevel@tonic-gate * In lofault handler:
108473b13dae *	curthread->t_lofault = (%o5 & ~LOFAULT_SET);	! restore old t_lofault
109280575bPatrick McGehearty *	restore fp_regs
1107c478bdstevel@tonic-gate *	return (errno)
1117c478bdstevel@tonic-gate *
112280575bPatrick McGehearty *
113280575bPatrick McGehearty *
114280575bPatrick McGehearty * For all of bcopy/copyin/copyout the copy logic is specialized according
115280575bPatrick McGehearty * to how the src and dst is aligned and how much data needs to be moved.
116280575bPatrick McGehearty * The following comments apply to the N2/RF code (#if !defined(NIAGARA_IMPL))
117280575bPatrick McGehearty *
118280575bPatrick McGehearty * N2/RF Flow :
119280575bPatrick McGehearty *
 * if (count <= FP_COPY) {  (584 bytes)
 *   set small fault handler (no register window save/restore)
 *   if count <= SHORTCOPY  (7 bytes)
123280575bPatrick McGehearty *	copy bytes; go to short_exit
124280575bPatrick McGehearty *   else
125280575bPatrick McGehearty *   determine dst alignment, move minimum bytes/halfwords to
126280575bPatrick McGehearty *   get dst aligned on long word boundary
127280575bPatrick McGehearty *     if( src is on long word boundary ) {
128280575bPatrick McGehearty * medlong:					   src/dst aligned on 8 bytes
129280575bPatrick McGehearty *	 copy with ldx/stx in 4-way unrolled loop;
130280575bPatrick McGehearty *       copy final 0-31 bytes; go to short_exit
131280575bPatrick McGehearty *     } else {					src/dst not aligned on 8 bytes
132280575bPatrick McGehearty *     if src is word aligned, ld/st words in 32-byte chunks
133280575bPatrick McGehearty *     if src is half word aligned, ld half, ld word, ld half; pack
134280575bPatrick McGehearty *		into long word, store long words in 32-byte chunks
135280575bPatrick McGehearty *     if src is byte aligned, ld byte,half,word parts;  pack into long
136280575bPatrick McGehearty *	   word, store long words in 32-byte chunks
137280575bPatrick McGehearty *     move final 0-31 bytes according to src alignment;  go to short_exit
138280575bPatrick McGehearty * short_exit:
139280575bPatrick McGehearty *     restore trap handler if needed, retl
140280575bPatrick McGehearty * else {					   More than FP_COPY bytes
141280575bPatrick McGehearty *     set fault handler
142280575bPatrick McGehearty *     disable kernel preemption
143280575bPatrick McGehearty *     save registers, save FP registers if in use
144280575bPatrick McGehearty *     move bytes to align destination register on long word boundary
145280575bPatrick McGehearty *     if(src is on long word boundary) {	   src/dst aligned on 8 bytes
146280575bPatrick McGehearty *       align dst on 64 byte boundary;  use 8-way test for each of 8 possible
147280575bPatrick McGehearty *       src alignments relative to a 64 byte boundary to select the
148280575bPatrick McGehearty *       16-way unrolled loop (128 bytes) to use for
149280575bPatrick McGehearty *       block load, fmovd, block-init-store, block-store, fmovd operations
150280575bPatrick McGehearty *       then go to remain_stuff.
151280575bPatrick McGehearty * remain_stuff: move remaining bytes. go to long_exit
152280575bPatrick McGehearty *     } else {
153280575bPatrick McGehearty *       setup alignaddr for faligndata instructions
154280575bPatrick McGehearty *       align dst on 64 byte boundary; use 8-way test for each of 8 possible
155280575bPatrick McGehearty *       src alignments to nearest long word relative to 64 byte boundary to
156280575bPatrick McGehearty *       select the 8-way unrolled loop (64 bytes) to use for
157280575bPatrick McGehearty *       block load, falign, fmovd, block-store loop
158280575bPatrick McGehearty *	 (only use block-init-store when src/dst on 8 byte boundaries.)
159280575bPatrick McGehearty *       goto unalign_done.
160280575bPatrick McGehearty * unalign_done:
161280575bPatrick McGehearty *       move remaining bytes for unaligned cases. go to long_exit
162280575bPatrick McGehearty * long_exit:
163280575bPatrick McGehearty *       restore %gsr, FP regs (either from stack or set to zero),
164280575bPatrick McGehearty *       restore trap handler, check for kernel preemption request,
165280575bPatrick McGehearty *       handle if needed, ret.
166280575bPatrick McGehearty * }
167280575bPatrick McGehearty *
168280575bPatrick McGehearty * Other platforms include hw_bcopy_limit_[1248] to control the exact
169280575bPatrick McGehearty * point where the FP register code is used. On those platforms, the
170280575bPatrick McGehearty * FP register code did not leave data in L2 cache, potentially affecting
171280575bPatrick McGehearty * performance more than the gain/loss from the algorithm difference.
172280575bPatrick McGehearty * For N2/RF, block store places data in the L2 cache, so use or non-use
173280575bPatrick McGehearty * of the FP registers has no effect on L2 cache behavior.
174280575bPatrick McGehearty * The cost for testing hw_bcopy_limit_* according to different
175280575bPatrick McGehearty * alignments exceeds 50 cycles for all cases, even when hw_bcopy_limits
176280575bPatrick McGehearty * were not used. That cost was judged too high relative to the benefits,
177280575bPatrick McGehearty * so the hw_bcopy_limit option is omitted from this code.
1787c478bdstevel@tonic-gate */
/*
 * Less than or equal to this number of bytes we will always copy byte-for-byte
 */
1837c478bdstevel@tonic-gate#define	SMALL_LIMIT	7
186473b13dae * LOFAULT_SET : Flag set by kzero and kcopy to indicate that t_lofault
187473b13dae * handler was set
1887c478bdstevel@tonic-gate */
1897c478bdstevel@tonic-gate#define	LOFAULT_SET 2
/*
 * This define is to align data for the unaligned source cases.
 * The data1, data2 and data3 are merged into data1 and data2.
 * The data3 is preserved for next merge.
 */
/*
 * ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp)
 *
 * Result, using tmp as scratch:
 *	data1 = (data1 << lshift) | (data2 >> rshift)
 *	data2 = (data2 << lshift) | (data3 >> rshift)
 * The data2 value shifted right into data1 is the one from before data2
 * itself is shifted left.  data3 is read but never written.
 * NOTE(review): presumably callers pass lshift + rshift == 64 so the two
 * pieces do not overlap -- confirm at the call sites.
 */
1967c478bdstevel@tonic-gate#define	ALIGN_DATA(data1, data2, data3, lshift, rshift, tmp)	\
1977c478bdstevel@tonic-gate	sllx	data1, lshift, data1				;\
1987c478bdstevel@tonic-gate	srlx	data2, rshift, tmp				;\
1997c478bdstevel@tonic-gate	or	data1, tmp, data1				;\
2007c478bdstevel@tonic-gate	sllx	data2, lshift, data2				;\
2017c478bdstevel@tonic-gate	srlx	data3, rshift, tmp				;\
2027c478bdstevel@tonic-gate	or	data2, tmp, data2
2047c478bdstevel@tonic-gate * This macro is to align the data. Basically it merges
2057c478bdstevel@tonic-gate * data1 and data2 to form double word.
2067c478bdstevel@tonic-gate */
/*
 * ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp)
 *
 * Result, using tmp as scratch:
 *	data1 = (data1 << lshift) | (data2 >> rshift)
 * data2 is read but never written.
 */
2077c478bdstevel@tonic-gate#define	ALIGN_DATA_EW(data1, data2, lshift, rshift, tmp)	\
2087c478bdstevel@tonic-gate	sllx	data1, lshift, data1				;\
2097c478bdstevel@tonic-gate	srlx	data2, rshift, tmp				;\
2107c478bdstevel@tonic-gate	or	data1, tmp, data1
212340af27wh#if !defined(NIAGARA_IMPL)
214340af27wh * Flags set in the lower bits of the t_lofault address:
215340af27wh * FPUSED_FLAG: The FP registers were in use and must be restored
216280575bPatrick McGehearty * LOFAULT_SET: Set for bcopy calls, cleared for kcopy calls
217340af27wh * COPY_FLAGS: Both of the above
218340af27wh *
219340af27wh * Other flags:
220340af27wh * KPREEMPT_FLAG: kpreempt needs to be called
221340af27wh */
/*
 * FPUSED_FLAG (1) and LOFAULT_SET (2) together form COPY_FLAGS, the mask
 * that the copy exit and fault paths strip from the saved handler value in
 * %o5 (andn with COPY_FLAGS) before writing it back to t_lofault.
 * KPREEMPT_FLAG (4) is outside that mask; .copyerr keeps it in %l1 rather
 * than in %o5.
 * LOFAULT_SET here repeats, with the same value, the definition that
 * appears earlier in this file outside the NIAGARA_IMPL conditional.
 */
222340af27wh#define	FPUSED_FLAG	1
223280575bPatrick McGehearty#define	LOFAULT_SET	2
224280575bPatrick McGehearty#define	COPY_FLAGS	(FPUSED_FLAG | LOFAULT_SET)
225340af27wh#define	KPREEMPT_FLAG	4
/*
 * ALIGN_OFF_<lo>_<hi>: eight macros, each a run of eight faligndata
 * instructions that fill %d48-%d62 from adjacent pairs of source doubles.
 * The variants differ only in the first source register: %d0 for
 * ALIGN_OFF_1_7, then %d2, %d4, ... up to %d14 for ALIGN_OFF_56_63, i.e.
 * the starting register advances by one double (8 bytes) per variant.
 * Each macro therefore reads nine consecutive doubles and writes eight.
 * NOTE(review): the names suggest the variant is selected by the source
 * byte offset within a 64-byte block -- confirm at the call sites.
 */
227340af27wh#define	ALIGN_OFF_1_7			\
228340af27wh	faligndata %d0, %d2, %d48	;\
229340af27wh	faligndata %d2, %d4, %d50	;\
230340af27wh	faligndata %d4, %d6, %d52	;\
231340af27wh	faligndata %d6, %d8, %d54	;\
232340af27wh	faligndata %d8, %d10, %d56	;\
233340af27wh	faligndata %d10, %d12, %d58	;\
234340af27wh	faligndata %d12, %d14, %d60	;\
235340af27wh	faligndata %d14, %d16, %d62
237340af27wh#define	ALIGN_OFF_8_15			\
238340af27wh	faligndata %d2, %d4, %d48	;\
239340af27wh	faligndata %d4, %d6, %d50	;\
240340af27wh	faligndata %d6, %d8, %d52	;\
241340af27wh	faligndata %d8, %d10, %d54	;\
242340af27wh	faligndata %d10, %d12, %d56	;\
243340af27wh	faligndata %d12, %d14, %d58	;\
244340af27wh	faligndata %d14, %d16, %d60	;\
245340af27wh	faligndata %d16, %d18, %d62
247340af27wh#define	ALIGN_OFF_16_23			\
248340af27wh	faligndata %d4, %d6, %d48	;\
249340af27wh	faligndata %d6, %d8, %d50	;\
250340af27wh	faligndata %d8, %d10, %d52	;\
251340af27wh	faligndata %d10, %d12, %d54	;\
252340af27wh	faligndata %d12, %d14, %d56	;\
253340af27wh	faligndata %d14, %d16, %d58	;\
254340af27wh	faligndata %d16, %d18, %d60	;\
255340af27wh	faligndata %d18, %d20, %d62
257340af27wh#define	ALIGN_OFF_24_31			\
258340af27wh	faligndata %d6, %d8, %d48	;\
259340af27wh	faligndata %d8, %d10, %d50	;\
260340af27wh	faligndata %d10, %d12, %d52	;\
261340af27wh	faligndata %d12, %d14, %d54	;\
262340af27wh	faligndata %d14, %d16, %d56	;\
263340af27wh	faligndata %d16, %d18, %d58	;\
264340af27wh	faligndata %d18, %d20, %d60	;\
265340af27wh	faligndata %d20, %d22, %d62
267340af27wh#define	ALIGN_OFF_32_39			\
268340af27wh	faligndata %d8, %d10, %d48	;\
269340af27wh	faligndata %d10, %d12, %d50	;\
270340af27wh	faligndata %d12, %d14, %d52	;\
271340af27wh	faligndata %d14, %d16, %d54	;\
272340af27wh	faligndata %d16, %d18, %d56	;\
273340af27wh	faligndata %d18, %d20, %d58	;\
274340af27wh	faligndata %d20, %d22, %d60	;\
275340af27wh	faligndata %d22, %d24, %d62
277340af27wh#define	ALIGN_OFF_40_47			\
278340af27wh	faligndata %d10, %d12, %d48	;\
279340af27wh	faligndata %d12, %d14, %d50	;\
280340af27wh	faligndata %d14, %d16, %d52	;\
281340af27wh	faligndata %d16, %d18, %d54	;\
282340af27wh	faligndata %d18, %d20, %d56	;\
283340af27wh	faligndata %d20, %d22, %d58	;\
284340af27wh	faligndata %d22, %d24, %d60	;\
285340af27wh	faligndata %d24, %d26, %d62
287340af27wh#define	ALIGN_OFF_48_55			\
288340af27wh	faligndata %d12, %d14, %d48	;\
289340af27wh	faligndata %d14, %d16, %d50	;\
290340af27wh	faligndata %d16, %d18, %d52	;\
291340af27wh	faligndata %d18, %d20, %d54	;\
292340af27wh	faligndata %d20, %d22, %d56	;\
293340af27wh	faligndata %d22, %d24, %d58	;\
294340af27wh	faligndata %d24, %d26, %d60	;\
295340af27wh	faligndata %d26, %d28, %d62
297340af27wh#define	ALIGN_OFF_56_63			\
298340af27wh	faligndata %d14, %d16, %d48	;\
299340af27wh	faligndata %d16, %d18, %d50	;\
300340af27wh	faligndata %d18, %d20, %d52	;\
301340af27wh	faligndata %d20, %d22, %d54	;\
302340af27wh	faligndata %d22, %d24, %d56	;\
303340af27wh	faligndata %d24, %d26, %d58	;\
304340af27wh	faligndata %d26, %d28, %d60	;\
305340af27wh	faligndata %d28, %d30, %d62
307280575bPatrick McGehearty/*
308280575bPatrick McGehearty * FP_COPY indicates the minimum number of bytes needed
309280575bPatrick McGehearty * to justify using FP/VIS-accelerated memory operations.
310280575bPatrick McGehearty * The FPBLK code assumes a minimum number of bytes are available
311280575bPatrick McGehearty * to be moved on entry.  Check that code carefully before
312280575bPatrick McGehearty * reducing FP_COPY below 256.
313280575bPatrick McGehearty */
314280575bPatrick McGehearty#define FP_COPY			584
315280575bPatrick McGehearty#define SHORTCOPY		7
316280575bPatrick McGehearty#define ASI_STBI_P		ASI_BLK_INIT_ST_QUAD_LDD_P
317280575bPatrick McGehearty#define ASI_STBI_AIUS		ASI_BLK_INIT_QUAD_LDD_AIUS
318280575bPatrick McGehearty#define CACHE_LINE		64
319340af27wh#define	VIS_BLOCKSIZE		64
/*
 * Size of stack frame in order to accommodate a 64-byte aligned
 * floating-point register save area and 2 64-bit temp locations.
324340af27wh * All copy functions use three quadrants of fp registers; to assure a
325340af27wh * block-aligned three block buffer in which to save we must reserve
326340af27wh * four blocks on stack.
327340af27wh *
328340af27wh *    _______________________________________ <-- %fp + STACK_BIAS
329340af27wh *    | We may need to preserve 3 quadrants |
330340af27wh *    | of fp regs, but since we do so with |
331340af27wh *    | BST/BLD we need room in which to    |
332340af27wh *    | align to VIS_BLOCKSIZE bytes.  So   |
333340af27wh *    | this area is 4 * VIS_BLOCKSIZE.     | <--  - SAVED_FPREGS_OFFSET
334340af27wh *    |-------------------------------------|
335280575bPatrick McGehearty *    | 8 bytes to save %fprs		    | <--  - SAVED_FPRS_OFFSET
336340af27wh *    |-------------------------------------|
337280575bPatrick McGehearty *    | 8 bytes to save %gsr		    | <--  - SAVED_GSR_OFFSET
338340af27wh *    ---------------------------------------
339340af27wh */
/*
 * With VIS_BLOCKSIZE defined as 64 these evaluate to:
 *	HWCOPYFRAMESIZE		272	(4 blocks + two 8-byte slots)
 *	SAVED_FPREGS_OFFSET	256	(4 blocks)
 *	SAVED_FPREGS_ADJUST	193	(3 blocks + 1)
 *	SAVED_FPRS_OFFSET	264
 *	SAVED_GSR_OFFSET	272
 * SAVED_FPREGS_ADJUST is the amount BST_FP_TOSTACK and BLD_FP_FROMSTACK
 * subtract from %fp + STACK_BIAS before masking the address down to a
 * VIS_BLOCKSIZE boundary.
 */
340280575bPatrick McGehearty#define HWCOPYFRAMESIZE		((VIS_BLOCKSIZE * (3 + 1)) + (2 * 8))
341280575bPatrick McGehearty#define SAVED_FPREGS_OFFSET	(VIS_BLOCKSIZE * 4)
342280575bPatrick McGehearty#define SAVED_FPREGS_ADJUST	((VIS_BLOCKSIZE * 3) + 1)
343280575bPatrick McGehearty#define SAVED_FPRS_OFFSET	(SAVED_FPREGS_OFFSET + 8)
344280575bPatrick McGehearty#define SAVED_GSR_OFFSET	(SAVED_FPRS_OFFSET + 8)
347340af27wh * In FP copies if we do not have preserved data to restore over
348340af27wh * the fp regs we used then we must zero those regs to avoid
349340af27wh * exposing portions of the data to later threads (data security).
350340af27wh */
/*
 * FZERO clears %f0 and %f2 with fzero and then writes every other
 * even-numbered register in %f4-%f30 and %f48-%f62, each as the sum
 * (faddd) or product (fmuld) of the two already-zeroed registers.
 * Registers %f32-%f46 are not written by this macro.
 */
351340af27wh#define	FZERO				\
352340af27wh	fzero	%f0			;\
353340af27wh	fzero	%f2			;\
354340af27wh	faddd	%f0, %f2, %f4		;\
355340af27wh	fmuld	%f0, %f2, %f6		;\
356340af27wh	faddd	%f0, %f2, %f8		;\
357340af27wh	fmuld	%f0, %f2, %f10		;\
358340af27wh	faddd	%f0, %f2, %f12		;\
359340af27wh	fmuld	%f0, %f2, %f14		;\
360340af27wh	faddd	%f0, %f2, %f16		;\
361340af27wh	fmuld	%f0, %f2, %f18		;\
362340af27wh	faddd	%f0, %f2, %f20		;\
363340af27wh	fmuld	%f0, %f2, %f22		;\
364340af27wh	faddd	%f0, %f2, %f24		;\
365340af27wh	fmuld	%f0, %f2, %f26		;\
366340af27wh	faddd	%f0, %f2, %f28		;\
367340af27wh	fmuld	%f0, %f2, %f30		;\
368340af27wh	faddd	%f0, %f2, %f48		;\
369340af27wh	fmuld	%f0, %f2, %f50		;\
370340af27wh	faddd	%f0, %f2, %f52		;\
371340af27wh	fmuld	%f0, %f2, %f54		;\
372340af27wh	faddd	%f0, %f2, %f56		;\
373340af27wh	fmuld	%f0, %f2, %f58		;\
374340af27wh	faddd	%f0, %f2, %f60		;\
375340af27wh	fmuld	%f0, %f2, %f62
37759ac0c1davemq#if !defined(lint)
380340af27wh * Macros to save and restore fp registers to/from the stack.
381340af27wh * Used to save and restore in-use fp registers when we want to use FP.
382340af27wh */
/*
 * BST_FP_TOSTACK(tmp1): compute
 *	tmp1 = (%fp + STACK_BIAS - SAVED_FPREGS_ADJUST) & -VIS_BLOCKSIZE
 * and issue three stda ... ASI_BLK_P stores from %f0, %f16 and %f48 at
 * tmp1, tmp1 + VIS_BLOCKSIZE and tmp1 + 2 * VIS_BLOCKSIZE, followed by
 * membar #Sync.  tmp1 is clobbered and ends up addressing the third block.
 */
383340af27wh#define BST_FP_TOSTACK(tmp1)					\
384340af27wh	/* membar #Sync	*/					;\
385340af27wh	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
386340af27wh	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
387340af27wh	stda	%f0, [tmp1]ASI_BLK_P				;\
388340af27wh	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
389340af27wh	stda	%f16, [tmp1]ASI_BLK_P				;\
390340af27wh	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
391340af27wh	stda	%f48, [tmp1]ASI_BLK_P				;\
392340af27wh	membar	#Sync
/*
 * BLD_FP_FROMSTACK(tmp1): inverse of BST_FP_TOSTACK.  Recomputes the same
 * block-aligned address from %fp and issues three ldda ... ASI_BLK_P loads
 * into %f0, %f16 and %f48 from consecutive VIS_BLOCKSIZE-sized blocks,
 * followed by membar #Sync.  tmp1 is clobbered.
 */
394340af27wh#define	BLD_FP_FROMSTACK(tmp1)					\
395340af27wh	/* membar #Sync - provided at copy completion */	;\
396340af27wh	add	%fp, STACK_BIAS - SAVED_FPREGS_ADJUST, tmp1	;\
397340af27wh	and	tmp1, -VIS_BLOCKSIZE, tmp1 /* block align */	;\
398340af27wh	ldda	[tmp1]ASI_BLK_P, %f0				;\
399340af27wh	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
400340af27wh	ldda	[tmp1]ASI_BLK_P, %f16				;\
401340af27wh	add	tmp1, VIS_BLOCKSIZE, tmp1			;\
402340af27wh	ldda	[tmp1]ASI_BLK_P, %f48				;\
403340af27wh	membar	#Sync
404340af27wh#endif	/* NIAGARA_IMPL */
40659ac0c1davemq#endif	/* lint */
4087c478bdstevel@tonic-gate * Copy a block of storage, returning an error code if `from' or
4097c478bdstevel@tonic-gate * `to' takes a kernel pagefault which cannot be resolved.
4107c478bdstevel@tonic-gate * Returns errno value on pagefault error, 0 if all ok
4117c478bdstevel@tonic-gate */
4137c478bdstevel@tonic-gate#if defined(lint)
4157c478bdstevel@tonic-gate/* ARGSUSED */
4177c478bdstevel@tonic-gatekcopy(const void *from, void *to, size_t count)
4187c478bdstevel@tonic-gate{ return(0); }
4207c478bdstevel@tonic-gate#else	/* lint */
4227c478bdstevel@tonic-gate	.seg	".text"
4237c478bdstevel@tonic-gate	.align	4
/*
 * kcopy entry (build without NIAGARA_IMPL).  The byte count in %o2 is
 * compared against FP_COPY: counts greater than FP_COPY branch to
 * .kcopy_more, everything else falls into .kcopy_small.  The small path
 * saves the current t_lofault in %o5 without OR-ing in LOFAULT_SET,
 * installs .sm_copyerr as the new t_lofault (the stn sits in the branch
 * delay slot) and joins the shared leaf copy code at .sm_do_copy.
 */
4257c478bdstevel@tonic-gate	ENTRY(kcopy)
426340af27wh#if !defined(NIAGARA_IMPL)
427280575bPatrick McGehearty	cmp	%o2, FP_COPY			! check for small copy/leaf case
428280575bPatrick McGehearty	bgt,pt	%ncc, .kcopy_more		!
429280575bPatrick McGehearty	nop
430280575bPatrick McGehearty.kcopy_small:					! setup error handler
431280575bPatrick McGehearty	sethi	%hi(.sm_copyerr), %o4
432280575bPatrick McGehearty	or	%o4, %lo(.sm_copyerr), %o4	! .sm_copyerr is lofault value
433280575bPatrick McGehearty	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
434280575bPatrick McGehearty	! Note that we carefully do *not* flag the setting of
435280575bPatrick McGehearty	! t_lofault.
436280575bPatrick McGehearty	membar	#Sync				! sync error barrier
437280575bPatrick McGehearty	b	.sm_do_copy			! common code
438280575bPatrick McGehearty	stn	%o4, [THREAD_REG + T_LOFAULT]	! set t_lofault
439280575bPatrick McGehearty
440280575bPatrick McGehearty
/*
 * Large-copy setup for kcopy: take a new register window with room for
 * MINFRAME + HWCOPYFRAMESIZE bytes of stack, save the current t_lofault in
 * %o5 (again without LOFAULT_SET), install .copyerr as t_lofault in the
 * branch delay slot and continue at .do_copy.
 */
441280575bPatrick McGehearty.kcopy_more:
442340af27wh	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
443340af27wh	sethi	%hi(.copyerr), %l7		! copyerr is lofault value
444340af27wh	or	%l7, %lo(.copyerr), %l7
445340af27wh	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
446340af27wh	! Note that we carefully do *not* flag the setting of
447340af27wh	! t_lofault.
448340af27wh	membar	#Sync				! sync error barrier
449340af27wh	b	.do_copy			! common code
450340af27wh	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
/*
 * We got here because of a fault during a small kcopy, or during a small
 * bcopy if a fault handler existed when bcopy was called.
 * No floating point registers are used by the small copies.
 * Small copies are from a leaf routine.
 * Errno value is in %g1.
 */
/*
 * Leaf fault handler.  Tests LOFAULT_SET in %o5, then writes %o5 with that
 * bit cleared back to t_lofault; the stn is in the delay slot of a
 * non-annulled branch, so it runs on both paths.  With the bit clear the
 * routine returns to the caller with %o0 = %g1; with the bit set it jumps
 * to the saved handler address in %o5 with %o0 = 0.
 */
459280575bPatrick McGehearty.sm_copyerr:
460280575bPatrick McGehearty	! The kcopy will always set a t_lofault handler. If it fires,
461280575bPatrick McGehearty	! we're expected to just return the error code and not to
462280575bPatrick McGehearty	! invoke any existing error handler. As far as bcopy is concerned,
463280575bPatrick McGehearty	! we only set t_lofault if there was an existing lofault handler.
464280575bPatrick McGehearty	! In that case we're expected to invoke the previously existing
465280575bPatrick McGehearty	! handler after resetting the t_lofault value.
466280575bPatrick McGehearty	btst	LOFAULT_SET, %o5
467280575bPatrick McGehearty	membar	#Sync				! sync error barrier
468280575bPatrick McGehearty	andn	%o5, LOFAULT_SET, %o5		! clear fault flag
469280575bPatrick McGehearty	bnz,pn	%ncc, 3f
470280575bPatrick McGehearty	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
471280575bPatrick McGehearty	retl
472280575bPatrick McGehearty	mov	%g1, %o0
473280575bPatrick McGehearty3:
474280575bPatrick McGehearty	! We're here via bcopy. There must have been an error handler
475280575bPatrick McGehearty	! in place otherwise we would have died a nasty death already.
476280575bPatrick McGehearty	jmp	%o5				! goto real handler
477280575bPatrick McGehearty	mov	%g0, %o0
478280575bPatrick McGehearty/*
479280575bPatrick McGehearty *  end of .sm_copyerr
480280575bPatrick McGehearty */
481280575bPatrick McGehearty
482280575bPatrick McGehearty/*
483340af27wh * We got here because of a fault during kcopy or bcopy if a fault
484340af27wh * handler existed when bcopy was called.
485280575bPatrick McGehearty * stack and fp registers need to be restored
486340af27wh * Errno value is in %g1.
487340af27wh */
/*
 * Large-copy fault path (installed as t_lofault by .kcopy_more).  What the
 * visible instructions do, in order:
 *   - point t_lofault at .copyerr2 for the duration of the FP restore;
 *   - keep only the LOFAULT_SET bit of %o5 in %l1 (delay slot of the
 *     FPUSED_FLAG test, so it runs on both paths);
 *   - if FPUSED_FLAG is set in %o5: restore %gsr from %l5, then either
 *     reload the saved FP registers with BLD_FP_FROMSTACK (FPRS_FEF set in
 *     %g5) or clear them with FZERO, and write %g5 back to %fprs;
 *   - if t_lwp is NULL: decrement t_preempt and store it back; when the
 *     result is zero and cpu_kprunrun is non-zero, set KPREEMPT_FLAG in %l1;
 *   - strip COPY_FLAGS from %o5 and store it to t_lofault;
 *   - call kpreempt with %pil in %o0 if KPREEMPT_FLAG is set in %l1;
 *   - if LOFAULT_SET is clear in %l1, return %g1 in the caller's %o0;
 *     otherwise jump to the saved handler in %o5 with the caller's
 *     %o0 = 0, releasing the register window in the delay slot.
 * NOTE(review): the branches here target the local labels 1f, 2f, 3f and
 * 4f, and the entry label itself should precede the first instruction, but
 * no such label lines are present in this extract.  The summary above
 * assumes the labels sit at the natural join points -- confirm against the
 * complete file before relying on it.
 */
489340af27wh	sethi	%hi(.copyerr2), %l1
490340af27wh	or	%l1, %lo(.copyerr2), %l1
491340af27wh	membar	#Sync				! sync error barrier
492340af27wh	stn	%l1, [THREAD_REG + T_LOFAULT]	! set t_lofault
493340af27wh	btst	FPUSED_FLAG, %o5
494340af27wh	bz,pt	%xcc, 1f
495280575bPatrick McGehearty	and	%o5, LOFAULT_SET, %l1	! copy flag to %l1
497340af27wh	membar	#Sync				! sync error barrier
498280575bPatrick McGehearty	wr	%l5, 0, %gsr
499280575bPatrick McGehearty	btst	FPRS_FEF, %g5
500340af27wh	bz,pt	%icc, 4f
501280575bPatrick McGehearty	nop
502340af27wh	! restore fpregs from stack
503340af27wh	BLD_FP_FROMSTACK(%o2)
504340af27wh	ba,pt	%ncc, 2f
505280575bPatrick McGehearty	wr	%g5, 0, %fprs		! restore fprs
507340af27wh	FZERO
508280575bPatrick McGehearty	wr	%g5, 0, %fprs		! restore fprs
510340af27wh	ldn	[THREAD_REG + T_LWP], %o2
511340af27wh	brnz,pt	%o2, 1f
512280575bPatrick McGehearty	nop
514340af27wh	ldsb	[THREAD_REG + T_PREEMPT], %l0
515340af27wh	deccc	%l0
516340af27wh	bnz,pn	%ncc, 1f
517280575bPatrick McGehearty	stb	%l0, [THREAD_REG + T_PREEMPT]
519340af27wh	! Check for a kernel preemption request
520340af27wh	ldn	[THREAD_REG + T_CPU], %l0
521340af27wh	ldub	[%l0 + CPU_KPRUNRUN], %l0
522340af27wh	brnz,a,pt	%l0, 1f	! Need to call kpreempt?
523280575bPatrick McGehearty	or	%l1, KPREEMPT_FLAG, %l1	! If so, set the flag
525340af27wh	! The kcopy will always set a t_lofault handler. If it fires,
526340af27wh	! we're expected to just return the error code and not to
527340af27wh	! invoke any existing error handler. As far as bcopy is concerned,
528340af27wh	! we only set t_lofault if there was an existing lofault handler.
529340af27wh	! In that case we're expected to invoke the previously existing
530280575bPatrick McGehearty	! handler after resetting the t_lofault value.
532340af27wh	andn	%o5, COPY_FLAGS, %o5	! remove flags from lofault address
533340af27wh	membar	#Sync				! sync error barrier
534340af27wh	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
536340af27wh	! call kpreempt if necessary
537340af27wh	btst	KPREEMPT_FLAG, %l1
538340af27wh	bz,pt	%icc, 2f
539280575bPatrick McGehearty	nop
540340af27wh	call	kpreempt
541280575bPatrick McGehearty	rdpr	%pil, %o0	! pass %pil
543280575bPatrick McGehearty	btst	LOFAULT_SET, %l1
544340af27wh	bnz,pn	%ncc, 3f
545340af27wh	nop
546340af27wh	ret
547340af27wh	restore	%g1, 0, %o0
549340af27wh	! We're here via bcopy. There must have been an error handler
550340af27wh	! in place otherwise we would have died a nasty death already.
551340af27wh	jmp	%o5				! goto real handler
552340af27wh	restore	%g0, 0, %o0			! dispose of copy window
555340af27wh * We got here because of a fault in .copyerr.  We can't safely restore fp
556340af27wh * state, so we panic.
557340af27wh */
/*
 * Panic path: loads the address of fp_panic_msg into %o0 and calls panic.
 * NOTE(review): the label lines that should name the string (fp_panic_msg)
 * and this entry point (.copyerr2, which .copyerr installs as t_lofault)
 * are not present in this extract -- confirm against the complete file.
 */
559340af27wh	.asciz	"Unable to restore fp state after copy operation"
561340af27wh	.align	4
563340af27wh	set	fp_panic_msg, %o0
564340af27wh	call	panic
565280575bPatrick McGehearty	nop
566280575bPatrick McGehearty/*
567280575bPatrick McGehearty *  end of .copyerr
568280575bPatrick McGehearty */
569280575bPatrick McGehearty
570340af27wh#else	/* NIAGARA_IMPL */
/*
 * NIAGARA_IMPL variant of kcopy.  Entry: take a MINFRAME-sized register
 * window, save the current t_lofault in %o5 with LOFAULT_SET OR-ed in,
 * install .copyerr as t_lofault in the branch delay slot and continue at
 * .do_copy.  Fault handler (second half): clear LOFAULT_SET from %o5,
 * store it back to t_lofault and return %g1 in the caller's %o0.
 * NOTE(review): the handler's label line is not present in this extract;
 * from the "set .copyerr" above it is presumably .copyerr -- confirm.
 */
571473b13dae	save	%sp, -SA(MINFRAME), %sp
572473b13dae	set	.copyerr, %l7			! copyerr is lofault value
573473b13dae	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
574473b13dae	or	%o5, LOFAULT_SET, %o5
575473b13dae	membar	#Sync				! sync error barrier
576473b13dae	b	.do_copy			! common code
577473b13dae	stn	%l7, [THREAD_REG + T_LOFAULT]	! set t_lofault
5807c478bdstevel@tonic-gate * We got here because of a fault during kcopy.
5817c478bdstevel@tonic-gate * Errno value is in %g1.
5827c478bdstevel@tonic-gate */
584473b13dae	! The kcopy() *always* sets a t_lofault handler and it ORs LOFAULT_SET
585473b13dae	! into %o5 to indicate it has set t_lofault handler. Need to clear
586473b13dae	! LOFAULT_SET flag before restoring the error handler.
587473b13dae	andn	%o5, LOFAULT_SET, %o5
588473b13dae	membar	#Sync				! sync error barrier
5897c478bdstevel@tonic-gate	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
5907c478bdstevel@tonic-gate	ret
5917c478bdstevel@tonic-gate	restore	%g1, 0, %o0
592340af27wh#endif	/* NIAGARA_IMPL */
5947c478bdstevel@tonic-gate	SET_SIZE(kcopy)
5957c478bdstevel@tonic-gate#endif	/* lint */
5997c478bdstevel@tonic-gate * Copy a block of storage - must not overlap (from + len <= to).
6007c478bdstevel@tonic-gate */
6017c478bdstevel@tonic-gate#if defined(lint)
6037c478bdstevel@tonic-gate/* ARGSUSED */
6057c478bdstevel@tonic-gatebcopy(const void *from, void *to, size_t count)
6087c478bdstevel@tonic-gate#else	/* lint */
6107c478bdstevel@tonic-gate	ENTRY(bcopy)
611340af27wh#if !defined(NIAGARA_IMPL)
612280575bPatrick McGehearty	cmp	%o2, FP_COPY			! check for small copy/leaf case
613280575bPatrick McGehearty	bgt,pt	%ncc, .bcopy_more		!
614280575bPatrick McGehearty	nop
615280575bPatrick McGehearty.bcopy_small:					! setup error handler
616280575bPatrick McGehearty	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
617280575bPatrick McGehearty	tst	%o5
618280575bPatrick McGehearty	bz,pt	%icc, .sm_do_copy
619280575bPatrick McGehearty	sethi	%hi(.sm_copyerr), %o4
620280575bPatrick McGehearty	or	%o4, %lo(.sm_copyerr), %o4	! .sm_copyerr is lofault value
621280575bPatrick McGehearty	membar	#Sync				! sync error barrier
622280575bPatrick McGehearty	stn	%o4, [THREAD_REG + T_LOFAULT]	! set t_lofault
623280575bPatrick McGehearty	or	%o5, LOFAULT_SET, %o5		! Error should trampoline
624280575bPatrick McGehearty.sm_do_copy:
625280575bPatrick McGehearty	mov	%o0, %g1		! save %o0
626280575bPatrick McGehearty	cmp	%o2, SHORTCOPY		! make sure there is enough to align
627280575bPatrick McGehearty	ble,pt	%ncc, .bc_smallest
628280575bPatrick McGehearty	andcc	%o1, 0x7, %o3		! is dest long aligned
629280575bPatrick McGehearty	bnz,pn	%ncc, .bc_align
630280575bPatrick McGehearty	andcc	%o1, 1, %o3		! is dest byte aligned
631280575bPatrick McGehearty
632280575bPatrick McGehearty! Destination is long word aligned
633280575bPatrick McGehearty.bc_al_src:
634280575bPatrick McGehearty	andcc	%o0, 7, %o3
635280575bPatrick McGehearty	brnz,pt	%o3, .bc_src_dst_unal8
636280575bPatrick McGehearty	nop
637280575bPatrick McGehearty/*
638280575bPatrick McGehearty * Special case for handling when src and dest are both long word aligned
639280575bPatrick McGehearty * and total data to move is less than FP_COPY bytes
640280575bPatrick McGehearty * Also handles finish up for large block moves, so may be less than 32 bytes
641280575bPatrick McGehearty */
642280575bPatrick McGehearty.bc_medlong:
643280575bPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
644280575bPatrick McGehearty	ble,pt	%ncc, .bc_medl31
645280575bPatrick McGehearty	nop
646280575bPatrick McGehearty.bc_medl32:
647280575bPatrick McGehearty	ldx	[%o0], %o4		! move 32 bytes
648280575bPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count by 32
649280575bPatrick McGehearty	stx	%o4, [%o1]
650280575bPatrick McGehearty	ldx	[%o0+8], %o4
651280575bPatrick McGehearty	stx	%o4, [%o1+8]
652280575bPatrick McGehearty	ldx	[%o0+16], %o4
653280575bPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
654280575bPatrick McGehearty	stx	%o4, [%o1+16]
655280575bPatrick McGehearty	ldx	[%o0-8], %o4
656280575bPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
657280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medl32	! repeat if at least 32 bytes left
658280575bPatrick McGehearty	stx	%o4, [%o1-8]
659280575bPatrick McGehearty.bc_medl31:
660280575bPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
661280575bPatrick McGehearty	ble,pt	%ncc, .bc_medl7		! skip if 7 or fewer bytes left
662280575bPatrick McGehearty	nop
663280575bPatrick McGehearty.bc_medl8:
664280575bPatrick McGehearty	ldx	[%o0], %o4		! move 8 bytes
665280575bPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
666280575bPatrick McGehearty	subcc	%o2, 8, %o2		! decrease count by 8
667280575bPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
668280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medl8
669280575bPatrick McGehearty	stx	%o4, [%o1-8]
670280575bPatrick McGehearty.bc_medl7:
671280575bPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
672280575bPatrick McGehearty	bnz,pt	%ncc, .bc_small4	! do final bytes if not finished
673280575bPatrick McGehearty
674280575bPatrick McGehearty.bc_smallx:				! finish up and exit
675280575bPatrick McGehearty	tst	%o5
676280575bPatrick McGehearty	bz,pt	%ncc, .bc_sm_done
677280575bPatrick McGehearty	andn	%o5, COPY_FLAGS, %o5	! remove flags from lofault address
678280575bPatrick McGehearty	membar	#Sync			! sync error barrier
679280575bPatrick McGehearty	stn	%o5, [THREAD_REG + T_LOFAULT]	! restore old t_lofault
680280575bPatrick McGehearty.bc_sm_done:
681280575bPatrick McGehearty	retl
682280575bPatrick McGehearty	mov	%g0, %o0
683280575bPatrick McGehearty
684280575bPatrick McGehearty.bc_small4:
685280575bPatrick McGehearty	cmp	%o2, 4
686280575bPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
687280575bPatrick McGehearty	nop				!
688280575bPatrick McGehearty	ld	[%o0], %o4		! move 4 bytes
689280575bPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
690280575bPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
691280575bPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
692280575bPatrick McGehearty	bz,pt	%ncc, .bc_smallx
693280575bPatrick McGehearty	stw	%o4, [%o1-4]
694280575bPatrick McGehearty
695280575bPatrick McGehearty.bc_small3x:				! Exactly 1, 2, or 3 bytes remain
696280575bPatrick McGehearty	subcc	%o2, 1, %o2		! reduce count for cc test
697280575bPatrick McGehearty	ldub	[%o0], %o4		! load one byte
698280575bPatrick McGehearty	bz,pt	%ncc, .bc_smallx
699280575bPatrick McGehearty	stb	%o4, [%o1]		! store one byte
700280575bPatrick McGehearty	ldub	[%o0+1], %o4		! load second byte
701280575bPatrick McGehearty	subcc	%o2, 1, %o2
702280575bPatrick McGehearty	bz,pt	%ncc, .bc_smallx
703280575bPatrick McGehearty	stb	%o4, [%o1+1]		! store second byte
704280575bPatrick McGehearty	ldub	[%o0+2], %o4		! load third byte
705280575bPatrick McGehearty	ba	.bc_smallx
706280575bPatrick McGehearty	stb	%o4, [%o1+2]		! store third byte
707280575bPatrick McGehearty
708280575bPatrick McGehearty.bc_smallest:				! 7 or fewer bytes remain
709280575bPatrick McGehearty	tst	%o2
710280575bPatrick McGehearty	bz,pt	%ncc, .bc_smallx
711280575bPatrick McGehearty	cmp	%o2, 4
712280575bPatrick McGehearty	blt,pt	%ncc, .bc_small3x
713280575bPatrick McGehearty	nop
714280575bPatrick McGehearty	ldub	[%o0], %o4		! read byte
715280575bPatrick McGehearty	subcc	%o2, 4, %o2		! reduce count by 4
716280575bPatrick McGehearty	stb	%o4, [%o1]		! write byte
717280575bPatrick McGehearty	ldub	[%o0+1], %o4		! repeat for total of 4 bytes
718280575bPatrick McGehearty	add	%o0, 4, %o0		! advance src by 4
719280575bPatrick McGehearty	stb	%o4, [%o1+1]
720280575bPatrick McGehearty	ldub	[%o0-2], %o4
721280575bPatrick McGehearty	add	%o1, 4, %o1		! advance dst by 4
722280575bPatrick McGehearty	stb	%o4, [%o1-2]
723280575bPatrick McGehearty	ldub	[%o0-1], %o4
724280575bPatrick McGehearty	bnz,pt	%ncc, .bc_small3x
725280575bPatrick McGehearty	stb	%o4, [%o1-1]
726280575bPatrick McGehearty	ba	.bc_smallx
727280575bPatrick McGehearty	nop
728280575bPatrick McGehearty
729280575bPatrick McGehearty/*
730280575bPatrick McGehearty * Align destination to long word boundary
731280575bPatrick McGehearty */
732280575bPatrick McGehearty.bc_align:				! byte align test in prior branch delay
733280575bPatrick McGehearty	bnz,pt	%ncc, .bc_al_d1
734280575bPatrick McGehearty.bc_al_d1f:				! dest is now half word aligned
735280575bPatrick McGehearty	andcc	%o1, 2, %o3
736280575bPatrick McGehearty	bnz,pt	%ncc, .bc_al_d2
737280575bPatrick McGehearty.bc_al_d2f:				! dest is now word aligned
738280575bPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
739280575bPatrick McGehearty	bz,pt	%ncc, .bc_al_src
740280575bPatrick McGehearty	nop
741280575bPatrick McGehearty.bc_al_d4:				! dest is word aligned;  src is unknown
742280575bPatrick McGehearty	ldub	[%o0], %o4		! move a word (src align unknown)
743280575bPatrick McGehearty	ldub	[%o0+1], %o3
744280575bPatrick McGehearty	sll	%o4, 24, %o4		! position
745280575bPatrick McGehearty	sll	%o3, 16, %o3		! position
746280575bPatrick McGehearty	or	%o4, %o3, %o3		! merge
747280575bPatrick McGehearty	ldub	[%o0+2], %o4
748280575bPatrick McGehearty	sll	%o4, 8, %o4		! position
749280575bPatrick McGehearty	or	%o4, %o3, %o3		! merge
750280575bPatrick McGehearty	ldub	[%o0+3], %o4
751280575bPatrick McGehearty	or	%o4, %o3, %o4		! merge
752280575bPatrick McGehearty	stw	%o4,[%o1]		! store four bytes
753280575bPatrick McGehearty	add	%o0, 4, %o0		! adjust src by 4
754280575bPatrick McGehearty	add	%o1, 4, %o1		! adjust dest by 4
755280575bPatrick McGehearty	sub	%o2, 4, %o2		! adjust count by 4
756280575bPatrick McGehearty	andcc	%o0, 7, %o3		! check for src long word alignment
757280575bPatrick McGehearty	brz,pt	%o3, .bc_medlong
758280575bPatrick McGehearty.bc_src_dst_unal8:
759280575bPatrick McGehearty	! dst is 8-byte aligned, src is not
760280575bPatrick McGehearty	! Size is less than FP_COPY
761280575bPatrick McGehearty	! Following code is to select for alignment
762280575bPatrick McGehearty	andcc	%o0, 0x3, %o3		! test word alignment
763280575bPatrick McGehearty	bz,pt	%ncc, .bc_medword
764280575bPatrick McGehearty	nop
765280575bPatrick McGehearty	andcc	%o0, 0x1, %o3		! test halfword alignment
766280575bPatrick McGehearty	bnz,pt	%ncc, .bc_med_byte	! go to byte move if not halfword
767280575bPatrick McGehearty	andcc	%o0, 0x2, %o3		! test which byte alignment
768280575bPatrick McGehearty	ba	.bc_medhalf
769280575bPatrick McGehearty	nop
770280575bPatrick McGehearty.bc_al_d1:				! align dest to half word
771280575bPatrick McGehearty	ldub	[%o0], %o4		! move a byte
772280575bPatrick McGehearty	add	%o0, 1, %o0
773280575bPatrick McGehearty	stb	%o4, [%o1]
774280575bPatrick McGehearty	add	%o1, 1, %o1
775280575bPatrick McGehearty	andcc	%o1, 2, %o3
776280575bPatrick McGehearty	bz,pt	%ncc, .bc_al_d2f
777280575bPatrick McGehearty	sub	%o2, 1, %o2
778280575bPatrick McGehearty.bc_al_d2:				! align dest to word
779280575bPatrick McGehearty	ldub	[%o0], %o4		! move a half-word (src align unknown)
780280575bPatrick McGehearty	ldub	[%o0+1], %o3
781280575bPatrick McGehearty	sll	%o4, 8, %o4		! position
782280575bPatrick McGehearty	or	%o4, %o3, %o4		! merge
783280575bPatrick McGehearty	sth	%o4, [%o1]
784280575bPatrick McGehearty	add	%o0, 2, %o0
785280575bPatrick McGehearty	add	%o1, 2, %o1
786280575bPatrick McGehearty	andcc	%o1, 4, %o3		! is dest longword aligned?
787280575bPatrick McGehearty	bz,pt	%ncc, .bc_al_src
788280575bPatrick McGehearty	sub	%o2, 2, %o2
789280575bPatrick McGehearty	ba	.bc_al_d4
790280575bPatrick McGehearty	nop
791280575bPatrick McGehearty/*
792280575bPatrick McGehearty * Handle all cases where src and dest are aligned on word
793280575bPatrick McGehearty * boundaries. Use unrolled loops for better performance.
794280575bPatrick McGehearty * This option wins over standard large data move when
795280575bPatrick McGehearty * source and destination are in cache for medium
796280575bPatrick McGehearty * to short data moves.
797280575bPatrick McGehearty */
798280575bPatrick McGehearty.bc_medword:
799280575bPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
800280575bPatrick McGehearty	ble,pt	%ncc, .bc_medw31
801280575bPatrick McGehearty	nop
802280575bPatrick McGehearty.bc_medw32:
803280575bPatrick McGehearty	ld	[%o0], %o4		! move a block of 32 bytes
804280575bPatrick McGehearty	stw	%o4, [%o1]
805280575bPatrick McGehearty	ld	[%o0+4], %o4
806280575bPatrick McGehearty	stw	%o4, [%o1+4]
807280575bPatrick McGehearty	ld	[%o0+8], %o4
808280575bPatrick McGehearty	stw	%o4, [%o1+8]
809280575bPatrick McGehearty	ld	[%o0+12], %o4
810280575bPatrick McGehearty	stw	%o4, [%o1+12]
811280575bPatrick McGehearty	ld	[%o0+16], %o4
812280575bPatrick McGehearty	stw	%o4, [%o1+16]
813280575bPatrick McGehearty	ld	[%o0+20], %o4
814280575bPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
815280575bPatrick McGehearty	stw	%o4, [%o1+20]
816280575bPatrick McGehearty	ld	[%o0+24], %o4
817280575bPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
818280575bPatrick McGehearty	stw	%o4, [%o1+24]
819280575bPatrick McGehearty	ld	[%o0-4], %o4
820280575bPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
821280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medw32	! repeat if at least 32 bytes left
822280575bPatrick McGehearty	stw	%o4, [%o1-4]
823280575bPatrick McGehearty.bc_medw31:
824280575bPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
825280575bPatrick McGehearty	ble,pt	%ncc, .bc_medw7		! skip if 7 or fewer bytes left
826280575bPatrick McGehearty	nop				!
827280575bPatrick McGehearty.bc_medw15:
828280575bPatrick McGehearty	ld	[%o0], %o4		! move a block of 8 bytes
829280575bPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
830280575bPatrick McGehearty	stw	%o4, [%o1]
831280575bPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
832280575bPatrick McGehearty	ld	[%o0-4], %o4
833280575bPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
834280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medw15
835280575bPatrick McGehearty	stw	%o4, [%o1-4]
836280575bPatrick McGehearty.bc_medw7:
837280575bPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
838280575bPatrick McGehearty	bz,pt	%ncc, .bc_smallx	! exit if finished
839280575bPatrick McGehearty	cmp	%o2, 4
840280575bPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
841280575bPatrick McGehearty	nop				!
842280575bPatrick McGehearty	ld	[%o0], %o4		! move 4 bytes
843280575bPatrick McGehearty	add	%o0, 4, %o0		! increase src ptr by 4
844280575bPatrick McGehearty	add	%o1, 4, %o1		! increase dst ptr by 4
845280575bPatrick McGehearty	subcc	%o2, 4, %o2		! decrease count by 4
846280575bPatrick McGehearty	bnz	.bc_small3x
847280575bPatrick McGehearty	stw	%o4, [%o1-4]
848280575bPatrick McGehearty	ba	.bc_smallx
849280575bPatrick McGehearty	nop
850280575bPatrick McGehearty
851280575bPatrick McGehearty.bc_medhalf:
852280575bPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
853280575bPatrick McGehearty	ble,pt	%ncc, .bc_medh31
854280575bPatrick McGehearty	nop
855280575bPatrick McGehearty.bc_medh32:				! load and store block of 32 bytes
856280575bPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
857280575bPatrick McGehearty
858280575bPatrick McGehearty	lduh	[%o0], %o4		! move 32 bytes
859280575bPatrick McGehearty	lduw	[%o0+2], %o3
860280575bPatrick McGehearty	sllx	%o4, 48, %o4
861280575bPatrick McGehearty	sllx	%o3, 16, %o3
862280575bPatrick McGehearty	or	%o4, %o3, %o3
863280575bPatrick McGehearty	lduh	[%o0+6], %o4
864280575bPatrick McGehearty	or	%o4, %o3, %o4
865280575bPatrick McGehearty	stx	%o4, [%o1]
866280575bPatrick McGehearty
867280575bPatrick McGehearty	lduh	[%o0+8], %o4
868280575bPatrick McGehearty	lduw	[%o0+10], %o3
869280575bPatrick McGehearty	sllx	%o4, 48, %o4
870280575bPatrick McGehearty	sllx	%o3, 16, %o3
871280575bPatrick McGehearty	or	%o4, %o3, %o3
872280575bPatrick McGehearty	lduh	[%o0+14], %o4
873280575bPatrick McGehearty	or	%o4, %o3, %o4
874280575bPatrick McGehearty	stx	%o4, [%o1+8]
875280575bPatrick McGehearty
876280575bPatrick McGehearty	lduh	[%o0+16], %o4
877280575bPatrick McGehearty	lduw	[%o0+18], %o3
878280575bPatrick McGehearty	sllx	%o4, 48, %o4
879280575bPatrick McGehearty	sllx	%o3, 16, %o3
880280575bPatrick McGehearty	or	%o4, %o3, %o3
881280575bPatrick McGehearty	lduh	[%o0+22], %o4
882280575bPatrick McGehearty	or	%o4, %o3, %o4
883280575bPatrick McGehearty	stx	%o4, [%o1+16]
884280575bPatrick McGehearty
885280575bPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
886280575bPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
887280575bPatrick McGehearty
888280575bPatrick McGehearty	lduh	[%o0-8], %o4
889280575bPatrick McGehearty	lduw	[%o0-6], %o3
890280575bPatrick McGehearty	sllx	%o4, 48, %o4
891280575bPatrick McGehearty	sllx	%o3, 16, %o3
892280575bPatrick McGehearty	or	%o4, %o3, %o3
893280575bPatrick McGehearty	lduh	[%o0-2], %o4
894280575bPatrick McGehearty	or	%o3, %o4, %o4
895280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medh32	! repeat if at least 32 bytes left
896280575bPatrick McGehearty	stx	%o4, [%o1-8]
897280575bPatrick McGehearty
898280575bPatrick McGehearty.bc_medh31:
899280575bPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
900280575bPatrick McGehearty	ble,pt	%ncc, .bc_medh7		! skip if 7 or fewer bytes left
901280575bPatrick McGehearty	nop				!
902280575bPatrick McGehearty.bc_medh15:
903280575bPatrick McGehearty	lduh	[%o0], %o4		! move 8 bytes
904280575bPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
905280575bPatrick McGehearty	lduw	[%o0+2], %o3
906280575bPatrick McGehearty	sllx	%o4, 48, %o4
907280575bPatrick McGehearty	sllx	%o3, 16, %o3
908280575bPatrick McGehearty	or	%o4, %o3, %o3
909280575bPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
910280575bPatrick McGehearty	lduh	[%o0+6], %o4
911280575bPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
912280575bPatrick McGehearty	or	%o4, %o3, %o4
913280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medh15
914280575bPatrick McGehearty	stx	%o4, [%o1-8]
915280575bPatrick McGehearty.bc_medh7:
916280575bPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
917280575bPatrick McGehearty	bz,pt	%ncc, .bc_smallx	! exit if finished
918280575bPatrick McGehearty	cmp	%o2, 4
919280575bPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
920280575bPatrick McGehearty	nop				!
921280575bPatrick McGehearty	lduh	[%o0], %o4
922280575bPatrick McGehearty	sll	%o4, 16, %o4
923280575bPatrick McGehearty	lduh	[%o0+2], %o3
924280575bPatrick McGehearty	or	%o3, %o4, %o4
925280575bPatrick McGehearty	subcc	%o2, 4, %o2
926280575bPatrick McGehearty	add	%o0, 4, %o0
927280575bPatrick McGehearty	add	%o1, 4, %o1
928280575bPatrick McGehearty	bnz	.bc_small3x
929280575bPatrick McGehearty	stw	%o4, [%o1-4]
930280575bPatrick McGehearty	ba	.bc_smallx
931280575bPatrick McGehearty	nop
932280575bPatrick McGehearty
933280575bPatrick McGehearty	.align 16
934280575bPatrick McGehearty.bc_med_byte:
935280575bPatrick McGehearty	bnz,pt	%ncc, .bc_medbh32a	! go to correct byte move
936280575bPatrick McGehearty	subcc	%o2, 31, %o2		! adjust length to allow cc test
937280575bPatrick McGehearty	ble,pt	%ncc, .bc_medb31
938280575bPatrick McGehearty	nop
939280575bPatrick McGehearty.bc_medb32:				! Alignment 1 or 5
940280575bPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
941280575bPatrick McGehearty
942280575bPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 32 bytes
943280575bPatrick McGehearty	sllx	%o4, 56, %o3
944280575bPatrick McGehearty	lduh	[%o0+1], %o4
945280575bPatrick McGehearty	sllx	%o4, 40, %o4
946280575bPatrick McGehearty	or	%o4, %o3, %o3
947280575bPatrick McGehearty	lduw	[%o0+3], %o4
948280575bPatrick McGehearty	sllx	%o4, 8, %o4
949280575bPatrick McGehearty	or	%o4, %o3, %o3
950280575bPatrick McGehearty	ldub	[%o0+7], %o4
951280575bPatrick McGehearty	or	%o4, %o3, %o4
952280575bPatrick McGehearty	stx	%o4, [%o1]
953280575bPatrick McGehearty
954280575bPatrick McGehearty	ldub	[%o0+8], %o4
955280575bPatrick McGehearty	sllx	%o4, 56, %o3
956280575bPatrick McGehearty	lduh	[%o0+9], %o4
957280575bPatrick McGehearty	sllx	%o4, 40, %o4
958280575bPatrick McGehearty	or	%o4, %o3, %o3
959280575bPatrick McGehearty	lduw	[%o0+11], %o4
960280575bPatrick McGehearty	sllx	%o4, 8, %o4
961280575bPatrick McGehearty	or	%o4, %o3, %o3
962280575bPatrick McGehearty	ldub	[%o0+15], %o4
963280575bPatrick McGehearty	or	%o4, %o3, %o4
964280575bPatrick McGehearty	stx	%o4, [%o1+8]
965280575bPatrick McGehearty
966280575bPatrick McGehearty	ldub	[%o0+16], %o4
967280575bPatrick McGehearty	sllx	%o4, 56, %o3
968280575bPatrick McGehearty	lduh	[%o0+17], %o4
969280575bPatrick McGehearty	sllx	%o4, 40, %o4
970280575bPatrick McGehearty	or	%o4, %o3, %o3
971280575bPatrick McGehearty	lduw	[%o0+19], %o4
972280575bPatrick McGehearty	sllx	%o4, 8, %o4
973280575bPatrick McGehearty	or	%o4, %o3, %o3
974280575bPatrick McGehearty	ldub	[%o0+23], %o4
975280575bPatrick McGehearty	or	%o4, %o3, %o4
976280575bPatrick McGehearty	stx	%o4, [%o1+16]
977280575bPatrick McGehearty
978280575bPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
979280575bPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
980280575bPatrick McGehearty
981280575bPatrick McGehearty	ldub	[%o0-8], %o4
982280575bPatrick McGehearty	sllx	%o4, 56, %o3
983280575bPatrick McGehearty	lduh	[%o0-7], %o4
984280575bPatrick McGehearty	sllx	%o4, 40, %o4
985280575bPatrick McGehearty	or	%o4, %o3, %o3
986280575bPatrick McGehearty	lduw	[%o0-5], %o4
987280575bPatrick McGehearty	sllx	%o4, 8, %o4
988280575bPatrick McGehearty	or	%o4, %o3, %o3
989280575bPatrick McGehearty	ldub	[%o0-1], %o4
990280575bPatrick McGehearty	or	%o4, %o3, %o4
991280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medb32	! repeat if at least 32 bytes left
992280575bPatrick McGehearty	stx	%o4, [%o1-8]
993280575bPatrick McGehearty
994280575bPatrick McGehearty.bc_medb31:				! 31 or fewer bytes remaining
995280575bPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
996280575bPatrick McGehearty	ble,pt	%ncc, .bc_medb7		! skip if 7 or fewer bytes left
997280575bPatrick McGehearty	nop				!
998280575bPatrick McGehearty.bc_medb15:
999280575bPatrick McGehearty
1000280575bPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 8 bytes
1001280575bPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
1002280575bPatrick McGehearty	sllx	%o4, 56, %o3
1003280575bPatrick McGehearty	lduh	[%o0+1], %o4
1004280575bPatrick McGehearty	sllx	%o4, 40, %o4
1005280575bPatrick McGehearty	or	%o4, %o3, %o3
1006280575bPatrick McGehearty	lduw	[%o0+3], %o4
1007280575bPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
1008280575bPatrick McGehearty	sllx	%o4, 8, %o4
1009280575bPatrick McGehearty	or	%o4, %o3, %o3
1010280575bPatrick McGehearty	ldub	[%o0+7], %o4
1011280575bPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
1012280575bPatrick McGehearty	or	%o4, %o3, %o4
1013280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medb15
1014280575bPatrick McGehearty	stx	%o4, [%o1-8]
1015280575bPatrick McGehearty.bc_medb7:
1016280575bPatrick McGehearty	addcc	%o2, 7, %o2		! finish adjustment of remaining count
1017280575bPatrick McGehearty	bz,pt	%ncc, .bc_smallx	! exit if finished
1018280575bPatrick McGehearty	cmp	%o2, 4
1019280575bPatrick McGehearty	blt,pt	%ncc, .bc_small3x	! skip if less than 4 bytes left
1020280575bPatrick McGehearty	nop				!
1021280575bPatrick McGehearty	ldub	[%o0], %o4		! move 4 bytes
1022280575bPatrick McGehearty	sll	%o4, 24, %o3
1023280575bPatrick McGehearty	lduh	[%o0+1], %o4
1024280575bPatrick McGehearty	sll	%o4, 8, %o4
1025280575bPatrick McGehearty	or	%o4, %o3, %o3
1026280575bPatrick McGehearty	ldub	[%o0+3], %o4
1027280575bPatrick McGehearty	or	%o4, %o3, %o4
1028280575bPatrick McGehearty	subcc	%o2, 4, %o2
1029280575bPatrick McGehearty	add	%o0, 4, %o0
1030280575bPatrick McGehearty	add	%o1, 4, %o1
1031280575bPatrick McGehearty	bnz	.bc_small3x
1032280575bPatrick McGehearty	stw	%o4, [%o1-4]
1033280575bPatrick McGehearty	ba	.bc_smallx
1034280575bPatrick McGehearty	nop
1035280575bPatrick McGehearty
1036280575bPatrick McGehearty	.align 16
1037280575bPatrick McGehearty.bc_medbh32a:				! Alignment 3 or 7
1038280575bPatrick McGehearty	ble,pt	%ncc, .bc_medbh31
1039280575bPatrick McGehearty	nop
1040280575bPatrick McGehearty.bc_medbh32:				! Alignment 3 or 7
1041280575bPatrick McGehearty	subcc	%o2, 32, %o2		! decrement length count
1042280575bPatrick McGehearty
1043280575bPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 32 bytes
1044280575bPatrick McGehearty	sllx	%o4, 56, %o3
1045280575bPatrick McGehearty	lduw	[%o0+1], %o4
1046280575bPatrick McGehearty	sllx	%o4, 24, %o4
1047280575bPatrick McGehearty	or	%o4, %o3, %o3
1048280575bPatrick McGehearty	lduh	[%o0+5], %o4
1049280575bPatrick McGehearty	sllx	%o4, 8, %o4
1050280575bPatrick McGehearty	or	%o4, %o3, %o3
1051280575bPatrick McGehearty	ldub	[%o0+7], %o4
1052280575bPatrick McGehearty	or	%o4, %o3, %o4
1053280575bPatrick McGehearty	stx	%o4, [%o1]
1054280575bPatrick McGehearty
1055280575bPatrick McGehearty	ldub	[%o0+8], %o4
1056280575bPatrick McGehearty	sllx	%o4, 56, %o3
1057280575bPatrick McGehearty	lduw	[%o0+9], %o4
1058280575bPatrick McGehearty	sllx	%o4, 24, %o4
1059280575bPatrick McGehearty	or	%o4, %o3, %o3
1060280575bPatrick McGehearty	lduh	[%o0+13], %o4
1061280575bPatrick McGehearty	sllx	%o4, 8, %o4
1062280575bPatrick McGehearty	or	%o4, %o3, %o3
1063280575bPatrick McGehearty	ldub	[%o0+15], %o4
1064280575bPatrick McGehearty	or	%o4, %o3, %o4
1065280575bPatrick McGehearty	stx	%o4, [%o1+8]
1066280575bPatrick McGehearty
1067280575bPatrick McGehearty	ldub	[%o0+16], %o4
1068280575bPatrick McGehearty	sllx	%o4, 56, %o3
1069280575bPatrick McGehearty	lduw	[%o0+17], %o4
1070280575bPatrick McGehearty	sllx	%o4, 24, %o4
1071280575bPatrick McGehearty	or	%o4, %o3, %o3
1072280575bPatrick McGehearty	lduh	[%o0+21], %o4
1073280575bPatrick McGehearty	sllx	%o4, 8, %o4
1074280575bPatrick McGehearty	or	%o4, %o3, %o3
1075280575bPatrick McGehearty	ldub	[%o0+23], %o4
1076280575bPatrick McGehearty	or	%o4, %o3, %o4
1077280575bPatrick McGehearty	stx	%o4, [%o1+16]
1078280575bPatrick McGehearty
1079280575bPatrick McGehearty	add	%o0, 32, %o0		! increase src ptr by 32
1080280575bPatrick McGehearty	add	%o1, 32, %o1		! increase dst ptr by 32
1081280575bPatrick McGehearty
1082280575bPatrick McGehearty	ldub	[%o0-8], %o4
1083280575bPatrick McGehearty	sllx	%o4, 56, %o3
1084280575bPatrick McGehearty	lduw	[%o0-7], %o4
1085280575bPatrick McGehearty	sllx	%o4, 24, %o4
1086280575bPatrick McGehearty	or	%o4, %o3, %o3
1087280575bPatrick McGehearty	lduh	[%o0-3], %o4
1088280575bPatrick McGehearty	sllx	%o4, 8, %o4
1089280575bPatrick McGehearty	or	%o4, %o3, %o3
1090280575bPatrick McGehearty	ldub	[%o0-1], %o4
1091280575bPatrick McGehearty	or	%o4, %o3, %o4
1092280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medbh32	! repeat if at least 32 bytes left
1093280575bPatrick McGehearty	stx	%o4, [%o1-8]
1094280575bPatrick McGehearty
1095280575bPatrick McGehearty.bc_medbh31:
1096280575bPatrick McGehearty	addcc	%o2, 24, %o2		! adjust count to be off by 7
1097280575bPatrick McGehearty	ble,pt	%ncc, .bc_medb7		! skip if 7 or fewer bytes left
1098280575bPatrick McGehearty	nop				!
1099280575bPatrick McGehearty.bc_medbh15:
1100280575bPatrick McGehearty	ldub	[%o0], %o4		! load and store a block of 8 bytes
1101280575bPatrick McGehearty	sllx	%o4, 56, %o3
1102280575bPatrick McGehearty	lduw	[%o0+1], %o4
1103280575bPatrick McGehearty	sllx	%o4, 24, %o4
1104280575bPatrick McGehearty	or	%o4, %o3, %o3
1105280575bPatrick McGehearty	lduh	[%o0+5], %o4
1106280575bPatrick McGehearty	sllx	%o4, 8, %o4
1107280575bPatrick McGehearty	or	%o4, %o3, %o3
1108280575bPatrick McGehearty	ldub	[%o0+7], %o4
1109280575bPatrick McGehearty	or	%o4, %o3, %o4
1110280575bPatrick McGehearty	stx	%o4, [%o1]
1111280575bPatrick McGehearty	subcc	%o2, 8, %o2		! decrement length count
1112280575bPatrick McGehearty	add	%o1, 8, %o1		! increase dst ptr by 8
1113280575bPatrick McGehearty	add	%o0, 8, %o0		! increase src ptr by 8
1114280575bPatrick McGehearty	bgu,pt	%ncc, .bc_medbh15
1115280575bPatrick McGehearty	stx	%o4, [%o1-8]
1116280575bPatrick McGehearty	ba	.bc_medb7
1117280575bPatrick McGehearty	nop
1118280575bPatrick McGehearty
1119280575bPatrick McGehearty	SET_SIZE(bcopy)
1120280575bPatrick McGehearty/*
1121280575bPatrick McGehearty * The _more entry points are not intended to be used directly by
1122280575bPatrick McGehearty * any caller from outside this file.  They are provided to allow
1123280575bPatrick McGehearty * profiling and dtrace of the portions of the copy code that use
1124280575bPatrick McGehearty * the floating point registers.
1125280575bPatrick McGehearty */
1126280575bPatrick McGehearty	ENTRY(bcopy_more)
1127280575bPatrick McGehearty.bcopy_more:
1128340af27wh	save	%sp, -SA(MINFRAME + HWCOPYFRAMESIZE), %sp
1129340af27wh	ldn	[THREAD_REG + T_LOFAULT], %o5	! save existing handler
1130340af27wh	brz,pt	%o5, .do_copy
1131280575bPatrick McGehearty	nop