/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Copyright (c) 2009, Intel Corporation
 * All rights reserved.
 */

/*       Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*       Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T		*/
/*         All Rights Reserved						*/

/*       Copyright (c) 1987, 1988 Microsoft Corporation			*/
/*         All Rights Reserved						*/

/*
 * Copyright 2020 Joyent, Inc.
 */

#include <sys/errno.h>
#include <sys/asm_linkage.h>

#include "assym.h"

#define	KCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
#define	XCOPY_MIN_SIZE	128	/* Must be >= 16 bytes */
/*
 * Non-temporal access (NTA) alignment requirement
 */
#define	NTA_ALIGN_SIZE	4	/* Must be at least 4-byte aligned */
#define	NTA_ALIGN_MASK	_CONST(NTA_ALIGN_SIZE-1)
#define	COUNT_ALIGN_SIZE	16	/* Must be at least 16-byte aligned */
#define	COUNT_ALIGN_MASK	_CONST(COUNT_ALIGN_SIZE-1)

/*
 * With the introduction of Broadwell, Intel has introduced supervisor mode
 * access protection -- SMAP. SMAP forces the kernel to set certain bits to
 * enable access of user pages (AC in rflags, defined as PS_ACHK in
 * <sys/psw.h>). One of the challenges is that the implementation of many of
 * the userland copy routines directly use the kernel ones. For example,
 * copyin and copyout simply go and jump to the do_copy_fault label and
 * traditionally let those deal with the return for them. In fact, changing
 * that is a can of frame pointers.
 *
 * Rules and Constraints:
 *
 * 1. For anything that's not in copy.s, we have it do explicit smap_disable()
 * or smap_enable() calls.  This is restricted to the following three places:
 * DTrace, resume() in swtch.s and on_fault/no_fault. If you want to add it
 * somewhere else, we should be thinking twice.
 *
 * 2. We try to toggle this at the smallest window possible. This means that
 * if we take a fault, need to try to use a copyop in copyin() or copyout(),
 * or any other function, we will always leave with SMAP enabled (the kernel
 * cannot access user pages).
 *
 * 3. None of the *_noerr() or ucopy/uzero routines should toggle SMAP. They
 * are explicitly only allowed to be called while in an on_fault()/no_fault()
 * handler, which already takes care of ensuring that SMAP is enabled and
 * disabled. Note this means that when under an on_fault()/no_fault() handler,
 * one must not call the non-*_noerr() routines.
 *
 * 4. The first thing we should do after coming out of a lofault handler is
 * to make sure that we call smap_enable() again to ensure that we are safely
 * protected, as more often than not, we will have disabled smap to get there.
 *
 * 5. smap_enable() and smap_disable() don't exist: calls to these functions
 * generate runtime relocations that are then processed into the necessary
 * clac/stac, via the krtld hotinlines mechanism and hotinline_smap().
 *
 * 6. For any inline user of SMAP, the appropriate SMAP_ENABLE_INSTR and
 * SMAP_DISABLE_INSTR macro should be used. If the number of these is changed,
 * you must update the constants SMAP_ENABLE_COUNT and SMAP_DISABLE_COUNT
 * below.
 *
 * 7. Generally this .s file is processed by a K&R style cpp. This means that
 * it really has a lot of feelings about whitespace. In particular, if you
 * have a macro FOO with the arguments FOO(1, 3), the second argument is in
 * fact ' 3'.
 *
 * 8. In general, the kernel has its own value for rflags that gets used. This
 * is maintained in a few different places which vary based on how the thread
 * comes into existence and whether it's a user thread. In general, when the
 * kernel takes a trap, it always sets rflags to a known set of flags, mainly
 * as part of ENABLE_INTR_FLAGS and F_OFF and F_ON. These ensure that PS_ACHK
 * is cleared for us. In addition, when using the sysenter instruction, we
 * mask off PS_ACHK via the AMD_SFMASK MSR. See init_cpu_syscall() for where
 * that gets masked off.
 */
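
/*
 * To make rule 2 concrete, here is a rough C sketch of the discipline the
 * code below follows. It is illustrative only; copy_fault_window() and
 * do_guarded_copy() are hypothetical names standing in for the guarded
 * copy paths implemented in assembly in this file:
 *
 *	int
 *	copy_fault_window(void *to, const void *from, size_t n)
 *	{
 *		int err;
 *
 *		smap_disable();		// AC set: user pages accessible
 *		err = do_guarded_copy(to, from, n);
 *		smap_enable();		// AC clear: user pages protected
 *		return (err);		// always leave with SMAP enforcing
 *	}
 */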

/*
 * The optimal 64-bit bcopy and kcopy for modern x86 processors uses
 * "rep smovq" for large sizes. Performance data shows that many calls to
 * bcopy/kcopy/bzero/kzero operate on small buffers. For best performance for
 * these small sizes unrolled code is used. For medium sizes, loops writing
 * 64 bytes per iteration are used. Transition points were determined
 * experimentally.
 */
#define BZERO_USE_REP	(1024)
#define BCOPY_DFLT_REP	(128)
#define	BCOPY_NHM_REP	(768)

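/*
 * In C terms, the size strategy is roughly the following sketch. It is
 * illustrative only: the helper names are made up, the real dispatch for
 * small sizes is a jump table rather than comparisons, and the rep
 * threshold is BCOPY_DFLT_REP or the boot-patched BCOPY_NHM_REP:
 *
 *	void
 *	bcopy_sketch(const void *from, void *to, size_t count)
 *	{
 *		if (count < 0x50)			// 80 bytes
 *			small_unrolled_copy(from, to, count);
 *		else if (count < bcopy_rep_threshold)
 *			loop_64_bytes_per_iter(from, to, count);
 *		else
 *			rep_movsq_copy(from, to, count); // rep smovq
 *	}
 */
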
/*
 * Copy a block of storage, returning an error code if `from' or
 * `to' takes a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

/*
 * I'm sorry about these macros, but copy.s is unsurprisingly sensitive to
 * additional call instructions.
 */
#define	SMAP_DISABLE_COUNT	16
#define	SMAP_ENABLE_COUNT	26

#define	SMAP_DISABLE_INSTR(ITER)		\
	.globl	_smap_disable_patch_##ITER;	\
	_smap_disable_patch_##ITER##:;	\
	nop; nop; nop;

#define	SMAP_ENABLE_INSTR(ITER)			\
	.globl	_smap_enable_patch_##ITER;	\
	_smap_enable_patch_##ITER##:;	\
	nop; nop; nop;

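/*
 * Each macro above emits three nops at a global label. As a sketch of what
 * the krtld hotinline pass then does (patch_smap_site() is a made-up name,
 * but the 3-byte stac/clac encodings are the architectural ones):
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static const uint8_t stac_insn[3] = { 0x0f, 0x01, 0xcb };
 *	static const uint8_t clac_insn[3] = { 0x0f, 0x01, 0xca };
 *
 *	void
 *	patch_smap_site(uint8_t *site, int enable_protection)
 *	{
 *		// smap_enable sites get clac (clear AC, protect user
 *		// pages); smap_disable sites get stac (set AC, allow
 *		// kernel access to user pages)
 *		memcpy(site, enable_protection ? clac_insn : stac_insn, 3);
 *	}
 */
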
	.globl	kernelbase
	.globl	postbootkernelbase

	ENTRY(kcopy)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	cmpq	postbootkernelbase(%rip), %rdi		/* %rdi = from */
	jb	0f
	cmpq	postbootkernelbase(%rip), %rsi		/* %rsi = to */
	jnb	1f
0:	leaq	.kcopy_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif
	/*
	 * pass lofault value as 4th argument to do_copy_fault
	 */
	leaq	_kcopy_copyerr(%rip), %rcx
	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread addr */

do_copy_fault:
	movq	T_LOFAULT(%r9), %r11	/* save the current lofault */
	movq	%rcx, T_LOFAULT(%r9)	/* new lofault */
	call	bcopy_altentry
	xorl	%eax, %eax		/* return 0 (success) */
	SMAP_ENABLE_INSTR(0)

	/*
	 * A fault during do_copy_fault is indicated through an errno value
	 * in %rax and we iretq from the trap handler to here.
	 */
_kcopy_copyerr:
	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
	leave
	ret
	SET_SIZE(kcopy)

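/*
 * Illustrative use of the kcopy() contract from C; the return convention
 * (0 on success, an errno if a fault in `from' or `to' cannot be resolved)
 * is the one implemented above, while the caller is made up:
 *
 *	int err;
 *
 *	if ((err = kcopy(from, to, len)) != 0)
 *		return (err);	// errno delivered by the trap handler
 */
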
#undef	ARG_FROM
#undef	ARG_TO
#undef	ARG_COUNT

#define	COPY_LOOP_INIT(src, dst, cnt)	\
	addq	cnt, src;			\
	addq	cnt, dst;			\
	shrq	$3, cnt;			\
	neg	cnt

	/* Copy 16 bytes per loop.  Uses %rax and %r8 */
#define	COPY_LOOP_BODY(src, dst, cnt)	\
	prefetchnta	0x100(src, cnt, 8);	\
	movq	(src, cnt, 8), %rax;		\
	movq	0x8(src, cnt, 8), %r8;		\
	movnti	%rax, (dst, cnt, 8);		\
	movnti	%r8, 0x8(dst, cnt, 8);		\
	addq	$2, cnt

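/*
 * A rough C rendering of what INIT + BODY compute, for a byte count that
 * is a multiple of 16 (stream_store() is a stand-in name for movnti):
 *
 *	void
 *	nta_copy_sketch(const uint64_t *src, uint64_t *dst, size_t bytes)
 *	{
 *		for (size_t i = 0; i < bytes / 8; i += 2) {
 *			// prefetchnta of src 0x100 bytes ahead goes here
 *			uint64_t a = src[i], b = src[i + 1];
 *			stream_store(&dst[i], a);	// movnti
 *			stream_store(&dst[i + 1], b);	// movnti
 *		}
 *		// the caller must mfence before relying on the stores
 *	}
 *
 * The INIT macro's bias-and-negate trick (advance src/dst by cnt, count a
 * negative word index up to zero) lets the loop close on the flags set by
 * addq instead of a separate comparison.
 */
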
	ENTRY(kcopy_nta)
	pushq	%rbp
	movq	%rsp, %rbp
#ifdef DEBUG
	cmpq	postbootkernelbase(%rip), %rdi		/* %rdi = from */
	jb	0f
	cmpq	postbootkernelbase(%rip), %rsi		/* %rsi = to */
	jnb	1f
0:	leaq	.kcopy_panic_msg(%rip), %rdi
	xorl	%eax, %eax
	call	panic
1:
#endif

	movq	%gs:CPU_THREAD, %r9
	cmpq	$0, %rcx		/* No non-temporal access? */
	/*
	 * pass lofault value as 4th argument to do_copy_fault
	 */
	leaq	_kcopy_nta_copyerr(%rip), %rcx	/* doesn't set rflags */
	jnz	do_copy_fault		/* use regular access */
	/*
	 * Make sure cnt is >= KCOPY_MIN_SIZE
	 */
	cmpq	$KCOPY_MIN_SIZE, %rdx
	jb	do_copy_fault

	/*
	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
	 * count is COUNT_ALIGN_SIZE aligned.
	 */
	movq	%rdi, %r10
	orq	%rsi, %r10
	andq	$NTA_ALIGN_MASK, %r10
	orq	%rdx, %r10
	andq	$COUNT_ALIGN_MASK, %r10
	jnz	do_copy_fault

	ALTENTRY(do_copy_fault_nta)
	movq    %gs:CPU_THREAD, %r9     /* %r9 = thread addr */
	movq    T_LOFAULT(%r9), %r11    /* save the current lofault */
	movq    %rcx, T_LOFAULT(%r9)    /* new lofault */

	/*
	 * COPY_LOOP_BODY uses %rax and %r8
	 */
	COPY_LOOP_INIT(%rdi, %rsi, %rdx)
2:	COPY_LOOP_BODY(%rdi, %rsi, %rdx)
	jnz	2b

	mfence
	xorl	%eax, %eax		/* return 0 (success) */
	SMAP_ENABLE_INSTR(1)

_kcopy_nta_copyerr:
	movq	%r11, T_LOFAULT(%r9)    /* restore original lofault */
	leave
	ret
	SET_SIZE(do_copy_fault_nta)
	SET_SIZE(kcopy_nta)

	ENTRY(bcopy)
#ifdef DEBUG
	orq	%rdx, %rdx		/* %rdx = count */
	jz	1f
	cmpq	postbootkernelbase(%rip), %rdi		/* %rdi = from */
	jb	0f
	cmpq	postbootkernelbase(%rip), %rsi		/* %rsi = to */
	jnb	1f
0:	leaq	.bcopy_panic_msg(%rip), %rdi
	jmp	call_panic		/* setup stack and call panic */
1:
#endif
	/*
	 * bcopy_altentry() is called from kcopy, i.e., do_copy_fault.
	 * kcopy assumes that bcopy doesn't touch %r9 and %r11. If bcopy
	 * uses these registers in future they must be saved and restored.
	 */
	ALTENTRY(bcopy_altentry)
do_copy:
#define	L(s) .bcopy##s
	cmpq	$0x50, %rdx		/* 80 */
	jae	bcopy_ck_size

	/*
	 * Performance data shows many callers copy small buffers. So for
	 * best perf for these sizes unrolled code is used. Store data
	 * without worrying about alignment.
	 */
	leaq	L(fwdPxQx)(%rip), %r10
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	movslq	(%r10,%rdx,4), %rcx
	leaq	(%rcx,%r10,1), %r10
	INDIRECT_JMP_REG(r10)

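/*
 * The five instructions above are a relative jump table dispatch. A C
 * rendering using GCC computed goto, shrunk to counts 0-3 for brevity
 * (illustrative only):
 *
 *	void
 *	small_copy_sketch(const char *from, char *to, size_t count)
 *	{
 *		static const void *tbl[4] = { &&p0, &&p1, &&p2, &&p3 };
 *
 *		from += count;		// bias pointers past the end so
 *		to += count;		// each body uses fixed negative
 *		goto *tbl[count];	// offsets and falls through
 *	p3:	to[-3] = from[-3];
 *	p2:	to[-2] = from[-2];
 *	p1:	to[-1] = from[-1];
 *	p0:	return;
 *	}
 *
 * The real table stores 32-bit offsets relative to L(fwdPxQx) rather than
 * absolute pointers, which keeps it compact and position independent.
 */
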
	.p2align 4
L(fwdPxQx):
	.int       L(P0Q0)-L(fwdPxQx)	/* 0 */
	.int       L(P1Q0)-L(fwdPxQx)
	.int       L(P2Q0)-L(fwdPxQx)
	.int       L(P3Q0)-L(fwdPxQx)
	.int       L(P4Q0)-L(fwdPxQx)
	.int       L(P5Q0)-L(fwdPxQx)
	.int       L(P6Q0)-L(fwdPxQx)
	.int       L(P7Q0)-L(fwdPxQx)

	.int       L(P0Q1)-L(fwdPxQx)	/* 8 */
	.int       L(P1Q1)-L(fwdPxQx)
	.int       L(P2Q1)-L(fwdPxQx)
	.int       L(P3Q1)-L(fwdPxQx)
	.int       L(P4Q1)-L(fwdPxQx)
	.int       L(P5Q1)-L(fwdPxQx)
	.int       L(P6Q1)-L(fwdPxQx)
	.int       L(P7Q1)-L(fwdPxQx)

	.int       L(P0Q2)-L(fwdPxQx)	/* 16 */
	.int       L(P1Q2)-L(fwdPxQx)
	.int       L(P2Q2)-L(fwdPxQx)
	.int       L(P3Q2)-L(fwdPxQx)
	.int       L(P4Q2)-L(fwdPxQx)
	.int       L(P5Q2)-L(fwdPxQx)
	.int       L(P6Q2)-L(fwdPxQx)
	.int       L(P7Q2)-L(fwdPxQx)

	.int       L(P0Q3)-L(fwdPxQx)	/* 24 */
	.int       L(P1Q3)-L(fwdPxQx)
	.int       L(P2Q3)-L(fwdPxQx)
	.int       L(P3Q3)-L(fwdPxQx)
	.int       L(P4Q3)-L(fwdPxQx)
	.int       L(P5Q3)-L(fwdPxQx)
	.int       L(P6Q3)-L(fwdPxQx)
	.int       L(P7Q3)-L(fwdPxQx)

	.int       L(P0Q4)-L(fwdPxQx)	/* 32 */
	.int       L(P1Q4)-L(fwdPxQx)
	.int       L(P2Q4)-L(fwdPxQx)
	.int       L(P3Q4)-L(fwdPxQx)
	.int       L(P4Q4)-L(fwdPxQx)
	.int       L(P5Q4)-L(fwdPxQx)
	.int       L(P6Q4)-L(fwdPxQx)
	.int       L(P7Q4)-L(fwdPxQx)

	.int       L(P0Q5)-L(fwdPxQx)	/* 40 */
	.int       L(P1Q5)-L(fwdPxQx)
	.int       L(P2Q5)-L(fwdPxQx)
	.int       L(P3Q5)-L(fwdPxQx)
	.int       L(P4Q5)-L(fwdPxQx)
	.int       L(P5Q5)-L(fwdPxQx)
	.int       L(P6Q5)-L(fwdPxQx)
	.int       L(P7Q5)-L(fwdPxQx)

	.int       L(P0Q6)-L(fwdPxQx)	/* 48 */
	.int       L(P1Q6)-L(fwdPxQx)
	.int       L(P2Q6)-L(fwdPxQx)
	.int       L(P3Q6)-L(fwdPxQx)
	.int       L(P4Q6)-L(fwdPxQx)
	.int       L(P5Q6)-L(fwdPxQx)
	.int       L(P6Q6)-L(fwdPxQx)
	.int       L(P7Q6)-L(fwdPxQx)

	.int       L(P0Q7)-L(fwdPxQx)	/* 56 */
	.int       L(P1Q7)-L(fwdPxQx)
	.int       L(P2Q7)-L(fwdPxQx)
	.int       L(P3Q7)-L(fwdPxQx)
	.int       L(P4Q7)-L(fwdPxQx)
	.int       L(P5Q7)-L(fwdPxQx)
	.int       L(P6Q7)-L(fwdPxQx)
	.int       L(P7Q7)-L(fwdPxQx)

	.int       L(P0Q8)-L(fwdPxQx)	/* 64 */
	.int       L(P1Q8)-L(fwdPxQx)
	.int       L(P2Q8)-L(fwdPxQx)
	.int       L(P3Q8)-L(fwdPxQx)
	.int       L(P4Q8)-L(fwdPxQx)
	.int       L(P5Q8)-L(fwdPxQx)
	.int       L(P6Q8)-L(fwdPxQx)
	.int       L(P7Q8)-L(fwdPxQx)

	.int       L(P0Q9)-L(fwdPxQx)	/* 72 */
	.int       L(P1Q9)-L(fwdPxQx)
	.int       L(P2Q9)-L(fwdPxQx)
	.int       L(P3Q9)-L(fwdPxQx)
	.int       L(P4Q9)-L(fwdPxQx)
	.int       L(P5Q9)-L(fwdPxQx)
	.int       L(P6Q9)-L(fwdPxQx)
	.int       L(P7Q9)-L(fwdPxQx)	/* 79 */

	.p2align 4
L(P0Q9):
	mov    -0x48(%rdi), %rcx
	mov    %rcx, -0x48(%rsi)
L(P0Q8):
	mov    -0x40(%rdi), %r10
	mov    %r10, -0x40(%rsi)
L(P0Q7):
	mov    -0x38(%rdi), %r8
	mov    %r8, -0x38(%rsi)
L(P0Q6):
	mov    -0x30(%rdi), %rcx
	mov    %rcx, -0x30(%rsi)
L(P0Q5):
	mov    -0x28(%rdi), %r10
	mov    %r10, -0x28(%rsi)
L(P0Q4):
	mov    -0x20(%rdi), %r8
	mov    %r8, -0x20(%rsi)
L(P0Q3):
	mov    -0x18(%rdi), %rcx
	mov    %rcx, -0x18(%rsi)
L(P0Q2):
	mov    -0x10(%rdi), %r10
	mov    %r10, -0x10(%rsi)
L(P0Q1):
	mov    -0x8(%rdi), %r8
	mov    %r8, -0x8(%rsi)
L(P0Q0):
	ret

	.p2align 4
L(P1Q9):
	mov    -0x49(%rdi), %r8
	mov    %r8, -0x49(%rsi)
L(P1Q8):
	mov    -0x41(%rdi), %rcx
	mov    %rcx, -0x41(%rsi)
L(P1Q7):
	mov    -0x39(%rdi), %r10
	mov    %r10, -0x39(%rsi)
L(P1Q6):
	mov    -0x31(%rdi), %r8
	mov    %r8, -0x31(%rsi)
L(P1Q5):
	mov    -0x29(%rdi), %rcx
	mov    %rcx, -0x29(%rsi)
L(P1Q4):
	mov    -0x21(%rdi), %r10
	mov    %r10, -0x21(%rsi)
L(P1Q3):
	mov    -0x19(%rdi), %r8
	mov    %r8, -0x19(%rsi)
L(P1Q2):
	mov    -0x11(%rdi), %rcx
	mov    %rcx, -0x11(%rsi)
L(P1Q1):
	mov    -0x9(%rdi), %r10
	mov    %r10, -0x9(%rsi)
L(P1Q0):
	movzbq -0x1(%rdi), %r8
	mov    %r8b, -0x1(%rsi)
	ret

	.p2align 4
L(P2Q9):
	mov    -0x4a(%rdi), %r8
	mov    %r8, -0x4a(%rsi)
L(P2Q8):
	mov    -0x42(%rdi), %rcx
	mov    %rcx, -0x42(%rsi)
L(P2Q7):
	mov    -0x3a(%rdi), %r10
	mov    %r10, -0x3a(%rsi)
L(P2Q6):
	mov    -0x32(%rdi), %r8
	mov    %r8, -0x32(%rsi)
L(P2Q5):
	mov    -0x2a(%rdi), %rcx
	mov    %rcx, -0x2a(%rsi)
L(P2Q4):
	mov    -0x22(%rdi), %r10
	mov    %r10, -0x22(%rsi)
L(P2Q3):
	mov    -0x1a(%rdi), %r8
	mov    %r8, -0x1a(%rsi)
L(P2Q2):
	mov    -0x12(%rdi), %rcx
	mov    %rcx, -0x12(%rsi)
L(P2Q1):
	mov    -0xa(%rdi), %r10
	mov    %r10, -0xa(%rsi)
L(P2Q0):
	movzwq -0x2(%rdi), %r8
	mov    %r8w, -0x2(%rsi)
	ret

	.p2align 4
L(P3Q9):
	mov    -0x4b(%rdi), %r8
	mov    %r8, -0x4b(%rsi)
L(P3Q8):
	mov    -0x43(%rdi), %rcx
	mov    %rcx, -0x43(%rsi)
L(P3Q7):
	mov    -0x3b(%rdi), %r10
	mov    %r10, -0x3b(%rsi)
L(P3Q6):
	mov    -0x33(%rdi), %r8
	mov    %r8, -0x33(%rsi)
L(P3Q5):
	mov    -0x2b(%rdi), %rcx
	mov    %rcx, -0x2b(%rsi)
L(P3Q4):
	mov    -0x23(%rdi), %r10
	mov    %r10, -0x23(%rsi)
L(P3Q3):
	mov    -0x1b(%rdi), %r8
	mov    %r8, -0x1b(%rsi)
L(P3Q2):
	mov    -0x13(%rdi), %rcx
	mov    %rcx, -0x13(%rsi)
L(P3Q1):
	mov    -0xb(%rdi), %r10
	mov    %r10, -0xb(%rsi)
	/*
	 * These trailing loads/stores have to do all their loads first,
	 * then do the stores.
	 */
L(P3Q0):
	movzwq -0x3(%rdi), %r8
	movzbq -0x1(%rdi), %r10
	mov    %r8w, -0x3(%rsi)
	mov    %r10b, -0x1(%rsi)
	ret

	.p2align 4
L(P4Q9):
	mov    -0x4c(%rdi), %r8
	mov    %r8, -0x4c(%rsi)
L(P4Q8):
	mov    -0x44(%rdi), %rcx
	mov    %rcx, -0x44(%rsi)
L(P4Q7):
	mov    -0x3c(%rdi), %r10
	mov    %r10, -0x3c(%rsi)
L(P4Q6):
	mov    -0x34(%rdi), %r8
	mov    %r8, -0x34(%rsi)
L(P4Q5):
	mov    -0x2c(%rdi), %rcx
	mov    %rcx, -0x2c(%rsi)
L(P4Q4):
	mov    -0x24(%rdi), %r10
	mov    %r10, -0x24(%rsi)
L(P4Q3):
	mov    -0x1c(%rdi), %r8
	mov    %r8, -0x1c(%rsi)
L(P4Q2):
	mov    -0x14(%rdi), %rcx
	mov    %rcx, -0x14(%rsi)
L(P4Q1):
	mov    -0xc(%rdi), %r10
	mov    %r10, -0xc(%rsi)
L(P4Q0):
	mov    -0x4(%rdi), %r8d
	mov    %r8d, -0x4(%rsi)
	ret

	.p2align 4
L(P5Q9):
	mov    -0x4d(%rdi), %r8
	mov    %r8, -0x4d(%rsi)
L(P5Q8):
	mov    -0x45(%rdi), %rcx
	mov    %rcx, -0x45(%rsi)
L(P5Q7):
	mov    -0x3d(%rdi), %r10
	mov    %r10, -0x3d(%rsi)
L(P5Q6):
	mov    -0x35(%rdi), %r8
	mov    %r8, -0x35(%rsi)
L(P5Q5):
	mov    -0x2d(%rdi), %rcx
	mov    %rcx, -0x2d(%rsi)
L(P5Q4):
	mov    -0x25(%rdi), %r10
	mov    %r10, -0x25(%rsi)
L(P5Q3):
	mov    -0x1d(%rdi), %r8
	mov    %r8, -0x1d(%rsi)
L(P5Q2):
	mov    -0x15(%rdi), %rcx
	mov    %rcx, -0x15(%rsi)
L(P5Q1):
	mov    -0xd(%rdi), %r10
	mov    %r10, -0xd(%rsi)
L(P5Q0):
	mov    -0x5(%rdi), %r8d
	movzbq -0x1(%rdi), %r10
	mov    %r8d, -0x5(%rsi)
	mov    %r10b, -0x1(%rsi)
	ret

	.p2align 4
L(P6Q9):
	mov    -0x4e(%rdi), %r8
	mov    %r8, -0x4e(%rsi)
L(P6Q8):
	mov    -0x46(%rdi), %rcx
	mov    %rcx, -0x46(%rsi)
L(P6Q7):
	mov    -0x3e(%rdi), %r10
	mov    %r10, -0x3e(%rsi)
L(P6Q6):
	mov    -0x36(%rdi), %r8
	mov    %r8, -0x36(%rsi)
L(P6Q5):
	mov    -0x2e(%rdi), %rcx
	mov    %rcx, -0x2e(%rsi)
L(P6Q4):
	mov    -0x26(%rdi), %r10
	mov    %r10, -0x26(%rsi)
L(P6Q3):
	mov    -0x1e(%rdi), %r8
	mov    %r8, -0x1e(%rsi)
L(P6Q2):
	mov    -0x16(%rdi), %rcx
	mov    %rcx, -0x16(%rsi)
L(P6Q1):
	mov    -0xe(%rdi), %r10
	mov    %r10, -0xe(%rsi)
L(P6Q0):
	mov    -0x6(%rdi), %r8d
	movzwq -0x2(%rdi), %r10
	mov    %r8d, -0x6(%rsi)
	mov    %r10w, -0x2(%rsi)
	ret

	.p2align 4
L(P7Q9):
	mov    -0x4f(%rdi), %r8
	mov    %r8, -0x4f(%rsi)
L(P7Q8):
	mov    -0x47(%rdi), %rcx
	mov    %rcx, -0x47(%rsi)
L(P7Q7):
	mov    -0x3f(%rdi), %r10
	mov    %r10, -0x3f(%rsi)
L(P7Q6):
	mov    -0x37(%rdi), %r8
	mov    %r8, -0x37(%rsi)
L(P7Q5):
	mov    -0x2f(%rdi), %rcx
	mov    %rcx, -0x2f(%rsi)
L(P7Q4):
	mov    -0x27(%rdi), %r10
	mov    %r10, -0x27(%rsi)
L(P7Q3):
	mov    -0x1f(%rdi), %r8
	mov    %r8, -0x1f(%rsi)
L(P7Q2):
	mov    -0x17(%rdi), %rcx
	mov    %rcx, -0x17(%rsi)
L(P7Q1):
	mov    -0xf(%rdi), %r10
	mov    %r10, -0xf(%rsi)
L(P7Q0):
	mov    -0x7(%rdi), %r8d
	movzwq -0x3(%rdi), %r10
	movzbq -0x1(%rdi), %rcx
	mov    %r8d, -0x7(%rsi)
	mov    %r10w, -0x3(%rsi)
	mov    %cl, -0x1(%rsi)
	ret

	/*
	 * For large sizes rep smovq is fastest.
	 * Transition point determined experimentally as measured on
	 * Intel Xeon processors (incl. Nehalem and previous generations) and
	 * AMD Opteron. The transition value is patched at boot time to avoid
	 * a memory reference hit.
	 */
	.globl bcopy_patch_start
bcopy_patch_start:
	cmpq	$BCOPY_NHM_REP, %rdx
	.globl bcopy_patch_end
bcopy_patch_end:

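/*
 * The labels above delimit a template compare carrying the Nehalem
 * threshold. A sketch of the boot-time step that copies it over the
 * default compare at bcopy_ck_size (the routine name and details here are
 * illustrative, not the actual patch code):
 *
 *	extern char bcopy_patch_start[], bcopy_patch_end[], bcopy_ck_size[];
 *
 *	void
 *	patch_bcopy_threshold(void)
 *	{
 *		size_t len = bcopy_patch_end - bcopy_patch_start;
 *
 *		// overwrite "cmpq $BCOPY_DFLT_REP, %rdx" with the
 *		// "cmpq $BCOPY_NHM_REP, %rdx" template so the threshold
 *		// costs no memory reference at run time
 *		memcpy(bcopy_ck_size, bcopy_patch_start, len);
 *	}
 */
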
	.p2align 4
	ALTENTRY(bcopy_ck_size)

	cmpq	$BCOPY_DFLT_REP, %rdx
	jae	L(use_rep)

	/*
	 * Align to an 8-byte boundary. Avoids penalties from unaligned stores
	 * as well as from stores spanning cachelines.
	 */
	test	$0x7, %rsi
	jz	L(aligned_loop)
	test	$0x1, %rsi
	jz	2f
	movzbq	(%rdi), %r8
	dec	%rdx
	inc	%rdi
	mov	%r8b, (%rsi)
	inc	%rsi
2:
	test	$0x2, %rsi
	jz	4f
	movzwq	(%rdi), %r8
	sub	$0x2, %rdx
	add	$0x2, %rdi
	mov	%r8w, (%rsi)
	add	$0x2, %rsi
4:
	test	$0x4, %rsi
	jz	L(aligned_loop)
	mov	(%rdi), %r8d
	sub	$0x4, %rdx
	add	$0x4, %rdi
	mov	%r8d, (%rsi)
	add	$0x4, %rsi

	/*
	 * Copy 64-bytes per loop
	 */
	.p2align 4
L(aligned_loop):
	mov	(%rdi), %r8
	mov	0x8(%rdi), %r10
	lea	-0x40(%rdx), %rdx
	mov	%r8, (%rsi)
	mov	%r10, 0x8(%rsi)
	mov	0x10(%rdi), %rcx
	mov	0x18(%rdi), %r8
	mov	%rcx, 0x10(%rsi)
	mov	%r8, 0x18(%rsi)

	cmp	$0x40, %rdx
	mov	0x20(%rdi), %r10
	mov	0x28(%rdi), %rcx
	mov	%r10, 0x20(%rsi)
	mov	%rcx, 0x28(%rsi)
	mov	0x30(%rdi), %r8
	mov	0x38(%rdi), %r10
	lea	0x40(%rdi), %rdi
	mov	%r8, 0x30(%rsi)
	mov	%r10, 0x38(%rsi)
	lea	0x40(%rsi), %rsi
	jae	L(aligned_loop)

	/*
	 * Copy remaining bytes (0-63)
	 */
L(do_remainder):
	leaq	L(fwdPxQx)(%rip), %r10
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	movslq	(%r10,%rdx,4), %rcx
	leaq	(%rcx,%r10,1), %r10
	INDIRECT_JMP_REG(r10)

	/*
	 * Use rep smovq. Copy any remainder via unrolled code
	 */
	.p2align 4
L(use_rep):
	xchgq	%rdi, %rsi		/* %rsi = source, %rdi = destination */
	movq	%rdx, %rcx		/* %rcx = count */
	shrq	$3, %rcx		/* 8-byte word count */
	rep
	  smovq

	xchgq	%rsi, %rdi		/* %rdi = src, %rsi = destination */
	andq	$7, %rdx		/* remainder */
	jnz	L(do_remainder)
	ret
#undef	L
	SET_SIZE(bcopy_ck_size)

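/*
 * The L(use_rep) tail is, in C terms, roughly the following; copy_words()
 * and small_unrolled_copy() are stand-in names for rep smovq and the
 * L(fwdPxQx) dispatch:
 *
 *	void
 *	rep_copy_sketch(const char *from, char *to, size_t count)
 *	{
 *		size_t words = count / 8;
 *
 *		copy_words(from, to, words);	// 8 bytes per iteration
 *		count &= 7;			// 0-7 bytes remain
 *		if (count != 0)
 *			small_unrolled_copy(from + words * 8,
 *			    to + words * 8, count);
 *	}
 */
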
#ifdef DEBUG
	/*
	 * Set up a frame on the run-time stack. The end of the input argument
	 * area must be aligned on a 16 byte boundary. The stack pointer %rsp
	 * always points to the end of the latest allocated stack frame.
	 * panic(const char *format, ...) is a varargs function. When a
	 * function taking variable arguments is called, %al must be set
	 * to the number of floating point parameters passed to the function
	 * in SSE registers.
	 */
call_panic:
	pushq	%rbp			/* align stack properly */
	movq	%rsp, %rbp
	xorl	%eax, %eax		/* no variable arguments */
	call	panic			/* %rdi = format string */
#endif
	SET_SIZE(bcopy_altentry)
	SET_SIZE(bcopy)


/*
 * Zero a block of storage, returning an error code if we
 * take a kernel pagefault which cannot be resolved.
 * Returns errno value on pagefault error, 0 if all ok
 */

	ENTRY(kzero)
#ifdef DEBUG
	cmpq	postbootkernelbase(%rip), %rdi	/* %rdi = addr */
	jnb	0f
	leaq	.kzero_panic_msg(%rip), %rdi
	jmp	call_panic		/* setup stack and call panic */
0:
#endif
	/*
	 * pass lofault value as 3rd argument for fault return
	 */
	leaq	_kzeroerr(%rip), %rdx

	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread addr */
	movq	T_LOFAULT(%r9), %r11	/* save the current lofault */
	movq	%rdx, T_LOFAULT(%r9)	/* new lofault */
	call	bzero_altentry
	xorl	%eax, %eax
	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
	ret
	/*
	 * A fault during bzero is indicated through an errno value
	 * in %rax when we iretq to here.
	 */
_kzeroerr:
	addq	$8, %rsp		/* pop bzero_altentry call ret addr */
	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
	ret
	SET_SIZE(kzero)

/*
 * Zero a block of storage.
 */

	ENTRY(bzero)
#ifdef DEBUG
	cmpq	postbootkernelbase(%rip), %rdi	/* %rdi = addr */
	jnb	0f
	leaq	.bzero_panic_msg(%rip), %rdi
	jmp	call_panic		/* setup stack and call panic */
0:
#endif
	ALTENTRY(bzero_altentry)
do_zero:
#define	L(s) .bzero##s
	xorl	%eax, %eax

	cmpq	$0x50, %rsi		/* 80 */
	jae	L(ck_align)

	/*
	 * Performance data shows many callers are zeroing small buffers. So
	 * for best perf for these sizes unrolled code is used. Store zeros
	 * without worrying about alignment.
	 */
	leaq	L(setPxQx)(%rip), %r10
	addq	%rsi, %rdi
	movslq	(%r10,%rsi,4), %rcx
	leaq	(%rcx,%r10,1), %r10
	INDIRECT_JMP_REG(r10)

	.p2align 4
L(setPxQx):
	.int       L(P0Q0)-L(setPxQx)	/* 0 */
	.int       L(P1Q0)-L(setPxQx)
	.int       L(P2Q0)-L(setPxQx)
	.int       L(P3Q0)-L(setPxQx)
	.int       L(P4Q0)-L(setPxQx)
	.int       L(P5Q0)-L(setPxQx)
	.int       L(P6Q0)-L(setPxQx)
	.int       L(P7Q0)-L(setPxQx)

	.int       L(P0Q1)-L(setPxQx)	/* 8 */
	.int       L(P1Q1)-L(setPxQx)
	.int       L(P2Q1)-L(setPxQx)
	.int       L(P3Q1)-L(setPxQx)
	.int       L(P4Q1)-L(setPxQx)
	.int       L(P5Q1)-L(setPxQx)
	.int       L(P6Q1)-L(setPxQx)
	.int       L(P7Q1)-L(setPxQx)

	.int       L(P0Q2)-L(setPxQx)	/* 16 */
	.int       L(P1Q2)-L(setPxQx)
	.int       L(P2Q2)-L(setPxQx)
	.int       L(P3Q2)-L(setPxQx)
	.int       L(P4Q2)-L(setPxQx)
	.int       L(P5Q2)-L(setPxQx)
	.int       L(P6Q2)-L(setPxQx)
	.int       L(P7Q2)-L(setPxQx)

	.int       L(P0Q3)-L(setPxQx)	/* 24 */
	.int       L(P1Q3)-L(setPxQx)
	.int       L(P2Q3)-L(setPxQx)
	.int       L(P3Q3)-L(setPxQx)
	.int       L(P4Q3)-L(setPxQx)
	.int       L(P5Q3)-L(setPxQx)
	.int       L(P6Q3)-L(setPxQx)
	.int       L(P7Q3)-L(setPxQx)

	.int       L(P0Q4)-L(setPxQx)	/* 32 */
	.int       L(P1Q4)-L(setPxQx)
	.int       L(P2Q4)-L(setPxQx)
	.int       L(P3Q4)-L(setPxQx)
	.int       L(P4Q4)-L(setPxQx)
	.int       L(P5Q4)-L(setPxQx)
	.int       L(P6Q4)-L(setPxQx)
	.int       L(P7Q4)-L(setPxQx)

	.int       L(P0Q5)-L(setPxQx)	/* 40 */
	.int       L(P1Q5)-L(setPxQx)
	.int       L(P2Q5)-L(setPxQx)
	.int       L(P3Q5)-L(setPxQx)
	.int       L(P4Q5)-L(setPxQx)
	.int       L(P5Q5)-L(setPxQx)
	.int       L(P6Q5)-L(setPxQx)
	.int       L(P7Q5)-L(setPxQx)

	.int       L(P0Q6)-L(setPxQx)	/* 48 */
	.int       L(P1Q6)-L(setPxQx)
	.int       L(P2Q6)-L(setPxQx)
	.int       L(P3Q6)-L(setPxQx)
	.int       L(P4Q6)-L(setPxQx)
	.int       L(P5Q6)-L(setPxQx)
	.int       L(P6Q6)-L(setPxQx)
	.int       L(P7Q6)-L(setPxQx)

	.int       L(P0Q7)-L(setPxQx)	/* 56 */
	.int       L(P1Q7)-L(setPxQx)
	.int       L(P2Q7)-L(setPxQx)
	.int       L(P3Q7)-L(setPxQx)
	.int       L(P4Q7)-L(setPxQx)
	.int       L(P5Q7)-L(setPxQx)
	.int       L(P6Q7)-L(setPxQx)
	.int       L(P7Q7)-L(setPxQx)

	.int       L(P0Q8)-L(setPxQx)	/* 64 */
	.int       L(P1Q8)-L(setPxQx)
	.int       L(P2Q8)-L(setPxQx)
	.int       L(P3Q8)-L(setPxQx)
	.int       L(P4Q8)-L(setPxQx)
	.int       L(P5Q8)-L(setPxQx)
	.int       L(P6Q8)-L(setPxQx)
	.int       L(P7Q8)-L(setPxQx)

	.int       L(P0Q9)-L(setPxQx)	/* 72 */
	.int       L(P1Q9)-L(setPxQx)
	.int       L(P2Q9)-L(setPxQx)
	.int       L(P3Q9)-L(setPxQx)
	.int       L(P4Q9)-L(setPxQx)
	.int       L(P5Q9)-L(setPxQx)
	.int       L(P6Q9)-L(setPxQx)
	.int       L(P7Q9)-L(setPxQx)	/* 79 */

	.p2align 4
L(P0Q9): mov    %rax, -0x48(%rdi)
L(P0Q8): mov    %rax, -0x40(%rdi)
L(P0Q7): mov    %rax, -0x38(%rdi)
L(P0Q6): mov    %rax, -0x30(%rdi)
L(P0Q5): mov    %rax, -0x28(%rdi)
L(P0Q4): mov    %rax, -0x20(%rdi)
L(P0Q3): mov    %rax, -0x18(%rdi)
L(P0Q2): mov    %rax, -0x10(%rdi)
L(P0Q1): mov    %rax, -0x8(%rdi)
L(P0Q0):
	 ret

	.p2align 4
L(P1Q9): mov    %rax, -0x49(%rdi)
L(P1Q8): mov    %rax, -0x41(%rdi)
L(P1Q7): mov    %rax, -0x39(%rdi)
L(P1Q6): mov    %rax, -0x31(%rdi)
L(P1Q5): mov    %rax, -0x29(%rdi)
L(P1Q4): mov    %rax, -0x21(%rdi)
L(P1Q3): mov    %rax, -0x19(%rdi)
L(P1Q2): mov    %rax, -0x11(%rdi)
L(P1Q1): mov    %rax, -0x9(%rdi)
L(P1Q0): mov    %al, -0x1(%rdi)
	 ret

	.p2align 4
L(P2Q9): mov    %rax, -0x4a(%rdi)
L(P2Q8): mov    %rax, -0x42(%rdi)
L(P2Q7): mov    %rax, -0x3a(%rdi)
L(P2Q6): mov    %rax, -0x32(%rdi)
L(P2Q5): mov    %rax, -0x2a(%rdi)
L(P2Q4): mov    %rax, -0x22(%rdi)
L(P2Q3): mov    %rax, -0x1a(%rdi)
L(P2Q2): mov    %rax, -0x12(%rdi)
L(P2Q1): mov    %rax, -0xa(%rdi)
L(P2Q0): mov    %ax, -0x2(%rdi)
	 ret

	.p2align 4
L(P3Q9): mov    %rax, -0x4b(%rdi)
L(P3Q8): mov    %rax, -0x43(%rdi)
L(P3Q7): mov    %rax, -0x3b(%rdi)
L(P3Q6): mov    %rax, -0x33(%rdi)
L(P3Q5): mov    %rax, -0x2b(%rdi)
L(P3Q4): mov    %rax, -0x23(%rdi)
L(P3Q3): mov    %rax, -0x1b(%rdi)
L(P3Q2): mov    %rax, -0x13(%rdi)
L(P3Q1): mov    %rax, -0xb(%rdi)
L(P3Q0): mov    %ax, -0x3(%rdi)
	 mov    %al, -0x1(%rdi)
	 ret

	.p2align 4
L(P4Q9): mov    %rax, -0x4c(%rdi)
L(P4Q8): mov    %rax, -0x44(%rdi)
L(P4Q7): mov    %rax, -0x3c(%rdi)
L(P4Q6): mov    %rax, -0x34(%rdi)
L(P4Q5): mov    %rax, -0x2c(%rdi)
L(P4Q4): mov    %rax, -0x24(%rdi)
L(P4Q3): mov    %rax, -0x1c(%rdi)
L(P4Q2): mov    %rax, -0x14(%rdi)
L(P4Q1): mov    %rax, -0xc(%rdi)
L(P4Q0): mov    %eax, -0x4(%rdi)
	 ret

	.p2align 4
L(P5Q9): mov    %rax, -0x4d(%rdi)
L(P5Q8): mov    %rax, -0x45(%rdi)
L(P5Q7): mov    %rax, -0x3d(%rdi)
L(P5Q6): mov    %rax, -0x35(%rdi)
L(P5Q5): mov    %rax, -0x2d(%rdi)
L(P5Q4): mov    %rax, -0x25(%rdi)
L(P5Q3): mov    %rax, -0x1d(%rdi)
L(P5Q2): mov    %rax, -0x15(%rdi)
L(P5Q1): mov    %rax, -0xd(%rdi)
L(P5Q0): mov    %eax, -0x5(%rdi)
	 mov    %al, -0x1(%rdi)
	 ret

	.p2align 4
L(P6Q9): mov    %rax, -0x4e(%rdi)
L(P6Q8): mov    %rax, -0x46(%rdi)
L(P6Q7): mov    %rax, -0x3e(%rdi)
L(P6Q6): mov    %rax, -0x36(%rdi)
L(P6Q5): mov    %rax, -0x2e(%rdi)
L(P6Q4): mov    %rax, -0x26(%rdi)
L(P6Q3): mov    %rax, -0x1e(%rdi)
L(P6Q2): mov    %rax, -0x16(%rdi)
L(P6Q1): mov    %rax, -0xe(%rdi)
L(P6Q0): mov    %eax, -0x6(%rdi)
	 mov    %ax, -0x2(%rdi)
	 ret

	.p2align 4
L(P7Q9): mov    %rax, -0x4f(%rdi)
L(P7Q8): mov    %rax, -0x47(%rdi)
L(P7Q7): mov    %rax, -0x3f(%rdi)
L(P7Q6): mov    %rax, -0x37(%rdi)
L(P7Q5): mov    %rax, -0x2f(%rdi)
L(P7Q4): mov    %rax, -0x27(%rdi)
L(P7Q3): mov    %rax, -0x1f(%rdi)
L(P7Q2): mov    %rax, -0x17(%rdi)
L(P7Q1): mov    %rax, -0xf(%rdi)
L(P7Q0): mov    %eax, -0x7(%rdi)
	 mov    %ax, -0x3(%rdi)
	 mov    %al, -0x1(%rdi)
	 ret

	/*
	 * Align to a 16-byte boundary. Avoids penalties from unaligned stores
	 * as well as from stores spanning cachelines. Note 16-byte alignment
	 * is better in the case where rep sstoq is used.
	 */
	.p2align 4
L(ck_align):
	test	$0xf, %rdi
	jz	L(aligned_now)
	test	$1, %rdi
	jz	2f
	mov	%al, (%rdi)
	dec	%rsi
	lea	1(%rdi),%rdi
2:
	test	$2, %rdi
	jz	4f
	mov	%ax, (%rdi)
	sub	$2, %rsi
	lea	2(%rdi),%rdi
4:
	test	$4, %rdi
	jz	8f
	mov	%eax, (%rdi)
	sub	$4, %rsi
	lea	4(%rdi),%rdi
8:
	test	$8, %rdi
	jz	L(aligned_now)
	mov	%rax, (%rdi)
	sub	$8, %rsi
	lea	8(%rdi),%rdi

	/*
	 * For large sizes rep sstoq is fastest.
	 * Transition point determined experimentally as measured on
	 * Intel Xeon processors (incl. Nehalem) and AMD Opteron.
	 */
L(aligned_now):
	cmp	$BZERO_USE_REP, %rsi
	ja	L(use_rep)

	/*
	 * zero 64-bytes per loop
	 */
	.p2align 4
L(bzero_loop):
	leaq	-0x40(%rsi), %rsi
	cmpq	$0x40, %rsi
	movq	%rax, (%rdi)
	movq	%rax, 0x8(%rdi)
	movq	%rax, 0x10(%rdi)
	movq	%rax, 0x18(%rdi)
	movq	%rax, 0x20(%rdi)
	movq	%rax, 0x28(%rdi)
	movq	%rax, 0x30(%rdi)
	movq	%rax, 0x38(%rdi)
	leaq	0x40(%rdi), %rdi
	jae	L(bzero_loop)

	/*
	 * Clear any remaining bytes.
	 */
9:
112522cc0e45SBill Holler	leaq	L(setPxQx)(%rip), %r10
112622cc0e45SBill Holler	addq	%rsi, %rdi
112722cc0e45SBill Holler	movslq	(%r10,%rsi,4), %rcx
112822cc0e45SBill Holler	leaq	(%rcx,%r10,1), %r10
112965f20420SRobert Mustacchi	INDIRECT_JMP_REG(r10)
113022cc0e45SBill Holler
113122cc0e45SBill Holler	/*
113222cc0e45SBill Holler	 * Use rep sstoq. Clear any remainder via unrolled code
113322cc0e45SBill Holler	 */
113422cc0e45SBill Holler	.p2align 4
113522cc0e45SBill HollerL(use_rep):
11367c478bd9Sstevel@tonic-gate	movq	%rsi, %rcx		/* get size in bytes */
11377c478bd9Sstevel@tonic-gate	shrq	$3, %rcx		/* count of 8-byte words to zero */
11387c478bd9Sstevel@tonic-gate	rep
11397c478bd9Sstevel@tonic-gate	  sstoq				/* %rcx = words to clear (%rax=0) */
114022cc0e45SBill Holler	andq	$7, %rsi		/* remaining bytes */
114122cc0e45SBill Holler	jnz	9b
11427c478bd9Sstevel@tonic-gate	ret
114322cc0e45SBill Holler#undef	L
114422cc0e45SBill Holler	SET_SIZE(bzero_altentry)
11457c478bd9Sstevel@tonic-gate	SET_SIZE(bzero)
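/*
 * Taken together, the large-size tail of bzero behaves like the
 * following C model (a minimal sketch requiring <stdint.h> and
 * <stddef.h>; bzero_use_rep stands for BZERO_USE_REP, and the
 * small-count jump table is folded into the final byte loop):
 *
 *	void
 *	bzero_model(void *p, size_t n)
 *	{
 *		uint8_t *d = p;
 *
 *		while (n > 0 && ((uintptr_t)d & 0xf) != 0) {
 *			*d++ = 0;
 *			n--;
 *		}
 *		if (n > bzero_use_rep) {
 *			uint64_t *q = (uint64_t *)d;
 *			size_t words = n / 8;
 *
 *			while (words-- > 0)
 *				*q++ = 0;
 *			d = (uint8_t *)q;
 *			n &= 7;
 *		} else {
 *			while (n >= 64) {
 *				uint64_t *q = (uint64_t *)d;
 *
 *				q[0] = q[1] = q[2] = q[3] = 0;
 *				q[4] = q[5] = q[6] = q[7] = 0;
 *				d += 64;
 *				n -= 64;
 *			}
 *		}
 *		while (n-- > 0)
 *			*d++ = 0;
 *	}
 */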
11467c478bd9Sstevel@tonic-gate
11477c478bd9Sstevel@tonic-gate/*
11487c478bd9Sstevel@tonic-gate * Transfer data to and from user space.
11497c478bd9Sstevel@tonic-gate * Note that these routines can cause faults.
11507c478bd9Sstevel@tonic-gate * It is assumed that the kernel has nothing mapped
11517c478bd9Sstevel@tonic-gate * below KERNELBASE in the virtual address space.
11527c478bd9Sstevel@tonic-gate *
11537c478bd9Sstevel@tonic-gate * Note that copyin(9F) and copyout(9F) are part of the
11547c478bd9Sstevel@tonic-gate * DDI/DKI which specifies that they return '-1' on "errors."
11557c478bd9Sstevel@tonic-gate *
11567c478bd9Sstevel@tonic-gate * Sigh.
11577c478bd9Sstevel@tonic-gate *
11587c478bd9Sstevel@tonic-gate * So there are two extremely similar routines, xcopyin_nta() and
11597c478bd9Sstevel@tonic-gate * xcopyout_nta(), which return the errno that we've faithfully computed.
11607c478bd9Sstevel@tonic-gate * This allows other callers (e.g. uiomove(9F)) to work correctly.
11617c478bd9Sstevel@tonic-gate * Given that these are used pretty heavily, we expand the calling
11627c478bd9Sstevel@tonic-gate * sequences inline for all flavours (rather than making wrappers).
11637c478bd9Sstevel@tonic-gate */
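/*
 * In C terms, the two return conventions look like this (a sketch;
 * the last-argument name of the _nta flavours is illustrative, and
 * per the %rcx test below, nonzero means use ordinary cached stores):
 *
 *	int copyin(const void *uaddr, void *kaddr, size_t count);
 *		returns 0 on success, -1 on any fault (per DDI/DKI)
 *	int xcopyin_nta(const void *uaddr, void *kaddr, size_t count,
 *	    int copy_cached);
 *		returns 0 on success, else an errno (e.g. EFAULT)
 */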
11647c478bd9Sstevel@tonic-gate
11657c478bd9Sstevel@tonic-gate/*
11667c478bd9Sstevel@tonic-gate * Copy user data to kernel space.
11677c478bd9Sstevel@tonic-gate */
11687c478bd9Sstevel@tonic-gate
11697c478bd9Sstevel@tonic-gate	ENTRY(copyin)
11707c478bd9Sstevel@tonic-gate	pushq	%rbp
11717c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp
1172b08adf18SBill Holler	subq	$24, %rsp
11737c478bd9Sstevel@tonic-gate
11747c478bd9Sstevel@tonic-gate	/*
11757c478bd9Sstevel@tonic-gate	 * save args in case we trap and need to rerun as a copyop
11767c478bd9Sstevel@tonic-gate	 */
11777c478bd9Sstevel@tonic-gate	movq	%rdi, (%rsp)
11787c478bd9Sstevel@tonic-gate	movq	%rsi, 0x8(%rsp)
11797c478bd9Sstevel@tonic-gate	movq	%rdx, 0x10(%rsp)
11807c478bd9Sstevel@tonic-gate
11817c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
11827c478bd9Sstevel@tonic-gate#ifdef DEBUG
11837c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* %rsi = kaddr */
11847c478bd9Sstevel@tonic-gate	jnb	1f
11857c478bd9Sstevel@tonic-gate	leaq	.copyin_panic_msg(%rip), %rdi
11867c478bd9Sstevel@tonic-gate	xorl	%eax, %eax
11877c478bd9Sstevel@tonic-gate	call	panic
11887c478bd9Sstevel@tonic-gate1:
11897c478bd9Sstevel@tonic-gate#endif
11907c478bd9Sstevel@tonic-gate	/*
11917c478bd9Sstevel@tonic-gate	 * pass lofault value as 4th argument to do_copy_fault
11927c478bd9Sstevel@tonic-gate	 */
11937c478bd9Sstevel@tonic-gate	leaq	_copyin_err(%rip), %rcx
11947c478bd9Sstevel@tonic-gate
11957c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9
11967c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
11973ce2fcdcSRobert Mustacchi	jae	3f			/* take copyop if uaddr >= kernelbase */
11983ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(0)
11993ce2fcdcSRobert Mustacchi	jmp	do_copy_fault		/* Takes care of leave for us */
12007c478bd9Sstevel@tonic-gate
12017c478bd9Sstevel@tonic-gate_copyin_err:
12023ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(2)
120365f20420SRobert Mustacchi	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
1204b08adf18SBill Holler	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
12057c478bd9Sstevel@tonic-gate3:
12067c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %rax
12077c478bd9Sstevel@tonic-gate	cmpq	$0, %rax
12087c478bd9Sstevel@tonic-gate	jz	2f
12097c478bd9Sstevel@tonic-gate	/*
12107c478bd9Sstevel@tonic-gate	 * reload args for the copyop
12117c478bd9Sstevel@tonic-gate	 */
12127c478bd9Sstevel@tonic-gate	movq	(%rsp), %rdi
12137c478bd9Sstevel@tonic-gate	movq	0x8(%rsp), %rsi
12147c478bd9Sstevel@tonic-gate	movq	0x10(%rsp), %rdx
12157c478bd9Sstevel@tonic-gate	leave
121665f20420SRobert Mustacchi	movq	CP_COPYIN(%rax), %rax
121765f20420SRobert Mustacchi	INDIRECT_JMP_REG(rax)
12187c478bd9Sstevel@tonic-gate
121965f20420SRobert Mustacchi2:	movl	$-1, %eax
12207c478bd9Sstevel@tonic-gate	leave
12217c478bd9Sstevel@tonic-gate	ret
12227c478bd9Sstevel@tonic-gate	SET_SIZE(copyin)
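/*
 * The save-args/copyop fallback pattern above is, roughly, this C (a
 * sketch; t_copyops and cp_copyin mirror T_COPYOPS and CP_COPYIN,
 * while do_copy_fault_c is an illustrative stand-in for the
 * lofault-protected copy):
 *
 *	int
 *	copyin_sketch(const void *uaddr, void *kaddr, size_t count)
 *	{
 *		if ((uintptr_t)uaddr < kernelbase &&
 *		    do_copy_fault_c(uaddr, kaddr, count) == 0)
 *			return (0);
 *		if (curthread->t_copyops != NULL)
 *			return (curthread->t_copyops->cp_copyin(uaddr,
 *			    kaddr, count));
 *		return (-1);
 *	}
 */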
12237c478bd9Sstevel@tonic-gate
12247c478bd9Sstevel@tonic-gate	ENTRY(xcopyin_nta)
12257c478bd9Sstevel@tonic-gate	pushq	%rbp
12267c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp
1227b08adf18SBill Holler	subq	$24, %rsp
12287c478bd9Sstevel@tonic-gate
12297c478bd9Sstevel@tonic-gate	/*
12307c478bd9Sstevel@tonic-gate	 * save args in case we trap and need to rerun as a copyop
12317c478bd9Sstevel@tonic-gate	 * %rcx is consumed in this routine so we don't need to save
12327c478bd9Sstevel@tonic-gate	 * it.
12337c478bd9Sstevel@tonic-gate	 */
12347c478bd9Sstevel@tonic-gate	movq	%rdi, (%rsp)
12357c478bd9Sstevel@tonic-gate	movq	%rsi, 0x8(%rsp)
12367c478bd9Sstevel@tonic-gate	movq	%rdx, 0x10(%rsp)
12377c478bd9Sstevel@tonic-gate
12387c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
12397c478bd9Sstevel@tonic-gate#ifdef DEBUG
12407c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* %rsi = kaddr */
12417c478bd9Sstevel@tonic-gate	jnb	1f
12427c478bd9Sstevel@tonic-gate	leaq	.xcopyin_panic_msg(%rip), %rdi
12437c478bd9Sstevel@tonic-gate	xorl	%eax, %eax
12447c478bd9Sstevel@tonic-gate	call	panic
12457c478bd9Sstevel@tonic-gate1:
12467c478bd9Sstevel@tonic-gate#endif
12477c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9
12487c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
1249b737e79eSnn	jae	4f
12507c478bd9Sstevel@tonic-gate	cmpq	$0, %rcx		/* No non-temporal access? */
12517c478bd9Sstevel@tonic-gate	/*
12527c478bd9Sstevel@tonic-gate	 * pass lofault value as 4th argument to do_copy_fault
12537c478bd9Sstevel@tonic-gate	 */
12547c478bd9Sstevel@tonic-gate	leaq	_xcopyin_err(%rip), %rcx	/* doesn't set rflags */
12553ce2fcdcSRobert Mustacchi	jnz	6f			/* use regular access */
12567c478bd9Sstevel@tonic-gate	/*
12577c478bd9Sstevel@tonic-gate	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
12587c478bd9Sstevel@tonic-gate	 */
12597c478bd9Sstevel@tonic-gate	cmpq	$XCOPY_MIN_SIZE, %rdx
12603ce2fcdcSRobert Mustacchi	jae	5f
12613ce2fcdcSRobert Mustacchi6:
12623ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(1)
12633ce2fcdcSRobert Mustacchi	jmp	do_copy_fault
126465f20420SRobert Mustacchi
12657c478bd9Sstevel@tonic-gate	/*
12667c478bd9Sstevel@tonic-gate	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
12677c478bd9Sstevel@tonic-gate	 * count is COUNT_ALIGN_SIZE aligned.
12687c478bd9Sstevel@tonic-gate	 */
12693ce2fcdcSRobert Mustacchi5:
12707c478bd9Sstevel@tonic-gate	movq	%rdi, %r10
12717c478bd9Sstevel@tonic-gate	orq	%rsi, %r10
12727c478bd9Sstevel@tonic-gate	andq	$NTA_ALIGN_MASK, %r10
12737c478bd9Sstevel@tonic-gate	orq	%rdx, %r10
12747c478bd9Sstevel@tonic-gate	andq	$COUNT_ALIGN_MASK, %r10
127565f20420SRobert Mustacchi	jnz	6b
1276b08adf18SBill Holler	leaq	_xcopyin_nta_err(%rip), %rcx	/* doesn't set rflags */
12773ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(2)
12787c478bd9Sstevel@tonic-gate	jmp	do_copy_fault_nta	/* use non-temporal access */
127965f20420SRobert Mustacchi
1280b737e79eSnn4:
1281b737e79eSnn	movl	$EFAULT, %eax
1282b737e79eSnn	jmp	3f
1283b737e79eSnn
12847c478bd9Sstevel@tonic-gate	/*
12857c478bd9Sstevel@tonic-gate	 * A fault during do_copy_fault or do_copy_fault_nta is
12867c478bd9Sstevel@tonic-gate	 * indicated through an errno value in %rax, and we iret from
12877c478bd9Sstevel@tonic-gate	 * the trap handler to here.
12887c478bd9Sstevel@tonic-gate	 */
12897c478bd9Sstevel@tonic-gate_xcopyin_err:
1290b08adf18SBill Holler	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
1291b08adf18SBill Holler_xcopyin_nta_err:
12923ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(3)
12937c478bd9Sstevel@tonic-gate	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
12947c478bd9Sstevel@tonic-gate3:
12957c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %r8
12967c478bd9Sstevel@tonic-gate	cmpq	$0, %r8
12977c478bd9Sstevel@tonic-gate	jz	2f
12987c478bd9Sstevel@tonic-gate
12997c478bd9Sstevel@tonic-gate	/*
13007c478bd9Sstevel@tonic-gate	 * reload args for the copyop
13017c478bd9Sstevel@tonic-gate	 */
13027c478bd9Sstevel@tonic-gate	movq	(%rsp), %rdi
13037c478bd9Sstevel@tonic-gate	movq	0x8(%rsp), %rsi
13047c478bd9Sstevel@tonic-gate	movq	0x10(%rsp), %rdx
13057c478bd9Sstevel@tonic-gate	leave
130665f20420SRobert Mustacchi	movq	CP_XCOPYIN(%r8), %r8
130765f20420SRobert Mustacchi	INDIRECT_JMP_REG(r8)
13087c478bd9Sstevel@tonic-gate
13097c478bd9Sstevel@tonic-gate2:	leave
13107c478bd9Sstevel@tonic-gate	ret
13117c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyin_nta)
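/*
 * The non-temporal eligibility test used by xcopyin_nta above, and by
 * xcopyout_nta below, reduces to this C predicate (a sketch, with
 * copy_cached the illustrative 4th-argument name from earlier):
 *
 *	use_nta = (copy_cached == 0 &&
 *	    count >= XCOPY_MIN_SIZE &&
 *	    (((uintptr_t)src | (uintptr_t)dst) & NTA_ALIGN_MASK) == 0 &&
 *	    (count & COUNT_ALIGN_MASK) == 0);
 *
 * Anything that fails the test falls back to do_copy_fault.
 */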
13127c478bd9Sstevel@tonic-gate
13137c478bd9Sstevel@tonic-gate/*
13147c478bd9Sstevel@tonic-gate * Copy kernel data to user space.
13157c478bd9Sstevel@tonic-gate */
13167c478bd9Sstevel@tonic-gate
13177c478bd9Sstevel@tonic-gate	ENTRY(copyout)
13187c478bd9Sstevel@tonic-gate	pushq	%rbp
13197c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp
1320b08adf18SBill Holler	subq	$24, %rsp
13217c478bd9Sstevel@tonic-gate
13227c478bd9Sstevel@tonic-gate	/*
13237c478bd9Sstevel@tonic-gate	 * save args in case we trap and need to rerun as a copyop
13247c478bd9Sstevel@tonic-gate	 */
13257c478bd9Sstevel@tonic-gate	movq	%rdi, (%rsp)
13267c478bd9Sstevel@tonic-gate	movq	%rsi, 0x8(%rsp)
13277c478bd9Sstevel@tonic-gate	movq	%rdx, 0x10(%rsp)
13287c478bd9Sstevel@tonic-gate
13297c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
13307c478bd9Sstevel@tonic-gate#ifdef DEBUG
13317c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* %rdi = kaddr */
13327c478bd9Sstevel@tonic-gate	jnb	1f
13337c478bd9Sstevel@tonic-gate	leaq	.copyout_panic_msg(%rip), %rdi
13347c478bd9Sstevel@tonic-gate	xorl	%eax, %eax
13357c478bd9Sstevel@tonic-gate	call	panic
13367c478bd9Sstevel@tonic-gate1:
13377c478bd9Sstevel@tonic-gate#endif
13387c478bd9Sstevel@tonic-gate	/*
13397c478bd9Sstevel@tonic-gate	 * pass lofault value as 4th argument to do_copy_fault
13407c478bd9Sstevel@tonic-gate	 */
13417c478bd9Sstevel@tonic-gate	leaq	_copyout_err(%rip), %rcx
13427c478bd9Sstevel@tonic-gate
13437c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9
13447c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
13453ce2fcdcSRobert Mustacchi	jae	3f			/* take copyop if uaddr >= kernelbase */
13463ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(3)
13473ce2fcdcSRobert Mustacchi	jmp	do_copy_fault		/* Calls leave for us */
13487c478bd9Sstevel@tonic-gate
13497c478bd9Sstevel@tonic-gate_copyout_err:
13503ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(4)
13517c478bd9Sstevel@tonic-gate	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
1352b08adf18SBill Holler	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
13537c478bd9Sstevel@tonic-gate3:
13547c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %rax
13557c478bd9Sstevel@tonic-gate	cmpq	$0, %rax
13567c478bd9Sstevel@tonic-gate	jz	2f
13577c478bd9Sstevel@tonic-gate
13587c478bd9Sstevel@tonic-gate	/*
13597c478bd9Sstevel@tonic-gate	 * reload args for the copyop
13607c478bd9Sstevel@tonic-gate	 */
13617c478bd9Sstevel@tonic-gate	movq	(%rsp), %rdi
13627c478bd9Sstevel@tonic-gate	movq	0x8(%rsp), %rsi
13637c478bd9Sstevel@tonic-gate	movq	0x10(%rsp), %rdx
13647c478bd9Sstevel@tonic-gate	leave
136565f20420SRobert Mustacchi	movq	CP_COPYOUT(%rax), %rax
136665f20420SRobert Mustacchi	INDIRECT_JMP_REG(rax)
13677c478bd9Sstevel@tonic-gate
13687c478bd9Sstevel@tonic-gate2:	movl	$-1, %eax
13697c478bd9Sstevel@tonic-gate	leave
13707c478bd9Sstevel@tonic-gate	ret
13717c478bd9Sstevel@tonic-gate	SET_SIZE(copyout)
13727c478bd9Sstevel@tonic-gate
13737c478bd9Sstevel@tonic-gate	ENTRY(xcopyout_nta)
13747c478bd9Sstevel@tonic-gate	pushq	%rbp
13757c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp
1376b08adf18SBill Holler	subq	$24, %rsp
13777c478bd9Sstevel@tonic-gate
13787c478bd9Sstevel@tonic-gate	/*
13797c478bd9Sstevel@tonic-gate	 * save args in case we trap and need to rerun as a copyop
13807c478bd9Sstevel@tonic-gate	 */
13817c478bd9Sstevel@tonic-gate	movq	%rdi, (%rsp)
13827c478bd9Sstevel@tonic-gate	movq	%rsi, 0x8(%rsp)
13837c478bd9Sstevel@tonic-gate	movq	%rdx, 0x10(%rsp)
13847c478bd9Sstevel@tonic-gate
13857c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
13867c478bd9Sstevel@tonic-gate#ifdef DEBUG
13877c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* %rdi = kaddr */
13887c478bd9Sstevel@tonic-gate	jnb	1f
13897c478bd9Sstevel@tonic-gate	leaq	.xcopyout_panic_msg(%rip), %rdi
13907c478bd9Sstevel@tonic-gate	xorl	%eax, %eax
13917c478bd9Sstevel@tonic-gate	call	panic
13927c478bd9Sstevel@tonic-gate1:
13937c478bd9Sstevel@tonic-gate#endif
13947c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9
13957c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
1396b737e79eSnn	jae	4f
13977c478bd9Sstevel@tonic-gate
13987c478bd9Sstevel@tonic-gate	cmpq	$0, %rcx		/* No non-temporal access? */
13997c478bd9Sstevel@tonic-gate	/*
14007c478bd9Sstevel@tonic-gate	 * pass lofault value as 4th argument to do_copy_fault
14017c478bd9Sstevel@tonic-gate	 */
14027c478bd9Sstevel@tonic-gate	leaq	_xcopyout_err(%rip), %rcx
14033ce2fcdcSRobert Mustacchi	jnz	6f
14047c478bd9Sstevel@tonic-gate	/*
14057c478bd9Sstevel@tonic-gate	 * Make sure cnt is >= XCOPY_MIN_SIZE bytes
14067c478bd9Sstevel@tonic-gate	 */
14077c478bd9Sstevel@tonic-gate	cmpq	$XCOPY_MIN_SIZE, %rdx
14083ce2fcdcSRobert Mustacchi	jae	5f
14093ce2fcdcSRobert Mustacchi6:
14103ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(4)
14113ce2fcdcSRobert Mustacchi	jmp	do_copy_fault
141265f20420SRobert Mustacchi
14137c478bd9Sstevel@tonic-gate	/*
14147c478bd9Sstevel@tonic-gate	 * Make sure src and dst are NTA_ALIGN_SIZE aligned,
14157c478bd9Sstevel@tonic-gate	 * count is COUNT_ALIGN_SIZE aligned.
14167c478bd9Sstevel@tonic-gate	 */
14173ce2fcdcSRobert Mustacchi5:
14187c478bd9Sstevel@tonic-gate	movq	%rdi, %r10
14197c478bd9Sstevel@tonic-gate	orq	%rsi, %r10
14207c478bd9Sstevel@tonic-gate	andq	$NTA_ALIGN_MASK, %r10
14217c478bd9Sstevel@tonic-gate	orq	%rdx, %r10
14227c478bd9Sstevel@tonic-gate	andq	$COUNT_ALIGN_MASK, %r10
142365f20420SRobert Mustacchi	jnz	6b
1424b08adf18SBill Holler	leaq	_xcopyout_nta_err(%rip), %rcx
14253ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(5)
14263ce2fcdcSRobert Mustacchi	call	do_copy_fault_nta
14273ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(5)
14283ce2fcdcSRobert Mustacchi	ret
14297c478bd9Sstevel@tonic-gate
1430b737e79eSnn4:
1431b737e79eSnn	movl	$EFAULT, %eax
1432b737e79eSnn	jmp	3f
1433b737e79eSnn
14347c478bd9Sstevel@tonic-gate	/*
14357c478bd9Sstevel@tonic-gate	 * A fault during do_copy_fault or do_copy_fault_nta is
14367c478bd9Sstevel@tonic-gate	 * indicated through an errno value in %rax, and we iret from
14377c478bd9Sstevel@tonic-gate	 * the trap handler to here.
14387c478bd9Sstevel@tonic-gate	 */
14397c478bd9Sstevel@tonic-gate_xcopyout_err:
1440b08adf18SBill Holler	addq	$8, %rsp		/* pop bcopy_altentry call ret addr */
1441b08adf18SBill Holler_xcopyout_nta_err:
14423ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(6)
14437c478bd9Sstevel@tonic-gate	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
14447c478bd9Sstevel@tonic-gate3:
14457c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %r8
14467c478bd9Sstevel@tonic-gate	cmpq	$0, %r8
14477c478bd9Sstevel@tonic-gate	jz	2f
14487c478bd9Sstevel@tonic-gate
14497c478bd9Sstevel@tonic-gate	/*
14507c478bd9Sstevel@tonic-gate	 * reload args for the copyop
14517c478bd9Sstevel@tonic-gate	 */
14527c478bd9Sstevel@tonic-gate	movq	(%rsp), %rdi
14537c478bd9Sstevel@tonic-gate	movq	0x8(%rsp), %rsi
14547c478bd9Sstevel@tonic-gate	movq	0x10(%rsp), %rdx
14557c478bd9Sstevel@tonic-gate	leave
145665f20420SRobert Mustacchi	movq	CP_XCOPYOUT(%r8), %r8
145765f20420SRobert Mustacchi	INDIRECT_JMP_REG(r8)
14587c478bd9Sstevel@tonic-gate
14597c478bd9Sstevel@tonic-gate2:	leave
14607c478bd9Sstevel@tonic-gate	ret
14617c478bd9Sstevel@tonic-gate	SET_SIZE(xcopyout_nta)
14627c478bd9Sstevel@tonic-gate
14639b0bb795SJohn Levon/*
14649b0bb795SJohn Levon * Copy a null terminated string from one point to another in
14659b0bb795SJohn Levon * the kernel address space.
14669b0bb795SJohn Levon */
14677c478bd9Sstevel@tonic-gate
14687c478bd9Sstevel@tonic-gate	ENTRY(copystr)
14697c478bd9Sstevel@tonic-gate	pushq	%rbp
14707c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp
14717c478bd9Sstevel@tonic-gate#ifdef DEBUG
14727c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
14737c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* %rdi = from */
14747c478bd9Sstevel@tonic-gate	jb	0f
14757c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* %rsi = to */
14767c478bd9Sstevel@tonic-gate	jnb	1f
14777c478bd9Sstevel@tonic-gate0:	leaq	.copystr_panic_msg(%rip), %rdi
14787c478bd9Sstevel@tonic-gate	xorl	%eax, %eax
14797c478bd9Sstevel@tonic-gate	call	panic
14807c478bd9Sstevel@tonic-gate1:
14817c478bd9Sstevel@tonic-gate#endif
14827c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9
14837c478bd9Sstevel@tonic-gate	movq	T_LOFAULT(%r9), %r8	/* pass current lofault value as */
14847c478bd9Sstevel@tonic-gate					/* 5th argument to do_copystr */
14853ce2fcdcSRobert Mustacchi	xorl	%r10d, %r10d		/* pass SMAP restore need in %r10d */
14863ce2fcdcSRobert Mustacchi					/* as a non-ABI 6th arg */
14877c478bd9Sstevel@tonic-gatedo_copystr:
14887c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9	/* %r9 = thread addr */
14897c478bd9Sstevel@tonic-gate	movq    T_LOFAULT(%r9), %r11	/* save the current lofault */
14907c478bd9Sstevel@tonic-gate	movq	%r8, T_LOFAULT(%r9)	/* new lofault */
14917c478bd9Sstevel@tonic-gate
14927c478bd9Sstevel@tonic-gate	movq	%rdx, %r8		/* save maxlength */
14937c478bd9Sstevel@tonic-gate
14947c478bd9Sstevel@tonic-gate	cmpq	$0, %rdx		/* %rdx = maxlength */
14957c478bd9Sstevel@tonic-gate	je	copystr_enametoolong	/* maxlength == 0 */
14967c478bd9Sstevel@tonic-gate
14977c478bd9Sstevel@tonic-gatecopystr_loop:
14987c478bd9Sstevel@tonic-gate	decq	%r8
14997c478bd9Sstevel@tonic-gate	movb	(%rdi), %al
15007c478bd9Sstevel@tonic-gate	incq	%rdi
15017c478bd9Sstevel@tonic-gate	movb	%al, (%rsi)
15027c478bd9Sstevel@tonic-gate	incq	%rsi
15037c478bd9Sstevel@tonic-gate	cmpb	$0, %al
15047c478bd9Sstevel@tonic-gate	je	copystr_null		/* null char */
15057c478bd9Sstevel@tonic-gate	cmpq	$0, %r8
15067c478bd9Sstevel@tonic-gate	jne	copystr_loop
15077c478bd9Sstevel@tonic-gate
15087c478bd9Sstevel@tonic-gatecopystr_enametoolong:
15097c478bd9Sstevel@tonic-gate	movl	$ENAMETOOLONG, %eax
15107c478bd9Sstevel@tonic-gate	jmp	copystr_out
15117c478bd9Sstevel@tonic-gate
15127c478bd9Sstevel@tonic-gatecopystr_null:
15137c478bd9Sstevel@tonic-gate	xorl	%eax, %eax		/* no error */
15147c478bd9Sstevel@tonic-gate
15157c478bd9Sstevel@tonic-gatecopystr_out:
15167c478bd9Sstevel@tonic-gate	cmpq	$0, %rcx		/* want length? */
15173ce2fcdcSRobert Mustacchi	je	copystr_smap		/* no */
15187c478bd9Sstevel@tonic-gate	subq	%r8, %rdx		/* compute length and store it */
15197c478bd9Sstevel@tonic-gate	movq	%rdx, (%rcx)
15207c478bd9Sstevel@tonic-gate
15213ce2fcdcSRobert Mustacchicopystr_smap:
15223ce2fcdcSRobert Mustacchi	cmpl	$0, %r10d
15233ce2fcdcSRobert Mustacchi	jz	copystr_done
15243ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(7)
15253ce2fcdcSRobert Mustacchi
15267c478bd9Sstevel@tonic-gatecopystr_done:
15277c478bd9Sstevel@tonic-gate	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
15287c478bd9Sstevel@tonic-gate	leave
15297c478bd9Sstevel@tonic-gate	ret
15307c478bd9Sstevel@tonic-gate	SET_SIZE(copystr)
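/*
 * do_copystr's loop is equivalent to the following C (a minimal
 * sketch; as with copystr(9F), the length out-parameter is optional
 * and counts the terminating null byte):
 *
 *	int
 *	copystr_sketch(const char *from, char *to, size_t max,
 *	    size_t *lencopied)
 *	{
 *		size_t left = max;
 *		int err = ENAMETOOLONG;
 *
 *		while (left != 0) {
 *			left--;
 *			if ((*to++ = *from++) == '\0') {
 *				err = 0;
 *				break;
 *			}
 *		}
 *		if (lencopied != NULL)
 *			*lencopied = max - left;
 *		return (err);
 *	}
 */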
15317c478bd9Sstevel@tonic-gate
15327c478bd9Sstevel@tonic-gate/*
15337c478bd9Sstevel@tonic-gate * Copy a null terminated string from the user address space into
15347c478bd9Sstevel@tonic-gate * the kernel address space.
15357c478bd9Sstevel@tonic-gate */
15367c478bd9Sstevel@tonic-gate
15377c478bd9Sstevel@tonic-gate	ENTRY(copyinstr)
15387c478bd9Sstevel@tonic-gate	pushq	%rbp
15397c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp
15407c478bd9Sstevel@tonic-gate	subq	$32, %rsp
15417c478bd9Sstevel@tonic-gate
15427c478bd9Sstevel@tonic-gate	/*
15437c478bd9Sstevel@tonic-gate	 * save args in case we trap and need to rerun as a copyop
15447c478bd9Sstevel@tonic-gate	 */
15457c478bd9Sstevel@tonic-gate	movq	%rdi, (%rsp)
15467c478bd9Sstevel@tonic-gate	movq	%rsi, 0x8(%rsp)
15477c478bd9Sstevel@tonic-gate	movq	%rdx, 0x10(%rsp)
15487c478bd9Sstevel@tonic-gate	movq	%rcx, 0x18(%rsp)
15497c478bd9Sstevel@tonic-gate
15507c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
15517c478bd9Sstevel@tonic-gate#ifdef DEBUG
15527c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* %rsi = kaddr */
15537c478bd9Sstevel@tonic-gate	jnb	1f
15547c478bd9Sstevel@tonic-gate	leaq	.copyinstr_panic_msg(%rip), %rdi
15557c478bd9Sstevel@tonic-gate	xorl	%eax, %eax
15567c478bd9Sstevel@tonic-gate	call	panic
15577c478bd9Sstevel@tonic-gate1:
15587c478bd9Sstevel@tonic-gate#endif
15597c478bd9Sstevel@tonic-gate	/*
15607c478bd9Sstevel@tonic-gate	 * pass lofault value as 5th argument to do_copystr
15613ce2fcdcSRobert Mustacchi	 * do_copystr expects the SMAP restore flag in %r10d
15627c478bd9Sstevel@tonic-gate	 */
15637c478bd9Sstevel@tonic-gate	leaq	_copyinstr_error(%rip), %r8
15643ce2fcdcSRobert Mustacchi	movl	$1, %r10d
15657c478bd9Sstevel@tonic-gate
15667c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* test uaddr < kernelbase */
15673ce2fcdcSRobert Mustacchi	jae	4f
15683ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(6)
15693ce2fcdcSRobert Mustacchi	jmp	do_copystr
15703ce2fcdcSRobert Mustacchi4:
15717c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9
15727c478bd9Sstevel@tonic-gate	jmp	3f
15737c478bd9Sstevel@tonic-gate
15747c478bd9Sstevel@tonic-gate_copyinstr_error:
15753ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(8)
15767c478bd9Sstevel@tonic-gate	movq	%r11, T_LOFAULT(%r9)	/* restore original lofault */
15777c478bd9Sstevel@tonic-gate3:
15787c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %rax
15797c478bd9Sstevel@tonic-gate	cmpq	$0, %rax
15807c478bd9Sstevel@tonic-gate	jz	2f
15817c478bd9Sstevel@tonic-gate
15827c478bd9Sstevel@tonic-gate	/*
15837c478bd9Sstevel@tonic-gate	 * reload args for the copyop
15847c478bd9Sstevel@tonic-gate	 */
15857c478bd9Sstevel@tonic-gate	movq	(%rsp), %rdi
15867c478bd9Sstevel@tonic-gate	movq	0x8(%rsp), %rsi
15877c478bd9Sstevel@tonic-gate	movq	0x10(%rsp), %rdx
15887c478bd9Sstevel@tonic-gate	movq	0x18(%rsp), %rcx
15897c478bd9Sstevel@tonic-gate	leave
159065f20420SRobert Mustacchi	movq	CP_COPYINSTR(%rax), %rax
159165f20420SRobert Mustacchi	INDIRECT_JMP_REG(rax)
159265f20420SRobert Mustacchi
15937c478bd9Sstevel@tonic-gate2:	movl	$EFAULT, %eax		/* return EFAULT */
15947c478bd9Sstevel@tonic-gate	leave
15957c478bd9Sstevel@tonic-gate	ret
15967c478bd9Sstevel@tonic-gate	SET_SIZE(copyinstr)
15977c478bd9Sstevel@tonic-gate
15987c478bd9Sstevel@tonic-gate/*
15997c478bd9Sstevel@tonic-gate * Copy a null terminated string from the kernel
16007c478bd9Sstevel@tonic-gate * address space to the user address space.
16017c478bd9Sstevel@tonic-gate */
16027c478bd9Sstevel@tonic-gate
16037c478bd9Sstevel@tonic-gate	ENTRY(copyoutstr)
16047c478bd9Sstevel@tonic-gate	pushq	%rbp
16057c478bd9Sstevel@tonic-gate	movq	%rsp, %rbp
16067c478bd9Sstevel@tonic-gate	subq	$32, %rsp
16077c478bd9Sstevel@tonic-gate
16087c478bd9Sstevel@tonic-gate	/*
16097c478bd9Sstevel@tonic-gate	 * save args in case we trap and need to rerun as a copyop
16107c478bd9Sstevel@tonic-gate	 */
16117c478bd9Sstevel@tonic-gate	movq	%rdi, (%rsp)
16127c478bd9Sstevel@tonic-gate	movq	%rsi, 0x8(%rsp)
16137c478bd9Sstevel@tonic-gate	movq	%rdx, 0x10(%rsp)
16147c478bd9Sstevel@tonic-gate	movq	%rcx, 0x18(%rsp)
16157c478bd9Sstevel@tonic-gate
16167c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
16177c478bd9Sstevel@tonic-gate#ifdef DEBUG
16187c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* %rdi = kaddr */
16197c478bd9Sstevel@tonic-gate	jnb	1f
16207c478bd9Sstevel@tonic-gate	leaq	.copyoutstr_panic_msg(%rip), %rdi
16217c478bd9Sstevel@tonic-gate	jmp	call_panic		/* setup stack and call panic */
16227c478bd9Sstevel@tonic-gate1:
16237c478bd9Sstevel@tonic-gate#endif
16247c478bd9Sstevel@tonic-gate	/*
16257c478bd9Sstevel@tonic-gate	 * pass lofault value as 5th argument to do_copystr
16263ce2fcdcSRobert Mustacchi	 * pass 1 in %r10d as the non-ABI 6th argument (SMAP restore needed)
16277c478bd9Sstevel@tonic-gate	 */
16287c478bd9Sstevel@tonic-gate	leaq	_copyoutstr_error(%rip), %r8
16293ce2fcdcSRobert Mustacchi	movl	$1, %r10d
16307c478bd9Sstevel@tonic-gate
16317c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* test uaddr < kernelbase */
16323ce2fcdcSRobert Mustacchi	jae	4f
16333ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(7)
16343ce2fcdcSRobert Mustacchi	jmp	do_copystr
16353ce2fcdcSRobert Mustacchi4:
16367c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9
16377c478bd9Sstevel@tonic-gate	jmp	3f
16387c478bd9Sstevel@tonic-gate
16397c478bd9Sstevel@tonic-gate_copyoutstr_error:
16403ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(9)
16417c478bd9Sstevel@tonic-gate	movq	%r11, T_LOFAULT(%r9)	/* restore the original lofault */
16427c478bd9Sstevel@tonic-gate3:
16437c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %rax
16447c478bd9Sstevel@tonic-gate	cmpq	$0, %rax
16457c478bd9Sstevel@tonic-gate	jz	2f
16467c478bd9Sstevel@tonic-gate
16477c478bd9Sstevel@tonic-gate	/*
16487c478bd9Sstevel@tonic-gate	 * reload args for the copyop
16497c478bd9Sstevel@tonic-gate	 */
16507c478bd9Sstevel@tonic-gate	movq	(%rsp), %rdi
16517c478bd9Sstevel@tonic-gate	movq	0x8(%rsp), %rsi
16527c478bd9Sstevel@tonic-gate	movq	0x10(%rsp), %rdx
16537c478bd9Sstevel@tonic-gate	movq	0x18(%rsp), %rcx
16547c478bd9Sstevel@tonic-gate	leave
165565f20420SRobert Mustacchi	movq	CP_COPYOUTSTR(%rax), %rax
165665f20420SRobert Mustacchi	INDIRECT_JMP_REG(rax)
165765f20420SRobert Mustacchi
16587c478bd9Sstevel@tonic-gate2:	movl	$EFAULT, %eax		/* return EFAULT */
16597c478bd9Sstevel@tonic-gate	leave
16607c478bd9Sstevel@tonic-gate	ret
166165f20420SRobert Mustacchi	SET_SIZE(copyoutstr)
166265f20420SRobert Mustacchi
16637c478bd9Sstevel@tonic-gate/*
16647c478bd9Sstevel@tonic-gate * Since all of the fuword() variants are so similar, we have a macro to spit
16657c478bd9Sstevel@tonic-gate * them out.  This allows us to create DTrace-unobservable functions easily.
16667c478bd9Sstevel@tonic-gate */
166765f20420SRobert Mustacchi
16687c478bd9Sstevel@tonic-gate/*
16693ce2fcdcSRobert Mustacchi * Note that we don't save and reload the arguments here
16703ce2fcdcSRobert Mustacchi * because their values are not altered in the copy path.
16713ce2fcdcSRobert Mustacchi * Additionally, when successful, the smap_enable jmp will
16723ce2fcdcSRobert Mustacchi * actually return us to our original caller.
16737c478bd9Sstevel@tonic-gate */
16747c478bd9Sstevel@tonic-gate
16753ce2fcdcSRobert Mustacchi#define	FUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)	\
16767c478bd9Sstevel@tonic-gate	ENTRY(NAME)				\
16777c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9;		\
16787c478bd9Sstevel@tonic-gate	cmpq	kernelbase(%rip), %rdi;		\
16797c478bd9Sstevel@tonic-gate	jae	1f;				\
1680*5d9d9091SRichard Lowe	leaq	_flt_##NAME, %rdx;		\
16817c478bd9Sstevel@tonic-gate	movq	%rdx, T_LOFAULT(%r9);		\
16823ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(DISNUM)		\
16837c478bd9Sstevel@tonic-gate	INSTR	(%rdi), REG;			\
16847c478bd9Sstevel@tonic-gate	movq	$0, T_LOFAULT(%r9);		\
16857c478bd9Sstevel@tonic-gate	INSTR	REG, (%rsi);			\
16867c478bd9Sstevel@tonic-gate	xorl	%eax, %eax;			\
16873ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(EN1)			\
16887c478bd9Sstevel@tonic-gate	ret;					\
1689*5d9d9091SRichard Lowe_flt_##NAME:					\
16903ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(EN2)			\
16917c478bd9Sstevel@tonic-gate	movq	$0, T_LOFAULT(%r9);		\
16927c478bd9Sstevel@tonic-gate1:						\
16937c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %rax;		\
16947c478bd9Sstevel@tonic-gate	cmpq	$0, %rax;			\
16957c478bd9Sstevel@tonic-gate	jz	2f;				\
169665f20420SRobert Mustacchi	movq	COPYOP(%rax), %rax;		\
169765f20420SRobert Mustacchi	INDIRECT_JMP_REG(rax);			\
16987c478bd9Sstevel@tonic-gate2:						\
16997c478bd9Sstevel@tonic-gate	movl	$-1, %eax;			\
17007c478bd9Sstevel@tonic-gate	ret;					\
17017c478bd9Sstevel@tonic-gate	SET_SIZE(NAME)
170265f20420SRobert Mustacchi
17033ce2fcdcSRobert Mustacchi	FUWORD(fuword64, movq, %rax, CP_FUWORD64,8,10,11)
17043ce2fcdcSRobert Mustacchi	FUWORD(fuword32, movl, %eax, CP_FUWORD32,9,12,13)
17053ce2fcdcSRobert Mustacchi	FUWORD(fuword16, movw, %ax, CP_FUWORD16,10,14,15)
17063ce2fcdcSRobert Mustacchi	FUWORD(fuword8, movb, %al, CP_FUWORD8,11,16,17)
17077c478bd9Sstevel@tonic-gate
17087c478bd9Sstevel@tonic-gate#undef	FUWORD
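/*
 * Each fuwordN() expansion above is semantically the following C (a
 * sketch; cp_fuword32 mirrors CP_FUWORD32, and protected_load32 is an
 * illustrative stand-in for the lofault-guarded, SMAP-bracketed load
 * that the macro actually spends its instructions on):
 *
 *	int
 *	fuword32_sketch(const void *uaddr, uint32_t *valp)
 *	{
 *		uint32_t v;
 *
 *		if ((uintptr_t)uaddr >= kernelbase ||
 *		    protected_load32(uaddr, &v) != 0) {
 *			if (curthread->t_copyops != NULL)
 *				return (curthread->t_copyops->cp_fuword32(
 *				    uaddr, valp));
 *			return (-1);
 *		}
 *		*valp = v;
 *		return (0);
 *	}
 */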
17097c478bd9Sstevel@tonic-gate
17107c478bd9Sstevel@tonic-gate/*
17117c478bd9Sstevel@tonic-gate * Set user word.
17127c478bd9Sstevel@tonic-gate */
17137c478bd9Sstevel@tonic-gate
17147c478bd9Sstevel@tonic-gate/*
17153ce2fcdcSRobert Mustacchi * Note that we don't save and reload the arguments here
17163ce2fcdcSRobert Mustacchi * because their values are not altered in the copy path.
17177c478bd9Sstevel@tonic-gate */
17187c478bd9Sstevel@tonic-gate
17193ce2fcdcSRobert Mustacchi#define	SUWORD(NAME, INSTR, REG, COPYOP, DISNUM, EN1, EN2)	\
17207c478bd9Sstevel@tonic-gate	ENTRY(NAME)				\
17217c478bd9Sstevel@tonic-gate	movq	%gs:CPU_THREAD, %r9;		\
17227c478bd9Sstevel@tonic-gate	cmpq	kernelbase(%rip), %rdi;		\
17237c478bd9Sstevel@tonic-gate	jae	1f;				\
1724*5d9d9091SRichard Lowe	leaq	_flt_##NAME, %rdx;		\
17253ce2fcdcSRobert Mustacchi	SMAP_DISABLE_INSTR(DISNUM)		\
17267c478bd9Sstevel@tonic-gate	movq	%rdx, T_LOFAULT(%r9);		\
17277c478bd9Sstevel@tonic-gate	INSTR	REG, (%rdi);			\
17287c478bd9Sstevel@tonic-gate	movq	$0, T_LOFAULT(%r9);		\
17297c478bd9Sstevel@tonic-gate	xorl	%eax, %eax;			\
17303ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(EN1)			\
17317c478bd9Sstevel@tonic-gate	ret;					\
1732*5d9d9091SRichard Lowe_flt_##NAME:					\
17333ce2fcdcSRobert Mustacchi	SMAP_ENABLE_INSTR(EN2)			\
17347c478bd9Sstevel@tonic-gate	movq	$0, T_LOFAULT(%r9);		\
17357c478bd9Sstevel@tonic-gate1:						\
17367c478bd9Sstevel@tonic-gate	movq	T_COPYOPS(%r9), %rax;		\
17377c478bd9Sstevel@tonic-gate	cmpq	$0, %rax;			\
17387c478bd9Sstevel@tonic-gate	jz	3f;				\
173965f20420SRobert Mustacchi	movq	COPYOP(%rax), %rax;		\
174065f20420SRobert Mustacchi	INDIRECT_JMP_REG(rax);			\
17417c478bd9Sstevel@tonic-gate3:						\
17427c478bd9Sstevel@tonic-gate	movl	$-1, %eax;			\
17437c478bd9Sstevel@tonic-gate	ret;					\
17447c478bd9Sstevel@tonic-gate	SET_SIZE(NAME)
17457c478bd9Sstevel@tonic-gate
17463ce2fcdcSRobert Mustacchi	SUWORD(suword64, movq, %rsi, CP_SUWORD64,12,18,19)
17473ce2fcdcSRobert Mustacchi	SUWORD(suword32, movl, %esi, CP_SUWORD32,13,20,21)
17483ce2fcdcSRobert Mustacchi	SUWORD(suword16, movw, %si, CP_SUWORD16,14,22,23)
17493ce2fcdcSRobert Mustacchi	SUWORD(suword8, movb, %sil, CP_SUWORD8,15,24,25)
17507c478bd9Sstevel@tonic-gate
17517c478bd9Sstevel@tonic-gate#undef	SUWORD
17527c478bd9Sstevel@tonic-gate
17537c478bd9Sstevel@tonic-gate#define	FUWORD_NOERR(NAME, INSTR, REG)		\
17547c478bd9Sstevel@tonic-gate	ENTRY(NAME)				\
17557c478bd9Sstevel@tonic-gate	cmpq	kernelbase(%rip), %rdi;		\
17567c478bd9Sstevel@tonic-gate	cmovnbq	kernelbase(%rip), %rdi;		\
17577c478bd9Sstevel@tonic-gate	INSTR	(%rdi), REG;			\
17587c478bd9Sstevel@tonic-gate	INSTR	REG, (%rsi);			\
17597c478bd9Sstevel@tonic-gate	ret;					\
17607c478bd9Sstevel@tonic-gate	SET_SIZE(NAME)
17617c478bd9Sstevel@tonic-gate
17627c478bd9Sstevel@tonic-gate	FUWORD_NOERR(fuword64_noerr, movq, %rax)
17637c478bd9Sstevel@tonic-gate	FUWORD_NOERR(fuword32_noerr, movl, %eax)
17647c478bd9Sstevel@tonic-gate	FUWORD_NOERR(fuword16_noerr, movw, %ax)
17657c478bd9Sstevel@tonic-gate	FUWORD_NOERR(fuword8_noerr, movb, %al)
17667c478bd9Sstevel@tonic-gate
17677c478bd9Sstevel@tonic-gate#undef	FUWORD_NOERR
17687c478bd9Sstevel@tonic-gate
17697c478bd9Sstevel@tonic-gate#define	SUWORD_NOERR(NAME, INSTR, REG)		\
17707c478bd9Sstevel@tonic-gate	ENTRY(NAME)				\
17717c478bd9Sstevel@tonic-gate	cmpq	kernelbase(%rip), %rdi;		\
17727c478bd9Sstevel@tonic-gate	cmovnbq	kernelbase(%rip), %rdi;		\
17737c478bd9Sstevel@tonic-gate	INSTR	REG, (%rdi);			\
17747c478bd9Sstevel@tonic-gate	ret;					\
17757c478bd9Sstevel@tonic-gate	SET_SIZE(NAME)
17767c478bd9Sstevel@tonic-gate
17777c478bd9Sstevel@tonic-gate	SUWORD_NOERR(suword64_noerr, movq, %rsi)
17787c478bd9Sstevel@tonic-gate	SUWORD_NOERR(suword32_noerr, movl, %esi)
17797c478bd9Sstevel@tonic-gate	SUWORD_NOERR(suword16_noerr, movw, %si)
17807c478bd9Sstevel@tonic-gate	SUWORD_NOERR(suword8_noerr, movb, %sil)
17817c478bd9Sstevel@tonic-gate
17827c478bd9Sstevel@tonic-gate#undef	SUWORD_NOERR
17837c478bd9Sstevel@tonic-gate
17847c478bd9Sstevel@tonic-gate
17857c478bd9Sstevel@tonic-gate	.weak	subyte
17867c478bd9Sstevel@tonic-gate	subyte=suword8
17877c478bd9Sstevel@tonic-gate	.weak	subyte_noerr
17887c478bd9Sstevel@tonic-gate	subyte_noerr=suword8_noerr
17897c478bd9Sstevel@tonic-gate
17907c478bd9Sstevel@tonic-gate	.weak	fulword
17917c478bd9Sstevel@tonic-gate	fulword=fuword64
17927c478bd9Sstevel@tonic-gate	.weak	fulword_noerr
17937c478bd9Sstevel@tonic-gate	fulword_noerr=fuword64_noerr
17947c478bd9Sstevel@tonic-gate	.weak	sulword
17957c478bd9Sstevel@tonic-gate	sulword=suword64
17967c478bd9Sstevel@tonic-gate	.weak	sulword_noerr
17977c478bd9Sstevel@tonic-gate	sulword_noerr=suword64_noerr
17987c478bd9Sstevel@tonic-gate
17997c478bd9Sstevel@tonic-gate	ENTRY(copyin_noerr)
18007c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
18017c478bd9Sstevel@tonic-gate#ifdef DEBUG
18027c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* %rsi = kto */
18037c478bd9Sstevel@tonic-gate	jae	1f
18047c478bd9Sstevel@tonic-gate	leaq	.cpyin_ne_pmsg(%rip), %rdi
18057c478bd9Sstevel@tonic-gate	jmp	call_panic		/* setup stack and call panic */
18067c478bd9Sstevel@tonic-gate1:
18077c478bd9Sstevel@tonic-gate#endif
18087c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* ufrom < kernelbase */
18097c478bd9Sstevel@tonic-gate	jb	do_copy
18107c478bd9Sstevel@tonic-gate	movq	%rax, %rdi		/* force fault at kernelbase */
18117c478bd9Sstevel@tonic-gate	jmp	do_copy
18127c478bd9Sstevel@tonic-gate	SET_SIZE(copyin_noerr)
18137c478bd9Sstevel@tonic-gate
18147c478bd9Sstevel@tonic-gate	ENTRY(copyout_noerr)
18157c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
18167c478bd9Sstevel@tonic-gate#ifdef DEBUG
18177c478bd9Sstevel@tonic-gate	cmpq	%rax, %rdi		/* %rdi = kfrom */
18187c478bd9Sstevel@tonic-gate	jae	1f
18197c478bd9Sstevel@tonic-gate	leaq	.cpyout_ne_pmsg(%rip), %rdi
18207c478bd9Sstevel@tonic-gate	jmp	call_panic		/* setup stack and call panic */
18217c478bd9Sstevel@tonic-gate1:
18227c478bd9Sstevel@tonic-gate#endif
18237c478bd9Sstevel@tonic-gate	cmpq	%rax, %rsi		/* uto < kernelbase */
18247c478bd9Sstevel@tonic-gate	jb	do_copy
18257c478bd9Sstevel@tonic-gate	movq	%rax, %rsi		/* force fault at kernelbase */
18267c478bd9Sstevel@tonic-gate	jmp	do_copy
18277c478bd9Sstevel@tonic-gate	SET_SIZE(copyout_noerr)
18287c478bd9Sstevel@tonic-gate
18297c478bd9Sstevel@tonic-gate	ENTRY(uzero)
183040c00cd7Sahl	movq	kernelbase(%rip), %rax
183140c00cd7Sahl	cmpq	%rax, %rdi
183240c00cd7Sahl	jb	do_zero
183340c00cd7Sahl	movq	%rax, %rdi	/* force fault at kernelbase */
18347c478bd9Sstevel@tonic-gate	jmp	do_zero
18357c478bd9Sstevel@tonic-gate	SET_SIZE(uzero)
18367c478bd9Sstevel@tonic-gate
18377c478bd9Sstevel@tonic-gate	ENTRY(ucopy)
18387c478bd9Sstevel@tonic-gate	movq	kernelbase(%rip), %rax
183940c00cd7Sahl	cmpq	%rax, %rdi
18409acbbeafSnn	cmovaeq	%rax, %rdi	/* force fault at kernelbase */
184140c00cd7Sahl	cmpq	%rax, %rsi
18429acbbeafSnn	cmovaeq	%rax, %rsi	/* force fault at kernelbase */
18437c478bd9Sstevel@tonic-gate	jmp	do_copy
18447c478bd9Sstevel@tonic-gate	SET_SIZE(ucopy)
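/*
 * uzero, ucopy, the *_noerr variants above, and ucopystr below all
 * share one trick: rather than branching to an error return on a bad
 * user address, they clamp it to kernelbase (via cmovae/cmovnb or a
 * branch) so the subsequent access faults deterministically and is
 * handled by the caller-installed t_lofault. In C terms (sketch):
 *
 *	if ((uintptr_t)uaddr >= kernelbase)
 *		uaddr = (const void *)kernelbase;
 */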
18457c478bd9Sstevel@tonic-gate
18463ce2fcdcSRobert Mustacchi	/*
18473ce2fcdcSRobert Mustacchi	 * Note that the frame pointer is required here because do_copystr
18483ce2fcdcSRobert Mustacchi	 * expects to be able to pop it off!
18493ce2fcdcSRobert Mustacchi	 */
18509acbbeafSnn	ENTRY(ucopystr)
1851cbff3abdSRobert Mustacchi	pushq	%rbp
1852cbff3abdSRobert Mustacchi	movq	%rsp, %rbp
18539acbbeafSnn	movq	kernelbase(%rip), %rax
18549acbbeafSnn	cmpq	%rax, %rdi
18559acbbeafSnn	cmovaeq	%rax, %rdi	/* force fault at kernelbase */
18569acbbeafSnn	cmpq	%rax, %rsi
18579acbbeafSnn	cmovaeq	%rax, %rsi	/* force fault at kernelbase */
18589acbbeafSnn	/* do_copystr expects lofault address in %r8 */
18593ce2fcdcSRobert Mustacchi	/* do_copystr expects the SMAP restore flag in %r10d */
18603ce2fcdcSRobert Mustacchi	xorl	%r10d, %r10d
18619acbbeafSnn	movq	%gs:CPU_THREAD, %r8
18629acbbeafSnn	movq	T_LOFAULT(%r8), %r8
18639acbbeafSnn	jmp	do_copystr
18649acbbeafSnn	SET_SIZE(ucopystr)
18659acbbeafSnn
18667c478bd9Sstevel@tonic-gate#ifdef DEBUG
18677c478bd9Sstevel@tonic-gate	.data
18687c478bd9Sstevel@tonic-gate.kcopy_panic_msg:
18697c478bd9Sstevel@tonic-gate	.string "kcopy: arguments below kernelbase"
18707c478bd9Sstevel@tonic-gate.bcopy_panic_msg:
18717c478bd9Sstevel@tonic-gate	.string "bcopy: arguments below kernelbase"
18727c478bd9Sstevel@tonic-gate.kzero_panic_msg:
18737c478bd9Sstevel@tonic-gate	.string "kzero: arguments below kernelbase"
18747c478bd9Sstevel@tonic-gate.bzero_panic_msg:
18757c478bd9Sstevel@tonic-gate	.string	"bzero: arguments below kernelbase"
18767c478bd9Sstevel@tonic-gate.copyin_panic_msg:
18777c478bd9Sstevel@tonic-gate	.string "copyin: kaddr argument below kernelbase"
18787c478bd9Sstevel@tonic-gate.xcopyin_panic_msg:
18797c478bd9Sstevel@tonic-gate	.string	"xcopyin: kaddr argument below kernelbase"
18807c478bd9Sstevel@tonic-gate.copyout_panic_msg:
18817c478bd9Sstevel@tonic-gate	.string "copyout: kaddr argument below kernelbase"
18827c478bd9Sstevel@tonic-gate.xcopyout_panic_msg:
18837c478bd9Sstevel@tonic-gate	.string	"xcopyout: kaddr argument below kernelbase"
18847c478bd9Sstevel@tonic-gate.copystr_panic_msg:
18857c478bd9Sstevel@tonic-gate	.string	"copystr: arguments in user space"
18867c478bd9Sstevel@tonic-gate.copyinstr_panic_msg:
18877c478bd9Sstevel@tonic-gate	.string	"copyinstr: kaddr argument not in kernel address space"
18887c478bd9Sstevel@tonic-gate.copyoutstr_panic_msg:
18897c478bd9Sstevel@tonic-gate	.string	"copyoutstr: kaddr argument not in kernel address space"
18907c478bd9Sstevel@tonic-gate.cpyin_ne_pmsg:
18917c478bd9Sstevel@tonic-gate	.string "copyin_noerr: argument not in kernel address space"
18927c478bd9Sstevel@tonic-gate.cpyout_ne_pmsg:
18937c478bd9Sstevel@tonic-gate	.string "copyout_noerr: argument not in kernel address space"
18947c478bd9Sstevel@tonic-gate#endif
18957c478bd9Sstevel@tonic-gate
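/*
 * The symbols below export the number of SMAP_ENABLE_INSTR() and
 * SMAP_DISABLE_INSTR() sites in this file, so that boot-time code can
 * locate and patch every such site (see the SMAP block comment at the
 * top of this file).
 */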
18963ce2fcdcSRobert Mustacchi.data
189765f20420SRobert Mustacchi.align	4
18983ce2fcdcSRobert Mustacchi.globl	_smap_enable_patch_count
18993ce2fcdcSRobert Mustacchi.type	_smap_enable_patch_count,@object
19003ce2fcdcSRobert Mustacchi.size	_smap_enable_patch_count, 4
19013ce2fcdcSRobert Mustacchi_smap_enable_patch_count:
19023ce2fcdcSRobert Mustacchi	.long	SMAP_ENABLE_COUNT
19033ce2fcdcSRobert Mustacchi
19043ce2fcdcSRobert Mustacchi.globl	_smap_disable_patch_count
19053ce2fcdcSRobert Mustacchi.type	_smap_disable_patch_count,@object
19063ce2fcdcSRobert Mustacchi.size	_smap_disable_patch_count, 4
19073ce2fcdcSRobert Mustacchi_smap_disable_patch_count:
19083ce2fcdcSRobert Mustacchi	.long SMAP_DISABLE_COUNT
1909