xref: /illumos-gate/usr/src/uts/intel/os/desctbls.c (revision 075343cb)
17c478bd9Sstevel@tonic-gate /*
2ae115bc7Smrj  * CDDL HEADER START
3ae115bc7Smrj  *
4ae115bc7Smrj  * The contents of this file are subject to the terms of the
5ae115bc7Smrj  * Common Development and Distribution License (the "License").
6ae115bc7Smrj  * You may not use this file except in compliance with the License.
7ae115bc7Smrj  *
8ae115bc7Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ae115bc7Smrj  * or http://www.opensolaris.org/os/licensing.
10ae115bc7Smrj  * See the License for the specific language governing permissions
11ae115bc7Smrj  * and limitations under the License.
12ae115bc7Smrj  *
13ae115bc7Smrj  * When distributing Covered Code, include this CDDL HEADER in each
14ae115bc7Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ae115bc7Smrj  * If applicable, add the following below this CDDL HEADER, with the
16ae115bc7Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
17ae115bc7Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
18ae115bc7Smrj  *
19ae115bc7Smrj  * CDDL HEADER END
20ae115bc7Smrj  */
21ae115bc7Smrj 
22ae115bc7Smrj /*
23eb5a5c78SSurya Prakki  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
/*
 * Copyright 2018 Joyent, Inc. All rights reserved.
 * Copyright 2022 Oxide Computer Company
 */
30f16a0f4cSRobert Mustacchi 
317c478bd9Sstevel@tonic-gate /*
327c478bd9Sstevel@tonic-gate  * Copyright (c) 1992 Terrence R. Lambert.
337c478bd9Sstevel@tonic-gate  * Copyright (c) 1990 The Regents of the University of California.
347c478bd9Sstevel@tonic-gate  * All rights reserved.
357c478bd9Sstevel@tonic-gate  *
367c478bd9Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
377c478bd9Sstevel@tonic-gate  * William Jolitz.
387c478bd9Sstevel@tonic-gate  *
397c478bd9Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
407c478bd9Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
417c478bd9Sstevel@tonic-gate  * are met:
427c478bd9Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
437c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
447c478bd9Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
457c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
467c478bd9Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
477c478bd9Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
487c478bd9Sstevel@tonic-gate  *    must display the following acknowledgement:
497c478bd9Sstevel@tonic-gate  *	This product includes software developed by the University of
507c478bd9Sstevel@tonic-gate  *	California, Berkeley and its contributors.
517c478bd9Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
527c478bd9Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
537c478bd9Sstevel@tonic-gate  *    without specific prior written permission.
547c478bd9Sstevel@tonic-gate  *
557c478bd9Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
567c478bd9Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
577c478bd9Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
587c478bd9Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
597c478bd9Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
607c478bd9Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
617c478bd9Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
627c478bd9Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
637c478bd9Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
647c478bd9Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
657c478bd9Sstevel@tonic-gate  * SUCH DAMAGE.
667c478bd9Sstevel@tonic-gate  *
677c478bd9Sstevel@tonic-gate  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
687c478bd9Sstevel@tonic-gate  */
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate #include <sys/types.h>
71ae115bc7Smrj #include <sys/sysmacros.h>
727c478bd9Sstevel@tonic-gate #include <sys/tss.h>
737c478bd9Sstevel@tonic-gate #include <sys/segments.h>
747c478bd9Sstevel@tonic-gate #include <sys/trap.h>
757c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
76ae115bc7Smrj #include <sys/bootconf.h>
777c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h>
78ae115bc7Smrj #include <sys/controlregs.h>
797c478bd9Sstevel@tonic-gate #include <sys/archsystm.h>
807c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
817c478bd9Sstevel@tonic-gate #include <sys/kobj.h>
827c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
837c478bd9Sstevel@tonic-gate #include <sys/reboot.h>
847c478bd9Sstevel@tonic-gate #include <sys/kdi.h>
85ae115bc7Smrj #include <sys/mach_mmu.h>
860baeff3dSrab #include <sys/systm.h>
8774ecdb51SJohn Levon #include <sys/note.h>
88843e1988Sjohnlev 
89843e1988Sjohnlev #ifdef __xpv
90843e1988Sjohnlev #include <sys/hypervisor.h>
91843e1988Sjohnlev #include <vm/as.h>
92843e1988Sjohnlev #endif
93843e1988Sjohnlev 
94ae115bc7Smrj #include <sys/promif.h>
95ae115bc7Smrj #include <sys/bootinfo.h>
96ae115bc7Smrj #include <vm/kboot_mmu.h>
97843e1988Sjohnlev #include <vm/hat_pte.h>
987c478bd9Sstevel@tonic-gate 
997c478bd9Sstevel@tonic-gate /*
1007c478bd9Sstevel@tonic-gate  * cpu0 and default tables and structures.
1017c478bd9Sstevel@tonic-gate  */
102ae115bc7Smrj user_desc_t	*gdt0;
103843e1988Sjohnlev #if !defined(__xpv)
1047c478bd9Sstevel@tonic-gate desctbr_t	gdt0_default_r;
105843e1988Sjohnlev #endif
1067c478bd9Sstevel@tonic-gate 
107027bcc9fSToomas Soome gate_desc_t	*idt0;		/* interrupt descriptor table */
1087c478bd9Sstevel@tonic-gate 
109f16a0f4cSRobert Mustacchi tss_t		*ktss0;			/* kernel task state structure */
1107c478bd9Sstevel@tonic-gate 
1117c478bd9Sstevel@tonic-gate 
1127c478bd9Sstevel@tonic-gate user_desc_t	zero_udesc;		/* base zero user desc native procs */
113843e1988Sjohnlev user_desc_t	null_udesc;		/* null user descriptor */
114843e1988Sjohnlev system_desc_t	null_sdesc;		/* null system descriptor */
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
1177c478bd9Sstevel@tonic-gate 
118843e1988Sjohnlev user_desc_t	ucs_on;
119843e1988Sjohnlev user_desc_t	ucs_off;
120843e1988Sjohnlev user_desc_t	ucs32_on;
121843e1988Sjohnlev user_desc_t	ucs32_off;
122843e1988Sjohnlev 
12374ecdb51SJohn Levon /*
12474ecdb51SJohn Levon  * If the size of this is changed, you must update hat_pcp_setup() and the
12574ecdb51SJohn Levon  * definitions in exception.s
12674ecdb51SJohn Levon  */
12774ecdb51SJohn Levon extern char dblfault_stack0[DEFAULTSTKSZ];
12874ecdb51SJohn Levon extern char nmi_stack0[DEFAULTSTKSZ];
12974ecdb51SJohn Levon extern char mce_stack0[DEFAULTSTKSZ];
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate extern void	fast_null(void);
1327c478bd9Sstevel@tonic-gate extern hrtime_t	get_hrtime(void);
1337c478bd9Sstevel@tonic-gate extern hrtime_t	gethrvtime(void);
1347c478bd9Sstevel@tonic-gate extern hrtime_t	get_hrestime(void);
1357c478bd9Sstevel@tonic-gate extern uint64_t	getlgrp(void);
1367c478bd9Sstevel@tonic-gate 
1377c478bd9Sstevel@tonic-gate void (*(fasttable[]))(void) = {
1387c478bd9Sstevel@tonic-gate 	fast_null,			/* T_FNULL routine */
1397c478bd9Sstevel@tonic-gate 	fast_null,			/* T_FGETFP routine (initially null) */
1407c478bd9Sstevel@tonic-gate 	fast_null,			/* T_FSETFP routine (initially null) */
141027bcc9fSToomas Soome 	(void (*)())(uintptr_t)get_hrtime,	/* T_GETHRTIME */
142027bcc9fSToomas Soome 	(void (*)())(uintptr_t)gethrvtime,	/* T_GETHRVTIME */
143027bcc9fSToomas Soome 	(void (*)())(uintptr_t)get_hrestime,	/* T_GETHRESTIME */
144027bcc9fSToomas Soome 	(void (*)())(uintptr_t)getlgrp		/* T_GETLGRP */
1457c478bd9Sstevel@tonic-gate };
1467c478bd9Sstevel@tonic-gate 
1479acbbeafSnn /*
1489acbbeafSnn  * Structure containing pre-computed descriptors to allow us to temporarily
1499acbbeafSnn  * interpose on a standard handler.
1509acbbeafSnn  */
1519acbbeafSnn struct interposing_handler {
1529acbbeafSnn 	int ih_inum;
1539acbbeafSnn 	gate_desc_t ih_interp_desc;
1549acbbeafSnn 	gate_desc_t ih_default_desc;
1559acbbeafSnn };
1569acbbeafSnn 
1579acbbeafSnn /*
1589acbbeafSnn  * The brand infrastructure interposes on two handlers, and we use one as a
1599acbbeafSnn  * NULL signpost.
1609acbbeafSnn  */
161eb5a5c78SSurya Prakki static struct interposing_handler brand_tbl[2];
1629acbbeafSnn 
1637c478bd9Sstevel@tonic-gate /*
1647c478bd9Sstevel@tonic-gate  * software prototypes for default local descriptor table
1657c478bd9Sstevel@tonic-gate  */
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate /*
1687c478bd9Sstevel@tonic-gate  * Routines for loading segment descriptors in format the hardware
1697c478bd9Sstevel@tonic-gate  * can understand.
1707c478bd9Sstevel@tonic-gate  */
1717c478bd9Sstevel@tonic-gate 
1727c478bd9Sstevel@tonic-gate /*
1737c478bd9Sstevel@tonic-gate  * In long mode we have the new L or long mode attribute bit
1747c478bd9Sstevel@tonic-gate  * for code segments. Only the conforming bit in type is used along
1757c478bd9Sstevel@tonic-gate  * with descriptor priority and present bits. Default operand size must
1767c478bd9Sstevel@tonic-gate  * be zero when in long mode. In 32-bit compatibility mode all fields
1777c478bd9Sstevel@tonic-gate  * are treated as in legacy mode. For data segments while in long mode
1787c478bd9Sstevel@tonic-gate  * only the present bit is loaded.
1797c478bd9Sstevel@tonic-gate  */
1807c478bd9Sstevel@tonic-gate void
set_usegd(user_desc_t * dp,uint_t lmode,void * base,uint32_t size,uint_t type,uint_t dpl,uint_t gran,uint_t defopsz)181*075343cbSDan Cross set_usegd(user_desc_t *dp, uint_t lmode, void *base, uint32_t size,
1827c478bd9Sstevel@tonic-gate     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
1837c478bd9Sstevel@tonic-gate {
1847c478bd9Sstevel@tonic-gate 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
185a0955b86SJohn Levon 	/* This should never be a "system" segment. */
186a0955b86SJohn Levon 	ASSERT3U(type & SDT_S, !=, 0);
187*075343cbSDan Cross 	ASSERT3P(dp, !=, NULL);
1887c478bd9Sstevel@tonic-gate 
1897c478bd9Sstevel@tonic-gate 	/*
1907c478bd9Sstevel@tonic-gate 	 * 64-bit long mode.
1917c478bd9Sstevel@tonic-gate 	 */
1927c478bd9Sstevel@tonic-gate 	if (lmode == SDP_LONG)
1937c478bd9Sstevel@tonic-gate 		dp->usd_def32 = 0;		/* 32-bit operands only */
1947c478bd9Sstevel@tonic-gate 	else
1957c478bd9Sstevel@tonic-gate 		/*
1967c478bd9Sstevel@tonic-gate 		 * 32-bit compatibility mode.
1977c478bd9Sstevel@tonic-gate 		 */
1987c478bd9Sstevel@tonic-gate 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
1997c478bd9Sstevel@tonic-gate 
200a0955b86SJohn Levon 	/*
201a0955b86SJohn Levon 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
202a0955b86SJohn Levon 	 * will write to the GDT whenever we change segment registers around.
203a0955b86SJohn Levon 	 * With KPTI on, the GDT is read-only in the user page table, which
204a0955b86SJohn Levon 	 * causes crashes if we don't set this.
205a0955b86SJohn Levon 	 */
206a0955b86SJohn Levon 	ASSERT3U(type & SDT_A, !=, 0);
207a0955b86SJohn Levon 
2087c478bd9Sstevel@tonic-gate 	dp->usd_long = lmode;	/* 64-bit mode */
2097c478bd9Sstevel@tonic-gate 	dp->usd_type = type;
2107c478bd9Sstevel@tonic-gate 	dp->usd_dpl = dpl;
2117c478bd9Sstevel@tonic-gate 	dp->usd_p = 1;
2127c478bd9Sstevel@tonic-gate 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
2137c478bd9Sstevel@tonic-gate 
2147c478bd9Sstevel@tonic-gate 	dp->usd_lobase = (uintptr_t)base;
2157c478bd9Sstevel@tonic-gate 	dp->usd_midbase = (uintptr_t)base >> 16;
2167c478bd9Sstevel@tonic-gate 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
2177c478bd9Sstevel@tonic-gate 	dp->usd_lolimit = size;
2187c478bd9Sstevel@tonic-gate 	dp->usd_hilimit = (uintptr_t)size >> 16;
2197c478bd9Sstevel@tonic-gate }
2207c478bd9Sstevel@tonic-gate 
2217c478bd9Sstevel@tonic-gate /*
2227c478bd9Sstevel@tonic-gate  * Install system segment descriptor for LDT and TSS segments.
2237c478bd9Sstevel@tonic-gate  */
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate void
set_syssegd(system_desc_t * dp,void * base,size_t size,uint_t type,uint_t dpl)2267c478bd9Sstevel@tonic-gate set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
2277c478bd9Sstevel@tonic-gate     uint_t dpl)
2287c478bd9Sstevel@tonic-gate {
2297c478bd9Sstevel@tonic-gate 	dp->ssd_lolimit = size;
2307c478bd9Sstevel@tonic-gate 	dp->ssd_hilimit = (uintptr_t)size >> 16;
2317c478bd9Sstevel@tonic-gate 
2327c478bd9Sstevel@tonic-gate 	dp->ssd_lobase = (uintptr_t)base;
2337c478bd9Sstevel@tonic-gate 	dp->ssd_midbase = (uintptr_t)base >> 16;
2347c478bd9Sstevel@tonic-gate 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
2357c478bd9Sstevel@tonic-gate 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
2367c478bd9Sstevel@tonic-gate 
2377c478bd9Sstevel@tonic-gate 	dp->ssd_type = type;
2387c478bd9Sstevel@tonic-gate 	dp->ssd_zero1 = 0;	/* must be zero */
2397c478bd9Sstevel@tonic-gate 	dp->ssd_zero2 = 0;
2407c478bd9Sstevel@tonic-gate 	dp->ssd_dpl = dpl;
2417c478bd9Sstevel@tonic-gate 	dp->ssd_p = 1;
2427c478bd9Sstevel@tonic-gate 	dp->ssd_gran = 0;	/* force byte units */
2437c478bd9Sstevel@tonic-gate }
2447c478bd9Sstevel@tonic-gate 
245843e1988Sjohnlev void *
get_ssd_base(system_desc_t * dp)246843e1988Sjohnlev get_ssd_base(system_desc_t *dp)
247843e1988Sjohnlev {
248843e1988Sjohnlev 	uintptr_t	base;
249843e1988Sjohnlev 
250843e1988Sjohnlev 	base = (uintptr_t)dp->ssd_lobase |
251843e1988Sjohnlev 	    (uintptr_t)dp->ssd_midbase << 16 |
252843e1988Sjohnlev 	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
253843e1988Sjohnlev 	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
254843e1988Sjohnlev 	return ((void *)base);
255843e1988Sjohnlev }
256843e1988Sjohnlev 
2577c478bd9Sstevel@tonic-gate /*
2587c478bd9Sstevel@tonic-gate  * Install gate segment descriptor for interrupt, trap, call and task gates.
25974ecdb51SJohn Levon  *
26074ecdb51SJohn Levon  * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on
26174ecdb51SJohn Levon  * all interrupts.  We have different ISTs for each class of exceptions that are
26274ecdb51SJohn Levon  * most likely to occur while handling an existing exception; while many of
26374ecdb51SJohn Levon  * these are just going to panic, it's nice not to trample on the existing
26474ecdb51SJohn Levon  * exception state for debugging purposes.
26574ecdb51SJohn Levon  *
26674ecdb51SJohn Levon  * Normal interrupts are all redirected unconditionally to the KPTI trampoline
26774ecdb51SJohn Levon  * stack space. This unifies the trampoline handling between user and kernel
26874ecdb51SJohn Levon  * space (and avoids the need to touch %gs).
26974ecdb51SJohn Levon  *
27074ecdb51SJohn Levon  * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when
27174ecdb51SJohn Levon  * we do a read from KMDB that cause another #PF.  Without its own IST, this
27274ecdb51SJohn Levon  * would stomp on the kernel's mcpu_kpti_flt frame.
2737c478bd9Sstevel@tonic-gate  */
27474ecdb51SJohn Levon uint_t
idt_vector_to_ist(uint_t vector)27574ecdb51SJohn Levon idt_vector_to_ist(uint_t vector)
2767c478bd9Sstevel@tonic-gate {
27774ecdb51SJohn Levon #if defined(__xpv)
27874ecdb51SJohn Levon 	_NOTE(ARGUNUSED(vector));
27974ecdb51SJohn Levon 	return (IST_NONE);
28074ecdb51SJohn Levon #else
28174ecdb51SJohn Levon 	switch (vector) {
28274ecdb51SJohn Levon 	/* These should always use IST even without KPTI enabled. */
28374ecdb51SJohn Levon 	case T_DBLFLT:
28474ecdb51SJohn Levon 		return (IST_DF);
28574ecdb51SJohn Levon 	case T_NMIFLT:
28674ecdb51SJohn Levon 		return (IST_NMI);
28774ecdb51SJohn Levon 	case T_MCE:
28874ecdb51SJohn Levon 		return (IST_MCE);
28974ecdb51SJohn Levon 
29074ecdb51SJohn Levon 	case T_BPTFLT:
29174ecdb51SJohn Levon 	case T_SGLSTP:
29274ecdb51SJohn Levon 		if (kpti_enable == 1) {
29374ecdb51SJohn Levon 			return (IST_DBG);
29474ecdb51SJohn Levon 		}
29574ecdb51SJohn Levon 		return (IST_NONE);
29674ecdb51SJohn Levon 	case T_STKFLT:
29774ecdb51SJohn Levon 	case T_GPFLT:
29874ecdb51SJohn Levon 	case T_PGFLT:
29974ecdb51SJohn Levon 		if (kpti_enable == 1) {
30074ecdb51SJohn Levon 			return (IST_NESTABLE);
30174ecdb51SJohn Levon 		}
30274ecdb51SJohn Levon 		return (IST_NONE);
30374ecdb51SJohn Levon 	default:
30474ecdb51SJohn Levon 		if (kpti_enable == 1) {
30574ecdb51SJohn Levon 			return (IST_DEFAULT);
30674ecdb51SJohn Levon 		}
30774ecdb51SJohn Levon 		return (IST_NONE);
30874ecdb51SJohn Levon 	}
309843e1988Sjohnlev #endif
3107c478bd9Sstevel@tonic-gate }
3117c478bd9Sstevel@tonic-gate 
3127c478bd9Sstevel@tonic-gate void
set_gatesegd(gate_desc_t * dp,void (* func)(void),selector_t sel,uint_t type,uint_t dpl,uint_t ist)3137c478bd9Sstevel@tonic-gate set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
31474ecdb51SJohn Levon     uint_t type, uint_t dpl, uint_t ist)
3157c478bd9Sstevel@tonic-gate {
3167c478bd9Sstevel@tonic-gate 	dp->sgd_looffset = (uintptr_t)func;
3177c478bd9Sstevel@tonic-gate 	dp->sgd_hioffset = (uintptr_t)func >> 16;
31874ecdb51SJohn Levon 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
3197c478bd9Sstevel@tonic-gate 	dp->sgd_selector =  (uint16_t)sel;
32074ecdb51SJohn Levon 	dp->sgd_ist = ist;
3217c478bd9Sstevel@tonic-gate 	dp->sgd_type = type;
3227c478bd9Sstevel@tonic-gate 	dp->sgd_dpl = dpl;
3237c478bd9Sstevel@tonic-gate 	dp->sgd_p = 1;
3247c478bd9Sstevel@tonic-gate }
3257c478bd9Sstevel@tonic-gate 
326843e1988Sjohnlev /*
327843e1988Sjohnlev  * Updates a single user descriptor in the the GDT of the current cpu.
328843e1988Sjohnlev  * Caller is responsible for preventing cpu migration.
329843e1988Sjohnlev  */
330843e1988Sjohnlev 
331843e1988Sjohnlev void
gdt_update_usegd(uint_t sidx,user_desc_t * udp)332843e1988Sjohnlev gdt_update_usegd(uint_t sidx, user_desc_t *udp)
333843e1988Sjohnlev {
334a0955b86SJohn Levon #if defined(DEBUG)
335a0955b86SJohn Levon 	/* This should never be a "system" segment, but it might be null. */
336a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
337a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
338a0955b86SJohn Levon 	}
339a0955b86SJohn Levon 	/*
340a0955b86SJohn Levon 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
341a0955b86SJohn Levon 	 * will write to the GDT whenever we change segment registers around.
342a0955b86SJohn Levon 	 * With KPTI on, the GDT is read-only in the user page table, which
343a0955b86SJohn Levon 	 * causes crashes if we don't set this.
344a0955b86SJohn Levon 	 */
345a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
346a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
347a0955b86SJohn Levon 	}
348a0955b86SJohn Levon #endif
349843e1988Sjohnlev 
350a0955b86SJohn Levon #if defined(__xpv)
351843e1988Sjohnlev 	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
352843e1988Sjohnlev 
353843e1988Sjohnlev 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
354843e1988Sjohnlev 		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
355843e1988Sjohnlev 
356843e1988Sjohnlev #else	/* __xpv */
357843e1988Sjohnlev 	CPU->cpu_gdt[sidx] = *udp;
358843e1988Sjohnlev #endif	/* __xpv */
359843e1988Sjohnlev }
360843e1988Sjohnlev 
361843e1988Sjohnlev /*
362843e1988Sjohnlev  * Writes single descriptor pointed to by udp into a processes
363843e1988Sjohnlev  * LDT entry pointed to by ldp.
364843e1988Sjohnlev  */
365843e1988Sjohnlev int
ldt_update_segd(user_desc_t * ldp,user_desc_t * udp)366843e1988Sjohnlev ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
367843e1988Sjohnlev {
368a0955b86SJohn Levon #if defined(DEBUG)
369a0955b86SJohn Levon 	/* This should never be a "system" segment, but it might be null. */
370a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
371a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
372a0955b86SJohn Levon 	}
373a0955b86SJohn Levon 	/*
374a0955b86SJohn Levon 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
375a0955b86SJohn Levon 	 * will write to the LDT whenever we change segment registers around.
376a0955b86SJohn Levon 	 * With KPTI on, the LDT is read-only in the user page table, which
377a0955b86SJohn Levon 	 * causes crashes if we don't set this.
378a0955b86SJohn Levon 	 */
379a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
380a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
381a0955b86SJohn Levon 	}
382a0955b86SJohn Levon #endif
383843e1988Sjohnlev 
384a0955b86SJohn Levon #if defined(__xpv)
385843e1988Sjohnlev 	uint64_t dpa;
386843e1988Sjohnlev 
387843e1988Sjohnlev 	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
388843e1988Sjohnlev 	    ((uintptr_t)ldp & PAGEOFFSET);
389843e1988Sjohnlev 
390843e1988Sjohnlev 	/*
391843e1988Sjohnlev 	 * The hypervisor is a little more restrictive about what it
392843e1988Sjohnlev 	 * supports in the LDT.
393843e1988Sjohnlev 	 */
394843e1988Sjohnlev 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
395843e1988Sjohnlev 		return (EINVAL);
396843e1988Sjohnlev 
397843e1988Sjohnlev #else	/* __xpv */
398843e1988Sjohnlev 	*ldp = *udp;
399843e1988Sjohnlev 
400843e1988Sjohnlev #endif	/* __xpv */
401843e1988Sjohnlev 	return (0);
402843e1988Sjohnlev }
403843e1988Sjohnlev 
404843e1988Sjohnlev #if defined(__xpv)
405843e1988Sjohnlev 
406843e1988Sjohnlev /*
407843e1988Sjohnlev  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
408843e1988Sjohnlev  * Returns true if a valid entry was written.
409843e1988Sjohnlev  */
410843e1988Sjohnlev int
xen_idt_to_trap_info(uint_t vec,gate_desc_t * sgd,void * ti_arg)411843e1988Sjohnlev xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
412843e1988Sjohnlev {
413843e1988Sjohnlev 	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */
414843e1988Sjohnlev 
415843e1988Sjohnlev 	/*
416843e1988Sjohnlev 	 * skip holes in the IDT
417843e1988Sjohnlev 	 */
418843e1988Sjohnlev 	if (GATESEG_GETOFFSET(sgd) == 0)
419843e1988Sjohnlev 		return (0);
420843e1988Sjohnlev 
421843e1988Sjohnlev 	ASSERT(sgd->sgd_type == SDT_SYSIGT);
422843e1988Sjohnlev 	ti->vector = vec;
423843e1988Sjohnlev 	TI_SET_DPL(ti, sgd->sgd_dpl);
424843e1988Sjohnlev 
425843e1988Sjohnlev 	/*
426843e1988Sjohnlev 	 * Is this an interrupt gate?
427843e1988Sjohnlev 	 */
428843e1988Sjohnlev 	if (sgd->sgd_type == SDT_SYSIGT) {
429843e1988Sjohnlev 		/* LINTED */
430843e1988Sjohnlev 		TI_SET_IF(ti, 1);
431843e1988Sjohnlev 	}
432843e1988Sjohnlev 	ti->cs = sgd->sgd_selector;
433843e1988Sjohnlev 	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
434843e1988Sjohnlev 	ti->address = GATESEG_GETOFFSET(sgd);
435843e1988Sjohnlev 	return (1);
436843e1988Sjohnlev }
437843e1988Sjohnlev 
438843e1988Sjohnlev /*
439843e1988Sjohnlev  * Convert a single hw format gate descriptor and write it into our virtual IDT.
440843e1988Sjohnlev  */
441843e1988Sjohnlev void
xen_idt_write(gate_desc_t * sgd,uint_t vec)442843e1988Sjohnlev xen_idt_write(gate_desc_t *sgd, uint_t vec)
443843e1988Sjohnlev {
444843e1988Sjohnlev 	trap_info_t trapinfo[2];
445843e1988Sjohnlev 
446843e1988Sjohnlev 	bzero(trapinfo, sizeof (trapinfo));
447843e1988Sjohnlev 	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
448843e1988Sjohnlev 		return;
449843e1988Sjohnlev 	if (xen_set_trap_table(trapinfo) != 0)
450843e1988Sjohnlev 		panic("xen_idt_write: xen_set_trap_table() failed");
451843e1988Sjohnlev }
452843e1988Sjohnlev 
453843e1988Sjohnlev #endif	/* __xpv */
454843e1988Sjohnlev 
4557c478bd9Sstevel@tonic-gate 
4567c478bd9Sstevel@tonic-gate /*
4577c478bd9Sstevel@tonic-gate  * Build kernel GDT.
4587c478bd9Sstevel@tonic-gate  */
4597c478bd9Sstevel@tonic-gate 
4607c478bd9Sstevel@tonic-gate static void
init_gdt_common(user_desc_t * gdt)461ae115bc7Smrj init_gdt_common(user_desc_t *gdt)
4627c478bd9Sstevel@tonic-gate {
463ae115bc7Smrj 	int i;
4647c478bd9Sstevel@tonic-gate 
465*075343cbSDan Cross 	ASSERT3P(gdt, !=, NULL);
466*075343cbSDan Cross 
467*075343cbSDan Cross 	init_boot_gdt(gdt);
468*075343cbSDan Cross 
4697c478bd9Sstevel@tonic-gate 	/*
4707c478bd9Sstevel@tonic-gate 	 * 64-bit kernel code segment.
4717c478bd9Sstevel@tonic-gate 	 */
472ae115bc7Smrj 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
4737c478bd9Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
4747c478bd9Sstevel@tonic-gate 
4757c478bd9Sstevel@tonic-gate 	/*
4767c478bd9Sstevel@tonic-gate 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
477*075343cbSDan Cross 	 * mode, but we set it here to SDP_LIMIT_MAX so that we can use the
478*075343cbSDan Cross 	 * SYSRET instruction to return from system calls back to 32-bit
479*075343cbSDan Cross 	 * applications.  SYSRET doesn't update the base, limit, or attributes
480*075343cbSDan Cross 	 * of %ss or %ds descriptors. We therefore must ensure that the kernel
481*075343cbSDan Cross 	 * uses something, though it will be ignored by hardware, that is
482*075343cbSDan Cross 	 * compatible with 32-bit apps. For the same reason we must set the
483*075343cbSDan Cross 	 * default op size of this descriptor to 32-bit operands.
4847c478bd9Sstevel@tonic-gate 	 */
485*075343cbSDan Cross 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
4867c478bd9Sstevel@tonic-gate 	    SEL_KPL, SDP_PAGES, SDP_OP32);
487ae115bc7Smrj 	gdt[GDT_KDATA].usd_def32 = 1;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 	/*
4907c478bd9Sstevel@tonic-gate 	 * 64-bit user code segment.
4917c478bd9Sstevel@tonic-gate 	 */
492ae115bc7Smrj 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
4937c478bd9Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate 	/*
4967c478bd9Sstevel@tonic-gate 	 * 32-bit user code segment.
4977c478bd9Sstevel@tonic-gate 	 */
498*075343cbSDan Cross 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMERA,
4997c478bd9Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5007c478bd9Sstevel@tonic-gate 
501843e1988Sjohnlev 	/*
502843e1988Sjohnlev 	 * See gdt_ucode32() and gdt_ucode_native().
503843e1988Sjohnlev 	 */
504843e1988Sjohnlev 	ucs_on = ucs_off = gdt[GDT_UCODE];
505843e1988Sjohnlev 	ucs_off.usd_p = 0;	/* forces #np fault */
506843e1988Sjohnlev 
507843e1988Sjohnlev 	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
508843e1988Sjohnlev 	ucs32_off.usd_p = 0;	/* forces #np fault */
509843e1988Sjohnlev 
5107c478bd9Sstevel@tonic-gate 	/*
5117c478bd9Sstevel@tonic-gate 	 * 32 and 64 bit data segments can actually share the same descriptor.
5127c478bd9Sstevel@tonic-gate 	 * In long mode only the present bit is checked but all other fields
5137c478bd9Sstevel@tonic-gate 	 * are loaded. But in compatibility mode all fields are interpreted
5147c478bd9Sstevel@tonic-gate 	 * as in legacy mode so they must be set correctly for a 32-bit data
5157c478bd9Sstevel@tonic-gate 	 * segment.
5167c478bd9Sstevel@tonic-gate 	 */
517*075343cbSDan Cross 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
518*075343cbSDan Cross 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5197c478bd9Sstevel@tonic-gate 
520843e1988Sjohnlev #if !defined(__xpv)
521843e1988Sjohnlev 
5227c478bd9Sstevel@tonic-gate 	/*
5230baeff3dSrab 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
5240baeff3dSrab 	 * in the GDT is 0.
5257c478bd9Sstevel@tonic-gate 	 */
5267c478bd9Sstevel@tonic-gate 
5277c478bd9Sstevel@tonic-gate 	/*
5287c478bd9Sstevel@tonic-gate 	 * Kernel TSS
5297c478bd9Sstevel@tonic-gate 	 */
5300cfdb603Sjosephb 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
5310cfdb603Sjosephb 	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
5327c478bd9Sstevel@tonic-gate 
533843e1988Sjohnlev #endif	/* !__xpv */
534843e1988Sjohnlev 
5357c478bd9Sstevel@tonic-gate 	/*
5367c478bd9Sstevel@tonic-gate 	 * Initialize fs and gs descriptors for 32 bit processes.
5377c478bd9Sstevel@tonic-gate 	 * Only attributes and limits are initialized, the effective
5387c478bd9Sstevel@tonic-gate 	 * base address is programmed via fsbase/gsbase.
5397c478bd9Sstevel@tonic-gate 	 */
540*075343cbSDan Cross 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
5417c478bd9Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
542*075343cbSDan Cross 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
5437c478bd9Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5447c478bd9Sstevel@tonic-gate 
5459acbbeafSnn 	/*
5469acbbeafSnn 	 * Initialize the descriptors set aside for brand usage.
5479acbbeafSnn 	 * Only attributes and limits are initialized.
5489acbbeafSnn 	 */
5499acbbeafSnn 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
550*075343cbSDan Cross 		set_usegd(&gdt0[i], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
5519acbbeafSnn 		    SEL_UPL, SDP_PAGES, SDP_OP32);
5529acbbeafSnn 
5537c478bd9Sstevel@tonic-gate 	/*
5547c478bd9Sstevel@tonic-gate 	 * Initialize convenient zero base user descriptors for clearing
5557c478bd9Sstevel@tonic-gate 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
5567c478bd9Sstevel@tonic-gate 	 * an example.
5577c478bd9Sstevel@tonic-gate 	 */
5587c478bd9Sstevel@tonic-gate 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
5597c478bd9Sstevel@tonic-gate 	    SDP_BYTES, SDP_OP32);
560*075343cbSDan Cross 	set_usegd(&zero_u32desc, SDP_SHORT, 0, SDP_LIMIT_MAX, SDT_MEMRWA,
561*075343cbSDan Cross 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5627c478bd9Sstevel@tonic-gate }
5637c478bd9Sstevel@tonic-gate 
564843e1988Sjohnlev #if defined(__xpv)
565843e1988Sjohnlev 
566843e1988Sjohnlev static user_desc_t *
init_gdt(void)567843e1988Sjohnlev init_gdt(void)
568843e1988Sjohnlev {
569843e1988Sjohnlev 	uint64_t gdtpa;
570843e1988Sjohnlev 	ulong_t ma[1];		/* XXPV should be a memory_t */
571843e1988Sjohnlev 	ulong_t addr;
572843e1988Sjohnlev 
573843e1988Sjohnlev #if !defined(__lint)
574843e1988Sjohnlev 	/*
575843e1988Sjohnlev 	 * Our gdt is never larger than a single page.
576843e1988Sjohnlev 	 */
577843e1988Sjohnlev 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
578843e1988Sjohnlev #endif
579843e1988Sjohnlev 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
580843e1988Sjohnlev 	    PAGESIZE, PAGESIZE);
581*075343cbSDan Cross 	ASSERT3P(gdt0, !=, NULL);
582843e1988Sjohnlev 	bzero(gdt0, PAGESIZE);
583843e1988Sjohnlev 
584843e1988Sjohnlev 	init_gdt_common(gdt0);
585843e1988Sjohnlev 
586843e1988Sjohnlev 	/*
587843e1988Sjohnlev 	 * XXX Since we never invoke kmdb until after the kernel takes
588843e1988Sjohnlev 	 * over the descriptor tables why not have it use the kernel's
589843e1988Sjohnlev 	 * selectors?
590843e1988Sjohnlev 	 */
591843e1988Sjohnlev 	if (boothowto & RB_DEBUG) {
592*075343cbSDan Cross 		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX,
593*075343cbSDan Cross 		    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
594*075343cbSDan Cross 		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX,
595*075343cbSDan Cross 		    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
596843e1988Sjohnlev 	}
597843e1988Sjohnlev 
598843e1988Sjohnlev 	/*
599843e1988Sjohnlev 	 * Clear write permission for page containing the gdt and install it.
600843e1988Sjohnlev 	 */
601843e1988Sjohnlev 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
602843e1988Sjohnlev 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
603843e1988Sjohnlev 	kbm_read_only((uintptr_t)gdt0, gdtpa);
604843e1988Sjohnlev 	xen_set_gdt(ma, NGDT);
605843e1988Sjohnlev 
606843e1988Sjohnlev 	/*
607843e1988Sjohnlev 	 * Reload the segment registers to use the new GDT.
608843e1988Sjohnlev 	 * On 64-bit, fixup KCS_SEL to be in ring 3.
609843e1988Sjohnlev 	 * See KCS_SEL in segments.h.
610843e1988Sjohnlev 	 */
611843e1988Sjohnlev 	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
612843e1988Sjohnlev 
613843e1988Sjohnlev 	/*
614843e1988Sjohnlev 	 *  setup %gs for kernel
615843e1988Sjohnlev 	 */
616843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
617843e1988Sjohnlev 
618843e1988Sjohnlev 	/*
619843e1988Sjohnlev 	 * XX64 We should never dereference off "other gsbase" or
620843e1988Sjohnlev 	 * "fsbase".  So, we should arrange to point FSBASE and
621843e1988Sjohnlev 	 * KGSBASE somewhere truly awful e.g. point it at the last
622843e1988Sjohnlev 	 * valid address below the hole so that any attempts to index
623843e1988Sjohnlev 	 * off them cause an exception.
624843e1988Sjohnlev 	 *
625843e1988Sjohnlev 	 * For now, point it at 8G -- at least it should be unmapped
626843e1988Sjohnlev 	 * until some 64-bit processes run.
627843e1988Sjohnlev 	 */
628843e1988Sjohnlev 	addr = 0x200000000ul;
629843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_FS, addr);
630843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_GS_USER, addr);
631843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
632843e1988Sjohnlev 
633843e1988Sjohnlev 	return (gdt0);
634843e1988Sjohnlev }
635843e1988Sjohnlev 
636843e1988Sjohnlev #else	/* __xpv */
637843e1988Sjohnlev 
638ae115bc7Smrj static user_desc_t *
init_gdt(void)6397c478bd9Sstevel@tonic-gate init_gdt(void)
6407c478bd9Sstevel@tonic-gate {
641*075343cbSDan Cross 	desctbr_t	r_gdt;
6427c478bd9Sstevel@tonic-gate 
643ae115bc7Smrj #if !defined(__lint)
6447c478bd9Sstevel@tonic-gate 	/*
645ae115bc7Smrj 	 * Our gdt is never larger than a single page.
646ae115bc7Smrj 	 */
647ae115bc7Smrj 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
648ae115bc7Smrj #endif
649ae115bc7Smrj 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
650ae115bc7Smrj 	    PAGESIZE, PAGESIZE);
651ae115bc7Smrj 	bzero(gdt0, PAGESIZE);
652ae115bc7Smrj 
653ae115bc7Smrj 	init_gdt_common(gdt0);
654ae115bc7Smrj 
655ae115bc7Smrj 	/*
656ae115bc7Smrj 	 * Install our new GDT
657ae115bc7Smrj 	 */
658ae115bc7Smrj 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
659ae115bc7Smrj 	r_gdt.dtr_base = (uintptr_t)gdt0;
660ae115bc7Smrj 	wr_gdtr(&r_gdt);
661ae115bc7Smrj 
662ae115bc7Smrj 	/*
663ae115bc7Smrj 	 * Reload the segment registers to use the new GDT
664ae115bc7Smrj 	 */
665ae115bc7Smrj 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
666ae115bc7Smrj 
667ae115bc7Smrj 	/*
668ae115bc7Smrj 	 *  setup %gs for kernel
669ae115bc7Smrj 	 */
670ae115bc7Smrj 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
671ae115bc7Smrj 
672ae115bc7Smrj 	/*
673ae115bc7Smrj 	 * XX64 We should never dereference off "other gsbase" or
674ae115bc7Smrj 	 * "fsbase".  So, we should arrange to point FSBASE and
675ae115bc7Smrj 	 * KGSBASE somewhere truly awful e.g. point it at the last
676ae115bc7Smrj 	 * valid address below the hole so that any attempts to index
677ae115bc7Smrj 	 * off them cause an exception.
678ae115bc7Smrj 	 *
679ae115bc7Smrj 	 * For now, point it at 8G -- at least it should be unmapped
680ae115bc7Smrj 	 * until some 64-bit processes run.
681ae115bc7Smrj 	 */
682ae115bc7Smrj 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
683ae115bc7Smrj 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
684ae115bc7Smrj 	return (gdt0);
685ae115bc7Smrj }
686ae115bc7Smrj 
687843e1988Sjohnlev #endif	/* __xpv */
688843e1988Sjohnlev 
6897c478bd9Sstevel@tonic-gate 
6907c478bd9Sstevel@tonic-gate /*
6917c478bd9Sstevel@tonic-gate  * Build kernel IDT.
6927c478bd9Sstevel@tonic-gate  *
693ae115bc7Smrj  * Note that for amd64 we pretty much require every gate to be an interrupt
694ae115bc7Smrj  * gate which blocks interrupts atomically on entry; that's because of our
695ae115bc7Smrj  * dependency on using 'swapgs' every time we come into the kernel to find
696ae115bc7Smrj  * the cpu structure. If we get interrupted just before doing that, %cs could
697ae115bc7Smrj  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
698ae115bc7Smrj  * %gsbase is really still pointing at something in userland. Bad things will
699ae115bc7Smrj  * ensue. We also use interrupt gates for i386 as well even though this is not
700ae115bc7Smrj  * required for some traps.
7017c478bd9Sstevel@tonic-gate  *
7027c478bd9Sstevel@tonic-gate  * Perhaps they should have invented a trap gate that does an atomic swapgs?
7037c478bd9Sstevel@tonic-gate  */
7047c478bd9Sstevel@tonic-gate static void
init_idt_common(gate_desc_t * idt)705ae115bc7Smrj init_idt_common(gate_desc_t *idt)
7067c478bd9Sstevel@tonic-gate {
70774ecdb51SJohn Levon 	set_gatesegd(&idt[T_ZERODIV],
70874ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_div0trap : &div0trap,
70974ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
71074ecdb51SJohn Levon 	set_gatesegd(&idt[T_SGLSTP],
71174ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
71274ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
71374ecdb51SJohn Levon 	set_gatesegd(&idt[T_NMIFLT],
71474ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_nmiint : &nmiint,
71574ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
71674ecdb51SJohn Levon 	set_gatesegd(&idt[T_BPTFLT],
71774ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_brktrap : &brktrap,
71874ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
71974ecdb51SJohn Levon 	set_gatesegd(&idt[T_OVFLW],
72074ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
72174ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
72274ecdb51SJohn Levon 	set_gatesegd(&idt[T_BOUNDFLT],
72374ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
72474ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
72574ecdb51SJohn Levon 	set_gatesegd(&idt[T_ILLINST],
72674ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
72774ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
72874ecdb51SJohn Levon 	set_gatesegd(&idt[T_NOEXTFLT],
72974ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
73074ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));
7317c478bd9Sstevel@tonic-gate 
7327c478bd9Sstevel@tonic-gate 	/*
7337c478bd9Sstevel@tonic-gate 	 * double fault handler.
734843e1988Sjohnlev 	 *
735843e1988Sjohnlev 	 * Note that on the hypervisor a guest does not receive #df faults.
736843e1988Sjohnlev 	 * Instead a failsafe event is injected into the guest if its selectors
737843e1988Sjohnlev 	 * and/or stack is in a broken state. See xen_failsafe_callback.
7387c478bd9Sstevel@tonic-gate 	 */
739843e1988Sjohnlev #if !defined(__xpv)
74074ecdb51SJohn Levon 	set_gatesegd(&idt[T_DBLFLT],
74174ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
74274ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
743843e1988Sjohnlev #endif	/* !__xpv */
7447c478bd9Sstevel@tonic-gate 
7457c478bd9Sstevel@tonic-gate 	/*
746ae115bc7Smrj 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
7477c478bd9Sstevel@tonic-gate 	 */
74874ecdb51SJohn Levon 	set_gatesegd(&idt[T_TSSFLT],
74974ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
75074ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
75174ecdb51SJohn Levon 	set_gatesegd(&idt[T_SEGFLT],
75274ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
75374ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
75474ecdb51SJohn Levon 	set_gatesegd(&idt[T_STKFLT],
75574ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_stktrap : &stktrap,
75674ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
75774ecdb51SJohn Levon 	set_gatesegd(&idt[T_GPFLT],
75874ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_gptrap : &gptrap,
75974ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
76074ecdb51SJohn Levon 	set_gatesegd(&idt[T_PGFLT],
76174ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_pftrap : &pftrap,
76274ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
76374ecdb51SJohn Levon 	set_gatesegd(&idt[T_EXTERRFLT],
76474ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_ndperr : &ndperr,
76574ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
76674ecdb51SJohn Levon 	set_gatesegd(&idt[T_ALIGNMENT],
76774ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_achktrap : &achktrap,
76874ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
76974ecdb51SJohn Levon 	set_gatesegd(&idt[T_MCE],
77074ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
77174ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
77274ecdb51SJohn Levon 	set_gatesegd(&idt[T_SIMDFPE],
77374ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
77474ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));
7757c478bd9Sstevel@tonic-gate 
7767c478bd9Sstevel@tonic-gate 	/*
7777c478bd9Sstevel@tonic-gate 	 * install fast trap handler at 210.
7787c478bd9Sstevel@tonic-gate 	 */
77974ecdb51SJohn Levon 	set_gatesegd(&idt[T_FASTTRAP],
78074ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
78174ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));
7827c478bd9Sstevel@tonic-gate 
7837c478bd9Sstevel@tonic-gate 	/*
7847c478bd9Sstevel@tonic-gate 	 * System call handler.
7857c478bd9Sstevel@tonic-gate 	 */
78674ecdb51SJohn Levon 	set_gatesegd(&idt[T_SYSCALLINT],
78774ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
78874ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate 	/*
791f498645aSahl 	 * Install the DTrace interrupt handler for the pid provider.
7927c478bd9Sstevel@tonic-gate 	 */
79374ecdb51SJohn Levon 	set_gatesegd(&idt[T_DTRACE_RET],
79474ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
79574ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));
7967c478bd9Sstevel@tonic-gate 
7979acbbeafSnn 	/*
798eb5a5c78SSurya Prakki 	 * Prepare interposing descriptor for the syscall handler
799eb5a5c78SSurya Prakki 	 * and cache copy of the default descriptor.
8009acbbeafSnn 	 */
801eb5a5c78SSurya Prakki 	brand_tbl[0].ih_inum = T_SYSCALLINT;
802eb5a5c78SSurya Prakki 	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
803ae115bc7Smrj 
80474ecdb51SJohn Levon 	set_gatesegd(&(brand_tbl[0].ih_interp_desc),
80574ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
80674ecdb51SJohn Levon 	    &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
80774ecdb51SJohn Levon 	    idt_vector_to_ist(T_SYSCALLINT));
8089acbbeafSnn 
809eb5a5c78SSurya Prakki 	brand_tbl[1].ih_inum = 0;
8107c478bd9Sstevel@tonic-gate }
8117c478bd9Sstevel@tonic-gate 
812843e1988Sjohnlev #if defined(__xpv)
813843e1988Sjohnlev 
814843e1988Sjohnlev static void
init_idt(gate_desc_t * idt)815843e1988Sjohnlev init_idt(gate_desc_t *idt)
816843e1988Sjohnlev {
817843e1988Sjohnlev 	init_idt_common(idt);
818843e1988Sjohnlev }
819843e1988Sjohnlev 
820843e1988Sjohnlev #else	/* __xpv */
821843e1988Sjohnlev 
8227c478bd9Sstevel@tonic-gate static void
init_idt(gate_desc_t * idt)823ae115bc7Smrj init_idt(gate_desc_t *idt)
8247c478bd9Sstevel@tonic-gate {
8257c478bd9Sstevel@tonic-gate 	char	ivctname[80];
8267c478bd9Sstevel@tonic-gate 	void	(*ivctptr)(void);
8277c478bd9Sstevel@tonic-gate 	int	i;
8287c478bd9Sstevel@tonic-gate 
8297c478bd9Sstevel@tonic-gate 	/*
8307c478bd9Sstevel@tonic-gate 	 * Initialize entire table with 'reserved' trap and then overwrite
8317c478bd9Sstevel@tonic-gate 	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
8327c478bd9Sstevel@tonic-gate 	 * since it can only be generated on a 386 processor. 15 is also
8337c478bd9Sstevel@tonic-gate 	 * unsupported and reserved.
8347c478bd9Sstevel@tonic-gate 	 */
83574ecdb51SJohn Levon 	for (i = 0; i < NIDT; i++) {
83674ecdb51SJohn Levon 		set_gatesegd(&idt[i],
83774ecdb51SJohn Levon 		    (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
83874ecdb51SJohn Levon 		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
83974ecdb51SJohn Levon 		    idt_vector_to_ist(T_RESVTRAP));
84074ecdb51SJohn Levon 	}
8417c478bd9Sstevel@tonic-gate 
8427c478bd9Sstevel@tonic-gate 	/*
8437c478bd9Sstevel@tonic-gate 	 * 20-31 reserved
8447c478bd9Sstevel@tonic-gate 	 */
84574ecdb51SJohn Levon 	for (i = 20; i < 32; i++) {
84674ecdb51SJohn Levon 		set_gatesegd(&idt[i],
84774ecdb51SJohn Levon 		    (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
84874ecdb51SJohn Levon 		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
84974ecdb51SJohn Levon 		    idt_vector_to_ist(T_INVALTRAP));
85074ecdb51SJohn Levon 	}
8517c478bd9Sstevel@tonic-gate 
8527c478bd9Sstevel@tonic-gate 	/*
8537c478bd9Sstevel@tonic-gate 	 * interrupts 32 - 255
8547c478bd9Sstevel@tonic-gate 	 */
8557c478bd9Sstevel@tonic-gate 	for (i = 32; i < 256; i++) {
85674ecdb51SJohn Levon 		(void) snprintf(ivctname, sizeof (ivctname),
85774ecdb51SJohn Levon 		    (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
8587c478bd9Sstevel@tonic-gate 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
8597c478bd9Sstevel@tonic-gate 		if (ivctptr == NULL)
8607c478bd9Sstevel@tonic-gate 			panic("kobj_getsymvalue(%s) failed", ivctname);
8617c478bd9Sstevel@tonic-gate 
86274ecdb51SJohn Levon 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
86374ecdb51SJohn Levon 		    idt_vector_to_ist(i));
8647c478bd9Sstevel@tonic-gate 	}
8657c478bd9Sstevel@tonic-gate 
8669acbbeafSnn 	/*
867ae115bc7Smrj 	 * Now install the common ones. Note that it will overlay some
868ae115bc7Smrj 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
8697c478bd9Sstevel@tonic-gate 	 */
870ae115bc7Smrj 	init_idt_common(idt);
8717c478bd9Sstevel@tonic-gate }
8727c478bd9Sstevel@tonic-gate 
873843e1988Sjohnlev #endif	/* __xpv */
874843e1988Sjohnlev 
/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 *
 * This routine establishes that initial "no LDT" state, either directly in
 * LDTR or, on the hypervisor, through xen_set_ldt().
 */
static void
init_ldt(void)
{
#if !defined(__xpv)
	wr_ldtr(0);
#else
	xen_set_ldt(NULL, 0);
#endif
}
8907c478bd9Sstevel@tonic-gate 
891843e1988Sjohnlev #if !defined(__xpv)
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate static void
init_tss(void)8947c478bd9Sstevel@tonic-gate init_tss(void)
8957c478bd9Sstevel@tonic-gate {
89674ecdb51SJohn Levon 	extern struct cpu cpus[];
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate 	/*
89974ecdb51SJohn Levon 	 * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
90074ecdb51SJohn Levon 	 * context switch but it'll be overwritten with this same value anyway.
9017c478bd9Sstevel@tonic-gate 	 */
90274ecdb51SJohn Levon 	if (kpti_enable == 1) {
90374ecdb51SJohn Levon 		ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
90474ecdb51SJohn Levon 	}
9057c478bd9Sstevel@tonic-gate 
90674ecdb51SJohn Levon 	/* Set up the IST stacks for double fault, NMI, MCE. */
90774ecdb51SJohn Levon 	ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
90874ecdb51SJohn Levon 	ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
90974ecdb51SJohn Levon 	ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];
9107c478bd9Sstevel@tonic-gate 
9117c478bd9Sstevel@tonic-gate 	/*
91274ecdb51SJohn Levon 	 * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
91374ecdb51SJohn Levon 	 * enabled), and also for KDI (always).
9147c478bd9Sstevel@tonic-gate 	 */
91574ecdb51SJohn Levon 	ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
9167c478bd9Sstevel@tonic-gate 
91774ecdb51SJohn Levon 	if (kpti_enable == 1) {
91874ecdb51SJohn Levon 		/* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */
91974ecdb51SJohn Levon 		ktss0->tss_ist5 =
92074ecdb51SJohn Levon 		    (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
9217c478bd9Sstevel@tonic-gate 
92274ecdb51SJohn Levon 		/* This IST stack is used for all other intrs (for KPTI). */
92374ecdb51SJohn Levon 		ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
92474ecdb51SJohn Levon 	}
9257c478bd9Sstevel@tonic-gate 
9267c478bd9Sstevel@tonic-gate 	/*
9277c478bd9Sstevel@tonic-gate 	 * Set I/O bit map offset equal to size of TSS segment limit
9287c478bd9Sstevel@tonic-gate 	 * for no I/O permission map. This will force all user I/O
9297c478bd9Sstevel@tonic-gate 	 * instructions to generate #gp fault.
9307c478bd9Sstevel@tonic-gate 	 */
9310cfdb603Sjosephb 	ktss0->tss_bitmapbase = sizeof (*ktss0);
9327c478bd9Sstevel@tonic-gate 
9337c478bd9Sstevel@tonic-gate 	/*
9347c478bd9Sstevel@tonic-gate 	 * Point %tr to descriptor for ktss0 in gdt.
9357c478bd9Sstevel@tonic-gate 	 */
9367c478bd9Sstevel@tonic-gate 	wr_tsr(KTSS_SEL);
9377c478bd9Sstevel@tonic-gate }
9387c478bd9Sstevel@tonic-gate 
939843e1988Sjohnlev #endif	/* !__xpv */
940843e1988Sjohnlev 
941843e1988Sjohnlev #if defined(__xpv)
942843e1988Sjohnlev 
943843e1988Sjohnlev void
init_desctbls(void)944843e1988Sjohnlev init_desctbls(void)
945843e1988Sjohnlev {
946843e1988Sjohnlev 	uint_t vec;
947843e1988Sjohnlev 	user_desc_t *gdt;
948843e1988Sjohnlev 
949843e1988Sjohnlev 	/*
950843e1988Sjohnlev 	 * Setup and install our GDT.
951843e1988Sjohnlev 	 */
952843e1988Sjohnlev 	gdt = init_gdt();
953843e1988Sjohnlev 
954843e1988Sjohnlev 	/*
955843e1988Sjohnlev 	 * Store static pa of gdt to speed up pa_to_ma() translations
956843e1988Sjohnlev 	 * on lwp context switches.
957843e1988Sjohnlev 	 */
958843e1988Sjohnlev 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
9590cfdb603Sjosephb 	CPU->cpu_gdt = gdt;
960843e1988Sjohnlev 	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
961843e1988Sjohnlev 
962843e1988Sjohnlev 	/*
963843e1988Sjohnlev 	 * Setup and install our IDT.
964843e1988Sjohnlev 	 */
9650cfdb603Sjosephb #if !defined(__lint)
9660cfdb603Sjosephb 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
9670cfdb603Sjosephb #endif
9680cfdb603Sjosephb 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
9690cfdb603Sjosephb 	    PAGESIZE, PAGESIZE);
9709844da31SSeth Goldberg 	bzero(idt0, PAGESIZE);
9710cfdb603Sjosephb 	init_idt(idt0);
972843e1988Sjohnlev 	for (vec = 0; vec < NIDT; vec++)
973843e1988Sjohnlev 		xen_idt_write(&idt0[vec], vec);
974843e1988Sjohnlev 
9750cfdb603Sjosephb 	CPU->cpu_idt = idt0;
976843e1988Sjohnlev 
977843e1988Sjohnlev 	/*
978843e1988Sjohnlev 	 * set default kernel stack
979843e1988Sjohnlev 	 */
980843e1988Sjohnlev 	xen_stack_switch(KDS_SEL,
981843e1988Sjohnlev 	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
982843e1988Sjohnlev 
983843e1988Sjohnlev 	xen_init_callbacks();
984843e1988Sjohnlev 
985843e1988Sjohnlev 	init_ldt();
986843e1988Sjohnlev }
987843e1988Sjohnlev 
988843e1988Sjohnlev #else	/* __xpv */
9897c478bd9Sstevel@tonic-gate 
9907c478bd9Sstevel@tonic-gate void
init_desctbls(void)991ae115bc7Smrj init_desctbls(void)
9927c478bd9Sstevel@tonic-gate {
993ae115bc7Smrj 	user_desc_t *gdt;
994ae115bc7Smrj 	desctbr_t idtr;
995ae115bc7Smrj 
9960cfdb603Sjosephb 	/*
9970cfdb603Sjosephb 	 * Allocate IDT and TSS structures on unique pages for better
9980cfdb603Sjosephb 	 * performance in virtual machines.
9990cfdb603Sjosephb 	 */
10000cfdb603Sjosephb #if !defined(__lint)
10010cfdb603Sjosephb 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
10020cfdb603Sjosephb #endif
10030cfdb603Sjosephb 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
10040cfdb603Sjosephb 	    PAGESIZE, PAGESIZE);
10059844da31SSeth Goldberg 	bzero(idt0, PAGESIZE);
10060cfdb603Sjosephb #if !defined(__lint)
10070cfdb603Sjosephb 	ASSERT(sizeof (*ktss0) <= PAGESIZE);
10080cfdb603Sjosephb #endif
1009f16a0f4cSRobert Mustacchi 	ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
10100cfdb603Sjosephb 	    PAGESIZE, PAGESIZE);
10119844da31SSeth Goldberg 	bzero(ktss0, PAGESIZE);
10120cfdb603Sjosephb 
10130cfdb603Sjosephb 
1014ae115bc7Smrj 	/*
1015ae115bc7Smrj 	 * Setup and install our GDT.
1016ae115bc7Smrj 	 */
1017ae115bc7Smrj 	gdt = init_gdt();
1018ae115bc7Smrj 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
10190cfdb603Sjosephb 	CPU->cpu_gdt = gdt;
1020ae115bc7Smrj 
102174ecdb51SJohn Levon 	/*
102274ecdb51SJohn Levon 	 * Initialize this CPU's LDT.
102374ecdb51SJohn Levon 	 */
102474ecdb51SJohn Levon 	CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
102574ecdb51SJohn Levon 	    LDT_CPU_SIZE, PAGESIZE);
102674ecdb51SJohn Levon 	bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
102774ecdb51SJohn Levon 	CPU->cpu_m.mcpu_ldt_len = 0;
102874ecdb51SJohn Levon 
1029ae115bc7Smrj 	/*
1030ae115bc7Smrj 	 * Setup and install our IDT.
1031ae115bc7Smrj 	 */
10320cfdb603Sjosephb 	init_idt(idt0);
1033ae115bc7Smrj 
1034ae115bc7Smrj 	idtr.dtr_base = (uintptr_t)idt0;
10350cfdb603Sjosephb 	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1036ae115bc7Smrj 	wr_idtr(&idtr);
10370cfdb603Sjosephb 	CPU->cpu_idt = idt0;
1038ae115bc7Smrj 
1039ae115bc7Smrj 
10407c478bd9Sstevel@tonic-gate 	init_tss();
10410cfdb603Sjosephb 	CPU->cpu_tss = ktss0;
10427c478bd9Sstevel@tonic-gate 	init_ldt();
104374ecdb51SJohn Levon 
104474ecdb51SJohn Levon 	/* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. */
104574ecdb51SJohn Levon 	kpti_safe_cr3 = (uint64_t)getcr3();
10467c478bd9Sstevel@tonic-gate }
10479acbbeafSnn 
1048843e1988Sjohnlev #endif	/* __xpv */
1049843e1988Sjohnlev 
1050309b04b8SJohn Levon #ifndef __xpv
1051309b04b8SJohn Levon /*
1052309b04b8SJohn Levon  * As per Intel Vol 3 27.5.2, the GDTR limit is reset to 64Kb on a VM exit, so
1053309b04b8SJohn Levon  * we have to manually fix it up ourselves.
1054309b04b8SJohn Levon  *
1055309b04b8SJohn Levon  * The caller may still need to make sure that it can't go off-CPU with the
1056309b04b8SJohn Levon  * incorrect limit, before calling this (such as disabling pre-emption).
1057309b04b8SJohn Levon  */
1058309b04b8SJohn Levon void
reset_gdtr_limit(void)1059309b04b8SJohn Levon reset_gdtr_limit(void)
1060309b04b8SJohn Levon {
1061309b04b8SJohn Levon 	ulong_t flags = intr_clear();
1062309b04b8SJohn Levon 	desctbr_t gdtr;
1063309b04b8SJohn Levon 
1064309b04b8SJohn Levon 	rd_gdtr(&gdtr);
1065309b04b8SJohn Levon 	gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1;
1066309b04b8SJohn Levon 	wr_gdtr(&gdtr);
1067309b04b8SJohn Levon 
1068309b04b8SJohn Levon 	intr_restore(flags);
1069309b04b8SJohn Levon }
1070309b04b8SJohn Levon #endif /* __xpv */
1071309b04b8SJohn Levon 
1072ae115bc7Smrj /*
1073*075343cbSDan Cross  * We need a GDT owned by the kernel and not the bootstrap relatively
1074*075343cbSDan Cross  * early in kernel initialization (e.g., to have segments we can reliably
1075*075343cbSDan Cross  * catch an exception on).
1076843e1988Sjohnlev  *
1077*075343cbSDan Cross  * Initializes a GDT with segments normally defined in the boot loader.
1078ae115bc7Smrj  */
1079ae115bc7Smrj void
init_boot_gdt(user_desc_t * bgdt)1080ae115bc7Smrj init_boot_gdt(user_desc_t *bgdt)
1081ae115bc7Smrj {
1082*075343cbSDan Cross 	ASSERT3P(bgdt, !=, NULL);
1083*075343cbSDan Cross 
1084*075343cbSDan Cross #ifdef	__xpv
1085*075343cbSDan Cross 	/* XXX: It is unclear why this 32-bit data segment is marked long. */
1086*075343cbSDan Cross 	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
1087*075343cbSDan Cross 	    SEL_KPL, SDP_PAGES, SDP_OP32);
1088*075343cbSDan Cross #else
1089*075343cbSDan Cross 	/*
1090*075343cbSDan Cross 	 * Reset boot segments.  These ostensibly come from the boot loader,
1091*075343cbSDan Cross 	 * but we reset them to match our expectations, particulary if we
1092*075343cbSDan Cross 	 * are not using that loader.
1093*075343cbSDan Cross 	 */
1094*075343cbSDan Cross 	set_usegd(&bgdt[GDT_B32DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1095*075343cbSDan Cross 	    SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
1096*075343cbSDan Cross 	set_usegd(&bgdt[GDT_B32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1097*075343cbSDan Cross 	    SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
1098*075343cbSDan Cross 
1099*075343cbSDan Cross 	/*
1100*075343cbSDan Cross 	 * 16-bit segments for making BIOS calls (not applicable on all
1101*075343cbSDan Cross 	 * architectures).
1102*075343cbSDan Cross 	 */
1103*075343cbSDan Cross 	set_usegd(&bgdt[GDT_B16CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1104*075343cbSDan Cross 	    SDT_MEMERA, SEL_KPL, 0, 0);
1105*075343cbSDan Cross 	/*
1106*075343cbSDan Cross 	 * XXX: SDP_OP32 makes this a 32-bit segment, which seems wrong
1107*075343cbSDan Cross 	 * here, but that's what boot_gdt.s used.
1108*075343cbSDan Cross 	 */
1109*075343cbSDan Cross 	set_usegd(&bgdt[GDT_B16DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1110*075343cbSDan Cross 	    SDT_MEMRWA, SEL_KPL, 0, SDP_OP32);
1111*075343cbSDan Cross #endif	/* __xpv */
1112*075343cbSDan Cross 
1113*075343cbSDan Cross 	/*
1114*075343cbSDan Cross 	 * A 64-bit code segment used in early boot.  Early IDTs refer to this.
1115*075343cbSDan Cross 	 */
1116*075343cbSDan Cross 	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMERA,
1117*075343cbSDan Cross 	    SEL_KPL, SDP_PAGES, SDP_OP32);
1118ae115bc7Smrj }
1119ae115bc7Smrj 
11209acbbeafSnn /*
11219acbbeafSnn  * Enable interpositioning on the system call path by rewriting the
11229acbbeafSnn  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
11239acbbeafSnn  * the branded entry points.
11249acbbeafSnn  */
11259acbbeafSnn void
brand_interpositioning_enable(void * arg __unused)11265a469116SPatrick Mooney brand_interpositioning_enable(void *arg __unused)
11279acbbeafSnn {
1128843e1988Sjohnlev 	gate_desc_t	*idt = CPU->cpu_idt;
1129027bcc9fSToomas Soome 	int		i;
11309acbbeafSnn 
1131843e1988Sjohnlev 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1132843e1988Sjohnlev 
1133843e1988Sjohnlev 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1134843e1988Sjohnlev 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1135843e1988Sjohnlev #if defined(__xpv)
1136843e1988Sjohnlev 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1137843e1988Sjohnlev 		    brand_tbl[i].ih_inum);
1138843e1988Sjohnlev #endif
1139843e1988Sjohnlev 	}
11409acbbeafSnn 
1141843e1988Sjohnlev #if defined(__xpv)
1142843e1988Sjohnlev 
1143843e1988Sjohnlev 	/*
1144843e1988Sjohnlev 	 * Currently the hypervisor only supports 64-bit syscalls via
1145843e1988Sjohnlev 	 * syscall instruction. The 32-bit syscalls are handled by
1146843e1988Sjohnlev 	 * interrupt gate above.
1147843e1988Sjohnlev 	 */
1148843e1988Sjohnlev 	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1149843e1988Sjohnlev 	    CALLBACKF_mask_events);
1150843e1988Sjohnlev 
1151843e1988Sjohnlev #else
1152843e1988Sjohnlev 
11537417cfdeSKuriakose Kuruvilla 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
115474ecdb51SJohn Levon 		if (kpti_enable == 1) {
115574ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
115674ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
115774ecdb51SJohn Levon 		} else {
115874ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
115974ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
116074ecdb51SJohn Levon 		}
1161843e1988Sjohnlev 	}
1162843e1988Sjohnlev 
11639acbbeafSnn #endif
11649acbbeafSnn 
116574ecdb51SJohn Levon 	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
116674ecdb51SJohn Levon 		if (kpti_enable == 1) {
116774ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP,
116874ecdb51SJohn Levon 			    (uintptr_t)tr_brand_sys_sysenter);
116974ecdb51SJohn Levon 		} else {
117074ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
117174ecdb51SJohn Levon 		}
117274ecdb51SJohn Levon 	}
11739acbbeafSnn }
11749acbbeafSnn 
11759acbbeafSnn /*
11769acbbeafSnn  * Disable interpositioning on the system call path by rewriting the
11779acbbeafSnn  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
11789acbbeafSnn  * the standard entry points, which bypass the interpositioning hooks.
11799acbbeafSnn  */
11809acbbeafSnn void
brand_interpositioning_disable(void * arg __unused)11815a469116SPatrick Mooney brand_interpositioning_disable(void *arg __unused)
11829acbbeafSnn {
1183843e1988Sjohnlev 	gate_desc_t	*idt = CPU->cpu_idt;
11849acbbeafSnn 	int i;
11859acbbeafSnn 
1186843e1988Sjohnlev 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1187843e1988Sjohnlev 
1188843e1988Sjohnlev 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1189843e1988Sjohnlev 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1190843e1988Sjohnlev #if defined(__xpv)
1191843e1988Sjohnlev 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1192843e1988Sjohnlev 		    brand_tbl[i].ih_inum);
1193843e1988Sjohnlev #endif
1194843e1988Sjohnlev 	}
11959acbbeafSnn 
1196843e1988Sjohnlev #if defined(__xpv)
1197843e1988Sjohnlev 
1198843e1988Sjohnlev 	/*
1199843e1988Sjohnlev 	 * See comment above in brand_interpositioning_enable.
1200843e1988Sjohnlev 	 */
1201843e1988Sjohnlev 	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1202843e1988Sjohnlev 	    CALLBACKF_mask_events);
1203843e1988Sjohnlev 
1204843e1988Sjohnlev #else
1205843e1988Sjohnlev 
12067417cfdeSKuriakose Kuruvilla 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
120774ecdb51SJohn Levon 		if (kpti_enable == 1) {
120874ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
120974ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
121074ecdb51SJohn Levon 		} else {
121174ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
121274ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
121374ecdb51SJohn Levon 		}
1214843e1988Sjohnlev 	}
1215843e1988Sjohnlev 
12169acbbeafSnn #endif
12179acbbeafSnn 
121874ecdb51SJohn Levon 	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
121974ecdb51SJohn Levon 		if (kpti_enable == 1) {
122074ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
122174ecdb51SJohn Levon 		} else {
122274ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
122374ecdb51SJohn Levon 		}
122474ecdb51SJohn Levon 	}
12259acbbeafSnn }
1226