xref: /illumos-gate/usr/src/uts/intel/os/desctbls.c (revision f0089e39)
17c478bd9Sstevel@tonic-gate /*
2ae115bc7Smrj  * CDDL HEADER START
3ae115bc7Smrj  *
4ae115bc7Smrj  * The contents of this file are subject to the terms of the
5ae115bc7Smrj  * Common Development and Distribution License (the "License").
6ae115bc7Smrj  * You may not use this file except in compliance with the License.
7ae115bc7Smrj  *
8ae115bc7Smrj  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ae115bc7Smrj  * or http://www.opensolaris.org/os/licensing.
10ae115bc7Smrj  * See the License for the specific language governing permissions
11ae115bc7Smrj  * and limitations under the License.
12ae115bc7Smrj  *
13ae115bc7Smrj  * When distributing Covered Code, include this CDDL HEADER in each
14ae115bc7Smrj  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ae115bc7Smrj  * If applicable, add the following below this CDDL HEADER, with the
16ae115bc7Smrj  * fields enclosed by brackets "[]" replaced with your own identifying
17ae115bc7Smrj  * information: Portions Copyright [yyyy] [name of copyright owner]
18ae115bc7Smrj  *
19ae115bc7Smrj  * CDDL HEADER END
20ae115bc7Smrj  */
21ae115bc7Smrj 
22ae115bc7Smrj /*
23eb5a5c78SSurya Prakki  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
26f16a0f4cSRobert Mustacchi /*
2774ecdb51SJohn Levon  * Copyright 2018 Joyent, Inc. All rights reserved.
28f16a0f4cSRobert Mustacchi  */
29f16a0f4cSRobert Mustacchi 
307c478bd9Sstevel@tonic-gate /*
317c478bd9Sstevel@tonic-gate  * Copyright (c) 1992 Terrence R. Lambert.
327c478bd9Sstevel@tonic-gate  * Copyright (c) 1990 The Regents of the University of California.
337c478bd9Sstevel@tonic-gate  * All rights reserved.
347c478bd9Sstevel@tonic-gate  *
357c478bd9Sstevel@tonic-gate  * This code is derived from software contributed to Berkeley by
367c478bd9Sstevel@tonic-gate  * William Jolitz.
377c478bd9Sstevel@tonic-gate  *
387c478bd9Sstevel@tonic-gate  * Redistribution and use in source and binary forms, with or without
397c478bd9Sstevel@tonic-gate  * modification, are permitted provided that the following conditions
407c478bd9Sstevel@tonic-gate  * are met:
417c478bd9Sstevel@tonic-gate  * 1. Redistributions of source code must retain the above copyright
427c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer.
437c478bd9Sstevel@tonic-gate  * 2. Redistributions in binary form must reproduce the above copyright
447c478bd9Sstevel@tonic-gate  *    notice, this list of conditions and the following disclaimer in the
457c478bd9Sstevel@tonic-gate  *    documentation and/or other materials provided with the distribution.
467c478bd9Sstevel@tonic-gate  * 3. All advertising materials mentioning features or use of this software
477c478bd9Sstevel@tonic-gate  *    must display the following acknowledgement:
487c478bd9Sstevel@tonic-gate  *	This product includes software developed by the University of
497c478bd9Sstevel@tonic-gate  *	California, Berkeley and its contributors.
507c478bd9Sstevel@tonic-gate  * 4. Neither the name of the University nor the names of its contributors
517c478bd9Sstevel@tonic-gate  *    may be used to endorse or promote products derived from this software
527c478bd9Sstevel@tonic-gate  *    without specific prior written permission.
537c478bd9Sstevel@tonic-gate  *
547c478bd9Sstevel@tonic-gate  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
557c478bd9Sstevel@tonic-gate  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
567c478bd9Sstevel@tonic-gate  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
577c478bd9Sstevel@tonic-gate  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
587c478bd9Sstevel@tonic-gate  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
597c478bd9Sstevel@tonic-gate  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
607c478bd9Sstevel@tonic-gate  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
617c478bd9Sstevel@tonic-gate  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
627c478bd9Sstevel@tonic-gate  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
637c478bd9Sstevel@tonic-gate  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
647c478bd9Sstevel@tonic-gate  * SUCH DAMAGE.
657c478bd9Sstevel@tonic-gate  *
667c478bd9Sstevel@tonic-gate  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
677c478bd9Sstevel@tonic-gate  */
687c478bd9Sstevel@tonic-gate 
697c478bd9Sstevel@tonic-gate #include <sys/types.h>
70ae115bc7Smrj #include <sys/sysmacros.h>
717c478bd9Sstevel@tonic-gate #include <sys/tss.h>
727c478bd9Sstevel@tonic-gate #include <sys/segments.h>
737c478bd9Sstevel@tonic-gate #include <sys/trap.h>
747c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
75ae115bc7Smrj #include <sys/bootconf.h>
767c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h>
77ae115bc7Smrj #include <sys/controlregs.h>
787c478bd9Sstevel@tonic-gate #include <sys/archsystm.h>
797c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
807c478bd9Sstevel@tonic-gate #include <sys/kobj.h>
817c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
827c478bd9Sstevel@tonic-gate #include <sys/reboot.h>
837c478bd9Sstevel@tonic-gate #include <sys/kdi.h>
84ae115bc7Smrj #include <sys/mach_mmu.h>
850baeff3dSrab #include <sys/systm.h>
8674ecdb51SJohn Levon #include <sys/note.h>
87843e1988Sjohnlev 
88843e1988Sjohnlev #ifdef __xpv
89843e1988Sjohnlev #include <sys/hypervisor.h>
90843e1988Sjohnlev #include <vm/as.h>
91843e1988Sjohnlev #endif
92843e1988Sjohnlev 
93ae115bc7Smrj #include <sys/promif.h>
94ae115bc7Smrj #include <sys/bootinfo.h>
95ae115bc7Smrj #include <vm/kboot_mmu.h>
96843e1988Sjohnlev #include <vm/hat_pte.h>
977c478bd9Sstevel@tonic-gate 
987c478bd9Sstevel@tonic-gate /*
997c478bd9Sstevel@tonic-gate  * cpu0 and default tables and structures.
1007c478bd9Sstevel@tonic-gate  */
101ae115bc7Smrj user_desc_t	*gdt0;
102843e1988Sjohnlev #if !defined(__xpv)
1037c478bd9Sstevel@tonic-gate desctbr_t	gdt0_default_r;
104843e1988Sjohnlev #endif
1057c478bd9Sstevel@tonic-gate 
106027bcc9fSToomas Soome gate_desc_t	*idt0;		/* interrupt descriptor table */
1077c478bd9Sstevel@tonic-gate 
108f16a0f4cSRobert Mustacchi tss_t		*ktss0;			/* kernel task state structure */
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate 
1117c478bd9Sstevel@tonic-gate user_desc_t	zero_udesc;		/* base zero user desc native procs */
112843e1988Sjohnlev user_desc_t	null_udesc;		/* null user descriptor */
113843e1988Sjohnlev system_desc_t	null_sdesc;		/* null system descriptor */
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
1167c478bd9Sstevel@tonic-gate 
117843e1988Sjohnlev user_desc_t	ucs_on;
118843e1988Sjohnlev user_desc_t	ucs_off;
119843e1988Sjohnlev user_desc_t	ucs32_on;
120843e1988Sjohnlev user_desc_t	ucs32_off;
121843e1988Sjohnlev 
12274ecdb51SJohn Levon /*
12374ecdb51SJohn Levon  * If the size of this is changed, you must update hat_pcp_setup() and the
12474ecdb51SJohn Levon  * definitions in exception.s
12574ecdb51SJohn Levon  */
12674ecdb51SJohn Levon extern char dblfault_stack0[DEFAULTSTKSZ];
12774ecdb51SJohn Levon extern char nmi_stack0[DEFAULTSTKSZ];
12874ecdb51SJohn Levon extern char mce_stack0[DEFAULTSTKSZ];
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate extern void	fast_null(void);
1317c478bd9Sstevel@tonic-gate extern hrtime_t	get_hrtime(void);
1327c478bd9Sstevel@tonic-gate extern hrtime_t	gethrvtime(void);
1337c478bd9Sstevel@tonic-gate extern hrtime_t	get_hrestime(void);
1347c478bd9Sstevel@tonic-gate extern uint64_t	getlgrp(void);
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate void (*(fasttable[]))(void) = {
1377c478bd9Sstevel@tonic-gate 	fast_null,			/* T_FNULL routine */
1387c478bd9Sstevel@tonic-gate 	fast_null,			/* T_FGETFP routine (initially null) */
1397c478bd9Sstevel@tonic-gate 	fast_null,			/* T_FSETFP routine (initially null) */
140027bcc9fSToomas Soome 	(void (*)())(uintptr_t)get_hrtime,	/* T_GETHRTIME */
141027bcc9fSToomas Soome 	(void (*)())(uintptr_t)gethrvtime,	/* T_GETHRVTIME */
142027bcc9fSToomas Soome 	(void (*)())(uintptr_t)get_hrestime,	/* T_GETHRESTIME */
143027bcc9fSToomas Soome 	(void (*)())(uintptr_t)getlgrp		/* T_GETLGRP */
1447c478bd9Sstevel@tonic-gate };
1457c478bd9Sstevel@tonic-gate 
1469acbbeafSnn /*
1479acbbeafSnn  * Structure containing pre-computed descriptors to allow us to temporarily
1489acbbeafSnn  * interpose on a standard handler.
1499acbbeafSnn  */
1509acbbeafSnn struct interposing_handler {
1519acbbeafSnn 	int ih_inum;
1529acbbeafSnn 	gate_desc_t ih_interp_desc;
1539acbbeafSnn 	gate_desc_t ih_default_desc;
1549acbbeafSnn };
1559acbbeafSnn 
1569acbbeafSnn /*
1579acbbeafSnn  * The brand infrastructure interposes on two handlers, and we use one as a
1589acbbeafSnn  * NULL signpost.
1599acbbeafSnn  */
160eb5a5c78SSurya Prakki static struct interposing_handler brand_tbl[2];
1619acbbeafSnn 
1627c478bd9Sstevel@tonic-gate /*
1637c478bd9Sstevel@tonic-gate  * software prototypes for default local descriptor table
1647c478bd9Sstevel@tonic-gate  */
1657c478bd9Sstevel@tonic-gate 
1667c478bd9Sstevel@tonic-gate /*
1677c478bd9Sstevel@tonic-gate  * Routines for loading segment descriptors in format the hardware
1687c478bd9Sstevel@tonic-gate  * can understand.
1697c478bd9Sstevel@tonic-gate  */
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate /*
1727c478bd9Sstevel@tonic-gate  * In long mode we have the new L or long mode attribute bit
1737c478bd9Sstevel@tonic-gate  * for code segments. Only the conforming bit in type is used along
1747c478bd9Sstevel@tonic-gate  * with descriptor priority and present bits. Default operand size must
1757c478bd9Sstevel@tonic-gate  * be zero when in long mode. In 32-bit compatibility mode all fields
1767c478bd9Sstevel@tonic-gate  * are treated as in legacy mode. For data segments while in long mode
1777c478bd9Sstevel@tonic-gate  * only the present bit is loaded.
1787c478bd9Sstevel@tonic-gate  */
1797c478bd9Sstevel@tonic-gate void
1807c478bd9Sstevel@tonic-gate set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
1817c478bd9Sstevel@tonic-gate     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
1827c478bd9Sstevel@tonic-gate {
1837c478bd9Sstevel@tonic-gate 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
184a0955b86SJohn Levon 	/* This should never be a "system" segment. */
185a0955b86SJohn Levon 	ASSERT3U(type & SDT_S, !=, 0);
1867c478bd9Sstevel@tonic-gate 
1877c478bd9Sstevel@tonic-gate 	/*
1887c478bd9Sstevel@tonic-gate 	 * 64-bit long mode.
1897c478bd9Sstevel@tonic-gate 	 */
1907c478bd9Sstevel@tonic-gate 	if (lmode == SDP_LONG)
1917c478bd9Sstevel@tonic-gate 		dp->usd_def32 = 0;		/* 32-bit operands only */
1927c478bd9Sstevel@tonic-gate 	else
1937c478bd9Sstevel@tonic-gate 		/*
1947c478bd9Sstevel@tonic-gate 		 * 32-bit compatibility mode.
1957c478bd9Sstevel@tonic-gate 		 */
1967c478bd9Sstevel@tonic-gate 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
1977c478bd9Sstevel@tonic-gate 
198a0955b86SJohn Levon 	/*
199a0955b86SJohn Levon 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
200a0955b86SJohn Levon 	 * will write to the GDT whenever we change segment registers around.
201a0955b86SJohn Levon 	 * With KPTI on, the GDT is read-only in the user page table, which
202a0955b86SJohn Levon 	 * causes crashes if we don't set this.
203a0955b86SJohn Levon 	 */
204a0955b86SJohn Levon 	ASSERT3U(type & SDT_A, !=, 0);
205a0955b86SJohn Levon 
2067c478bd9Sstevel@tonic-gate 	dp->usd_long = lmode;	/* 64-bit mode */
2077c478bd9Sstevel@tonic-gate 	dp->usd_type = type;
2087c478bd9Sstevel@tonic-gate 	dp->usd_dpl = dpl;
2097c478bd9Sstevel@tonic-gate 	dp->usd_p = 1;
2107c478bd9Sstevel@tonic-gate 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate 	dp->usd_lobase = (uintptr_t)base;
2137c478bd9Sstevel@tonic-gate 	dp->usd_midbase = (uintptr_t)base >> 16;
2147c478bd9Sstevel@tonic-gate 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
2157c478bd9Sstevel@tonic-gate 	dp->usd_lolimit = size;
2167c478bd9Sstevel@tonic-gate 	dp->usd_hilimit = (uintptr_t)size >> 16;
2177c478bd9Sstevel@tonic-gate }
2187c478bd9Sstevel@tonic-gate 
2197c478bd9Sstevel@tonic-gate /*
2207c478bd9Sstevel@tonic-gate  * Install system segment descriptor for LDT and TSS segments.
2217c478bd9Sstevel@tonic-gate  */
2227c478bd9Sstevel@tonic-gate 
2237c478bd9Sstevel@tonic-gate void
2247c478bd9Sstevel@tonic-gate set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
2257c478bd9Sstevel@tonic-gate     uint_t dpl)
2267c478bd9Sstevel@tonic-gate {
2277c478bd9Sstevel@tonic-gate 	dp->ssd_lolimit = size;
2287c478bd9Sstevel@tonic-gate 	dp->ssd_hilimit = (uintptr_t)size >> 16;
2297c478bd9Sstevel@tonic-gate 
2307c478bd9Sstevel@tonic-gate 	dp->ssd_lobase = (uintptr_t)base;
2317c478bd9Sstevel@tonic-gate 	dp->ssd_midbase = (uintptr_t)base >> 16;
2327c478bd9Sstevel@tonic-gate 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
2337c478bd9Sstevel@tonic-gate 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate 	dp->ssd_type = type;
2367c478bd9Sstevel@tonic-gate 	dp->ssd_zero1 = 0;	/* must be zero */
2377c478bd9Sstevel@tonic-gate 	dp->ssd_zero2 = 0;
2387c478bd9Sstevel@tonic-gate 	dp->ssd_dpl = dpl;
2397c478bd9Sstevel@tonic-gate 	dp->ssd_p = 1;
2407c478bd9Sstevel@tonic-gate 	dp->ssd_gran = 0;	/* force byte units */
2417c478bd9Sstevel@tonic-gate }
2427c478bd9Sstevel@tonic-gate 
243843e1988Sjohnlev void *
244843e1988Sjohnlev get_ssd_base(system_desc_t *dp)
245843e1988Sjohnlev {
246843e1988Sjohnlev 	uintptr_t	base;
247843e1988Sjohnlev 
248843e1988Sjohnlev 	base = (uintptr_t)dp->ssd_lobase |
249843e1988Sjohnlev 	    (uintptr_t)dp->ssd_midbase << 16 |
250843e1988Sjohnlev 	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
251843e1988Sjohnlev 	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
252843e1988Sjohnlev 	return ((void *)base);
253843e1988Sjohnlev }
254843e1988Sjohnlev 
2557c478bd9Sstevel@tonic-gate /*
2567c478bd9Sstevel@tonic-gate  * Install gate segment descriptor for interrupt, trap, call and task gates.
25774ecdb51SJohn Levon  *
25874ecdb51SJohn Levon  * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on
25974ecdb51SJohn Levon  * all interrupts.  We have different ISTs for each class of exceptions that are
26074ecdb51SJohn Levon  * most likely to occur while handling an existing exception; while many of
26174ecdb51SJohn Levon  * these are just going to panic, it's nice not to trample on the existing
26274ecdb51SJohn Levon  * exception state for debugging purposes.
26374ecdb51SJohn Levon  *
26474ecdb51SJohn Levon  * Normal interrupts are all redirected unconditionally to the KPTI trampoline
26574ecdb51SJohn Levon  * stack space. This unifies the trampoline handling between user and kernel
26674ecdb51SJohn Levon  * space (and avoids the need to touch %gs).
26774ecdb51SJohn Levon  *
26874ecdb51SJohn Levon  * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when
26974ecdb51SJohn Levon  * we do a read from KMDB that cause another #PF.  Without its own IST, this
27074ecdb51SJohn Levon  * would stomp on the kernel's mcpu_kpti_flt frame.
2717c478bd9Sstevel@tonic-gate  */
27274ecdb51SJohn Levon uint_t
27374ecdb51SJohn Levon idt_vector_to_ist(uint_t vector)
2747c478bd9Sstevel@tonic-gate {
27574ecdb51SJohn Levon #if defined(__xpv)
27674ecdb51SJohn Levon 	_NOTE(ARGUNUSED(vector));
27774ecdb51SJohn Levon 	return (IST_NONE);
27874ecdb51SJohn Levon #else
27974ecdb51SJohn Levon 	switch (vector) {
28074ecdb51SJohn Levon 	/* These should always use IST even without KPTI enabled. */
28174ecdb51SJohn Levon 	case T_DBLFLT:
28274ecdb51SJohn Levon 		return (IST_DF);
28374ecdb51SJohn Levon 	case T_NMIFLT:
28474ecdb51SJohn Levon 		return (IST_NMI);
28574ecdb51SJohn Levon 	case T_MCE:
28674ecdb51SJohn Levon 		return (IST_MCE);
28774ecdb51SJohn Levon 
28874ecdb51SJohn Levon 	case T_BPTFLT:
28974ecdb51SJohn Levon 	case T_SGLSTP:
29074ecdb51SJohn Levon 		if (kpti_enable == 1) {
29174ecdb51SJohn Levon 			return (IST_DBG);
29274ecdb51SJohn Levon 		}
29374ecdb51SJohn Levon 		return (IST_NONE);
29474ecdb51SJohn Levon 	case T_STKFLT:
29574ecdb51SJohn Levon 	case T_GPFLT:
29674ecdb51SJohn Levon 	case T_PGFLT:
29774ecdb51SJohn Levon 		if (kpti_enable == 1) {
29874ecdb51SJohn Levon 			return (IST_NESTABLE);
29974ecdb51SJohn Levon 		}
30074ecdb51SJohn Levon 		return (IST_NONE);
30174ecdb51SJohn Levon 	default:
30274ecdb51SJohn Levon 		if (kpti_enable == 1) {
30374ecdb51SJohn Levon 			return (IST_DEFAULT);
30474ecdb51SJohn Levon 		}
30574ecdb51SJohn Levon 		return (IST_NONE);
30674ecdb51SJohn Levon 	}
307843e1988Sjohnlev #endif
3087c478bd9Sstevel@tonic-gate }
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate void
3117c478bd9Sstevel@tonic-gate set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
31274ecdb51SJohn Levon     uint_t type, uint_t dpl, uint_t ist)
3137c478bd9Sstevel@tonic-gate {
3147c478bd9Sstevel@tonic-gate 	dp->sgd_looffset = (uintptr_t)func;
3157c478bd9Sstevel@tonic-gate 	dp->sgd_hioffset = (uintptr_t)func >> 16;
31674ecdb51SJohn Levon 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
3177c478bd9Sstevel@tonic-gate 	dp->sgd_selector =  (uint16_t)sel;
31874ecdb51SJohn Levon 	dp->sgd_ist = ist;
3197c478bd9Sstevel@tonic-gate 	dp->sgd_type = type;
3207c478bd9Sstevel@tonic-gate 	dp->sgd_dpl = dpl;
3217c478bd9Sstevel@tonic-gate 	dp->sgd_p = 1;
3227c478bd9Sstevel@tonic-gate }
3237c478bd9Sstevel@tonic-gate 
324843e1988Sjohnlev /*
325843e1988Sjohnlev  * Updates a single user descriptor in the the GDT of the current cpu.
326843e1988Sjohnlev  * Caller is responsible for preventing cpu migration.
327843e1988Sjohnlev  */
328843e1988Sjohnlev 
329843e1988Sjohnlev void
330843e1988Sjohnlev gdt_update_usegd(uint_t sidx, user_desc_t *udp)
331843e1988Sjohnlev {
332a0955b86SJohn Levon #if defined(DEBUG)
333a0955b86SJohn Levon 	/* This should never be a "system" segment, but it might be null. */
334a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
335a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
336a0955b86SJohn Levon 	}
337a0955b86SJohn Levon 	/*
338a0955b86SJohn Levon 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
339a0955b86SJohn Levon 	 * will write to the GDT whenever we change segment registers around.
340a0955b86SJohn Levon 	 * With KPTI on, the GDT is read-only in the user page table, which
341a0955b86SJohn Levon 	 * causes crashes if we don't set this.
342a0955b86SJohn Levon 	 */
343a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
344a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
345a0955b86SJohn Levon 	}
346a0955b86SJohn Levon #endif
347843e1988Sjohnlev 
348a0955b86SJohn Levon #if defined(__xpv)
349843e1988Sjohnlev 	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
350843e1988Sjohnlev 
351843e1988Sjohnlev 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
352843e1988Sjohnlev 		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
353843e1988Sjohnlev 
354843e1988Sjohnlev #else	/* __xpv */
355843e1988Sjohnlev 	CPU->cpu_gdt[sidx] = *udp;
356843e1988Sjohnlev #endif	/* __xpv */
357843e1988Sjohnlev }
358843e1988Sjohnlev 
359843e1988Sjohnlev /*
360843e1988Sjohnlev  * Writes single descriptor pointed to by udp into a processes
361843e1988Sjohnlev  * LDT entry pointed to by ldp.
362843e1988Sjohnlev  */
363843e1988Sjohnlev int
364843e1988Sjohnlev ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
365843e1988Sjohnlev {
366a0955b86SJohn Levon #if defined(DEBUG)
367a0955b86SJohn Levon 	/* This should never be a "system" segment, but it might be null. */
368a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
369a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
370a0955b86SJohn Levon 	}
371a0955b86SJohn Levon 	/*
372a0955b86SJohn Levon 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
373a0955b86SJohn Levon 	 * will write to the LDT whenever we change segment registers around.
374a0955b86SJohn Levon 	 * With KPTI on, the LDT is read-only in the user page table, which
375a0955b86SJohn Levon 	 * causes crashes if we don't set this.
376a0955b86SJohn Levon 	 */
377a0955b86SJohn Levon 	if (udp->usd_p != 0 || udp->usd_type != 0) {
378a0955b86SJohn Levon 		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
379a0955b86SJohn Levon 	}
380a0955b86SJohn Levon #endif
381843e1988Sjohnlev 
382a0955b86SJohn Levon #if defined(__xpv)
383843e1988Sjohnlev 	uint64_t dpa;
384843e1988Sjohnlev 
385843e1988Sjohnlev 	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
386843e1988Sjohnlev 	    ((uintptr_t)ldp & PAGEOFFSET);
387843e1988Sjohnlev 
388843e1988Sjohnlev 	/*
389843e1988Sjohnlev 	 * The hypervisor is a little more restrictive about what it
390843e1988Sjohnlev 	 * supports in the LDT.
391843e1988Sjohnlev 	 */
392843e1988Sjohnlev 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
393843e1988Sjohnlev 		return (EINVAL);
394843e1988Sjohnlev 
395843e1988Sjohnlev #else	/* __xpv */
396843e1988Sjohnlev 	*ldp = *udp;
397843e1988Sjohnlev 
398843e1988Sjohnlev #endif	/* __xpv */
399843e1988Sjohnlev 	return (0);
400843e1988Sjohnlev }
401843e1988Sjohnlev 
402843e1988Sjohnlev #if defined(__xpv)
403843e1988Sjohnlev 
404843e1988Sjohnlev /*
405843e1988Sjohnlev  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
406843e1988Sjohnlev  * Returns true if a valid entry was written.
407843e1988Sjohnlev  */
408843e1988Sjohnlev int
409843e1988Sjohnlev xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
410843e1988Sjohnlev {
411843e1988Sjohnlev 	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */
412843e1988Sjohnlev 
413843e1988Sjohnlev 	/*
414843e1988Sjohnlev 	 * skip holes in the IDT
415843e1988Sjohnlev 	 */
416843e1988Sjohnlev 	if (GATESEG_GETOFFSET(sgd) == 0)
417843e1988Sjohnlev 		return (0);
418843e1988Sjohnlev 
419843e1988Sjohnlev 	ASSERT(sgd->sgd_type == SDT_SYSIGT);
420843e1988Sjohnlev 	ti->vector = vec;
421843e1988Sjohnlev 	TI_SET_DPL(ti, sgd->sgd_dpl);
422843e1988Sjohnlev 
423843e1988Sjohnlev 	/*
424843e1988Sjohnlev 	 * Is this an interrupt gate?
425843e1988Sjohnlev 	 */
426843e1988Sjohnlev 	if (sgd->sgd_type == SDT_SYSIGT) {
427843e1988Sjohnlev 		/* LINTED */
428843e1988Sjohnlev 		TI_SET_IF(ti, 1);
429843e1988Sjohnlev 	}
430843e1988Sjohnlev 	ti->cs = sgd->sgd_selector;
431843e1988Sjohnlev 	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
432843e1988Sjohnlev 	ti->address = GATESEG_GETOFFSET(sgd);
433843e1988Sjohnlev 	return (1);
434843e1988Sjohnlev }
435843e1988Sjohnlev 
436843e1988Sjohnlev /*
437843e1988Sjohnlev  * Convert a single hw format gate descriptor and write it into our virtual IDT.
438843e1988Sjohnlev  */
439843e1988Sjohnlev void
440843e1988Sjohnlev xen_idt_write(gate_desc_t *sgd, uint_t vec)
441843e1988Sjohnlev {
442843e1988Sjohnlev 	trap_info_t trapinfo[2];
443843e1988Sjohnlev 
444843e1988Sjohnlev 	bzero(trapinfo, sizeof (trapinfo));
445843e1988Sjohnlev 	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
446843e1988Sjohnlev 		return;
447843e1988Sjohnlev 	if (xen_set_trap_table(trapinfo) != 0)
448843e1988Sjohnlev 		panic("xen_idt_write: xen_set_trap_table() failed");
449843e1988Sjohnlev }
450843e1988Sjohnlev 
451843e1988Sjohnlev #endif	/* __xpv */
452843e1988Sjohnlev 
4537c478bd9Sstevel@tonic-gate 
4547c478bd9Sstevel@tonic-gate /*
4557c478bd9Sstevel@tonic-gate  * Build kernel GDT.
4567c478bd9Sstevel@tonic-gate  */
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate static void
459ae115bc7Smrj init_gdt_common(user_desc_t *gdt)
4607c478bd9Sstevel@tonic-gate {
461ae115bc7Smrj 	int i;
4627c478bd9Sstevel@tonic-gate 
4637c478bd9Sstevel@tonic-gate 	/*
4647c478bd9Sstevel@tonic-gate 	 * 64-bit kernel code segment.
4657c478bd9Sstevel@tonic-gate 	 */
466ae115bc7Smrj 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
4677c478bd9Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate 	/*
4707c478bd9Sstevel@tonic-gate 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
4717c478bd9Sstevel@tonic-gate 	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
4727c478bd9Sstevel@tonic-gate 	 * instruction to return from system calls back to 32-bit applications.
4737c478bd9Sstevel@tonic-gate 	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
4747c478bd9Sstevel@tonic-gate 	 * descriptors. We therefore must ensure that the kernel uses something,
4757c478bd9Sstevel@tonic-gate 	 * though it will be ignored by hardware, that is compatible with 32-bit
4767c478bd9Sstevel@tonic-gate 	 * apps. For the same reason we must set the default op size of this
4777c478bd9Sstevel@tonic-gate 	 * descriptor to 32-bit operands.
4787c478bd9Sstevel@tonic-gate 	 */
479ae115bc7Smrj 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
4807c478bd9Sstevel@tonic-gate 	    SEL_KPL, SDP_PAGES, SDP_OP32);
481ae115bc7Smrj 	gdt[GDT_KDATA].usd_def32 = 1;
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate 	/*
4847c478bd9Sstevel@tonic-gate 	 * 64-bit user code segment.
4857c478bd9Sstevel@tonic-gate 	 */
486ae115bc7Smrj 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
4877c478bd9Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 	/*
4907c478bd9Sstevel@tonic-gate 	 * 32-bit user code segment.
4917c478bd9Sstevel@tonic-gate 	 */
492ae115bc7Smrj 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
4937c478bd9Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
4947c478bd9Sstevel@tonic-gate 
495843e1988Sjohnlev 	/*
496843e1988Sjohnlev 	 * See gdt_ucode32() and gdt_ucode_native().
497843e1988Sjohnlev 	 */
498843e1988Sjohnlev 	ucs_on = ucs_off = gdt[GDT_UCODE];
499843e1988Sjohnlev 	ucs_off.usd_p = 0;	/* forces #np fault */
500843e1988Sjohnlev 
501843e1988Sjohnlev 	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
502843e1988Sjohnlev 	ucs32_off.usd_p = 0;	/* forces #np fault */
503843e1988Sjohnlev 
5047c478bd9Sstevel@tonic-gate 	/*
5057c478bd9Sstevel@tonic-gate 	 * 32 and 64 bit data segments can actually share the same descriptor.
5067c478bd9Sstevel@tonic-gate 	 * In long mode only the present bit is checked but all other fields
5077c478bd9Sstevel@tonic-gate 	 * are loaded. But in compatibility mode all fields are interpreted
5087c478bd9Sstevel@tonic-gate 	 * as in legacy mode so they must be set correctly for a 32-bit data
5097c478bd9Sstevel@tonic-gate 	 * segment.
5107c478bd9Sstevel@tonic-gate 	 */
511ae115bc7Smrj 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
5127c478bd9Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
5137c478bd9Sstevel@tonic-gate 
514843e1988Sjohnlev #if !defined(__xpv)
515843e1988Sjohnlev 
5167c478bd9Sstevel@tonic-gate 	/*
5170baeff3dSrab 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
5180baeff3dSrab 	 * in the GDT is 0.
5197c478bd9Sstevel@tonic-gate 	 */
5207c478bd9Sstevel@tonic-gate 
5217c478bd9Sstevel@tonic-gate 	/*
5227c478bd9Sstevel@tonic-gate 	 * Kernel TSS
5237c478bd9Sstevel@tonic-gate 	 */
5240cfdb603Sjosephb 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
5250cfdb603Sjosephb 	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
5267c478bd9Sstevel@tonic-gate 
527843e1988Sjohnlev #endif	/* !__xpv */
528843e1988Sjohnlev 
5297c478bd9Sstevel@tonic-gate 	/*
5307c478bd9Sstevel@tonic-gate 	 * Initialize fs and gs descriptors for 32 bit processes.
5317c478bd9Sstevel@tonic-gate 	 * Only attributes and limits are initialized, the effective
5327c478bd9Sstevel@tonic-gate 	 * base address is programmed via fsbase/gsbase.
5337c478bd9Sstevel@tonic-gate 	 */
534ae115bc7Smrj 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
5357c478bd9Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
536ae115bc7Smrj 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
5377c478bd9Sstevel@tonic-gate 	    SEL_UPL, SDP_PAGES, SDP_OP32);
5387c478bd9Sstevel@tonic-gate 
5399acbbeafSnn 	/*
5409acbbeafSnn 	 * Initialize the descriptors set aside for brand usage.
5419acbbeafSnn 	 * Only attributes and limits are initialized.
5429acbbeafSnn 	 */
5439acbbeafSnn 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
544ae115bc7Smrj 		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
5459acbbeafSnn 		    SEL_UPL, SDP_PAGES, SDP_OP32);
5469acbbeafSnn 
5477c478bd9Sstevel@tonic-gate 	/*
5487c478bd9Sstevel@tonic-gate 	 * Initialize convenient zero base user descriptors for clearing
5497c478bd9Sstevel@tonic-gate 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
5507c478bd9Sstevel@tonic-gate 	 * an example.
5517c478bd9Sstevel@tonic-gate 	 */
5527c478bd9Sstevel@tonic-gate 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
5537c478bd9Sstevel@tonic-gate 	    SDP_BYTES, SDP_OP32);
5547c478bd9Sstevel@tonic-gate 	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
5557c478bd9Sstevel@tonic-gate 	    SDP_PAGES, SDP_OP32);
5567c478bd9Sstevel@tonic-gate }
5577c478bd9Sstevel@tonic-gate 
558843e1988Sjohnlev #if defined(__xpv)
559843e1988Sjohnlev 
560843e1988Sjohnlev static user_desc_t *
561843e1988Sjohnlev init_gdt(void)
562843e1988Sjohnlev {
563843e1988Sjohnlev 	uint64_t gdtpa;
564843e1988Sjohnlev 	ulong_t ma[1];		/* XXPV should be a memory_t */
565843e1988Sjohnlev 	ulong_t addr;
566843e1988Sjohnlev 
567843e1988Sjohnlev #if !defined(__lint)
568843e1988Sjohnlev 	/*
569843e1988Sjohnlev 	 * Our gdt is never larger than a single page.
570843e1988Sjohnlev 	 */
571843e1988Sjohnlev 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
572843e1988Sjohnlev #endif
573843e1988Sjohnlev 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
574843e1988Sjohnlev 	    PAGESIZE, PAGESIZE);
575843e1988Sjohnlev 	bzero(gdt0, PAGESIZE);
576843e1988Sjohnlev 
577843e1988Sjohnlev 	init_gdt_common(gdt0);
578843e1988Sjohnlev 
579843e1988Sjohnlev 	/*
580843e1988Sjohnlev 	 * XXX Since we never invoke kmdb until after the kernel takes
581843e1988Sjohnlev 	 * over the descriptor tables why not have it use the kernel's
582843e1988Sjohnlev 	 * selectors?
583843e1988Sjohnlev 	 */
584843e1988Sjohnlev 	if (boothowto & RB_DEBUG) {
585843e1988Sjohnlev 		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
586843e1988Sjohnlev 		    SEL_KPL, SDP_PAGES, SDP_OP32);
587843e1988Sjohnlev 		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
588843e1988Sjohnlev 		    SEL_KPL, SDP_PAGES, SDP_OP32);
589843e1988Sjohnlev 	}
590843e1988Sjohnlev 
591843e1988Sjohnlev 	/*
592843e1988Sjohnlev 	 * Clear write permission for page containing the gdt and install it.
593843e1988Sjohnlev 	 */
594843e1988Sjohnlev 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
595843e1988Sjohnlev 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
596843e1988Sjohnlev 	kbm_read_only((uintptr_t)gdt0, gdtpa);
597843e1988Sjohnlev 	xen_set_gdt(ma, NGDT);
598843e1988Sjohnlev 
599843e1988Sjohnlev 	/*
600843e1988Sjohnlev 	 * Reload the segment registers to use the new GDT.
601843e1988Sjohnlev 	 * On 64-bit, fixup KCS_SEL to be in ring 3.
602843e1988Sjohnlev 	 * See KCS_SEL in segments.h.
603843e1988Sjohnlev 	 */
604843e1988Sjohnlev 	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
605843e1988Sjohnlev 
606843e1988Sjohnlev 	/*
607843e1988Sjohnlev 	 *  setup %gs for kernel
608843e1988Sjohnlev 	 */
609843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
610843e1988Sjohnlev 
611843e1988Sjohnlev 	/*
612843e1988Sjohnlev 	 * XX64 We should never dereference off "other gsbase" or
613843e1988Sjohnlev 	 * "fsbase".  So, we should arrange to point FSBASE and
614843e1988Sjohnlev 	 * KGSBASE somewhere truly awful e.g. point it at the last
615843e1988Sjohnlev 	 * valid address below the hole so that any attempts to index
616843e1988Sjohnlev 	 * off them cause an exception.
617843e1988Sjohnlev 	 *
618843e1988Sjohnlev 	 * For now, point it at 8G -- at least it should be unmapped
619843e1988Sjohnlev 	 * until some 64-bit processes run.
620843e1988Sjohnlev 	 */
621843e1988Sjohnlev 	addr = 0x200000000ul;
622843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_FS, addr);
623843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_GS_USER, addr);
624843e1988Sjohnlev 	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
625843e1988Sjohnlev 
626843e1988Sjohnlev 	return (gdt0);
627843e1988Sjohnlev }
628843e1988Sjohnlev 
629843e1988Sjohnlev #else	/* __xpv */
630843e1988Sjohnlev 
631ae115bc7Smrj static user_desc_t *
6327c478bd9Sstevel@tonic-gate init_gdt(void)
6337c478bd9Sstevel@tonic-gate {
6347c478bd9Sstevel@tonic-gate 	desctbr_t	r_bgdt, r_gdt;
6357c478bd9Sstevel@tonic-gate 	user_desc_t	*bgdt;
6367c478bd9Sstevel@tonic-gate 
637ae115bc7Smrj #if !defined(__lint)
6387c478bd9Sstevel@tonic-gate 	/*
639ae115bc7Smrj 	 * Our gdt is never larger than a single page.
640ae115bc7Smrj 	 */
641ae115bc7Smrj 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
642ae115bc7Smrj #endif
643ae115bc7Smrj 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
644ae115bc7Smrj 	    PAGESIZE, PAGESIZE);
645ae115bc7Smrj 	bzero(gdt0, PAGESIZE);
646ae115bc7Smrj 
647ae115bc7Smrj 	init_gdt_common(gdt0);
648ae115bc7Smrj 
649ae115bc7Smrj 	/*
650ae115bc7Smrj 	 * Copy in from boot's gdt to our gdt.
651ae115bc7Smrj 	 * Entry 0 is the null descriptor by definition.
6527c478bd9Sstevel@tonic-gate 	 */
6537c478bd9Sstevel@tonic-gate 	rd_gdtr(&r_bgdt);
6547c478bd9Sstevel@tonic-gate 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
6557c478bd9Sstevel@tonic-gate 	if (bgdt == NULL)
6567c478bd9Sstevel@tonic-gate 		panic("null boot gdt");
6577c478bd9Sstevel@tonic-gate 
658ae115bc7Smrj 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
659ae115bc7Smrj 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
660ae115bc7Smrj 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
661ae115bc7Smrj 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
662ae115bc7Smrj 	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
663ae115bc7Smrj 
664ae115bc7Smrj 	/*
665ae115bc7Smrj 	 * Install our new GDT
666ae115bc7Smrj 	 */
667ae115bc7Smrj 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
668ae115bc7Smrj 	r_gdt.dtr_base = (uintptr_t)gdt0;
669ae115bc7Smrj 	wr_gdtr(&r_gdt);
670ae115bc7Smrj 
671ae115bc7Smrj 	/*
672ae115bc7Smrj 	 * Reload the segment registers to use the new GDT
673ae115bc7Smrj 	 */
674ae115bc7Smrj 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
675ae115bc7Smrj 
676ae115bc7Smrj 	/*
677ae115bc7Smrj 	 *  setup %gs for kernel
678ae115bc7Smrj 	 */
679ae115bc7Smrj 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
680ae115bc7Smrj 
681ae115bc7Smrj 	/*
682ae115bc7Smrj 	 * XX64 We should never dereference off "other gsbase" or
683ae115bc7Smrj 	 * "fsbase".  So, we should arrange to point FSBASE and
684ae115bc7Smrj 	 * KGSBASE somewhere truly awful e.g. point it at the last
685ae115bc7Smrj 	 * valid address below the hole so that any attempts to index
686ae115bc7Smrj 	 * off them cause an exception.
687ae115bc7Smrj 	 *
688ae115bc7Smrj 	 * For now, point it at 8G -- at least it should be unmapped
689ae115bc7Smrj 	 * until some 64-bit processes run.
690ae115bc7Smrj 	 */
691ae115bc7Smrj 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
692ae115bc7Smrj 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
693ae115bc7Smrj 	return (gdt0);
694ae115bc7Smrj }
695ae115bc7Smrj 
696843e1988Sjohnlev #endif	/* __xpv */
697843e1988Sjohnlev 
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate /*
7007c478bd9Sstevel@tonic-gate  * Build kernel IDT.
7017c478bd9Sstevel@tonic-gate  *
702ae115bc7Smrj  * Note that for amd64 we pretty much require every gate to be an interrupt
703ae115bc7Smrj  * gate which blocks interrupts atomically on entry; that's because of our
704ae115bc7Smrj  * dependency on using 'swapgs' every time we come into the kernel to find
705ae115bc7Smrj  * the cpu structure. If we get interrupted just before doing that, %cs could
706ae115bc7Smrj  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
707ae115bc7Smrj  * %gsbase is really still pointing at something in userland. Bad things will
708ae115bc7Smrj  * ensue. We also use interrupt gates for i386 as well even though this is not
709ae115bc7Smrj  * required for some traps.
7107c478bd9Sstevel@tonic-gate  *
7117c478bd9Sstevel@tonic-gate  * Perhaps they should have invented a trap gate that does an atomic swapgs?
7127c478bd9Sstevel@tonic-gate  */
7137c478bd9Sstevel@tonic-gate static void
714ae115bc7Smrj init_idt_common(gate_desc_t *idt)
7157c478bd9Sstevel@tonic-gate {
71674ecdb51SJohn Levon 	set_gatesegd(&idt[T_ZERODIV],
71774ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_div0trap : &div0trap,
71874ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
71974ecdb51SJohn Levon 	set_gatesegd(&idt[T_SGLSTP],
72074ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
72174ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
72274ecdb51SJohn Levon 	set_gatesegd(&idt[T_NMIFLT],
72374ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_nmiint : &nmiint,
72474ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
72574ecdb51SJohn Levon 	set_gatesegd(&idt[T_BPTFLT],
72674ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_brktrap : &brktrap,
72774ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
72874ecdb51SJohn Levon 	set_gatesegd(&idt[T_OVFLW],
72974ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
73074ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
73174ecdb51SJohn Levon 	set_gatesegd(&idt[T_BOUNDFLT],
73274ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
73374ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
73474ecdb51SJohn Levon 	set_gatesegd(&idt[T_ILLINST],
73574ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
73674ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
73774ecdb51SJohn Levon 	set_gatesegd(&idt[T_NOEXTFLT],
73874ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
73974ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));
7407c478bd9Sstevel@tonic-gate 
7417c478bd9Sstevel@tonic-gate 	/*
7427c478bd9Sstevel@tonic-gate 	 * double fault handler.
743843e1988Sjohnlev 	 *
744843e1988Sjohnlev 	 * Note that on the hypervisor a guest does not receive #df faults.
745843e1988Sjohnlev 	 * Instead a failsafe event is injected into the guest if its selectors
746843e1988Sjohnlev 	 * and/or stack is in a broken state. See xen_failsafe_callback.
7477c478bd9Sstevel@tonic-gate 	 */
748843e1988Sjohnlev #if !defined(__xpv)
74974ecdb51SJohn Levon 	set_gatesegd(&idt[T_DBLFLT],
75074ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
75174ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
752843e1988Sjohnlev #endif	/* !__xpv */
7537c478bd9Sstevel@tonic-gate 
7547c478bd9Sstevel@tonic-gate 	/*
755ae115bc7Smrj 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
7567c478bd9Sstevel@tonic-gate 	 */
75774ecdb51SJohn Levon 	set_gatesegd(&idt[T_TSSFLT],
75874ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
75974ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
76074ecdb51SJohn Levon 	set_gatesegd(&idt[T_SEGFLT],
76174ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
76274ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
76374ecdb51SJohn Levon 	set_gatesegd(&idt[T_STKFLT],
76474ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_stktrap : &stktrap,
76574ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
76674ecdb51SJohn Levon 	set_gatesegd(&idt[T_GPFLT],
76774ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_gptrap : &gptrap,
76874ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
76974ecdb51SJohn Levon 	set_gatesegd(&idt[T_PGFLT],
77074ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_pftrap : &pftrap,
77174ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
77274ecdb51SJohn Levon 	set_gatesegd(&idt[T_EXTERRFLT],
77374ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_ndperr : &ndperr,
77474ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
77574ecdb51SJohn Levon 	set_gatesegd(&idt[T_ALIGNMENT],
77674ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_achktrap : &achktrap,
77774ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
77874ecdb51SJohn Levon 	set_gatesegd(&idt[T_MCE],
77974ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
78074ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
78174ecdb51SJohn Levon 	set_gatesegd(&idt[T_SIMDFPE],
78274ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
78374ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 	/*
7867c478bd9Sstevel@tonic-gate 	 * install fast trap handler at 210.
7877c478bd9Sstevel@tonic-gate 	 */
78874ecdb51SJohn Levon 	set_gatesegd(&idt[T_FASTTRAP],
78974ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
79074ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));
7917c478bd9Sstevel@tonic-gate 
7927c478bd9Sstevel@tonic-gate 	/*
7937c478bd9Sstevel@tonic-gate 	 * System call handler.
7947c478bd9Sstevel@tonic-gate 	 */
79574ecdb51SJohn Levon 	set_gatesegd(&idt[T_SYSCALLINT],
79674ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
79774ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));
7987c478bd9Sstevel@tonic-gate 
7997c478bd9Sstevel@tonic-gate 	/*
800f498645aSahl 	 * Install the DTrace interrupt handler for the pid provider.
8017c478bd9Sstevel@tonic-gate 	 */
80274ecdb51SJohn Levon 	set_gatesegd(&idt[T_DTRACE_RET],
80374ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
80474ecdb51SJohn Levon 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));
8057c478bd9Sstevel@tonic-gate 
8069acbbeafSnn 	/*
807eb5a5c78SSurya Prakki 	 * Prepare interposing descriptor for the syscall handler
808eb5a5c78SSurya Prakki 	 * and cache copy of the default descriptor.
8099acbbeafSnn 	 */
810eb5a5c78SSurya Prakki 	brand_tbl[0].ih_inum = T_SYSCALLINT;
811eb5a5c78SSurya Prakki 	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
812ae115bc7Smrj 
81374ecdb51SJohn Levon 	set_gatesegd(&(brand_tbl[0].ih_interp_desc),
81474ecdb51SJohn Levon 	    (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
81574ecdb51SJohn Levon 	    &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
81674ecdb51SJohn Levon 	    idt_vector_to_ist(T_SYSCALLINT));
8179acbbeafSnn 
818eb5a5c78SSurya Prakki 	brand_tbl[1].ih_inum = 0;
8197c478bd9Sstevel@tonic-gate }
8207c478bd9Sstevel@tonic-gate 
821843e1988Sjohnlev #if defined(__xpv)
822843e1988Sjohnlev 
823843e1988Sjohnlev static void
824843e1988Sjohnlev init_idt(gate_desc_t *idt)
825843e1988Sjohnlev {
826843e1988Sjohnlev 	init_idt_common(idt);
827843e1988Sjohnlev }
828843e1988Sjohnlev 
829843e1988Sjohnlev #else	/* __xpv */
830843e1988Sjohnlev 
8317c478bd9Sstevel@tonic-gate static void
832ae115bc7Smrj init_idt(gate_desc_t *idt)
8337c478bd9Sstevel@tonic-gate {
8347c478bd9Sstevel@tonic-gate 	char	ivctname[80];
8357c478bd9Sstevel@tonic-gate 	void	(*ivctptr)(void);
8367c478bd9Sstevel@tonic-gate 	int	i;
8377c478bd9Sstevel@tonic-gate 
8387c478bd9Sstevel@tonic-gate 	/*
8397c478bd9Sstevel@tonic-gate 	 * Initialize entire table with 'reserved' trap and then overwrite
8407c478bd9Sstevel@tonic-gate 	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
8417c478bd9Sstevel@tonic-gate 	 * since it can only be generated on a 386 processor. 15 is also
8427c478bd9Sstevel@tonic-gate 	 * unsupported and reserved.
8437c478bd9Sstevel@tonic-gate 	 */
84474ecdb51SJohn Levon #if !defined(__xpv)
84574ecdb51SJohn Levon 	for (i = 0; i < NIDT; i++) {
84674ecdb51SJohn Levon 		set_gatesegd(&idt[i],
84774ecdb51SJohn Levon 		    (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
84874ecdb51SJohn Levon 		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
84974ecdb51SJohn Levon 		    idt_vector_to_ist(T_RESVTRAP));
85074ecdb51SJohn Levon 	}
85174ecdb51SJohn Levon #else
85274ecdb51SJohn Levon 	for (i = 0; i < NIDT; i++) {
8539844da31SSeth Goldberg 		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
85474ecdb51SJohn Levon 		    IST_NONE);
85574ecdb51SJohn Levon 	}
85674ecdb51SJohn Levon #endif
8577c478bd9Sstevel@tonic-gate 
8587c478bd9Sstevel@tonic-gate 	/*
8597c478bd9Sstevel@tonic-gate 	 * 20-31 reserved
8607c478bd9Sstevel@tonic-gate 	 */
86174ecdb51SJohn Levon #if !defined(__xpv)
86274ecdb51SJohn Levon 	for (i = 20; i < 32; i++) {
86374ecdb51SJohn Levon 		set_gatesegd(&idt[i],
86474ecdb51SJohn Levon 		    (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
86574ecdb51SJohn Levon 		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
86674ecdb51SJohn Levon 		    idt_vector_to_ist(T_INVALTRAP));
86774ecdb51SJohn Levon 	}
86874ecdb51SJohn Levon #else
86974ecdb51SJohn Levon 	for (i = 20; i < 32; i++) {
8709844da31SSeth Goldberg 		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
87174ecdb51SJohn Levon 		    IST_NONE);
87274ecdb51SJohn Levon 	}
87374ecdb51SJohn Levon #endif
8747c478bd9Sstevel@tonic-gate 
8757c478bd9Sstevel@tonic-gate 	/*
8767c478bd9Sstevel@tonic-gate 	 * interrupts 32 - 255
8777c478bd9Sstevel@tonic-gate 	 */
8787c478bd9Sstevel@tonic-gate 	for (i = 32; i < 256; i++) {
87974ecdb51SJohn Levon #if !defined(__xpv)
88074ecdb51SJohn Levon 		(void) snprintf(ivctname, sizeof (ivctname),
88174ecdb51SJohn Levon 		    (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
88274ecdb51SJohn Levon #else
8837c478bd9Sstevel@tonic-gate 		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
88474ecdb51SJohn Levon #endif
8857c478bd9Sstevel@tonic-gate 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
8867c478bd9Sstevel@tonic-gate 		if (ivctptr == NULL)
8877c478bd9Sstevel@tonic-gate 			panic("kobj_getsymvalue(%s) failed", ivctname);
8887c478bd9Sstevel@tonic-gate 
88974ecdb51SJohn Levon 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
89074ecdb51SJohn Levon 		    idt_vector_to_ist(i));
8917c478bd9Sstevel@tonic-gate 	}
8927c478bd9Sstevel@tonic-gate 
8939acbbeafSnn 	/*
894ae115bc7Smrj 	 * Now install the common ones. Note that it will overlay some
895ae115bc7Smrj 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
8967c478bd9Sstevel@tonic-gate 	 */
897ae115bc7Smrj 	init_idt_common(idt);
8987c478bd9Sstevel@tonic-gate }
8997c478bd9Sstevel@tonic-gate 
900843e1988Sjohnlev #endif	/* __xpv */
901843e1988Sjohnlev 
/*
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one. Under normal circumstances, the LDTR contains 0. Any process attempting
 * to reference the LDT will therefore cause a #gp. System calls made via the
 * obsolete lcall mechanism are emulated by the #gp fault handler.
 *
 * On bare metal the LDTR is written directly; under the hypervisor the
 * equivalent request (no table, zero entries) goes through xen_set_ldt().
 */
static void
init_ldt(void)
{
#if !defined(__xpv)
	wr_ldtr(0);
#else
	xen_set_ldt(NULL, 0);
#endif
}
9177c478bd9Sstevel@tonic-gate 
918843e1988Sjohnlev #if !defined(__xpv)
9197c478bd9Sstevel@tonic-gate 
9207c478bd9Sstevel@tonic-gate static void
9217c478bd9Sstevel@tonic-gate init_tss(void)
9227c478bd9Sstevel@tonic-gate {
92374ecdb51SJohn Levon 	extern struct cpu cpus[];
9247c478bd9Sstevel@tonic-gate 
9257c478bd9Sstevel@tonic-gate 	/*
92674ecdb51SJohn Levon 	 * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
92774ecdb51SJohn Levon 	 * context switch but it'll be overwritten with this same value anyway.
9287c478bd9Sstevel@tonic-gate 	 */
92974ecdb51SJohn Levon 	if (kpti_enable == 1) {
93074ecdb51SJohn Levon 		ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
93174ecdb51SJohn Levon 	}
9327c478bd9Sstevel@tonic-gate 
93374ecdb51SJohn Levon 	/* Set up the IST stacks for double fault, NMI, MCE. */
93474ecdb51SJohn Levon 	ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
93574ecdb51SJohn Levon 	ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
93674ecdb51SJohn Levon 	ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];
9377c478bd9Sstevel@tonic-gate 
9387c478bd9Sstevel@tonic-gate 	/*
93974ecdb51SJohn Levon 	 * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
94074ecdb51SJohn Levon 	 * enabled), and also for KDI (always).
9417c478bd9Sstevel@tonic-gate 	 */
94274ecdb51SJohn Levon 	ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
9437c478bd9Sstevel@tonic-gate 
94474ecdb51SJohn Levon 	if (kpti_enable == 1) {
94574ecdb51SJohn Levon 		/* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */
94674ecdb51SJohn Levon 		ktss0->tss_ist5 =
94774ecdb51SJohn Levon 		    (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
9487c478bd9Sstevel@tonic-gate 
94974ecdb51SJohn Levon 		/* This IST stack is used for all other intrs (for KPTI). */
95074ecdb51SJohn Levon 		ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
95174ecdb51SJohn Levon 	}
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 	/*
9547c478bd9Sstevel@tonic-gate 	 * Set I/O bit map offset equal to size of TSS segment limit
9557c478bd9Sstevel@tonic-gate 	 * for no I/O permission map. This will force all user I/O
9567c478bd9Sstevel@tonic-gate 	 * instructions to generate #gp fault.
9577c478bd9Sstevel@tonic-gate 	 */
9580cfdb603Sjosephb 	ktss0->tss_bitmapbase = sizeof (*ktss0);
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	/*
9617c478bd9Sstevel@tonic-gate 	 * Point %tr to descriptor for ktss0 in gdt.
9627c478bd9Sstevel@tonic-gate 	 */
9637c478bd9Sstevel@tonic-gate 	wr_tsr(KTSS_SEL);
9647c478bd9Sstevel@tonic-gate }
9657c478bd9Sstevel@tonic-gate 
966843e1988Sjohnlev #endif	/* !__xpv */
967843e1988Sjohnlev 
968843e1988Sjohnlev #if defined(__xpv)
969843e1988Sjohnlev 
970843e1988Sjohnlev void
971843e1988Sjohnlev init_desctbls(void)
972843e1988Sjohnlev {
973843e1988Sjohnlev 	uint_t vec;
974843e1988Sjohnlev 	user_desc_t *gdt;
975843e1988Sjohnlev 
976843e1988Sjohnlev 	/*
977843e1988Sjohnlev 	 * Setup and install our GDT.
978843e1988Sjohnlev 	 */
979843e1988Sjohnlev 	gdt = init_gdt();
980843e1988Sjohnlev 
981843e1988Sjohnlev 	/*
982843e1988Sjohnlev 	 * Store static pa of gdt to speed up pa_to_ma() translations
983843e1988Sjohnlev 	 * on lwp context switches.
984843e1988Sjohnlev 	 */
985843e1988Sjohnlev 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
9860cfdb603Sjosephb 	CPU->cpu_gdt = gdt;
987843e1988Sjohnlev 	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
988843e1988Sjohnlev 
989843e1988Sjohnlev 	/*
990843e1988Sjohnlev 	 * Setup and install our IDT.
991843e1988Sjohnlev 	 */
9920cfdb603Sjosephb #if !defined(__lint)
9930cfdb603Sjosephb 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
9940cfdb603Sjosephb #endif
9950cfdb603Sjosephb 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
9960cfdb603Sjosephb 	    PAGESIZE, PAGESIZE);
9979844da31SSeth Goldberg 	bzero(idt0, PAGESIZE);
9980cfdb603Sjosephb 	init_idt(idt0);
999843e1988Sjohnlev 	for (vec = 0; vec < NIDT; vec++)
1000843e1988Sjohnlev 		xen_idt_write(&idt0[vec], vec);
1001843e1988Sjohnlev 
10020cfdb603Sjosephb 	CPU->cpu_idt = idt0;
1003843e1988Sjohnlev 
1004843e1988Sjohnlev 	/*
1005843e1988Sjohnlev 	 * set default kernel stack
1006843e1988Sjohnlev 	 */
1007843e1988Sjohnlev 	xen_stack_switch(KDS_SEL,
1008843e1988Sjohnlev 	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1009843e1988Sjohnlev 
1010843e1988Sjohnlev 	xen_init_callbacks();
1011843e1988Sjohnlev 
1012843e1988Sjohnlev 	init_ldt();
1013843e1988Sjohnlev }
1014843e1988Sjohnlev 
1015843e1988Sjohnlev #else	/* __xpv */
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate void
1018ae115bc7Smrj init_desctbls(void)
10197c478bd9Sstevel@tonic-gate {
1020ae115bc7Smrj 	user_desc_t *gdt;
1021ae115bc7Smrj 	desctbr_t idtr;
1022ae115bc7Smrj 
10230cfdb603Sjosephb 	/*
10240cfdb603Sjosephb 	 * Allocate IDT and TSS structures on unique pages for better
10250cfdb603Sjosephb 	 * performance in virtual machines.
10260cfdb603Sjosephb 	 */
10270cfdb603Sjosephb #if !defined(__lint)
10280cfdb603Sjosephb 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
10290cfdb603Sjosephb #endif
10300cfdb603Sjosephb 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
10310cfdb603Sjosephb 	    PAGESIZE, PAGESIZE);
10329844da31SSeth Goldberg 	bzero(idt0, PAGESIZE);
10330cfdb603Sjosephb #if !defined(__lint)
10340cfdb603Sjosephb 	ASSERT(sizeof (*ktss0) <= PAGESIZE);
10350cfdb603Sjosephb #endif
1036f16a0f4cSRobert Mustacchi 	ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
10370cfdb603Sjosephb 	    PAGESIZE, PAGESIZE);
10389844da31SSeth Goldberg 	bzero(ktss0, PAGESIZE);
10390cfdb603Sjosephb 
10400cfdb603Sjosephb 
1041ae115bc7Smrj 	/*
1042ae115bc7Smrj 	 * Setup and install our GDT.
1043ae115bc7Smrj 	 */
1044ae115bc7Smrj 	gdt = init_gdt();
1045ae115bc7Smrj 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
10460cfdb603Sjosephb 	CPU->cpu_gdt = gdt;
1047ae115bc7Smrj 
104874ecdb51SJohn Levon 	/*
104974ecdb51SJohn Levon 	 * Initialize this CPU's LDT.
105074ecdb51SJohn Levon 	 */
105174ecdb51SJohn Levon 	CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
105274ecdb51SJohn Levon 	    LDT_CPU_SIZE, PAGESIZE);
105374ecdb51SJohn Levon 	bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
105474ecdb51SJohn Levon 	CPU->cpu_m.mcpu_ldt_len = 0;
105574ecdb51SJohn Levon 
1056ae115bc7Smrj 	/*
1057ae115bc7Smrj 	 * Setup and install our IDT.
1058ae115bc7Smrj 	 */
10590cfdb603Sjosephb 	init_idt(idt0);
1060ae115bc7Smrj 
1061ae115bc7Smrj 	idtr.dtr_base = (uintptr_t)idt0;
10620cfdb603Sjosephb 	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1063ae115bc7Smrj 	wr_idtr(&idtr);
10640cfdb603Sjosephb 	CPU->cpu_idt = idt0;
1065ae115bc7Smrj 
1066ae115bc7Smrj 
10677c478bd9Sstevel@tonic-gate 	init_tss();
10680cfdb603Sjosephb 	CPU->cpu_tss = ktss0;
10697c478bd9Sstevel@tonic-gate 	init_ldt();
107074ecdb51SJohn Levon 
107174ecdb51SJohn Levon 	/* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. */
107274ecdb51SJohn Levon 	kpti_safe_cr3 = (uint64_t)getcr3();
10737c478bd9Sstevel@tonic-gate }
10749acbbeafSnn 
1075843e1988Sjohnlev #endif	/* __xpv */
1076843e1988Sjohnlev 
1077309b04b8SJohn Levon #ifndef __xpv
1078309b04b8SJohn Levon /*
1079309b04b8SJohn Levon  * As per Intel Vol 3 27.5.2, the GDTR limit is reset to 64Kb on a VM exit, so
1080309b04b8SJohn Levon  * we have to manually fix it up ourselves.
1081309b04b8SJohn Levon  *
1082309b04b8SJohn Levon  * The caller may still need to make sure that it can't go off-CPU with the
1083309b04b8SJohn Levon  * incorrect limit, before calling this (such as disabling pre-emption).
1084309b04b8SJohn Levon  */
1085309b04b8SJohn Levon void
1086309b04b8SJohn Levon reset_gdtr_limit(void)
1087309b04b8SJohn Levon {
1088309b04b8SJohn Levon 	ulong_t flags = intr_clear();
1089309b04b8SJohn Levon 	desctbr_t gdtr;
1090309b04b8SJohn Levon 
1091309b04b8SJohn Levon 	rd_gdtr(&gdtr);
1092309b04b8SJohn Levon 	gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1;
1093309b04b8SJohn Levon 	wr_gdtr(&gdtr);
1094309b04b8SJohn Levon 
1095309b04b8SJohn Levon 	intr_restore(flags);
1096309b04b8SJohn Levon }
1097309b04b8SJohn Levon #endif /* __xpv */
1098309b04b8SJohn Levon 
1099ae115bc7Smrj /*
1100ae115bc7Smrj  * In the early kernel, we need to set up a simple GDT to run on.
1101843e1988Sjohnlev  *
1102843e1988Sjohnlev  * XXPV	Can dboot use this too?  See dboot_gdt.s
1103ae115bc7Smrj  */
1104ae115bc7Smrj void
1105ae115bc7Smrj init_boot_gdt(user_desc_t *bgdt)
1106ae115bc7Smrj {
1107ae115bc7Smrj 	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1108ae115bc7Smrj 	    SDP_PAGES, SDP_OP32);
1109ae115bc7Smrj 	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1110ae115bc7Smrj 	    SDP_PAGES, SDP_OP32);
1111ae115bc7Smrj }
1112ae115bc7Smrj 
11139acbbeafSnn /*
11149acbbeafSnn  * Enable interpositioning on the system call path by rewriting the
11159acbbeafSnn  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
11169acbbeafSnn  * the branded entry points.
11179acbbeafSnn  */
11189acbbeafSnn void
11199acbbeafSnn brand_interpositioning_enable(void)
11209acbbeafSnn {
1121843e1988Sjohnlev 	gate_desc_t	*idt = CPU->cpu_idt;
1122027bcc9fSToomas Soome 	int		i;
11239acbbeafSnn 
1124843e1988Sjohnlev 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1125843e1988Sjohnlev 
1126843e1988Sjohnlev 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1127843e1988Sjohnlev 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1128843e1988Sjohnlev #if defined(__xpv)
1129843e1988Sjohnlev 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1130843e1988Sjohnlev 		    brand_tbl[i].ih_inum);
1131843e1988Sjohnlev #endif
1132843e1988Sjohnlev 	}
11339acbbeafSnn 
1134843e1988Sjohnlev #if defined(__xpv)
1135843e1988Sjohnlev 
1136843e1988Sjohnlev 	/*
1137843e1988Sjohnlev 	 * Currently the hypervisor only supports 64-bit syscalls via
1138843e1988Sjohnlev 	 * syscall instruction. The 32-bit syscalls are handled by
1139843e1988Sjohnlev 	 * interrupt gate above.
1140843e1988Sjohnlev 	 */
1141843e1988Sjohnlev 	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1142843e1988Sjohnlev 	    CALLBACKF_mask_events);
1143843e1988Sjohnlev 
1144843e1988Sjohnlev #else
1145843e1988Sjohnlev 
11467417cfdeSKuriakose Kuruvilla 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
114774ecdb51SJohn Levon 		if (kpti_enable == 1) {
114874ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
114974ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
115074ecdb51SJohn Levon 		} else {
115174ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
115274ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
115374ecdb51SJohn Levon 		}
1154843e1988Sjohnlev 	}
1155843e1988Sjohnlev 
11569acbbeafSnn #endif
11579acbbeafSnn 
115874ecdb51SJohn Levon 	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
115974ecdb51SJohn Levon 		if (kpti_enable == 1) {
116074ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP,
116174ecdb51SJohn Levon 			    (uintptr_t)tr_brand_sys_sysenter);
116274ecdb51SJohn Levon 		} else {
116374ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
116474ecdb51SJohn Levon 		}
116574ecdb51SJohn Levon 	}
11669acbbeafSnn }
11679acbbeafSnn 
11689acbbeafSnn /*
11699acbbeafSnn  * Disable interpositioning on the system call path by rewriting the
11709acbbeafSnn  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
11719acbbeafSnn  * the standard entry points, which bypass the interpositioning hooks.
11729acbbeafSnn  */
11739acbbeafSnn void
11749acbbeafSnn brand_interpositioning_disable(void)
11759acbbeafSnn {
1176843e1988Sjohnlev 	gate_desc_t	*idt = CPU->cpu_idt;
11779acbbeafSnn 	int i;
11789acbbeafSnn 
1179843e1988Sjohnlev 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1180843e1988Sjohnlev 
1181843e1988Sjohnlev 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1182843e1988Sjohnlev 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1183843e1988Sjohnlev #if defined(__xpv)
1184843e1988Sjohnlev 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1185843e1988Sjohnlev 		    brand_tbl[i].ih_inum);
1186843e1988Sjohnlev #endif
1187843e1988Sjohnlev 	}
11889acbbeafSnn 
1189843e1988Sjohnlev #if defined(__xpv)
1190843e1988Sjohnlev 
1191843e1988Sjohnlev 	/*
1192843e1988Sjohnlev 	 * See comment above in brand_interpositioning_enable.
1193843e1988Sjohnlev 	 */
1194843e1988Sjohnlev 	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1195843e1988Sjohnlev 	    CALLBACKF_mask_events);
1196843e1988Sjohnlev 
1197843e1988Sjohnlev #else
1198843e1988Sjohnlev 
11997417cfdeSKuriakose Kuruvilla 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
120074ecdb51SJohn Levon 		if (kpti_enable == 1) {
120174ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
120274ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
120374ecdb51SJohn Levon 		} else {
120474ecdb51SJohn Levon 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
120574ecdb51SJohn Levon 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
120674ecdb51SJohn Levon 		}
1207843e1988Sjohnlev 	}
1208843e1988Sjohnlev 
12099acbbeafSnn #endif
12109acbbeafSnn 
121174ecdb51SJohn Levon 	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
121274ecdb51SJohn Levon 		if (kpti_enable == 1) {
121374ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
121474ecdb51SJohn Levon 		} else {
121574ecdb51SJohn Levon 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
121674ecdb51SJohn Levon 		}
121774ecdb51SJohn Levon 	}
12189acbbeafSnn }
1219