17c478bd9Sstevel@tonic-gate /*
2ae115bc7Smrj * CDDL HEADER START
3ae115bc7Smrj *
4ae115bc7Smrj * The contents of this file are subject to the terms of the
5ae115bc7Smrj * Common Development and Distribution License (the "License").
6ae115bc7Smrj * You may not use this file except in compliance with the License.
7ae115bc7Smrj *
8ae115bc7Smrj * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9ae115bc7Smrj * or http://www.opensolaris.org/os/licensing.
10ae115bc7Smrj * See the License for the specific language governing permissions
11ae115bc7Smrj * and limitations under the License.
12ae115bc7Smrj *
13ae115bc7Smrj * When distributing Covered Code, include this CDDL HEADER in each
14ae115bc7Smrj * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15ae115bc7Smrj * If applicable, add the following below this CDDL HEADER, with the
16ae115bc7Smrj * fields enclosed by brackets "[]" replaced with your own identifying
17ae115bc7Smrj * information: Portions Copyright [yyyy] [name of copyright owner]
18ae115bc7Smrj *
19ae115bc7Smrj * CDDL HEADER END
20ae115bc7Smrj */
21ae115bc7Smrj
22ae115bc7Smrj /*
23eb5a5c78SSurya Prakki * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
247c478bd9Sstevel@tonic-gate */
257c478bd9Sstevel@tonic-gate
26f16a0f4cSRobert Mustacchi /*
2774ecdb51SJohn Levon * Copyright 2018 Joyent, Inc. All rights reserved.
 * Copyright 2022 Oxide Computer Company
29f16a0f4cSRobert Mustacchi */
30f16a0f4cSRobert Mustacchi
317c478bd9Sstevel@tonic-gate /*
327c478bd9Sstevel@tonic-gate * Copyright (c) 1992 Terrence R. Lambert.
337c478bd9Sstevel@tonic-gate * Copyright (c) 1990 The Regents of the University of California.
347c478bd9Sstevel@tonic-gate * All rights reserved.
357c478bd9Sstevel@tonic-gate *
367c478bd9Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by
377c478bd9Sstevel@tonic-gate * William Jolitz.
387c478bd9Sstevel@tonic-gate *
397c478bd9Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without
407c478bd9Sstevel@tonic-gate * modification, are permitted provided that the following conditions
417c478bd9Sstevel@tonic-gate * are met:
427c478bd9Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright
437c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer.
447c478bd9Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright
457c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the
467c478bd9Sstevel@tonic-gate * documentation and/or other materials provided with the distribution.
477c478bd9Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software
487c478bd9Sstevel@tonic-gate * must display the following acknowledgement:
497c478bd9Sstevel@tonic-gate * This product includes software developed by the University of
507c478bd9Sstevel@tonic-gate * California, Berkeley and its contributors.
517c478bd9Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors
527c478bd9Sstevel@tonic-gate * may be used to endorse or promote products derived from this software
537c478bd9Sstevel@tonic-gate * without specific prior written permission.
547c478bd9Sstevel@tonic-gate *
557c478bd9Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
567c478bd9Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
577c478bd9Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
587c478bd9Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
597c478bd9Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
607c478bd9Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
617c478bd9Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
627c478bd9Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
637c478bd9Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
647c478bd9Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
657c478bd9Sstevel@tonic-gate * SUCH DAMAGE.
667c478bd9Sstevel@tonic-gate *
677c478bd9Sstevel@tonic-gate * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91
687c478bd9Sstevel@tonic-gate */
697c478bd9Sstevel@tonic-gate
707c478bd9Sstevel@tonic-gate #include <sys/types.h>
71ae115bc7Smrj #include <sys/sysmacros.h>
727c478bd9Sstevel@tonic-gate #include <sys/tss.h>
737c478bd9Sstevel@tonic-gate #include <sys/segments.h>
747c478bd9Sstevel@tonic-gate #include <sys/trap.h>
757c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
76ae115bc7Smrj #include <sys/bootconf.h>
777c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h>
78ae115bc7Smrj #include <sys/controlregs.h>
797c478bd9Sstevel@tonic-gate #include <sys/archsystm.h>
807c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
817c478bd9Sstevel@tonic-gate #include <sys/kobj.h>
827c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
837c478bd9Sstevel@tonic-gate #include <sys/reboot.h>
847c478bd9Sstevel@tonic-gate #include <sys/kdi.h>
85ae115bc7Smrj #include <sys/mach_mmu.h>
860baeff3dSrab #include <sys/systm.h>
8774ecdb51SJohn Levon #include <sys/note.h>
88843e1988Sjohnlev
89843e1988Sjohnlev #ifdef __xpv
90843e1988Sjohnlev #include <sys/hypervisor.h>
91843e1988Sjohnlev #include <vm/as.h>
92843e1988Sjohnlev #endif
93843e1988Sjohnlev
94ae115bc7Smrj #include <sys/promif.h>
95ae115bc7Smrj #include <sys/bootinfo.h>
96ae115bc7Smrj #include <vm/kboot_mmu.h>
97843e1988Sjohnlev #include <vm/hat_pte.h>
987c478bd9Sstevel@tonic-gate
997c478bd9Sstevel@tonic-gate /*
1007c478bd9Sstevel@tonic-gate * cpu0 and default tables and structures.
1017c478bd9Sstevel@tonic-gate */
102ae115bc7Smrj user_desc_t *gdt0;
103843e1988Sjohnlev #if !defined(__xpv)
1047c478bd9Sstevel@tonic-gate desctbr_t gdt0_default_r;
105843e1988Sjohnlev #endif
1067c478bd9Sstevel@tonic-gate
107027bcc9fSToomas Soome gate_desc_t *idt0; /* interrupt descriptor table */
1087c478bd9Sstevel@tonic-gate
109f16a0f4cSRobert Mustacchi tss_t *ktss0; /* kernel task state structure */
1107c478bd9Sstevel@tonic-gate
1117c478bd9Sstevel@tonic-gate
1127c478bd9Sstevel@tonic-gate user_desc_t zero_udesc; /* base zero user desc native procs */
113843e1988Sjohnlev user_desc_t null_udesc; /* null user descriptor */
114843e1988Sjohnlev system_desc_t null_sdesc; /* null system descriptor */
1157c478bd9Sstevel@tonic-gate
1167c478bd9Sstevel@tonic-gate user_desc_t zero_u32desc; /* 32-bit compatibility procs */
1177c478bd9Sstevel@tonic-gate
118843e1988Sjohnlev user_desc_t ucs_on;
119843e1988Sjohnlev user_desc_t ucs_off;
120843e1988Sjohnlev user_desc_t ucs32_on;
121843e1988Sjohnlev user_desc_t ucs32_off;
122843e1988Sjohnlev
12374ecdb51SJohn Levon /*
12474ecdb51SJohn Levon * If the size of this is changed, you must update hat_pcp_setup() and the
12574ecdb51SJohn Levon * definitions in exception.s
12674ecdb51SJohn Levon */
12774ecdb51SJohn Levon extern char dblfault_stack0[DEFAULTSTKSZ];
12874ecdb51SJohn Levon extern char nmi_stack0[DEFAULTSTKSZ];
12974ecdb51SJohn Levon extern char mce_stack0[DEFAULTSTKSZ];
1307c478bd9Sstevel@tonic-gate
1317c478bd9Sstevel@tonic-gate extern void fast_null(void);
1327c478bd9Sstevel@tonic-gate extern hrtime_t get_hrtime(void);
1337c478bd9Sstevel@tonic-gate extern hrtime_t gethrvtime(void);
1347c478bd9Sstevel@tonic-gate extern hrtime_t get_hrestime(void);
1357c478bd9Sstevel@tonic-gate extern uint64_t getlgrp(void);
1367c478bd9Sstevel@tonic-gate
1377c478bd9Sstevel@tonic-gate void (*(fasttable[]))(void) = {
1387c478bd9Sstevel@tonic-gate fast_null, /* T_FNULL routine */
1397c478bd9Sstevel@tonic-gate fast_null, /* T_FGETFP routine (initially null) */
1407c478bd9Sstevel@tonic-gate fast_null, /* T_FSETFP routine (initially null) */
141027bcc9fSToomas Soome (void (*)())(uintptr_t)get_hrtime, /* T_GETHRTIME */
142027bcc9fSToomas Soome (void (*)())(uintptr_t)gethrvtime, /* T_GETHRVTIME */
143027bcc9fSToomas Soome (void (*)())(uintptr_t)get_hrestime, /* T_GETHRESTIME */
144027bcc9fSToomas Soome (void (*)())(uintptr_t)getlgrp /* T_GETLGRP */
1457c478bd9Sstevel@tonic-gate };
1467c478bd9Sstevel@tonic-gate
1479acbbeafSnn /*
1489acbbeafSnn * Structure containing pre-computed descriptors to allow us to temporarily
1499acbbeafSnn * interpose on a standard handler.
1509acbbeafSnn */
1519acbbeafSnn struct interposing_handler {
1529acbbeafSnn int ih_inum;
1539acbbeafSnn gate_desc_t ih_interp_desc;
1549acbbeafSnn gate_desc_t ih_default_desc;
1559acbbeafSnn };
1569acbbeafSnn
1579acbbeafSnn /*
1589acbbeafSnn * The brand infrastructure interposes on two handlers, and we use one as a
1599acbbeafSnn * NULL signpost.
1609acbbeafSnn */
161eb5a5c78SSurya Prakki static struct interposing_handler brand_tbl[2];
1629acbbeafSnn
1637c478bd9Sstevel@tonic-gate /*
1647c478bd9Sstevel@tonic-gate * software prototypes for default local descriptor table
1657c478bd9Sstevel@tonic-gate */
1667c478bd9Sstevel@tonic-gate
1677c478bd9Sstevel@tonic-gate /*
1687c478bd9Sstevel@tonic-gate * Routines for loading segment descriptors in format the hardware
1697c478bd9Sstevel@tonic-gate * can understand.
1707c478bd9Sstevel@tonic-gate */
1717c478bd9Sstevel@tonic-gate
1727c478bd9Sstevel@tonic-gate /*
1737c478bd9Sstevel@tonic-gate * In long mode we have the new L or long mode attribute bit
1747c478bd9Sstevel@tonic-gate * for code segments. Only the conforming bit in type is used along
1757c478bd9Sstevel@tonic-gate * with descriptor priority and present bits. Default operand size must
1767c478bd9Sstevel@tonic-gate * be zero when in long mode. In 32-bit compatibility mode all fields
1777c478bd9Sstevel@tonic-gate * are treated as in legacy mode. For data segments while in long mode
1787c478bd9Sstevel@tonic-gate * only the present bit is loaded.
1797c478bd9Sstevel@tonic-gate */
1807c478bd9Sstevel@tonic-gate void
set_usegd(user_desc_t * dp,uint_t lmode,void * base,uint32_t size,uint_t type,uint_t dpl,uint_t gran,uint_t defopsz)181*075343cbSDan Cross set_usegd(user_desc_t *dp, uint_t lmode, void *base, uint32_t size,
1827c478bd9Sstevel@tonic-gate uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
1837c478bd9Sstevel@tonic-gate {
1847c478bd9Sstevel@tonic-gate ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
185a0955b86SJohn Levon /* This should never be a "system" segment. */
186a0955b86SJohn Levon ASSERT3U(type & SDT_S, !=, 0);
187*075343cbSDan Cross ASSERT3P(dp, !=, NULL);
1887c478bd9Sstevel@tonic-gate
1897c478bd9Sstevel@tonic-gate /*
1907c478bd9Sstevel@tonic-gate * 64-bit long mode.
1917c478bd9Sstevel@tonic-gate */
1927c478bd9Sstevel@tonic-gate if (lmode == SDP_LONG)
1937c478bd9Sstevel@tonic-gate dp->usd_def32 = 0; /* 32-bit operands only */
1947c478bd9Sstevel@tonic-gate else
1957c478bd9Sstevel@tonic-gate /*
1967c478bd9Sstevel@tonic-gate * 32-bit compatibility mode.
1977c478bd9Sstevel@tonic-gate */
1987c478bd9Sstevel@tonic-gate dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */
1997c478bd9Sstevel@tonic-gate
200a0955b86SJohn Levon /*
201a0955b86SJohn Levon * We should always set the "accessed" bit (SDT_A), otherwise the CPU
202a0955b86SJohn Levon * will write to the GDT whenever we change segment registers around.
203a0955b86SJohn Levon * With KPTI on, the GDT is read-only in the user page table, which
204a0955b86SJohn Levon * causes crashes if we don't set this.
205a0955b86SJohn Levon */
206a0955b86SJohn Levon ASSERT3U(type & SDT_A, !=, 0);
207a0955b86SJohn Levon
2087c478bd9Sstevel@tonic-gate dp->usd_long = lmode; /* 64-bit mode */
2097c478bd9Sstevel@tonic-gate dp->usd_type = type;
2107c478bd9Sstevel@tonic-gate dp->usd_dpl = dpl;
2117c478bd9Sstevel@tonic-gate dp->usd_p = 1;
2127c478bd9Sstevel@tonic-gate dp->usd_gran = gran; /* 0 = bytes, 1 = pages */
2137c478bd9Sstevel@tonic-gate
2147c478bd9Sstevel@tonic-gate dp->usd_lobase = (uintptr_t)base;
2157c478bd9Sstevel@tonic-gate dp->usd_midbase = (uintptr_t)base >> 16;
2167c478bd9Sstevel@tonic-gate dp->usd_hibase = (uintptr_t)base >> (16 + 8);
2177c478bd9Sstevel@tonic-gate dp->usd_lolimit = size;
2187c478bd9Sstevel@tonic-gate dp->usd_hilimit = (uintptr_t)size >> 16;
2197c478bd9Sstevel@tonic-gate }
2207c478bd9Sstevel@tonic-gate
2217c478bd9Sstevel@tonic-gate /*
2227c478bd9Sstevel@tonic-gate * Install system segment descriptor for LDT and TSS segments.
2237c478bd9Sstevel@tonic-gate */
2247c478bd9Sstevel@tonic-gate
2257c478bd9Sstevel@tonic-gate void
set_syssegd(system_desc_t * dp,void * base,size_t size,uint_t type,uint_t dpl)2267c478bd9Sstevel@tonic-gate set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
2277c478bd9Sstevel@tonic-gate uint_t dpl)
2287c478bd9Sstevel@tonic-gate {
2297c478bd9Sstevel@tonic-gate dp->ssd_lolimit = size;
2307c478bd9Sstevel@tonic-gate dp->ssd_hilimit = (uintptr_t)size >> 16;
2317c478bd9Sstevel@tonic-gate
2327c478bd9Sstevel@tonic-gate dp->ssd_lobase = (uintptr_t)base;
2337c478bd9Sstevel@tonic-gate dp->ssd_midbase = (uintptr_t)base >> 16;
2347c478bd9Sstevel@tonic-gate dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
2357c478bd9Sstevel@tonic-gate dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
2367c478bd9Sstevel@tonic-gate
2377c478bd9Sstevel@tonic-gate dp->ssd_type = type;
2387c478bd9Sstevel@tonic-gate dp->ssd_zero1 = 0; /* must be zero */
2397c478bd9Sstevel@tonic-gate dp->ssd_zero2 = 0;
2407c478bd9Sstevel@tonic-gate dp->ssd_dpl = dpl;
2417c478bd9Sstevel@tonic-gate dp->ssd_p = 1;
2427c478bd9Sstevel@tonic-gate dp->ssd_gran = 0; /* force byte units */
2437c478bd9Sstevel@tonic-gate }
2447c478bd9Sstevel@tonic-gate
245843e1988Sjohnlev void *
get_ssd_base(system_desc_t * dp)246843e1988Sjohnlev get_ssd_base(system_desc_t *dp)
247843e1988Sjohnlev {
248843e1988Sjohnlev uintptr_t base;
249843e1988Sjohnlev
250843e1988Sjohnlev base = (uintptr_t)dp->ssd_lobase |
251843e1988Sjohnlev (uintptr_t)dp->ssd_midbase << 16 |
252843e1988Sjohnlev (uintptr_t)dp->ssd_hibase << (16 + 8) |
253843e1988Sjohnlev (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
254843e1988Sjohnlev return ((void *)base);
255843e1988Sjohnlev }
256843e1988Sjohnlev
2577c478bd9Sstevel@tonic-gate /*
2587c478bd9Sstevel@tonic-gate * Install gate segment descriptor for interrupt, trap, call and task gates.
25974ecdb51SJohn Levon *
26074ecdb51SJohn Levon * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on
26174ecdb51SJohn Levon * all interrupts. We have different ISTs for each class of exceptions that are
26274ecdb51SJohn Levon * most likely to occur while handling an existing exception; while many of
26374ecdb51SJohn Levon * these are just going to panic, it's nice not to trample on the existing
26474ecdb51SJohn Levon * exception state for debugging purposes.
26574ecdb51SJohn Levon *
26674ecdb51SJohn Levon * Normal interrupts are all redirected unconditionally to the KPTI trampoline
26774ecdb51SJohn Levon * stack space. This unifies the trampoline handling between user and kernel
26874ecdb51SJohn Levon * space (and avoids the need to touch %gs).
26974ecdb51SJohn Levon *
27074ecdb51SJohn Levon * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when
27174ecdb51SJohn Levon * we do a read from KMDB that cause another #PF. Without its own IST, this
27274ecdb51SJohn Levon * would stomp on the kernel's mcpu_kpti_flt frame.
2737c478bd9Sstevel@tonic-gate */
27474ecdb51SJohn Levon uint_t
idt_vector_to_ist(uint_t vector)27574ecdb51SJohn Levon idt_vector_to_ist(uint_t vector)
2767c478bd9Sstevel@tonic-gate {
27774ecdb51SJohn Levon #if defined(__xpv)
27874ecdb51SJohn Levon _NOTE(ARGUNUSED(vector));
27974ecdb51SJohn Levon return (IST_NONE);
28074ecdb51SJohn Levon #else
28174ecdb51SJohn Levon switch (vector) {
28274ecdb51SJohn Levon /* These should always use IST even without KPTI enabled. */
28374ecdb51SJohn Levon case T_DBLFLT:
28474ecdb51SJohn Levon return (IST_DF);
28574ecdb51SJohn Levon case T_NMIFLT:
28674ecdb51SJohn Levon return (IST_NMI);
28774ecdb51SJohn Levon case T_MCE:
28874ecdb51SJohn Levon return (IST_MCE);
28974ecdb51SJohn Levon
29074ecdb51SJohn Levon case T_BPTFLT:
29174ecdb51SJohn Levon case T_SGLSTP:
29274ecdb51SJohn Levon if (kpti_enable == 1) {
29374ecdb51SJohn Levon return (IST_DBG);
29474ecdb51SJohn Levon }
29574ecdb51SJohn Levon return (IST_NONE);
29674ecdb51SJohn Levon case T_STKFLT:
29774ecdb51SJohn Levon case T_GPFLT:
29874ecdb51SJohn Levon case T_PGFLT:
29974ecdb51SJohn Levon if (kpti_enable == 1) {
30074ecdb51SJohn Levon return (IST_NESTABLE);
30174ecdb51SJohn Levon }
30274ecdb51SJohn Levon return (IST_NONE);
30374ecdb51SJohn Levon default:
30474ecdb51SJohn Levon if (kpti_enable == 1) {
30574ecdb51SJohn Levon return (IST_DEFAULT);
30674ecdb51SJohn Levon }
30774ecdb51SJohn Levon return (IST_NONE);
30874ecdb51SJohn Levon }
309843e1988Sjohnlev #endif
3107c478bd9Sstevel@tonic-gate }
3117c478bd9Sstevel@tonic-gate
3127c478bd9Sstevel@tonic-gate void
set_gatesegd(gate_desc_t * dp,void (* func)(void),selector_t sel,uint_t type,uint_t dpl,uint_t ist)3137c478bd9Sstevel@tonic-gate set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
31474ecdb51SJohn Levon uint_t type, uint_t dpl, uint_t ist)
3157c478bd9Sstevel@tonic-gate {
3167c478bd9Sstevel@tonic-gate dp->sgd_looffset = (uintptr_t)func;
3177c478bd9Sstevel@tonic-gate dp->sgd_hioffset = (uintptr_t)func >> 16;
31874ecdb51SJohn Levon dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
3197c478bd9Sstevel@tonic-gate dp->sgd_selector = (uint16_t)sel;
32074ecdb51SJohn Levon dp->sgd_ist = ist;
3217c478bd9Sstevel@tonic-gate dp->sgd_type = type;
3227c478bd9Sstevel@tonic-gate dp->sgd_dpl = dpl;
3237c478bd9Sstevel@tonic-gate dp->sgd_p = 1;
3247c478bd9Sstevel@tonic-gate }
3257c478bd9Sstevel@tonic-gate
326843e1988Sjohnlev /*
327843e1988Sjohnlev * Updates a single user descriptor in the the GDT of the current cpu.
328843e1988Sjohnlev * Caller is responsible for preventing cpu migration.
329843e1988Sjohnlev */
330843e1988Sjohnlev
331843e1988Sjohnlev void
gdt_update_usegd(uint_t sidx,user_desc_t * udp)332843e1988Sjohnlev gdt_update_usegd(uint_t sidx, user_desc_t *udp)
333843e1988Sjohnlev {
334a0955b86SJohn Levon #if defined(DEBUG)
335a0955b86SJohn Levon /* This should never be a "system" segment, but it might be null. */
336a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) {
337a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_S, !=, 0);
338a0955b86SJohn Levon }
339a0955b86SJohn Levon /*
340a0955b86SJohn Levon * We should always set the "accessed" bit (SDT_A), otherwise the CPU
341a0955b86SJohn Levon * will write to the GDT whenever we change segment registers around.
342a0955b86SJohn Levon * With KPTI on, the GDT is read-only in the user page table, which
343a0955b86SJohn Levon * causes crashes if we don't set this.
344a0955b86SJohn Levon */
345a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) {
346a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_A, !=, 0);
347a0955b86SJohn Levon }
348a0955b86SJohn Levon #endif
349843e1988Sjohnlev
350a0955b86SJohn Levon #if defined(__xpv)
351843e1988Sjohnlev uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
352843e1988Sjohnlev
353843e1988Sjohnlev if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
354843e1988Sjohnlev panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
355843e1988Sjohnlev
356843e1988Sjohnlev #else /* __xpv */
357843e1988Sjohnlev CPU->cpu_gdt[sidx] = *udp;
358843e1988Sjohnlev #endif /* __xpv */
359843e1988Sjohnlev }
360843e1988Sjohnlev
361843e1988Sjohnlev /*
362843e1988Sjohnlev * Writes single descriptor pointed to by udp into a processes
363843e1988Sjohnlev * LDT entry pointed to by ldp.
364843e1988Sjohnlev */
365843e1988Sjohnlev int
ldt_update_segd(user_desc_t * ldp,user_desc_t * udp)366843e1988Sjohnlev ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
367843e1988Sjohnlev {
368a0955b86SJohn Levon #if defined(DEBUG)
369a0955b86SJohn Levon /* This should never be a "system" segment, but it might be null. */
370a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) {
371a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_S, !=, 0);
372a0955b86SJohn Levon }
373a0955b86SJohn Levon /*
374a0955b86SJohn Levon * We should always set the "accessed" bit (SDT_A), otherwise the CPU
375a0955b86SJohn Levon * will write to the LDT whenever we change segment registers around.
376a0955b86SJohn Levon * With KPTI on, the LDT is read-only in the user page table, which
377a0955b86SJohn Levon * causes crashes if we don't set this.
378a0955b86SJohn Levon */
379a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) {
380a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_A, !=, 0);
381a0955b86SJohn Levon }
382a0955b86SJohn Levon #endif
383843e1988Sjohnlev
384a0955b86SJohn Levon #if defined(__xpv)
385843e1988Sjohnlev uint64_t dpa;
386843e1988Sjohnlev
387843e1988Sjohnlev dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
388843e1988Sjohnlev ((uintptr_t)ldp & PAGEOFFSET);
389843e1988Sjohnlev
390843e1988Sjohnlev /*
391843e1988Sjohnlev * The hypervisor is a little more restrictive about what it
392843e1988Sjohnlev * supports in the LDT.
393843e1988Sjohnlev */
394843e1988Sjohnlev if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
395843e1988Sjohnlev return (EINVAL);
396843e1988Sjohnlev
397843e1988Sjohnlev #else /* __xpv */
398843e1988Sjohnlev *ldp = *udp;
399843e1988Sjohnlev
400843e1988Sjohnlev #endif /* __xpv */
401843e1988Sjohnlev return (0);
402843e1988Sjohnlev }
403843e1988Sjohnlev
404843e1988Sjohnlev #if defined(__xpv)
405843e1988Sjohnlev
406843e1988Sjohnlev /*
407843e1988Sjohnlev * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
408843e1988Sjohnlev * Returns true if a valid entry was written.
409843e1988Sjohnlev */
410843e1988Sjohnlev int
xen_idt_to_trap_info(uint_t vec,gate_desc_t * sgd,void * ti_arg)411843e1988Sjohnlev xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
412843e1988Sjohnlev {
413843e1988Sjohnlev trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */
414843e1988Sjohnlev
415843e1988Sjohnlev /*
416843e1988Sjohnlev * skip holes in the IDT
417843e1988Sjohnlev */
418843e1988Sjohnlev if (GATESEG_GETOFFSET(sgd) == 0)
419843e1988Sjohnlev return (0);
420843e1988Sjohnlev
421843e1988Sjohnlev ASSERT(sgd->sgd_type == SDT_SYSIGT);
422843e1988Sjohnlev ti->vector = vec;
423843e1988Sjohnlev TI_SET_DPL(ti, sgd->sgd_dpl);
424843e1988Sjohnlev
425843e1988Sjohnlev /*
426843e1988Sjohnlev * Is this an interrupt gate?
427843e1988Sjohnlev */
428843e1988Sjohnlev if (sgd->sgd_type == SDT_SYSIGT) {
429843e1988Sjohnlev /* LINTED */
430843e1988Sjohnlev TI_SET_IF(ti, 1);
431843e1988Sjohnlev }
432843e1988Sjohnlev ti->cs = sgd->sgd_selector;
433843e1988Sjohnlev ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */
434843e1988Sjohnlev ti->address = GATESEG_GETOFFSET(sgd);
435843e1988Sjohnlev return (1);
436843e1988Sjohnlev }
437843e1988Sjohnlev
438843e1988Sjohnlev /*
439843e1988Sjohnlev * Convert a single hw format gate descriptor and write it into our virtual IDT.
440843e1988Sjohnlev */
441843e1988Sjohnlev void
xen_idt_write(gate_desc_t * sgd,uint_t vec)442843e1988Sjohnlev xen_idt_write(gate_desc_t *sgd, uint_t vec)
443843e1988Sjohnlev {
444843e1988Sjohnlev trap_info_t trapinfo[2];
445843e1988Sjohnlev
446843e1988Sjohnlev bzero(trapinfo, sizeof (trapinfo));
447843e1988Sjohnlev if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
448843e1988Sjohnlev return;
449843e1988Sjohnlev if (xen_set_trap_table(trapinfo) != 0)
450843e1988Sjohnlev panic("xen_idt_write: xen_set_trap_table() failed");
451843e1988Sjohnlev }
452843e1988Sjohnlev
453843e1988Sjohnlev #endif /* __xpv */
454843e1988Sjohnlev
4557c478bd9Sstevel@tonic-gate
4567c478bd9Sstevel@tonic-gate /*
4577c478bd9Sstevel@tonic-gate * Build kernel GDT.
4587c478bd9Sstevel@tonic-gate */
4597c478bd9Sstevel@tonic-gate
4607c478bd9Sstevel@tonic-gate static void
init_gdt_common(user_desc_t * gdt)461ae115bc7Smrj init_gdt_common(user_desc_t *gdt)
4627c478bd9Sstevel@tonic-gate {
463ae115bc7Smrj int i;
4647c478bd9Sstevel@tonic-gate
465*075343cbSDan Cross ASSERT3P(gdt, !=, NULL);
466*075343cbSDan Cross
467*075343cbSDan Cross init_boot_gdt(gdt);
468*075343cbSDan Cross
4697c478bd9Sstevel@tonic-gate /*
4707c478bd9Sstevel@tonic-gate * 64-bit kernel code segment.
4717c478bd9Sstevel@tonic-gate */
472ae115bc7Smrj set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
4737c478bd9Sstevel@tonic-gate SDP_PAGES, SDP_OP32);
4747c478bd9Sstevel@tonic-gate
4757c478bd9Sstevel@tonic-gate /*
4767c478bd9Sstevel@tonic-gate * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
477*075343cbSDan Cross * mode, but we set it here to SDP_LIMIT_MAX so that we can use the
478*075343cbSDan Cross * SYSRET instruction to return from system calls back to 32-bit
479*075343cbSDan Cross * applications. SYSRET doesn't update the base, limit, or attributes
480*075343cbSDan Cross * of %ss or %ds descriptors. We therefore must ensure that the kernel
481*075343cbSDan Cross * uses something, though it will be ignored by hardware, that is
482*075343cbSDan Cross * compatible with 32-bit apps. For the same reason we must set the
483*075343cbSDan Cross * default op size of this descriptor to 32-bit operands.
4847c478bd9Sstevel@tonic-gate */
485*075343cbSDan Cross set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
4867c478bd9Sstevel@tonic-gate SEL_KPL, SDP_PAGES, SDP_OP32);
487ae115bc7Smrj gdt[GDT_KDATA].usd_def32 = 1;
4887c478bd9Sstevel@tonic-gate
4897c478bd9Sstevel@tonic-gate /*
4907c478bd9Sstevel@tonic-gate * 64-bit user code segment.
4917c478bd9Sstevel@tonic-gate */
492ae115bc7Smrj set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
4937c478bd9Sstevel@tonic-gate SDP_PAGES, SDP_OP32);
4947c478bd9Sstevel@tonic-gate
4957c478bd9Sstevel@tonic-gate /*
4967c478bd9Sstevel@tonic-gate * 32-bit user code segment.
4977c478bd9Sstevel@tonic-gate */
498*075343cbSDan Cross set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMERA,
4997c478bd9Sstevel@tonic-gate SEL_UPL, SDP_PAGES, SDP_OP32);
5007c478bd9Sstevel@tonic-gate
501843e1988Sjohnlev /*
502843e1988Sjohnlev * See gdt_ucode32() and gdt_ucode_native().
503843e1988Sjohnlev */
504843e1988Sjohnlev ucs_on = ucs_off = gdt[GDT_UCODE];
505843e1988Sjohnlev ucs_off.usd_p = 0; /* forces #np fault */
506843e1988Sjohnlev
507843e1988Sjohnlev ucs32_on = ucs32_off = gdt[GDT_U32CODE];
508843e1988Sjohnlev ucs32_off.usd_p = 0; /* forces #np fault */
509843e1988Sjohnlev
5107c478bd9Sstevel@tonic-gate /*
5117c478bd9Sstevel@tonic-gate * 32 and 64 bit data segments can actually share the same descriptor.
5127c478bd9Sstevel@tonic-gate * In long mode only the present bit is checked but all other fields
5137c478bd9Sstevel@tonic-gate * are loaded. But in compatibility mode all fields are interpreted
5147c478bd9Sstevel@tonic-gate * as in legacy mode so they must be set correctly for a 32-bit data
5157c478bd9Sstevel@tonic-gate * segment.
5167c478bd9Sstevel@tonic-gate */
517*075343cbSDan Cross set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
518*075343cbSDan Cross SEL_UPL, SDP_PAGES, SDP_OP32);
5197c478bd9Sstevel@tonic-gate
520843e1988Sjohnlev #if !defined(__xpv)
521843e1988Sjohnlev
5227c478bd9Sstevel@tonic-gate /*
5230baeff3dSrab * The 64-bit kernel has no default LDT. By default, the LDT descriptor
5240baeff3dSrab * in the GDT is 0.
5257c478bd9Sstevel@tonic-gate */
5267c478bd9Sstevel@tonic-gate
5277c478bd9Sstevel@tonic-gate /*
5287c478bd9Sstevel@tonic-gate * Kernel TSS
5297c478bd9Sstevel@tonic-gate */
5300cfdb603Sjosephb set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
5310cfdb603Sjosephb sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
5327c478bd9Sstevel@tonic-gate
533843e1988Sjohnlev #endif /* !__xpv */
534843e1988Sjohnlev
5357c478bd9Sstevel@tonic-gate /*
5367c478bd9Sstevel@tonic-gate * Initialize fs and gs descriptors for 32 bit processes.
5377c478bd9Sstevel@tonic-gate * Only attributes and limits are initialized, the effective
5387c478bd9Sstevel@tonic-gate * base address is programmed via fsbase/gsbase.
5397c478bd9Sstevel@tonic-gate */
540*075343cbSDan Cross set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
5417c478bd9Sstevel@tonic-gate SEL_UPL, SDP_PAGES, SDP_OP32);
542*075343cbSDan Cross set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
5437c478bd9Sstevel@tonic-gate SEL_UPL, SDP_PAGES, SDP_OP32);
5447c478bd9Sstevel@tonic-gate
5459acbbeafSnn /*
5469acbbeafSnn * Initialize the descriptors set aside for brand usage.
5479acbbeafSnn * Only attributes and limits are initialized.
5489acbbeafSnn */
5499acbbeafSnn for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
550*075343cbSDan Cross set_usegd(&gdt0[i], SDP_SHORT, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
5519acbbeafSnn SEL_UPL, SDP_PAGES, SDP_OP32);
5529acbbeafSnn
5537c478bd9Sstevel@tonic-gate /*
5547c478bd9Sstevel@tonic-gate * Initialize convenient zero base user descriptors for clearing
5557c478bd9Sstevel@tonic-gate * lwp private %fs and %gs descriptors in GDT. See setregs() for
5567c478bd9Sstevel@tonic-gate * an example.
5577c478bd9Sstevel@tonic-gate */
5587c478bd9Sstevel@tonic-gate set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
5597c478bd9Sstevel@tonic-gate SDP_BYTES, SDP_OP32);
560*075343cbSDan Cross set_usegd(&zero_u32desc, SDP_SHORT, 0, SDP_LIMIT_MAX, SDT_MEMRWA,
561*075343cbSDan Cross SEL_UPL, SDP_PAGES, SDP_OP32);
5627c478bd9Sstevel@tonic-gate }
5637c478bd9Sstevel@tonic-gate
564843e1988Sjohnlev #if defined(__xpv)
565843e1988Sjohnlev
566843e1988Sjohnlev static user_desc_t *
init_gdt(void)567843e1988Sjohnlev init_gdt(void)
568843e1988Sjohnlev {
569843e1988Sjohnlev uint64_t gdtpa;
570843e1988Sjohnlev ulong_t ma[1]; /* XXPV should be a memory_t */
571843e1988Sjohnlev ulong_t addr;
572843e1988Sjohnlev
573843e1988Sjohnlev #if !defined(__lint)
574843e1988Sjohnlev /*
575843e1988Sjohnlev * Our gdt is never larger than a single page.
576843e1988Sjohnlev */
577843e1988Sjohnlev ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
578843e1988Sjohnlev #endif
579843e1988Sjohnlev gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
580843e1988Sjohnlev PAGESIZE, PAGESIZE);
581*075343cbSDan Cross ASSERT3P(gdt0, !=, NULL);
582843e1988Sjohnlev bzero(gdt0, PAGESIZE);
583843e1988Sjohnlev
584843e1988Sjohnlev init_gdt_common(gdt0);
585843e1988Sjohnlev
586843e1988Sjohnlev /*
587843e1988Sjohnlev * XXX Since we never invoke kmdb until after the kernel takes
588843e1988Sjohnlev * over the descriptor tables why not have it use the kernel's
589843e1988Sjohnlev * selectors?
590843e1988Sjohnlev */
591843e1988Sjohnlev if (boothowto & RB_DEBUG) {
592*075343cbSDan Cross set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX,
593*075343cbSDan Cross SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
594*075343cbSDan Cross set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX,
595*075343cbSDan Cross SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
596843e1988Sjohnlev }
597843e1988Sjohnlev
598843e1988Sjohnlev /*
599843e1988Sjohnlev * Clear write permission for page containing the gdt and install it.
600843e1988Sjohnlev */
601843e1988Sjohnlev gdtpa = pfn_to_pa(va_to_pfn(gdt0));
602843e1988Sjohnlev ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
603843e1988Sjohnlev kbm_read_only((uintptr_t)gdt0, gdtpa);
604843e1988Sjohnlev xen_set_gdt(ma, NGDT);
605843e1988Sjohnlev
606843e1988Sjohnlev /*
607843e1988Sjohnlev * Reload the segment registers to use the new GDT.
608843e1988Sjohnlev * On 64-bit, fixup KCS_SEL to be in ring 3.
609843e1988Sjohnlev * See KCS_SEL in segments.h.
610843e1988Sjohnlev */
611843e1988Sjohnlev load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
612843e1988Sjohnlev
613843e1988Sjohnlev /*
614843e1988Sjohnlev * setup %gs for kernel
615843e1988Sjohnlev */
616843e1988Sjohnlev xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
617843e1988Sjohnlev
618843e1988Sjohnlev /*
619843e1988Sjohnlev * XX64 We should never dereference off "other gsbase" or
620843e1988Sjohnlev * "fsbase". So, we should arrange to point FSBASE and
621843e1988Sjohnlev * KGSBASE somewhere truly awful e.g. point it at the last
622843e1988Sjohnlev * valid address below the hole so that any attempts to index
623843e1988Sjohnlev * off them cause an exception.
624843e1988Sjohnlev *
625843e1988Sjohnlev * For now, point it at 8G -- at least it should be unmapped
626843e1988Sjohnlev * until some 64-bit processes run.
627843e1988Sjohnlev */
628843e1988Sjohnlev addr = 0x200000000ul;
629843e1988Sjohnlev xen_set_segment_base(SEGBASE_FS, addr);
630843e1988Sjohnlev xen_set_segment_base(SEGBASE_GS_USER, addr);
631843e1988Sjohnlev xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
632843e1988Sjohnlev
633843e1988Sjohnlev return (gdt0);
634843e1988Sjohnlev }
635843e1988Sjohnlev
636843e1988Sjohnlev #else /* __xpv */
637843e1988Sjohnlev
638ae115bc7Smrj static user_desc_t *
init_gdt(void)6397c478bd9Sstevel@tonic-gate init_gdt(void)
6407c478bd9Sstevel@tonic-gate {
641*075343cbSDan Cross desctbr_t r_gdt;
6427c478bd9Sstevel@tonic-gate
643ae115bc7Smrj #if !defined(__lint)
6447c478bd9Sstevel@tonic-gate /*
645ae115bc7Smrj * Our gdt is never larger than a single page.
646ae115bc7Smrj */
647ae115bc7Smrj ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
648ae115bc7Smrj #endif
649ae115bc7Smrj gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
650ae115bc7Smrj PAGESIZE, PAGESIZE);
651ae115bc7Smrj bzero(gdt0, PAGESIZE);
652ae115bc7Smrj
653ae115bc7Smrj init_gdt_common(gdt0);
654ae115bc7Smrj
655ae115bc7Smrj /*
656ae115bc7Smrj * Install our new GDT
657ae115bc7Smrj */
658ae115bc7Smrj r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
659ae115bc7Smrj r_gdt.dtr_base = (uintptr_t)gdt0;
660ae115bc7Smrj wr_gdtr(&r_gdt);
661ae115bc7Smrj
662ae115bc7Smrj /*
663ae115bc7Smrj * Reload the segment registers to use the new GDT
664ae115bc7Smrj */
665ae115bc7Smrj load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
666ae115bc7Smrj
667ae115bc7Smrj /*
668ae115bc7Smrj * setup %gs for kernel
669ae115bc7Smrj */
670ae115bc7Smrj wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
671ae115bc7Smrj
672ae115bc7Smrj /*
673ae115bc7Smrj * XX64 We should never dereference off "other gsbase" or
674ae115bc7Smrj * "fsbase". So, we should arrange to point FSBASE and
675ae115bc7Smrj * KGSBASE somewhere truly awful e.g. point it at the last
676ae115bc7Smrj * valid address below the hole so that any attempts to index
677ae115bc7Smrj * off them cause an exception.
678ae115bc7Smrj *
679ae115bc7Smrj * For now, point it at 8G -- at least it should be unmapped
680ae115bc7Smrj * until some 64-bit processes run.
681ae115bc7Smrj */
682ae115bc7Smrj wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
683ae115bc7Smrj wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
684ae115bc7Smrj return (gdt0);
685ae115bc7Smrj }
686ae115bc7Smrj
687843e1988Sjohnlev #endif /* __xpv */
688843e1988Sjohnlev
6897c478bd9Sstevel@tonic-gate
6907c478bd9Sstevel@tonic-gate /*
6917c478bd9Sstevel@tonic-gate * Build kernel IDT.
6927c478bd9Sstevel@tonic-gate *
693ae115bc7Smrj * Note that for amd64 we pretty much require every gate to be an interrupt
694ae115bc7Smrj * gate which blocks interrupts atomically on entry; that's because of our
695ae115bc7Smrj * dependency on using 'swapgs' every time we come into the kernel to find
696ae115bc7Smrj * the cpu structure. If we get interrupted just before doing that, %cs could
697ae115bc7Smrj * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
698ae115bc7Smrj * %gsbase is really still pointing at something in userland. Bad things will
699ae115bc7Smrj * ensue. We also use interrupt gates for i386 as well even though this is not
700ae115bc7Smrj * required for some traps.
7017c478bd9Sstevel@tonic-gate *
7027c478bd9Sstevel@tonic-gate * Perhaps they should have invented a trap gate that does an atomic swapgs?
7037c478bd9Sstevel@tonic-gate */
7047c478bd9Sstevel@tonic-gate static void
init_idt_common(gate_desc_t * idt)705ae115bc7Smrj init_idt_common(gate_desc_t *idt)
7067c478bd9Sstevel@tonic-gate {
70774ecdb51SJohn Levon set_gatesegd(&idt[T_ZERODIV],
70874ecdb51SJohn Levon (kpti_enable == 1) ? &tr_div0trap : &div0trap,
70974ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
71074ecdb51SJohn Levon set_gatesegd(&idt[T_SGLSTP],
71174ecdb51SJohn Levon (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
71274ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
71374ecdb51SJohn Levon set_gatesegd(&idt[T_NMIFLT],
71474ecdb51SJohn Levon (kpti_enable == 1) ? &tr_nmiint : &nmiint,
71574ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
71674ecdb51SJohn Levon set_gatesegd(&idt[T_BPTFLT],
71774ecdb51SJohn Levon (kpti_enable == 1) ? &tr_brktrap : &brktrap,
71874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
71974ecdb51SJohn Levon set_gatesegd(&idt[T_OVFLW],
72074ecdb51SJohn Levon (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
72174ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
72274ecdb51SJohn Levon set_gatesegd(&idt[T_BOUNDFLT],
72374ecdb51SJohn Levon (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
72474ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
72574ecdb51SJohn Levon set_gatesegd(&idt[T_ILLINST],
72674ecdb51SJohn Levon (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
72774ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
72874ecdb51SJohn Levon set_gatesegd(&idt[T_NOEXTFLT],
72974ecdb51SJohn Levon (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
73074ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));
7317c478bd9Sstevel@tonic-gate
7327c478bd9Sstevel@tonic-gate /*
7337c478bd9Sstevel@tonic-gate * double fault handler.
734843e1988Sjohnlev *
735843e1988Sjohnlev * Note that on the hypervisor a guest does not receive #df faults.
736843e1988Sjohnlev * Instead a failsafe event is injected into the guest if its selectors
737843e1988Sjohnlev * and/or stack is in a broken state. See xen_failsafe_callback.
7387c478bd9Sstevel@tonic-gate */
739843e1988Sjohnlev #if !defined(__xpv)
74074ecdb51SJohn Levon set_gatesegd(&idt[T_DBLFLT],
74174ecdb51SJohn Levon (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
74274ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
743843e1988Sjohnlev #endif /* !__xpv */
7447c478bd9Sstevel@tonic-gate
7457c478bd9Sstevel@tonic-gate /*
746ae115bc7Smrj * T_EXTOVRFLT coprocessor-segment-overrun not supported.
7477c478bd9Sstevel@tonic-gate */
74874ecdb51SJohn Levon set_gatesegd(&idt[T_TSSFLT],
74974ecdb51SJohn Levon (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
75074ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
75174ecdb51SJohn Levon set_gatesegd(&idt[T_SEGFLT],
75274ecdb51SJohn Levon (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
75374ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
75474ecdb51SJohn Levon set_gatesegd(&idt[T_STKFLT],
75574ecdb51SJohn Levon (kpti_enable == 1) ? &tr_stktrap : &stktrap,
75674ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
75774ecdb51SJohn Levon set_gatesegd(&idt[T_GPFLT],
75874ecdb51SJohn Levon (kpti_enable == 1) ? &tr_gptrap : &gptrap,
75974ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
76074ecdb51SJohn Levon set_gatesegd(&idt[T_PGFLT],
76174ecdb51SJohn Levon (kpti_enable == 1) ? &tr_pftrap : &pftrap,
76274ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
76374ecdb51SJohn Levon set_gatesegd(&idt[T_EXTERRFLT],
76474ecdb51SJohn Levon (kpti_enable == 1) ? &tr_ndperr : &ndperr,
76574ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
76674ecdb51SJohn Levon set_gatesegd(&idt[T_ALIGNMENT],
76774ecdb51SJohn Levon (kpti_enable == 1) ? &tr_achktrap : &achktrap,
76874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
76974ecdb51SJohn Levon set_gatesegd(&idt[T_MCE],
77074ecdb51SJohn Levon (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
77174ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
77274ecdb51SJohn Levon set_gatesegd(&idt[T_SIMDFPE],
77374ecdb51SJohn Levon (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
77474ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));
7757c478bd9Sstevel@tonic-gate
7767c478bd9Sstevel@tonic-gate /*
7777c478bd9Sstevel@tonic-gate * install fast trap handler at 210.
7787c478bd9Sstevel@tonic-gate */
77974ecdb51SJohn Levon set_gatesegd(&idt[T_FASTTRAP],
78074ecdb51SJohn Levon (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
78174ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));
7827c478bd9Sstevel@tonic-gate
7837c478bd9Sstevel@tonic-gate /*
7847c478bd9Sstevel@tonic-gate * System call handler.
7857c478bd9Sstevel@tonic-gate */
78674ecdb51SJohn Levon set_gatesegd(&idt[T_SYSCALLINT],
78774ecdb51SJohn Levon (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
78874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));
7897c478bd9Sstevel@tonic-gate
7907c478bd9Sstevel@tonic-gate /*
791f498645aSahl * Install the DTrace interrupt handler for the pid provider.
7927c478bd9Sstevel@tonic-gate */
79374ecdb51SJohn Levon set_gatesegd(&idt[T_DTRACE_RET],
79474ecdb51SJohn Levon (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
79574ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));
7967c478bd9Sstevel@tonic-gate
7979acbbeafSnn /*
798eb5a5c78SSurya Prakki * Prepare interposing descriptor for the syscall handler
799eb5a5c78SSurya Prakki * and cache copy of the default descriptor.
8009acbbeafSnn */
801eb5a5c78SSurya Prakki brand_tbl[0].ih_inum = T_SYSCALLINT;
802eb5a5c78SSurya Prakki brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
803ae115bc7Smrj
80474ecdb51SJohn Levon set_gatesegd(&(brand_tbl[0].ih_interp_desc),
80574ecdb51SJohn Levon (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
80674ecdb51SJohn Levon &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
80774ecdb51SJohn Levon idt_vector_to_ist(T_SYSCALLINT));
8089acbbeafSnn
809eb5a5c78SSurya Prakki brand_tbl[1].ih_inum = 0;
8107c478bd9Sstevel@tonic-gate }
8117c478bd9Sstevel@tonic-gate
812843e1988Sjohnlev #if defined(__xpv)
813843e1988Sjohnlev
814843e1988Sjohnlev static void
init_idt(gate_desc_t * idt)815843e1988Sjohnlev init_idt(gate_desc_t *idt)
816843e1988Sjohnlev {
817843e1988Sjohnlev init_idt_common(idt);
818843e1988Sjohnlev }
819843e1988Sjohnlev
820843e1988Sjohnlev #else /* __xpv */
821843e1988Sjohnlev
8227c478bd9Sstevel@tonic-gate static void
init_idt(gate_desc_t * idt)823ae115bc7Smrj init_idt(gate_desc_t *idt)
8247c478bd9Sstevel@tonic-gate {
8257c478bd9Sstevel@tonic-gate char ivctname[80];
8267c478bd9Sstevel@tonic-gate void (*ivctptr)(void);
8277c478bd9Sstevel@tonic-gate int i;
8287c478bd9Sstevel@tonic-gate
8297c478bd9Sstevel@tonic-gate /*
8307c478bd9Sstevel@tonic-gate * Initialize entire table with 'reserved' trap and then overwrite
8317c478bd9Sstevel@tonic-gate * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
8327c478bd9Sstevel@tonic-gate * since it can only be generated on a 386 processor. 15 is also
8337c478bd9Sstevel@tonic-gate * unsupported and reserved.
8347c478bd9Sstevel@tonic-gate */
83574ecdb51SJohn Levon for (i = 0; i < NIDT; i++) {
83674ecdb51SJohn Levon set_gatesegd(&idt[i],
83774ecdb51SJohn Levon (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
83874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL,
83974ecdb51SJohn Levon idt_vector_to_ist(T_RESVTRAP));
84074ecdb51SJohn Levon }
8417c478bd9Sstevel@tonic-gate
8427c478bd9Sstevel@tonic-gate /*
8437c478bd9Sstevel@tonic-gate * 20-31 reserved
8447c478bd9Sstevel@tonic-gate */
84574ecdb51SJohn Levon for (i = 20; i < 32; i++) {
84674ecdb51SJohn Levon set_gatesegd(&idt[i],
84774ecdb51SJohn Levon (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
84874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL,
84974ecdb51SJohn Levon idt_vector_to_ist(T_INVALTRAP));
85074ecdb51SJohn Levon }
8517c478bd9Sstevel@tonic-gate
8527c478bd9Sstevel@tonic-gate /*
8537c478bd9Sstevel@tonic-gate * interrupts 32 - 255
8547c478bd9Sstevel@tonic-gate */
8557c478bd9Sstevel@tonic-gate for (i = 32; i < 256; i++) {
85674ecdb51SJohn Levon (void) snprintf(ivctname, sizeof (ivctname),
85774ecdb51SJohn Levon (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
8587c478bd9Sstevel@tonic-gate ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
8597c478bd9Sstevel@tonic-gate if (ivctptr == NULL)
8607c478bd9Sstevel@tonic-gate panic("kobj_getsymvalue(%s) failed", ivctname);
8617c478bd9Sstevel@tonic-gate
86274ecdb51SJohn Levon set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
86374ecdb51SJohn Levon idt_vector_to_ist(i));
8647c478bd9Sstevel@tonic-gate }
8657c478bd9Sstevel@tonic-gate
8669acbbeafSnn /*
867ae115bc7Smrj * Now install the common ones. Note that it will overlay some
868ae115bc7Smrj * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
8697c478bd9Sstevel@tonic-gate */
870ae115bc7Smrj init_idt_common(idt);
8717c478bd9Sstevel@tonic-gate }
8727c478bd9Sstevel@tonic-gate
873843e1988Sjohnlev #endif /* __xpv */
874843e1988Sjohnlev
8757c478bd9Sstevel@tonic-gate /*
8760baeff3dSrab * The kernel does not deal with LDTs unless a user explicitly creates
8770baeff3dSrab * one. Under normal circumstances, the LDTR contains 0. Any process attempting
8780baeff3dSrab * to reference the LDT will therefore cause a #gp. System calls made via the
8790baeff3dSrab * obsolete lcall mechanism are emulated by the #gp fault handler.
8807c478bd9Sstevel@tonic-gate */
static void
init_ldt(void)
{
	/*
	 * Start with no LDT at all: a zero selector on bare metal, or a
	 * NULL, zero-length table when running on the hypervisor.
	 */
#if !defined(__xpv)
	wr_ldtr(0);
#else
	xen_set_ldt(NULL, 0);
#endif
}
8907c478bd9Sstevel@tonic-gate
891843e1988Sjohnlev #if !defined(__xpv)
8927c478bd9Sstevel@tonic-gate
8937c478bd9Sstevel@tonic-gate static void
init_tss(void)8947c478bd9Sstevel@tonic-gate init_tss(void)
8957c478bd9Sstevel@tonic-gate {
89674ecdb51SJohn Levon extern struct cpu cpus[];
8977c478bd9Sstevel@tonic-gate
8987c478bd9Sstevel@tonic-gate /*
89974ecdb51SJohn Levon * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
90074ecdb51SJohn Levon * context switch but it'll be overwritten with this same value anyway.
9017c478bd9Sstevel@tonic-gate */
90274ecdb51SJohn Levon if (kpti_enable == 1) {
90374ecdb51SJohn Levon ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
90474ecdb51SJohn Levon }
9057c478bd9Sstevel@tonic-gate
90674ecdb51SJohn Levon /* Set up the IST stacks for double fault, NMI, MCE. */
90774ecdb51SJohn Levon ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
90874ecdb51SJohn Levon ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
90974ecdb51SJohn Levon ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];
9107c478bd9Sstevel@tonic-gate
9117c478bd9Sstevel@tonic-gate /*
91274ecdb51SJohn Levon * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
91374ecdb51SJohn Levon * enabled), and also for KDI (always).
9147c478bd9Sstevel@tonic-gate */
91574ecdb51SJohn Levon ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
9167c478bd9Sstevel@tonic-gate
91774ecdb51SJohn Levon if (kpti_enable == 1) {
91874ecdb51SJohn Levon /* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */
91974ecdb51SJohn Levon ktss0->tss_ist5 =
92074ecdb51SJohn Levon (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
9217c478bd9Sstevel@tonic-gate
92274ecdb51SJohn Levon /* This IST stack is used for all other intrs (for KPTI). */
92374ecdb51SJohn Levon ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
92474ecdb51SJohn Levon }
9257c478bd9Sstevel@tonic-gate
9267c478bd9Sstevel@tonic-gate /*
9277c478bd9Sstevel@tonic-gate * Set I/O bit map offset equal to size of TSS segment limit
9287c478bd9Sstevel@tonic-gate * for no I/O permission map. This will force all user I/O
9297c478bd9Sstevel@tonic-gate * instructions to generate #gp fault.
9307c478bd9Sstevel@tonic-gate */
9310cfdb603Sjosephb ktss0->tss_bitmapbase = sizeof (*ktss0);
9327c478bd9Sstevel@tonic-gate
9337c478bd9Sstevel@tonic-gate /*
9347c478bd9Sstevel@tonic-gate * Point %tr to descriptor for ktss0 in gdt.
9357c478bd9Sstevel@tonic-gate */
9367c478bd9Sstevel@tonic-gate wr_tsr(KTSS_SEL);
9377c478bd9Sstevel@tonic-gate }
9387c478bd9Sstevel@tonic-gate
939843e1988Sjohnlev #endif /* !__xpv */
940843e1988Sjohnlev
941843e1988Sjohnlev #if defined(__xpv)
942843e1988Sjohnlev
943843e1988Sjohnlev void
init_desctbls(void)944843e1988Sjohnlev init_desctbls(void)
945843e1988Sjohnlev {
946843e1988Sjohnlev uint_t vec;
947843e1988Sjohnlev user_desc_t *gdt;
948843e1988Sjohnlev
949843e1988Sjohnlev /*
950843e1988Sjohnlev * Setup and install our GDT.
951843e1988Sjohnlev */
952843e1988Sjohnlev gdt = init_gdt();
953843e1988Sjohnlev
954843e1988Sjohnlev /*
955843e1988Sjohnlev * Store static pa of gdt to speed up pa_to_ma() translations
956843e1988Sjohnlev * on lwp context switches.
957843e1988Sjohnlev */
958843e1988Sjohnlev ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
9590cfdb603Sjosephb CPU->cpu_gdt = gdt;
960843e1988Sjohnlev CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
961843e1988Sjohnlev
962843e1988Sjohnlev /*
963843e1988Sjohnlev * Setup and install our IDT.
964843e1988Sjohnlev */
9650cfdb603Sjosephb #if !defined(__lint)
9660cfdb603Sjosephb ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
9670cfdb603Sjosephb #endif
9680cfdb603Sjosephb idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
9690cfdb603Sjosephb PAGESIZE, PAGESIZE);
9709844da31SSeth Goldberg bzero(idt0, PAGESIZE);
9710cfdb603Sjosephb init_idt(idt0);
972843e1988Sjohnlev for (vec = 0; vec < NIDT; vec++)
973843e1988Sjohnlev xen_idt_write(&idt0[vec], vec);
974843e1988Sjohnlev
9750cfdb603Sjosephb CPU->cpu_idt = idt0;
976843e1988Sjohnlev
977843e1988Sjohnlev /*
978843e1988Sjohnlev * set default kernel stack
979843e1988Sjohnlev */
980843e1988Sjohnlev xen_stack_switch(KDS_SEL,
981843e1988Sjohnlev (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
982843e1988Sjohnlev
983843e1988Sjohnlev xen_init_callbacks();
984843e1988Sjohnlev
985843e1988Sjohnlev init_ldt();
986843e1988Sjohnlev }
987843e1988Sjohnlev
988843e1988Sjohnlev #else /* __xpv */
9897c478bd9Sstevel@tonic-gate
9907c478bd9Sstevel@tonic-gate void
init_desctbls(void)991ae115bc7Smrj init_desctbls(void)
9927c478bd9Sstevel@tonic-gate {
993ae115bc7Smrj user_desc_t *gdt;
994ae115bc7Smrj desctbr_t idtr;
995ae115bc7Smrj
9960cfdb603Sjosephb /*
9970cfdb603Sjosephb * Allocate IDT and TSS structures on unique pages for better
9980cfdb603Sjosephb * performance in virtual machines.
9990cfdb603Sjosephb */
10000cfdb603Sjosephb #if !defined(__lint)
10010cfdb603Sjosephb ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
10020cfdb603Sjosephb #endif
10030cfdb603Sjosephb idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
10040cfdb603Sjosephb PAGESIZE, PAGESIZE);
10059844da31SSeth Goldberg bzero(idt0, PAGESIZE);
10060cfdb603Sjosephb #if !defined(__lint)
10070cfdb603Sjosephb ASSERT(sizeof (*ktss0) <= PAGESIZE);
10080cfdb603Sjosephb #endif
1009f16a0f4cSRobert Mustacchi ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
10100cfdb603Sjosephb PAGESIZE, PAGESIZE);
10119844da31SSeth Goldberg bzero(ktss0, PAGESIZE);
10120cfdb603Sjosephb
10130cfdb603Sjosephb
1014ae115bc7Smrj /*
1015ae115bc7Smrj * Setup and install our GDT.
1016ae115bc7Smrj */
1017ae115bc7Smrj gdt = init_gdt();
1018ae115bc7Smrj ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
10190cfdb603Sjosephb CPU->cpu_gdt = gdt;
1020ae115bc7Smrj
102174ecdb51SJohn Levon /*
102274ecdb51SJohn Levon * Initialize this CPU's LDT.
102374ecdb51SJohn Levon */
102474ecdb51SJohn Levon CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
102574ecdb51SJohn Levon LDT_CPU_SIZE, PAGESIZE);
102674ecdb51SJohn Levon bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
102774ecdb51SJohn Levon CPU->cpu_m.mcpu_ldt_len = 0;
102874ecdb51SJohn Levon
1029ae115bc7Smrj /*
1030ae115bc7Smrj * Setup and install our IDT.
1031ae115bc7Smrj */
10320cfdb603Sjosephb init_idt(idt0);
1033ae115bc7Smrj
1034ae115bc7Smrj idtr.dtr_base = (uintptr_t)idt0;
10350cfdb603Sjosephb idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1036ae115bc7Smrj wr_idtr(&idtr);
10370cfdb603Sjosephb CPU->cpu_idt = idt0;
1038ae115bc7Smrj
1039ae115bc7Smrj
10407c478bd9Sstevel@tonic-gate init_tss();
10410cfdb603Sjosephb CPU->cpu_tss = ktss0;
10427c478bd9Sstevel@tonic-gate init_ldt();
104374ecdb51SJohn Levon
104474ecdb51SJohn Levon /* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. */
104574ecdb51SJohn Levon kpti_safe_cr3 = (uint64_t)getcr3();
10467c478bd9Sstevel@tonic-gate }
10479acbbeafSnn
1048843e1988Sjohnlev #endif /* __xpv */
1049843e1988Sjohnlev
1050309b04b8SJohn Levon #ifndef __xpv
1051309b04b8SJohn Levon /*
1052309b04b8SJohn Levon * As per Intel Vol 3 27.5.2, the GDTR limit is reset to 64Kb on a VM exit, so
1053309b04b8SJohn Levon * we have to manually fix it up ourselves.
1054309b04b8SJohn Levon *
1055309b04b8SJohn Levon * The caller may still need to make sure that it can't go off-CPU with the
1056309b04b8SJohn Levon * incorrect limit, before calling this (such as disabling pre-emption).
1057309b04b8SJohn Levon */
1058309b04b8SJohn Levon void
reset_gdtr_limit(void)1059309b04b8SJohn Levon reset_gdtr_limit(void)
1060309b04b8SJohn Levon {
1061309b04b8SJohn Levon ulong_t flags = intr_clear();
1062309b04b8SJohn Levon desctbr_t gdtr;
1063309b04b8SJohn Levon
1064309b04b8SJohn Levon rd_gdtr(&gdtr);
1065309b04b8SJohn Levon gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1;
1066309b04b8SJohn Levon wr_gdtr(&gdtr);
1067309b04b8SJohn Levon
1068309b04b8SJohn Levon intr_restore(flags);
1069309b04b8SJohn Levon }
#endif	/* !__xpv */
1071309b04b8SJohn Levon
1072ae115bc7Smrj /*
1073*075343cbSDan Cross * We need a GDT owned by the kernel and not the bootstrap relatively
1074*075343cbSDan Cross * early in kernel initialization (e.g., to have segments we can reliably
1075*075343cbSDan Cross * catch an exception on).
1076843e1988Sjohnlev *
1077*075343cbSDan Cross * Initializes a GDT with segments normally defined in the boot loader.
1078ae115bc7Smrj */
1079ae115bc7Smrj void
init_boot_gdt(user_desc_t * bgdt)1080ae115bc7Smrj init_boot_gdt(user_desc_t *bgdt)
1081ae115bc7Smrj {
1082*075343cbSDan Cross ASSERT3P(bgdt, !=, NULL);
1083*075343cbSDan Cross
1084*075343cbSDan Cross #ifdef __xpv
1085*075343cbSDan Cross /* XXX: It is unclear why this 32-bit data segment is marked long. */
1086*075343cbSDan Cross set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMRWA,
1087*075343cbSDan Cross SEL_KPL, SDP_PAGES, SDP_OP32);
1088*075343cbSDan Cross #else
1089*075343cbSDan Cross /*
1090*075343cbSDan Cross * Reset boot segments. These ostensibly come from the boot loader,
1091*075343cbSDan Cross * but we reset them to match our expectations, particulary if we
1092*075343cbSDan Cross * are not using that loader.
1093*075343cbSDan Cross */
1094*075343cbSDan Cross set_usegd(&bgdt[GDT_B32DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1095*075343cbSDan Cross SDT_MEMRWA, SEL_KPL, SDP_PAGES, SDP_OP32);
1096*075343cbSDan Cross set_usegd(&bgdt[GDT_B32CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1097*075343cbSDan Cross SDT_MEMERA, SEL_KPL, SDP_PAGES, SDP_OP32);
1098*075343cbSDan Cross
1099*075343cbSDan Cross /*
1100*075343cbSDan Cross * 16-bit segments for making BIOS calls (not applicable on all
1101*075343cbSDan Cross * architectures).
1102*075343cbSDan Cross */
1103*075343cbSDan Cross set_usegd(&bgdt[GDT_B16CODE], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1104*075343cbSDan Cross SDT_MEMERA, SEL_KPL, 0, 0);
1105*075343cbSDan Cross /*
1106*075343cbSDan Cross * XXX: SDP_OP32 makes this a 32-bit segment, which seems wrong
1107*075343cbSDan Cross * here, but that's what boot_gdt.s used.
1108*075343cbSDan Cross */
1109*075343cbSDan Cross set_usegd(&bgdt[GDT_B16DATA], SDP_SHORT, NULL, SDP_LIMIT_MAX,
1110*075343cbSDan Cross SDT_MEMRWA, SEL_KPL, 0, SDP_OP32);
1111*075343cbSDan Cross #endif /* __xpv */
1112*075343cbSDan Cross
1113*075343cbSDan Cross /*
1114*075343cbSDan Cross * A 64-bit code segment used in early boot. Early IDTs refer to this.
1115*075343cbSDan Cross */
1116*075343cbSDan Cross set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, SDP_LIMIT_MAX, SDT_MEMERA,
1117*075343cbSDan Cross SEL_KPL, SDP_PAGES, SDP_OP32);
1118ae115bc7Smrj }
1119ae115bc7Smrj
11209acbbeafSnn /*
11219acbbeafSnn * Enable interpositioning on the system call path by rewriting the
11229acbbeafSnn * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
11239acbbeafSnn * the branded entry points.
11249acbbeafSnn */
11259acbbeafSnn void
brand_interpositioning_enable(void * arg __unused)11265a469116SPatrick Mooney brand_interpositioning_enable(void *arg __unused)
11279acbbeafSnn {
1128843e1988Sjohnlev gate_desc_t *idt = CPU->cpu_idt;
1129027bcc9fSToomas Soome int i;
11309acbbeafSnn
1131843e1988Sjohnlev ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1132843e1988Sjohnlev
1133843e1988Sjohnlev for (i = 0; brand_tbl[i].ih_inum; i++) {
1134843e1988Sjohnlev idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1135843e1988Sjohnlev #if defined(__xpv)
1136843e1988Sjohnlev xen_idt_write(&idt[brand_tbl[i].ih_inum],
1137843e1988Sjohnlev brand_tbl[i].ih_inum);
1138843e1988Sjohnlev #endif
1139843e1988Sjohnlev }
11409acbbeafSnn
1141843e1988Sjohnlev #if defined(__xpv)
1142843e1988Sjohnlev
1143843e1988Sjohnlev /*
1144843e1988Sjohnlev * Currently the hypervisor only supports 64-bit syscalls via
1145843e1988Sjohnlev * syscall instruction. The 32-bit syscalls are handled by
1146843e1988Sjohnlev * interrupt gate above.
1147843e1988Sjohnlev */
1148843e1988Sjohnlev xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1149843e1988Sjohnlev CALLBACKF_mask_events);
1150843e1988Sjohnlev
1151843e1988Sjohnlev #else
1152843e1988Sjohnlev
11537417cfdeSKuriakose Kuruvilla if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
115474ecdb51SJohn Levon if (kpti_enable == 1) {
115574ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
115674ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
115774ecdb51SJohn Levon } else {
115874ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
115974ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
116074ecdb51SJohn Levon }
1161843e1988Sjohnlev }
1162843e1988Sjohnlev
11639acbbeafSnn #endif
11649acbbeafSnn
116574ecdb51SJohn Levon if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
116674ecdb51SJohn Levon if (kpti_enable == 1) {
116774ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP,
116874ecdb51SJohn Levon (uintptr_t)tr_brand_sys_sysenter);
116974ecdb51SJohn Levon } else {
117074ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
117174ecdb51SJohn Levon }
117274ecdb51SJohn Levon }
11739acbbeafSnn }
11749acbbeafSnn
11759acbbeafSnn /*
11769acbbeafSnn * Disable interpositioning on the system call path by rewriting the
11779acbbeafSnn * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
11789acbbeafSnn * the standard entry points, which bypass the interpositioning hooks.
11799acbbeafSnn */
11809acbbeafSnn void
brand_interpositioning_disable(void * arg __unused)11815a469116SPatrick Mooney brand_interpositioning_disable(void *arg __unused)
11829acbbeafSnn {
1183843e1988Sjohnlev gate_desc_t *idt = CPU->cpu_idt;
11849acbbeafSnn int i;
11859acbbeafSnn
1186843e1988Sjohnlev ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1187843e1988Sjohnlev
1188843e1988Sjohnlev for (i = 0; brand_tbl[i].ih_inum; i++) {
1189843e1988Sjohnlev idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1190843e1988Sjohnlev #if defined(__xpv)
1191843e1988Sjohnlev xen_idt_write(&idt[brand_tbl[i].ih_inum],
1192843e1988Sjohnlev brand_tbl[i].ih_inum);
1193843e1988Sjohnlev #endif
1194843e1988Sjohnlev }
11959acbbeafSnn
1196843e1988Sjohnlev #if defined(__xpv)
1197843e1988Sjohnlev
1198843e1988Sjohnlev /*
1199843e1988Sjohnlev * See comment above in brand_interpositioning_enable.
1200843e1988Sjohnlev */
1201843e1988Sjohnlev xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1202843e1988Sjohnlev CALLBACKF_mask_events);
1203843e1988Sjohnlev
1204843e1988Sjohnlev #else
1205843e1988Sjohnlev
12067417cfdeSKuriakose Kuruvilla if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
120774ecdb51SJohn Levon if (kpti_enable == 1) {
120874ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
120974ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
121074ecdb51SJohn Levon } else {
121174ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
121274ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
121374ecdb51SJohn Levon }
1214843e1988Sjohnlev }
1215843e1988Sjohnlev
12169acbbeafSnn #endif
12179acbbeafSnn
121874ecdb51SJohn Levon if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
121974ecdb51SJohn Levon if (kpti_enable == 1) {
122074ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
122174ecdb51SJohn Levon } else {
122274ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
122374ecdb51SJohn Levon }
122474ecdb51SJohn Levon }
12259acbbeafSnn }
1226