17c478bd9Sstevel@tonic-gate /* 2ae115bc7Smrj * CDDL HEADER START 3ae115bc7Smrj * 4ae115bc7Smrj * The contents of this file are subject to the terms of the 5ae115bc7Smrj * Common Development and Distribution License (the "License"). 6ae115bc7Smrj * You may not use this file except in compliance with the License. 7ae115bc7Smrj * 8ae115bc7Smrj * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9ae115bc7Smrj * or http://www.opensolaris.org/os/licensing. 10ae115bc7Smrj * See the License for the specific language governing permissions 11ae115bc7Smrj * and limitations under the License. 12ae115bc7Smrj * 13ae115bc7Smrj * When distributing Covered Code, include this CDDL HEADER in each 14ae115bc7Smrj * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15ae115bc7Smrj * If applicable, add the following below this CDDL HEADER, with the 16ae115bc7Smrj * fields enclosed by brackets "[]" replaced with your own identifying 17ae115bc7Smrj * information: Portions Copyright [yyyy] [name of copyright owner] 18ae115bc7Smrj * 19ae115bc7Smrj * CDDL HEADER END 20ae115bc7Smrj */ 21ae115bc7Smrj 22ae115bc7Smrj /* 23eb5a5c78SSurya Prakki * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 26f16a0f4cSRobert Mustacchi /* 2774ecdb51SJohn Levon * Copyright 2018 Joyent, Inc. All rights reserved. 28f16a0f4cSRobert Mustacchi */ 29f16a0f4cSRobert Mustacchi 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * Copyright (c) 1992 Terrence R. Lambert. 327c478bd9Sstevel@tonic-gate * Copyright (c) 1990 The Regents of the University of California. 337c478bd9Sstevel@tonic-gate * All rights reserved. 347c478bd9Sstevel@tonic-gate * 357c478bd9Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by 367c478bd9Sstevel@tonic-gate * William Jolitz. 
377c478bd9Sstevel@tonic-gate * 387c478bd9Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without 397c478bd9Sstevel@tonic-gate * modification, are permitted provided that the following conditions 407c478bd9Sstevel@tonic-gate * are met: 417c478bd9Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright 427c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer. 437c478bd9Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright 447c478bd9Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the 457c478bd9Sstevel@tonic-gate * documentation and/or other materials provided with the distribution. 467c478bd9Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software 477c478bd9Sstevel@tonic-gate * must display the following acknowledgement: 487c478bd9Sstevel@tonic-gate * This product includes software developed by the University of 497c478bd9Sstevel@tonic-gate * California, Berkeley and its contributors. 507c478bd9Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors 517c478bd9Sstevel@tonic-gate * may be used to endorse or promote products derived from this software 527c478bd9Sstevel@tonic-gate * without specific prior written permission. 537c478bd9Sstevel@tonic-gate * 547c478bd9Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 557c478bd9Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 567c478bd9Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 577c478bd9Sstevel@tonic-gate * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 587c478bd9Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 597c478bd9Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 607c478bd9Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 617c478bd9Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 627c478bd9Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 637c478bd9Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 647c478bd9Sstevel@tonic-gate * SUCH DAMAGE. 657c478bd9Sstevel@tonic-gate * 667c478bd9Sstevel@tonic-gate * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 677c478bd9Sstevel@tonic-gate */ 687c478bd9Sstevel@tonic-gate 697c478bd9Sstevel@tonic-gate #include <sys/types.h> 70ae115bc7Smrj #include <sys/sysmacros.h> 717c478bd9Sstevel@tonic-gate #include <sys/tss.h> 727c478bd9Sstevel@tonic-gate #include <sys/segments.h> 737c478bd9Sstevel@tonic-gate #include <sys/trap.h> 747c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 75ae115bc7Smrj #include <sys/bootconf.h> 767c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> 77ae115bc7Smrj #include <sys/controlregs.h> 787c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> 797c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 807c478bd9Sstevel@tonic-gate #include <sys/kobj.h> 817c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 827c478bd9Sstevel@tonic-gate #include <sys/reboot.h> 837c478bd9Sstevel@tonic-gate #include <sys/kdi.h> 84ae115bc7Smrj #include <sys/mach_mmu.h> 850baeff3dSrab #include <sys/systm.h> 8674ecdb51SJohn Levon #include <sys/note.h> 87843e1988Sjohnlev 88843e1988Sjohnlev #ifdef __xpv 89843e1988Sjohnlev #include <sys/hypervisor.h> 90843e1988Sjohnlev #include <vm/as.h> 91843e1988Sjohnlev #endif 92843e1988Sjohnlev 93ae115bc7Smrj #include <sys/promif.h> 
#include <sys/bootinfo.h>
#include <vm/kboot_mmu.h>
#include <vm/hat_pte.h>

/*
 * cpu0 and default tables and structures.
 */
user_desc_t	*gdt0;		/* cpu0 GDT; allocated and filled by init_gdt() */
#if !defined(__xpv)
desctbr_t	gdt0_default_r;
#endif

gate_desc_t	*idt0;		/* interrupt descriptor table */

tss_t		*ktss0;			/* kernel task state structure */


user_desc_t	zero_udesc;		/* base zero user desc native procs */
user_desc_t	null_udesc;		/* null user descriptor */
system_desc_t	null_sdesc;		/* null system descriptor */

user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */

/*
 * "Present" (_on) and "not present" (_off) copies of the 64-bit (ucs_*) and
 * 32-bit (ucs32_*) user code descriptors. init_gdt_common() copies them from
 * the GDT_UCODE and GDT_U32CODE entries and clears usd_p in the _off copies
 * so that using one of those forces a #np fault.
 */
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;

/*
 * If the size of this is changed, you must update hat_pcp_setup() and the
 * definitions in exception.s
 */
extern char dblfault_stack0[DEFAULTSTKSZ];
extern char nmi_stack0[DEFAULTSTKSZ];
extern char mce_stack0[DEFAULTSTKSZ];

/*
 * Handlers defined elsewhere that are installed in fasttable[].
 */
extern void	fast_null(void);
extern hrtime_t	get_hrtime(void);
extern hrtime_t	gethrvtime(void);
extern hrtime_t	get_hrestime(void);
extern uint64_t getlgrp(void); 1357c478bd9Sstevel@tonic-gate 1367c478bd9Sstevel@tonic-gate void (*(fasttable[]))(void) = { 1377c478bd9Sstevel@tonic-gate fast_null, /* T_FNULL routine */ 1387c478bd9Sstevel@tonic-gate fast_null, /* T_FGETFP routine (initially null) */ 1397c478bd9Sstevel@tonic-gate fast_null, /* T_FSETFP routine (initially null) */ 140027bcc9fSToomas Soome (void (*)())(uintptr_t)get_hrtime, /* T_GETHRTIME */ 141027bcc9fSToomas Soome (void (*)())(uintptr_t)gethrvtime, /* T_GETHRVTIME */ 142027bcc9fSToomas Soome (void (*)())(uintptr_t)get_hrestime, /* T_GETHRESTIME */ 143027bcc9fSToomas Soome (void (*)())(uintptr_t)getlgrp /* T_GETLGRP */ 1447c478bd9Sstevel@tonic-gate }; 1457c478bd9Sstevel@tonic-gate 1469acbbeafSnn /* 1479acbbeafSnn * Structure containing pre-computed descriptors to allow us to temporarily 1489acbbeafSnn * interpose on a standard handler. 1499acbbeafSnn */ 1509acbbeafSnn struct interposing_handler { 1519acbbeafSnn int ih_inum; 1529acbbeafSnn gate_desc_t ih_interp_desc; 1539acbbeafSnn gate_desc_t ih_default_desc; 1549acbbeafSnn }; 1559acbbeafSnn 1569acbbeafSnn /* 1579acbbeafSnn * The brand infrastructure interposes on two handlers, and we use one as a 1589acbbeafSnn * NULL signpost. 1599acbbeafSnn */ 160eb5a5c78SSurya Prakki static struct interposing_handler brand_tbl[2]; 1619acbbeafSnn 1627c478bd9Sstevel@tonic-gate /* 1637c478bd9Sstevel@tonic-gate * software prototypes for default local descriptor table 1647c478bd9Sstevel@tonic-gate */ 1657c478bd9Sstevel@tonic-gate 1667c478bd9Sstevel@tonic-gate /* 1677c478bd9Sstevel@tonic-gate * Routines for loading segment descriptors in format the hardware 1687c478bd9Sstevel@tonic-gate * can understand. 1697c478bd9Sstevel@tonic-gate */ 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * In long mode we have the new L or long mode attribute bit 1737c478bd9Sstevel@tonic-gate * for code segments. 
Only the conforming bit in type is used along 1747c478bd9Sstevel@tonic-gate * with descriptor priority and present bits. Default operand size must 1757c478bd9Sstevel@tonic-gate * be zero when in long mode. In 32-bit compatibility mode all fields 1767c478bd9Sstevel@tonic-gate * are treated as in legacy mode. For data segments while in long mode 1777c478bd9Sstevel@tonic-gate * only the present bit is loaded. 1787c478bd9Sstevel@tonic-gate */ 1797c478bd9Sstevel@tonic-gate void 1807c478bd9Sstevel@tonic-gate set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size, 1817c478bd9Sstevel@tonic-gate uint_t type, uint_t dpl, uint_t gran, uint_t defopsz) 1827c478bd9Sstevel@tonic-gate { 1837c478bd9Sstevel@tonic-gate ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG); 184a0955b86SJohn Levon /* This should never be a "system" segment. */ 185a0955b86SJohn Levon ASSERT3U(type & SDT_S, !=, 0); 1867c478bd9Sstevel@tonic-gate 1877c478bd9Sstevel@tonic-gate /* 1887c478bd9Sstevel@tonic-gate * 64-bit long mode. 1897c478bd9Sstevel@tonic-gate */ 1907c478bd9Sstevel@tonic-gate if (lmode == SDP_LONG) 1917c478bd9Sstevel@tonic-gate dp->usd_def32 = 0; /* 32-bit operands only */ 1927c478bd9Sstevel@tonic-gate else 1937c478bd9Sstevel@tonic-gate /* 1947c478bd9Sstevel@tonic-gate * 32-bit compatibility mode. 1957c478bd9Sstevel@tonic-gate */ 1967c478bd9Sstevel@tonic-gate dp->usd_def32 = defopsz; /* 0 = 16, 1 = 32-bit ops */ 1977c478bd9Sstevel@tonic-gate 198a0955b86SJohn Levon /* 199a0955b86SJohn Levon * We should always set the "accessed" bit (SDT_A), otherwise the CPU 200a0955b86SJohn Levon * will write to the GDT whenever we change segment registers around. 201a0955b86SJohn Levon * With KPTI on, the GDT is read-only in the user page table, which 202a0955b86SJohn Levon * causes crashes if we don't set this. 
203a0955b86SJohn Levon */ 204a0955b86SJohn Levon ASSERT3U(type & SDT_A, !=, 0); 205a0955b86SJohn Levon 2067c478bd9Sstevel@tonic-gate dp->usd_long = lmode; /* 64-bit mode */ 2077c478bd9Sstevel@tonic-gate dp->usd_type = type; 2087c478bd9Sstevel@tonic-gate dp->usd_dpl = dpl; 2097c478bd9Sstevel@tonic-gate dp->usd_p = 1; 2107c478bd9Sstevel@tonic-gate dp->usd_gran = gran; /* 0 = bytes, 1 = pages */ 2117c478bd9Sstevel@tonic-gate 2127c478bd9Sstevel@tonic-gate dp->usd_lobase = (uintptr_t)base; 2137c478bd9Sstevel@tonic-gate dp->usd_midbase = (uintptr_t)base >> 16; 2147c478bd9Sstevel@tonic-gate dp->usd_hibase = (uintptr_t)base >> (16 + 8); 2157c478bd9Sstevel@tonic-gate dp->usd_lolimit = size; 2167c478bd9Sstevel@tonic-gate dp->usd_hilimit = (uintptr_t)size >> 16; 2177c478bd9Sstevel@tonic-gate } 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate /* 2207c478bd9Sstevel@tonic-gate * Install system segment descriptor for LDT and TSS segments. 2217c478bd9Sstevel@tonic-gate */ 2227c478bd9Sstevel@tonic-gate 2237c478bd9Sstevel@tonic-gate void 2247c478bd9Sstevel@tonic-gate set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type, 2257c478bd9Sstevel@tonic-gate uint_t dpl) 2267c478bd9Sstevel@tonic-gate { 2277c478bd9Sstevel@tonic-gate dp->ssd_lolimit = size; 2287c478bd9Sstevel@tonic-gate dp->ssd_hilimit = (uintptr_t)size >> 16; 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate dp->ssd_lobase = (uintptr_t)base; 2317c478bd9Sstevel@tonic-gate dp->ssd_midbase = (uintptr_t)base >> 16; 2327c478bd9Sstevel@tonic-gate dp->ssd_hibase = (uintptr_t)base >> (16 + 8); 2337c478bd9Sstevel@tonic-gate dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8); 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate dp->ssd_type = type; 2367c478bd9Sstevel@tonic-gate dp->ssd_zero1 = 0; /* must be zero */ 2377c478bd9Sstevel@tonic-gate dp->ssd_zero2 = 0; 2387c478bd9Sstevel@tonic-gate dp->ssd_dpl = dpl; 2397c478bd9Sstevel@tonic-gate dp->ssd_p = 1; 2407c478bd9Sstevel@tonic-gate 
dp->ssd_gran = 0; /* force byte units */ 2417c478bd9Sstevel@tonic-gate } 2427c478bd9Sstevel@tonic-gate 243843e1988Sjohnlev void * 244843e1988Sjohnlev get_ssd_base(system_desc_t *dp) 245843e1988Sjohnlev { 246843e1988Sjohnlev uintptr_t base; 247843e1988Sjohnlev 248843e1988Sjohnlev base = (uintptr_t)dp->ssd_lobase | 249843e1988Sjohnlev (uintptr_t)dp->ssd_midbase << 16 | 250843e1988Sjohnlev (uintptr_t)dp->ssd_hibase << (16 + 8) | 251843e1988Sjohnlev (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8); 252843e1988Sjohnlev return ((void *)base); 253843e1988Sjohnlev } 254843e1988Sjohnlev 2557c478bd9Sstevel@tonic-gate /* 2567c478bd9Sstevel@tonic-gate * Install gate segment descriptor for interrupt, trap, call and task gates. 25774ecdb51SJohn Levon * 25874ecdb51SJohn Levon * For 64 bit native if we have KPTI enabled, we use the IST stack mechanism on 25974ecdb51SJohn Levon * all interrupts. We have different ISTs for each class of exceptions that are 26074ecdb51SJohn Levon * most likely to occur while handling an existing exception; while many of 26174ecdb51SJohn Levon * these are just going to panic, it's nice not to trample on the existing 26274ecdb51SJohn Levon * exception state for debugging purposes. 26374ecdb51SJohn Levon * 26474ecdb51SJohn Levon * Normal interrupts are all redirected unconditionally to the KPTI trampoline 26574ecdb51SJohn Levon * stack space. This unifies the trampoline handling between user and kernel 26674ecdb51SJohn Levon * space (and avoids the need to touch %gs). 26774ecdb51SJohn Levon * 26874ecdb51SJohn Levon * The KDI IDT *all* uses the DBG IST: consider single stepping tr_pftrap, when 26974ecdb51SJohn Levon * we do a read from KMDB that cause another #PF. Without its own IST, this 27074ecdb51SJohn Levon * would stomp on the kernel's mcpu_kpti_flt frame. 
2717c478bd9Sstevel@tonic-gate */ 27274ecdb51SJohn Levon uint_t 27374ecdb51SJohn Levon idt_vector_to_ist(uint_t vector) 2747c478bd9Sstevel@tonic-gate { 27574ecdb51SJohn Levon #if defined(__xpv) 27674ecdb51SJohn Levon _NOTE(ARGUNUSED(vector)); 27774ecdb51SJohn Levon return (IST_NONE); 27874ecdb51SJohn Levon #else 27974ecdb51SJohn Levon switch (vector) { 28074ecdb51SJohn Levon /* These should always use IST even without KPTI enabled. */ 28174ecdb51SJohn Levon case T_DBLFLT: 28274ecdb51SJohn Levon return (IST_DF); 28374ecdb51SJohn Levon case T_NMIFLT: 28474ecdb51SJohn Levon return (IST_NMI); 28574ecdb51SJohn Levon case T_MCE: 28674ecdb51SJohn Levon return (IST_MCE); 28774ecdb51SJohn Levon 28874ecdb51SJohn Levon case T_BPTFLT: 28974ecdb51SJohn Levon case T_SGLSTP: 29074ecdb51SJohn Levon if (kpti_enable == 1) { 29174ecdb51SJohn Levon return (IST_DBG); 29274ecdb51SJohn Levon } 29374ecdb51SJohn Levon return (IST_NONE); 29474ecdb51SJohn Levon case T_STKFLT: 29574ecdb51SJohn Levon case T_GPFLT: 29674ecdb51SJohn Levon case T_PGFLT: 29774ecdb51SJohn Levon if (kpti_enable == 1) { 29874ecdb51SJohn Levon return (IST_NESTABLE); 29974ecdb51SJohn Levon } 30074ecdb51SJohn Levon return (IST_NONE); 30174ecdb51SJohn Levon default: 30274ecdb51SJohn Levon if (kpti_enable == 1) { 30374ecdb51SJohn Levon return (IST_DEFAULT); 30474ecdb51SJohn Levon } 30574ecdb51SJohn Levon return (IST_NONE); 30674ecdb51SJohn Levon } 307843e1988Sjohnlev #endif 3087c478bd9Sstevel@tonic-gate } 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate void 3117c478bd9Sstevel@tonic-gate set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel, 31274ecdb51SJohn Levon uint_t type, uint_t dpl, uint_t ist) 3137c478bd9Sstevel@tonic-gate { 3147c478bd9Sstevel@tonic-gate dp->sgd_looffset = (uintptr_t)func; 3157c478bd9Sstevel@tonic-gate dp->sgd_hioffset = (uintptr_t)func >> 16; 31674ecdb51SJohn Levon dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16); 3177c478bd9Sstevel@tonic-gate dp->sgd_selector = 
(uint16_t)sel; 31874ecdb51SJohn Levon dp->sgd_ist = ist; 3197c478bd9Sstevel@tonic-gate dp->sgd_type = type; 3207c478bd9Sstevel@tonic-gate dp->sgd_dpl = dpl; 3217c478bd9Sstevel@tonic-gate dp->sgd_p = 1; 3227c478bd9Sstevel@tonic-gate } 3237c478bd9Sstevel@tonic-gate 324843e1988Sjohnlev /* 325843e1988Sjohnlev * Updates a single user descriptor in the the GDT of the current cpu. 326843e1988Sjohnlev * Caller is responsible for preventing cpu migration. 327843e1988Sjohnlev */ 328843e1988Sjohnlev 329843e1988Sjohnlev void 330843e1988Sjohnlev gdt_update_usegd(uint_t sidx, user_desc_t *udp) 331843e1988Sjohnlev { 332a0955b86SJohn Levon #if defined(DEBUG) 333a0955b86SJohn Levon /* This should never be a "system" segment, but it might be null. */ 334a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) { 335a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_S, !=, 0); 336a0955b86SJohn Levon } 337a0955b86SJohn Levon /* 338a0955b86SJohn Levon * We should always set the "accessed" bit (SDT_A), otherwise the CPU 339a0955b86SJohn Levon * will write to the GDT whenever we change segment registers around. 340a0955b86SJohn Levon * With KPTI on, the GDT is read-only in the user page table, which 341a0955b86SJohn Levon * causes crashes if we don't set this. 
342a0955b86SJohn Levon */ 343a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) { 344a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_A, !=, 0); 345a0955b86SJohn Levon } 346a0955b86SJohn Levon #endif 347843e1988Sjohnlev 348a0955b86SJohn Levon #if defined(__xpv) 349843e1988Sjohnlev uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx; 350843e1988Sjohnlev 351843e1988Sjohnlev if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp)) 352843e1988Sjohnlev panic("gdt_update_usegd: HYPERVISOR_update_descriptor"); 353843e1988Sjohnlev 354843e1988Sjohnlev #else /* __xpv */ 355843e1988Sjohnlev CPU->cpu_gdt[sidx] = *udp; 356843e1988Sjohnlev #endif /* __xpv */ 357843e1988Sjohnlev } 358843e1988Sjohnlev 359843e1988Sjohnlev /* 360843e1988Sjohnlev * Writes single descriptor pointed to by udp into a processes 361843e1988Sjohnlev * LDT entry pointed to by ldp. 362843e1988Sjohnlev */ 363843e1988Sjohnlev int 364843e1988Sjohnlev ldt_update_segd(user_desc_t *ldp, user_desc_t *udp) 365843e1988Sjohnlev { 366a0955b86SJohn Levon #if defined(DEBUG) 367a0955b86SJohn Levon /* This should never be a "system" segment, but it might be null. */ 368a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) { 369a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_S, !=, 0); 370a0955b86SJohn Levon } 371a0955b86SJohn Levon /* 372a0955b86SJohn Levon * We should always set the "accessed" bit (SDT_A), otherwise the CPU 373a0955b86SJohn Levon * will write to the LDT whenever we change segment registers around. 374a0955b86SJohn Levon * With KPTI on, the LDT is read-only in the user page table, which 375a0955b86SJohn Levon * causes crashes if we don't set this. 
376a0955b86SJohn Levon */ 377a0955b86SJohn Levon if (udp->usd_p != 0 || udp->usd_type != 0) { 378a0955b86SJohn Levon ASSERT3U(udp->usd_type & SDT_A, !=, 0); 379a0955b86SJohn Levon } 380a0955b86SJohn Levon #endif 381843e1988Sjohnlev 382a0955b86SJohn Levon #if defined(__xpv) 383843e1988Sjohnlev uint64_t dpa; 384843e1988Sjohnlev 385843e1988Sjohnlev dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) | 386843e1988Sjohnlev ((uintptr_t)ldp & PAGEOFFSET); 387843e1988Sjohnlev 388843e1988Sjohnlev /* 389843e1988Sjohnlev * The hypervisor is a little more restrictive about what it 390843e1988Sjohnlev * supports in the LDT. 391843e1988Sjohnlev */ 392843e1988Sjohnlev if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0) 393843e1988Sjohnlev return (EINVAL); 394843e1988Sjohnlev 395843e1988Sjohnlev #else /* __xpv */ 396843e1988Sjohnlev *ldp = *udp; 397843e1988Sjohnlev 398843e1988Sjohnlev #endif /* __xpv */ 399843e1988Sjohnlev return (0); 400843e1988Sjohnlev } 401843e1988Sjohnlev 402843e1988Sjohnlev #if defined(__xpv) 403843e1988Sjohnlev 404843e1988Sjohnlev /* 405843e1988Sjohnlev * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor. 406843e1988Sjohnlev * Returns true if a valid entry was written. 407843e1988Sjohnlev */ 408843e1988Sjohnlev int 409843e1988Sjohnlev xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg) 410843e1988Sjohnlev { 411843e1988Sjohnlev trap_info_t *ti = ti_arg; /* XXPV Aargh - segments.h comment */ 412843e1988Sjohnlev 413843e1988Sjohnlev /* 414843e1988Sjohnlev * skip holes in the IDT 415843e1988Sjohnlev */ 416843e1988Sjohnlev if (GATESEG_GETOFFSET(sgd) == 0) 417843e1988Sjohnlev return (0); 418843e1988Sjohnlev 419843e1988Sjohnlev ASSERT(sgd->sgd_type == SDT_SYSIGT); 420843e1988Sjohnlev ti->vector = vec; 421843e1988Sjohnlev TI_SET_DPL(ti, sgd->sgd_dpl); 422843e1988Sjohnlev 423843e1988Sjohnlev /* 424843e1988Sjohnlev * Is this an interrupt gate? 
425843e1988Sjohnlev */ 426843e1988Sjohnlev if (sgd->sgd_type == SDT_SYSIGT) { 427843e1988Sjohnlev /* LINTED */ 428843e1988Sjohnlev TI_SET_IF(ti, 1); 429843e1988Sjohnlev } 430843e1988Sjohnlev ti->cs = sgd->sgd_selector; 431843e1988Sjohnlev ti->cs |= SEL_KPL; /* force into ring 3. see KCS_SEL */ 432843e1988Sjohnlev ti->address = GATESEG_GETOFFSET(sgd); 433843e1988Sjohnlev return (1); 434843e1988Sjohnlev } 435843e1988Sjohnlev 436843e1988Sjohnlev /* 437843e1988Sjohnlev * Convert a single hw format gate descriptor and write it into our virtual IDT. 438843e1988Sjohnlev */ 439843e1988Sjohnlev void 440843e1988Sjohnlev xen_idt_write(gate_desc_t *sgd, uint_t vec) 441843e1988Sjohnlev { 442843e1988Sjohnlev trap_info_t trapinfo[2]; 443843e1988Sjohnlev 444843e1988Sjohnlev bzero(trapinfo, sizeof (trapinfo)); 445843e1988Sjohnlev if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0) 446843e1988Sjohnlev return; 447843e1988Sjohnlev if (xen_set_trap_table(trapinfo) != 0) 448843e1988Sjohnlev panic("xen_idt_write: xen_set_trap_table() failed"); 449843e1988Sjohnlev } 450843e1988Sjohnlev 451843e1988Sjohnlev #endif /* __xpv */ 452843e1988Sjohnlev 4537c478bd9Sstevel@tonic-gate 4547c478bd9Sstevel@tonic-gate /* 4557c478bd9Sstevel@tonic-gate * Build kernel GDT. 4567c478bd9Sstevel@tonic-gate */ 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate static void 459ae115bc7Smrj init_gdt_common(user_desc_t *gdt) 4607c478bd9Sstevel@tonic-gate { 461ae115bc7Smrj int i; 4627c478bd9Sstevel@tonic-gate 4637c478bd9Sstevel@tonic-gate /* 4647c478bd9Sstevel@tonic-gate * 64-bit kernel code segment. 4657c478bd9Sstevel@tonic-gate */ 466ae115bc7Smrj set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL, 4677c478bd9Sstevel@tonic-gate SDP_PAGES, SDP_OP32); 4687c478bd9Sstevel@tonic-gate 4697c478bd9Sstevel@tonic-gate /* 4707c478bd9Sstevel@tonic-gate * 64-bit kernel data segment. 
The limit attribute is ignored in 64-bit 4717c478bd9Sstevel@tonic-gate * mode, but we set it here to 0xFFFF so that we can use the SYSRET 4727c478bd9Sstevel@tonic-gate * instruction to return from system calls back to 32-bit applications. 4737c478bd9Sstevel@tonic-gate * SYSRET doesn't update the base, limit, or attributes of %ss or %ds 4747c478bd9Sstevel@tonic-gate * descriptors. We therefore must ensure that the kernel uses something, 4757c478bd9Sstevel@tonic-gate * though it will be ignored by hardware, that is compatible with 32-bit 4767c478bd9Sstevel@tonic-gate * apps. For the same reason we must set the default op size of this 4777c478bd9Sstevel@tonic-gate * descriptor to 32-bit operands. 4787c478bd9Sstevel@tonic-gate */ 479ae115bc7Smrj set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA, 4807c478bd9Sstevel@tonic-gate SEL_KPL, SDP_PAGES, SDP_OP32); 481ae115bc7Smrj gdt[GDT_KDATA].usd_def32 = 1; 4827c478bd9Sstevel@tonic-gate 4837c478bd9Sstevel@tonic-gate /* 4847c478bd9Sstevel@tonic-gate * 64-bit user code segment. 4857c478bd9Sstevel@tonic-gate */ 486ae115bc7Smrj set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL, 4877c478bd9Sstevel@tonic-gate SDP_PAGES, SDP_OP32); 4887c478bd9Sstevel@tonic-gate 4897c478bd9Sstevel@tonic-gate /* 4907c478bd9Sstevel@tonic-gate * 32-bit user code segment. 4917c478bd9Sstevel@tonic-gate */ 492ae115bc7Smrj set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA, 4937c478bd9Sstevel@tonic-gate SEL_UPL, SDP_PAGES, SDP_OP32); 4947c478bd9Sstevel@tonic-gate 495843e1988Sjohnlev /* 496843e1988Sjohnlev * See gdt_ucode32() and gdt_ucode_native(). 
497843e1988Sjohnlev */ 498843e1988Sjohnlev ucs_on = ucs_off = gdt[GDT_UCODE]; 499843e1988Sjohnlev ucs_off.usd_p = 0; /* forces #np fault */ 500843e1988Sjohnlev 501843e1988Sjohnlev ucs32_on = ucs32_off = gdt[GDT_U32CODE]; 502843e1988Sjohnlev ucs32_off.usd_p = 0; /* forces #np fault */ 503843e1988Sjohnlev 5047c478bd9Sstevel@tonic-gate /* 5057c478bd9Sstevel@tonic-gate * 32 and 64 bit data segments can actually share the same descriptor. 5067c478bd9Sstevel@tonic-gate * In long mode only the present bit is checked but all other fields 5077c478bd9Sstevel@tonic-gate * are loaded. But in compatibility mode all fields are interpreted 5087c478bd9Sstevel@tonic-gate * as in legacy mode so they must be set correctly for a 32-bit data 5097c478bd9Sstevel@tonic-gate * segment. 5107c478bd9Sstevel@tonic-gate */ 511ae115bc7Smrj set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL, 5127c478bd9Sstevel@tonic-gate SDP_PAGES, SDP_OP32); 5137c478bd9Sstevel@tonic-gate 514843e1988Sjohnlev #if !defined(__xpv) 515843e1988Sjohnlev 5167c478bd9Sstevel@tonic-gate /* 5170baeff3dSrab * The 64-bit kernel has no default LDT. By default, the LDT descriptor 5180baeff3dSrab * in the GDT is 0. 5197c478bd9Sstevel@tonic-gate */ 5207c478bd9Sstevel@tonic-gate 5217c478bd9Sstevel@tonic-gate /* 5227c478bd9Sstevel@tonic-gate * Kernel TSS 5237c478bd9Sstevel@tonic-gate */ 5240cfdb603Sjosephb set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0, 5250cfdb603Sjosephb sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL); 5267c478bd9Sstevel@tonic-gate 527843e1988Sjohnlev #endif /* !__xpv */ 528843e1988Sjohnlev 5297c478bd9Sstevel@tonic-gate /* 5307c478bd9Sstevel@tonic-gate * Initialize fs and gs descriptors for 32 bit processes. 5317c478bd9Sstevel@tonic-gate * Only attributes and limits are initialized, the effective 5327c478bd9Sstevel@tonic-gate * base address is programmed via fsbase/gsbase. 
5337c478bd9Sstevel@tonic-gate */ 534ae115bc7Smrj set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA, 5357c478bd9Sstevel@tonic-gate SEL_UPL, SDP_PAGES, SDP_OP32); 536ae115bc7Smrj set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA, 5377c478bd9Sstevel@tonic-gate SEL_UPL, SDP_PAGES, SDP_OP32); 5387c478bd9Sstevel@tonic-gate 5399acbbeafSnn /* 5409acbbeafSnn * Initialize the descriptors set aside for brand usage. 5419acbbeafSnn * Only attributes and limits are initialized. 5429acbbeafSnn */ 5439acbbeafSnn for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++) 544ae115bc7Smrj set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA, 5459acbbeafSnn SEL_UPL, SDP_PAGES, SDP_OP32); 5469acbbeafSnn 5477c478bd9Sstevel@tonic-gate /* 5487c478bd9Sstevel@tonic-gate * Initialize convenient zero base user descriptors for clearing 5497c478bd9Sstevel@tonic-gate * lwp private %fs and %gs descriptors in GDT. See setregs() for 5507c478bd9Sstevel@tonic-gate * an example. 5517c478bd9Sstevel@tonic-gate */ 5527c478bd9Sstevel@tonic-gate set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL, 5537c478bd9Sstevel@tonic-gate SDP_BYTES, SDP_OP32); 5547c478bd9Sstevel@tonic-gate set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL, 5557c478bd9Sstevel@tonic-gate SDP_PAGES, SDP_OP32); 5567c478bd9Sstevel@tonic-gate } 5577c478bd9Sstevel@tonic-gate 558843e1988Sjohnlev #if defined(__xpv) 559843e1988Sjohnlev 560843e1988Sjohnlev static user_desc_t * 561843e1988Sjohnlev init_gdt(void) 562843e1988Sjohnlev { 563843e1988Sjohnlev uint64_t gdtpa; 564843e1988Sjohnlev ulong_t ma[1]; /* XXPV should be a memory_t */ 565843e1988Sjohnlev ulong_t addr; 566843e1988Sjohnlev 567843e1988Sjohnlev #if !defined(__lint) 568843e1988Sjohnlev /* 569843e1988Sjohnlev * Our gdt is never larger than a single page. 
570843e1988Sjohnlev */ 571843e1988Sjohnlev ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 572843e1988Sjohnlev #endif 573843e1988Sjohnlev gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 574843e1988Sjohnlev PAGESIZE, PAGESIZE); 575843e1988Sjohnlev bzero(gdt0, PAGESIZE); 576843e1988Sjohnlev 577843e1988Sjohnlev init_gdt_common(gdt0); 578843e1988Sjohnlev 579843e1988Sjohnlev /* 580843e1988Sjohnlev * XXX Since we never invoke kmdb until after the kernel takes 581843e1988Sjohnlev * over the descriptor tables why not have it use the kernel's 582843e1988Sjohnlev * selectors? 583843e1988Sjohnlev */ 584843e1988Sjohnlev if (boothowto & RB_DEBUG) { 585843e1988Sjohnlev set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, 586843e1988Sjohnlev SEL_KPL, SDP_PAGES, SDP_OP32); 587843e1988Sjohnlev set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, 588843e1988Sjohnlev SEL_KPL, SDP_PAGES, SDP_OP32); 589843e1988Sjohnlev } 590843e1988Sjohnlev 591843e1988Sjohnlev /* 592843e1988Sjohnlev * Clear write permission for page containing the gdt and install it. 593843e1988Sjohnlev */ 594843e1988Sjohnlev gdtpa = pfn_to_pa(va_to_pfn(gdt0)); 595843e1988Sjohnlev ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT); 596843e1988Sjohnlev kbm_read_only((uintptr_t)gdt0, gdtpa); 597843e1988Sjohnlev xen_set_gdt(ma, NGDT); 598843e1988Sjohnlev 599843e1988Sjohnlev /* 600843e1988Sjohnlev * Reload the segment registers to use the new GDT. 601843e1988Sjohnlev * On 64-bit, fixup KCS_SEL to be in ring 3. 602843e1988Sjohnlev * See KCS_SEL in segments.h. 
603843e1988Sjohnlev */ 604843e1988Sjohnlev load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL); 605843e1988Sjohnlev 606843e1988Sjohnlev /* 607843e1988Sjohnlev * setup %gs for kernel 608843e1988Sjohnlev */ 609843e1988Sjohnlev xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]); 610843e1988Sjohnlev 611843e1988Sjohnlev /* 612843e1988Sjohnlev * XX64 We should never dereference off "other gsbase" or 613843e1988Sjohnlev * "fsbase". So, we should arrange to point FSBASE and 614843e1988Sjohnlev * KGSBASE somewhere truly awful e.g. point it at the last 615843e1988Sjohnlev * valid address below the hole so that any attempts to index 616843e1988Sjohnlev * off them cause an exception. 617843e1988Sjohnlev * 618843e1988Sjohnlev * For now, point it at 8G -- at least it should be unmapped 619843e1988Sjohnlev * until some 64-bit processes run. 620843e1988Sjohnlev */ 621843e1988Sjohnlev addr = 0x200000000ul; 622843e1988Sjohnlev xen_set_segment_base(SEGBASE_FS, addr); 623843e1988Sjohnlev xen_set_segment_base(SEGBASE_GS_USER, addr); 624843e1988Sjohnlev xen_set_segment_base(SEGBASE_GS_USER_SEL, 0); 625843e1988Sjohnlev 626843e1988Sjohnlev return (gdt0); 627843e1988Sjohnlev } 628843e1988Sjohnlev 629843e1988Sjohnlev #else /* __xpv */ 630843e1988Sjohnlev 631ae115bc7Smrj static user_desc_t * 6327c478bd9Sstevel@tonic-gate init_gdt(void) 6337c478bd9Sstevel@tonic-gate { 6347c478bd9Sstevel@tonic-gate desctbr_t r_bgdt, r_gdt; 6357c478bd9Sstevel@tonic-gate user_desc_t *bgdt; 6367c478bd9Sstevel@tonic-gate 637ae115bc7Smrj #if !defined(__lint) 6387c478bd9Sstevel@tonic-gate /* 639ae115bc7Smrj * Our gdt is never larger than a single page. 
640ae115bc7Smrj */ 641ae115bc7Smrj ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE); 642ae115bc7Smrj #endif 643ae115bc7Smrj gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA, 644ae115bc7Smrj PAGESIZE, PAGESIZE); 645ae115bc7Smrj bzero(gdt0, PAGESIZE); 646ae115bc7Smrj 647ae115bc7Smrj init_gdt_common(gdt0); 648ae115bc7Smrj 649ae115bc7Smrj /* 650ae115bc7Smrj * Copy in from boot's gdt to our gdt. 651ae115bc7Smrj * Entry 0 is the null descriptor by definition. 6527c478bd9Sstevel@tonic-gate */ 6537c478bd9Sstevel@tonic-gate rd_gdtr(&r_bgdt); 6547c478bd9Sstevel@tonic-gate bgdt = (user_desc_t *)r_bgdt.dtr_base; 6557c478bd9Sstevel@tonic-gate if (bgdt == NULL) 6567c478bd9Sstevel@tonic-gate panic("null boot gdt"); 6577c478bd9Sstevel@tonic-gate 658ae115bc7Smrj gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA]; 659ae115bc7Smrj gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE]; 660ae115bc7Smrj gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE]; 661ae115bc7Smrj gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA]; 662ae115bc7Smrj gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE]; 663ae115bc7Smrj 664ae115bc7Smrj /* 665ae115bc7Smrj * Install our new GDT 666ae115bc7Smrj */ 667ae115bc7Smrj r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1; 668ae115bc7Smrj r_gdt.dtr_base = (uintptr_t)gdt0; 669ae115bc7Smrj wr_gdtr(&r_gdt); 670ae115bc7Smrj 671ae115bc7Smrj /* 672ae115bc7Smrj * Reload the segment registers to use the new GDT 673ae115bc7Smrj */ 674ae115bc7Smrj load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL); 675ae115bc7Smrj 676ae115bc7Smrj /* 677ae115bc7Smrj * setup %gs for kernel 678ae115bc7Smrj */ 679ae115bc7Smrj wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]); 680ae115bc7Smrj 681ae115bc7Smrj /* 682ae115bc7Smrj * XX64 We should never dereference off "other gsbase" or 683ae115bc7Smrj * "fsbase". So, we should arrange to point FSBASE and 684ae115bc7Smrj * KGSBASE somewhere truly awful e.g. point it at the last 685ae115bc7Smrj * valid address below the hole so that any attempts to index 686ae115bc7Smrj * off them cause an exception. 
687ae115bc7Smrj * 688ae115bc7Smrj * For now, point it at 8G -- at least it should be unmapped 689ae115bc7Smrj * until some 64-bit processes run. 690ae115bc7Smrj */ 691ae115bc7Smrj wrmsr(MSR_AMD_FSBASE, 0x200000000ul); 692ae115bc7Smrj wrmsr(MSR_AMD_KGSBASE, 0x200000000ul); 693ae115bc7Smrj return (gdt0); 694ae115bc7Smrj } 695ae115bc7Smrj 696843e1988Sjohnlev #endif /* __xpv */ 697843e1988Sjohnlev 6987c478bd9Sstevel@tonic-gate 6997c478bd9Sstevel@tonic-gate /* 7007c478bd9Sstevel@tonic-gate * Build kernel IDT. 7017c478bd9Sstevel@tonic-gate * 702ae115bc7Smrj * Note that for amd64 we pretty much require every gate to be an interrupt 703ae115bc7Smrj * gate which blocks interrupts atomically on entry; that's because of our 704ae115bc7Smrj * dependency on using 'swapgs' every time we come into the kernel to find 705ae115bc7Smrj * the cpu structure. If we get interrupted just before doing that, %cs could 706ae115bc7Smrj * be in kernel mode (so that the trap prolog doesn't do a swapgs), but 707ae115bc7Smrj * %gsbase is really still pointing at something in userland. Bad things will 708ae115bc7Smrj * ensue. We also use interrupt gates for i386 as well even though this is not 709ae115bc7Smrj * required for some traps. 7107c478bd9Sstevel@tonic-gate * 7117c478bd9Sstevel@tonic-gate * Perhaps they should have invented a trap gate that does an atomic swapgs? 7127c478bd9Sstevel@tonic-gate */ 7137c478bd9Sstevel@tonic-gate static void 714ae115bc7Smrj init_idt_common(gate_desc_t *idt) 7157c478bd9Sstevel@tonic-gate { 71674ecdb51SJohn Levon set_gatesegd(&idt[T_ZERODIV], 71774ecdb51SJohn Levon (kpti_enable == 1) ? &tr_div0trap : &div0trap, 71874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV)); 71974ecdb51SJohn Levon set_gatesegd(&idt[T_SGLSTP], 72074ecdb51SJohn Levon (kpti_enable == 1) ? 
&tr_dbgtrap : &dbgtrap, 72174ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP)); 72274ecdb51SJohn Levon set_gatesegd(&idt[T_NMIFLT], 72374ecdb51SJohn Levon (kpti_enable == 1) ? &tr_nmiint : &nmiint, 72474ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT)); 72574ecdb51SJohn Levon set_gatesegd(&idt[T_BPTFLT], 72674ecdb51SJohn Levon (kpti_enable == 1) ? &tr_brktrap : &brktrap, 72774ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT)); 72874ecdb51SJohn Levon set_gatesegd(&idt[T_OVFLW], 72974ecdb51SJohn Levon (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap, 73074ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW)); 73174ecdb51SJohn Levon set_gatesegd(&idt[T_BOUNDFLT], 73274ecdb51SJohn Levon (kpti_enable == 1) ? &tr_boundstrap : &boundstrap, 73374ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT)); 73474ecdb51SJohn Levon set_gatesegd(&idt[T_ILLINST], 73574ecdb51SJohn Levon (kpti_enable == 1) ? &tr_invoptrap : &invoptrap, 73674ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST)); 73774ecdb51SJohn Levon set_gatesegd(&idt[T_NOEXTFLT], 73874ecdb51SJohn Levon (kpti_enable == 1) ? &tr_ndptrap : &ndptrap, 73974ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT)); 7407c478bd9Sstevel@tonic-gate 7417c478bd9Sstevel@tonic-gate /* 7427c478bd9Sstevel@tonic-gate * double fault handler. 743843e1988Sjohnlev * 744843e1988Sjohnlev * Note that on the hypervisor a guest does not receive #df faults. 745843e1988Sjohnlev * Instead a failsafe event is injected into the guest if its selectors 746843e1988Sjohnlev * and/or stack is in a broken state. See xen_failsafe_callback. 7477c478bd9Sstevel@tonic-gate */ 748843e1988Sjohnlev #if !defined(__xpv) 74974ecdb51SJohn Levon set_gatesegd(&idt[T_DBLFLT], 75074ecdb51SJohn Levon (kpti_enable == 1) ? 
&tr_syserrtrap : &syserrtrap, 75174ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT)); 752843e1988Sjohnlev #endif /* !__xpv */ 7537c478bd9Sstevel@tonic-gate 7547c478bd9Sstevel@tonic-gate /* 755ae115bc7Smrj * T_EXTOVRFLT coprocessor-segment-overrun not supported. 7567c478bd9Sstevel@tonic-gate */ 75774ecdb51SJohn Levon set_gatesegd(&idt[T_TSSFLT], 75874ecdb51SJohn Levon (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap, 75974ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT)); 76074ecdb51SJohn Levon set_gatesegd(&idt[T_SEGFLT], 76174ecdb51SJohn Levon (kpti_enable == 1) ? &tr_segnptrap : &segnptrap, 76274ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT)); 76374ecdb51SJohn Levon set_gatesegd(&idt[T_STKFLT], 76474ecdb51SJohn Levon (kpti_enable == 1) ? &tr_stktrap : &stktrap, 76574ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT)); 76674ecdb51SJohn Levon set_gatesegd(&idt[T_GPFLT], 76774ecdb51SJohn Levon (kpti_enable == 1) ? &tr_gptrap : &gptrap, 76874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT)); 76974ecdb51SJohn Levon set_gatesegd(&idt[T_PGFLT], 77074ecdb51SJohn Levon (kpti_enable == 1) ? &tr_pftrap : &pftrap, 77174ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT)); 77274ecdb51SJohn Levon set_gatesegd(&idt[T_EXTERRFLT], 77374ecdb51SJohn Levon (kpti_enable == 1) ? &tr_ndperr : &ndperr, 77474ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT)); 77574ecdb51SJohn Levon set_gatesegd(&idt[T_ALIGNMENT], 77674ecdb51SJohn Levon (kpti_enable == 1) ? &tr_achktrap : &achktrap, 77774ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT)); 77874ecdb51SJohn Levon set_gatesegd(&idt[T_MCE], 77974ecdb51SJohn Levon (kpti_enable == 1) ? 
&tr_mcetrap : &mcetrap, 78074ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE)); 78174ecdb51SJohn Levon set_gatesegd(&idt[T_SIMDFPE], 78274ecdb51SJohn Levon (kpti_enable == 1) ? &tr_xmtrap : &xmtrap, 78374ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE)); 7847c478bd9Sstevel@tonic-gate 7857c478bd9Sstevel@tonic-gate /* 7867c478bd9Sstevel@tonic-gate * install fast trap handler at 210. 7877c478bd9Sstevel@tonic-gate */ 78874ecdb51SJohn Levon set_gatesegd(&idt[T_FASTTRAP], 78974ecdb51SJohn Levon (kpti_enable == 1) ? &tr_fasttrap : &fasttrap, 79074ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP)); 7917c478bd9Sstevel@tonic-gate 7927c478bd9Sstevel@tonic-gate /* 7937c478bd9Sstevel@tonic-gate * System call handler. 7947c478bd9Sstevel@tonic-gate */ 79574ecdb51SJohn Levon set_gatesegd(&idt[T_SYSCALLINT], 79674ecdb51SJohn Levon (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int, 79774ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT)); 7987c478bd9Sstevel@tonic-gate 7997c478bd9Sstevel@tonic-gate /* 800f498645aSahl * Install the DTrace interrupt handler for the pid provider. 8017c478bd9Sstevel@tonic-gate */ 80274ecdb51SJohn Levon set_gatesegd(&idt[T_DTRACE_RET], 80374ecdb51SJohn Levon (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret, 80474ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET)); 8057c478bd9Sstevel@tonic-gate 8069acbbeafSnn /* 807eb5a5c78SSurya Prakki * Prepare interposing descriptor for the syscall handler 808eb5a5c78SSurya Prakki * and cache copy of the default descriptor. 8099acbbeafSnn */ 810eb5a5c78SSurya Prakki brand_tbl[0].ih_inum = T_SYSCALLINT; 811eb5a5c78SSurya Prakki brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT]; 812ae115bc7Smrj 81374ecdb51SJohn Levon set_gatesegd(&(brand_tbl[0].ih_interp_desc), 81474ecdb51SJohn Levon (kpti_enable == 1) ? 
&tr_brand_sys_syscall_int : 81574ecdb51SJohn Levon &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL, 81674ecdb51SJohn Levon idt_vector_to_ist(T_SYSCALLINT)); 8179acbbeafSnn 818eb5a5c78SSurya Prakki brand_tbl[1].ih_inum = 0; 8197c478bd9Sstevel@tonic-gate } 8207c478bd9Sstevel@tonic-gate 821843e1988Sjohnlev #if defined(__xpv) 822843e1988Sjohnlev 823843e1988Sjohnlev static void 824843e1988Sjohnlev init_idt(gate_desc_t *idt) 825843e1988Sjohnlev { 826843e1988Sjohnlev init_idt_common(idt); 827843e1988Sjohnlev } 828843e1988Sjohnlev 829843e1988Sjohnlev #else /* __xpv */ 830843e1988Sjohnlev 8317c478bd9Sstevel@tonic-gate static void 832ae115bc7Smrj init_idt(gate_desc_t *idt) 8337c478bd9Sstevel@tonic-gate { 8347c478bd9Sstevel@tonic-gate char ivctname[80]; 8357c478bd9Sstevel@tonic-gate void (*ivctptr)(void); 8367c478bd9Sstevel@tonic-gate int i; 8377c478bd9Sstevel@tonic-gate 8387c478bd9Sstevel@tonic-gate /* 8397c478bd9Sstevel@tonic-gate * Initialize entire table with 'reserved' trap and then overwrite 8407c478bd9Sstevel@tonic-gate * specific entries. T_EXTOVRFLT (9) is unsupported and reserved 8417c478bd9Sstevel@tonic-gate * since it can only be generated on a 386 processor. 15 is also 8427c478bd9Sstevel@tonic-gate * unsupported and reserved. 8437c478bd9Sstevel@tonic-gate */ 84474ecdb51SJohn Levon #if !defined(__xpv) 84574ecdb51SJohn Levon for (i = 0; i < NIDT; i++) { 84674ecdb51SJohn Levon set_gatesegd(&idt[i], 84774ecdb51SJohn Levon (kpti_enable == 1) ? 
&tr_resvtrap : &resvtrap, 84874ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, 84974ecdb51SJohn Levon idt_vector_to_ist(T_RESVTRAP)); 85074ecdb51SJohn Levon } 85174ecdb51SJohn Levon #else 85274ecdb51SJohn Levon for (i = 0; i < NIDT; i++) { 8539844da31SSeth Goldberg set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 85474ecdb51SJohn Levon IST_NONE); 85574ecdb51SJohn Levon } 85674ecdb51SJohn Levon #endif 8577c478bd9Sstevel@tonic-gate 8587c478bd9Sstevel@tonic-gate /* 8597c478bd9Sstevel@tonic-gate * 20-31 reserved 8607c478bd9Sstevel@tonic-gate */ 86174ecdb51SJohn Levon #if !defined(__xpv) 86274ecdb51SJohn Levon for (i = 20; i < 32; i++) { 86374ecdb51SJohn Levon set_gatesegd(&idt[i], 86474ecdb51SJohn Levon (kpti_enable == 1) ? &tr_invaltrap : &invaltrap, 86574ecdb51SJohn Levon KCS_SEL, SDT_SYSIGT, TRP_KPL, 86674ecdb51SJohn Levon idt_vector_to_ist(T_INVALTRAP)); 86774ecdb51SJohn Levon } 86874ecdb51SJohn Levon #else 86974ecdb51SJohn Levon for (i = 20; i < 32; i++) { 8709844da31SSeth Goldberg set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL, 87174ecdb51SJohn Levon IST_NONE); 87274ecdb51SJohn Levon } 87374ecdb51SJohn Levon #endif 8747c478bd9Sstevel@tonic-gate 8757c478bd9Sstevel@tonic-gate /* 8767c478bd9Sstevel@tonic-gate * interrupts 32 - 255 8777c478bd9Sstevel@tonic-gate */ 8787c478bd9Sstevel@tonic-gate for (i = 32; i < 256; i++) { 87974ecdb51SJohn Levon #if !defined(__xpv) 88074ecdb51SJohn Levon (void) snprintf(ivctname, sizeof (ivctname), 88174ecdb51SJohn Levon (kpti_enable == 1) ? 
"tr_ivct%d" : "ivct%d", i); 88274ecdb51SJohn Levon #else 8837c478bd9Sstevel@tonic-gate (void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i); 88474ecdb51SJohn Levon #endif 8857c478bd9Sstevel@tonic-gate ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0); 8867c478bd9Sstevel@tonic-gate if (ivctptr == NULL) 8877c478bd9Sstevel@tonic-gate panic("kobj_getsymvalue(%s) failed", ivctname); 8887c478bd9Sstevel@tonic-gate 88974ecdb51SJohn Levon set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL, 89074ecdb51SJohn Levon idt_vector_to_ist(i)); 8917c478bd9Sstevel@tonic-gate } 8927c478bd9Sstevel@tonic-gate 8939acbbeafSnn /* 894ae115bc7Smrj * Now install the common ones. Note that it will overlay some 895ae115bc7Smrj * entries installed above like T_SYSCALLINT, T_FASTTRAP etc. 8967c478bd9Sstevel@tonic-gate */ 897ae115bc7Smrj init_idt_common(idt); 8987c478bd9Sstevel@tonic-gate } 8997c478bd9Sstevel@tonic-gate 900843e1988Sjohnlev #endif /* __xpv */ 901843e1988Sjohnlev 9027c478bd9Sstevel@tonic-gate /* 9030baeff3dSrab * The kernel does not deal with LDTs unless a user explicitly creates 9040baeff3dSrab * one. Under normal circumstances, the LDTR contains 0. Any process attempting 9050baeff3dSrab * to reference the LDT will therefore cause a #gp. System calls made via the 9060baeff3dSrab * obsolete lcall mechanism are emulated by the #gp fault handler. 
9077c478bd9Sstevel@tonic-gate */ 9087c478bd9Sstevel@tonic-gate static void 9097c478bd9Sstevel@tonic-gate init_ldt(void) 9107c478bd9Sstevel@tonic-gate { 911843e1988Sjohnlev #if defined(__xpv) 912843e1988Sjohnlev xen_set_ldt(NULL, 0); 913843e1988Sjohnlev #else 9140baeff3dSrab wr_ldtr(0); 915843e1988Sjohnlev #endif 9167c478bd9Sstevel@tonic-gate } 9177c478bd9Sstevel@tonic-gate 918843e1988Sjohnlev #if !defined(__xpv) 9197c478bd9Sstevel@tonic-gate 9207c478bd9Sstevel@tonic-gate static void 9217c478bd9Sstevel@tonic-gate init_tss(void) 9227c478bd9Sstevel@tonic-gate { 92374ecdb51SJohn Levon extern struct cpu cpus[]; 9247c478bd9Sstevel@tonic-gate 9257c478bd9Sstevel@tonic-gate /* 92674ecdb51SJohn Levon * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each 92774ecdb51SJohn Levon * context switch but it'll be overwritten with this same value anyway. 9287c478bd9Sstevel@tonic-gate */ 92974ecdb51SJohn Levon if (kpti_enable == 1) { 93074ecdb51SJohn Levon ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp; 93174ecdb51SJohn Levon } 9327c478bd9Sstevel@tonic-gate 93374ecdb51SJohn Levon /* Set up the IST stacks for double fault, NMI, MCE. */ 93474ecdb51SJohn Levon ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)]; 93574ecdb51SJohn Levon ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)]; 93674ecdb51SJohn Levon ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)]; 9377c478bd9Sstevel@tonic-gate 9387c478bd9Sstevel@tonic-gate /* 93974ecdb51SJohn Levon * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is 94074ecdb51SJohn Levon * enabled), and also for KDI (always). 9417c478bd9Sstevel@tonic-gate */ 94274ecdb51SJohn Levon ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp; 9437c478bd9Sstevel@tonic-gate 94474ecdb51SJohn Levon if (kpti_enable == 1) { 94574ecdb51SJohn Levon /* This IST stack is used for #GP,#PF,#SS (fault) interrupts. 
*/ 94674ecdb51SJohn Levon ktss0->tss_ist5 = 94774ecdb51SJohn Levon (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp; 9487c478bd9Sstevel@tonic-gate 94974ecdb51SJohn Levon /* This IST stack is used for all other intrs (for KPTI). */ 95074ecdb51SJohn Levon ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp; 95174ecdb51SJohn Levon } 9527c478bd9Sstevel@tonic-gate 9537c478bd9Sstevel@tonic-gate /* 9547c478bd9Sstevel@tonic-gate * Set I/O bit map offset equal to size of TSS segment limit 9557c478bd9Sstevel@tonic-gate * for no I/O permission map. This will force all user I/O 9567c478bd9Sstevel@tonic-gate * instructions to generate #gp fault. 9577c478bd9Sstevel@tonic-gate */ 9580cfdb603Sjosephb ktss0->tss_bitmapbase = sizeof (*ktss0); 9597c478bd9Sstevel@tonic-gate 9607c478bd9Sstevel@tonic-gate /* 9617c478bd9Sstevel@tonic-gate * Point %tr to descriptor for ktss0 in gdt. 9627c478bd9Sstevel@tonic-gate */ 9637c478bd9Sstevel@tonic-gate wr_tsr(KTSS_SEL); 9647c478bd9Sstevel@tonic-gate } 9657c478bd9Sstevel@tonic-gate 966843e1988Sjohnlev #endif /* !__xpv */ 967843e1988Sjohnlev 968843e1988Sjohnlev #if defined(__xpv) 969843e1988Sjohnlev 970843e1988Sjohnlev void 971843e1988Sjohnlev init_desctbls(void) 972843e1988Sjohnlev { 973843e1988Sjohnlev uint_t vec; 974843e1988Sjohnlev user_desc_t *gdt; 975843e1988Sjohnlev 976843e1988Sjohnlev /* 977843e1988Sjohnlev * Setup and install our GDT. 978843e1988Sjohnlev */ 979843e1988Sjohnlev gdt = init_gdt(); 980843e1988Sjohnlev 981843e1988Sjohnlev /* 982843e1988Sjohnlev * Store static pa of gdt to speed up pa_to_ma() translations 983843e1988Sjohnlev * on lwp context switches. 984843e1988Sjohnlev */ 985843e1988Sjohnlev ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); 9860cfdb603Sjosephb CPU->cpu_gdt = gdt; 987843e1988Sjohnlev CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt)); 988843e1988Sjohnlev 989843e1988Sjohnlev /* 990843e1988Sjohnlev * Setup and install our IDT. 
991843e1988Sjohnlev */ 9920cfdb603Sjosephb #if !defined(__lint) 9930cfdb603Sjosephb ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); 9940cfdb603Sjosephb #endif 9950cfdb603Sjosephb idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, 9960cfdb603Sjosephb PAGESIZE, PAGESIZE); 9979844da31SSeth Goldberg bzero(idt0, PAGESIZE); 9980cfdb603Sjosephb init_idt(idt0); 999843e1988Sjohnlev for (vec = 0; vec < NIDT; vec++) 1000843e1988Sjohnlev xen_idt_write(&idt0[vec], vec); 1001843e1988Sjohnlev 10020cfdb603Sjosephb CPU->cpu_idt = idt0; 1003843e1988Sjohnlev 1004843e1988Sjohnlev /* 1005843e1988Sjohnlev * set default kernel stack 1006843e1988Sjohnlev */ 1007843e1988Sjohnlev xen_stack_switch(KDS_SEL, 1008843e1988Sjohnlev (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]); 1009843e1988Sjohnlev 1010843e1988Sjohnlev xen_init_callbacks(); 1011843e1988Sjohnlev 1012843e1988Sjohnlev init_ldt(); 1013843e1988Sjohnlev } 1014843e1988Sjohnlev 1015843e1988Sjohnlev #else /* __xpv */ 10167c478bd9Sstevel@tonic-gate 10177c478bd9Sstevel@tonic-gate void 1018ae115bc7Smrj init_desctbls(void) 10197c478bd9Sstevel@tonic-gate { 1020ae115bc7Smrj user_desc_t *gdt; 1021ae115bc7Smrj desctbr_t idtr; 1022ae115bc7Smrj 10230cfdb603Sjosephb /* 10240cfdb603Sjosephb * Allocate IDT and TSS structures on unique pages for better 10250cfdb603Sjosephb * performance in virtual machines. 
10260cfdb603Sjosephb */ 10270cfdb603Sjosephb #if !defined(__lint) 10280cfdb603Sjosephb ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE); 10290cfdb603Sjosephb #endif 10300cfdb603Sjosephb idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA, 10310cfdb603Sjosephb PAGESIZE, PAGESIZE); 10329844da31SSeth Goldberg bzero(idt0, PAGESIZE); 10330cfdb603Sjosephb #if !defined(__lint) 10340cfdb603Sjosephb ASSERT(sizeof (*ktss0) <= PAGESIZE); 10350cfdb603Sjosephb #endif 1036f16a0f4cSRobert Mustacchi ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA, 10370cfdb603Sjosephb PAGESIZE, PAGESIZE); 10389844da31SSeth Goldberg bzero(ktss0, PAGESIZE); 10390cfdb603Sjosephb 10400cfdb603Sjosephb 1041ae115bc7Smrj /* 1042ae115bc7Smrj * Setup and install our GDT. 1043ae115bc7Smrj */ 1044ae115bc7Smrj gdt = init_gdt(); 1045ae115bc7Smrj ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE)); 10460cfdb603Sjosephb CPU->cpu_gdt = gdt; 1047ae115bc7Smrj 104874ecdb51SJohn Levon /* 104974ecdb51SJohn Levon * Initialize this CPU's LDT. 105074ecdb51SJohn Levon */ 105174ecdb51SJohn Levon CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA, 105274ecdb51SJohn Levon LDT_CPU_SIZE, PAGESIZE); 105374ecdb51SJohn Levon bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE); 105474ecdb51SJohn Levon CPU->cpu_m.mcpu_ldt_len = 0; 105574ecdb51SJohn Levon 1056ae115bc7Smrj /* 1057ae115bc7Smrj * Setup and install our IDT. 1058ae115bc7Smrj */ 10590cfdb603Sjosephb init_idt(idt0); 1060ae115bc7Smrj 1061ae115bc7Smrj idtr.dtr_base = (uintptr_t)idt0; 10620cfdb603Sjosephb idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1; 1063ae115bc7Smrj wr_idtr(&idtr); 10640cfdb603Sjosephb CPU->cpu_idt = idt0; 1065ae115bc7Smrj 1066ae115bc7Smrj 10677c478bd9Sstevel@tonic-gate init_tss(); 10680cfdb603Sjosephb CPU->cpu_tss = ktss0; 10697c478bd9Sstevel@tonic-gate init_ldt(); 107074ecdb51SJohn Levon 107174ecdb51SJohn Levon /* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. 
*/ 107274ecdb51SJohn Levon kpti_safe_cr3 = (uint64_t)getcr3(); 10737c478bd9Sstevel@tonic-gate } 10749acbbeafSnn 1075843e1988Sjohnlev #endif /* __xpv */ 1076843e1988Sjohnlev 1077309b04b8SJohn Levon #ifndef __xpv 1078309b04b8SJohn Levon /* 1079309b04b8SJohn Levon * As per Intel Vol 3 27.5.2, the GDTR limit is reset to 64Kb on a VM exit, so 1080309b04b8SJohn Levon * we have to manually fix it up ourselves. 1081309b04b8SJohn Levon * 1082309b04b8SJohn Levon * The caller may still need to make sure that it can't go off-CPU with the 1083309b04b8SJohn Levon * incorrect limit, before calling this (such as disabling pre-emption). 1084309b04b8SJohn Levon */ 1085309b04b8SJohn Levon void 1086309b04b8SJohn Levon reset_gdtr_limit(void) 1087309b04b8SJohn Levon { 1088309b04b8SJohn Levon ulong_t flags = intr_clear(); 1089309b04b8SJohn Levon desctbr_t gdtr; 1090309b04b8SJohn Levon 1091309b04b8SJohn Levon rd_gdtr(&gdtr); 1092309b04b8SJohn Levon gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1; 1093309b04b8SJohn Levon wr_gdtr(&gdtr); 1094309b04b8SJohn Levon 1095309b04b8SJohn Levon intr_restore(flags); 1096309b04b8SJohn Levon } 1097309b04b8SJohn Levon #endif /* __xpv */ 1098309b04b8SJohn Levon 1099ae115bc7Smrj /* 1100ae115bc7Smrj * In the early kernel, we need to set up a simple GDT to run on. 1101843e1988Sjohnlev * 1102843e1988Sjohnlev * XXPV Can dboot use this too? 
See dboot_gdt.s 1103ae115bc7Smrj */ 1104ae115bc7Smrj void 1105ae115bc7Smrj init_boot_gdt(user_desc_t *bgdt) 1106ae115bc7Smrj { 1107ae115bc7Smrj set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL, 1108ae115bc7Smrj SDP_PAGES, SDP_OP32); 1109ae115bc7Smrj set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL, 1110ae115bc7Smrj SDP_PAGES, SDP_OP32); 1111ae115bc7Smrj } 1112ae115bc7Smrj 11139acbbeafSnn /* 11149acbbeafSnn * Enable interpositioning on the system call path by rewriting the 11159acbbeafSnn * sys{call|enter} MSRs and the syscall-related entries in the IDT to use 11169acbbeafSnn * the branded entry points. 11179acbbeafSnn */ 11189acbbeafSnn void 11199acbbeafSnn brand_interpositioning_enable(void) 11209acbbeafSnn { 1121843e1988Sjohnlev gate_desc_t *idt = CPU->cpu_idt; 1122027bcc9fSToomas Soome int i; 11239acbbeafSnn 1124843e1988Sjohnlev ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); 1125843e1988Sjohnlev 1126843e1988Sjohnlev for (i = 0; brand_tbl[i].ih_inum; i++) { 1127843e1988Sjohnlev idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc; 1128843e1988Sjohnlev #if defined(__xpv) 1129843e1988Sjohnlev xen_idt_write(&idt[brand_tbl[i].ih_inum], 1130843e1988Sjohnlev brand_tbl[i].ih_inum); 1131843e1988Sjohnlev #endif 1132843e1988Sjohnlev } 11339acbbeafSnn 1134843e1988Sjohnlev #if defined(__xpv) 1135843e1988Sjohnlev 1136843e1988Sjohnlev /* 1137843e1988Sjohnlev * Currently the hypervisor only supports 64-bit syscalls via 1138843e1988Sjohnlev * syscall instruction. The 32-bit syscalls are handled by 1139843e1988Sjohnlev * interrupt gate above. 
1140843e1988Sjohnlev */ 1141843e1988Sjohnlev xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall, 1142843e1988Sjohnlev CALLBACKF_mask_events); 1143843e1988Sjohnlev 1144843e1988Sjohnlev #else 1145843e1988Sjohnlev 11467417cfdeSKuriakose Kuruvilla if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { 114774ecdb51SJohn Levon if (kpti_enable == 1) { 114874ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall); 114974ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32); 115074ecdb51SJohn Levon } else { 115174ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall); 115274ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32); 115374ecdb51SJohn Levon } 1154843e1988Sjohnlev } 1155843e1988Sjohnlev 11569acbbeafSnn #endif 11579acbbeafSnn 115874ecdb51SJohn Levon if (is_x86_feature(x86_featureset, X86FSET_SEP)) { 115974ecdb51SJohn Levon if (kpti_enable == 1) { 116074ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP, 116174ecdb51SJohn Levon (uintptr_t)tr_brand_sys_sysenter); 116274ecdb51SJohn Levon } else { 116374ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter); 116474ecdb51SJohn Levon } 116574ecdb51SJohn Levon } 11669acbbeafSnn } 11679acbbeafSnn 11689acbbeafSnn /* 11699acbbeafSnn * Disable interpositioning on the system call path by rewriting the 11709acbbeafSnn * sys{call|enter} MSRs and the syscall-related entries in the IDT to use 11719acbbeafSnn * the standard entry points, which bypass the interpositioning hooks. 
11729acbbeafSnn */ 11739acbbeafSnn void 11749acbbeafSnn brand_interpositioning_disable(void) 11759acbbeafSnn { 1176843e1988Sjohnlev gate_desc_t *idt = CPU->cpu_idt; 11779acbbeafSnn int i; 11789acbbeafSnn 1179843e1988Sjohnlev ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL); 1180843e1988Sjohnlev 1181843e1988Sjohnlev for (i = 0; brand_tbl[i].ih_inum; i++) { 1182843e1988Sjohnlev idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc; 1183843e1988Sjohnlev #if defined(__xpv) 1184843e1988Sjohnlev xen_idt_write(&idt[brand_tbl[i].ih_inum], 1185843e1988Sjohnlev brand_tbl[i].ih_inum); 1186843e1988Sjohnlev #endif 1187843e1988Sjohnlev } 11889acbbeafSnn 1189843e1988Sjohnlev #if defined(__xpv) 1190843e1988Sjohnlev 1191843e1988Sjohnlev /* 1192843e1988Sjohnlev * See comment above in brand_interpositioning_enable. 1193843e1988Sjohnlev */ 1194843e1988Sjohnlev xen_set_callback(sys_syscall, CALLBACKTYPE_syscall, 1195843e1988Sjohnlev CALLBACKF_mask_events); 1196843e1988Sjohnlev 1197843e1988Sjohnlev #else 1198843e1988Sjohnlev 11997417cfdeSKuriakose Kuruvilla if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) { 120074ecdb51SJohn Levon if (kpti_enable == 1) { 120174ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall); 120274ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32); 120374ecdb51SJohn Levon } else { 120474ecdb51SJohn Levon wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall); 120574ecdb51SJohn Levon wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32); 120674ecdb51SJohn Levon } 1207843e1988Sjohnlev } 1208843e1988Sjohnlev 12099acbbeafSnn #endif 12109acbbeafSnn 121174ecdb51SJohn Levon if (is_x86_feature(x86_featureset, X86FSET_SEP)) { 121274ecdb51SJohn Levon if (kpti_enable == 1) { 121374ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter); 121474ecdb51SJohn Levon } else { 121574ecdb51SJohn Levon wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter); 121674ecdb51SJohn Levon } 121774ecdb51SJohn Levon } 12189acbbeafSnn } 1219