1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2018 Joyent, Inc. All rights reserved.
28  */
29 
30 /*
31  * Copyright (c) 1992 Terrence R. Lambert.
32  * Copyright (c) 1990 The Regents of the University of California.
33  * All rights reserved.
34  *
35  * This code is derived from software contributed to Berkeley by
36  * William Jolitz.
37  *
38  * Redistribution and use in source and binary forms, with or without
39  * modification, are permitted provided that the following conditions
40  * are met:
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in the
45  *    documentation and/or other materials provided with the distribution.
46  * 3. All advertising materials mentioning features or use of this software
47  *    must display the following acknowledgement:
48  *	This product includes software developed by the University of
49  *	California, Berkeley and its contributors.
50  * 4. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
67  */
68 
69 #include <sys/types.h>
70 #include <sys/sysmacros.h>
71 #include <sys/tss.h>
72 #include <sys/segments.h>
73 #include <sys/trap.h>
74 #include <sys/cpuvar.h>
75 #include <sys/bootconf.h>
76 #include <sys/x86_archext.h>
77 #include <sys/controlregs.h>
78 #include <sys/archsystm.h>
79 #include <sys/machsystm.h>
80 #include <sys/kobj.h>
81 #include <sys/cmn_err.h>
82 #include <sys/reboot.h>
83 #include <sys/kdi.h>
84 #include <sys/mach_mmu.h>
85 #include <sys/systm.h>
86 #include <sys/note.h>
87 
88 #ifdef __xpv
89 #include <sys/hypervisor.h>
90 #include <vm/as.h>
91 #endif
92 
93 #include <sys/promif.h>
94 #include <sys/bootinfo.h>
95 #include <vm/kboot_mmu.h>
96 #include <vm/hat_pte.h>
97 
98 /*
99  * cpu0 and default tables and structures.
100  */
101 user_desc_t	*gdt0;
102 #if !defined(__xpv)
103 desctbr_t	gdt0_default_r;
104 #endif
105 
106 gate_desc_t	*idt0;		/* interrupt descriptor table */
107 
108 tss_t		*ktss0;			/* kernel task state structure */
109 
110 
111 user_desc_t	zero_udesc;		/* base zero user desc native procs */
112 user_desc_t	null_udesc;		/* null user descriptor */
113 system_desc_t	null_sdesc;		/* null system descriptor */
114 
115 user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
116 
117 user_desc_t	ucs_on;
118 user_desc_t	ucs_off;
119 user_desc_t	ucs32_on;
120 user_desc_t	ucs32_off;
121 
122 /*
123  * If the size of this is changed, you must update hat_pcp_setup() and the
124  * definitions in exception.s
125  */
126 extern char dblfault_stack0[DEFAULTSTKSZ];
127 extern char nmi_stack0[DEFAULTSTKSZ];
128 extern char mce_stack0[DEFAULTSTKSZ];
129 
130 extern void	fast_null(void);
131 extern hrtime_t	get_hrtime(void);
132 extern hrtime_t	gethrvtime(void);
133 extern hrtime_t	get_hrestime(void);
134 extern uint64_t	getlgrp(void);
135 
136 void (*(fasttable[]))(void) = {
137 	fast_null,			/* T_FNULL routine */
138 	fast_null,			/* T_FGETFP routine (initially null) */
139 	fast_null,			/* T_FSETFP routine (initially null) */
140 	(void (*)())(uintptr_t)get_hrtime,	/* T_GETHRTIME */
141 	(void (*)())(uintptr_t)gethrvtime,	/* T_GETHRVTIME */
142 	(void (*)())(uintptr_t)get_hrestime,	/* T_GETHRESTIME */
143 	(void (*)())(uintptr_t)getlgrp		/* T_GETLGRP */
144 };
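/*
 * Note: userland reaches these routines via the fast trap gate that
 * init_idt_common() installs at vector 210 (T_FASTTRAP) below; the
 * handler uses the fast trap number requested by userland to index
 * this table.
 */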
145 
146 /*
147  * Structure containing pre-computed descriptors to allow us to temporarily
148  * interpose on a standard handler.
149  */
150 struct interposing_handler {
151 	int ih_inum;
152 	gate_desc_t ih_interp_desc;
153 	gate_desc_t ih_default_desc;
154 };
155 
156 /*
157  * The brand infrastructure interposes on the system call handler; the
158  * second entry in this table is a NULL signpost marking the end.
159  */
160 static struct interposing_handler brand_tbl[2];
161 
162 /*
163  * software prototypes for default local descriptor table
164  */
165 
166 /*
167  * Routines for loading segment descriptors in format the hardware
168  * can understand.
169  */
170 
171 /*
172  * In long mode we have the new L or long mode attribute bit
173  * for code segments. Only the conforming bit in the type is used, along
174  * with the descriptor privilege level (DPL) and present bits. The default
175  * operand size must be zero when in long mode. In 32-bit compatibility
176  * mode all fields are treated as in legacy mode. For data segments, while
177  * in long mode only the present bit is loaded.
178  */
179 void
180 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
181     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
182 {
183 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
184 	/* This should never be a "system" segment. */
185 	ASSERT3U(type & SDT_S, !=, 0);
186 
187 	/*
188 	 * 64-bit long mode.
189 	 */
190 	if (lmode == SDP_LONG)
191 		dp->usd_def32 = 0;		/* must be 0 in long mode */
192 	else
193 		/*
194 		 * 32-bit compatibility mode.
195 		 */
196 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
197 
198 	/*
199 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
200 	 * will write to the GDT whenever we change segment registers around.
201 	 * With KPTI on, the GDT is read-only in the user page table, which
202 	 * causes crashes if we don't set this.
203 	 */
204 	ASSERT3U(type & SDT_A, !=, 0);
205 
206 	dp->usd_long = lmode;	/* 64-bit mode */
207 	dp->usd_type = type;
208 	dp->usd_dpl = dpl;
209 	dp->usd_p = 1;
210 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
211 
212 	dp->usd_lobase = (uintptr_t)base;
213 	dp->usd_midbase = (uintptr_t)base >> 16;
214 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
215 	dp->usd_lolimit = size;
216 	dp->usd_hilimit = (uintptr_t)size >> 16;
217 }
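/*
 * For illustration (hypothetical values): a base of 0x12345678 is split
 * by the shifts above into usd_lobase = 0x5678, usd_midbase = 0x34 and
 * usd_hibase = 0x12; a limit of 0xABCDE is split into usd_lolimit =
 * 0xBCDE and usd_hilimit = 0xA.
 */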
218 
219 /*
220  * Install system segment descriptor for LDT and TSS segments.
221  */
222 
223 void
224 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
225     uint_t dpl)
226 {
227 	dp->ssd_lolimit = size;
228 	dp->ssd_hilimit = (uintptr_t)size >> 16;
229 
230 	dp->ssd_lobase = (uintptr_t)base;
231 	dp->ssd_midbase = (uintptr_t)base >> 16;
232 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
233 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
234 
235 	dp->ssd_type = type;
236 	dp->ssd_zero1 = 0;	/* must be zero */
237 	dp->ssd_zero2 = 0;
238 	dp->ssd_dpl = dpl;
239 	dp->ssd_p = 1;
240 	dp->ssd_gran = 0;	/* force byte units */
241 }
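/*
 * In long mode, LDT and TSS descriptors are expanded to 16 bytes (two
 * GDT slots) so they can hold a full 64-bit base: ssd_hi64base above
 * carries bits 32-63 of the base, and the zero fields must be clear for
 * the descriptor to be valid.
 */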
242 
243 void *
244 get_ssd_base(system_desc_t *dp)
245 {
246 	uintptr_t	base;
247 
248 	base = (uintptr_t)dp->ssd_lobase |
249 	    (uintptr_t)dp->ssd_midbase << 16 |
250 	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
251 	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
252 	return ((void *)base);
253 }
254 
255 /*
256  * Install gate segment descriptor for interrupt, trap, call and task gates.
257  *
258  * For 64-bit native, if we have KPTI enabled, we use the IST stack mechanism on
259  * all interrupts.  We have different ISTs for each class of exceptions that are
260  * most likely to occur while handling an existing exception; while many of
261  * these are just going to panic, it's nice not to trample on the existing
262  * exception state for debugging purposes.
263  *
264  * Normal interrupts are all redirected unconditionally to the KPTI trampoline
265  * stack space. This unifies the trampoline handling between user and kernel
266  * space (and avoids the need to touch %gs).
267  *
268  * The KDI IDT uses the DBG IST for *all* vectors: consider single-stepping
269  * tr_pftrap, when a read from KMDB causes another #PF.  Without its own IST,
270  * this would stomp on the kernel's mcpu_kpti_flt frame.
271  */
272 uint_t
273 idt_vector_to_ist(uint_t vector)
274 {
275 #if defined(__xpv)
276 	_NOTE(ARGUNUSED(vector));
277 	return (IST_NONE);
278 #else
279 	switch (vector) {
280 	/* These should always use IST even without KPTI enabled. */
281 	case T_DBLFLT:
282 		return (IST_DF);
283 	case T_NMIFLT:
284 		return (IST_NMI);
285 	case T_MCE:
286 		return (IST_MCE);
287 
288 	case T_BPTFLT:
289 	case T_SGLSTP:
290 		if (kpti_enable == 1) {
291 			return (IST_DBG);
292 		}
293 		return (IST_NONE);
294 	case T_STKFLT:
295 	case T_GPFLT:
296 	case T_PGFLT:
297 		if (kpti_enable == 1) {
298 			return (IST_NESTABLE);
299 		}
300 		return (IST_NONE);
301 	default:
302 		if (kpti_enable == 1) {
303 			return (IST_DEFAULT);
304 		}
305 		return (IST_NONE);
306 	}
307 #endif
308 }
309 
310 void
311 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
312     uint_t type, uint_t dpl, uint_t ist)
313 {
314 	dp->sgd_looffset = (uintptr_t)func;
315 	dp->sgd_hioffset = (uintptr_t)func >> 16;
316 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
317 	dp->sgd_selector =  (uint16_t)sel;
318 	dp->sgd_ist = ist;
319 	dp->sgd_type = type;
320 	dp->sgd_dpl = dpl;
321 	dp->sgd_p = 1;
322 }
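/*
 * An ist argument of IST_NONE (0) leaves the gate on the normal kernel
 * stack; values 1-7 select the corresponding tss_ist slot set up in
 * init_tss() below, as chosen per-vector by idt_vector_to_ist() above.
 */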
323 
324 /*
325  * Updates a single user descriptor in the GDT of the current CPU.
326  * Caller is responsible for preventing cpu migration.
327  */
328 
329 void
330 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
331 {
332 #if defined(DEBUG)
333 	/* This should never be a "system" segment, but it might be null. */
334 	if (udp->usd_p != 0 || udp->usd_type != 0) {
335 		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
336 	}
337 	/*
338 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
339 	 * will write to the GDT whenever we change segment registers around.
340 	 * With KPTI on, the GDT is read-only in the user page table, which
341 	 * causes crashes if we don't set this.
342 	 */
343 	if (udp->usd_p != 0 || udp->usd_type != 0) {
344 		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
345 	}
346 #endif
347 
348 #if defined(__xpv)
349 	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
350 
351 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
352 		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
353 
354 #else	/* __xpv */
355 	CPU->cpu_gdt[sidx] = *udp;
356 #endif	/* __xpv */
357 }
358 
359 /*
360  * Writes the single descriptor pointed to by udp into the process's
361  * LDT entry pointed to by ldp.
362  */
363 int
364 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
365 {
366 #if defined(DEBUG)
367 	/* This should never be a "system" segment, but it might be null. */
368 	if (udp->usd_p != 0 || udp->usd_type != 0) {
369 		ASSERT3U(udp->usd_type & SDT_S, !=, 0);
370 	}
371 	/*
372 	 * We should always set the "accessed" bit (SDT_A), otherwise the CPU
373 	 * will write to the LDT whenever we change segment registers around.
374 	 * With KPTI on, the LDT is read-only in the user page table, which
375 	 * causes crashes if we don't set this.
376 	 */
377 	if (udp->usd_p != 0 || udp->usd_type != 0) {
378 		ASSERT3U(udp->usd_type & SDT_A, !=, 0);
379 	}
380 #endif
381 
382 #if defined(__xpv)
383 	uint64_t dpa;
384 
385 	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
386 	    ((uintptr_t)ldp & PAGEOFFSET);
387 
388 	/*
389 	 * The hypervisor is a little more restrictive about what it
390 	 * supports in the LDT.
391 	 */
392 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
393 		return (EINVAL);
394 
395 #else	/* __xpv */
396 	*ldp = *udp;
397 
398 #endif	/* __xpv */
399 	return (0);
400 }
401 
402 #if defined(__xpv)
403 
404 /*
405  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
406  * Returns true if a valid entry was written.
407  */
408 int
409 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
410 {
411 	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */
412 
413 	/*
414 	 * skip holes in the IDT
415 	 */
416 	if (GATESEG_GETOFFSET(sgd) == 0)
417 		return (0);
418 
419 	ASSERT(sgd->sgd_type == SDT_SYSIGT);
420 	ti->vector = vec;
421 	TI_SET_DPL(ti, sgd->sgd_dpl);
422 
423 	/*
424 	 * Is this an interrupt gate?
425 	 */
426 	if (sgd->sgd_type == SDT_SYSIGT) {
427 		/* LINTED */
428 		TI_SET_IF(ti, 1);
429 	}
430 	ti->cs = sgd->sgd_selector;
431 	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
432 	ti->address = GATESEG_GETOFFSET(sgd);
433 	return (1);
434 }
435 
436 /*
437  * Convert a single hw format gate descriptor and write it into our virtual IDT.
438  */
439 void
440 xen_idt_write(gate_desc_t *sgd, uint_t vec)
441 {
442 	trap_info_t trapinfo[2];
443 
444 	bzero(trapinfo, sizeof (trapinfo));
445 	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
446 		return;
447 	if (xen_set_trap_table(trapinfo) != 0)
448 		panic("xen_idt_write: xen_set_trap_table() failed");
449 }
450 
451 #endif	/* __xpv */
452 
453 
454 /*
455  * Build kernel GDT.
456  */
457 
458 static void
459 init_gdt_common(user_desc_t *gdt)
460 {
461 	int i;
462 
463 	/*
464 	 * 64-bit kernel code segment.
465 	 */
466 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
467 	    SDP_PAGES, SDP_OP32);
468 
469 	/*
470 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
471 	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
472 	 * instruction to return from system calls back to 32-bit applications.
473 	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
474 	 * descriptors. We therefore must ensure that the kernel uses something,
475 	 * though it will be ignored by hardware, that is compatible with 32-bit
476 	 * apps. For the same reason we must set the default op size of this
477 	 * descriptor to 32-bit operands.
478 	 */
479 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
480 	    SEL_KPL, SDP_PAGES, SDP_OP32);
481 	gdt[GDT_KDATA].usd_def32 = 1;
482 
483 	/*
484 	 * 64-bit user code segment.
485 	 */
486 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
487 	    SDP_PAGES, SDP_OP32);
488 
489 	/*
490 	 * 32-bit user code segment.
491 	 */
492 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
493 	    SEL_UPL, SDP_PAGES, SDP_OP32);
494 
495 	/*
496 	 * See gdt_ucode32() and gdt_ucode_native().
497 	 */
498 	ucs_on = ucs_off = gdt[GDT_UCODE];
499 	ucs_off.usd_p = 0;	/* forces #np fault */
500 
501 	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
502 	ucs32_off.usd_p = 0;	/* forces #np fault */
503 
504 	/*
505 	 * 32 and 64 bit data segments can actually share the same descriptor.
506 	 * In long mode only the present bit is checked but all other fields
507 	 * are loaded. But in compatibility mode all fields are interpreted
508 	 * as in legacy mode so they must be set correctly for a 32-bit data
509 	 * segment.
510 	 */
511 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
512 	    SDP_PAGES, SDP_OP32);
513 
514 #if !defined(__xpv)
515 
516 	/*
517 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
518 	 * in the GDT is 0.
519 	 */
520 
521 	/*
522 	 * Kernel TSS
523 	 */
524 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], ktss0,
525 	    sizeof (*ktss0) - 1, SDT_SYSTSS, SEL_KPL);
526 
527 #endif	/* !__xpv */
528 
529 	/*
530 	 * Initialize fs and gs descriptors for 32 bit processes.
531 	 * Only attributes and limits are initialized, the effective
532 	 * base address is programmed via fsbase/gsbase.
533 	 */
534 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
535 	    SEL_UPL, SDP_PAGES, SDP_OP32);
536 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
537 	    SEL_UPL, SDP_PAGES, SDP_OP32);
538 
539 	/*
540 	 * Initialize the descriptors set aside for brand usage.
541 	 * Only attributes and limits are initialized.
542 	 */
543 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
544 		set_usegd(&gdt[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
545 		    SEL_UPL, SDP_PAGES, SDP_OP32);
546 
547 	/*
548 	 * Initialize convenient zero base user descriptors for clearing
549 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
550 	 * an example.
551 	 */
552 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
553 	    SDP_BYTES, SDP_OP32);
554 	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
555 	    SDP_PAGES, SDP_OP32);
556 }
557 
558 #if defined(__xpv)
559 
560 static user_desc_t *
561 init_gdt(void)
562 {
563 	uint64_t gdtpa;
564 	ulong_t ma[1];		/* XXPV should be a memory_t */
565 	ulong_t addr;
566 
567 #if !defined(__lint)
568 	/*
569 	 * Our gdt is never larger than a single page.
570 	 */
571 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
572 #endif
573 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
574 	    PAGESIZE, PAGESIZE);
575 	bzero(gdt0, PAGESIZE);
576 
577 	init_gdt_common(gdt0);
578 
579 	/*
580 	 * XXX Since we never invoke kmdb until after the kernel takes
581 	 * over the descriptor tables, why not have it use the kernel's
582 	 * selectors?
583 	 */
584 	if (boothowto & RB_DEBUG) {
585 		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
586 		    SEL_KPL, SDP_PAGES, SDP_OP32);
587 		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
588 		    SEL_KPL, SDP_PAGES, SDP_OP32);
589 	}
590 
591 	/*
592 	 * Clear write permission for page containing the gdt and install it.
593 	 */
594 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
595 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
596 	kbm_read_only((uintptr_t)gdt0, gdtpa);
597 	xen_set_gdt(ma, NGDT);
598 
599 	/*
600 	 * Reload the segment registers to use the new GDT.
601 	 * On 64-bit, fixup KCS_SEL to be in ring 3.
602 	 * See KCS_SEL in segments.h.
603 	 */
604 	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
605 
606 	/*
607 	 *  setup %gs for kernel
608 	 */
609 	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
610 
611 	/*
612 	 * XX64 We should never dereference off "other gsbase" or
613 	 * "fsbase".  So, we should arrange to point FSBASE and
614 	 * KGSBASE somewhere truly awful e.g. point it at the last
615 	 * valid address below the hole so that any attempts to index
616 	 * off them cause an exception.
617 	 *
618 	 * For now, point it at 8G -- at least it should be unmapped
619 	 * until some 64-bit processes run.
620 	 */
621 	addr = 0x200000000ul;
622 	xen_set_segment_base(SEGBASE_FS, addr);
623 	xen_set_segment_base(SEGBASE_GS_USER, addr);
624 	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
625 
626 	return (gdt0);
627 }
628 
629 #else	/* __xpv */
630 
631 static user_desc_t *
632 init_gdt(void)
633 {
634 	desctbr_t	r_bgdt, r_gdt;
635 	user_desc_t	*bgdt;
636 
637 #if !defined(__lint)
638 	/*
639 	 * Our gdt is never larger than a single page.
640 	 */
641 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
642 #endif
643 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
644 	    PAGESIZE, PAGESIZE);
645 	bzero(gdt0, PAGESIZE);
646 
647 	init_gdt_common(gdt0);
648 
649 	/*
650 	 * Copy in from boot's gdt to our gdt.
651 	 * Entry 0 is the null descriptor by definition.
652 	 */
653 	rd_gdtr(&r_bgdt);
654 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
655 	if (bgdt == NULL)
656 		panic("null boot gdt");
657 
658 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
659 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
660 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
661 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
662 	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
663 
664 	/*
665 	 * Install our new GDT
666 	 */
667 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
668 	r_gdt.dtr_base = (uintptr_t)gdt0;
669 	wr_gdtr(&r_gdt);
670 
671 	/*
672 	 * Reload the segment registers to use the new GDT
673 	 */
674 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
675 
676 	/*
677 	 *  setup %gs for kernel
678 	 */
679 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
680 
681 	/*
682 	 * XX64 We should never dereference off "other gsbase" or
683 	 * "fsbase".  So, we should arrange to point FSBASE and
684 	 * KGSBASE somewhere truly awful e.g. point it at the last
685 	 * valid address below the hole so that any attempts to index
686 	 * off them cause an exception.
687 	 *
688 	 * For now, point it at 8G -- at least it should be unmapped
689 	 * until some 64-bit processes run.
690 	 */
691 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
692 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
693 	return (gdt0);
694 }
695 
696 #endif	/* __xpv */
697 
698 
699 /*
700  * Build kernel IDT.
701  *
702  * Note that for amd64 we pretty much require every gate to be an interrupt
703  * gate which blocks interrupts atomically on entry; that's because of our
704  * dependency on using 'swapgs' every time we come into the kernel to find
705  * the cpu structure. If we get interrupted just before doing that, %cs could
706  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
707  * %gsbase is really still pointing at something in userland. Bad things will
708  * ensue. We also use interrupt gates for i386 as well even though this is not
709  * required for some traps.
710  *
711  * Perhaps they should have invented a trap gate that does an atomic swapgs?
712  */
713 static void
714 init_idt_common(gate_desc_t *idt)
715 {
716 	set_gatesegd(&idt[T_ZERODIV],
717 	    (kpti_enable == 1) ? &tr_div0trap : &div0trap,
718 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ZERODIV));
719 	set_gatesegd(&idt[T_SGLSTP],
720 	    (kpti_enable == 1) ? &tr_dbgtrap : &dbgtrap,
721 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SGLSTP));
722 	set_gatesegd(&idt[T_NMIFLT],
723 	    (kpti_enable == 1) ? &tr_nmiint : &nmiint,
724 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NMIFLT));
725 	set_gatesegd(&idt[T_BPTFLT],
726 	    (kpti_enable == 1) ? &tr_brktrap : &brktrap,
727 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_BPTFLT));
728 	set_gatesegd(&idt[T_OVFLW],
729 	    (kpti_enable == 1) ? &tr_ovflotrap : &ovflotrap,
730 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_OVFLW));
731 	set_gatesegd(&idt[T_BOUNDFLT],
732 	    (kpti_enable == 1) ? &tr_boundstrap : &boundstrap,
733 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_BOUNDFLT));
734 	set_gatesegd(&idt[T_ILLINST],
735 	    (kpti_enable == 1) ? &tr_invoptrap : &invoptrap,
736 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ILLINST));
737 	set_gatesegd(&idt[T_NOEXTFLT],
738 	    (kpti_enable == 1) ? &tr_ndptrap : &ndptrap,
739 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_NOEXTFLT));
740 
741 	/*
742 	 * double fault handler.
743 	 *
744 	 * Note that on the hypervisor a guest does not receive #df faults.
745 	 * Instead a failsafe event is injected into the guest if its selectors
746 	 * and/or stack is in a broken state. See xen_failsafe_callback.
747 	 */
748 #if !defined(__xpv)
749 	set_gatesegd(&idt[T_DBLFLT],
750 	    (kpti_enable == 1) ? &tr_syserrtrap : &syserrtrap,
751 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_DBLFLT));
752 #endif	/* !__xpv */
753 
754 	/*
755 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
756 	 */
757 	set_gatesegd(&idt[T_TSSFLT],
758 	    (kpti_enable == 1) ? &tr_invtsstrap : &invtsstrap,
759 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_TSSFLT));
760 	set_gatesegd(&idt[T_SEGFLT],
761 	    (kpti_enable == 1) ? &tr_segnptrap : &segnptrap,
762 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SEGFLT));
763 	set_gatesegd(&idt[T_STKFLT],
764 	    (kpti_enable == 1) ? &tr_stktrap : &stktrap,
765 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_STKFLT));
766 	set_gatesegd(&idt[T_GPFLT],
767 	    (kpti_enable == 1) ? &tr_gptrap : &gptrap,
768 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_GPFLT));
769 	set_gatesegd(&idt[T_PGFLT],
770 	    (kpti_enable == 1) ? &tr_pftrap : &pftrap,
771 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_PGFLT));
772 	set_gatesegd(&idt[T_EXTERRFLT],
773 	    (kpti_enable == 1) ? &tr_ndperr : &ndperr,
774 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_EXTERRFLT));
775 	set_gatesegd(&idt[T_ALIGNMENT],
776 	    (kpti_enable == 1) ? &tr_achktrap : &achktrap,
777 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_ALIGNMENT));
778 	set_gatesegd(&idt[T_MCE],
779 	    (kpti_enable == 1) ? &tr_mcetrap : &mcetrap,
780 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_MCE));
781 	set_gatesegd(&idt[T_SIMDFPE],
782 	    (kpti_enable == 1) ? &tr_xmtrap : &xmtrap,
783 	    KCS_SEL, SDT_SYSIGT, TRP_KPL, idt_vector_to_ist(T_SIMDFPE));
784 
785 	/*
786 	 * install fast trap handler at 210.
787 	 */
788 	set_gatesegd(&idt[T_FASTTRAP],
789 	    (kpti_enable == 1) ? &tr_fasttrap : &fasttrap,
790 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_FASTTRAP));
791 
792 	/*
793 	 * System call handler.
794 	 */
795 	set_gatesegd(&idt[T_SYSCALLINT],
796 	    (kpti_enable == 1) ? &tr_sys_syscall_int : &sys_syscall_int,
797 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_SYSCALLINT));
798 
799 	/*
800 	 * Install the DTrace interrupt handler for the pid provider.
801 	 */
802 	set_gatesegd(&idt[T_DTRACE_RET],
803 	    (kpti_enable == 1) ? &tr_dtrace_ret : &dtrace_ret,
804 	    KCS_SEL, SDT_SYSIGT, TRP_UPL, idt_vector_to_ist(T_DTRACE_RET));
805 
806 	/*
807 	 * Prepare interposing descriptor for the syscall handler
808 	 * and cache copy of the default descriptor.
809 	 */
810 	brand_tbl[0].ih_inum = T_SYSCALLINT;
811 	brand_tbl[0].ih_default_desc = idt0[T_SYSCALLINT];
812 
813 	set_gatesegd(&(brand_tbl[0].ih_interp_desc),
814 	    (kpti_enable == 1) ? &tr_brand_sys_syscall_int :
815 	    &brand_sys_syscall_int, KCS_SEL, SDT_SYSIGT, TRP_UPL,
816 	    idt_vector_to_ist(T_SYSCALLINT));
817 
818 	brand_tbl[1].ih_inum = 0;
819 }
820 
821 #if defined(__xpv)
822 
823 static void
824 init_idt(gate_desc_t *idt)
825 {
826 	init_idt_common(idt);
827 }
828 
829 #else	/* __xpv */
830 
831 static void
832 init_idt(gate_desc_t *idt)
833 {
834 	char	ivctname[80];
835 	void	(*ivctptr)(void);
836 	int	i;
837 
838 	/*
839 	 * Initialize the entire table with the 'reserved' trap and then
840 	 * overwrite specific entries. T_EXTOVRFLT (9) is unsupported and
841 	 * reserved since it can only be generated on a 386 processor. Vector
842 	 * 15 is also unsupported and reserved.
843 	 */
844 #if !defined(__xpv)
845 	for (i = 0; i < NIDT; i++) {
846 		set_gatesegd(&idt[i],
847 		    (kpti_enable == 1) ? &tr_resvtrap : &resvtrap,
848 		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
849 		    idt_vector_to_ist(T_RESVTRAP));
850 	}
851 #else
852 	for (i = 0; i < NIDT; i++) {
853 		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
854 		    IST_NONE);
855 	}
856 #endif
857 
858 	/*
859 	 * 20-31 reserved
860 	 */
861 #if !defined(__xpv)
862 	for (i = 20; i < 32; i++) {
863 		set_gatesegd(&idt[i],
864 		    (kpti_enable == 1) ? &tr_invaltrap : &invaltrap,
865 		    KCS_SEL, SDT_SYSIGT, TRP_KPL,
866 		    idt_vector_to_ist(T_INVALTRAP));
867 	}
868 #else
869 	for (i = 20; i < 32; i++) {
870 		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL,
871 		    IST_NONE);
872 	}
873 #endif
874 
875 	/*
876 	 * interrupts 32 - 255
877 	 */
878 	for (i = 32; i < 256; i++) {
879 #if !defined(__xpv)
880 		(void) snprintf(ivctname, sizeof (ivctname),
881 		    (kpti_enable == 1) ? "tr_ivct%d" : "ivct%d", i);
882 #else
883 		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
884 #endif
885 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
886 		if (ivctptr == NULL)
887 			panic("kobj_getsymvalue(%s) failed", ivctname);
888 
889 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL,
890 		    idt_vector_to_ist(i));
891 	}
892 
893 	/*
894 	 * Now install the common ones. Note that it will overlay some
895 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
896 	 */
897 	init_idt_common(idt);
898 }
899 
900 #endif	/* __xpv */
901 
902 /*
903  * The kernel does not deal with LDTs unless a user explicitly creates
904  * one. Under normal circumstances, the LDTR contains 0. Any process attempting
905  * to reference the LDT will therefore cause a #gp. System calls made via the
906  * obsolete lcall mechanism are emulated by the #gp fault handler.
907  */
908 static void
909 init_ldt(void)
910 {
911 #if defined(__xpv)
912 	xen_set_ldt(NULL, 0);
913 #else
914 	wr_ldtr(0);
915 #endif
916 }
917 
918 #if !defined(__xpv)
919 
920 static void
921 init_tss(void)
922 {
923 	extern struct cpu cpus[];
924 
925 	/*
926 	 * tss_rsp0 is dynamically filled in by resume() (in swtch.s) on each
927 	 * context switch but it'll be overwritten with this same value anyway.
928 	 */
929 	if (kpti_enable == 1) {
930 		ktss0->tss_rsp0 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
931 	}
932 
933 	/* Set up the IST stacks for double fault, NMI, MCE. */
934 	ktss0->tss_ist1 = (uintptr_t)&dblfault_stack0[sizeof (dblfault_stack0)];
935 	ktss0->tss_ist2 = (uintptr_t)&nmi_stack0[sizeof (nmi_stack0)];
936 	ktss0->tss_ist3 = (uintptr_t)&mce_stack0[sizeof (mce_stack0)];
937 
938 	/*
939 	 * This IST stack is used for #DB,#BP (debug) interrupts (when KPTI is
940 	 * enabled), and also for KDI (always).
941 	 */
942 	ktss0->tss_ist4 = (uint64_t)&cpus->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;
943 
944 	if (kpti_enable == 1) {
945 		/* This IST stack is used for #GP,#PF,#SS (fault) interrupts. */
946 		ktss0->tss_ist5 =
947 		    (uint64_t)&cpus->cpu_m.mcpu_kpti_flt.kf_tr_rsp;
948 
949 		/* This IST stack is used for all other intrs (for KPTI). */
950 		ktss0->tss_ist6 = (uint64_t)&cpus->cpu_m.mcpu_kpti.kf_tr_rsp;
951 	}
952 
953 	/*
954 	 * Set the I/O bitmap offset equal to the TSS segment limit so that
955 	 * there is no I/O permission map. This forces all user I/O
956 	 * instructions to generate a #gp fault.
957 	 */
958 	ktss0->tss_bitmapbase = sizeof (*ktss0);
959 
960 	/*
961 	 * Point %tr to descriptor for ktss0 in gdt.
962 	 */
963 	wr_tsr(KTSS_SEL);
964 }
965 
966 #endif	/* !__xpv */
967 
968 #if defined(__xpv)
969 
970 void
971 init_desctbls(void)
972 {
973 	uint_t vec;
974 	user_desc_t *gdt;
975 
976 	/*
977 	 * Setup and install our GDT.
978 	 */
979 	gdt = init_gdt();
980 
981 	/*
982 	 * Store static pa of gdt to speed up pa_to_ma() translations
983 	 * on lwp context switches.
984 	 */
985 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
986 	CPU->cpu_gdt = gdt;
987 	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
988 
989 	/*
990 	 * Setup and install our IDT.
991 	 */
992 #if !defined(__lint)
993 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
994 #endif
995 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
996 	    PAGESIZE, PAGESIZE);
997 	bzero(idt0, PAGESIZE);
998 	init_idt(idt0);
999 	for (vec = 0; vec < NIDT; vec++)
1000 		xen_idt_write(&idt0[vec], vec);
1001 
1002 	CPU->cpu_idt = idt0;
1003 
1004 	/*
1005 	 * set default kernel stack
1006 	 */
1007 	xen_stack_switch(KDS_SEL,
1008 	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1009 
1010 	xen_init_callbacks();
1011 
1012 	init_ldt();
1013 }
1014 
1015 #else	/* __xpv */
1016 
1017 void
1018 init_desctbls(void)
1019 {
1020 	user_desc_t *gdt;
1021 	desctbr_t idtr;
1022 
1023 	/*
1024 	 * Allocate IDT and TSS structures on unique pages for better
1025 	 * performance in virtual machines.
1026 	 */
1027 #if !defined(__lint)
1028 	ASSERT(NIDT * sizeof (*idt0) <= PAGESIZE);
1029 #endif
1030 	idt0 = (gate_desc_t *)BOP_ALLOC(bootops, (caddr_t)IDT_VA,
1031 	    PAGESIZE, PAGESIZE);
1032 	bzero(idt0, PAGESIZE);
1033 #if !defined(__lint)
1034 	ASSERT(sizeof (*ktss0) <= PAGESIZE);
1035 #endif
1036 	ktss0 = (tss_t *)BOP_ALLOC(bootops, (caddr_t)KTSS_VA,
1037 	    PAGESIZE, PAGESIZE);
1038 	bzero(ktss0, PAGESIZE);
1039 
1040 
1041 	/*
1042 	 * Setup and install our GDT.
1043 	 */
1044 	gdt = init_gdt();
1045 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1046 	CPU->cpu_gdt = gdt;
1047 
1048 	/*
1049 	 * Initialize this CPU's LDT.
1050 	 */
1051 	CPU->cpu_m.mcpu_ldt = BOP_ALLOC(bootops, (caddr_t)LDT_VA,
1052 	    LDT_CPU_SIZE, PAGESIZE);
1053 	bzero(CPU->cpu_m.mcpu_ldt, LDT_CPU_SIZE);
1054 	CPU->cpu_m.mcpu_ldt_len = 0;
1055 
1056 	/*
1057 	 * Setup and install our IDT.
1058 	 */
1059 	init_idt(idt0);
1060 
1061 	idtr.dtr_base = (uintptr_t)idt0;
1062 	idtr.dtr_limit = (NIDT * sizeof (*idt0)) - 1;
1063 	wr_idtr(&idtr);
1064 	CPU->cpu_idt = idt0;
1065 
1066 
1067 	init_tss();
1068 	CPU->cpu_tss = ktss0;
1069 	init_ldt();
1070 
1071 	/* Stash this so that the NMI,MCE,#DF and KDI handlers can use it. */
1072 	kpti_safe_cr3 = (uint64_t)getcr3();
1073 }
1074 
1075 #endif	/* __xpv */
1076 
1077 #ifndef __xpv
1078 /*
1079  * As per Intel SDM Vol. 3 section 27.5.2, the GDTR limit is reset to 0xFFFF
1080  * (i.e. 64KB) on a VM exit, so we have to manually fix it up ourselves.
1081  *
1082  * The caller may still need to make sure that it can't go off-CPU with the
1083  * incorrect limit before calling this (e.g. by disabling preemption).
1084  */
1085 void
1086 reset_gdtr_limit(void)
1087 {
1088 	ulong_t flags = intr_clear();
1089 	desctbr_t gdtr;
1090 
1091 	rd_gdtr(&gdtr);
1092 	gdtr.dtr_limit = (sizeof (user_desc_t) * NGDT) - 1;
1093 	wr_gdtr(&gdtr);
1094 
1095 	intr_restore(flags);
1096 }
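/*
 * An illustrative (hypothetical) caller would bracket this with
 * kpreempt_disable()/kpreempt_enable(), or already be running at high
 * PIL, so that the thread cannot migrate before the corrected limit is
 * loaded.
 */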
1097 #endif /* !__xpv */
1098 
1099 /*
1100  * In the early kernel, we need to set up a simple GDT to run on.
1101  *
1102  * XXPV	Can dboot use this too?  See dboot_gdt.s
1103  */
1104 void
1105 init_boot_gdt(user_desc_t *bgdt)
1106 {
1107 	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1108 	    SDP_PAGES, SDP_OP32);
1109 	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1110 	    SDP_PAGES, SDP_OP32);
1111 }
1112 
1113 /*
1114  * Enable interpositioning on the system call path by rewriting the
1115  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1116  * the branded entry points.
1117  */
1118 void
1119 brand_interpositioning_enable(void)
1120 {
1121 	gate_desc_t	*idt = CPU->cpu_idt;
1122 	int		i;
1123 
1124 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1125 
1126 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1127 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1128 #if defined(__xpv)
1129 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1130 		    brand_tbl[i].ih_inum);
1131 #endif
1132 	}
1133 
1134 #if defined(__xpv)
1135 
1136 	/*
1137 	 * Currently the hypervisor only supports 64-bit syscalls via the
1138 	 * syscall instruction. The 32-bit syscalls are handled by the
1139 	 * interrupt gate above.
1140 	 */
1141 	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1142 	    CALLBACKF_mask_events);
1143 
1144 #else
1145 
1146 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1147 		if (kpti_enable == 1) {
1148 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_brand_sys_syscall);
1149 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_brand_sys_syscall32);
1150 		} else {
1151 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1152 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1153 		}
1154 	}
1155 
1156 #endif
1157 
1158 	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
1159 		if (kpti_enable == 1) {
1160 			wrmsr(MSR_INTC_SEP_EIP,
1161 			    (uintptr_t)tr_brand_sys_sysenter);
1162 		} else {
1163 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1164 		}
1165 	}
1166 }
1167 
1168 /*
1169  * Disable interpositioning on the system call path by rewriting the
1170  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1171  * the standard entry points, which bypass the interpositioning hooks.
1172  */
1173 void
1174 brand_interpositioning_disable(void)
1175 {
1176 	gate_desc_t	*idt = CPU->cpu_idt;
1177 	int i;
1178 
1179 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1180 
1181 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1182 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1183 #if defined(__xpv)
1184 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1185 		    brand_tbl[i].ih_inum);
1186 #endif
1187 	}
1188 
1189 #if defined(__xpv)
1190 
1191 	/*
1192 	 * See comment above in brand_interpositioning_enable.
1193 	 */
1194 	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1195 	    CALLBACKF_mask_events);
1196 
1197 #else
1198 
1199 	if (is_x86_feature(x86_featureset, X86FSET_ASYSC)) {
1200 		if (kpti_enable == 1) {
1201 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)tr_sys_syscall);
1202 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)tr_sys_syscall32);
1203 		} else {
1204 			wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1205 			wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1206 		}
1207 	}
1208 
1209 #endif
1210 
1211 	if (is_x86_feature(x86_featureset, X86FSET_SEP)) {
1212 		if (kpti_enable == 1) {
1213 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)tr_sys_sysenter);
1214 		} else {
1215 			wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1216 		}
1217 	}
1218 }
1219