xref: /illumos-gate/usr/src/uts/intel/os/desctbls.c (revision 843e1988)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * Copyright (c) 1992 Terrence R. Lambert.
31  * Copyright (c) 1990 The Regents of the University of California.
32  * All rights reserved.
33  *
34  * This code is derived from software contributed to Berkeley by
35  * William Jolitz.
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  * 1. Redistributions of source code must retain the above copyright
41  *    notice, this list of conditions and the following disclaimer.
42  * 2. Redistributions in binary form must reproduce the above copyright
43  *    notice, this list of conditions and the following disclaimer in the
44  *    documentation and/or other materials provided with the distribution.
45  * 3. All advertising materials mentioning features or use of this software
46  *    must display the following acknowledgement:
47  *	This product includes software developed by the University of
48  *	California, Berkeley and its contributors.
49  * 4. Neither the name of the University nor the names of its contributors
50  *    may be used to endorse or promote products derived from this software
51  *    without specific prior written permission.
52  *
53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  *
65  *	from: @(#)machdep.c	7.4 (Berkeley) 6/3/91
66  */
67 
68 #include <sys/types.h>
69 #include <sys/sysmacros.h>
70 #include <sys/tss.h>
71 #include <sys/segments.h>
72 #include <sys/trap.h>
73 #include <sys/cpuvar.h>
74 #include <sys/bootconf.h>
75 #include <sys/x86_archext.h>
76 #include <sys/controlregs.h>
77 #include <sys/archsystm.h>
78 #include <sys/machsystm.h>
79 #include <sys/kobj.h>
80 #include <sys/cmn_err.h>
81 #include <sys/reboot.h>
82 #include <sys/kdi.h>
83 #include <sys/mach_mmu.h>
84 #include <sys/systm.h>
85 
86 #ifdef __xpv
87 #include <sys/hypervisor.h>
88 #include <vm/as.h>
89 #endif
90 
91 #include <sys/promif.h>
92 #include <sys/bootinfo.h>
93 #include <vm/kboot_mmu.h>
94 #include <vm/hat_pte.h>
95 
/*
 * cpu0 and default tables and structures.
 *
 * These are the descriptor tables and task state used by the boot cpu.
 * gdt0 is only a pointer; the table itself is allocated by init_gdt().
 */
user_desc_t	*gdt0;
#if !defined(__xpv)
/* NOTE(review): presumably the GDTR image describing gdt0 - confirm at use */
desctbr_t	gdt0_default_r;
#endif

#pragma	align	16(idt0)
gate_desc_t	idt0[NIDT]; 		/* interrupt descriptor table */
#if defined(__i386)
desctbr_t	idt0_default_r;		/* describes idt0 in IDTR format */
#endif

#pragma align	16(ktss0)
struct tss	ktss0;			/* kernel task state structure */

#if defined(__i386)
#pragma align	16(dftss0)
struct tss	dftss0;			/* #DF double-fault exception */
#endif	/* __i386 */

user_desc_t	zero_udesc;		/* base zero user desc native procs */
user_desc_t	null_udesc;		/* null user descriptor */
system_desc_t	null_sdesc;		/* null system descriptor */

#if defined(__amd64)
user_desc_t	zero_u32desc;		/* 32-bit compatibility procs */
#endif	/* __amd64 */

#if defined(__amd64)
/*
 * Copies of the 64-bit (ucs_*) and 32-bit (ucs32_*) user code descriptors
 * made by init_gdt_common(). The "_off" variants have the present bit
 * cleared so that loading them forces a #np fault.
 */
user_desc_t	ucs_on;
user_desc_t	ucs_off;
user_desc_t	ucs32_on;
user_desc_t	ucs32_off;
#endif	/* __amd64 */

/*
 * Stack for the double fault handler. On amd64 init_tss() points
 * ktss0.tss_ist1 at the end of this array.
 */
#pragma	align	16(dblfault_stack0)
char		dblfault_stack0[DEFAULTSTKSZ];
135 
136 extern void	fast_null(void);
137 extern hrtime_t	get_hrtime(void);
138 extern hrtime_t	gethrvtime(void);
139 extern hrtime_t	get_hrestime(void);
140 extern uint64_t	getlgrp(void);
141 
142 void (*(fasttable[]))(void) = {
143 	fast_null,			/* T_FNULL routine */
144 	fast_null,			/* T_FGETFP routine (initially null) */
145 	fast_null,			/* T_FSETFP routine (initially null) */
146 	(void (*)())get_hrtime,		/* T_GETHRTIME */
147 	(void (*)())gethrvtime,		/* T_GETHRVTIME */
148 	(void (*)())get_hrestime,	/* T_GETHRESTIME */
149 	(void (*)())getlgrp		/* T_GETLGRP */
150 };
151 
/*
 * Structure containing pre-computed descriptors to allow us to temporarily
 * interpose on a standard handler.
 */
struct interposing_handler {
	int ih_inum;			/* IDT vector being interposed on */
	gate_desc_t ih_interp_desc;	/* gate for the interposing handler */
	gate_desc_t ih_default_desc;	/* saved copy of the default gate */
};

/*
 * The brand infrastructure interposes on two handlers, and we use one as a
 * NULL signpost.
 *
 * init_idt_common() fills entry 0 for T_INT80, entry 1 for T_SYSCALLINT and
 * marks the end of the table by storing ih_inum == 0 in entry 2.
 */
static struct interposing_handler brand_tbl[3];
167 
168 /*
169  * software prototypes for default local descriptor table
170  */
171 
172 /*
173  * Routines for loading segment descriptors in format the hardware
174  * can understand.
175  */
176 
177 #if defined(__amd64)
178 
179 /*
180  * In long mode we have the new L or long mode attribute bit
181  * for code segments. Only the conforming bit in type is used along
182  * with descriptor priority and present bits. Default operand size must
183  * be zero when in long mode. In 32-bit compatibility mode all fields
184  * are treated as in legacy mode. For data segments while in long mode
185  * only the present bit is loaded.
186  */
187 void
188 set_usegd(user_desc_t *dp, uint_t lmode, void *base, size_t size,
189     uint_t type, uint_t dpl, uint_t gran, uint_t defopsz)
190 {
191 	ASSERT(lmode == SDP_SHORT || lmode == SDP_LONG);
192 
193 	/*
194 	 * 64-bit long mode.
195 	 */
196 	if (lmode == SDP_LONG)
197 		dp->usd_def32 = 0;		/* 32-bit operands only */
198 	else
199 		/*
200 		 * 32-bit compatibility mode.
201 		 */
202 		dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32-bit ops */
203 
204 	dp->usd_long = lmode;	/* 64-bit mode */
205 	dp->usd_type = type;
206 	dp->usd_dpl = dpl;
207 	dp->usd_p = 1;
208 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
209 
210 	dp->usd_lobase = (uintptr_t)base;
211 	dp->usd_midbase = (uintptr_t)base >> 16;
212 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
213 	dp->usd_lolimit = size;
214 	dp->usd_hilimit = (uintptr_t)size >> 16;
215 }
216 
217 #elif defined(__i386)
218 
219 /*
220  * Install user segment descriptor for code and data.
221  */
222 void
223 set_usegd(user_desc_t *dp, void *base, size_t size, uint_t type,
224     uint_t dpl, uint_t gran, uint_t defopsz)
225 {
226 	dp->usd_lolimit = size;
227 	dp->usd_hilimit = (uintptr_t)size >> 16;
228 
229 	dp->usd_lobase = (uintptr_t)base;
230 	dp->usd_midbase = (uintptr_t)base >> 16;
231 	dp->usd_hibase = (uintptr_t)base >> (16 + 8);
232 
233 	dp->usd_type = type;
234 	dp->usd_dpl = dpl;
235 	dp->usd_p = 1;
236 	dp->usd_def32 = defopsz;	/* 0 = 16, 1 = 32 bit operands */
237 	dp->usd_gran = gran;		/* 0 = bytes, 1 = pages */
238 }
239 
240 #endif	/* __i386 */
241 
242 /*
243  * Install system segment descriptor for LDT and TSS segments.
244  */
245 
246 #if defined(__amd64)
247 
248 void
249 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
250     uint_t dpl)
251 {
252 	dp->ssd_lolimit = size;
253 	dp->ssd_hilimit = (uintptr_t)size >> 16;
254 
255 	dp->ssd_lobase = (uintptr_t)base;
256 	dp->ssd_midbase = (uintptr_t)base >> 16;
257 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
258 	dp->ssd_hi64base = (uintptr_t)base >> (16 + 8 + 8);
259 
260 	dp->ssd_type = type;
261 	dp->ssd_zero1 = 0;	/* must be zero */
262 	dp->ssd_zero2 = 0;
263 	dp->ssd_dpl = dpl;
264 	dp->ssd_p = 1;
265 	dp->ssd_gran = 0;	/* force byte units */
266 }
267 
268 void *
269 get_ssd_base(system_desc_t *dp)
270 {
271 	uintptr_t	base;
272 
273 	base = (uintptr_t)dp->ssd_lobase |
274 	    (uintptr_t)dp->ssd_midbase << 16 |
275 	    (uintptr_t)dp->ssd_hibase << (16 + 8) |
276 	    (uintptr_t)dp->ssd_hi64base << (16 + 8 + 8);
277 	return ((void *)base);
278 }
279 
280 #elif defined(__i386)
281 
282 void
283 set_syssegd(system_desc_t *dp, void *base, size_t size, uint_t type,
284     uint_t dpl)
285 {
286 	dp->ssd_lolimit = size;
287 	dp->ssd_hilimit = (uintptr_t)size >> 16;
288 
289 	dp->ssd_lobase = (uintptr_t)base;
290 	dp->ssd_midbase = (uintptr_t)base >> 16;
291 	dp->ssd_hibase = (uintptr_t)base >> (16 + 8);
292 
293 	dp->ssd_type = type;
294 	dp->ssd_zero = 0;	/* must be zero */
295 	dp->ssd_dpl = dpl;
296 	dp->ssd_p = 1;
297 	dp->ssd_gran = 0;	/* force byte units */
298 }
299 
300 void *
301 get_ssd_base(system_desc_t *dp)
302 {
303 	uintptr_t	base;
304 
305 	base = (uintptr_t)dp->ssd_lobase |
306 	    (uintptr_t)dp->ssd_midbase << 16 |
307 	    (uintptr_t)dp->ssd_hibase << (16 + 8);
308 	return ((void *)base);
309 }
310 
311 #endif	/* __i386 */
312 
313 /*
314  * Install gate segment descriptor for interrupt, trap, call and task gates.
315  */
316 
317 #if defined(__amd64)
318 
319 void
320 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
321     uint_t type, uint_t dpl)
322 {
323 	dp->sgd_looffset = (uintptr_t)func;
324 	dp->sgd_hioffset = (uintptr_t)func >> 16;
325 	dp->sgd_hi64offset = (uintptr_t)func >> (16 + 16);
326 
327 	dp->sgd_selector =  (uint16_t)sel;
328 
329 	/*
330 	 * For 64 bit native we use the IST stack mechanism
331 	 * for double faults. All other traps use the CPL = 0
332 	 * (tss_rsp0) stack.
333 	 */
334 #if !defined(__xpv)
335 	if (type == T_DBLFLT)
336 		dp->sgd_ist = 1;
337 	else
338 #endif
339 		dp->sgd_ist = 0;
340 
341 	dp->sgd_type = type;
342 	dp->sgd_dpl = dpl;
343 	dp->sgd_p = 1;
344 }
345 
346 #elif defined(__i386)
347 
348 void
349 set_gatesegd(gate_desc_t *dp, void (*func)(void), selector_t sel,
350     uint_t type, uint_t dpl)
351 {
352 	dp->sgd_looffset = (uintptr_t)func;
353 	dp->sgd_hioffset = (uintptr_t)func >> 16;
354 
355 	dp->sgd_selector =  (uint16_t)sel;
356 	dp->sgd_stkcpy = 0;	/* always zero bytes */
357 	dp->sgd_type = type;
358 	dp->sgd_dpl = dpl;
359 	dp->sgd_p = 1;
360 }
361 
362 #endif	/* __i386 */
363 
364 /*
365  * Updates a single user descriptor in the the GDT of the current cpu.
366  * Caller is responsible for preventing cpu migration.
367  */
368 
369 void
370 gdt_update_usegd(uint_t sidx, user_desc_t *udp)
371 {
372 #if defined(__xpv)
373 
374 	uint64_t dpa = CPU->cpu_m.mcpu_gdtpa + sizeof (*udp) * sidx;
375 
376 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp))
377 		panic("gdt_update_usegd: HYPERVISOR_update_descriptor");
378 
379 #else	/* __xpv */
380 
381 	CPU->cpu_gdt[sidx] = *udp;
382 
383 #endif	/* __xpv */
384 }
385 
386 /*
387  * Writes single descriptor pointed to by udp into a processes
388  * LDT entry pointed to by ldp.
389  */
390 int
391 ldt_update_segd(user_desc_t *ldp, user_desc_t *udp)
392 {
393 #if defined(__xpv)
394 
395 	uint64_t dpa;
396 
397 	dpa = mmu_ptob(hat_getpfnum(kas.a_hat, (caddr_t)ldp)) |
398 	    ((uintptr_t)ldp & PAGEOFFSET);
399 
400 	/*
401 	 * The hypervisor is a little more restrictive about what it
402 	 * supports in the LDT.
403 	 */
404 	if (HYPERVISOR_update_descriptor(pa_to_ma(dpa), *(uint64_t *)udp) != 0)
405 		return (EINVAL);
406 
407 #else	/* __xpv */
408 
409 	*ldp = *udp;
410 
411 #endif	/* __xpv */
412 	return (0);
413 }
414 
415 #if defined(__xpv)
416 
417 /*
418  * Converts hw format gate descriptor into pseudo-IDT format for the hypervisor.
419  * Returns true if a valid entry was written.
420  */
421 int
422 xen_idt_to_trap_info(uint_t vec, gate_desc_t *sgd, void *ti_arg)
423 {
424 	trap_info_t *ti = ti_arg;	/* XXPV	Aargh - segments.h comment */
425 
426 	/*
427 	 * skip holes in the IDT
428 	 */
429 	if (GATESEG_GETOFFSET(sgd) == 0)
430 		return (0);
431 
432 	ASSERT(sgd->sgd_type == SDT_SYSIGT);
433 	ti->vector = vec;
434 	TI_SET_DPL(ti, sgd->sgd_dpl);
435 
436 	/*
437 	 * Is this an interrupt gate?
438 	 */
439 	if (sgd->sgd_type == SDT_SYSIGT) {
440 		/* LINTED */
441 		TI_SET_IF(ti, 1);
442 	}
443 	ti->cs = sgd->sgd_selector;
444 #if defined(__amd64)
445 	ti->cs |= SEL_KPL;	/* force into ring 3. see KCS_SEL  */
446 #endif
447 	ti->address = GATESEG_GETOFFSET(sgd);
448 	return (1);
449 }
450 
451 /*
452  * Convert a single hw format gate descriptor and write it into our virtual IDT.
453  */
454 void
455 xen_idt_write(gate_desc_t *sgd, uint_t vec)
456 {
457 	trap_info_t trapinfo[2];
458 
459 	bzero(trapinfo, sizeof (trapinfo));
460 	if (xen_idt_to_trap_info(vec, sgd, &trapinfo[0]) == 0)
461 		return;
462 	if (xen_set_trap_table(trapinfo) != 0)
463 		panic("xen_idt_write: xen_set_trap_table() failed");
464 }
465 
466 #endif	/* __xpv */
467 
468 #if defined(__amd64)
469 
470 /*
471  * Build kernel GDT.
472  */
473 
474 static void
475 init_gdt_common(user_desc_t *gdt)
476 {
477 	int i;
478 
479 	/*
480 	 * 64-bit kernel code segment.
481 	 */
482 	set_usegd(&gdt[GDT_KCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_KPL,
483 	    SDP_PAGES, SDP_OP32);
484 
485 	/*
486 	 * 64-bit kernel data segment. The limit attribute is ignored in 64-bit
487 	 * mode, but we set it here to 0xFFFF so that we can use the SYSRET
488 	 * instruction to return from system calls back to 32-bit applications.
489 	 * SYSRET doesn't update the base, limit, or attributes of %ss or %ds
490 	 * descriptors. We therefore must ensure that the kernel uses something,
491 	 * though it will be ignored by hardware, that is compatible with 32-bit
492 	 * apps. For the same reason we must set the default op size of this
493 	 * descriptor to 32-bit operands.
494 	 */
495 	set_usegd(&gdt[GDT_KDATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
496 	    SEL_KPL, SDP_PAGES, SDP_OP32);
497 	gdt[GDT_KDATA].usd_def32 = 1;
498 
499 	/*
500 	 * 64-bit user code segment.
501 	 */
502 	set_usegd(&gdt[GDT_UCODE], SDP_LONG, NULL, 0, SDT_MEMERA, SEL_UPL,
503 	    SDP_PAGES, SDP_OP32);
504 
505 	/*
506 	 * 32-bit user code segment.
507 	 */
508 	set_usegd(&gdt[GDT_U32CODE], SDP_SHORT, NULL, -1, SDT_MEMERA,
509 	    SEL_UPL, SDP_PAGES, SDP_OP32);
510 
511 	/*
512 	 * See gdt_ucode32() and gdt_ucode_native().
513 	 */
514 	ucs_on = ucs_off = gdt[GDT_UCODE];
515 	ucs_off.usd_p = 0;	/* forces #np fault */
516 
517 	ucs32_on = ucs32_off = gdt[GDT_U32CODE];
518 	ucs32_off.usd_p = 0;	/* forces #np fault */
519 
520 	/*
521 	 * 32 and 64 bit data segments can actually share the same descriptor.
522 	 * In long mode only the present bit is checked but all other fields
523 	 * are loaded. But in compatibility mode all fields are interpreted
524 	 * as in legacy mode so they must be set correctly for a 32-bit data
525 	 * segment.
526 	 */
527 	set_usegd(&gdt[GDT_UDATA], SDP_SHORT, NULL, -1, SDT_MEMRWA, SEL_UPL,
528 	    SDP_PAGES, SDP_OP32);
529 
530 #if !defined(__xpv)
531 
532 	/*
533 	 * The 64-bit kernel has no default LDT. By default, the LDT descriptor
534 	 * in the GDT is 0.
535 	 */
536 
537 	/*
538 	 * Kernel TSS
539 	 */
540 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
541 	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);
542 
543 #endif	/* !__xpv */
544 
545 	/*
546 	 * Initialize fs and gs descriptors for 32 bit processes.
547 	 * Only attributes and limits are initialized, the effective
548 	 * base address is programmed via fsbase/gsbase.
549 	 */
550 	set_usegd(&gdt[GDT_LWPFS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
551 	    SEL_UPL, SDP_PAGES, SDP_OP32);
552 	set_usegd(&gdt[GDT_LWPGS], SDP_SHORT, NULL, -1, SDT_MEMRWA,
553 	    SEL_UPL, SDP_PAGES, SDP_OP32);
554 
555 	/*
556 	 * Initialize the descriptors set aside for brand usage.
557 	 * Only attributes and limits are initialized.
558 	 */
559 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
560 		set_usegd(&gdt0[i], SDP_SHORT, NULL, -1, SDT_MEMRWA,
561 		    SEL_UPL, SDP_PAGES, SDP_OP32);
562 
563 	/*
564 	 * Initialize convenient zero base user descriptors for clearing
565 	 * lwp private %fs and %gs descriptors in GDT. See setregs() for
566 	 * an example.
567 	 */
568 	set_usegd(&zero_udesc, SDP_LONG, 0, 0, SDT_MEMRWA, SEL_UPL,
569 	    SDP_BYTES, SDP_OP32);
570 	set_usegd(&zero_u32desc, SDP_SHORT, 0, -1, SDT_MEMRWA, SEL_UPL,
571 	    SDP_PAGES, SDP_OP32);
572 }
573 
574 #if defined(__xpv)
575 
576 static user_desc_t *
577 init_gdt(void)
578 {
579 	uint64_t gdtpa;
580 	ulong_t ma[1];		/* XXPV should be a memory_t */
581 	ulong_t addr;
582 
583 #if !defined(__lint)
584 	/*
585 	 * Our gdt is never larger than a single page.
586 	 */
587 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
588 #endif
589 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
590 	    PAGESIZE, PAGESIZE);
591 	if (gdt0 == NULL)
592 		panic("init_gdt: BOP_ALLOC failed");
593 	bzero(gdt0, PAGESIZE);
594 
595 	init_gdt_common(gdt0);
596 
597 	/*
598 	 * XXX Since we never invoke kmdb until after the kernel takes
599 	 * over the descriptor tables why not have it use the kernel's
600 	 * selectors?
601 	 */
602 	if (boothowto & RB_DEBUG) {
603 		set_usegd(&gdt0[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA,
604 		    SEL_KPL, SDP_PAGES, SDP_OP32);
605 		set_usegd(&gdt0[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA,
606 		    SEL_KPL, SDP_PAGES, SDP_OP32);
607 	}
608 
609 	/*
610 	 * Clear write permission for page containing the gdt and install it.
611 	 */
612 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
613 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
614 	kbm_read_only((uintptr_t)gdt0, gdtpa);
615 	xen_set_gdt(ma, NGDT);
616 
617 	/*
618 	 * Reload the segment registers to use the new GDT.
619 	 * On 64-bit, fixup KCS_SEL to be in ring 3.
620 	 * See KCS_SEL in segments.h.
621 	 */
622 	load_segment_registers((KCS_SEL | SEL_KPL), KFS_SEL, KGS_SEL, KDS_SEL);
623 
624 	/*
625 	 *  setup %gs for kernel
626 	 */
627 	xen_set_segment_base(SEGBASE_GS_KERNEL, (ulong_t)&cpus[0]);
628 
629 	/*
630 	 * XX64 We should never dereference off "other gsbase" or
631 	 * "fsbase".  So, we should arrange to point FSBASE and
632 	 * KGSBASE somewhere truly awful e.g. point it at the last
633 	 * valid address below the hole so that any attempts to index
634 	 * off them cause an exception.
635 	 *
636 	 * For now, point it at 8G -- at least it should be unmapped
637 	 * until some 64-bit processes run.
638 	 */
639 	addr = 0x200000000ul;
640 	xen_set_segment_base(SEGBASE_FS, addr);
641 	xen_set_segment_base(SEGBASE_GS_USER, addr);
642 	xen_set_segment_base(SEGBASE_GS_USER_SEL, 0);
643 
644 	return (gdt0);
645 }
646 
647 #else	/* __xpv */
648 
649 static user_desc_t *
650 init_gdt(void)
651 {
652 	desctbr_t	r_bgdt, r_gdt;
653 	user_desc_t	*bgdt;
654 
655 #if !defined(__lint)
656 	/*
657 	 * Our gdt is never larger than a single page.
658 	 */
659 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
660 #endif
661 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
662 	    PAGESIZE, PAGESIZE);
663 	if (gdt0 == NULL)
664 		panic("init_gdt: BOP_ALLOC failed");
665 	bzero(gdt0, PAGESIZE);
666 
667 	init_gdt_common(gdt0);
668 
669 	/*
670 	 * Copy in from boot's gdt to our gdt.
671 	 * Entry 0 is the null descriptor by definition.
672 	 */
673 	rd_gdtr(&r_bgdt);
674 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
675 	if (bgdt == NULL)
676 		panic("null boot gdt");
677 
678 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
679 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
680 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
681 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
682 	gdt0[GDT_B64CODE] = bgdt[GDT_B64CODE];
683 
684 	/*
685 	 * Install our new GDT
686 	 */
687 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
688 	r_gdt.dtr_base = (uintptr_t)gdt0;
689 	wr_gdtr(&r_gdt);
690 
691 	/*
692 	 * Reload the segment registers to use the new GDT
693 	 */
694 	load_segment_registers(KCS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
695 
696 	/*
697 	 *  setup %gs for kernel
698 	 */
699 	wrmsr(MSR_AMD_GSBASE, (uint64_t)&cpus[0]);
700 
701 	/*
702 	 * XX64 We should never dereference off "other gsbase" or
703 	 * "fsbase".  So, we should arrange to point FSBASE and
704 	 * KGSBASE somewhere truly awful e.g. point it at the last
705 	 * valid address below the hole so that any attempts to index
706 	 * off them cause an exception.
707 	 *
708 	 * For now, point it at 8G -- at least it should be unmapped
709 	 * until some 64-bit processes run.
710 	 */
711 	wrmsr(MSR_AMD_FSBASE, 0x200000000ul);
712 	wrmsr(MSR_AMD_KGSBASE, 0x200000000ul);
713 	return (gdt0);
714 }
715 
716 #endif	/* __xpv */
717 
718 #elif defined(__i386)
719 
720 static void
721 init_gdt_common(user_desc_t *gdt)
722 {
723 	int i;
724 
725 	/*
726 	 * Text and data for both kernel and user span entire 32 bit
727 	 * address space.
728 	 */
729 
730 	/*
731 	 * kernel code segment.
732 	 */
733 	set_usegd(&gdt[GDT_KCODE], NULL, -1, SDT_MEMERA, SEL_KPL, SDP_PAGES,
734 	    SDP_OP32);
735 
736 	/*
737 	 * kernel data segment.
738 	 */
739 	set_usegd(&gdt[GDT_KDATA], NULL, -1, SDT_MEMRWA, SEL_KPL, SDP_PAGES,
740 	    SDP_OP32);
741 
742 	/*
743 	 * user code segment.
744 	 */
745 	set_usegd(&gdt[GDT_UCODE], NULL, -1, SDT_MEMERA, SEL_UPL, SDP_PAGES,
746 	    SDP_OP32);
747 
748 	/*
749 	 * user data segment.
750 	 */
751 	set_usegd(&gdt[GDT_UDATA], NULL, -1, SDT_MEMRWA, SEL_UPL, SDP_PAGES,
752 	    SDP_OP32);
753 
754 #if !defined(__xpv)
755 
756 	/*
757 	 * TSS for T_DBLFLT (double fault) handler
758 	 */
759 	set_syssegd((system_desc_t *)&gdt[GDT_DBFLT], &dftss0,
760 	    sizeof (dftss0) - 1, SDT_SYSTSS, SEL_KPL);
761 
762 	/*
763 	 * TSS for kernel
764 	 */
765 	set_syssegd((system_desc_t *)&gdt[GDT_KTSS], &ktss0,
766 	    sizeof (ktss0) - 1, SDT_SYSTSS, SEL_KPL);
767 
768 #endif	/* !__xpv */
769 
770 	/*
771 	 * %gs selector for kernel
772 	 */
773 	set_usegd(&gdt[GDT_GS], &cpus[0], sizeof (struct cpu) -1, SDT_MEMRWA,
774 	    SEL_KPL, SDP_BYTES, SDP_OP32);
775 
776 	/*
777 	 * Initialize lwp private descriptors.
778 	 * Only attributes and limits are initialized, the effective
779 	 * base address is programmed via fsbase/gsbase.
780 	 */
781 	set_usegd(&gdt[GDT_LWPFS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
782 	    SDP_PAGES, SDP_OP32);
783 	set_usegd(&gdt[GDT_LWPGS], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
784 	    SDP_PAGES, SDP_OP32);
785 
786 	/*
787 	 * Initialize the descriptors set aside for brand usage.
788 	 * Only attributes and limits are initialized.
789 	 */
790 	for (i = GDT_BRANDMIN; i <= GDT_BRANDMAX; i++)
791 		set_usegd(&gdt0[i], NULL, (size_t)-1, SDT_MEMRWA, SEL_UPL,
792 		    SDP_PAGES, SDP_OP32);
793 	/*
794 	 * Initialize convenient zero base user descriptor for clearing
795 	 * lwp  private %fs and %gs descriptors in GDT. See setregs() for
796 	 * an example.
797 	 */
798 	set_usegd(&zero_udesc, NULL, -1, SDT_MEMRWA, SEL_UPL,
799 	    SDP_BYTES, SDP_OP32);
800 }
801 
802 #if defined(__xpv)
803 
804 static user_desc_t *
805 init_gdt(void)
806 {
807 	uint64_t gdtpa;
808 	ulong_t ma[1];		/* XXPV should be a memory_t */
809 
810 #if !defined(__lint)
811 	/*
812 	 * Our gdt is never larger than a single page.
813 	 */
814 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
815 #endif
816 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
817 	    PAGESIZE, PAGESIZE);
818 	if (gdt0 == NULL)
819 		panic("init_gdt: BOP_ALLOC failed");
820 	bzero(gdt0, PAGESIZE);
821 
822 	init_gdt_common(gdt0);
823 	gdtpa = pfn_to_pa(va_to_pfn(gdt0));
824 
825 	/*
826 	 * XXX Since we never invoke kmdb until after the kernel takes
827 	 * over the descriptor tables why not have it use the kernel's
828 	 * selectors?
829 	 */
830 	if (boothowto & RB_DEBUG) {
831 		set_usegd(&gdt0[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
832 		    SDP_PAGES, SDP_OP32);
833 		set_usegd(&gdt0[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
834 		    SDP_PAGES, SDP_OP32);
835 	}
836 
837 	/*
838 	 * Clear write permission for page containing the gdt and install it.
839 	 */
840 	ma[0] = (ulong_t)(pa_to_ma(gdtpa) >> PAGESHIFT);
841 	kbm_read_only((uintptr_t)gdt0, gdtpa);
842 	xen_set_gdt(ma, NGDT);
843 
844 	/*
845 	 * Reload the segment registers to use the new GDT
846 	 */
847 	load_segment_registers(
848 	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
849 
850 	return (gdt0);
851 }
852 
853 #else	/* __xpv */
854 
855 static user_desc_t *
856 init_gdt(void)
857 {
858 	desctbr_t	r_bgdt, r_gdt;
859 	user_desc_t	*bgdt;
860 
861 #if !defined(__lint)
862 	/*
863 	 * Our gdt is never larger than a single page.
864 	 */
865 	ASSERT((sizeof (*gdt0) * NGDT) <= PAGESIZE);
866 #endif
867 	/*
868 	 * XXX this allocation belongs in our caller, not here.
869 	 */
870 	gdt0 = (user_desc_t *)BOP_ALLOC(bootops, (caddr_t)GDT_VA,
871 	    PAGESIZE, PAGESIZE);
872 	if (gdt0 == NULL)
873 		panic("init_gdt: BOP_ALLOC failed");
874 	bzero(gdt0, PAGESIZE);
875 
876 	init_gdt_common(gdt0);
877 
878 	/*
879 	 * Copy in from boot's gdt to our gdt entries.
880 	 * Entry 0 is null descriptor by definition.
881 	 */
882 	rd_gdtr(&r_bgdt);
883 	bgdt = (user_desc_t *)r_bgdt.dtr_base;
884 	if (bgdt == NULL)
885 		panic("null boot gdt");
886 
887 	gdt0[GDT_B32DATA] = bgdt[GDT_B32DATA];
888 	gdt0[GDT_B32CODE] = bgdt[GDT_B32CODE];
889 	gdt0[GDT_B16CODE] = bgdt[GDT_B16CODE];
890 	gdt0[GDT_B16DATA] = bgdt[GDT_B16DATA];
891 
892 	/*
893 	 * Install our new GDT
894 	 */
895 	r_gdt.dtr_limit = (sizeof (*gdt0) * NGDT) - 1;
896 	r_gdt.dtr_base = (uintptr_t)gdt0;
897 	wr_gdtr(&r_gdt);
898 
899 	/*
900 	 * Reload the segment registers to use the new GDT
901 	 */
902 	load_segment_registers(
903 	    KCS_SEL, KDS_SEL, KDS_SEL, KFS_SEL, KGS_SEL, KDS_SEL);
904 
905 	return (gdt0);
906 }
907 
908 #endif	/* __xpv */
909 #endif	/* __i386 */
910 
911 /*
912  * Build kernel IDT.
913  *
914  * Note that for amd64 we pretty much require every gate to be an interrupt
915  * gate which blocks interrupts atomically on entry; that's because of our
916  * dependency on using 'swapgs' every time we come into the kernel to find
917  * the cpu structure. If we get interrupted just before doing that, %cs could
918  * be in kernel mode (so that the trap prolog doesn't do a swapgs), but
919  * %gsbase is really still pointing at something in userland. Bad things will
920  * ensue. We also use interrupt gates for i386 as well even though this is not
921  * required for some traps.
922  *
923  * Perhaps they should have invented a trap gate that does an atomic swapgs?
924  */
925 static void
926 init_idt_common(gate_desc_t *idt)
927 {
928 	set_gatesegd(&idt[T_ZERODIV], &div0trap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
929 	set_gatesegd(&idt[T_SGLSTP], &dbgtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
930 	set_gatesegd(&idt[T_NMIFLT], &nmiint, KCS_SEL, SDT_SYSIGT, TRP_KPL);
931 	set_gatesegd(&idt[T_BPTFLT], &brktrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
932 	set_gatesegd(&idt[T_OVFLW], &ovflotrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
933 	set_gatesegd(&idt[T_BOUNDFLT], &boundstrap, KCS_SEL, SDT_SYSIGT,
934 	    TRP_KPL);
935 	set_gatesegd(&idt[T_ILLINST], &invoptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
936 	set_gatesegd(&idt[T_NOEXTFLT], &ndptrap,  KCS_SEL, SDT_SYSIGT, TRP_KPL);
937 
938 	/*
939 	 * double fault handler.
940 	 *
941 	 * Note that on the hypervisor a guest does not receive #df faults.
942 	 * Instead a failsafe event is injected into the guest if its selectors
943 	 * and/or stack is in a broken state. See xen_failsafe_callback.
944 	 */
945 #if !defined(__xpv)
946 #if defined(__amd64)
947 
948 	set_gatesegd(&idt[T_DBLFLT], &syserrtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
949 
950 #elif defined(__i386)
951 
952 	/*
953 	 * task gate required.
954 	 */
955 	set_gatesegd(&idt[T_DBLFLT], NULL, DFTSS_SEL, SDT_SYSTASKGT, TRP_KPL);
956 
957 #endif	/* __i386 */
958 #endif	/* !__xpv */
959 
960 	/*
961 	 * T_EXTOVRFLT coprocessor-segment-overrun not supported.
962 	 */
963 
964 	set_gatesegd(&idt[T_TSSFLT], &invtsstrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
965 	set_gatesegd(&idt[T_SEGFLT], &segnptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
966 	set_gatesegd(&idt[T_STKFLT], &stktrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
967 	set_gatesegd(&idt[T_GPFLT], &gptrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
968 	set_gatesegd(&idt[T_PGFLT], &pftrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
969 	set_gatesegd(&idt[T_EXTERRFLT], &ndperr, KCS_SEL, SDT_SYSIGT, TRP_KPL);
970 	set_gatesegd(&idt[T_ALIGNMENT], &achktrap, KCS_SEL, SDT_SYSIGT,
971 	    TRP_KPL);
972 	set_gatesegd(&idt[T_MCE], &mcetrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
973 	set_gatesegd(&idt[T_SIMDFPE], &xmtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
974 
975 	/*
976 	 * install "int80" handler at, well, 0x80.
977 	 */
978 	set_gatesegd(&idt0[T_INT80], &sys_int80, KCS_SEL, SDT_SYSIGT, TRP_UPL);
979 
980 	/*
981 	 * install fast trap handler at 210.
982 	 */
983 	set_gatesegd(&idt[T_FASTTRAP], &fasttrap, KCS_SEL, SDT_SYSIGT, TRP_UPL);
984 
985 	/*
986 	 * System call handler.
987 	 */
988 #if defined(__amd64)
989 	set_gatesegd(&idt[T_SYSCALLINT], &sys_syscall_int, KCS_SEL, SDT_SYSIGT,
990 	    TRP_UPL);
991 
992 #elif defined(__i386)
993 	set_gatesegd(&idt[T_SYSCALLINT], &sys_call, KCS_SEL, SDT_SYSIGT,
994 	    TRP_UPL);
995 #endif	/* __i386 */
996 
997 	/*
998 	 * Install the DTrace interrupt handler for the pid provider.
999 	 */
1000 	set_gatesegd(&idt[T_DTRACE_RET], &dtrace_ret, KCS_SEL,
1001 	    SDT_SYSIGT, TRP_UPL);
1002 
1003 	/*
1004 	 * Prepare interposing descriptors for the branded "int80"
1005 	 * and syscall handlers and cache copies of the default
1006 	 * descriptors.
1007 	 */
1008 	brand_tbl[0].ih_inum = T_INT80;
1009 	brand_tbl[0].ih_default_desc = idt0[T_INT80];
1010 	set_gatesegd(&(brand_tbl[0].ih_interp_desc), &brand_sys_int80, KCS_SEL,
1011 	    SDT_SYSIGT, TRP_UPL);
1012 
1013 	brand_tbl[1].ih_inum = T_SYSCALLINT;
1014 	brand_tbl[1].ih_default_desc = idt0[T_SYSCALLINT];
1015 
1016 #if defined(__amd64)
1017 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_syscall_int,
1018 	    KCS_SEL, SDT_SYSIGT, TRP_UPL);
1019 #elif defined(__i386)
1020 	set_gatesegd(&(brand_tbl[1].ih_interp_desc), &brand_sys_call,
1021 	    KCS_SEL, SDT_SYSIGT, TRP_UPL);
1022 #endif	/* __i386 */
1023 
1024 	brand_tbl[2].ih_inum = 0;
1025 }
1026 
1027 #if defined(__xpv)
1028 
1029 static void
1030 init_idt(gate_desc_t *idt)
1031 {
1032 	/*
1033 	 * currently nothing extra for the hypervisor
1034 	 */
1035 	init_idt_common(idt);
1036 }
1037 
1038 #else	/* __xpv */
1039 
1040 static void
1041 init_idt(gate_desc_t *idt)
1042 {
1043 	char	ivctname[80];
1044 	void	(*ivctptr)(void);
1045 	int	i;
1046 
1047 	/*
1048 	 * Initialize entire table with 'reserved' trap and then overwrite
1049 	 * specific entries. T_EXTOVRFLT (9) is unsupported and reserved
1050 	 * since it can only be generated on a 386 processor. 15 is also
1051 	 * unsupported and reserved.
1052 	 */
1053 	for (i = 0; i < NIDT; i++)
1054 		set_gatesegd(&idt[i], &resvtrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
1055 
1056 	/*
1057 	 * 20-31 reserved
1058 	 */
1059 	for (i = 20; i < 32; i++)
1060 		set_gatesegd(&idt[i], &invaltrap, KCS_SEL, SDT_SYSIGT, TRP_KPL);
1061 
1062 	/*
1063 	 * interrupts 32 - 255
1064 	 */
1065 	for (i = 32; i < 256; i++) {
1066 		(void) snprintf(ivctname, sizeof (ivctname), "ivct%d", i);
1067 		ivctptr = (void (*)(void))kobj_getsymvalue(ivctname, 0);
1068 		if (ivctptr == NULL)
1069 			panic("kobj_getsymvalue(%s) failed", ivctname);
1070 
1071 		set_gatesegd(&idt[i], ivctptr, KCS_SEL, SDT_SYSIGT, TRP_KPL);
1072 	}
1073 
1074 	/*
1075 	 * Now install the common ones. Note that it will overlay some
1076 	 * entries installed above like T_SYSCALLINT, T_FASTTRAP etc.
1077 	 */
1078 	init_idt_common(idt);
1079 }
1080 
1081 #endif	/* __xpv */
1082 
/*
 * Reset the LDT to "none".
 *
 * The kernel does not deal with LDTs unless a user explicitly creates
 * one, so under normal circumstances the LDTR contains 0. Any process
 * attempting to reference the LDT will therefore cause a #gp. System
 * calls made via the obsolete lcall mechanism are emulated by the #gp
 * fault handler.
 *
 * Without __xpv the register is written directly with wr_ldtr(); with
 * __xpv the request goes through xen_set_ldt() instead.
 */
static void
init_ldt(void)
{
#if !defined(__xpv)
	wr_ldtr(0);
#else	/* __xpv */
	xen_set_ldt(NULL, 0);
#endif	/* !__xpv */
}
1098 
1099 #if !defined(__xpv)
1100 #if defined(__amd64)
1101 
1102 static void
1103 init_tss(void)
1104 {
1105 	/*
1106 	 * tss_rsp0 is dynamically filled in by resume() on each context switch.
1107 	 * All exceptions but #DF will run on the thread stack.
1108 	 * Set up the double fault stack here.
1109 	 */
1110 	ktss0.tss_ist1 =
1111 	    (uint64_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1112 
1113 	/*
1114 	 * Set I/O bit map offset equal to size of TSS segment limit
1115 	 * for no I/O permission map. This will force all user I/O
1116 	 * instructions to generate #gp fault.
1117 	 */
1118 	ktss0.tss_bitmapbase = sizeof (ktss0);
1119 
1120 	/*
1121 	 * Point %tr to descriptor for ktss0 in gdt.
1122 	 */
1123 	wr_tsr(KTSS_SEL);
1124 }
1125 
1126 #elif defined(__i386)
1127 
1128 static void
1129 init_tss(void)
1130 {
1131 	/*
1132 	 * ktss0.tss_esp dynamically filled in by resume() on each
1133 	 * context switch.
1134 	 */
1135 	ktss0.tss_ss0	= KDS_SEL;
1136 	ktss0.tss_eip	= (uint32_t)_start;
1137 	ktss0.tss_ds	= ktss0.tss_es = ktss0.tss_ss = KDS_SEL;
1138 	ktss0.tss_cs	= KCS_SEL;
1139 	ktss0.tss_fs	= KFS_SEL;
1140 	ktss0.tss_gs	= KGS_SEL;
1141 	ktss0.tss_ldt	= ULDT_SEL;
1142 
1143 	/*
1144 	 * Initialize double fault tss.
1145 	 */
1146 	dftss0.tss_esp0	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1147 	dftss0.tss_ss0	= KDS_SEL;
1148 
1149 	/*
1150 	 * tss_cr3 will get initialized in hat_kern_setup() once our page
1151 	 * tables have been setup.
1152 	 */
1153 	dftss0.tss_eip	= (uint32_t)syserrtrap;
1154 	dftss0.tss_esp	= (uint32_t)&dblfault_stack0[sizeof (dblfault_stack0)];
1155 	dftss0.tss_cs	= KCS_SEL;
1156 	dftss0.tss_ds	= KDS_SEL;
1157 	dftss0.tss_es	= KDS_SEL;
1158 	dftss0.tss_ss	= KDS_SEL;
1159 	dftss0.tss_fs	= KFS_SEL;
1160 	dftss0.tss_gs	= KGS_SEL;
1161 
1162 	/*
1163 	 * Set I/O bit map offset equal to size of TSS segment limit
1164 	 * for no I/O permission map. This will force all user I/O
1165 	 * instructions to generate #gp fault.
1166 	 */
1167 	ktss0.tss_bitmapbase = sizeof (ktss0);
1168 
1169 	/*
1170 	 * Point %tr to descriptor for ktss0 in gdt.
1171 	 */
1172 	wr_tsr(KTSS_SEL);
1173 }
1174 
1175 #endif	/* __i386 */
1176 #endif	/* !__xpv */
1177 
1178 #if defined(__xpv)
1179 
1180 void
1181 init_desctbls(void)
1182 {
1183 	uint_t vec;
1184 	user_desc_t *gdt;
1185 
1186 	/*
1187 	 * Setup and install our GDT.
1188 	 */
1189 	gdt = init_gdt();
1190 
1191 	/*
1192 	 * Store static pa of gdt to speed up pa_to_ma() translations
1193 	 * on lwp context switches.
1194 	 */
1195 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1196 	CPU->cpu_m.mcpu_gdt = gdt;
1197 	CPU->cpu_m.mcpu_gdtpa = pfn_to_pa(va_to_pfn(gdt));
1198 
1199 	/*
1200 	 * Setup and install our IDT.
1201 	 */
1202 	init_idt(&idt0[0]);
1203 	for (vec = 0; vec < NIDT; vec++)
1204 		xen_idt_write(&idt0[vec], vec);
1205 
1206 	CPU->cpu_m.mcpu_idt = idt0;
1207 
1208 	/*
1209 	 * set default kernel stack
1210 	 */
1211 	xen_stack_switch(KDS_SEL,
1212 	    (ulong_t)&dblfault_stack0[sizeof (dblfault_stack0)]);
1213 
1214 	xen_init_callbacks();
1215 
1216 	init_ldt();
1217 }
1218 
1219 #else	/* __xpv */
1220 
1221 void
1222 init_desctbls(void)
1223 {
1224 	user_desc_t *gdt;
1225 	desctbr_t idtr;
1226 
1227 	/*
1228 	 * Setup and install our GDT.
1229 	 */
1230 	gdt = init_gdt();
1231 	ASSERT(IS_P2ALIGNED((uintptr_t)gdt, PAGESIZE));
1232 	CPU->cpu_m.mcpu_gdt = gdt;
1233 
1234 	/*
1235 	 * Setup and install our IDT.
1236 	 */
1237 	init_idt(&idt0[0]);
1238 
1239 	idtr.dtr_base = (uintptr_t)idt0;
1240 	idtr.dtr_limit = sizeof (idt0) - 1;
1241 	wr_idtr(&idtr);
1242 	CPU->cpu_m.mcpu_idt = idt0;
1243 
1244 #if defined(__i386)
1245 	/*
1246 	 * We maintain a description of idt0 in convenient IDTR format
1247 	 * for #pf's on some older pentium processors. See pentium_pftrap().
1248 	 */
1249 	idt0_default_r = idtr;
1250 #endif	/* __i386 */
1251 
1252 	init_tss();
1253 	CPU->cpu_tss = &ktss0;
1254 	init_ldt();
1255 }
1256 
1257 #endif	/* __xpv */
1258 
1259 /*
1260  * In the early kernel, we need to set up a simple GDT to run on.
1261  *
1262  * XXPV	Can dboot use this too?  See dboot_gdt.s
1263  */
1264 void
1265 init_boot_gdt(user_desc_t *bgdt)
1266 {
1267 #if defined(__amd64)
1268 	set_usegd(&bgdt[GDT_B32DATA], SDP_LONG, NULL, -1, SDT_MEMRWA, SEL_KPL,
1269 	    SDP_PAGES, SDP_OP32);
1270 	set_usegd(&bgdt[GDT_B64CODE], SDP_LONG, NULL, -1, SDT_MEMERA, SEL_KPL,
1271 	    SDP_PAGES, SDP_OP32);
1272 #elif defined(__i386)
1273 	set_usegd(&bgdt[GDT_B32DATA], NULL, -1, SDT_MEMRWA, SEL_KPL,
1274 	    SDP_PAGES, SDP_OP32);
1275 	set_usegd(&bgdt[GDT_B32CODE], NULL, -1, SDT_MEMERA, SEL_KPL,
1276 	    SDP_PAGES, SDP_OP32);
1277 #endif	/* __i386 */
1278 }
1279 
1280 /*
1281  * Enable interpositioning on the system call path by rewriting the
1282  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1283  * the branded entry points.
1284  */
1285 void
1286 brand_interpositioning_enable(void)
1287 {
1288 	gate_desc_t	*idt = CPU->cpu_idt;
1289 	int 		i;
1290 
1291 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1292 
1293 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1294 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_interp_desc;
1295 #if defined(__xpv)
1296 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1297 		    brand_tbl[i].ih_inum);
1298 #endif
1299 	}
1300 
1301 #if defined(__amd64)
1302 #if defined(__xpv)
1303 
1304 	/*
1305 	 * Currently the hypervisor only supports 64-bit syscalls via
1306 	 * syscall instruction. The 32-bit syscalls are handled by
1307 	 * interrupt gate above.
1308 	 */
1309 	xen_set_callback(brand_sys_syscall, CALLBACKTYPE_syscall,
1310 	    CALLBACKF_mask_events);
1311 
1312 #else
1313 
1314 	if (x86_feature & X86_ASYSC) {
1315 		wrmsr(MSR_AMD_LSTAR, (uintptr_t)brand_sys_syscall);
1316 		wrmsr(MSR_AMD_CSTAR, (uintptr_t)brand_sys_syscall32);
1317 	}
1318 
1319 #endif
1320 #endif	/* __amd64 */
1321 
1322 	if (x86_feature & X86_SEP)
1323 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)brand_sys_sysenter);
1324 }
1325 
1326 /*
1327  * Disable interpositioning on the system call path by rewriting the
1328  * sys{call|enter} MSRs and the syscall-related entries in the IDT to use
1329  * the standard entry points, which bypass the interpositioning hooks.
1330  */
1331 void
1332 brand_interpositioning_disable(void)
1333 {
1334 	gate_desc_t	*idt = CPU->cpu_idt;
1335 	int i;
1336 
1337 	ASSERT(curthread->t_preempt != 0 || getpil() >= DISP_LEVEL);
1338 
1339 	for (i = 0; brand_tbl[i].ih_inum; i++) {
1340 		idt[brand_tbl[i].ih_inum] = brand_tbl[i].ih_default_desc;
1341 #if defined(__xpv)
1342 		xen_idt_write(&idt[brand_tbl[i].ih_inum],
1343 		    brand_tbl[i].ih_inum);
1344 #endif
1345 	}
1346 
1347 #if defined(__amd64)
1348 #if defined(__xpv)
1349 
1350 	/*
1351 	 * See comment above in brand_interpositioning_enable.
1352 	 */
1353 	xen_set_callback(sys_syscall, CALLBACKTYPE_syscall,
1354 	    CALLBACKF_mask_events);
1355 
1356 #else
1357 
1358 	if (x86_feature & X86_ASYSC) {
1359 		wrmsr(MSR_AMD_LSTAR, (uintptr_t)sys_syscall);
1360 		wrmsr(MSR_AMD_CSTAR, (uintptr_t)sys_syscall32);
1361 	}
1362 
1363 #endif
1364 #endif	/* __amd64 */
1365 
1366 	if (x86_feature & X86_SEP)
1367 		wrmsr(MSR_INTC_SEP_EIP, (uintptr_t)sys_sysenter);
1368 }
1369