xref: /illumos-gate/usr/src/cmd/bhyve/task_switch.c (revision 4c87aefe)
1*4c87aefeSPatrick Mooney /*-
2*4c87aefeSPatrick Mooney  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3*4c87aefeSPatrick Mooney  *
4*4c87aefeSPatrick Mooney  * Copyright (c) 2014 Neel Natu <neel@freebsd.org>
5*4c87aefeSPatrick Mooney  * All rights reserved.
6*4c87aefeSPatrick Mooney  *
7*4c87aefeSPatrick Mooney  * Redistribution and use in source and binary forms, with or without
8*4c87aefeSPatrick Mooney  * modification, are permitted provided that the following conditions
9*4c87aefeSPatrick Mooney  * are met:
10*4c87aefeSPatrick Mooney  * 1. Redistributions of source code must retain the above copyright
11*4c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer.
12*4c87aefeSPatrick Mooney  * 2. Redistributions in binary form must reproduce the above copyright
13*4c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer in the
14*4c87aefeSPatrick Mooney  *    documentation and/or other materials provided with the distribution.
15*4c87aefeSPatrick Mooney  *
16*4c87aefeSPatrick Mooney  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
17*4c87aefeSPatrick Mooney  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18*4c87aefeSPatrick Mooney  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19*4c87aefeSPatrick Mooney  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20*4c87aefeSPatrick Mooney  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21*4c87aefeSPatrick Mooney  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22*4c87aefeSPatrick Mooney  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23*4c87aefeSPatrick Mooney  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24*4c87aefeSPatrick Mooney  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25*4c87aefeSPatrick Mooney  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26*4c87aefeSPatrick Mooney  * SUCH DAMAGE.
27*4c87aefeSPatrick Mooney  */
28*4c87aefeSPatrick Mooney 
29*4c87aefeSPatrick Mooney #include <sys/cdefs.h>
30*4c87aefeSPatrick Mooney __FBSDID("$FreeBSD$");
31*4c87aefeSPatrick Mooney 
32*4c87aefeSPatrick Mooney #include <sys/param.h>
33*4c87aefeSPatrick Mooney #include <sys/_iovec.h>
34*4c87aefeSPatrick Mooney #include <sys/mman.h>
35*4c87aefeSPatrick Mooney 
36*4c87aefeSPatrick Mooney #include <x86/psl.h>
37*4c87aefeSPatrick Mooney #include <x86/segments.h>
38*4c87aefeSPatrick Mooney #include <x86/specialreg.h>
39*4c87aefeSPatrick Mooney #include <machine/vmm.h>
40*4c87aefeSPatrick Mooney #include <machine/vmm_instruction_emul.h>
41*4c87aefeSPatrick Mooney 
42*4c87aefeSPatrick Mooney #include <assert.h>
43*4c87aefeSPatrick Mooney #include <errno.h>
44*4c87aefeSPatrick Mooney #include <stdbool.h>
45*4c87aefeSPatrick Mooney #include <stdio.h>
46*4c87aefeSPatrick Mooney #include <stdlib.h>
47*4c87aefeSPatrick Mooney 
48*4c87aefeSPatrick Mooney #include <vmmapi.h>
49*4c87aefeSPatrick Mooney 
50*4c87aefeSPatrick Mooney #include "bhyverun.h"
51*4c87aefeSPatrick Mooney 
52*4c87aefeSPatrick Mooney /*
53*4c87aefeSPatrick Mooney  * Using 'struct i386tss' is tempting but causes myriad sign extension
54*4c87aefeSPatrick Mooney  * issues because all of its fields are defined as signed integers.
55*4c87aefeSPatrick Mooney  */
/*
 * In-memory image of a 32-bit TSS as read from and written back to the guest.
 * Every 16-bit selector is declared together with the 16-bit pad that follows
 * it, so each declaration line covers one 32-bit slot.  The trailing comments
 * give the byte offset of the first member on the line; the layout must add
 * up to exactly 104 bytes, which is enforced below.
 */
struct tss32 {
	uint16_t	tss_link, rsvd1;	/* 0 */
	uint32_t	tss_esp0;		/* 4 */
	uint16_t	tss_ss0, rsvd2;		/* 8 */
	uint32_t	tss_esp1;		/* 12 */
	uint16_t	tss_ss1, rsvd3;		/* 16 */
	uint32_t	tss_esp2;		/* 20 */
	uint16_t	tss_ss2, rsvd4;		/* 24 */
	uint32_t	tss_cr3;		/* 28 */
	uint32_t	tss_eip;		/* 32 */
	uint32_t	tss_eflags;		/* 36 */
	uint32_t	tss_eax;		/* 40 */
	uint32_t	tss_ecx;		/* 44 */
	uint32_t	tss_edx;		/* 48 */
	uint32_t	tss_ebx;		/* 52 */
	uint32_t	tss_esp;		/* 56 */
	uint32_t	tss_ebp;		/* 60 */
	uint32_t	tss_esi;		/* 64 */
	uint32_t	tss_edi;		/* 68 */
	uint16_t	tss_es, rsvd5;		/* 72 */
	uint16_t	tss_cs, rsvd6;		/* 76 */
	uint16_t	tss_ss, rsvd7;		/* 80 */
	uint16_t	tss_ds, rsvd8;		/* 84 */
	uint16_t	tss_fs, rsvd9;		/* 88 */
	uint16_t	tss_gs, rsvd10;		/* 92 */
	uint16_t	tss_ldt, rsvd11;	/* 96 */
	uint16_t	tss_trap;		/* 100 */
	uint16_t	tss_iomap;		/* 102 */
};
static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed");
97*4c87aefeSPatrick Mooney 
/*
 * SEL_START() clears and SEL_LIMIT() sets the low three bits of a selector,
 * giving the offsets of the first and last byte of the 8-byte table slot
 * the selector refers to.
 */
#define	SEL_START(sel)	((sel) & ~0x7)
#define	SEL_LIMIT(sel)	((sel) | 0x7)
/* True when bit 1 of the descriptor type 'type' is set. */
#define	TSS_BUSY(type)	(((type) & 0x2) == 0x2)
101*4c87aefeSPatrick Mooney 
/*
 * Return the current value of guest register 'reg' on 'vcpu'.
 * A failing vm_get_register() is not reported to the caller; it trips the
 * assertion instead.
 */
static uint64_t
GETREG(struct vmctx *ctx, int vcpu, int reg)
{
	uint64_t regval;
	int error = vm_get_register(ctx, vcpu, reg, &regval);

	assert(error == 0);
	return (regval);
}
112*4c87aefeSPatrick Mooney 
/*
 * Store 'val' into guest register 'reg' on 'vcpu'.
 * A failing vm_set_register() trips the assertion.
 */
static void
SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
{
	int error = vm_set_register(ctx, vcpu, reg, val);

	assert(error == 0);
}
121*4c87aefeSPatrick Mooney 
122*4c87aefeSPatrick Mooney static struct seg_desc
123*4c87aefeSPatrick Mooney usd_to_seg_desc(struct user_segment_descriptor *usd)
124*4c87aefeSPatrick Mooney {
125*4c87aefeSPatrick Mooney 	struct seg_desc seg_desc;
126*4c87aefeSPatrick Mooney 
127*4c87aefeSPatrick Mooney 	seg_desc.base = (u_int)USD_GETBASE(usd);
128*4c87aefeSPatrick Mooney 	if (usd->sd_gran)
129*4c87aefeSPatrick Mooney 		seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
130*4c87aefeSPatrick Mooney 	else
131*4c87aefeSPatrick Mooney 		seg_desc.limit = (u_int)USD_GETLIMIT(usd);
132*4c87aefeSPatrick Mooney 	seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
133*4c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_xx << 12;
134*4c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_def32 << 14;
135*4c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_gran << 15;
136*4c87aefeSPatrick Mooney 
137*4c87aefeSPatrick Mooney 	return (seg_desc);
138*4c87aefeSPatrick Mooney }
139*4c87aefeSPatrick Mooney 
/*
 * Inject exception 'vector' into the guest with an error code derived from
 * the segment selector 'sel'.  The error code format is described in
 * section 6.13, "Error Code", Intel SDM volume 3:
 *
 * Bit 0 (EXT) denotes whether the exception occurred during delivery
 * of an external event like an interrupt; it is set iff 'ext' is non-zero.
 *
 * Bit 1 (IDT) indicates whether the selector points to a gate descriptor
 * in the IDT.  It is always cleared here because none of the selectors
 * encountered during task switch emulation refer to a task gate in the IDT.
 *
 * Bit 2 (GDT/LDT) has the usual interpretation of Table Indicator (TI) and
 * is carried over from the selector unchanged, as are the index bits.
 */
static void
sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext)
{
	uint16_t errcode;

	/* Drop the two low selector bits, then fold in EXT if requested */
	errcode = sel & ~0x3;
	if (ext != 0)
		errcode |= 0x1;

	vm_inject_fault(ctx, vcpu, vector, 1, errcode);
}
170*4c87aefeSPatrick Mooney 
171*4c87aefeSPatrick Mooney /*
172*4c87aefeSPatrick Mooney  * Return 0 if the selector 'sel' in within the limits of the GDT/LDT
173*4c87aefeSPatrick Mooney  * and non-zero otherwise.
174*4c87aefeSPatrick Mooney  */
175*4c87aefeSPatrick Mooney static int
176*4c87aefeSPatrick Mooney desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel)
177*4c87aefeSPatrick Mooney {
178*4c87aefeSPatrick Mooney 	uint64_t base;
179*4c87aefeSPatrick Mooney 	uint32_t limit, access;
180*4c87aefeSPatrick Mooney 	int error, reg;
181*4c87aefeSPatrick Mooney 
182*4c87aefeSPatrick Mooney 	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
183*4c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
184*4c87aefeSPatrick Mooney 	assert(error == 0);
185*4c87aefeSPatrick Mooney 
186*4c87aefeSPatrick Mooney 	if (reg == VM_REG_GUEST_LDTR) {
187*4c87aefeSPatrick Mooney 		if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))
188*4c87aefeSPatrick Mooney 			return (-1);
189*4c87aefeSPatrick Mooney 	}
190*4c87aefeSPatrick Mooney 
191*4c87aefeSPatrick Mooney 	if (limit < SEL_LIMIT(sel))
192*4c87aefeSPatrick Mooney 		return (-1);
193*4c87aefeSPatrick Mooney 	else
194*4c87aefeSPatrick Mooney 		return (0);
195*4c87aefeSPatrick Mooney }
196*4c87aefeSPatrick Mooney 
197*4c87aefeSPatrick Mooney /*
198*4c87aefeSPatrick Mooney  * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced
199*4c87aefeSPatrick Mooney  * by the selector 'sel'.
200*4c87aefeSPatrick Mooney  *
201*4c87aefeSPatrick Mooney  * Returns 0 on success.
202*4c87aefeSPatrick Mooney  * Returns 1 if an exception was injected into the guest.
203*4c87aefeSPatrick Mooney  * Returns -1 otherwise.
204*4c87aefeSPatrick Mooney  */
205*4c87aefeSPatrick Mooney static int
206*4c87aefeSPatrick Mooney desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
207*4c87aefeSPatrick Mooney     uint16_t sel, struct user_segment_descriptor *desc, bool doread,
208*4c87aefeSPatrick Mooney     int *faultptr)
209*4c87aefeSPatrick Mooney {
210*4c87aefeSPatrick Mooney 	struct iovec iov[2];
211*4c87aefeSPatrick Mooney 	uint64_t base;
212*4c87aefeSPatrick Mooney 	uint32_t limit, access;
213*4c87aefeSPatrick Mooney 	int error, reg;
214*4c87aefeSPatrick Mooney 
215*4c87aefeSPatrick Mooney 	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
216*4c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
217*4c87aefeSPatrick Mooney 	assert(error == 0);
218*4c87aefeSPatrick Mooney 	assert(limit >= SEL_LIMIT(sel));
219*4c87aefeSPatrick Mooney 
220*4c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
221*4c87aefeSPatrick Mooney 	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
222*4c87aefeSPatrick Mooney 	    faultptr);
223*4c87aefeSPatrick Mooney 	if (error || *faultptr)
224*4c87aefeSPatrick Mooney 		return (error);
225*4c87aefeSPatrick Mooney 
226*4c87aefeSPatrick Mooney 	if (doread)
227*4c87aefeSPatrick Mooney 		vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
228*4c87aefeSPatrick Mooney 	else
229*4c87aefeSPatrick Mooney 		vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
230*4c87aefeSPatrick Mooney 	return (0);
231*4c87aefeSPatrick Mooney }
232*4c87aefeSPatrick Mooney 
/*
 * Read the descriptor selected by 'sel' into 'desc'; a thin wrapper around
 * desc_table_rw() with the read direction selected.
 */
static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	const bool doread = true;

	return (desc_table_rw(ctx, vcpu, paging, sel, desc, doread,
	    faultptr));
}
239*4c87aefeSPatrick Mooney 
/*
 * Write 'desc' into the descriptor slot selected by 'sel'; a thin wrapper
 * around desc_table_rw() with the write direction selected.
 */
static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	const bool doread = false;

	return (desc_table_rw(ctx, vcpu, paging, sel, desc, doread,
	    faultptr));
}
246*4c87aefeSPatrick Mooney 
247*4c87aefeSPatrick Mooney /*
248*4c87aefeSPatrick Mooney  * Read the TSS descriptor referenced by 'sel' into 'desc'.
249*4c87aefeSPatrick Mooney  *
250*4c87aefeSPatrick Mooney  * Returns 0 on success.
251*4c87aefeSPatrick Mooney  * Returns 1 if an exception was injected into the guest.
252*4c87aefeSPatrick Mooney  * Returns -1 otherwise.
253*4c87aefeSPatrick Mooney  */
254*4c87aefeSPatrick Mooney static int
255*4c87aefeSPatrick Mooney read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
256*4c87aefeSPatrick Mooney     uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
257*4c87aefeSPatrick Mooney {
258*4c87aefeSPatrick Mooney 	struct vm_guest_paging sup_paging;
259*4c87aefeSPatrick Mooney 	int error;
260*4c87aefeSPatrick Mooney 
261*4c87aefeSPatrick Mooney 	assert(!ISLDT(sel));
262*4c87aefeSPatrick Mooney 	assert(IDXSEL(sel) != 0);
263*4c87aefeSPatrick Mooney 
264*4c87aefeSPatrick Mooney 	/* Fetch the new TSS descriptor */
265*4c87aefeSPatrick Mooney 	if (desc_table_limit_check(ctx, vcpu, sel)) {
266*4c87aefeSPatrick Mooney 		if (ts->reason == TSR_IRET)
267*4c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
268*4c87aefeSPatrick Mooney 		else
269*4c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext);
270*4c87aefeSPatrick Mooney 		return (1);
271*4c87aefeSPatrick Mooney 	}
272*4c87aefeSPatrick Mooney 
273*4c87aefeSPatrick Mooney 	sup_paging = ts->paging;
274*4c87aefeSPatrick Mooney 	sup_paging.cpl = 0;		/* implicit supervisor mode */
275*4c87aefeSPatrick Mooney 	error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
276*4c87aefeSPatrick Mooney 	return (error);
277*4c87aefeSPatrick Mooney }
278*4c87aefeSPatrick Mooney 
/*
 * Code descriptor: bits 4 and 3 of the type field are both set.
 */
static bool
code_desc(int sd_type)
{
	const int code_bits = 0x10 | 0x08;

	return ((sd_type & code_bits) == code_bits);
}
285*4c87aefeSPatrick Mooney 
/*
 * Writable data descriptor: of type bits 4, 3 and 1, bits 4 and 1 are set
 * and bit 3 is clear.
 */
static bool
stack_desc(int sd_type)
{
	const int relevant = 0x10 | 0x08 | 0x02;
	const int expected = 0x10 | 0x02;

	return ((sd_type & relevant) == expected);
}
292*4c87aefeSPatrick Mooney 
/*
 * Data descriptor (type bit 4 set, bit 3 clear) or a readable code
 * descriptor (type bits 4, 3 and 1 all set).
 */
static bool
data_desc(int sd_type)
{
	if ((sd_type & 0x18) == 0x10)
		return (true);		/* data segment */
	if ((sd_type & 0x1A) == 0x1A)
		return (true);		/* readable code segment */
	return (false);
}
299*4c87aefeSPatrick Mooney 
300*4c87aefeSPatrick Mooney static bool
301*4c87aefeSPatrick Mooney ldt_desc(int sd_type)
302*4c87aefeSPatrick Mooney {
303*4c87aefeSPatrick Mooney 
304*4c87aefeSPatrick Mooney 	return (sd_type == SDT_SYSLDT);
305*4c87aefeSPatrick Mooney }
306*4c87aefeSPatrick Mooney 
307*4c87aefeSPatrick Mooney /*
308*4c87aefeSPatrick Mooney  * Validate the descriptor 'seg_desc' associated with 'segment'.
309*4c87aefeSPatrick Mooney  */
310*4c87aefeSPatrick Mooney static int
311*4c87aefeSPatrick Mooney validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
312*4c87aefeSPatrick Mooney     int segment, struct seg_desc *seg_desc, int *faultptr)
313*4c87aefeSPatrick Mooney {
314*4c87aefeSPatrick Mooney 	struct vm_guest_paging sup_paging;
315*4c87aefeSPatrick Mooney 	struct user_segment_descriptor usd;
316*4c87aefeSPatrick Mooney 	int error, idtvec;
317*4c87aefeSPatrick Mooney 	int cpl, dpl, rpl;
318*4c87aefeSPatrick Mooney 	uint16_t sel, cs;
319*4c87aefeSPatrick Mooney 	bool ldtseg, codeseg, stackseg, dataseg, conforming;
320*4c87aefeSPatrick Mooney 
321*4c87aefeSPatrick Mooney 	ldtseg = codeseg = stackseg = dataseg = false;
322*4c87aefeSPatrick Mooney 	switch (segment) {
323*4c87aefeSPatrick Mooney 	case VM_REG_GUEST_LDTR:
324*4c87aefeSPatrick Mooney 		ldtseg = true;
325*4c87aefeSPatrick Mooney 		break;
326*4c87aefeSPatrick Mooney 	case VM_REG_GUEST_CS:
327*4c87aefeSPatrick Mooney 		codeseg = true;
328*4c87aefeSPatrick Mooney 		break;
329*4c87aefeSPatrick Mooney 	case VM_REG_GUEST_SS:
330*4c87aefeSPatrick Mooney 		stackseg = true;
331*4c87aefeSPatrick Mooney 		break;
332*4c87aefeSPatrick Mooney 	case VM_REG_GUEST_DS:
333*4c87aefeSPatrick Mooney 	case VM_REG_GUEST_ES:
334*4c87aefeSPatrick Mooney 	case VM_REG_GUEST_FS:
335*4c87aefeSPatrick Mooney 	case VM_REG_GUEST_GS:
336*4c87aefeSPatrick Mooney 		dataseg = true;
337*4c87aefeSPatrick Mooney 		break;
338*4c87aefeSPatrick Mooney 	default:
339*4c87aefeSPatrick Mooney 		assert(0);
340*4c87aefeSPatrick Mooney 	}
341*4c87aefeSPatrick Mooney 
342*4c87aefeSPatrick Mooney 	/* Get the segment selector */
343*4c87aefeSPatrick Mooney 	sel = GETREG(ctx, vcpu, segment);
344*4c87aefeSPatrick Mooney 
345*4c87aefeSPatrick Mooney 	/* LDT selector must point into the GDT */
346*4c87aefeSPatrick Mooney 	if (ldtseg && ISLDT(sel)) {
347*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
348*4c87aefeSPatrick Mooney 		return (1);
349*4c87aefeSPatrick Mooney 	}
350*4c87aefeSPatrick Mooney 
351*4c87aefeSPatrick Mooney 	/* Descriptor table limit check */
352*4c87aefeSPatrick Mooney 	if (desc_table_limit_check(ctx, vcpu, sel)) {
353*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
354*4c87aefeSPatrick Mooney 		return (1);
355*4c87aefeSPatrick Mooney 	}
356*4c87aefeSPatrick Mooney 
357*4c87aefeSPatrick Mooney 	/* NULL selector */
358*4c87aefeSPatrick Mooney 	if (IDXSEL(sel) == 0) {
359*4c87aefeSPatrick Mooney 		/* Code and stack segment selectors cannot be NULL */
360*4c87aefeSPatrick Mooney 		if (codeseg || stackseg) {
361*4c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
362*4c87aefeSPatrick Mooney 			return (1);
363*4c87aefeSPatrick Mooney 		}
364*4c87aefeSPatrick Mooney 		seg_desc->base = 0;
365*4c87aefeSPatrick Mooney 		seg_desc->limit = 0;
366*4c87aefeSPatrick Mooney 		seg_desc->access = 0x10000;	/* unusable */
367*4c87aefeSPatrick Mooney 		return (0);
368*4c87aefeSPatrick Mooney 	}
369*4c87aefeSPatrick Mooney 
370*4c87aefeSPatrick Mooney 	/* Read the descriptor from the GDT/LDT */
371*4c87aefeSPatrick Mooney 	sup_paging = ts->paging;
372*4c87aefeSPatrick Mooney 	sup_paging.cpl = 0;	/* implicit supervisor mode */
373*4c87aefeSPatrick Mooney 	error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
374*4c87aefeSPatrick Mooney 	if (error || *faultptr)
375*4c87aefeSPatrick Mooney 		return (error);
376*4c87aefeSPatrick Mooney 
377*4c87aefeSPatrick Mooney 	/* Verify that the descriptor type is compatible with the segment */
378*4c87aefeSPatrick Mooney 	if ((ldtseg && !ldt_desc(usd.sd_type)) ||
379*4c87aefeSPatrick Mooney 	    (codeseg && !code_desc(usd.sd_type)) ||
380*4c87aefeSPatrick Mooney 	    (dataseg && !data_desc(usd.sd_type)) ||
381*4c87aefeSPatrick Mooney 	    (stackseg && !stack_desc(usd.sd_type))) {
382*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
383*4c87aefeSPatrick Mooney 		return (1);
384*4c87aefeSPatrick Mooney 	}
385*4c87aefeSPatrick Mooney 
386*4c87aefeSPatrick Mooney 	/* Segment must be marked present */
387*4c87aefeSPatrick Mooney 	if (!usd.sd_p) {
388*4c87aefeSPatrick Mooney 		if (ldtseg)
389*4c87aefeSPatrick Mooney 			idtvec = IDT_TS;
390*4c87aefeSPatrick Mooney 		else if (stackseg)
391*4c87aefeSPatrick Mooney 			idtvec = IDT_SS;
392*4c87aefeSPatrick Mooney 		else
393*4c87aefeSPatrick Mooney 			idtvec = IDT_NP;
394*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, idtvec, sel, ts->ext);
395*4c87aefeSPatrick Mooney 		return (1);
396*4c87aefeSPatrick Mooney 	}
397*4c87aefeSPatrick Mooney 
398*4c87aefeSPatrick Mooney 	cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
399*4c87aefeSPatrick Mooney 	cpl = cs & SEL_RPL_MASK;
400*4c87aefeSPatrick Mooney 	rpl = sel & SEL_RPL_MASK;
401*4c87aefeSPatrick Mooney 	dpl = usd.sd_dpl;
402*4c87aefeSPatrick Mooney 
403*4c87aefeSPatrick Mooney 	if (stackseg && (rpl != cpl || dpl != cpl)) {
404*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
405*4c87aefeSPatrick Mooney 		return (1);
406*4c87aefeSPatrick Mooney 	}
407*4c87aefeSPatrick Mooney 
408*4c87aefeSPatrick Mooney 	if (codeseg) {
409*4c87aefeSPatrick Mooney 		conforming = (usd.sd_type & 0x4) ? true : false;
410*4c87aefeSPatrick Mooney 		if ((conforming && (cpl < dpl)) ||
411*4c87aefeSPatrick Mooney 		    (!conforming && (cpl != dpl))) {
412*4c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
413*4c87aefeSPatrick Mooney 			return (1);
414*4c87aefeSPatrick Mooney 		}
415*4c87aefeSPatrick Mooney 	}
416*4c87aefeSPatrick Mooney 
417*4c87aefeSPatrick Mooney 	if (dataseg) {
418*4c87aefeSPatrick Mooney 		/*
419*4c87aefeSPatrick Mooney 		 * A data segment is always non-conforming except when it's
420*4c87aefeSPatrick Mooney 		 * descriptor is a readable, conforming code segment.
421*4c87aefeSPatrick Mooney 		 */
422*4c87aefeSPatrick Mooney 		if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
423*4c87aefeSPatrick Mooney 			conforming = true;
424*4c87aefeSPatrick Mooney 		else
425*4c87aefeSPatrick Mooney 			conforming = false;
426*4c87aefeSPatrick Mooney 
427*4c87aefeSPatrick Mooney 		if (!conforming && (rpl > dpl || cpl > dpl)) {
428*4c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
429*4c87aefeSPatrick Mooney 			return (1);
430*4c87aefeSPatrick Mooney 		}
431*4c87aefeSPatrick Mooney 	}
432*4c87aefeSPatrick Mooney 	*seg_desc = usd_to_seg_desc(&usd);
433*4c87aefeSPatrick Mooney 	return (0);
434*4c87aefeSPatrick Mooney }
435*4c87aefeSPatrick Mooney 
436*4c87aefeSPatrick Mooney static void
437*4c87aefeSPatrick Mooney tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch,
438*4c87aefeSPatrick Mooney     uint32_t eip, struct tss32 *tss, struct iovec *iov)
439*4c87aefeSPatrick Mooney {
440*4c87aefeSPatrick Mooney 
441*4c87aefeSPatrick Mooney 	/* General purpose registers */
442*4c87aefeSPatrick Mooney 	tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX);
443*4c87aefeSPatrick Mooney 	tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX);
444*4c87aefeSPatrick Mooney 	tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX);
445*4c87aefeSPatrick Mooney 	tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX);
446*4c87aefeSPatrick Mooney 	tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
447*4c87aefeSPatrick Mooney 	tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP);
448*4c87aefeSPatrick Mooney 	tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI);
449*4c87aefeSPatrick Mooney 	tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI);
450*4c87aefeSPatrick Mooney 
451*4c87aefeSPatrick Mooney 	/* Segment selectors */
452*4c87aefeSPatrick Mooney 	tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES);
453*4c87aefeSPatrick Mooney 	tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
454*4c87aefeSPatrick Mooney 	tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
455*4c87aefeSPatrick Mooney 	tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS);
456*4c87aefeSPatrick Mooney 	tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS);
457*4c87aefeSPatrick Mooney 	tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS);
458*4c87aefeSPatrick Mooney 
459*4c87aefeSPatrick Mooney 	/* eflags and eip */
460*4c87aefeSPatrick Mooney 	tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
461*4c87aefeSPatrick Mooney 	if (task_switch->reason == TSR_IRET)
462*4c87aefeSPatrick Mooney 		tss->tss_eflags &= ~PSL_NT;
463*4c87aefeSPatrick Mooney 	tss->tss_eip = eip;
464*4c87aefeSPatrick Mooney 
465*4c87aefeSPatrick Mooney 	/* Copy updated old TSS into guest memory */
466*4c87aefeSPatrick Mooney 	vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32));
467*4c87aefeSPatrick Mooney }
468*4c87aefeSPatrick Mooney 
469*4c87aefeSPatrick Mooney static void
470*4c87aefeSPatrick Mooney update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
471*4c87aefeSPatrick Mooney {
472*4c87aefeSPatrick Mooney 	int error;
473*4c87aefeSPatrick Mooney 
474*4c87aefeSPatrick Mooney 	error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access);
475*4c87aefeSPatrick Mooney 	assert(error == 0);
476*4c87aefeSPatrick Mooney }
477*4c87aefeSPatrick Mooney 
478*4c87aefeSPatrick Mooney /*
479*4c87aefeSPatrick Mooney  * Update the vcpu registers to reflect the state of the new task.
480*4c87aefeSPatrick Mooney  */
481*4c87aefeSPatrick Mooney static int
482*4c87aefeSPatrick Mooney tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
483*4c87aefeSPatrick Mooney     uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
484*4c87aefeSPatrick Mooney {
485*4c87aefeSPatrick Mooney 	struct seg_desc seg_desc, seg_desc2;
486*4c87aefeSPatrick Mooney 	uint64_t *pdpte, maxphyaddr, reserved;
487*4c87aefeSPatrick Mooney 	uint32_t eflags;
488*4c87aefeSPatrick Mooney 	int error, i;
489*4c87aefeSPatrick Mooney 	bool nested;
490*4c87aefeSPatrick Mooney 
491*4c87aefeSPatrick Mooney 	nested = false;
492*4c87aefeSPatrick Mooney 	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
493*4c87aefeSPatrick Mooney 		tss->tss_link = ot_sel;
494*4c87aefeSPatrick Mooney 		nested = true;
495*4c87aefeSPatrick Mooney 	}
496*4c87aefeSPatrick Mooney 
497*4c87aefeSPatrick Mooney 	eflags = tss->tss_eflags;
498*4c87aefeSPatrick Mooney 	if (nested)
499*4c87aefeSPatrick Mooney 		eflags |= PSL_NT;
500*4c87aefeSPatrick Mooney 
501*4c87aefeSPatrick Mooney 	/* LDTR */
502*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);
503*4c87aefeSPatrick Mooney 
504*4c87aefeSPatrick Mooney 	/* PDBR */
505*4c87aefeSPatrick Mooney 	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
506*4c87aefeSPatrick Mooney 		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
507*4c87aefeSPatrick Mooney 			/*
508*4c87aefeSPatrick Mooney 			 * XXX Assuming 36-bit MAXPHYADDR.
509*4c87aefeSPatrick Mooney 			 */
510*4c87aefeSPatrick Mooney 			maxphyaddr = (1UL << 36) - 1;
511*4c87aefeSPatrick Mooney 			pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
512*4c87aefeSPatrick Mooney 			for (i = 0; i < 4; i++) {
513*4c87aefeSPatrick Mooney 				/* Check reserved bits if the PDPTE is valid */
514*4c87aefeSPatrick Mooney 				if (!(pdpte[i] & 0x1))
515*4c87aefeSPatrick Mooney 					continue;
516*4c87aefeSPatrick Mooney 				/*
517*4c87aefeSPatrick Mooney 				 * Bits 2:1, 8:5 and bits above the processor's
518*4c87aefeSPatrick Mooney 				 * maximum physical address are reserved.
519*4c87aefeSPatrick Mooney 				 */
520*4c87aefeSPatrick Mooney 				reserved = ~maxphyaddr | 0x1E6;
521*4c87aefeSPatrick Mooney 				if (pdpte[i] & reserved) {
522*4c87aefeSPatrick Mooney 					vm_inject_gp(ctx, vcpu);
523*4c87aefeSPatrick Mooney 					return (1);
524*4c87aefeSPatrick Mooney 				}
525*4c87aefeSPatrick Mooney 			}
526*4c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
527*4c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
528*4c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
529*4c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
530*4c87aefeSPatrick Mooney 		}
531*4c87aefeSPatrick Mooney 		SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
532*4c87aefeSPatrick Mooney 		ts->paging.cr3 = tss->tss_cr3;
533*4c87aefeSPatrick Mooney 	}
534*4c87aefeSPatrick Mooney 
535*4c87aefeSPatrick Mooney 	/* eflags and eip */
536*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags);
537*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip);
538*4c87aefeSPatrick Mooney 
539*4c87aefeSPatrick Mooney 	/* General purpose registers */
540*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
541*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
542*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
543*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
544*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
545*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
546*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
547*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi);
548*4c87aefeSPatrick Mooney 
549*4c87aefeSPatrick Mooney 	/* Segment selectors */
550*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es);
551*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs);
552*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss);
553*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds);
554*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs);
555*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs);
556*4c87aefeSPatrick Mooney 
557*4c87aefeSPatrick Mooney 	/*
558*4c87aefeSPatrick Mooney 	 * If this is a nested task then write out the new TSS to update
559*4c87aefeSPatrick Mooney 	 * the previous link field.
560*4c87aefeSPatrick Mooney 	 */
561*4c87aefeSPatrick Mooney 	if (nested)
562*4c87aefeSPatrick Mooney 		vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
563*4c87aefeSPatrick Mooney 
564*4c87aefeSPatrick Mooney 	/* Validate segment descriptors */
565*4c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
566*4c87aefeSPatrick Mooney 	    faultptr);
567*4c87aefeSPatrick Mooney 	if (error || *faultptr)
568*4c87aefeSPatrick Mooney 		return (error);
569*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
570*4c87aefeSPatrick Mooney 
571*4c87aefeSPatrick Mooney 	/*
572*4c87aefeSPatrick Mooney 	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
573*4c87aefeSPatrick Mooney 	 *
574*4c87aefeSPatrick Mooney 	 * The SS and CS attribute checks on VM-entry are inter-dependent so
575*4c87aefeSPatrick Mooney 	 * we need to make sure that both segments are valid before updating
576*4c87aefeSPatrick Mooney 	 * either of them. This ensures that the VMCS state can pass the
577*4c87aefeSPatrick Mooney 	 * VM-entry checks so the guest can handle any exception injected
578*4c87aefeSPatrick Mooney 	 * during task switch emulation.
579*4c87aefeSPatrick Mooney 	 */
580*4c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
581*4c87aefeSPatrick Mooney 	    faultptr);
582*4c87aefeSPatrick Mooney 	if (error || *faultptr)
583*4c87aefeSPatrick Mooney 		return (error);
584*4c87aefeSPatrick Mooney 
585*4c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
586*4c87aefeSPatrick Mooney 	    faultptr);
587*4c87aefeSPatrick Mooney 	if (error || *faultptr)
588*4c87aefeSPatrick Mooney 		return (error);
589*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
590*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
591*4c87aefeSPatrick Mooney 	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
592*4c87aefeSPatrick Mooney 
593*4c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
594*4c87aefeSPatrick Mooney 	    faultptr);
595*4c87aefeSPatrick Mooney 	if (error || *faultptr)
596*4c87aefeSPatrick Mooney 		return (error);
597*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
598*4c87aefeSPatrick Mooney 
599*4c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
600*4c87aefeSPatrick Mooney 	    faultptr);
601*4c87aefeSPatrick Mooney 	if (error || *faultptr)
602*4c87aefeSPatrick Mooney 		return (error);
603*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
604*4c87aefeSPatrick Mooney 
605*4c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
606*4c87aefeSPatrick Mooney 	    faultptr);
607*4c87aefeSPatrick Mooney 	if (error || *faultptr)
608*4c87aefeSPatrick Mooney 		return (error);
609*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
610*4c87aefeSPatrick Mooney 
611*4c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
612*4c87aefeSPatrick Mooney 	    faultptr);
613*4c87aefeSPatrick Mooney 	if (error || *faultptr)
614*4c87aefeSPatrick Mooney 		return (error);
615*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
616*4c87aefeSPatrick Mooney 
617*4c87aefeSPatrick Mooney 	return (0);
618*4c87aefeSPatrick Mooney }
619*4c87aefeSPatrick Mooney 
620*4c87aefeSPatrick Mooney /*
621*4c87aefeSPatrick Mooney  * Push an error code on the stack of the new task. This is needed if the
622*4c87aefeSPatrick Mooney  * task switch was triggered by a hardware exception that causes an error
623*4c87aefeSPatrick Mooney  * code to be saved (e.g. #PF).
624*4c87aefeSPatrick Mooney  */
625*4c87aefeSPatrick Mooney static int
626*4c87aefeSPatrick Mooney push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
627*4c87aefeSPatrick Mooney     int task_type, uint32_t errcode, int *faultptr)
628*4c87aefeSPatrick Mooney {
629*4c87aefeSPatrick Mooney 	struct iovec iov[2];
630*4c87aefeSPatrick Mooney 	struct seg_desc seg_desc;
631*4c87aefeSPatrick Mooney 	int stacksize, bytes, error;
632*4c87aefeSPatrick Mooney 	uint64_t gla, cr0, rflags;
633*4c87aefeSPatrick Mooney 	uint32_t esp;
634*4c87aefeSPatrick Mooney 	uint16_t stacksel;
635*4c87aefeSPatrick Mooney 
636*4c87aefeSPatrick Mooney 	*faultptr = 0;
637*4c87aefeSPatrick Mooney 
638*4c87aefeSPatrick Mooney 	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
639*4c87aefeSPatrick Mooney 	rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
640*4c87aefeSPatrick Mooney 	stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
641*4c87aefeSPatrick Mooney 
642*4c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base,
643*4c87aefeSPatrick Mooney 	    &seg_desc.limit, &seg_desc.access);
644*4c87aefeSPatrick Mooney 	assert(error == 0);
645*4c87aefeSPatrick Mooney 
646*4c87aefeSPatrick Mooney 	/*
647*4c87aefeSPatrick Mooney 	 * Section "Error Code" in the Intel SDM vol 3: the error code is
648*4c87aefeSPatrick Mooney 	 * pushed on the stack as a doubleword or word (depending on the
649*4c87aefeSPatrick Mooney 	 * default interrupt, trap or task gate size).
650*4c87aefeSPatrick Mooney 	 */
651*4c87aefeSPatrick Mooney 	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
652*4c87aefeSPatrick Mooney 		bytes = 4;
653*4c87aefeSPatrick Mooney 	else
654*4c87aefeSPatrick Mooney 		bytes = 2;
655*4c87aefeSPatrick Mooney 
656*4c87aefeSPatrick Mooney 	/*
657*4c87aefeSPatrick Mooney 	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
658*4c87aefeSPatrick Mooney 	 * stack-segment descriptor determines the size of the stack
659*4c87aefeSPatrick Mooney 	 * pointer outside of 64-bit mode.
660*4c87aefeSPatrick Mooney 	 */
661*4c87aefeSPatrick Mooney 	if (SEG_DESC_DEF32(seg_desc.access))
662*4c87aefeSPatrick Mooney 		stacksize = 4;
663*4c87aefeSPatrick Mooney 	else
664*4c87aefeSPatrick Mooney 		stacksize = 2;
665*4c87aefeSPatrick Mooney 
666*4c87aefeSPatrick Mooney 	esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
667*4c87aefeSPatrick Mooney 	esp -= bytes;
668*4c87aefeSPatrick Mooney 
669*4c87aefeSPatrick Mooney 	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
670*4c87aefeSPatrick Mooney 	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
671*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
672*4c87aefeSPatrick Mooney 		*faultptr = 1;
673*4c87aefeSPatrick Mooney 		return (0);
674*4c87aefeSPatrick Mooney 	}
675*4c87aefeSPatrick Mooney 
676*4c87aefeSPatrick Mooney 	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
677*4c87aefeSPatrick Mooney 		vm_inject_ac(ctx, vcpu, 1);
678*4c87aefeSPatrick Mooney 		*faultptr = 1;
679*4c87aefeSPatrick Mooney 		return (0);
680*4c87aefeSPatrick Mooney 	}
681*4c87aefeSPatrick Mooney 
682*4c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
683*4c87aefeSPatrick Mooney 	    iov, nitems(iov), faultptr);
684*4c87aefeSPatrick Mooney 	if (error || *faultptr)
685*4c87aefeSPatrick Mooney 		return (error);
686*4c87aefeSPatrick Mooney 
687*4c87aefeSPatrick Mooney 	vm_copyout(ctx, vcpu, &errcode, iov, bytes);
688*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp);
689*4c87aefeSPatrick Mooney 	return (0);
690*4c87aefeSPatrick Mooney }
691*4c87aefeSPatrick Mooney 
/*
 * Evaluate return value from helper functions and potentially return to
 * the VM run loop.
 *
 * 'error' must be 0 or EFAULT; a non-zero value makes the enclosing
 * function return VMEXIT_ABORT. Otherwise a non-zero 'fault' makes it
 * return VMEXIT_CONTINUE. When both are zero execution falls through.
 *
 * 'error' is evaluated exactly once (independent of NDEBUG) and 'fault'
 * at most once, so arguments with side effects are safe.
 */
#define	CHKERR(error,fault)						\
	do {								\
		const int chkerr_error_ = (error);			\
		assert(chkerr_error_ == 0 || chkerr_error_ == EFAULT);	\
		if (chkerr_error_ != 0)					\
			return (VMEXIT_ABORT);				\
		else if ((fault) != 0)					\
			return (VMEXIT_CONTINUE);			\
	} while (0)
704*4c87aefeSPatrick Mooney 
705*4c87aefeSPatrick Mooney int
706*4c87aefeSPatrick Mooney vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
707*4c87aefeSPatrick Mooney {
708*4c87aefeSPatrick Mooney 	struct seg_desc nt;
709*4c87aefeSPatrick Mooney 	struct tss32 oldtss, newtss;
710*4c87aefeSPatrick Mooney 	struct vm_task_switch *task_switch;
711*4c87aefeSPatrick Mooney 	struct vm_guest_paging *paging, sup_paging;
712*4c87aefeSPatrick Mooney 	struct user_segment_descriptor nt_desc, ot_desc;
713*4c87aefeSPatrick Mooney 	struct iovec nt_iov[2], ot_iov[2];
714*4c87aefeSPatrick Mooney 	uint64_t cr0, ot_base;
715*4c87aefeSPatrick Mooney 	uint32_t eip, ot_lim, access;
716*4c87aefeSPatrick Mooney 	int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
717*4c87aefeSPatrick Mooney 	enum task_switch_reason reason;
718*4c87aefeSPatrick Mooney 	uint16_t nt_sel, ot_sel;
719*4c87aefeSPatrick Mooney 
720*4c87aefeSPatrick Mooney 	task_switch = &vmexit->u.task_switch;
721*4c87aefeSPatrick Mooney 	nt_sel = task_switch->tsssel;
722*4c87aefeSPatrick Mooney 	ext = vmexit->u.task_switch.ext;
723*4c87aefeSPatrick Mooney 	reason = vmexit->u.task_switch.reason;
724*4c87aefeSPatrick Mooney 	paging = &vmexit->u.task_switch.paging;
725*4c87aefeSPatrick Mooney 	vcpu = *pvcpu;
726*4c87aefeSPatrick Mooney 
727*4c87aefeSPatrick Mooney 	assert(paging->cpu_mode == CPU_MODE_PROTECTED);
728*4c87aefeSPatrick Mooney 
729*4c87aefeSPatrick Mooney 	/*
730*4c87aefeSPatrick Mooney 	 * Calculate the instruction pointer to store in the old TSS.
731*4c87aefeSPatrick Mooney 	 */
732*4c87aefeSPatrick Mooney 	eip = vmexit->rip + vmexit->inst_length;
733*4c87aefeSPatrick Mooney 
734*4c87aefeSPatrick Mooney 	/*
735*4c87aefeSPatrick Mooney 	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
736*4c87aefeSPatrick Mooney 	 * The following page table accesses are implicitly supervisor mode:
737*4c87aefeSPatrick Mooney 	 * - accesses to GDT or LDT to load segment descriptors
738*4c87aefeSPatrick Mooney 	 * - accesses to the task state segment during task switch
739*4c87aefeSPatrick Mooney 	 */
740*4c87aefeSPatrick Mooney 	sup_paging = *paging;
741*4c87aefeSPatrick Mooney 	sup_paging.cpl = 0;	/* implicit supervisor mode */
742*4c87aefeSPatrick Mooney 
743*4c87aefeSPatrick Mooney 	/* Fetch the new TSS descriptor */
744*4c87aefeSPatrick Mooney 	error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
745*4c87aefeSPatrick Mooney 	    &fault);
746*4c87aefeSPatrick Mooney 	CHKERR(error, fault);
747*4c87aefeSPatrick Mooney 
748*4c87aefeSPatrick Mooney 	nt = usd_to_seg_desc(&nt_desc);
749*4c87aefeSPatrick Mooney 
750*4c87aefeSPatrick Mooney 	/* Verify the type of the new TSS */
751*4c87aefeSPatrick Mooney 	nt_type = SEG_DESC_TYPE(nt.access);
752*4c87aefeSPatrick Mooney 	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
753*4c87aefeSPatrick Mooney 	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
754*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
755*4c87aefeSPatrick Mooney 		goto done;
756*4c87aefeSPatrick Mooney 	}
757*4c87aefeSPatrick Mooney 
758*4c87aefeSPatrick Mooney 	/* TSS descriptor must have present bit set */
759*4c87aefeSPatrick Mooney 	if (!SEG_DESC_PRESENT(nt.access)) {
760*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext);
761*4c87aefeSPatrick Mooney 		goto done;
762*4c87aefeSPatrick Mooney 	}
763*4c87aefeSPatrick Mooney 
764*4c87aefeSPatrick Mooney 	/*
765*4c87aefeSPatrick Mooney 	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
766*4c87aefeSPatrick Mooney 	 * 44 bytes for a 16-bit TSS.
767*4c87aefeSPatrick Mooney 	 */
768*4c87aefeSPatrick Mooney 	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
769*4c87aefeSPatrick Mooney 		minlimit = 104 - 1;
770*4c87aefeSPatrick Mooney 	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
771*4c87aefeSPatrick Mooney 		minlimit = 44 - 1;
772*4c87aefeSPatrick Mooney 	else
773*4c87aefeSPatrick Mooney 		minlimit = 0;
774*4c87aefeSPatrick Mooney 
775*4c87aefeSPatrick Mooney 	assert(minlimit > 0);
776*4c87aefeSPatrick Mooney 	if (nt.limit < minlimit) {
777*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
778*4c87aefeSPatrick Mooney 		goto done;
779*4c87aefeSPatrick Mooney 	}
780*4c87aefeSPatrick Mooney 
781*4c87aefeSPatrick Mooney 	/* TSS must be busy if task switch is due to IRET */
782*4c87aefeSPatrick Mooney 	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
783*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
784*4c87aefeSPatrick Mooney 		goto done;
785*4c87aefeSPatrick Mooney 	}
786*4c87aefeSPatrick Mooney 
787*4c87aefeSPatrick Mooney 	/*
788*4c87aefeSPatrick Mooney 	 * TSS must be available (not busy) if task switch reason is
789*4c87aefeSPatrick Mooney 	 * CALL, JMP, exception or interrupt.
790*4c87aefeSPatrick Mooney 	 */
791*4c87aefeSPatrick Mooney 	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
792*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext);
793*4c87aefeSPatrick Mooney 		goto done;
794*4c87aefeSPatrick Mooney 	}
795*4c87aefeSPatrick Mooney 
796*4c87aefeSPatrick Mooney 	/* Fetch the new TSS */
797*4c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
798*4c87aefeSPatrick Mooney 	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
799*4c87aefeSPatrick Mooney 	CHKERR(error, fault);
800*4c87aefeSPatrick Mooney 	vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
801*4c87aefeSPatrick Mooney 
802*4c87aefeSPatrick Mooney 	/* Get the old TSS selector from the guest's task register */
803*4c87aefeSPatrick Mooney 	ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR);
804*4c87aefeSPatrick Mooney 	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
805*4c87aefeSPatrick Mooney 		/*
806*4c87aefeSPatrick Mooney 		 * This might happen if a task switch was attempted without
807*4c87aefeSPatrick Mooney 		 * ever loading the task register with LTR. In this case the
808*4c87aefeSPatrick Mooney 		 * TR would contain the values from power-on:
809*4c87aefeSPatrick Mooney 		 * (sel = 0, base = 0, limit = 0xffff).
810*4c87aefeSPatrick Mooney 		 */
811*4c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext);
812*4c87aefeSPatrick Mooney 		goto done;
813*4c87aefeSPatrick Mooney 	}
814*4c87aefeSPatrick Mooney 
815*4c87aefeSPatrick Mooney 	/* Get the old TSS base and limit from the guest's task register */
816*4c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
817*4c87aefeSPatrick Mooney 	    &access);
818*4c87aefeSPatrick Mooney 	assert(error == 0);
819*4c87aefeSPatrick Mooney 	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
820*4c87aefeSPatrick Mooney 	ot_type = SEG_DESC_TYPE(access);
821*4c87aefeSPatrick Mooney 	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
822*4c87aefeSPatrick Mooney 
823*4c87aefeSPatrick Mooney 	/* Fetch the old TSS descriptor */
824*4c87aefeSPatrick Mooney 	error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
825*4c87aefeSPatrick Mooney 	    &fault);
826*4c87aefeSPatrick Mooney 	CHKERR(error, fault);
827*4c87aefeSPatrick Mooney 
828*4c87aefeSPatrick Mooney 	/* Get the old TSS */
829*4c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
830*4c87aefeSPatrick Mooney 	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
831*4c87aefeSPatrick Mooney 	CHKERR(error, fault);
832*4c87aefeSPatrick Mooney 	vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
833*4c87aefeSPatrick Mooney 
834*4c87aefeSPatrick Mooney 	/*
835*4c87aefeSPatrick Mooney 	 * Clear the busy bit in the old TSS descriptor if the task switch
836*4c87aefeSPatrick Mooney 	 * due to an IRET or JMP instruction.
837*4c87aefeSPatrick Mooney 	 */
838*4c87aefeSPatrick Mooney 	if (reason == TSR_IRET || reason == TSR_JMP) {
839*4c87aefeSPatrick Mooney 		ot_desc.sd_type &= ~0x2;
840*4c87aefeSPatrick Mooney 		error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
841*4c87aefeSPatrick Mooney 		    &ot_desc, &fault);
842*4c87aefeSPatrick Mooney 		CHKERR(error, fault);
843*4c87aefeSPatrick Mooney 	}
844*4c87aefeSPatrick Mooney 
845*4c87aefeSPatrick Mooney 	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
846*4c87aefeSPatrick Mooney 		fprintf(stderr, "Task switch to 16-bit TSS not supported\n");
847*4c87aefeSPatrick Mooney 		return (VMEXIT_ABORT);
848*4c87aefeSPatrick Mooney 	}
849*4c87aefeSPatrick Mooney 
850*4c87aefeSPatrick Mooney 	/* Save processor state in old TSS */
851*4c87aefeSPatrick Mooney 	tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);
852*4c87aefeSPatrick Mooney 
853*4c87aefeSPatrick Mooney 	/*
854*4c87aefeSPatrick Mooney 	 * If the task switch was triggered for any reason other than IRET
855*4c87aefeSPatrick Mooney 	 * then set the busy bit in the new TSS descriptor.
856*4c87aefeSPatrick Mooney 	 */
857*4c87aefeSPatrick Mooney 	if (reason != TSR_IRET) {
858*4c87aefeSPatrick Mooney 		nt_desc.sd_type |= 0x2;
859*4c87aefeSPatrick Mooney 		error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
860*4c87aefeSPatrick Mooney 		    &nt_desc, &fault);
861*4c87aefeSPatrick Mooney 		CHKERR(error, fault);
862*4c87aefeSPatrick Mooney 	}
863*4c87aefeSPatrick Mooney 
864*4c87aefeSPatrick Mooney 	/* Update task register to point at the new TSS */
865*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel);
866*4c87aefeSPatrick Mooney 
867*4c87aefeSPatrick Mooney 	/* Update the hidden descriptor state of the task register */
868*4c87aefeSPatrick Mooney 	nt = usd_to_seg_desc(&nt_desc);
869*4c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt);
870*4c87aefeSPatrick Mooney 
871*4c87aefeSPatrick Mooney 	/* Set CR0.TS */
872*4c87aefeSPatrick Mooney 	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
873*4c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);
874*4c87aefeSPatrick Mooney 
875*4c87aefeSPatrick Mooney 	/*
876*4c87aefeSPatrick Mooney 	 * We are now committed to the task switch. Any exceptions encountered
877*4c87aefeSPatrick Mooney 	 * after this point will be handled in the context of the new task and
878*4c87aefeSPatrick Mooney 	 * the saved instruction pointer will belong to the new task.
879*4c87aefeSPatrick Mooney 	 */
880*4c87aefeSPatrick Mooney 	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
881*4c87aefeSPatrick Mooney 	assert(error == 0);
882*4c87aefeSPatrick Mooney 
883*4c87aefeSPatrick Mooney 	/* Load processor state from new TSS */
884*4c87aefeSPatrick Mooney 	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
885*4c87aefeSPatrick Mooney 	    &fault);
886*4c87aefeSPatrick Mooney 	CHKERR(error, fault);
887*4c87aefeSPatrick Mooney 
888*4c87aefeSPatrick Mooney 	/*
889*4c87aefeSPatrick Mooney 	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
890*4c87aefeSPatrick Mooney 	 * caused an error code to be generated, this error code is copied
891*4c87aefeSPatrick Mooney 	 * to the stack of the new task.
892*4c87aefeSPatrick Mooney 	 */
893*4c87aefeSPatrick Mooney 	if (task_switch->errcode_valid) {
894*4c87aefeSPatrick Mooney 		assert(task_switch->ext);
895*4c87aefeSPatrick Mooney 		assert(task_switch->reason == TSR_IDT_GATE);
896*4c87aefeSPatrick Mooney 		error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
897*4c87aefeSPatrick Mooney 		    task_switch->errcode, &fault);
898*4c87aefeSPatrick Mooney 		CHKERR(error, fault);
899*4c87aefeSPatrick Mooney 	}
900*4c87aefeSPatrick Mooney 
901*4c87aefeSPatrick Mooney 	/*
902*4c87aefeSPatrick Mooney 	 * Treatment of virtual-NMI blocking if NMI is delivered through
903*4c87aefeSPatrick Mooney 	 * a task gate.
904*4c87aefeSPatrick Mooney 	 *
905*4c87aefeSPatrick Mooney 	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
906*4c87aefeSPatrick Mooney 	 * If the virtual NMIs VM-execution control is 1, VM entry injects
907*4c87aefeSPatrick Mooney 	 * an NMI, and delivery of the NMI causes a task switch that causes
908*4c87aefeSPatrick Mooney 	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
909*4c87aefeSPatrick Mooney 	 * commences.
910*4c87aefeSPatrick Mooney 	 *
911*4c87aefeSPatrick Mooney 	 * Thus, virtual-NMI blocking is in effect at the time of the task
912*4c87aefeSPatrick Mooney 	 * switch VM exit.
913*4c87aefeSPatrick Mooney 	 */
914*4c87aefeSPatrick Mooney 
915*4c87aefeSPatrick Mooney 	/*
916*4c87aefeSPatrick Mooney 	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
917*4c87aefeSPatrick Mooney 	 *
918*4c87aefeSPatrick Mooney 	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation"
919*4c87aefeSPatrick Mooney 	 * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
920*4c87aefeSPatrick Mooney 	 * This unblocking of virtual-NMI occurs even if IRET causes a fault.
921*4c87aefeSPatrick Mooney 	 *
922*4c87aefeSPatrick Mooney 	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
923*4c87aefeSPatrick Mooney 	 * VM exit.
924*4c87aefeSPatrick Mooney 	 */
925*4c87aefeSPatrick Mooney 
926*4c87aefeSPatrick Mooney 	/*
927*4c87aefeSPatrick Mooney 	 * If the task switch was triggered by an event delivered through
928*4c87aefeSPatrick Mooney 	 * the IDT then extinguish the pending event from the vcpu's
929*4c87aefeSPatrick Mooney 	 * exitintinfo.
930*4c87aefeSPatrick Mooney 	 */
931*4c87aefeSPatrick Mooney 	if (task_switch->reason == TSR_IDT_GATE) {
932*4c87aefeSPatrick Mooney 		error = vm_set_intinfo(ctx, vcpu, 0);
933*4c87aefeSPatrick Mooney 		assert(error == 0);
934*4c87aefeSPatrick Mooney 	}
935*4c87aefeSPatrick Mooney 
936*4c87aefeSPatrick Mooney 	/*
937*4c87aefeSPatrick Mooney 	 * XXX should inject debug exception if 'T' bit is 1
938*4c87aefeSPatrick Mooney 	 */
939*4c87aefeSPatrick Mooney done:
940*4c87aefeSPatrick Mooney 	return (VMEXIT_CONTINUE);
941*4c87aefeSPatrick Mooney }
942