xref: /illumos-gate/usr/src/cmd/bhyve/task_switch.c (revision 32640292)
/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 Neel Natu <neel@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2020 Oxide Computer Company
 */
404c87aefeSPatrick Mooney 
414c87aefeSPatrick Mooney #include <sys/cdefs.h>
424c87aefeSPatrick Mooney 
434c87aefeSPatrick Mooney #include <sys/param.h>
444c87aefeSPatrick Mooney #include <sys/_iovec.h>
454c87aefeSPatrick Mooney #include <sys/mman.h>
464c87aefeSPatrick Mooney 
474c87aefeSPatrick Mooney #include <x86/psl.h>
484c87aefeSPatrick Mooney #include <x86/segments.h>
494c87aefeSPatrick Mooney #include <x86/specialreg.h>
504c87aefeSPatrick Mooney #include <machine/vmm.h>
514c87aefeSPatrick Mooney 
524c87aefeSPatrick Mooney #include <assert.h>
534c87aefeSPatrick Mooney #include <errno.h>
544c87aefeSPatrick Mooney #include <stdbool.h>
554c87aefeSPatrick Mooney #include <stdio.h>
564c87aefeSPatrick Mooney #include <stdlib.h>
574c87aefeSPatrick Mooney 
584c87aefeSPatrick Mooney #include <vmmapi.h>
594c87aefeSPatrick Mooney 
604c87aefeSPatrick Mooney #include "bhyverun.h"
61154972afSPatrick Mooney #include "debug.h"
624c87aefeSPatrick Mooney 
/*
 * In-memory image of a 32-bit Task State Segment.
 *
 * Using 'struct i386tss' is tempting but causes myriad sign extension
 * issues because all of its fields are defined as signed integers.
 * Every field here is therefore an unsigned fixed-width integer, and the
 * static_assert that follows pins the overall size at 104 bytes so that
 * accidental padding or a mistyped field is caught at compile time.
 */
struct tss32 {
	uint16_t	tss_link;
	uint16_t	rsvd1;
	uint32_t	tss_esp0;
	uint16_t	tss_ss0;
	uint16_t	rsvd2;
	uint32_t	tss_esp1;
	uint16_t	tss_ss1;
	uint16_t	rsvd3;
	uint32_t	tss_esp2;
	uint16_t	tss_ss2;
	uint16_t	rsvd4;
	uint32_t	tss_cr3;
	uint32_t	tss_eip;
	uint32_t	tss_eflags;
	uint32_t	tss_eax;
	uint32_t	tss_ecx;
	uint32_t	tss_edx;
	uint32_t	tss_ebx;
	uint32_t	tss_esp;
	uint32_t	tss_ebp;
	uint32_t	tss_esi;
	uint32_t	tss_edi;
	uint16_t	tss_es;
	uint16_t	rsvd5;
	uint16_t	tss_cs;
	uint16_t	rsvd6;
	uint16_t	tss_ss;
	uint16_t	rsvd7;
	uint16_t	tss_ds;
	uint16_t	rsvd8;
	uint16_t	tss_fs;
	uint16_t	rsvd9;
	uint16_t	tss_gs;
	uint16_t	rsvd10;
	uint16_t	tss_ldt;
	uint16_t	rsvd11;
	uint16_t	tss_trap;
	uint16_t	tss_iomap;
};
static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed");
1084c87aefeSPatrick Mooney 
/*
 * Selector helpers.
 *
 * SEL_START() clears the low three bits of a selector and SEL_LIMIT() sets
 * them; in this file they are used as the first and last byte offsets of the
 * descriptor table slot that the selector refers to.
 *
 * TSS_BUSY() is true when bit 1 of a descriptor type value is set.
 */
#define	SEL_START(sel)	((sel) & ~0x7)
#define	SEL_LIMIT(sel)	((sel) | 0x7)
#define	TSS_BUSY(type)	(((type) & 0x2) != 0)
1124c87aefeSPatrick Mooney 
/*
 * Return the current value of guest register 'reg' on 'vcpu'.
 *
 * A failure from vm_get_register() is not reported to the caller; it is
 * treated as a programming error and trips the assert().
 */
static uint64_t
GETREG(struct vcpu *vcpu, int reg)
{
	uint64_t val;
	int error;

	error = vm_get_register(vcpu, reg, &val);
	assert(error == 0);
	return (val);
}
1234c87aefeSPatrick Mooney 
/*
 * Set guest register 'reg' on 'vcpu' to 'val'.
 *
 * A failure from vm_set_register() is not reported to the caller; it is
 * treated as a programming error and trips the assert().
 */
static void
SETREG(struct vcpu *vcpu, int reg, uint64_t val)
{
	int error;

	error = vm_set_register(vcpu, reg, val);
	assert(error == 0);
}
1324c87aefeSPatrick Mooney 
1334c87aefeSPatrick Mooney static struct seg_desc
usd_to_seg_desc(struct user_segment_descriptor * usd)1344c87aefeSPatrick Mooney usd_to_seg_desc(struct user_segment_descriptor *usd)
1354c87aefeSPatrick Mooney {
1364c87aefeSPatrick Mooney 	struct seg_desc seg_desc;
1374c87aefeSPatrick Mooney 
1384c87aefeSPatrick Mooney 	seg_desc.base = (u_int)USD_GETBASE(usd);
1394c87aefeSPatrick Mooney 	if (usd->sd_gran)
1404c87aefeSPatrick Mooney 		seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
1414c87aefeSPatrick Mooney 	else
1424c87aefeSPatrick Mooney 		seg_desc.limit = (u_int)USD_GETLIMIT(usd);
1434c87aefeSPatrick Mooney 	seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
1444c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_xx << 12;
1454c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_def32 << 14;
1464c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_gran << 15;
1474c87aefeSPatrick Mooney 
1484c87aefeSPatrick Mooney 	return (seg_desc);
1494c87aefeSPatrick Mooney }
1504c87aefeSPatrick Mooney 
/*
 * Inject an exception with an error code that is a segment selector.
 * The format of the error code is described in section 6.13, "Error Code",
 * Intel SDM volume 3.
 *
 * Bit 0 (EXT) denotes whether the exception occurred during delivery
 * of an external event like an interrupt.
 *
 * Bit 1 (IDT) indicates whether the selector points to a gate descriptor
 * in the IDT.
 *
 * Bit 2 (GDT/LDT) has the usual interpretation of Table Indicator (TI).
 *
 * 'vector' is the exception vector to inject, 'sel' the selector the error
 * code is derived from, and a non-zero 'ext' sets the EXT bit.
 */
static void
sel_exception(struct vcpu *vcpu, int vector, uint16_t sel, int ext)
{
	/*
	 * Bit 2 from the selector is retained as-is in the error code.
	 *
	 * Bit 1 can be safely cleared because none of the selectors
	 * encountered during task switch emulation refer to a task
	 * gate in the IDT.
	 *
	 * Bit 0 is set depending on the value of 'ext'.
	 */
	sel &= ~0x3;
	if (ext)
		sel |= 0x1;
	vm_inject_fault(vcpu, vector, 1, sel);
}
1814c87aefeSPatrick Mooney 
1824c87aefeSPatrick Mooney /*
1834c87aefeSPatrick Mooney  * Return 0 if the selector 'sel' in within the limits of the GDT/LDT
1844c87aefeSPatrick Mooney  * and non-zero otherwise.
1854c87aefeSPatrick Mooney  */
1864c87aefeSPatrick Mooney static int
desc_table_limit_check(struct vcpu * vcpu,uint16_t sel)187*32640292SAndy Fiddaman desc_table_limit_check(struct vcpu *vcpu, uint16_t sel)
1884c87aefeSPatrick Mooney {
1894c87aefeSPatrick Mooney 	uint64_t base;
1904c87aefeSPatrick Mooney 	uint32_t limit, access;
1914c87aefeSPatrick Mooney 	int error, reg;
1924c87aefeSPatrick Mooney 
1934c87aefeSPatrick Mooney 	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
194*32640292SAndy Fiddaman 	error = vm_get_desc(vcpu, reg, &base, &limit, &access);
1954c87aefeSPatrick Mooney 	assert(error == 0);
1964c87aefeSPatrick Mooney 
1974c87aefeSPatrick Mooney 	if (reg == VM_REG_GUEST_LDTR) {
1984c87aefeSPatrick Mooney 		if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))
1994c87aefeSPatrick Mooney 			return (-1);
2004c87aefeSPatrick Mooney 	}
2014c87aefeSPatrick Mooney 
2024c87aefeSPatrick Mooney 	if (limit < SEL_LIMIT(sel))
2034c87aefeSPatrick Mooney 		return (-1);
2044c87aefeSPatrick Mooney 	else
2054c87aefeSPatrick Mooney 		return (0);
2064c87aefeSPatrick Mooney }
2074c87aefeSPatrick Mooney 
2084c87aefeSPatrick Mooney /*
2094c87aefeSPatrick Mooney  * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced
2104c87aefeSPatrick Mooney  * by the selector 'sel'.
2114c87aefeSPatrick Mooney  *
2124c87aefeSPatrick Mooney  * Returns 0 on success.
2134c87aefeSPatrick Mooney  * Returns 1 if an exception was injected into the guest.
2144c87aefeSPatrick Mooney  * Returns -1 otherwise.
2154c87aefeSPatrick Mooney  */
2164c87aefeSPatrick Mooney static int
desc_table_rw(struct vcpu * vcpu,struct vm_guest_paging * paging,uint16_t sel,struct user_segment_descriptor * desc,bool doread,int * faultptr)217*32640292SAndy Fiddaman desc_table_rw(struct vcpu *vcpu, struct vm_guest_paging *paging,
2184c87aefeSPatrick Mooney     uint16_t sel, struct user_segment_descriptor *desc, bool doread,
2194c87aefeSPatrick Mooney     int *faultptr)
2204c87aefeSPatrick Mooney {
2214c87aefeSPatrick Mooney 	struct iovec iov[2];
2224c87aefeSPatrick Mooney 	uint64_t base;
2234c87aefeSPatrick Mooney 	uint32_t limit, access;
2244c87aefeSPatrick Mooney 	int error, reg;
2254c87aefeSPatrick Mooney 
2264c87aefeSPatrick Mooney 	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
227*32640292SAndy Fiddaman 	error = vm_get_desc(vcpu, reg, &base, &limit, &access);
2284c87aefeSPatrick Mooney 	assert(error == 0);
2294c87aefeSPatrick Mooney 	assert(limit >= SEL_LIMIT(sel));
2304c87aefeSPatrick Mooney 
231*32640292SAndy Fiddaman 	error = vm_copy_setup(vcpu, paging, base + SEL_START(sel),
2324c87aefeSPatrick Mooney 	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
2334c87aefeSPatrick Mooney 	    faultptr);
2344c87aefeSPatrick Mooney 	if (error || *faultptr)
2354c87aefeSPatrick Mooney 		return (error);
2364c87aefeSPatrick Mooney 
2374c87aefeSPatrick Mooney 	if (doread)
238*32640292SAndy Fiddaman 		vm_copyin(iov, desc, sizeof(*desc));
2394c87aefeSPatrick Mooney 	else
240*32640292SAndy Fiddaman 		vm_copyout(desc, iov, sizeof(*desc));
2414c87aefeSPatrick Mooney 	return (0);
2424c87aefeSPatrick Mooney }
2434c87aefeSPatrick Mooney 
/*
 * Read the descriptor referenced by 'sel' into '*desc'.
 *
 * Thin wrapper around desc_table_rw() with 'doread' set to true; the return
 * value and '*faultptr' are exactly those produced by desc_table_rw().
 */
static int
desc_table_read(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	return (desc_table_rw(vcpu, paging, sel, desc, true, faultptr));
}
2504c87aefeSPatrick Mooney 
/*
 * Write '*desc' into the descriptor table slot referenced by 'sel'.
 *
 * Thin wrapper around desc_table_rw() with 'doread' set to false; the return
 * value and '*faultptr' are exactly those produced by desc_table_rw().
 */
static int
desc_table_write(struct vcpu *vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	return (desc_table_rw(vcpu, paging, sel, desc, false, faultptr));
}
2574c87aefeSPatrick Mooney 
2584c87aefeSPatrick Mooney /*
2594c87aefeSPatrick Mooney  * Read the TSS descriptor referenced by 'sel' into 'desc'.
2604c87aefeSPatrick Mooney  *
2614c87aefeSPatrick Mooney  * Returns 0 on success.
2624c87aefeSPatrick Mooney  * Returns 1 if an exception was injected into the guest.
2634c87aefeSPatrick Mooney  * Returns -1 otherwise.
2644c87aefeSPatrick Mooney  */
2654c87aefeSPatrick Mooney static int
read_tss_descriptor(struct vcpu * vcpu,struct vm_task_switch * ts,uint16_t sel,struct user_segment_descriptor * desc,int * faultptr)266*32640292SAndy Fiddaman read_tss_descriptor(struct vcpu *vcpu, struct vm_task_switch *ts,
2674c87aefeSPatrick Mooney     uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
2684c87aefeSPatrick Mooney {
2694c87aefeSPatrick Mooney 	struct vm_guest_paging sup_paging;
2704c87aefeSPatrick Mooney 	int error;
2714c87aefeSPatrick Mooney 
2724c87aefeSPatrick Mooney 	assert(!ISLDT(sel));
2734c87aefeSPatrick Mooney 	assert(IDXSEL(sel) != 0);
2744c87aefeSPatrick Mooney 
2754c87aefeSPatrick Mooney 	/* Fetch the new TSS descriptor */
276*32640292SAndy Fiddaman 	if (desc_table_limit_check(vcpu, sel)) {
2774c87aefeSPatrick Mooney 		if (ts->reason == TSR_IRET)
278*32640292SAndy Fiddaman 			sel_exception(vcpu, IDT_TS, sel, ts->ext);
2794c87aefeSPatrick Mooney 		else
280*32640292SAndy Fiddaman 			sel_exception(vcpu, IDT_GP, sel, ts->ext);
2814c87aefeSPatrick Mooney 		return (1);
2824c87aefeSPatrick Mooney 	}
2834c87aefeSPatrick Mooney 
2844c87aefeSPatrick Mooney 	sup_paging = ts->paging;
2854c87aefeSPatrick Mooney 	sup_paging.cpl = 0;		/* implicit supervisor mode */
286*32640292SAndy Fiddaman 	error = desc_table_read(vcpu, &sup_paging, sel, desc, faultptr);
2874c87aefeSPatrick Mooney 	return (error);
2884c87aefeSPatrick Mooney }
2894c87aefeSPatrick Mooney 
/*
 * Return true if the descriptor type 'sd_type' denotes a code segment,
 * i.e. bits 4 and 3 are both set.
 */
static bool
code_desc(int sd_type)
{
	/* code descriptor */
	return ((sd_type & 0x18) == 0x18);
}
2964c87aefeSPatrick Mooney 
/*
 * Return true if the descriptor type 'sd_type' is usable for a stack
 * segment: bit 4 set, bit 3 clear (data) and bit 1 set (writable).
 */
static bool
stack_desc(int sd_type)
{
	/* writable data descriptor */
	return ((sd_type & 0x1A) == 0x12);
}
3034c87aefeSPatrick Mooney 
/*
 * Return true if the descriptor type 'sd_type' may be loaded into a data
 * segment register: either a data descriptor (bit 4 set, bit 3 clear) or a
 * code descriptor that has bit 1 (readable) set.
 */
static bool
data_desc(int sd_type)
{
	/* data descriptor or a readable code descriptor */
	return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A);
}
3104c87aefeSPatrick Mooney 
3114c87aefeSPatrick Mooney static bool
ldt_desc(int sd_type)3124c87aefeSPatrick Mooney ldt_desc(int sd_type)
3134c87aefeSPatrick Mooney {
3144c87aefeSPatrick Mooney 
3154c87aefeSPatrick Mooney 	return (sd_type == SDT_SYSLDT);
3164c87aefeSPatrick Mooney }
3174c87aefeSPatrick Mooney 
3184c87aefeSPatrick Mooney /*
3194c87aefeSPatrick Mooney  * Validate the descriptor 'seg_desc' associated with 'segment'.
3204c87aefeSPatrick Mooney  */
3214c87aefeSPatrick Mooney static int
validate_seg_desc(struct vcpu * vcpu,struct vm_task_switch * ts,int segment,struct seg_desc * seg_desc,int * faultptr)322*32640292SAndy Fiddaman validate_seg_desc(struct vcpu *vcpu, struct vm_task_switch *ts,
3234c87aefeSPatrick Mooney     int segment, struct seg_desc *seg_desc, int *faultptr)
3244c87aefeSPatrick Mooney {
3254c87aefeSPatrick Mooney 	struct vm_guest_paging sup_paging;
3264c87aefeSPatrick Mooney 	struct user_segment_descriptor usd;
3274c87aefeSPatrick Mooney 	int error, idtvec;
3284c87aefeSPatrick Mooney 	int cpl, dpl, rpl;
3294c87aefeSPatrick Mooney 	uint16_t sel, cs;
3304c87aefeSPatrick Mooney 	bool ldtseg, codeseg, stackseg, dataseg, conforming;
3314c87aefeSPatrick Mooney 
3324c87aefeSPatrick Mooney 	ldtseg = codeseg = stackseg = dataseg = false;
3334c87aefeSPatrick Mooney 	switch (segment) {
3344c87aefeSPatrick Mooney 	case VM_REG_GUEST_LDTR:
3354c87aefeSPatrick Mooney 		ldtseg = true;
3364c87aefeSPatrick Mooney 		break;
3374c87aefeSPatrick Mooney 	case VM_REG_GUEST_CS:
3384c87aefeSPatrick Mooney 		codeseg = true;
3394c87aefeSPatrick Mooney 		break;
3404c87aefeSPatrick Mooney 	case VM_REG_GUEST_SS:
3414c87aefeSPatrick Mooney 		stackseg = true;
3424c87aefeSPatrick Mooney 		break;
3434c87aefeSPatrick Mooney 	case VM_REG_GUEST_DS:
3444c87aefeSPatrick Mooney 	case VM_REG_GUEST_ES:
3454c87aefeSPatrick Mooney 	case VM_REG_GUEST_FS:
3464c87aefeSPatrick Mooney 	case VM_REG_GUEST_GS:
3474c87aefeSPatrick Mooney 		dataseg = true;
3484c87aefeSPatrick Mooney 		break;
3494c87aefeSPatrick Mooney 	default:
3504c87aefeSPatrick Mooney 		assert(0);
3514c87aefeSPatrick Mooney 	}
3524c87aefeSPatrick Mooney 
3534c87aefeSPatrick Mooney 	/* Get the segment selector */
354*32640292SAndy Fiddaman 	sel = GETREG(vcpu, segment);
3554c87aefeSPatrick Mooney 
3564c87aefeSPatrick Mooney 	/* LDT selector must point into the GDT */
3574c87aefeSPatrick Mooney 	if (ldtseg && ISLDT(sel)) {
358*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, sel, ts->ext);
3594c87aefeSPatrick Mooney 		return (1);
3604c87aefeSPatrick Mooney 	}
3614c87aefeSPatrick Mooney 
3624c87aefeSPatrick Mooney 	/* Descriptor table limit check */
363*32640292SAndy Fiddaman 	if (desc_table_limit_check(vcpu, sel)) {
364*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, sel, ts->ext);
3654c87aefeSPatrick Mooney 		return (1);
3664c87aefeSPatrick Mooney 	}
3674c87aefeSPatrick Mooney 
3684c87aefeSPatrick Mooney 	/* NULL selector */
3694c87aefeSPatrick Mooney 	if (IDXSEL(sel) == 0) {
3704c87aefeSPatrick Mooney 		/* Code and stack segment selectors cannot be NULL */
3714c87aefeSPatrick Mooney 		if (codeseg || stackseg) {
372*32640292SAndy Fiddaman 			sel_exception(vcpu, IDT_TS, sel, ts->ext);
3734c87aefeSPatrick Mooney 			return (1);
3744c87aefeSPatrick Mooney 		}
3754c87aefeSPatrick Mooney 		seg_desc->base = 0;
3764c87aefeSPatrick Mooney 		seg_desc->limit = 0;
3774c87aefeSPatrick Mooney 		seg_desc->access = 0x10000;	/* unusable */
3784c87aefeSPatrick Mooney 		return (0);
3794c87aefeSPatrick Mooney 	}
3804c87aefeSPatrick Mooney 
3814c87aefeSPatrick Mooney 	/* Read the descriptor from the GDT/LDT */
3824c87aefeSPatrick Mooney 	sup_paging = ts->paging;
3834c87aefeSPatrick Mooney 	sup_paging.cpl = 0;	/* implicit supervisor mode */
384*32640292SAndy Fiddaman 	error = desc_table_read(vcpu, &sup_paging, sel, &usd, faultptr);
3854c87aefeSPatrick Mooney 	if (error || *faultptr)
3864c87aefeSPatrick Mooney 		return (error);
3874c87aefeSPatrick Mooney 
3884c87aefeSPatrick Mooney 	/* Verify that the descriptor type is compatible with the segment */
3894c87aefeSPatrick Mooney 	if ((ldtseg && !ldt_desc(usd.sd_type)) ||
3904c87aefeSPatrick Mooney 	    (codeseg && !code_desc(usd.sd_type)) ||
3914c87aefeSPatrick Mooney 	    (dataseg && !data_desc(usd.sd_type)) ||
3924c87aefeSPatrick Mooney 	    (stackseg && !stack_desc(usd.sd_type))) {
393*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, sel, ts->ext);
3944c87aefeSPatrick Mooney 		return (1);
3954c87aefeSPatrick Mooney 	}
3964c87aefeSPatrick Mooney 
3974c87aefeSPatrick Mooney 	/* Segment must be marked present */
3984c87aefeSPatrick Mooney 	if (!usd.sd_p) {
3994c87aefeSPatrick Mooney 		if (ldtseg)
4004c87aefeSPatrick Mooney 			idtvec = IDT_TS;
4014c87aefeSPatrick Mooney 		else if (stackseg)
4024c87aefeSPatrick Mooney 			idtvec = IDT_SS;
4034c87aefeSPatrick Mooney 		else
4044c87aefeSPatrick Mooney 			idtvec = IDT_NP;
405*32640292SAndy Fiddaman 		sel_exception(vcpu, idtvec, sel, ts->ext);
4064c87aefeSPatrick Mooney 		return (1);
4074c87aefeSPatrick Mooney 	}
4084c87aefeSPatrick Mooney 
409*32640292SAndy Fiddaman 	cs = GETREG(vcpu, VM_REG_GUEST_CS);
4104c87aefeSPatrick Mooney 	cpl = cs & SEL_RPL_MASK;
4114c87aefeSPatrick Mooney 	rpl = sel & SEL_RPL_MASK;
4124c87aefeSPatrick Mooney 	dpl = usd.sd_dpl;
4134c87aefeSPatrick Mooney 
4144c87aefeSPatrick Mooney 	if (stackseg && (rpl != cpl || dpl != cpl)) {
415*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, sel, ts->ext);
4164c87aefeSPatrick Mooney 		return (1);
4174c87aefeSPatrick Mooney 	}
4184c87aefeSPatrick Mooney 
4194c87aefeSPatrick Mooney 	if (codeseg) {
4204c87aefeSPatrick Mooney 		conforming = (usd.sd_type & 0x4) ? true : false;
4214c87aefeSPatrick Mooney 		if ((conforming && (cpl < dpl)) ||
4224c87aefeSPatrick Mooney 		    (!conforming && (cpl != dpl))) {
423*32640292SAndy Fiddaman 			sel_exception(vcpu, IDT_TS, sel, ts->ext);
4244c87aefeSPatrick Mooney 			return (1);
4254c87aefeSPatrick Mooney 		}
4264c87aefeSPatrick Mooney 	}
4274c87aefeSPatrick Mooney 
4284c87aefeSPatrick Mooney 	if (dataseg) {
4294c87aefeSPatrick Mooney 		/*
4304c87aefeSPatrick Mooney 		 * A data segment is always non-conforming except when it's
4314c87aefeSPatrick Mooney 		 * descriptor is a readable, conforming code segment.
4324c87aefeSPatrick Mooney 		 */
4334c87aefeSPatrick Mooney 		if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
4344c87aefeSPatrick Mooney 			conforming = true;
4354c87aefeSPatrick Mooney 		else
4364c87aefeSPatrick Mooney 			conforming = false;
4374c87aefeSPatrick Mooney 
4384c87aefeSPatrick Mooney 		if (!conforming && (rpl > dpl || cpl > dpl)) {
439*32640292SAndy Fiddaman 			sel_exception(vcpu, IDT_TS, sel, ts->ext);
4404c87aefeSPatrick Mooney 			return (1);
4414c87aefeSPatrick Mooney 		}
4424c87aefeSPatrick Mooney 	}
4434c87aefeSPatrick Mooney 	*seg_desc = usd_to_seg_desc(&usd);
4444c87aefeSPatrick Mooney 	return (0);
4454c87aefeSPatrick Mooney }
4464c87aefeSPatrick Mooney 
4474c87aefeSPatrick Mooney static void
tss32_save(struct vcpu * vcpu,struct vm_task_switch * task_switch,uint32_t eip,struct tss32 * tss,struct iovec * iov)448*32640292SAndy Fiddaman tss32_save(struct vcpu *vcpu, struct vm_task_switch *task_switch,
4494c87aefeSPatrick Mooney     uint32_t eip, struct tss32 *tss, struct iovec *iov)
4504c87aefeSPatrick Mooney {
4514c87aefeSPatrick Mooney 
4524c87aefeSPatrick Mooney 	/* General purpose registers */
453*32640292SAndy Fiddaman 	tss->tss_eax = GETREG(vcpu, VM_REG_GUEST_RAX);
454*32640292SAndy Fiddaman 	tss->tss_ecx = GETREG(vcpu, VM_REG_GUEST_RCX);
455*32640292SAndy Fiddaman 	tss->tss_edx = GETREG(vcpu, VM_REG_GUEST_RDX);
456*32640292SAndy Fiddaman 	tss->tss_ebx = GETREG(vcpu, VM_REG_GUEST_RBX);
457*32640292SAndy Fiddaman 	tss->tss_esp = GETREG(vcpu, VM_REG_GUEST_RSP);
458*32640292SAndy Fiddaman 	tss->tss_ebp = GETREG(vcpu, VM_REG_GUEST_RBP);
459*32640292SAndy Fiddaman 	tss->tss_esi = GETREG(vcpu, VM_REG_GUEST_RSI);
460*32640292SAndy Fiddaman 	tss->tss_edi = GETREG(vcpu, VM_REG_GUEST_RDI);
4614c87aefeSPatrick Mooney 
4624c87aefeSPatrick Mooney 	/* Segment selectors */
463*32640292SAndy Fiddaman 	tss->tss_es = GETREG(vcpu, VM_REG_GUEST_ES);
464*32640292SAndy Fiddaman 	tss->tss_cs = GETREG(vcpu, VM_REG_GUEST_CS);
465*32640292SAndy Fiddaman 	tss->tss_ss = GETREG(vcpu, VM_REG_GUEST_SS);
466*32640292SAndy Fiddaman 	tss->tss_ds = GETREG(vcpu, VM_REG_GUEST_DS);
467*32640292SAndy Fiddaman 	tss->tss_fs = GETREG(vcpu, VM_REG_GUEST_FS);
468*32640292SAndy Fiddaman 	tss->tss_gs = GETREG(vcpu, VM_REG_GUEST_GS);
4694c87aefeSPatrick Mooney 
4704c87aefeSPatrick Mooney 	/* eflags and eip */
471*32640292SAndy Fiddaman 	tss->tss_eflags = GETREG(vcpu, VM_REG_GUEST_RFLAGS);
4724c87aefeSPatrick Mooney 	if (task_switch->reason == TSR_IRET)
4734c87aefeSPatrick Mooney 		tss->tss_eflags &= ~PSL_NT;
4744c87aefeSPatrick Mooney 	tss->tss_eip = eip;
4754c87aefeSPatrick Mooney 
4764c87aefeSPatrick Mooney 	/* Copy updated old TSS into guest memory */
477*32640292SAndy Fiddaman 	vm_copyout(tss, iov, sizeof(struct tss32));
4784c87aefeSPatrick Mooney }
4794c87aefeSPatrick Mooney 
4804c87aefeSPatrick Mooney static void
update_seg_desc(struct vcpu * vcpu,int reg,struct seg_desc * sd)481*32640292SAndy Fiddaman update_seg_desc(struct vcpu *vcpu, int reg, struct seg_desc *sd)
4824c87aefeSPatrick Mooney {
4834c87aefeSPatrick Mooney 	int error;
4844c87aefeSPatrick Mooney 
485*32640292SAndy Fiddaman 	error = vm_set_desc(vcpu, reg, sd->base, sd->limit, sd->access);
4864c87aefeSPatrick Mooney 	assert(error == 0);
4874c87aefeSPatrick Mooney }
4884c87aefeSPatrick Mooney 
4894c87aefeSPatrick Mooney /*
4904c87aefeSPatrick Mooney  * Update the vcpu registers to reflect the state of the new task.
4914c87aefeSPatrick Mooney  */
4924c87aefeSPatrick Mooney static int
tss32_restore(struct vmctx * ctx,struct vcpu * vcpu,struct vm_task_switch * ts,uint16_t ot_sel,struct tss32 * tss,struct iovec * iov,int * faultptr)493*32640292SAndy Fiddaman tss32_restore(struct vmctx *ctx, struct vcpu *vcpu, struct vm_task_switch *ts,
4944c87aefeSPatrick Mooney     uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
4954c87aefeSPatrick Mooney {
4964c87aefeSPatrick Mooney 	struct seg_desc seg_desc, seg_desc2;
4974c87aefeSPatrick Mooney 	uint64_t *pdpte, maxphyaddr, reserved;
4984c87aefeSPatrick Mooney 	uint32_t eflags;
4994c87aefeSPatrick Mooney 	int error, i;
5004c87aefeSPatrick Mooney 	bool nested;
5014c87aefeSPatrick Mooney 
5024c87aefeSPatrick Mooney 	nested = false;
5034c87aefeSPatrick Mooney 	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
5044c87aefeSPatrick Mooney 		tss->tss_link = ot_sel;
5054c87aefeSPatrick Mooney 		nested = true;
5064c87aefeSPatrick Mooney 	}
5074c87aefeSPatrick Mooney 
5084c87aefeSPatrick Mooney 	eflags = tss->tss_eflags;
5094c87aefeSPatrick Mooney 	if (nested)
5104c87aefeSPatrick Mooney 		eflags |= PSL_NT;
5114c87aefeSPatrick Mooney 
5124c87aefeSPatrick Mooney 	/* LDTR */
513*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);
5144c87aefeSPatrick Mooney 
5154c87aefeSPatrick Mooney 	/* PBDR */
5164c87aefeSPatrick Mooney 	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
5174c87aefeSPatrick Mooney 		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
5184c87aefeSPatrick Mooney 			/*
5194c87aefeSPatrick Mooney 			 * XXX Assuming 36-bit MAXPHYADDR.
5204c87aefeSPatrick Mooney 			 */
5214c87aefeSPatrick Mooney 			maxphyaddr = (1UL << 36) - 1;
5224c87aefeSPatrick Mooney 			pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
5234c87aefeSPatrick Mooney 			for (i = 0; i < 4; i++) {
5244c87aefeSPatrick Mooney 				/* Check reserved bits if the PDPTE is valid */
5254c87aefeSPatrick Mooney 				if (!(pdpte[i] & 0x1))
5264c87aefeSPatrick Mooney 					continue;
5274c87aefeSPatrick Mooney 				/*
5284c87aefeSPatrick Mooney 				 * Bits 2:1, 8:5 and bits above the processor's
5294c87aefeSPatrick Mooney 				 * maximum physical address are reserved.
5304c87aefeSPatrick Mooney 				 */
5314c87aefeSPatrick Mooney 				reserved = ~maxphyaddr | 0x1E6;
5324c87aefeSPatrick Mooney 				if (pdpte[i] & reserved) {
533*32640292SAndy Fiddaman 					vm_inject_gp(vcpu);
5344c87aefeSPatrick Mooney 					return (1);
5354c87aefeSPatrick Mooney 				}
5364c87aefeSPatrick Mooney 			}
537*32640292SAndy Fiddaman 			SETREG(vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
538*32640292SAndy Fiddaman 			SETREG(vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
539*32640292SAndy Fiddaman 			SETREG(vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
540*32640292SAndy Fiddaman 			SETREG(vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
5414c87aefeSPatrick Mooney 		}
542*32640292SAndy Fiddaman 		SETREG(vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
5434c87aefeSPatrick Mooney 		ts->paging.cr3 = tss->tss_cr3;
5444c87aefeSPatrick Mooney 	}
5454c87aefeSPatrick Mooney 
5464c87aefeSPatrick Mooney 	/* eflags and eip */
547*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RFLAGS, eflags);
548*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RIP, tss->tss_eip);
5494c87aefeSPatrick Mooney 
5504c87aefeSPatrick Mooney 	/* General purpose registers */
551*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
552*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
553*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
554*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
555*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
556*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
557*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
558*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RDI, tss->tss_edi);
5594c87aefeSPatrick Mooney 
5604c87aefeSPatrick Mooney 	/* Segment selectors */
561*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_ES, tss->tss_es);
562*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_CS, tss->tss_cs);
563*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_SS, tss->tss_ss);
564*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_DS, tss->tss_ds);
565*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_FS, tss->tss_fs);
566*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_GS, tss->tss_gs);
5674c87aefeSPatrick Mooney 
5684c87aefeSPatrick Mooney 	/*
5694c87aefeSPatrick Mooney 	 * If this is a nested task then write out the new TSS to update
5704c87aefeSPatrick Mooney 	 * the previous link field.
5714c87aefeSPatrick Mooney 	 */
5724c87aefeSPatrick Mooney 	if (nested)
573*32640292SAndy Fiddaman 		vm_copyout(tss, iov, sizeof(*tss));
5744c87aefeSPatrick Mooney 
5754c87aefeSPatrick Mooney 	/* Validate segment descriptors */
576*32640292SAndy Fiddaman 	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
5774c87aefeSPatrick Mooney 	    faultptr);
5784c87aefeSPatrick Mooney 	if (error || *faultptr)
5794c87aefeSPatrick Mooney 		return (error);
580*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_LDTR, &seg_desc);
5814c87aefeSPatrick Mooney 
5824c87aefeSPatrick Mooney 	/*
5834c87aefeSPatrick Mooney 	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
5844c87aefeSPatrick Mooney 	 *
5854c87aefeSPatrick Mooney 	 * The SS and CS attribute checks on VM-entry are inter-dependent so
5864c87aefeSPatrick Mooney 	 * we need to make sure that both segments are valid before updating
5874c87aefeSPatrick Mooney 	 * either of them. This ensures that the VMCS state can pass the
5884c87aefeSPatrick Mooney 	 * VM-entry checks so the guest can handle any exception injected
5894c87aefeSPatrick Mooney 	 * during task switch emulation.
5904c87aefeSPatrick Mooney 	 */
591*32640292SAndy Fiddaman 	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
5924c87aefeSPatrick Mooney 	    faultptr);
5934c87aefeSPatrick Mooney 	if (error || *faultptr)
5944c87aefeSPatrick Mooney 		return (error);
5954c87aefeSPatrick Mooney 
596*32640292SAndy Fiddaman 	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
5974c87aefeSPatrick Mooney 	    faultptr);
5984c87aefeSPatrick Mooney 	if (error || *faultptr)
5994c87aefeSPatrick Mooney 		return (error);
600*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_CS, &seg_desc);
601*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_SS, &seg_desc2);
6024c87aefeSPatrick Mooney 	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
6034c87aefeSPatrick Mooney 
604*32640292SAndy Fiddaman 	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
6054c87aefeSPatrick Mooney 	    faultptr);
6064c87aefeSPatrick Mooney 	if (error || *faultptr)
6074c87aefeSPatrick Mooney 		return (error);
608*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_DS, &seg_desc);
6094c87aefeSPatrick Mooney 
610*32640292SAndy Fiddaman 	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
6114c87aefeSPatrick Mooney 	    faultptr);
6124c87aefeSPatrick Mooney 	if (error || *faultptr)
6134c87aefeSPatrick Mooney 		return (error);
614*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_ES, &seg_desc);
6154c87aefeSPatrick Mooney 
616*32640292SAndy Fiddaman 	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
6174c87aefeSPatrick Mooney 	    faultptr);
6184c87aefeSPatrick Mooney 	if (error || *faultptr)
6194c87aefeSPatrick Mooney 		return (error);
620*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_FS, &seg_desc);
6214c87aefeSPatrick Mooney 
622*32640292SAndy Fiddaman 	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
6234c87aefeSPatrick Mooney 	    faultptr);
6244c87aefeSPatrick Mooney 	if (error || *faultptr)
6254c87aefeSPatrick Mooney 		return (error);
626*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_GS, &seg_desc);
6274c87aefeSPatrick Mooney 
6284c87aefeSPatrick Mooney 	return (0);
6294c87aefeSPatrick Mooney }
6304c87aefeSPatrick Mooney 
631e0c0d44eSPatrick Mooney 
632e0c0d44eSPatrick Mooney /*
633e0c0d44eSPatrick Mooney  * Copy of vie_alignment_check() from vmm_instruction_emul.c
634e0c0d44eSPatrick Mooney  */
635e0c0d44eSPatrick Mooney static int
alignment_check(int cpl,int size,uint64_t cr0,uint64_t rf,uint64_t gla)636e0c0d44eSPatrick Mooney alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
637e0c0d44eSPatrick Mooney {
638e0c0d44eSPatrick Mooney 	assert(size == 1 || size == 2 || size == 4 || size == 8);
639e0c0d44eSPatrick Mooney 	assert(cpl >= 0 && cpl <= 3);
640e0c0d44eSPatrick Mooney 
641e0c0d44eSPatrick Mooney 	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
642e0c0d44eSPatrick Mooney 		return (0);
643e0c0d44eSPatrick Mooney 
644e0c0d44eSPatrick Mooney 	return ((gla & (size - 1)) ? 1 : 0);
645e0c0d44eSPatrick Mooney }
646e0c0d44eSPatrick Mooney 
/*
 * Copy of vie_size2mask() from vmm_instruction_emul.c
 *
 * Return a mask covering the low 'size' bytes of a 64-bit value.  'size'
 * must be 1, 2, 4 or 8; any other value trips an assertion.
 */
static uint64_t
size2mask(int size)
{
	if (size == 1)
		return (0xff);
	if (size == 2)
		return (0xffff);
	if (size == 4)
		return (0xffffffff);
	if (size == 8)
		return (0xffffffffffffffff);

	assert(0);
	/* not reached */
	return (0);
}
668e0c0d44eSPatrick Mooney 
669e0c0d44eSPatrick Mooney /*
670e0c0d44eSPatrick Mooney  * Copy of vie_calculate_gla() from vmm_instruction_emul.c
671e0c0d44eSPatrick Mooney  */
672e0c0d44eSPatrick Mooney static int
calculate_gla(enum vm_cpu_mode cpu_mode,enum vm_reg_name seg,struct seg_desc * desc,uint64_t offset,int length,int addrsize,int prot,uint64_t * gla)673e0c0d44eSPatrick Mooney calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
674e0c0d44eSPatrick Mooney     struct seg_desc *desc, uint64_t offset, int length, int addrsize,
675e0c0d44eSPatrick Mooney     int prot, uint64_t *gla)
676e0c0d44eSPatrick Mooney {
677e0c0d44eSPatrick Mooney 	uint64_t firstoff, low_limit, high_limit, segbase;
678e0c0d44eSPatrick Mooney 	int glasize, type;
679e0c0d44eSPatrick Mooney 
680e0c0d44eSPatrick Mooney 	assert(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS);
681e0c0d44eSPatrick Mooney 	assert((length == 1 || length == 2 || length == 4 || length == 8));
682e0c0d44eSPatrick Mooney 	assert((prot & ~(PROT_READ | PROT_WRITE)) == 0);
683e0c0d44eSPatrick Mooney 
684e0c0d44eSPatrick Mooney 	firstoff = offset;
685e0c0d44eSPatrick Mooney 	if (cpu_mode == CPU_MODE_64BIT) {
686e0c0d44eSPatrick Mooney 		assert(addrsize == 4 || addrsize == 8);
687e0c0d44eSPatrick Mooney 		glasize = 8;
688e0c0d44eSPatrick Mooney 	} else {
689e0c0d44eSPatrick Mooney 		assert(addrsize == 2 || addrsize == 4);
690e0c0d44eSPatrick Mooney 		glasize = 4;
691e0c0d44eSPatrick Mooney 		/*
692e0c0d44eSPatrick Mooney 		 * If the segment selector is loaded with a NULL selector
693e0c0d44eSPatrick Mooney 		 * then the descriptor is unusable and attempting to use
694e0c0d44eSPatrick Mooney 		 * it results in a #GP(0).
695e0c0d44eSPatrick Mooney 		 */
696e0c0d44eSPatrick Mooney 		if (SEG_DESC_UNUSABLE(desc->access))
697e0c0d44eSPatrick Mooney 			return (-1);
698e0c0d44eSPatrick Mooney 
699e0c0d44eSPatrick Mooney 		/*
700e0c0d44eSPatrick Mooney 		 * The processor generates a #NP exception when a segment
701e0c0d44eSPatrick Mooney 		 * register is loaded with a selector that points to a
702e0c0d44eSPatrick Mooney 		 * descriptor that is not present. If this was the case then
703e0c0d44eSPatrick Mooney 		 * it would have been checked before the VM-exit.
704e0c0d44eSPatrick Mooney 		 */
705e0c0d44eSPatrick Mooney 		assert(SEG_DESC_PRESENT(desc->access));
706e0c0d44eSPatrick Mooney 
707e0c0d44eSPatrick Mooney 		/*
708e0c0d44eSPatrick Mooney 		 * The descriptor type must indicate a code/data segment.
709e0c0d44eSPatrick Mooney 		 */
710e0c0d44eSPatrick Mooney 		type = SEG_DESC_TYPE(desc->access);
711e0c0d44eSPatrick Mooney 		assert(type >= 16 && type <= 31);
712e0c0d44eSPatrick Mooney 
713e0c0d44eSPatrick Mooney 		if (prot & PROT_READ) {
714e0c0d44eSPatrick Mooney 			/* #GP on a read access to a exec-only code segment */
715e0c0d44eSPatrick Mooney 			if ((type & 0xA) == 0x8)
716e0c0d44eSPatrick Mooney 				return (-1);
717e0c0d44eSPatrick Mooney 		}
718e0c0d44eSPatrick Mooney 
719e0c0d44eSPatrick Mooney 		if (prot & PROT_WRITE) {
720e0c0d44eSPatrick Mooney 			/*
721e0c0d44eSPatrick Mooney 			 * #GP on a write access to a code segment or a
722e0c0d44eSPatrick Mooney 			 * read-only data segment.
723e0c0d44eSPatrick Mooney 			 */
724e0c0d44eSPatrick Mooney 			if (type & 0x8)			/* code segment */
725e0c0d44eSPatrick Mooney 				return (-1);
726e0c0d44eSPatrick Mooney 
727e0c0d44eSPatrick Mooney 			if ((type & 0xA) == 0)		/* read-only data seg */
728e0c0d44eSPatrick Mooney 				return (-1);
729e0c0d44eSPatrick Mooney 		}
730e0c0d44eSPatrick Mooney 
731e0c0d44eSPatrick Mooney 		/*
732e0c0d44eSPatrick Mooney 		 * 'desc->limit' is fully expanded taking granularity into
733e0c0d44eSPatrick Mooney 		 * account.
734e0c0d44eSPatrick Mooney 		 */
735e0c0d44eSPatrick Mooney 		if ((type & 0xC) == 0x4) {
736e0c0d44eSPatrick Mooney 			/* expand-down data segment */
737e0c0d44eSPatrick Mooney 			low_limit = desc->limit + 1;
738e0c0d44eSPatrick Mooney 			high_limit = SEG_DESC_DEF32(desc->access) ?
739e0c0d44eSPatrick Mooney 			    0xffffffff : 0xffff;
740e0c0d44eSPatrick Mooney 		} else {
741e0c0d44eSPatrick Mooney 			/* code segment or expand-up data segment */
742e0c0d44eSPatrick Mooney 			low_limit = 0;
743e0c0d44eSPatrick Mooney 			high_limit = desc->limit;
744e0c0d44eSPatrick Mooney 		}
745e0c0d44eSPatrick Mooney 
746e0c0d44eSPatrick Mooney 		while (length > 0) {
747e0c0d44eSPatrick Mooney 			offset &= size2mask(addrsize);
748e0c0d44eSPatrick Mooney 			if (offset < low_limit || offset > high_limit)
749e0c0d44eSPatrick Mooney 				return (-1);
750e0c0d44eSPatrick Mooney 			offset++;
751e0c0d44eSPatrick Mooney 			length--;
752e0c0d44eSPatrick Mooney 		}
753e0c0d44eSPatrick Mooney 	}
754e0c0d44eSPatrick Mooney 
755e0c0d44eSPatrick Mooney 	/*
756e0c0d44eSPatrick Mooney 	 * In 64-bit mode all segments except %fs and %gs have a segment
757e0c0d44eSPatrick Mooney 	 * base address of 0.
758e0c0d44eSPatrick Mooney 	 */
759e0c0d44eSPatrick Mooney 	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
760e0c0d44eSPatrick Mooney 	    seg != VM_REG_GUEST_GS) {
761e0c0d44eSPatrick Mooney 		segbase = 0;
762e0c0d44eSPatrick Mooney 	} else {
763e0c0d44eSPatrick Mooney 		segbase = desc->base;
764e0c0d44eSPatrick Mooney 	}
765e0c0d44eSPatrick Mooney 
766e0c0d44eSPatrick Mooney 	/*
767e0c0d44eSPatrick Mooney 	 * Truncate 'firstoff' to the effective address size before adding
768e0c0d44eSPatrick Mooney 	 * it to the segment base.
769e0c0d44eSPatrick Mooney 	 */
770e0c0d44eSPatrick Mooney 	firstoff &= size2mask(addrsize);
771e0c0d44eSPatrick Mooney 	*gla = (segbase + firstoff) & size2mask(glasize);
772e0c0d44eSPatrick Mooney 	return (0);
773e0c0d44eSPatrick Mooney }
774e0c0d44eSPatrick Mooney 
7754c87aefeSPatrick Mooney /*
7764c87aefeSPatrick Mooney  * Push an error code on the stack of the new task. This is needed if the
7774c87aefeSPatrick Mooney  * task switch was triggered by a hardware exception that causes an error
7784c87aefeSPatrick Mooney  * code to be saved (e.g. #PF).
7794c87aefeSPatrick Mooney  */
7804c87aefeSPatrick Mooney static int
push_errcode(struct vcpu * vcpu,struct vm_guest_paging * paging,int task_type,uint32_t errcode,int * faultptr)781*32640292SAndy Fiddaman push_errcode(struct vcpu *vcpu, struct vm_guest_paging *paging,
7824c87aefeSPatrick Mooney     int task_type, uint32_t errcode, int *faultptr)
7834c87aefeSPatrick Mooney {
7844c87aefeSPatrick Mooney 	struct iovec iov[2];
7854c87aefeSPatrick Mooney 	struct seg_desc seg_desc;
7864c87aefeSPatrick Mooney 	int stacksize, bytes, error;
7874c87aefeSPatrick Mooney 	uint64_t gla, cr0, rflags;
7884c87aefeSPatrick Mooney 	uint32_t esp;
7894c87aefeSPatrick Mooney 	uint16_t stacksel;
7904c87aefeSPatrick Mooney 
7914c87aefeSPatrick Mooney 	*faultptr = 0;
7924c87aefeSPatrick Mooney 
793*32640292SAndy Fiddaman 	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
794*32640292SAndy Fiddaman 	rflags = GETREG(vcpu, VM_REG_GUEST_RFLAGS);
795*32640292SAndy Fiddaman 	stacksel = GETREG(vcpu, VM_REG_GUEST_SS);
7964c87aefeSPatrick Mooney 
797*32640292SAndy Fiddaman 	error = vm_get_desc(vcpu, VM_REG_GUEST_SS, &seg_desc.base,
7984c87aefeSPatrick Mooney 	    &seg_desc.limit, &seg_desc.access);
7994c87aefeSPatrick Mooney 	assert(error == 0);
8004c87aefeSPatrick Mooney 
8014c87aefeSPatrick Mooney 	/*
8024c87aefeSPatrick Mooney 	 * Section "Error Code" in the Intel SDM vol 3: the error code is
8034c87aefeSPatrick Mooney 	 * pushed on the stack as a doubleword or word (depending on the
8044c87aefeSPatrick Mooney 	 * default interrupt, trap or task gate size).
8054c87aefeSPatrick Mooney 	 */
8064c87aefeSPatrick Mooney 	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
8074c87aefeSPatrick Mooney 		bytes = 4;
8084c87aefeSPatrick Mooney 	else
8094c87aefeSPatrick Mooney 		bytes = 2;
8104c87aefeSPatrick Mooney 
8114c87aefeSPatrick Mooney 	/*
8124c87aefeSPatrick Mooney 	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
8134c87aefeSPatrick Mooney 	 * stack-segment descriptor determines the size of the stack
8144c87aefeSPatrick Mooney 	 * pointer outside of 64-bit mode.
8154c87aefeSPatrick Mooney 	 */
8164c87aefeSPatrick Mooney 	if (SEG_DESC_DEF32(seg_desc.access))
8174c87aefeSPatrick Mooney 		stacksize = 4;
8184c87aefeSPatrick Mooney 	else
8194c87aefeSPatrick Mooney 		stacksize = 2;
8204c87aefeSPatrick Mooney 
821*32640292SAndy Fiddaman 	esp = GETREG(vcpu, VM_REG_GUEST_RSP);
8224c87aefeSPatrick Mooney 	esp -= bytes;
8234c87aefeSPatrick Mooney 
824e0c0d44eSPatrick Mooney 	if (calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
8254c87aefeSPatrick Mooney 	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
826*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_SS, stacksel, 1);
8274c87aefeSPatrick Mooney 		*faultptr = 1;
8284c87aefeSPatrick Mooney 		return (0);
8294c87aefeSPatrick Mooney 	}
8304c87aefeSPatrick Mooney 
831e0c0d44eSPatrick Mooney 	if (alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
832*32640292SAndy Fiddaman 		vm_inject_ac(vcpu, 1);
8334c87aefeSPatrick Mooney 		*faultptr = 1;
8344c87aefeSPatrick Mooney 		return (0);
8354c87aefeSPatrick Mooney 	}
8364c87aefeSPatrick Mooney 
837*32640292SAndy Fiddaman 	error = vm_copy_setup(vcpu, paging, gla, bytes, PROT_WRITE,
8384c87aefeSPatrick Mooney 	    iov, nitems(iov), faultptr);
8394c87aefeSPatrick Mooney 	if (error || *faultptr)
8404c87aefeSPatrick Mooney 		return (error);
8414c87aefeSPatrick Mooney 
842*32640292SAndy Fiddaman 	vm_copyout(&errcode, iov, bytes);
843*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_RSP, esp);
8444c87aefeSPatrick Mooney 	return (0);
8454c87aefeSPatrick Mooney }
8464c87aefeSPatrick Mooney 
/*
 * Evaluate return value from helper functions and potentially return to
 * the VM run loop.
 *
 * 'error' must be 0 or EFAULT.  A non-zero 'error' makes the enclosing
 * function return VMEXIT_ABORT; otherwise a non-zero 'fault' makes it
 * return VMEXIT_CONTINUE.  If both are zero execution falls through.
 *
 * 'error' is evaluated exactly once, and 'fault' at most once (only when
 * 'error' is zero).
 */
#define	CHKERR(error,fault)						\
	do {								\
		int chkerr_error_ = (error);				\
		assert(chkerr_error_ == 0 || chkerr_error_ == EFAULT);	\
		if (chkerr_error_ != 0)					\
			return (VMEXIT_ABORT);				\
		else if ((fault))					\
			return (VMEXIT_CONTINUE);			\
	} while (0)
8594c87aefeSPatrick Mooney 
8604c87aefeSPatrick Mooney int
vmexit_task_switch(struct vmctx * ctx,struct vcpu * vcpu,struct vm_exit * vmexit)861*32640292SAndy Fiddaman vmexit_task_switch(struct vmctx *ctx, struct vcpu *vcpu, struct vm_exit *vmexit)
8624c87aefeSPatrick Mooney {
8634c87aefeSPatrick Mooney 	struct seg_desc nt;
8644c87aefeSPatrick Mooney 	struct tss32 oldtss, newtss;
8654c87aefeSPatrick Mooney 	struct vm_task_switch *task_switch;
8664c87aefeSPatrick Mooney 	struct vm_guest_paging *paging, sup_paging;
8674c87aefeSPatrick Mooney 	struct user_segment_descriptor nt_desc, ot_desc;
8684c87aefeSPatrick Mooney 	struct iovec nt_iov[2], ot_iov[2];
8694c87aefeSPatrick Mooney 	uint64_t cr0, ot_base;
8704c87aefeSPatrick Mooney 	uint32_t eip, ot_lim, access;
871*32640292SAndy Fiddaman 	int error, ext, fault, minlimit, nt_type, ot_type;
8724c87aefeSPatrick Mooney 	enum task_switch_reason reason;
8734c87aefeSPatrick Mooney 	uint16_t nt_sel, ot_sel;
8744c87aefeSPatrick Mooney 
8754c87aefeSPatrick Mooney 	task_switch = &vmexit->u.task_switch;
8764c87aefeSPatrick Mooney 	nt_sel = task_switch->tsssel;
8774c87aefeSPatrick Mooney 	ext = vmexit->u.task_switch.ext;
8784c87aefeSPatrick Mooney 	reason = vmexit->u.task_switch.reason;
8794c87aefeSPatrick Mooney 	paging = &vmexit->u.task_switch.paging;
8804c87aefeSPatrick Mooney 
8814c87aefeSPatrick Mooney 	assert(paging->cpu_mode == CPU_MODE_PROTECTED);
8824c87aefeSPatrick Mooney 
8834c87aefeSPatrick Mooney 	/*
8844c87aefeSPatrick Mooney 	 * Calculate the instruction pointer to store in the old TSS.
8854c87aefeSPatrick Mooney 	 */
8864c87aefeSPatrick Mooney 	eip = vmexit->rip + vmexit->inst_length;
8874c87aefeSPatrick Mooney 
8884c87aefeSPatrick Mooney 	/*
8894c87aefeSPatrick Mooney 	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
8904c87aefeSPatrick Mooney 	 * The following page table accesses are implicitly supervisor mode:
8914c87aefeSPatrick Mooney 	 * - accesses to GDT or LDT to load segment descriptors
8924c87aefeSPatrick Mooney 	 * - accesses to the task state segment during task switch
8934c87aefeSPatrick Mooney 	 */
8944c87aefeSPatrick Mooney 	sup_paging = *paging;
8954c87aefeSPatrick Mooney 	sup_paging.cpl = 0;	/* implicit supervisor mode */
8964c87aefeSPatrick Mooney 
8974c87aefeSPatrick Mooney 	/* Fetch the new TSS descriptor */
898*32640292SAndy Fiddaman 	error = read_tss_descriptor(vcpu, task_switch, nt_sel, &nt_desc,
8994c87aefeSPatrick Mooney 	    &fault);
9004c87aefeSPatrick Mooney 	CHKERR(error, fault);
9014c87aefeSPatrick Mooney 
9024c87aefeSPatrick Mooney 	nt = usd_to_seg_desc(&nt_desc);
9034c87aefeSPatrick Mooney 
9044c87aefeSPatrick Mooney 	/* Verify the type of the new TSS */
9054c87aefeSPatrick Mooney 	nt_type = SEG_DESC_TYPE(nt.access);
9064c87aefeSPatrick Mooney 	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
9074c87aefeSPatrick Mooney 	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
908*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, nt_sel, ext);
9094c87aefeSPatrick Mooney 		goto done;
9104c87aefeSPatrick Mooney 	}
9114c87aefeSPatrick Mooney 
9124c87aefeSPatrick Mooney 	/* TSS descriptor must have present bit set */
9134c87aefeSPatrick Mooney 	if (!SEG_DESC_PRESENT(nt.access)) {
914*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_NP, nt_sel, ext);
9154c87aefeSPatrick Mooney 		goto done;
9164c87aefeSPatrick Mooney 	}
9174c87aefeSPatrick Mooney 
9184c87aefeSPatrick Mooney 	/*
9194c87aefeSPatrick Mooney 	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
9204c87aefeSPatrick Mooney 	 * 44 bytes for a 16-bit TSS.
9214c87aefeSPatrick Mooney 	 */
9224c87aefeSPatrick Mooney 	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
9234c87aefeSPatrick Mooney 		minlimit = 104 - 1;
9244c87aefeSPatrick Mooney 	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
9254c87aefeSPatrick Mooney 		minlimit = 44 - 1;
9264c87aefeSPatrick Mooney 	else
9274c87aefeSPatrick Mooney 		minlimit = 0;
9284c87aefeSPatrick Mooney 
9294c87aefeSPatrick Mooney 	assert(minlimit > 0);
93059d65d31SAndy Fiddaman 	if (nt.limit < (unsigned int)minlimit) {
931*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, nt_sel, ext);
9324c87aefeSPatrick Mooney 		goto done;
9334c87aefeSPatrick Mooney 	}
9344c87aefeSPatrick Mooney 
9354c87aefeSPatrick Mooney 	/* TSS must be busy if task switch is due to IRET */
9364c87aefeSPatrick Mooney 	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
937*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, nt_sel, ext);
9384c87aefeSPatrick Mooney 		goto done;
9394c87aefeSPatrick Mooney 	}
9404c87aefeSPatrick Mooney 
9414c87aefeSPatrick Mooney 	/*
9424c87aefeSPatrick Mooney 	 * TSS must be available (not busy) if task switch reason is
9434c87aefeSPatrick Mooney 	 * CALL, JMP, exception or interrupt.
9444c87aefeSPatrick Mooney 	 */
9454c87aefeSPatrick Mooney 	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
946*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_GP, nt_sel, ext);
9474c87aefeSPatrick Mooney 		goto done;
9484c87aefeSPatrick Mooney 	}
9494c87aefeSPatrick Mooney 
9504c87aefeSPatrick Mooney 	/* Fetch the new TSS */
951*32640292SAndy Fiddaman 	error = vm_copy_setup(vcpu, &sup_paging, nt.base, minlimit + 1,
9524c87aefeSPatrick Mooney 	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
9534c87aefeSPatrick Mooney 	CHKERR(error, fault);
954*32640292SAndy Fiddaman 	vm_copyin(nt_iov, &newtss, minlimit + 1);
9554c87aefeSPatrick Mooney 
9564c87aefeSPatrick Mooney 	/* Get the old TSS selector from the guest's task register */
957*32640292SAndy Fiddaman 	ot_sel = GETREG(vcpu, VM_REG_GUEST_TR);
9584c87aefeSPatrick Mooney 	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
9594c87aefeSPatrick Mooney 		/*
9604c87aefeSPatrick Mooney 		 * This might happen if a task switch was attempted without
9614c87aefeSPatrick Mooney 		 * ever loading the task register with LTR. In this case the
9624c87aefeSPatrick Mooney 		 * TR would contain the values from power-on:
9634c87aefeSPatrick Mooney 		 * (sel = 0, base = 0, limit = 0xffff).
9644c87aefeSPatrick Mooney 		 */
965*32640292SAndy Fiddaman 		sel_exception(vcpu, IDT_TS, ot_sel, task_switch->ext);
9664c87aefeSPatrick Mooney 		goto done;
9674c87aefeSPatrick Mooney 	}
9684c87aefeSPatrick Mooney 
9694c87aefeSPatrick Mooney 	/* Get the old TSS base and limit from the guest's task register */
970*32640292SAndy Fiddaman 	error = vm_get_desc(vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
9714c87aefeSPatrick Mooney 	    &access);
9724c87aefeSPatrick Mooney 	assert(error == 0);
9734c87aefeSPatrick Mooney 	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
9744c87aefeSPatrick Mooney 	ot_type = SEG_DESC_TYPE(access);
9754c87aefeSPatrick Mooney 	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
9764c87aefeSPatrick Mooney 
9774c87aefeSPatrick Mooney 	/* Fetch the old TSS descriptor */
978*32640292SAndy Fiddaman 	error = read_tss_descriptor(vcpu, task_switch, ot_sel, &ot_desc,
9794c87aefeSPatrick Mooney 	    &fault);
9804c87aefeSPatrick Mooney 	CHKERR(error, fault);
9814c87aefeSPatrick Mooney 
9824c87aefeSPatrick Mooney 	/* Get the old TSS */
983*32640292SAndy Fiddaman 	error = vm_copy_setup(vcpu, &sup_paging, ot_base, minlimit + 1,
9844c87aefeSPatrick Mooney 	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
9854c87aefeSPatrick Mooney 	CHKERR(error, fault);
986*32640292SAndy Fiddaman 	vm_copyin(ot_iov, &oldtss, minlimit + 1);
9874c87aefeSPatrick Mooney 
9884c87aefeSPatrick Mooney 	/*
9894c87aefeSPatrick Mooney 	 * Clear the busy bit in the old TSS descriptor if the task switch
9904c87aefeSPatrick Mooney 	 * due to an IRET or JMP instruction.
9914c87aefeSPatrick Mooney 	 */
9924c87aefeSPatrick Mooney 	if (reason == TSR_IRET || reason == TSR_JMP) {
9934c87aefeSPatrick Mooney 		ot_desc.sd_type &= ~0x2;
994*32640292SAndy Fiddaman 		error = desc_table_write(vcpu, &sup_paging, ot_sel,
9954c87aefeSPatrick Mooney 		    &ot_desc, &fault);
9964c87aefeSPatrick Mooney 		CHKERR(error, fault);
9974c87aefeSPatrick Mooney 	}
9984c87aefeSPatrick Mooney 
9994c87aefeSPatrick Mooney 	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
1000154972afSPatrick Mooney 		EPRINTLN("Task switch to 16-bit TSS not supported");
10014c87aefeSPatrick Mooney 		return (VMEXIT_ABORT);
10024c87aefeSPatrick Mooney 	}
10034c87aefeSPatrick Mooney 
10044c87aefeSPatrick Mooney 	/* Save processor state in old TSS */
1005*32640292SAndy Fiddaman 	tss32_save(vcpu, task_switch, eip, &oldtss, ot_iov);
10064c87aefeSPatrick Mooney 
10074c87aefeSPatrick Mooney 	/*
10084c87aefeSPatrick Mooney 	 * If the task switch was triggered for any reason other than IRET
10094c87aefeSPatrick Mooney 	 * then set the busy bit in the new TSS descriptor.
10104c87aefeSPatrick Mooney 	 */
10114c87aefeSPatrick Mooney 	if (reason != TSR_IRET) {
10124c87aefeSPatrick Mooney 		nt_desc.sd_type |= 0x2;
1013*32640292SAndy Fiddaman 		error = desc_table_write(vcpu, &sup_paging, nt_sel,
10144c87aefeSPatrick Mooney 		    &nt_desc, &fault);
10154c87aefeSPatrick Mooney 		CHKERR(error, fault);
10164c87aefeSPatrick Mooney 	}
10174c87aefeSPatrick Mooney 
10184c87aefeSPatrick Mooney 	/* Update task register to point at the new TSS */
1019*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_TR, nt_sel);
10204c87aefeSPatrick Mooney 
10214c87aefeSPatrick Mooney 	/* Update the hidden descriptor state of the task register */
10224c87aefeSPatrick Mooney 	nt = usd_to_seg_desc(&nt_desc);
1023*32640292SAndy Fiddaman 	update_seg_desc(vcpu, VM_REG_GUEST_TR, &nt);
10244c87aefeSPatrick Mooney 
10254c87aefeSPatrick Mooney 	/* Set CR0.TS */
1026*32640292SAndy Fiddaman 	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
1027*32640292SAndy Fiddaman 	SETREG(vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);
10284c87aefeSPatrick Mooney 
10294c87aefeSPatrick Mooney 	/*
10304c87aefeSPatrick Mooney 	 * We are now committed to the task switch. Any exceptions encountered
10314c87aefeSPatrick Mooney 	 * after this point will be handled in the context of the new task and
10324c87aefeSPatrick Mooney 	 * the saved instruction pointer will belong to the new task.
10334c87aefeSPatrick Mooney 	 */
1034*32640292SAndy Fiddaman 	error = vm_set_register(vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
10354c87aefeSPatrick Mooney 	assert(error == 0);
10364c87aefeSPatrick Mooney 
10374c87aefeSPatrick Mooney 	/* Load processor state from new TSS */
10384c87aefeSPatrick Mooney 	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
10394c87aefeSPatrick Mooney 	    &fault);
10404c87aefeSPatrick Mooney 	CHKERR(error, fault);
10414c87aefeSPatrick Mooney 
10424c87aefeSPatrick Mooney 	/*
10434c87aefeSPatrick Mooney 	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
10444c87aefeSPatrick Mooney 	 * caused an error code to be generated, this error code is copied
10454c87aefeSPatrick Mooney 	 * to the stack of the new task.
10464c87aefeSPatrick Mooney 	 */
10474c87aefeSPatrick Mooney 	if (task_switch->errcode_valid) {
10484c87aefeSPatrick Mooney 		assert(task_switch->ext);
10494c87aefeSPatrick Mooney 		assert(task_switch->reason == TSR_IDT_GATE);
1050*32640292SAndy Fiddaman 		error = push_errcode(vcpu, &task_switch->paging, nt_type,
10514c87aefeSPatrick Mooney 		    task_switch->errcode, &fault);
10524c87aefeSPatrick Mooney 		CHKERR(error, fault);
10534c87aefeSPatrick Mooney 	}
10544c87aefeSPatrick Mooney 
10554c87aefeSPatrick Mooney 	/*
10564c87aefeSPatrick Mooney 	 * Treatment of virtual-NMI blocking if NMI is delivered through
10574c87aefeSPatrick Mooney 	 * a task gate.
10584c87aefeSPatrick Mooney 	 *
10594c87aefeSPatrick Mooney 	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
10604c87aefeSPatrick Mooney 	 * If the virtual NMIs VM-execution control is 1, VM entry injects
10614c87aefeSPatrick Mooney 	 * an NMI, and delivery of the NMI causes a task switch that causes
10624c87aefeSPatrick Mooney 	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
10634c87aefeSPatrick Mooney 	 * commences.
10644c87aefeSPatrick Mooney 	 *
10654c87aefeSPatrick Mooney 	 * Thus, virtual-NMI blocking is in effect at the time of the task
10664c87aefeSPatrick Mooney 	 * switch VM exit.
10674c87aefeSPatrick Mooney 	 */
10684c87aefeSPatrick Mooney 
10694c87aefeSPatrick Mooney 	/*
10704c87aefeSPatrick Mooney 	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
10714c87aefeSPatrick Mooney 	 *
10724c87aefeSPatrick Mooney 	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation"
10734c87aefeSPatrick Mooney 	 * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
10744c87aefeSPatrick Mooney 	 * This unblocking of virtual-NMI occurs even if IRET causes a fault.
10754c87aefeSPatrick Mooney 	 *
10764c87aefeSPatrick Mooney 	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
10774c87aefeSPatrick Mooney 	 * VM exit.
10784c87aefeSPatrick Mooney 	 */
10794c87aefeSPatrick Mooney 
10804c87aefeSPatrick Mooney 	/*
10814c87aefeSPatrick Mooney 	 * If the task switch was triggered by an event delivered through
10824c87aefeSPatrick Mooney 	 * the IDT then extinguish the pending event from the vcpu's
10834c87aefeSPatrick Mooney 	 * exitintinfo.
10844c87aefeSPatrick Mooney 	 */
10854c87aefeSPatrick Mooney 	if (task_switch->reason == TSR_IDT_GATE) {
1086*32640292SAndy Fiddaman 		error = vm_set_intinfo(vcpu, 0);
10874c87aefeSPatrick Mooney 		assert(error == 0);
10884c87aefeSPatrick Mooney 	}
10894c87aefeSPatrick Mooney 
10904c87aefeSPatrick Mooney 	/*
10914c87aefeSPatrick Mooney 	 * XXX should inject debug exception if 'T' bit is 1
10924c87aefeSPatrick Mooney 	 */
10934c87aefeSPatrick Mooney done:
10944c87aefeSPatrick Mooney 	return (VMEXIT_CONTINUE);
10954c87aefeSPatrick Mooney }
1096