xref: /illumos-gate/usr/src/cmd/bhyve/task_switch.c (revision e0c0d44e)
14c87aefeSPatrick Mooney /*-
24c87aefeSPatrick Mooney  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
34c87aefeSPatrick Mooney  *
44c87aefeSPatrick Mooney  * Copyright (c) 2014 Neel Natu <neel@freebsd.org>
54c87aefeSPatrick Mooney  * All rights reserved.
64c87aefeSPatrick Mooney  *
74c87aefeSPatrick Mooney  * Redistribution and use in source and binary forms, with or without
84c87aefeSPatrick Mooney  * modification, are permitted provided that the following conditions
94c87aefeSPatrick Mooney  * are met:
104c87aefeSPatrick Mooney  * 1. Redistributions of source code must retain the above copyright
114c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer.
124c87aefeSPatrick Mooney  * 2. Redistributions in binary form must reproduce the above copyright
134c87aefeSPatrick Mooney  *    notice, this list of conditions and the following disclaimer in the
144c87aefeSPatrick Mooney  *    documentation and/or other materials provided with the distribution.
154c87aefeSPatrick Mooney  *
164c87aefeSPatrick Mooney  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
174c87aefeSPatrick Mooney  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
184c87aefeSPatrick Mooney  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
194c87aefeSPatrick Mooney  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
204c87aefeSPatrick Mooney  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
214c87aefeSPatrick Mooney  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
224c87aefeSPatrick Mooney  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
234c87aefeSPatrick Mooney  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
244c87aefeSPatrick Mooney  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
254c87aefeSPatrick Mooney  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
264c87aefeSPatrick Mooney  * SUCH DAMAGE.
274c87aefeSPatrick Mooney  */
28*e0c0d44eSPatrick Mooney /*
29*e0c0d44eSPatrick Mooney  * This file and its contents are supplied under the terms of the
30*e0c0d44eSPatrick Mooney  * Common Development and Distribution License ("CDDL"), version 1.0.
31*e0c0d44eSPatrick Mooney  * You may only use this file in accordance with the terms of version
32*e0c0d44eSPatrick Mooney  * 1.0 of the CDDL.
33*e0c0d44eSPatrick Mooney  *
34*e0c0d44eSPatrick Mooney  * A full copy of the text of the CDDL should have accompanied this
35*e0c0d44eSPatrick Mooney  * source.  A copy of the CDDL is also available via the Internet at
36*e0c0d44eSPatrick Mooney  * http://www.illumos.org/license/CDDL.
37*e0c0d44eSPatrick Mooney  *
38*e0c0d44eSPatrick Mooney  * Copyright 2020 Oxide Computer Company
39*e0c0d44eSPatrick Mooney  */
404c87aefeSPatrick Mooney 
414c87aefeSPatrick Mooney #include <sys/cdefs.h>
424c87aefeSPatrick Mooney __FBSDID("$FreeBSD$");
434c87aefeSPatrick Mooney 
444c87aefeSPatrick Mooney #include <sys/param.h>
454c87aefeSPatrick Mooney #include <sys/_iovec.h>
464c87aefeSPatrick Mooney #include <sys/mman.h>
474c87aefeSPatrick Mooney 
484c87aefeSPatrick Mooney #include <x86/psl.h>
494c87aefeSPatrick Mooney #include <x86/segments.h>
504c87aefeSPatrick Mooney #include <x86/specialreg.h>
514c87aefeSPatrick Mooney #include <machine/vmm.h>
524c87aefeSPatrick Mooney 
534c87aefeSPatrick Mooney #include <assert.h>
544c87aefeSPatrick Mooney #include <errno.h>
554c87aefeSPatrick Mooney #include <stdbool.h>
564c87aefeSPatrick Mooney #include <stdio.h>
574c87aefeSPatrick Mooney #include <stdlib.h>
584c87aefeSPatrick Mooney 
594c87aefeSPatrick Mooney #include <vmmapi.h>
604c87aefeSPatrick Mooney 
614c87aefeSPatrick Mooney #include "bhyverun.h"
62154972afSPatrick Mooney #include "debug.h"
634c87aefeSPatrick Mooney 
/*
 * Image of a 32-bit task-state segment as it is copied to and from guest
 * memory by this file.
 *
 * 'struct i386tss' looks like the obvious choice but is deliberately not
 * used: all of its fields are signed integers, which leads to a long list
 * of sign-extension problems.  Every field below is an unsigned fixed-width
 * type instead.
 *
 * The number(s) in the trailing comment are the byte offsets of the fields
 * declared on that line, derived from the declared widths.  The fields add
 * up to exactly 104 bytes, which the assertion after the struct enforces.
 */
struct tss32 {
	uint16_t	tss_link, rsvd1;	/* 0, 2 */
	uint32_t	tss_esp0;		/* 4 */
	uint16_t	tss_ss0, rsvd2;		/* 8, 10 */
	uint32_t	tss_esp1;		/* 12 */
	uint16_t	tss_ss1, rsvd3;		/* 16, 18 */
	uint32_t	tss_esp2;		/* 20 */
	uint16_t	tss_ss2, rsvd4;		/* 24, 26 */
	uint32_t	tss_cr3;		/* 28 */
	uint32_t	tss_eip;		/* 32 */
	uint32_t	tss_eflags;		/* 36 */
	uint32_t	tss_eax;		/* 40 */
	uint32_t	tss_ecx;		/* 44 */
	uint32_t	tss_edx;		/* 48 */
	uint32_t	tss_ebx;		/* 52 */
	uint32_t	tss_esp;		/* 56 */
	uint32_t	tss_ebp;		/* 60 */
	uint32_t	tss_esi;		/* 64 */
	uint32_t	tss_edi;		/* 68 */
	uint16_t	tss_es, rsvd5;		/* 72, 74 */
	uint16_t	tss_cs, rsvd6;		/* 76, 78 */
	uint16_t	tss_ss, rsvd7;		/* 80, 82 */
	uint16_t	tss_ds, rsvd8;		/* 84, 86 */
	uint16_t	tss_fs, rsvd9;		/* 88, 90 */
	uint16_t	tss_gs, rsvd10;		/* 92, 94 */
	uint16_t	tss_ldt, rsvd11;	/* 96, 98 */
	uint16_t	tss_trap, tss_iomap;	/* 100, 102 */
};
static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed");
1094c87aefeSPatrick Mooney 
/*
 * Selector helpers.
 *
 * SEL_START() clears the low three bits of a selector and SEL_LIMIT() sets
 * them, yielding the first and last value of the aligned group of eight
 * that contains 'sel'.  This file adds SEL_START() to a descriptor table
 * base to locate a descriptor, and compares SEL_LIMIT() against the table
 * limit.
 */
#define	SEL_START(sel)	((sel) & ~0x7)
#define	SEL_LIMIT(sel)	((sel) | 0x7)

/* Non-zero when bit 1 (mask 0x2) of the descriptor type 'type' is set. */
#define	TSS_BUSY(type)	(((type) & 0x2) != 0)
1134c87aefeSPatrick Mooney 
/*
 * Return the current value of register 'reg' on 'vcpu'.
 *
 * A failing vm_get_register() is not reported to the caller; it trips the
 * assertion instead.
 */
static uint64_t
GETREG(struct vmctx *ctx, int vcpu, int reg)
{
	uint64_t regval;
	const int error = vm_get_register(ctx, vcpu, reg, &regval);

	assert(error == 0);
	return (regval);
}
1244c87aefeSPatrick Mooney 
/*
 * Store 'val' into register 'reg' on 'vcpu'.
 *
 * A failing vm_set_register() is not reported to the caller; it trips the
 * assertion instead.
 */
static void
SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
{
	const int error = vm_set_register(ctx, vcpu, reg, val);

	assert(error == 0);
}
1334c87aefeSPatrick Mooney 
1344c87aefeSPatrick Mooney static struct seg_desc
1354c87aefeSPatrick Mooney usd_to_seg_desc(struct user_segment_descriptor *usd)
1364c87aefeSPatrick Mooney {
1374c87aefeSPatrick Mooney 	struct seg_desc seg_desc;
1384c87aefeSPatrick Mooney 
1394c87aefeSPatrick Mooney 	seg_desc.base = (u_int)USD_GETBASE(usd);
1404c87aefeSPatrick Mooney 	if (usd->sd_gran)
1414c87aefeSPatrick Mooney 		seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
1424c87aefeSPatrick Mooney 	else
1434c87aefeSPatrick Mooney 		seg_desc.limit = (u_int)USD_GETLIMIT(usd);
1444c87aefeSPatrick Mooney 	seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
1454c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_xx << 12;
1464c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_def32 << 14;
1474c87aefeSPatrick Mooney 	seg_desc.access |= usd->sd_gran << 15;
1484c87aefeSPatrick Mooney 
1494c87aefeSPatrick Mooney 	return (seg_desc);
1504c87aefeSPatrick Mooney }
1514c87aefeSPatrick Mooney 
/*
 * Inject exception 'vector' into the guest with an error code derived from
 * the segment selector 'sel'.  The error code format is described in
 * section 6.13, "Error Code", of the Intel SDM, volume 3:
 *
 *   bit 0 (EXT)  the exception occurred while delivering an external event
 *                such as an interrupt; set here when 'ext' is non-zero
 *   bit 1 (IDT)  the selector refers to a gate descriptor in the IDT;
 *                always cleared here, because none of the selectors met
 *                during task switch emulation refer to a task gate in
 *                the IDT
 *   bit 2 (TI)   the usual GDT/LDT table indicator; kept exactly as it
 *                appears in 'sel'
 *
 * All higher selector bits are passed through unchanged.
 */
static void
sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext)
{
	uint16_t errcode;

	/* Drop bits 1:0 of the selector, then report EXT in bit 0 */
	errcode = sel & ~0x3;
	if (ext)
		errcode |= 0x1;

	/* NOTE(review): the literal 1 presumably marks the error code valid */
	vm_inject_fault(ctx, vcpu, vector, 1, errcode);
}
1824c87aefeSPatrick Mooney 
1834c87aefeSPatrick Mooney /*
1844c87aefeSPatrick Mooney  * Return 0 if the selector 'sel' in within the limits of the GDT/LDT
1854c87aefeSPatrick Mooney  * and non-zero otherwise.
1864c87aefeSPatrick Mooney  */
1874c87aefeSPatrick Mooney static int
1884c87aefeSPatrick Mooney desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel)
1894c87aefeSPatrick Mooney {
1904c87aefeSPatrick Mooney 	uint64_t base;
1914c87aefeSPatrick Mooney 	uint32_t limit, access;
1924c87aefeSPatrick Mooney 	int error, reg;
1934c87aefeSPatrick Mooney 
1944c87aefeSPatrick Mooney 	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
1954c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
1964c87aefeSPatrick Mooney 	assert(error == 0);
1974c87aefeSPatrick Mooney 
1984c87aefeSPatrick Mooney 	if (reg == VM_REG_GUEST_LDTR) {
1994c87aefeSPatrick Mooney 		if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))
2004c87aefeSPatrick Mooney 			return (-1);
2014c87aefeSPatrick Mooney 	}
2024c87aefeSPatrick Mooney 
2034c87aefeSPatrick Mooney 	if (limit < SEL_LIMIT(sel))
2044c87aefeSPatrick Mooney 		return (-1);
2054c87aefeSPatrick Mooney 	else
2064c87aefeSPatrick Mooney 		return (0);
2074c87aefeSPatrick Mooney }
2084c87aefeSPatrick Mooney 
2094c87aefeSPatrick Mooney /*
2104c87aefeSPatrick Mooney  * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced
2114c87aefeSPatrick Mooney  * by the selector 'sel'.
2124c87aefeSPatrick Mooney  *
2134c87aefeSPatrick Mooney  * Returns 0 on success.
2144c87aefeSPatrick Mooney  * Returns 1 if an exception was injected into the guest.
2154c87aefeSPatrick Mooney  * Returns -1 otherwise.
2164c87aefeSPatrick Mooney  */
2174c87aefeSPatrick Mooney static int
2184c87aefeSPatrick Mooney desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
2194c87aefeSPatrick Mooney     uint16_t sel, struct user_segment_descriptor *desc, bool doread,
2204c87aefeSPatrick Mooney     int *faultptr)
2214c87aefeSPatrick Mooney {
2224c87aefeSPatrick Mooney 	struct iovec iov[2];
2234c87aefeSPatrick Mooney 	uint64_t base;
2244c87aefeSPatrick Mooney 	uint32_t limit, access;
2254c87aefeSPatrick Mooney 	int error, reg;
2264c87aefeSPatrick Mooney 
2274c87aefeSPatrick Mooney 	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
2284c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
2294c87aefeSPatrick Mooney 	assert(error == 0);
2304c87aefeSPatrick Mooney 	assert(limit >= SEL_LIMIT(sel));
2314c87aefeSPatrick Mooney 
2324c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel),
2334c87aefeSPatrick Mooney 	    sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov),
2344c87aefeSPatrick Mooney 	    faultptr);
2354c87aefeSPatrick Mooney 	if (error || *faultptr)
2364c87aefeSPatrick Mooney 		return (error);
2374c87aefeSPatrick Mooney 
2384c87aefeSPatrick Mooney 	if (doread)
2394c87aefeSPatrick Mooney 		vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
2404c87aefeSPatrick Mooney 	else
2414c87aefeSPatrick Mooney 		vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
2424c87aefeSPatrick Mooney 	return (0);
2434c87aefeSPatrick Mooney }
2444c87aefeSPatrick Mooney 
/*
 * Read the descriptor referenced by 'sel' from the guest's GDT/LDT into
 * 'desc'.  This is desc_table_rw() in its read direction and returns
 * whatever that call returns.
 */
static int
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	const bool doread = true;

	return (desc_table_rw(ctx, vcpu, paging, sel, desc, doread, faultptr));
}
2514c87aefeSPatrick Mooney 
/*
 * Write 'desc' into the guest's GDT/LDT slot referenced by 'sel'.  This is
 * desc_table_rw() in its write direction and returns whatever that call
 * returns.
 */
static int
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
    uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
{
	const bool doread = false;

	return (desc_table_rw(ctx, vcpu, paging, sel, desc, doread, faultptr));
}
2584c87aefeSPatrick Mooney 
2594c87aefeSPatrick Mooney /*
2604c87aefeSPatrick Mooney  * Read the TSS descriptor referenced by 'sel' into 'desc'.
2614c87aefeSPatrick Mooney  *
2624c87aefeSPatrick Mooney  * Returns 0 on success.
2634c87aefeSPatrick Mooney  * Returns 1 if an exception was injected into the guest.
2644c87aefeSPatrick Mooney  * Returns -1 otherwise.
2654c87aefeSPatrick Mooney  */
2664c87aefeSPatrick Mooney static int
2674c87aefeSPatrick Mooney read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
2684c87aefeSPatrick Mooney     uint16_t sel, struct user_segment_descriptor *desc, int *faultptr)
2694c87aefeSPatrick Mooney {
2704c87aefeSPatrick Mooney 	struct vm_guest_paging sup_paging;
2714c87aefeSPatrick Mooney 	int error;
2724c87aefeSPatrick Mooney 
2734c87aefeSPatrick Mooney 	assert(!ISLDT(sel));
2744c87aefeSPatrick Mooney 	assert(IDXSEL(sel) != 0);
2754c87aefeSPatrick Mooney 
2764c87aefeSPatrick Mooney 	/* Fetch the new TSS descriptor */
2774c87aefeSPatrick Mooney 	if (desc_table_limit_check(ctx, vcpu, sel)) {
2784c87aefeSPatrick Mooney 		if (ts->reason == TSR_IRET)
2794c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
2804c87aefeSPatrick Mooney 		else
2814c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext);
2824c87aefeSPatrick Mooney 		return (1);
2834c87aefeSPatrick Mooney 	}
2844c87aefeSPatrick Mooney 
2854c87aefeSPatrick Mooney 	sup_paging = ts->paging;
2864c87aefeSPatrick Mooney 	sup_paging.cpl = 0;		/* implicit supervisor mode */
2874c87aefeSPatrick Mooney 	error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr);
2884c87aefeSPatrick Mooney 	return (error);
2894c87aefeSPatrick Mooney }
2904c87aefeSPatrick Mooney 
/*
 * Return true if the descriptor type 'sd_type' denotes a code descriptor,
 * i.e. bits 0x10 and 0x08 are both set.
 */
static bool
code_desc(int sd_type)
{
	const int code_bits = 0x18;

	return ((sd_type & code_bits) == code_bits);
}
2974c87aefeSPatrick Mooney 
/*
 * Return true if the descriptor type 'sd_type' denotes a writable data
 * descriptor: of the bits 0x10, 0x08 and 0x02, exactly 0x10 and 0x02 are
 * set.  All other bits are ignored.
 */
static bool
stack_desc(int sd_type)
{
	const int relevant = sd_type & 0x1A;

	return (relevant == 0x12);
}
3044c87aefeSPatrick Mooney 
/*
 * Return true if the descriptor type 'sd_type' denotes either a data
 * descriptor (bit 0x10 set, bit 0x08 clear) or a readable code descriptor
 * (bits 0x10, 0x08 and 0x02 all set).
 */
static bool
data_desc(int sd_type)
{
	/* data descriptor */
	if ((sd_type & 0x18) == 0x10)
		return (true);

	/* readable code descriptor */
	return ((sd_type & 0x1A) == 0x1A);
}
3114c87aefeSPatrick Mooney 
3124c87aefeSPatrick Mooney static bool
3134c87aefeSPatrick Mooney ldt_desc(int sd_type)
3144c87aefeSPatrick Mooney {
3154c87aefeSPatrick Mooney 
3164c87aefeSPatrick Mooney 	return (sd_type == SDT_SYSLDT);
3174c87aefeSPatrick Mooney }
3184c87aefeSPatrick Mooney 
3194c87aefeSPatrick Mooney /*
3204c87aefeSPatrick Mooney  * Validate the descriptor 'seg_desc' associated with 'segment'.
3214c87aefeSPatrick Mooney  */
3224c87aefeSPatrick Mooney static int
3234c87aefeSPatrick Mooney validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
3244c87aefeSPatrick Mooney     int segment, struct seg_desc *seg_desc, int *faultptr)
3254c87aefeSPatrick Mooney {
3264c87aefeSPatrick Mooney 	struct vm_guest_paging sup_paging;
3274c87aefeSPatrick Mooney 	struct user_segment_descriptor usd;
3284c87aefeSPatrick Mooney 	int error, idtvec;
3294c87aefeSPatrick Mooney 	int cpl, dpl, rpl;
3304c87aefeSPatrick Mooney 	uint16_t sel, cs;
3314c87aefeSPatrick Mooney 	bool ldtseg, codeseg, stackseg, dataseg, conforming;
3324c87aefeSPatrick Mooney 
3334c87aefeSPatrick Mooney 	ldtseg = codeseg = stackseg = dataseg = false;
3344c87aefeSPatrick Mooney 	switch (segment) {
3354c87aefeSPatrick Mooney 	case VM_REG_GUEST_LDTR:
3364c87aefeSPatrick Mooney 		ldtseg = true;
3374c87aefeSPatrick Mooney 		break;
3384c87aefeSPatrick Mooney 	case VM_REG_GUEST_CS:
3394c87aefeSPatrick Mooney 		codeseg = true;
3404c87aefeSPatrick Mooney 		break;
3414c87aefeSPatrick Mooney 	case VM_REG_GUEST_SS:
3424c87aefeSPatrick Mooney 		stackseg = true;
3434c87aefeSPatrick Mooney 		break;
3444c87aefeSPatrick Mooney 	case VM_REG_GUEST_DS:
3454c87aefeSPatrick Mooney 	case VM_REG_GUEST_ES:
3464c87aefeSPatrick Mooney 	case VM_REG_GUEST_FS:
3474c87aefeSPatrick Mooney 	case VM_REG_GUEST_GS:
3484c87aefeSPatrick Mooney 		dataseg = true;
3494c87aefeSPatrick Mooney 		break;
3504c87aefeSPatrick Mooney 	default:
3514c87aefeSPatrick Mooney 		assert(0);
3524c87aefeSPatrick Mooney 	}
3534c87aefeSPatrick Mooney 
3544c87aefeSPatrick Mooney 	/* Get the segment selector */
3554c87aefeSPatrick Mooney 	sel = GETREG(ctx, vcpu, segment);
3564c87aefeSPatrick Mooney 
3574c87aefeSPatrick Mooney 	/* LDT selector must point into the GDT */
3584c87aefeSPatrick Mooney 	if (ldtseg && ISLDT(sel)) {
3594c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
3604c87aefeSPatrick Mooney 		return (1);
3614c87aefeSPatrick Mooney 	}
3624c87aefeSPatrick Mooney 
3634c87aefeSPatrick Mooney 	/* Descriptor table limit check */
3644c87aefeSPatrick Mooney 	if (desc_table_limit_check(ctx, vcpu, sel)) {
3654c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
3664c87aefeSPatrick Mooney 		return (1);
3674c87aefeSPatrick Mooney 	}
3684c87aefeSPatrick Mooney 
3694c87aefeSPatrick Mooney 	/* NULL selector */
3704c87aefeSPatrick Mooney 	if (IDXSEL(sel) == 0) {
3714c87aefeSPatrick Mooney 		/* Code and stack segment selectors cannot be NULL */
3724c87aefeSPatrick Mooney 		if (codeseg || stackseg) {
3734c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
3744c87aefeSPatrick Mooney 			return (1);
3754c87aefeSPatrick Mooney 		}
3764c87aefeSPatrick Mooney 		seg_desc->base = 0;
3774c87aefeSPatrick Mooney 		seg_desc->limit = 0;
3784c87aefeSPatrick Mooney 		seg_desc->access = 0x10000;	/* unusable */
3794c87aefeSPatrick Mooney 		return (0);
3804c87aefeSPatrick Mooney 	}
3814c87aefeSPatrick Mooney 
3824c87aefeSPatrick Mooney 	/* Read the descriptor from the GDT/LDT */
3834c87aefeSPatrick Mooney 	sup_paging = ts->paging;
3844c87aefeSPatrick Mooney 	sup_paging.cpl = 0;	/* implicit supervisor mode */
3854c87aefeSPatrick Mooney 	error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr);
3864c87aefeSPatrick Mooney 	if (error || *faultptr)
3874c87aefeSPatrick Mooney 		return (error);
3884c87aefeSPatrick Mooney 
3894c87aefeSPatrick Mooney 	/* Verify that the descriptor type is compatible with the segment */
3904c87aefeSPatrick Mooney 	if ((ldtseg && !ldt_desc(usd.sd_type)) ||
3914c87aefeSPatrick Mooney 	    (codeseg && !code_desc(usd.sd_type)) ||
3924c87aefeSPatrick Mooney 	    (dataseg && !data_desc(usd.sd_type)) ||
3934c87aefeSPatrick Mooney 	    (stackseg && !stack_desc(usd.sd_type))) {
3944c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
3954c87aefeSPatrick Mooney 		return (1);
3964c87aefeSPatrick Mooney 	}
3974c87aefeSPatrick Mooney 
3984c87aefeSPatrick Mooney 	/* Segment must be marked present */
3994c87aefeSPatrick Mooney 	if (!usd.sd_p) {
4004c87aefeSPatrick Mooney 		if (ldtseg)
4014c87aefeSPatrick Mooney 			idtvec = IDT_TS;
4024c87aefeSPatrick Mooney 		else if (stackseg)
4034c87aefeSPatrick Mooney 			idtvec = IDT_SS;
4044c87aefeSPatrick Mooney 		else
4054c87aefeSPatrick Mooney 			idtvec = IDT_NP;
4064c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, idtvec, sel, ts->ext);
4074c87aefeSPatrick Mooney 		return (1);
4084c87aefeSPatrick Mooney 	}
4094c87aefeSPatrick Mooney 
4104c87aefeSPatrick Mooney 	cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
4114c87aefeSPatrick Mooney 	cpl = cs & SEL_RPL_MASK;
4124c87aefeSPatrick Mooney 	rpl = sel & SEL_RPL_MASK;
4134c87aefeSPatrick Mooney 	dpl = usd.sd_dpl;
4144c87aefeSPatrick Mooney 
4154c87aefeSPatrick Mooney 	if (stackseg && (rpl != cpl || dpl != cpl)) {
4164c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
4174c87aefeSPatrick Mooney 		return (1);
4184c87aefeSPatrick Mooney 	}
4194c87aefeSPatrick Mooney 
4204c87aefeSPatrick Mooney 	if (codeseg) {
4214c87aefeSPatrick Mooney 		conforming = (usd.sd_type & 0x4) ? true : false;
4224c87aefeSPatrick Mooney 		if ((conforming && (cpl < dpl)) ||
4234c87aefeSPatrick Mooney 		    (!conforming && (cpl != dpl))) {
4244c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
4254c87aefeSPatrick Mooney 			return (1);
4264c87aefeSPatrick Mooney 		}
4274c87aefeSPatrick Mooney 	}
4284c87aefeSPatrick Mooney 
4294c87aefeSPatrick Mooney 	if (dataseg) {
4304c87aefeSPatrick Mooney 		/*
4314c87aefeSPatrick Mooney 		 * A data segment is always non-conforming except when it's
4324c87aefeSPatrick Mooney 		 * descriptor is a readable, conforming code segment.
4334c87aefeSPatrick Mooney 		 */
4344c87aefeSPatrick Mooney 		if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
4354c87aefeSPatrick Mooney 			conforming = true;
4364c87aefeSPatrick Mooney 		else
4374c87aefeSPatrick Mooney 			conforming = false;
4384c87aefeSPatrick Mooney 
4394c87aefeSPatrick Mooney 		if (!conforming && (rpl > dpl || cpl > dpl)) {
4404c87aefeSPatrick Mooney 			sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
4414c87aefeSPatrick Mooney 			return (1);
4424c87aefeSPatrick Mooney 		}
4434c87aefeSPatrick Mooney 	}
4444c87aefeSPatrick Mooney 	*seg_desc = usd_to_seg_desc(&usd);
4454c87aefeSPatrick Mooney 	return (0);
4464c87aefeSPatrick Mooney }
4474c87aefeSPatrick Mooney 
4484c87aefeSPatrick Mooney static void
4494c87aefeSPatrick Mooney tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch,
4504c87aefeSPatrick Mooney     uint32_t eip, struct tss32 *tss, struct iovec *iov)
4514c87aefeSPatrick Mooney {
4524c87aefeSPatrick Mooney 
4534c87aefeSPatrick Mooney 	/* General purpose registers */
4544c87aefeSPatrick Mooney 	tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX);
4554c87aefeSPatrick Mooney 	tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX);
4564c87aefeSPatrick Mooney 	tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX);
4574c87aefeSPatrick Mooney 	tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX);
4584c87aefeSPatrick Mooney 	tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
4594c87aefeSPatrick Mooney 	tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP);
4604c87aefeSPatrick Mooney 	tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI);
4614c87aefeSPatrick Mooney 	tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI);
4624c87aefeSPatrick Mooney 
4634c87aefeSPatrick Mooney 	/* Segment selectors */
4644c87aefeSPatrick Mooney 	tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES);
4654c87aefeSPatrick Mooney 	tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
4664c87aefeSPatrick Mooney 	tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
4674c87aefeSPatrick Mooney 	tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS);
4684c87aefeSPatrick Mooney 	tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS);
4694c87aefeSPatrick Mooney 	tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS);
4704c87aefeSPatrick Mooney 
4714c87aefeSPatrick Mooney 	/* eflags and eip */
4724c87aefeSPatrick Mooney 	tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
4734c87aefeSPatrick Mooney 	if (task_switch->reason == TSR_IRET)
4744c87aefeSPatrick Mooney 		tss->tss_eflags &= ~PSL_NT;
4754c87aefeSPatrick Mooney 	tss->tss_eip = eip;
4764c87aefeSPatrick Mooney 
4774c87aefeSPatrick Mooney 	/* Copy updated old TSS into guest memory */
4784c87aefeSPatrick Mooney 	vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32));
4794c87aefeSPatrick Mooney }
4804c87aefeSPatrick Mooney 
4814c87aefeSPatrick Mooney static void
4824c87aefeSPatrick Mooney update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
4834c87aefeSPatrick Mooney {
4844c87aefeSPatrick Mooney 	int error;
4854c87aefeSPatrick Mooney 
4864c87aefeSPatrick Mooney 	error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access);
4874c87aefeSPatrick Mooney 	assert(error == 0);
4884c87aefeSPatrick Mooney }
4894c87aefeSPatrick Mooney 
4904c87aefeSPatrick Mooney /*
4914c87aefeSPatrick Mooney  * Update the vcpu registers to reflect the state of the new task.
4924c87aefeSPatrick Mooney  */
4934c87aefeSPatrick Mooney static int
4944c87aefeSPatrick Mooney tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
4954c87aefeSPatrick Mooney     uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr)
4964c87aefeSPatrick Mooney {
4974c87aefeSPatrick Mooney 	struct seg_desc seg_desc, seg_desc2;
4984c87aefeSPatrick Mooney 	uint64_t *pdpte, maxphyaddr, reserved;
4994c87aefeSPatrick Mooney 	uint32_t eflags;
5004c87aefeSPatrick Mooney 	int error, i;
5014c87aefeSPatrick Mooney 	bool nested;
5024c87aefeSPatrick Mooney 
5034c87aefeSPatrick Mooney 	nested = false;
5044c87aefeSPatrick Mooney 	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
5054c87aefeSPatrick Mooney 		tss->tss_link = ot_sel;
5064c87aefeSPatrick Mooney 		nested = true;
5074c87aefeSPatrick Mooney 	}
5084c87aefeSPatrick Mooney 
5094c87aefeSPatrick Mooney 	eflags = tss->tss_eflags;
5104c87aefeSPatrick Mooney 	if (nested)
5114c87aefeSPatrick Mooney 		eflags |= PSL_NT;
5124c87aefeSPatrick Mooney 
5134c87aefeSPatrick Mooney 	/* LDTR */
5144c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);
5154c87aefeSPatrick Mooney 
5164c87aefeSPatrick Mooney 	/* PBDR */
5174c87aefeSPatrick Mooney 	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
5184c87aefeSPatrick Mooney 		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
5194c87aefeSPatrick Mooney 			/*
5204c87aefeSPatrick Mooney 			 * XXX Assuming 36-bit MAXPHYADDR.
5214c87aefeSPatrick Mooney 			 */
5224c87aefeSPatrick Mooney 			maxphyaddr = (1UL << 36) - 1;
5234c87aefeSPatrick Mooney 			pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
5244c87aefeSPatrick Mooney 			for (i = 0; i < 4; i++) {
5254c87aefeSPatrick Mooney 				/* Check reserved bits if the PDPTE is valid */
5264c87aefeSPatrick Mooney 				if (!(pdpte[i] & 0x1))
5274c87aefeSPatrick Mooney 					continue;
5284c87aefeSPatrick Mooney 				/*
5294c87aefeSPatrick Mooney 				 * Bits 2:1, 8:5 and bits above the processor's
5304c87aefeSPatrick Mooney 				 * maximum physical address are reserved.
5314c87aefeSPatrick Mooney 				 */
5324c87aefeSPatrick Mooney 				reserved = ~maxphyaddr | 0x1E6;
5334c87aefeSPatrick Mooney 				if (pdpte[i] & reserved) {
5344c87aefeSPatrick Mooney 					vm_inject_gp(ctx, vcpu);
5354c87aefeSPatrick Mooney 					return (1);
5364c87aefeSPatrick Mooney 				}
5374c87aefeSPatrick Mooney 			}
5384c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
5394c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
5404c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
5414c87aefeSPatrick Mooney 			SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
5424c87aefeSPatrick Mooney 		}
5434c87aefeSPatrick Mooney 		SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
5444c87aefeSPatrick Mooney 		ts->paging.cr3 = tss->tss_cr3;
5454c87aefeSPatrick Mooney 	}
5464c87aefeSPatrick Mooney 
5474c87aefeSPatrick Mooney 	/* eflags and eip */
5484c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags);
5494c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip);
5504c87aefeSPatrick Mooney 
5514c87aefeSPatrick Mooney 	/* General purpose registers */
5524c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
5534c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
5544c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
5554c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
5564c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
5574c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
5584c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
5594c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi);
5604c87aefeSPatrick Mooney 
5614c87aefeSPatrick Mooney 	/* Segment selectors */
5624c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es);
5634c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs);
5644c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss);
5654c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds);
5664c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs);
5674c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs);
5684c87aefeSPatrick Mooney 
5694c87aefeSPatrick Mooney 	/*
5704c87aefeSPatrick Mooney 	 * If this is a nested task then write out the new TSS to update
5714c87aefeSPatrick Mooney 	 * the previous link field.
5724c87aefeSPatrick Mooney 	 */
5734c87aefeSPatrick Mooney 	if (nested)
5744c87aefeSPatrick Mooney 		vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
5754c87aefeSPatrick Mooney 
5764c87aefeSPatrick Mooney 	/* Validate segment descriptors */
5774c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
5784c87aefeSPatrick Mooney 	    faultptr);
5794c87aefeSPatrick Mooney 	if (error || *faultptr)
5804c87aefeSPatrick Mooney 		return (error);
5814c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
5824c87aefeSPatrick Mooney 
5834c87aefeSPatrick Mooney 	/*
5844c87aefeSPatrick Mooney 	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
5854c87aefeSPatrick Mooney 	 *
5864c87aefeSPatrick Mooney 	 * The SS and CS attribute checks on VM-entry are inter-dependent so
5874c87aefeSPatrick Mooney 	 * we need to make sure that both segments are valid before updating
5884c87aefeSPatrick Mooney 	 * either of them. This ensures that the VMCS state can pass the
5894c87aefeSPatrick Mooney 	 * VM-entry checks so the guest can handle any exception injected
5904c87aefeSPatrick Mooney 	 * during task switch emulation.
5914c87aefeSPatrick Mooney 	 */
5924c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
5934c87aefeSPatrick Mooney 	    faultptr);
5944c87aefeSPatrick Mooney 	if (error || *faultptr)
5954c87aefeSPatrick Mooney 		return (error);
5964c87aefeSPatrick Mooney 
5974c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
5984c87aefeSPatrick Mooney 	    faultptr);
5994c87aefeSPatrick Mooney 	if (error || *faultptr)
6004c87aefeSPatrick Mooney 		return (error);
6014c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
6024c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
6034c87aefeSPatrick Mooney 	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
6044c87aefeSPatrick Mooney 
6054c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
6064c87aefeSPatrick Mooney 	    faultptr);
6074c87aefeSPatrick Mooney 	if (error || *faultptr)
6084c87aefeSPatrick Mooney 		return (error);
6094c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
6104c87aefeSPatrick Mooney 
6114c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
6124c87aefeSPatrick Mooney 	    faultptr);
6134c87aefeSPatrick Mooney 	if (error || *faultptr)
6144c87aefeSPatrick Mooney 		return (error);
6154c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
6164c87aefeSPatrick Mooney 
6174c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
6184c87aefeSPatrick Mooney 	    faultptr);
6194c87aefeSPatrick Mooney 	if (error || *faultptr)
6204c87aefeSPatrick Mooney 		return (error);
6214c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
6224c87aefeSPatrick Mooney 
6234c87aefeSPatrick Mooney 	error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
6244c87aefeSPatrick Mooney 	    faultptr);
6254c87aefeSPatrick Mooney 	if (error || *faultptr)
6264c87aefeSPatrick Mooney 		return (error);
6274c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
6284c87aefeSPatrick Mooney 
6294c87aefeSPatrick Mooney 	return (0);
6304c87aefeSPatrick Mooney }
6314c87aefeSPatrick Mooney 
632*e0c0d44eSPatrick Mooney 
633*e0c0d44eSPatrick Mooney /*
634*e0c0d44eSPatrick Mooney  * Copy of vie_alignment_check() from vmm_instruction_emul.c
635*e0c0d44eSPatrick Mooney  */
636*e0c0d44eSPatrick Mooney static int
637*e0c0d44eSPatrick Mooney alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
638*e0c0d44eSPatrick Mooney {
639*e0c0d44eSPatrick Mooney 	assert(size == 1 || size == 2 || size == 4 || size == 8);
640*e0c0d44eSPatrick Mooney 	assert(cpl >= 0 && cpl <= 3);
641*e0c0d44eSPatrick Mooney 
642*e0c0d44eSPatrick Mooney 	if (cpl != 3 || (cr0 & CR0_AM) == 0 || (rf & PSL_AC) == 0)
643*e0c0d44eSPatrick Mooney 		return (0);
644*e0c0d44eSPatrick Mooney 
645*e0c0d44eSPatrick Mooney 	return ((gla & (size - 1)) ? 1 : 0);
646*e0c0d44eSPatrick Mooney }
647*e0c0d44eSPatrick Mooney 
/*
 * Local copy of vie_size2mask() from vmm_instruction_emul.c.
 *
 * Returns a mask covering the low 'size' bytes of a 64-bit value.  'size'
 * must be 1, 2, 4 or 8; any other value trips an assertion (and yields 0
 * when assertions are compiled out).
 */
static uint64_t
size2mask(int size)
{
	/* A full-width shift would be undefined, so handle 8 bytes apart. */
	if (size == 8)
		return (~(uint64_t)0);

	if (size == 1 || size == 2 || size == 4)
		return (((uint64_t)1 << (size * 8)) - 1);

	assert(0);
	/* not reached */
	return (0);
}
669*e0c0d44eSPatrick Mooney 
670*e0c0d44eSPatrick Mooney /*
671*e0c0d44eSPatrick Mooney  * Copy of vie_calculate_gla() from vmm_instruction_emul.c
672*e0c0d44eSPatrick Mooney  */
673*e0c0d44eSPatrick Mooney static int
674*e0c0d44eSPatrick Mooney calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg,
675*e0c0d44eSPatrick Mooney     struct seg_desc *desc, uint64_t offset, int length, int addrsize,
676*e0c0d44eSPatrick Mooney     int prot, uint64_t *gla)
677*e0c0d44eSPatrick Mooney {
678*e0c0d44eSPatrick Mooney 	uint64_t firstoff, low_limit, high_limit, segbase;
679*e0c0d44eSPatrick Mooney 	int glasize, type;
680*e0c0d44eSPatrick Mooney 
681*e0c0d44eSPatrick Mooney 	assert(seg >= VM_REG_GUEST_ES && seg <= VM_REG_GUEST_GS);
682*e0c0d44eSPatrick Mooney 	assert((length == 1 || length == 2 || length == 4 || length == 8));
683*e0c0d44eSPatrick Mooney 	assert((prot & ~(PROT_READ | PROT_WRITE)) == 0);
684*e0c0d44eSPatrick Mooney 
685*e0c0d44eSPatrick Mooney 	firstoff = offset;
686*e0c0d44eSPatrick Mooney 	if (cpu_mode == CPU_MODE_64BIT) {
687*e0c0d44eSPatrick Mooney 		assert(addrsize == 4 || addrsize == 8);
688*e0c0d44eSPatrick Mooney 		glasize = 8;
689*e0c0d44eSPatrick Mooney 	} else {
690*e0c0d44eSPatrick Mooney 		assert(addrsize == 2 || addrsize == 4);
691*e0c0d44eSPatrick Mooney 		glasize = 4;
692*e0c0d44eSPatrick Mooney 		/*
693*e0c0d44eSPatrick Mooney 		 * If the segment selector is loaded with a NULL selector
694*e0c0d44eSPatrick Mooney 		 * then the descriptor is unusable and attempting to use
695*e0c0d44eSPatrick Mooney 		 * it results in a #GP(0).
696*e0c0d44eSPatrick Mooney 		 */
697*e0c0d44eSPatrick Mooney 		if (SEG_DESC_UNUSABLE(desc->access))
698*e0c0d44eSPatrick Mooney 			return (-1);
699*e0c0d44eSPatrick Mooney 
700*e0c0d44eSPatrick Mooney 		/*
701*e0c0d44eSPatrick Mooney 		 * The processor generates a #NP exception when a segment
702*e0c0d44eSPatrick Mooney 		 * register is loaded with a selector that points to a
703*e0c0d44eSPatrick Mooney 		 * descriptor that is not present. If this was the case then
704*e0c0d44eSPatrick Mooney 		 * it would have been checked before the VM-exit.
705*e0c0d44eSPatrick Mooney 		 */
706*e0c0d44eSPatrick Mooney 		assert(SEG_DESC_PRESENT(desc->access));
707*e0c0d44eSPatrick Mooney 
708*e0c0d44eSPatrick Mooney 		/*
709*e0c0d44eSPatrick Mooney 		 * The descriptor type must indicate a code/data segment.
710*e0c0d44eSPatrick Mooney 		 */
711*e0c0d44eSPatrick Mooney 		type = SEG_DESC_TYPE(desc->access);
712*e0c0d44eSPatrick Mooney 		assert(type >= 16 && type <= 31);
713*e0c0d44eSPatrick Mooney 
714*e0c0d44eSPatrick Mooney 		if (prot & PROT_READ) {
715*e0c0d44eSPatrick Mooney 			/* #GP on a read access to a exec-only code segment */
716*e0c0d44eSPatrick Mooney 			if ((type & 0xA) == 0x8)
717*e0c0d44eSPatrick Mooney 				return (-1);
718*e0c0d44eSPatrick Mooney 		}
719*e0c0d44eSPatrick Mooney 
720*e0c0d44eSPatrick Mooney 		if (prot & PROT_WRITE) {
721*e0c0d44eSPatrick Mooney 			/*
722*e0c0d44eSPatrick Mooney 			 * #GP on a write access to a code segment or a
723*e0c0d44eSPatrick Mooney 			 * read-only data segment.
724*e0c0d44eSPatrick Mooney 			 */
725*e0c0d44eSPatrick Mooney 			if (type & 0x8)			/* code segment */
726*e0c0d44eSPatrick Mooney 				return (-1);
727*e0c0d44eSPatrick Mooney 
728*e0c0d44eSPatrick Mooney 			if ((type & 0xA) == 0)		/* read-only data seg */
729*e0c0d44eSPatrick Mooney 				return (-1);
730*e0c0d44eSPatrick Mooney 		}
731*e0c0d44eSPatrick Mooney 
732*e0c0d44eSPatrick Mooney 		/*
733*e0c0d44eSPatrick Mooney 		 * 'desc->limit' is fully expanded taking granularity into
734*e0c0d44eSPatrick Mooney 		 * account.
735*e0c0d44eSPatrick Mooney 		 */
736*e0c0d44eSPatrick Mooney 		if ((type & 0xC) == 0x4) {
737*e0c0d44eSPatrick Mooney 			/* expand-down data segment */
738*e0c0d44eSPatrick Mooney 			low_limit = desc->limit + 1;
739*e0c0d44eSPatrick Mooney 			high_limit = SEG_DESC_DEF32(desc->access) ?
740*e0c0d44eSPatrick Mooney 			    0xffffffff : 0xffff;
741*e0c0d44eSPatrick Mooney 		} else {
742*e0c0d44eSPatrick Mooney 			/* code segment or expand-up data segment */
743*e0c0d44eSPatrick Mooney 			low_limit = 0;
744*e0c0d44eSPatrick Mooney 			high_limit = desc->limit;
745*e0c0d44eSPatrick Mooney 		}
746*e0c0d44eSPatrick Mooney 
747*e0c0d44eSPatrick Mooney 		while (length > 0) {
748*e0c0d44eSPatrick Mooney 			offset &= size2mask(addrsize);
749*e0c0d44eSPatrick Mooney 			if (offset < low_limit || offset > high_limit)
750*e0c0d44eSPatrick Mooney 				return (-1);
751*e0c0d44eSPatrick Mooney 			offset++;
752*e0c0d44eSPatrick Mooney 			length--;
753*e0c0d44eSPatrick Mooney 		}
754*e0c0d44eSPatrick Mooney 	}
755*e0c0d44eSPatrick Mooney 
756*e0c0d44eSPatrick Mooney 	/*
757*e0c0d44eSPatrick Mooney 	 * In 64-bit mode all segments except %fs and %gs have a segment
758*e0c0d44eSPatrick Mooney 	 * base address of 0.
759*e0c0d44eSPatrick Mooney 	 */
760*e0c0d44eSPatrick Mooney 	if (cpu_mode == CPU_MODE_64BIT && seg != VM_REG_GUEST_FS &&
761*e0c0d44eSPatrick Mooney 	    seg != VM_REG_GUEST_GS) {
762*e0c0d44eSPatrick Mooney 		segbase = 0;
763*e0c0d44eSPatrick Mooney 	} else {
764*e0c0d44eSPatrick Mooney 		segbase = desc->base;
765*e0c0d44eSPatrick Mooney 	}
766*e0c0d44eSPatrick Mooney 
767*e0c0d44eSPatrick Mooney 	/*
768*e0c0d44eSPatrick Mooney 	 * Truncate 'firstoff' to the effective address size before adding
769*e0c0d44eSPatrick Mooney 	 * it to the segment base.
770*e0c0d44eSPatrick Mooney 	 */
771*e0c0d44eSPatrick Mooney 	firstoff &= size2mask(addrsize);
772*e0c0d44eSPatrick Mooney 	*gla = (segbase + firstoff) & size2mask(glasize);
773*e0c0d44eSPatrick Mooney 	return (0);
774*e0c0d44eSPatrick Mooney }
775*e0c0d44eSPatrick Mooney 
7764c87aefeSPatrick Mooney /*
7774c87aefeSPatrick Mooney  * Push an error code on the stack of the new task. This is needed if the
7784c87aefeSPatrick Mooney  * task switch was triggered by a hardware exception that causes an error
7794c87aefeSPatrick Mooney  * code to be saved (e.g. #PF).
7804c87aefeSPatrick Mooney  */
7814c87aefeSPatrick Mooney static int
7824c87aefeSPatrick Mooney push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
7834c87aefeSPatrick Mooney     int task_type, uint32_t errcode, int *faultptr)
7844c87aefeSPatrick Mooney {
7854c87aefeSPatrick Mooney 	struct iovec iov[2];
7864c87aefeSPatrick Mooney 	struct seg_desc seg_desc;
7874c87aefeSPatrick Mooney 	int stacksize, bytes, error;
7884c87aefeSPatrick Mooney 	uint64_t gla, cr0, rflags;
7894c87aefeSPatrick Mooney 	uint32_t esp;
7904c87aefeSPatrick Mooney 	uint16_t stacksel;
7914c87aefeSPatrick Mooney 
7924c87aefeSPatrick Mooney 	*faultptr = 0;
7934c87aefeSPatrick Mooney 
7944c87aefeSPatrick Mooney 	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
7954c87aefeSPatrick Mooney 	rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
7964c87aefeSPatrick Mooney 	stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
7974c87aefeSPatrick Mooney 
7984c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base,
7994c87aefeSPatrick Mooney 	    &seg_desc.limit, &seg_desc.access);
8004c87aefeSPatrick Mooney 	assert(error == 0);
8014c87aefeSPatrick Mooney 
8024c87aefeSPatrick Mooney 	/*
8034c87aefeSPatrick Mooney 	 * Section "Error Code" in the Intel SDM vol 3: the error code is
8044c87aefeSPatrick Mooney 	 * pushed on the stack as a doubleword or word (depending on the
8054c87aefeSPatrick Mooney 	 * default interrupt, trap or task gate size).
8064c87aefeSPatrick Mooney 	 */
8074c87aefeSPatrick Mooney 	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
8084c87aefeSPatrick Mooney 		bytes = 4;
8094c87aefeSPatrick Mooney 	else
8104c87aefeSPatrick Mooney 		bytes = 2;
8114c87aefeSPatrick Mooney 
8124c87aefeSPatrick Mooney 	/*
8134c87aefeSPatrick Mooney 	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
8144c87aefeSPatrick Mooney 	 * stack-segment descriptor determines the size of the stack
8154c87aefeSPatrick Mooney 	 * pointer outside of 64-bit mode.
8164c87aefeSPatrick Mooney 	 */
8174c87aefeSPatrick Mooney 	if (SEG_DESC_DEF32(seg_desc.access))
8184c87aefeSPatrick Mooney 		stacksize = 4;
8194c87aefeSPatrick Mooney 	else
8204c87aefeSPatrick Mooney 		stacksize = 2;
8214c87aefeSPatrick Mooney 
8224c87aefeSPatrick Mooney 	esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
8234c87aefeSPatrick Mooney 	esp -= bytes;
8244c87aefeSPatrick Mooney 
825*e0c0d44eSPatrick Mooney 	if (calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
8264c87aefeSPatrick Mooney 	    &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
8274c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
8284c87aefeSPatrick Mooney 		*faultptr = 1;
8294c87aefeSPatrick Mooney 		return (0);
8304c87aefeSPatrick Mooney 	}
8314c87aefeSPatrick Mooney 
832*e0c0d44eSPatrick Mooney 	if (alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
8334c87aefeSPatrick Mooney 		vm_inject_ac(ctx, vcpu, 1);
8344c87aefeSPatrick Mooney 		*faultptr = 1;
8354c87aefeSPatrick Mooney 		return (0);
8364c87aefeSPatrick Mooney 	}
8374c87aefeSPatrick Mooney 
8384c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
8394c87aefeSPatrick Mooney 	    iov, nitems(iov), faultptr);
8404c87aefeSPatrick Mooney 	if (error || *faultptr)
8414c87aefeSPatrick Mooney 		return (error);
8424c87aefeSPatrick Mooney 
8434c87aefeSPatrick Mooney 	vm_copyout(ctx, vcpu, &errcode, iov, bytes);
8444c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp);
8454c87aefeSPatrick Mooney 	return (0);
8464c87aefeSPatrick Mooney }
8474c87aefeSPatrick Mooney 
/*
 * Evaluate the 'error' and 'fault' results of a helper function and, if
 * either is set, return from the *enclosing* function to the VM run loop:
 *  - non-zero 'error' (only 0 or EFAULT is expected): VMEXIT_ABORT
 *  - otherwise non-zero 'fault':                      VMEXIT_CONTINUE
 * When both are zero, execution falls through.
 *
 * The parameters are parenthesized wherever they appear as operands so an
 * argument containing a low-precedence operator is still grouped
 * correctly.  Note that 'error' is evaluated more than once.
 */
#define	CHKERR(error, fault)						\
	do {								\
		assert(((error) == 0) || ((error) == EFAULT));		\
		if (error)						\
			return (VMEXIT_ABORT);				\
		else if (fault)						\
			return (VMEXIT_CONTINUE);			\
	} while (0)
8604c87aefeSPatrick Mooney 
8614c87aefeSPatrick Mooney int
8624c87aefeSPatrick Mooney vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
8634c87aefeSPatrick Mooney {
8644c87aefeSPatrick Mooney 	struct seg_desc nt;
8654c87aefeSPatrick Mooney 	struct tss32 oldtss, newtss;
8664c87aefeSPatrick Mooney 	struct vm_task_switch *task_switch;
8674c87aefeSPatrick Mooney 	struct vm_guest_paging *paging, sup_paging;
8684c87aefeSPatrick Mooney 	struct user_segment_descriptor nt_desc, ot_desc;
8694c87aefeSPatrick Mooney 	struct iovec nt_iov[2], ot_iov[2];
8704c87aefeSPatrick Mooney 	uint64_t cr0, ot_base;
8714c87aefeSPatrick Mooney 	uint32_t eip, ot_lim, access;
8724c87aefeSPatrick Mooney 	int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
8734c87aefeSPatrick Mooney 	enum task_switch_reason reason;
8744c87aefeSPatrick Mooney 	uint16_t nt_sel, ot_sel;
8754c87aefeSPatrick Mooney 
8764c87aefeSPatrick Mooney 	task_switch = &vmexit->u.task_switch;
8774c87aefeSPatrick Mooney 	nt_sel = task_switch->tsssel;
8784c87aefeSPatrick Mooney 	ext = vmexit->u.task_switch.ext;
8794c87aefeSPatrick Mooney 	reason = vmexit->u.task_switch.reason;
8804c87aefeSPatrick Mooney 	paging = &vmexit->u.task_switch.paging;
8814c87aefeSPatrick Mooney 	vcpu = *pvcpu;
8824c87aefeSPatrick Mooney 
8834c87aefeSPatrick Mooney 	assert(paging->cpu_mode == CPU_MODE_PROTECTED);
8844c87aefeSPatrick Mooney 
8854c87aefeSPatrick Mooney 	/*
8864c87aefeSPatrick Mooney 	 * Calculate the instruction pointer to store in the old TSS.
8874c87aefeSPatrick Mooney 	 */
8884c87aefeSPatrick Mooney 	eip = vmexit->rip + vmexit->inst_length;
8894c87aefeSPatrick Mooney 
8904c87aefeSPatrick Mooney 	/*
8914c87aefeSPatrick Mooney 	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
8924c87aefeSPatrick Mooney 	 * The following page table accesses are implicitly supervisor mode:
8934c87aefeSPatrick Mooney 	 * - accesses to GDT or LDT to load segment descriptors
8944c87aefeSPatrick Mooney 	 * - accesses to the task state segment during task switch
8954c87aefeSPatrick Mooney 	 */
8964c87aefeSPatrick Mooney 	sup_paging = *paging;
8974c87aefeSPatrick Mooney 	sup_paging.cpl = 0;	/* implicit supervisor mode */
8984c87aefeSPatrick Mooney 
8994c87aefeSPatrick Mooney 	/* Fetch the new TSS descriptor */
9004c87aefeSPatrick Mooney 	error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc,
9014c87aefeSPatrick Mooney 	    &fault);
9024c87aefeSPatrick Mooney 	CHKERR(error, fault);
9034c87aefeSPatrick Mooney 
9044c87aefeSPatrick Mooney 	nt = usd_to_seg_desc(&nt_desc);
9054c87aefeSPatrick Mooney 
9064c87aefeSPatrick Mooney 	/* Verify the type of the new TSS */
9074c87aefeSPatrick Mooney 	nt_type = SEG_DESC_TYPE(nt.access);
9084c87aefeSPatrick Mooney 	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
9094c87aefeSPatrick Mooney 	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
9104c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
9114c87aefeSPatrick Mooney 		goto done;
9124c87aefeSPatrick Mooney 	}
9134c87aefeSPatrick Mooney 
9144c87aefeSPatrick Mooney 	/* TSS descriptor must have present bit set */
9154c87aefeSPatrick Mooney 	if (!SEG_DESC_PRESENT(nt.access)) {
9164c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext);
9174c87aefeSPatrick Mooney 		goto done;
9184c87aefeSPatrick Mooney 	}
9194c87aefeSPatrick Mooney 
9204c87aefeSPatrick Mooney 	/*
9214c87aefeSPatrick Mooney 	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
9224c87aefeSPatrick Mooney 	 * 44 bytes for a 16-bit TSS.
9234c87aefeSPatrick Mooney 	 */
9244c87aefeSPatrick Mooney 	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
9254c87aefeSPatrick Mooney 		minlimit = 104 - 1;
9264c87aefeSPatrick Mooney 	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
9274c87aefeSPatrick Mooney 		minlimit = 44 - 1;
9284c87aefeSPatrick Mooney 	else
9294c87aefeSPatrick Mooney 		minlimit = 0;
9304c87aefeSPatrick Mooney 
9314c87aefeSPatrick Mooney 	assert(minlimit > 0);
9324c87aefeSPatrick Mooney 	if (nt.limit < minlimit) {
9334c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
9344c87aefeSPatrick Mooney 		goto done;
9354c87aefeSPatrick Mooney 	}
9364c87aefeSPatrick Mooney 
9374c87aefeSPatrick Mooney 	/* TSS must be busy if task switch is due to IRET */
9384c87aefeSPatrick Mooney 	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
9394c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
9404c87aefeSPatrick Mooney 		goto done;
9414c87aefeSPatrick Mooney 	}
9424c87aefeSPatrick Mooney 
9434c87aefeSPatrick Mooney 	/*
9444c87aefeSPatrick Mooney 	 * TSS must be available (not busy) if task switch reason is
9454c87aefeSPatrick Mooney 	 * CALL, JMP, exception or interrupt.
9464c87aefeSPatrick Mooney 	 */
9474c87aefeSPatrick Mooney 	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
9484c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext);
9494c87aefeSPatrick Mooney 		goto done;
9504c87aefeSPatrick Mooney 	}
9514c87aefeSPatrick Mooney 
9524c87aefeSPatrick Mooney 	/* Fetch the new TSS */
9534c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
9544c87aefeSPatrick Mooney 	    PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault);
9554c87aefeSPatrick Mooney 	CHKERR(error, fault);
9564c87aefeSPatrick Mooney 	vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
9574c87aefeSPatrick Mooney 
9584c87aefeSPatrick Mooney 	/* Get the old TSS selector from the guest's task register */
9594c87aefeSPatrick Mooney 	ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR);
9604c87aefeSPatrick Mooney 	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
9614c87aefeSPatrick Mooney 		/*
9624c87aefeSPatrick Mooney 		 * This might happen if a task switch was attempted without
9634c87aefeSPatrick Mooney 		 * ever loading the task register with LTR. In this case the
9644c87aefeSPatrick Mooney 		 * TR would contain the values from power-on:
9654c87aefeSPatrick Mooney 		 * (sel = 0, base = 0, limit = 0xffff).
9664c87aefeSPatrick Mooney 		 */
9674c87aefeSPatrick Mooney 		sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext);
9684c87aefeSPatrick Mooney 		goto done;
9694c87aefeSPatrick Mooney 	}
9704c87aefeSPatrick Mooney 
9714c87aefeSPatrick Mooney 	/* Get the old TSS base and limit from the guest's task register */
9724c87aefeSPatrick Mooney 	error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
9734c87aefeSPatrick Mooney 	    &access);
9744c87aefeSPatrick Mooney 	assert(error == 0);
9754c87aefeSPatrick Mooney 	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
9764c87aefeSPatrick Mooney 	ot_type = SEG_DESC_TYPE(access);
9774c87aefeSPatrick Mooney 	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
9784c87aefeSPatrick Mooney 
9794c87aefeSPatrick Mooney 	/* Fetch the old TSS descriptor */
9804c87aefeSPatrick Mooney 	error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc,
9814c87aefeSPatrick Mooney 	    &fault);
9824c87aefeSPatrick Mooney 	CHKERR(error, fault);
9834c87aefeSPatrick Mooney 
9844c87aefeSPatrick Mooney 	/* Get the old TSS */
9854c87aefeSPatrick Mooney 	error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
9864c87aefeSPatrick Mooney 	    PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault);
9874c87aefeSPatrick Mooney 	CHKERR(error, fault);
9884c87aefeSPatrick Mooney 	vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
9894c87aefeSPatrick Mooney 
9904c87aefeSPatrick Mooney 	/*
9914c87aefeSPatrick Mooney 	 * Clear the busy bit in the old TSS descriptor if the task switch
9924c87aefeSPatrick Mooney 	 * due to an IRET or JMP instruction.
9934c87aefeSPatrick Mooney 	 */
9944c87aefeSPatrick Mooney 	if (reason == TSR_IRET || reason == TSR_JMP) {
9954c87aefeSPatrick Mooney 		ot_desc.sd_type &= ~0x2;
9964c87aefeSPatrick Mooney 		error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
9974c87aefeSPatrick Mooney 		    &ot_desc, &fault);
9984c87aefeSPatrick Mooney 		CHKERR(error, fault);
9994c87aefeSPatrick Mooney 	}
10004c87aefeSPatrick Mooney 
10014c87aefeSPatrick Mooney 	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
1002154972afSPatrick Mooney 		EPRINTLN("Task switch to 16-bit TSS not supported");
10034c87aefeSPatrick Mooney 		return (VMEXIT_ABORT);
10044c87aefeSPatrick Mooney 	}
10054c87aefeSPatrick Mooney 
10064c87aefeSPatrick Mooney 	/* Save processor state in old TSS */
10074c87aefeSPatrick Mooney 	tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);
10084c87aefeSPatrick Mooney 
10094c87aefeSPatrick Mooney 	/*
10104c87aefeSPatrick Mooney 	 * If the task switch was triggered for any reason other than IRET
10114c87aefeSPatrick Mooney 	 * then set the busy bit in the new TSS descriptor.
10124c87aefeSPatrick Mooney 	 */
10134c87aefeSPatrick Mooney 	if (reason != TSR_IRET) {
10144c87aefeSPatrick Mooney 		nt_desc.sd_type |= 0x2;
10154c87aefeSPatrick Mooney 		error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
10164c87aefeSPatrick Mooney 		    &nt_desc, &fault);
10174c87aefeSPatrick Mooney 		CHKERR(error, fault);
10184c87aefeSPatrick Mooney 	}
10194c87aefeSPatrick Mooney 
10204c87aefeSPatrick Mooney 	/* Update task register to point at the new TSS */
10214c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel);
10224c87aefeSPatrick Mooney 
10234c87aefeSPatrick Mooney 	/* Update the hidden descriptor state of the task register */
10244c87aefeSPatrick Mooney 	nt = usd_to_seg_desc(&nt_desc);
10254c87aefeSPatrick Mooney 	update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt);
10264c87aefeSPatrick Mooney 
10274c87aefeSPatrick Mooney 	/* Set CR0.TS */
10284c87aefeSPatrick Mooney 	cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
10294c87aefeSPatrick Mooney 	SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);
10304c87aefeSPatrick Mooney 
10314c87aefeSPatrick Mooney 	/*
10324c87aefeSPatrick Mooney 	 * We are now committed to the task switch. Any exceptions encountered
10334c87aefeSPatrick Mooney 	 * after this point will be handled in the context of the new task and
10344c87aefeSPatrick Mooney 	 * the saved instruction pointer will belong to the new task.
10354c87aefeSPatrick Mooney 	 */
10364c87aefeSPatrick Mooney 	error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
10374c87aefeSPatrick Mooney 	assert(error == 0);
10384c87aefeSPatrick Mooney 
10394c87aefeSPatrick Mooney 	/* Load processor state from new TSS */
10404c87aefeSPatrick Mooney 	error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov,
10414c87aefeSPatrick Mooney 	    &fault);
10424c87aefeSPatrick Mooney 	CHKERR(error, fault);
10434c87aefeSPatrick Mooney 
10444c87aefeSPatrick Mooney 	/*
10454c87aefeSPatrick Mooney 	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
10464c87aefeSPatrick Mooney 	 * caused an error code to be generated, this error code is copied
10474c87aefeSPatrick Mooney 	 * to the stack of the new task.
10484c87aefeSPatrick Mooney 	 */
10494c87aefeSPatrick Mooney 	if (task_switch->errcode_valid) {
10504c87aefeSPatrick Mooney 		assert(task_switch->ext);
10514c87aefeSPatrick Mooney 		assert(task_switch->reason == TSR_IDT_GATE);
10524c87aefeSPatrick Mooney 		error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
10534c87aefeSPatrick Mooney 		    task_switch->errcode, &fault);
10544c87aefeSPatrick Mooney 		CHKERR(error, fault);
10554c87aefeSPatrick Mooney 	}
10564c87aefeSPatrick Mooney 
10574c87aefeSPatrick Mooney 	/*
10584c87aefeSPatrick Mooney 	 * Treatment of virtual-NMI blocking if NMI is delivered through
10594c87aefeSPatrick Mooney 	 * a task gate.
10604c87aefeSPatrick Mooney 	 *
10614c87aefeSPatrick Mooney 	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
10624c87aefeSPatrick Mooney 	 * If the virtual NMIs VM-execution control is 1, VM entry injects
10634c87aefeSPatrick Mooney 	 * an NMI, and delivery of the NMI causes a task switch that causes
10644c87aefeSPatrick Mooney 	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
10654c87aefeSPatrick Mooney 	 * commences.
10664c87aefeSPatrick Mooney 	 *
10674c87aefeSPatrick Mooney 	 * Thus, virtual-NMI blocking is in effect at the time of the task
10684c87aefeSPatrick Mooney 	 * switch VM exit.
10694c87aefeSPatrick Mooney 	 */
10704c87aefeSPatrick Mooney 
10714c87aefeSPatrick Mooney 	/*
10724c87aefeSPatrick Mooney 	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
10734c87aefeSPatrick Mooney 	 *
10744c87aefeSPatrick Mooney 	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation"
10754c87aefeSPatrick Mooney 	 * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
10764c87aefeSPatrick Mooney 	 * This unblocking of virtual-NMI occurs even if IRET causes a fault.
10774c87aefeSPatrick Mooney 	 *
10784c87aefeSPatrick Mooney 	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
10794c87aefeSPatrick Mooney 	 * VM exit.
10804c87aefeSPatrick Mooney 	 */
10814c87aefeSPatrick Mooney 
10824c87aefeSPatrick Mooney 	/*
10834c87aefeSPatrick Mooney 	 * If the task switch was triggered by an event delivered through
10844c87aefeSPatrick Mooney 	 * the IDT then extinguish the pending event from the vcpu's
10854c87aefeSPatrick Mooney 	 * exitintinfo.
10864c87aefeSPatrick Mooney 	 */
10874c87aefeSPatrick Mooney 	if (task_switch->reason == TSR_IDT_GATE) {
10884c87aefeSPatrick Mooney 		error = vm_set_intinfo(ctx, vcpu, 0);
10894c87aefeSPatrick Mooney 		assert(error == 0);
10904c87aefeSPatrick Mooney 	}
10914c87aefeSPatrick Mooney 
10924c87aefeSPatrick Mooney 	/*
10934c87aefeSPatrick Mooney 	 * XXX should inject debug exception if 'T' bit is 1
10944c87aefeSPatrick Mooney 	 */
10954c87aefeSPatrick Mooney done:
10964c87aefeSPatrick Mooney 	return (VMEXIT_CONTINUE);
10974c87aefeSPatrick Mooney }
1098