xref: /illumos-gate/usr/src/uts/intel/io/vmm/intel/vmx.c (revision 54cf5b63)
1bf21cd93STycho Nightingale /*-
24c87aefeSPatrick Mooney  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
34c87aefeSPatrick Mooney  *
4bf21cd93STycho Nightingale  * Copyright (c) 2011 NetApp, Inc.
5bf21cd93STycho Nightingale  * All rights reserved.
64c87aefeSPatrick Mooney  * Copyright (c) 2018 Joyent, Inc.
7bf21cd93STycho Nightingale  *
8bf21cd93STycho Nightingale  * Redistribution and use in source and binary forms, with or without
9bf21cd93STycho Nightingale  * modification, are permitted provided that the following conditions
10bf21cd93STycho Nightingale  * are met:
11bf21cd93STycho Nightingale  * 1. Redistributions of source code must retain the above copyright
12bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer.
13bf21cd93STycho Nightingale  * 2. Redistributions in binary form must reproduce the above copyright
14bf21cd93STycho Nightingale  *    notice, this list of conditions and the following disclaimer in the
15bf21cd93STycho Nightingale  *    documentation and/or other materials provided with the distribution.
16bf21cd93STycho Nightingale  *
17bf21cd93STycho Nightingale  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
27bf21cd93STycho Nightingale  * SUCH DAMAGE.
28bf21cd93STycho Nightingale  *
294c87aefeSPatrick Mooney  * $FreeBSD$
30bf21cd93STycho Nightingale  */
31bf21cd93STycho Nightingale /*
32bf21cd93STycho Nightingale  * This file and its contents are supplied under the terms of the
33bf21cd93STycho Nightingale  * Common Development and Distribution License ("CDDL"), version 1.0.
34bf21cd93STycho Nightingale  * You may only use this file in accordance with the terms of version
35bf21cd93STycho Nightingale  * 1.0 of the CDDL.
36bf21cd93STycho Nightingale  *
37bf21cd93STycho Nightingale  * A full copy of the text of the CDDL should have accompanied this
38bf21cd93STycho Nightingale  * source.  A copy of the CDDL is also available via the Internet at
39bf21cd93STycho Nightingale  * http://www.illumos.org/license/CDDL.
40bf21cd93STycho Nightingale  *
41bf21cd93STycho Nightingale  * Copyright 2015 Pluribus Networks Inc.
424c87aefeSPatrick Mooney  * Copyright 2018 Joyent, Inc.
4383b49c54SPatrick Mooney  * Copyright 2022 Oxide Computer Company
44bf21cd93STycho Nightingale  */
45bf21cd93STycho Nightingale 
46bf21cd93STycho Nightingale #include <sys/cdefs.h>
474c87aefeSPatrick Mooney __FBSDID("$FreeBSD$");
48bf21cd93STycho Nightingale 
49bf21cd93STycho Nightingale #include <sys/param.h>
50bf21cd93STycho Nightingale #include <sys/systm.h>
51bf21cd93STycho Nightingale #include <sys/kernel.h>
528130f8e1SPatrick Mooney #include <sys/kmem.h>
53bf21cd93STycho Nightingale #include <sys/pcpu.h>
54bf21cd93STycho Nightingale #include <sys/proc.h>
55bf21cd93STycho Nightingale #include <sys/sysctl.h>
56bf21cd93STycho Nightingale 
574c87aefeSPatrick Mooney #include <sys/x86_archext.h>
584c87aefeSPatrick Mooney #include <sys/smp_impldefs.h>
594c87aefeSPatrick Mooney #include <sys/smt.h>
604c87aefeSPatrick Mooney #include <sys/hma.h>
614c87aefeSPatrick Mooney #include <sys/trap.h>
620153d828SPatrick Mooney #include <sys/archsystm.h>
634c87aefeSPatrick Mooney 
64bf21cd93STycho Nightingale #include <machine/psl.h>
65bf21cd93STycho Nightingale #include <machine/cpufunc.h>
66bf21cd93STycho Nightingale #include <machine/md_var.h>
674c87aefeSPatrick Mooney #include <machine/reg.h>
68bf21cd93STycho Nightingale #include <machine/segments.h>
69bf21cd93STycho Nightingale #include <machine/specialreg.h>
70bf21cd93STycho Nightingale #include <machine/vmparam.h>
71cf409e3fSDan Cross #include <sys/vmm_vm.h>
72d2f938fdSPatrick Mooney #include <sys/vmm_kernel.h>
73bf21cd93STycho Nightingale 
74bf21cd93STycho Nightingale #include <machine/vmm.h>
75bf21cd93STycho Nightingale #include <machine/vmm_dev.h>
76e0c0d44eSPatrick Mooney #include <sys/vmm_instruction_emul.h>
77bf21cd93STycho Nightingale #include "vmm_lapic.h"
78bf21cd93STycho Nightingale #include "vmm_host.h"
79bf21cd93STycho Nightingale #include "vmm_ioport.h"
80bf21cd93STycho Nightingale #include "vmm_stat.h"
81bf21cd93STycho Nightingale #include "vatpic.h"
82bf21cd93STycho Nightingale #include "vlapic.h"
83bf21cd93STycho Nightingale #include "vlapic_priv.h"
84bf21cd93STycho Nightingale 
854c87aefeSPatrick Mooney #include "vmcs.h"
86bf21cd93STycho Nightingale #include "vmx.h"
87bf21cd93STycho Nightingale #include "vmx_msr.h"
88bf21cd93STycho Nightingale #include "vmx_controls.h"
89bf21cd93STycho Nightingale 
/*
 * Pin-based VM-execution control settings.  vmx_init() hands this pair to
 * vmx_set_ctlreg() as the desired one/zero bit sets when computing
 * pinbased_ctls, and fails if the processor does not support them.
 */
90bf21cd93STycho Nightingale #define	PINBASED_CTLS_ONE_SETTING					\
91bf21cd93STycho Nightingale 	(PINBASED_EXTINT_EXITING	|				\
922699b94cSPatrick Mooney 	PINBASED_NMI_EXITING		|				\
932699b94cSPatrick Mooney 	PINBASED_VIRTUAL_NMI)
94bf21cd93STycho Nightingale #define	PINBASED_CTLS_ZERO_SETTING	0
95bf21cd93STycho Nightingale 
/*
 * Interrupt-window and NMI-window exiting bits.  They are part of
 * PROCBASED_CTLS_ONE_SETTING so that vmx_init() verifies processor support,
 * but vmx_init() clears them from procbased_ctls afterwards because they are
 * only set on demand.
 */
962699b94cSPatrick Mooney #define	PROCBASED_CTLS_WINDOW_SETTING					\
97bf21cd93STycho Nightingale 	(PROCBASED_INT_WINDOW_EXITING	|				\
982699b94cSPatrick Mooney 	PROCBASED_NMI_WINDOW_EXITING)
99bf21cd93STycho Nightingale 
1004c87aefeSPatrick Mooney /* We consider TSC offset a necessity for unsynched TSC handling */
10184971882SPatrick Mooney #define	PROCBASED_CTLS_ONE_SETTING					\
102bf21cd93STycho Nightingale 	(PROCBASED_SECONDARY_CONTROLS	|				\
1032699b94cSPatrick Mooney 	PROCBASED_TSC_OFFSET		|				\
1042699b94cSPatrick Mooney 	PROCBASED_MWAIT_EXITING		|				\
1052699b94cSPatrick Mooney 	PROCBASED_MONITOR_EXITING	|				\
1062699b94cSPatrick Mooney 	PROCBASED_IO_EXITING		|				\
1072699b94cSPatrick Mooney 	PROCBASED_MSR_BITMAPS		|				\
1082699b94cSPatrick Mooney 	PROCBASED_CTLS_WINDOW_SETTING	|				\
1092699b94cSPatrick Mooney 	PROCBASED_CR8_LOAD_EXITING	|				\
1102699b94cSPatrick Mooney 	PROCBASED_CR8_STORE_EXITING)
1114c87aefeSPatrick Mooney 
/*
 * Primary processor-based controls passed to vmx_set_ctlreg() as the desired
 * zero bits when vmx_init() computes procbased_ctls.
 */
112bf21cd93STycho Nightingale #define	PROCBASED_CTLS_ZERO_SETTING	\
113bf21cd93STycho Nightingale 	(PROCBASED_CR3_LOAD_EXITING |	\
114bf21cd93STycho Nightingale 	PROCBASED_CR3_STORE_EXITING |	\
115bf21cd93STycho Nightingale 	PROCBASED_IO_BITMAPS)
116bf21cd93STycho Nightingale 
117c3ae3afaSPatrick Mooney /*
118c3ae3afaSPatrick Mooney  * EPT and Unrestricted Guest are considered necessities.  The latter is not a
119c3ae3afaSPatrick Mooney  * requirement on FreeBSD, where grub2-bhyve is used to load guests directly
120c3ae3afaSPatrick Mooney  * without a bootrom starting in real mode.
121c3ae3afaSPatrick Mooney  */
122c3ae3afaSPatrick Mooney #define	PROCBASED_CTLS2_ONE_SETTING		\
123c3ae3afaSPatrick Mooney 	(PROCBASED2_ENABLE_EPT |		\
124c3ae3afaSPatrick Mooney 	PROCBASED2_UNRESTRICTED_GUEST)
125bf21cd93STycho Nightingale #define	PROCBASED_CTLS2_ZERO_SETTING	0
126bf21cd93STycho Nightingale 
/*
 * VM-exit control settings.  vmx_init() hands this pair to vmx_set_ctlreg()
 * as the desired one/zero bit sets when computing exit_ctls, and fails if the
 * processor does not support them.
 */
127bf21cd93STycho Nightingale #define	VM_EXIT_CTLS_ONE_SETTING					\
1284c87aefeSPatrick Mooney 	(VM_EXIT_SAVE_DEBUG_CONTROLS		|			\
1294c87aefeSPatrick Mooney 	VM_EXIT_HOST_LMA			|			\
1304c87aefeSPatrick Mooney 	VM_EXIT_LOAD_PAT			|			\
131bf21cd93STycho Nightingale 	VM_EXIT_SAVE_EFER			|			\
132bf21cd93STycho Nightingale 	VM_EXIT_LOAD_EFER			|			\
1334c87aefeSPatrick Mooney 	VM_EXIT_ACKNOWLEDGE_INTERRUPT)
134bf21cd93STycho Nightingale 
1354c87aefeSPatrick Mooney #define	VM_EXIT_CTLS_ZERO_SETTING	0
136bf21cd93STycho Nightingale 
/*
 * VM-entry control settings: the desired one bits, followed by the desired
 * zero bits (entry into SMM and dual-monitor deactivation).
 */
1374c87aefeSPatrick Mooney #define	VM_ENTRY_CTLS_ONE_SETTING					\
1384c87aefeSPatrick Mooney 	(VM_ENTRY_LOAD_DEBUG_CONTROLS		|			\
1394c87aefeSPatrick Mooney 	VM_ENTRY_LOAD_EFER)
140bf21cd93STycho Nightingale 
141bf21cd93STycho Nightingale #define	VM_ENTRY_CTLS_ZERO_SETTING					\
1424c87aefeSPatrick Mooney 	(VM_ENTRY_INTO_SMM			|			\
143bf21cd93STycho Nightingale 	VM_ENTRY_DEACTIVATE_DUAL_MONITOR)
144bf21cd93STycho Nightingale 
1450153d828SPatrick Mooney /*
1460153d828SPatrick Mooney  * Cover the EPT capabilities used by bhyve at present:
1470153d828SPatrick Mooney  * - 4-level page walks
1480153d828SPatrick Mooney  * - write-back memory type
1490153d828SPatrick Mooney  * - INVEPT operations (all types)
1500153d828SPatrick Mooney  * - INVVPID operations (single-context only)
1510153d828SPatrick Mooney  */
1520153d828SPatrick Mooney #define	EPT_CAPS_REQUIRED			\
1530153d828SPatrick Mooney 	(IA32_VMX_EPT_VPID_PWL4 |		\
1540153d828SPatrick Mooney 	IA32_VMX_EPT_VPID_TYPE_WB |		\
1550153d828SPatrick Mooney 	IA32_VMX_EPT_VPID_INVEPT |		\
1560153d828SPatrick Mooney 	IA32_VMX_EPT_VPID_INVEPT_SINGLE |	\
1570153d828SPatrick Mooney 	IA32_VMX_EPT_VPID_INVEPT_ALL |		\
1580153d828SPatrick Mooney 	IA32_VMX_EPT_VPID_INVVPID |		\
1590153d828SPatrick Mooney 	IA32_VMX_EPT_VPID_INVVPID_SINGLE)
1600153d828SPatrick Mooney 
161bf21cd93STycho Nightingale #define	HANDLED		1
162bf21cd93STycho Nightingale #define	UNHANDLED	0
163bf21cd93STycho Nightingale 
164bf21cd93STycho Nightingale SYSCTL_DECL(_hw_vmm);
165154972afSPatrick Mooney SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
166154972afSPatrick Mooney     NULL);
167bf21cd93STycho Nightingale 
/*
 * VMX control values.  pinbased_ctls, procbased_ctls, procbased_ctls2 and
 * exit_ctls are filled in by vmx_init() through vmx_set_ctlreg(); entry_ctls
 * is passed to the same helper for the VM-entry controls.
 */
168bf21cd93STycho Nightingale static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2;
169bf21cd93STycho Nightingale static uint32_t exit_ctls, entry_ctls;
170bf21cd93STycho Nightingale 
/* cr0 bits that vmx_fix_cr0() forces to one and to zero, respectively */
171bf21cd93STycho Nightingale static uint64_t cr0_ones_mask, cr0_zeros_mask;
172bf21cd93STycho Nightingale 
/* cr4 bits that vmx_fix_cr4() forces to one and to zero, respectively */
173bf21cd93STycho Nightingale static uint64_t cr4_ones_mask, cr4_zeros_mask;
174bf21cd93STycho Nightingale 
175bf21cd93STycho Nightingale static int vmx_initialized;
176bf21cd93STycho Nightingale 
1772699b94cSPatrick Mooney /* Do not flush RSB upon vmexit */
178007ca332SPatrick Mooney static int no_flush_rsb;
179007ca332SPatrick Mooney 
180bf21cd93STycho Nightingale /*
181bf21cd93STycho Nightingale  * Optional capabilities
182bf21cd93STycho Nightingale  */
1834c87aefeSPatrick Mooney 
1842699b94cSPatrick Mooney /* HLT triggers a VM-exit */
185bf21cd93STycho Nightingale static int cap_halt_exit;
1864c87aefeSPatrick Mooney 
1872699b94cSPatrick Mooney /* PAUSE triggers a VM-exit */
188bf21cd93STycho Nightingale static int cap_pause_exit;
1894c87aefeSPatrick Mooney 
1902699b94cSPatrick Mooney /* Monitor trap flag */
191bf21cd93STycho Nightingale static int cap_monitor_trap;
1924c87aefeSPatrick Mooney 
1932699b94cSPatrick Mooney /* Guests are allowed to use INVPCID */
194bf21cd93STycho Nightingale static int cap_invpcid;
195bf21cd93STycho Nightingale 
196c3ae3afaSPatrick Mooney /* Extra capabilities (VMX_CAP_*) beyond the minimum */
197c3ae3afaSPatrick Mooney static enum vmx_caps vmx_capabilities;
198bf21cd93STycho Nightingale 
1992699b94cSPatrick Mooney /* APICv posted interrupt vector */
2004c87aefeSPatrick Mooney static int pirvec = -1;
201bf21cd93STycho Nightingale 
/*
 * Number of vpid_alloc() calls which could not obtain a unique VPID for every
 * requested vcpu and fell back to the [1,VM_MAXCPU] range.
 */
2022699b94cSPatrick Mooney static uint_t vpid_alloc_failed;
203bf21cd93STycho Nightingale 
204154972afSPatrick Mooney int guest_l1d_flush;
205154972afSPatrick Mooney int guest_l1d_flush_sw;
2064c87aefeSPatrick Mooney 
207007ca332SPatrick Mooney /* MSR save region is composed of an array of 'struct msr_entry' */
/*
 * NOTE(review): the 32/32/64-bit field layout appears to follow the MSR-entry
 * format of the VMX MSR load/store areas in the Intel SDM -- confirm.
 */
208007ca332SPatrick Mooney struct msr_entry {
209007ca332SPatrick Mooney 	uint32_t	index;		/* presumably the MSR number */
210007ca332SPatrick Mooney 	uint32_t	reserved;
211007ca332SPatrick Mooney 	uint64_t	val;		/* presumably the MSR contents */
212007ca332SPatrick Mooney };
213007ca332SPatrick Mooney 
/* Single-entry, 16-byte aligned MSR list; its consumers are outside this view */
2144c87aefeSPatrick Mooney static struct msr_entry msr_load_list[1] __aligned(16);
2154c87aefeSPatrick Mooney 
2164c87aefeSPatrick Mooney /*
2174c87aefeSPatrick Mooney  * The definitions of SDT probes for VMX.
2184c87aefeSPatrick Mooney  */
2194c87aefeSPatrick Mooney 
2202699b94cSPatrick Mooney /* BEGIN CSTYLED */
2214c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, entry,
2224c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2234c87aefeSPatrick Mooney 
2244c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, taskswitch,
2254c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "struct vm_task_switch *");
2264c87aefeSPatrick Mooney 
2274c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, craccess,
2284c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint64_t");
2294c87aefeSPatrick Mooney 
2304c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, rdmsr,
2314c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint32_t");
2324c87aefeSPatrick Mooney 
2334c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, wrmsr,
2344c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint32_t", "uint64_t");
2354c87aefeSPatrick Mooney 
2364c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, halt,
2374c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2384c87aefeSPatrick Mooney 
2394c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, mtrap,
2404c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2414c87aefeSPatrick Mooney 
2424c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, pause,
2434c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2444c87aefeSPatrick Mooney 
2454c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, intrwindow,
2464c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2474c87aefeSPatrick Mooney 
2484c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, interrupt,
2494c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint32_t");
2504c87aefeSPatrick Mooney 
2514c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, nmiwindow,
2524c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2534c87aefeSPatrick Mooney 
2544c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, inout,
2554c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2564c87aefeSPatrick Mooney 
2574c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, cpuid,
2584c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2594c87aefeSPatrick Mooney 
2604c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, exception,
2614c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint32_t", "int");
2624c87aefeSPatrick Mooney 
2634c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, nestedfault,
2644c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint64_t", "uint64_t");
2654c87aefeSPatrick Mooney 
2664c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, mmiofault,
2674c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint64_t");
2684c87aefeSPatrick Mooney 
2694c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, eoi,
2704c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2714c87aefeSPatrick Mooney 
2724c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, apicaccess,
2734c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2744c87aefeSPatrick Mooney 
2754c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, apicwrite,
2764c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "struct vlapic *");
2774c87aefeSPatrick Mooney 
2784c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, xsetbv,
2794c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2804c87aefeSPatrick Mooney 
2814c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor,
2824c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2834c87aefeSPatrick Mooney 
2844c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait,
2854c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2864c87aefeSPatrick Mooney 
2874c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, vminsn,
2884c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *");
2894c87aefeSPatrick Mooney 
2904c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown,
2914c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "uint32_t");
2924c87aefeSPatrick Mooney 
2934c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, return,
2944c87aefeSPatrick Mooney     "struct vmx *", "int", "struct vm_exit *", "int");
2952699b94cSPatrick Mooney /* END CSTYLED */
2964c87aefeSPatrick Mooney 
297bf21cd93STycho Nightingale static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc);
298bf21cd93STycho Nightingale static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval);
299007ca332SPatrick Mooney static void vmx_apply_tsc_adjust(struct vmx *, int);
300c74a40a5SPatrick Mooney static void vmx_apicv_sync_tmr(struct vlapic *vlapic);
301c74a40a5SPatrick Mooney static void vmx_tpr_shadow_enter(struct vlapic *vlapic);
302c74a40a5SPatrick Mooney static void vmx_tpr_shadow_exit(struct vlapic *vlapic);
303bf21cd93STycho Nightingale 
3046b641d7aSPatrick Mooney static void
vmx_allow_x2apic_msrs(struct vmx * vmx,int vcpuid)3056b641d7aSPatrick Mooney vmx_allow_x2apic_msrs(struct vmx *vmx, int vcpuid)
306bf21cd93STycho Nightingale {
307bf21cd93STycho Nightingale 	/*
308bf21cd93STycho Nightingale 	 * Allow readonly access to the following x2APIC MSRs from the guest.
309bf21cd93STycho Nightingale 	 */
3106b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_ID);
3116b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_VERSION);
3126b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_LDR);
3136b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_SVR);
3146b641d7aSPatrick Mooney 
3156b641d7aSPatrick Mooney 	for (uint_t i = 0; i < 8; i++) {
3166b641d7aSPatrick Mooney 		guest_msr_ro(vmx, vcpuid, MSR_APIC_ISR0 + i);
3176b641d7aSPatrick Mooney 		guest_msr_ro(vmx, vcpuid, MSR_APIC_TMR0 + i);
3186b641d7aSPatrick Mooney 		guest_msr_ro(vmx, vcpuid, MSR_APIC_IRR0 + i);
3196b641d7aSPatrick Mooney 	}
3206b641d7aSPatrick Mooney 
3216b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_ESR);
3226b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_TIMER);
3236b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_THERMAL);
3246b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_PCINT);
3256b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT0);
3266b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT1);
3276b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_ERROR);
3286b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR_TIMER);
3296b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_DCR_TIMER);
3306b641d7aSPatrick Mooney 	guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR);
331bf21cd93STycho Nightingale 
332bf21cd93STycho Nightingale 	/*
333bf21cd93STycho Nightingale 	 * Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest.
334bf21cd93STycho Nightingale 	 *
335bf21cd93STycho Nightingale 	 * These registers get special treatment described in the section
336bf21cd93STycho Nightingale 	 * "Virtualizing MSR-Based APIC Accesses".
337bf21cd93STycho Nightingale 	 */
3386b641d7aSPatrick Mooney 	guest_msr_rw(vmx, vcpuid, MSR_APIC_TPR);
3396b641d7aSPatrick Mooney 	guest_msr_rw(vmx, vcpuid, MSR_APIC_EOI);
3406b641d7aSPatrick Mooney 	guest_msr_rw(vmx, vcpuid, MSR_APIC_SELF_IPI);
341bf21cd93STycho Nightingale }
342bf21cd93STycho Nightingale 
3432699b94cSPatrick Mooney static ulong_t
vmx_fix_cr0(ulong_t cr0)3442699b94cSPatrick Mooney vmx_fix_cr0(ulong_t cr0)
345bf21cd93STycho Nightingale {
346bf21cd93STycho Nightingale 	return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask);
347bf21cd93STycho Nightingale }
348bf21cd93STycho Nightingale 
349bf0dcd3fSPatrick Mooney /*
350bf0dcd3fSPatrick Mooney  * Given a live (VMCS-active) cr0 value, and its shadow counterpart, calculate
351bf0dcd3fSPatrick Mooney  * the value observable from the guest.
352bf0dcd3fSPatrick Mooney  */
353bf0dcd3fSPatrick Mooney static ulong_t
vmx_unshadow_cr0(uint64_t cr0,uint64_t shadow)354bf0dcd3fSPatrick Mooney vmx_unshadow_cr0(uint64_t cr0, uint64_t shadow)
355bf0dcd3fSPatrick Mooney {
356bf0dcd3fSPatrick Mooney 	return ((cr0 & ~cr0_ones_mask) |
357bf0dcd3fSPatrick Mooney 	    (shadow & (cr0_zeros_mask | cr0_ones_mask)));
358bf0dcd3fSPatrick Mooney }
359bf0dcd3fSPatrick Mooney 
3602699b94cSPatrick Mooney static ulong_t
vmx_fix_cr4(ulong_t cr4)3612699b94cSPatrick Mooney vmx_fix_cr4(ulong_t cr4)
362bf21cd93STycho Nightingale {
363bf21cd93STycho Nightingale 	return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask);
364bf21cd93STycho Nightingale }
365bf21cd93STycho Nightingale 
366bf0dcd3fSPatrick Mooney /*
367bf0dcd3fSPatrick Mooney  * Given a live (VMCS-active) cr4 value, and its shadow counterpart, calculate
368bf0dcd3fSPatrick Mooney  * the value observable from the guest.
369bf0dcd3fSPatrick Mooney  */
370bf0dcd3fSPatrick Mooney static ulong_t
vmx_unshadow_cr4(uint64_t cr4,uint64_t shadow)371bf0dcd3fSPatrick Mooney vmx_unshadow_cr4(uint64_t cr4, uint64_t shadow)
372bf0dcd3fSPatrick Mooney {
373bf0dcd3fSPatrick Mooney 	return ((cr4 & ~cr4_ones_mask) |
374bf0dcd3fSPatrick Mooney 	    (shadow & (cr4_zeros_mask | cr4_ones_mask)));
375bf0dcd3fSPatrick Mooney }
376bf0dcd3fSPatrick Mooney 
377bf21cd93STycho Nightingale static void
vpid_free(int vpid)378bf21cd93STycho Nightingale vpid_free(int vpid)
379bf21cd93STycho Nightingale {
380bf21cd93STycho Nightingale 	if (vpid < 0 || vpid > 0xffff)
381bf21cd93STycho Nightingale 		panic("vpid_free: invalid vpid %d", vpid);
382bf21cd93STycho Nightingale 
383bf21cd93STycho Nightingale 	/*
384bf21cd93STycho Nightingale 	 * VPIDs [0,VM_MAXCPU] are special and are not allocated from
385bf21cd93STycho Nightingale 	 * the unit number allocator.
386bf21cd93STycho Nightingale 	 */
387bf21cd93STycho Nightingale 
388bf21cd93STycho Nightingale 	if (vpid > VM_MAXCPU)
3894c87aefeSPatrick Mooney 		hma_vmx_vpid_free((uint16_t)vpid);
390bf21cd93STycho Nightingale }
391bf21cd93STycho Nightingale 
392bf21cd93STycho Nightingale static void
vpid_alloc(uint16_t * vpid,int num)393bf21cd93STycho Nightingale vpid_alloc(uint16_t *vpid, int num)
394bf21cd93STycho Nightingale {
395bf21cd93STycho Nightingale 	int i, x;
396bf21cd93STycho Nightingale 
397bf21cd93STycho Nightingale 	if (num <= 0 || num > VM_MAXCPU)
398bf21cd93STycho Nightingale 		panic("invalid number of vpids requested: %d", num);
399bf21cd93STycho Nightingale 
400bf21cd93STycho Nightingale 	/*
401bf21cd93STycho Nightingale 	 * If the "enable vpid" execution control is not enabled then the
402bf21cd93STycho Nightingale 	 * VPID is required to be 0 for all vcpus.
403bf21cd93STycho Nightingale 	 */
404bf21cd93STycho Nightingale 	if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) {
405bf21cd93STycho Nightingale 		for (i = 0; i < num; i++)
406bf21cd93STycho Nightingale 			vpid[i] = 0;
407bf21cd93STycho Nightingale 		return;
408bf21cd93STycho Nightingale 	}
409bf21cd93STycho Nightingale 
410bf21cd93STycho Nightingale 	/*
411bf21cd93STycho Nightingale 	 * Allocate a unique VPID for each vcpu from the unit number allocator.
412bf21cd93STycho Nightingale 	 */
413bf21cd93STycho Nightingale 	for (i = 0; i < num; i++) {
4144c87aefeSPatrick Mooney 		uint16_t tmp;
4154c87aefeSPatrick Mooney 
4164c87aefeSPatrick Mooney 		tmp = hma_vmx_vpid_alloc();
4174c87aefeSPatrick Mooney 		x = (tmp == 0) ? -1 : tmp;
418f703164bSPatrick Mooney 
419bf21cd93STycho Nightingale 		if (x == -1)
420bf21cd93STycho Nightingale 			break;
421bf21cd93STycho Nightingale 		else
422bf21cd93STycho Nightingale 			vpid[i] = x;
423bf21cd93STycho Nightingale 	}
424bf21cd93STycho Nightingale 
425bf21cd93STycho Nightingale 	if (i < num) {
426bf21cd93STycho Nightingale 		atomic_add_int(&vpid_alloc_failed, 1);
427bf21cd93STycho Nightingale 
428bf21cd93STycho Nightingale 		/*
429bf21cd93STycho Nightingale 		 * If the unit number allocator does not have enough unique
430bf21cd93STycho Nightingale 		 * VPIDs then we need to allocate from the [1,VM_MAXCPU] range.
431bf21cd93STycho Nightingale 		 *
432bf21cd93STycho Nightingale 		 * These VPIDs are not be unique across VMs but this does not
433bf21cd93STycho Nightingale 		 * affect correctness because the combined mappings are also
434bf21cd93STycho Nightingale 		 * tagged with the EP4TA which is unique for each VM.
435bf21cd93STycho Nightingale 		 *
436bf21cd93STycho Nightingale 		 * It is still sub-optimal because the invvpid will invalidate
437bf21cd93STycho Nightingale 		 * combined mappings for a particular VPID across all EP4TAs.
438bf21cd93STycho Nightingale 		 */
439bf21cd93STycho Nightingale 		while (i-- > 0)
440bf21cd93STycho Nightingale 			vpid_free(vpid[i]);
441bf21cd93STycho Nightingale 
442bf21cd93STycho Nightingale 		for (i = 0; i < num; i++)
443bf21cd93STycho Nightingale 			vpid[i] = i + 1;
444bf21cd93STycho Nightingale 	}
445bf21cd93STycho Nightingale }
446bf21cd93STycho Nightingale 
/*
 * Cleanup hook for VMX.  Nothing is done here because teardown is taken care
 * of by the hma registration; always returns 0.
 */
static int
vmx_cleanup(void)
{
	const int rv = 0;

	return (rv);
}
4534c87aefeSPatrick Mooney 
/*
 * Restore hook for VMX.  Intentionally empty: this is a no-op on illumos.
 */
static void
vmx_restore(void)
{
}
4594c87aefeSPatrick Mooney 
4604c87aefeSPatrick Mooney static int
vmx_init(void)4610153d828SPatrick Mooney vmx_init(void)
4624c87aefeSPatrick Mooney {
463154972afSPatrick Mooney 	int error;
4644c87aefeSPatrick Mooney 	uint64_t fixed0, fixed1;
465c3ae3afaSPatrick Mooney 	uint32_t tmp;
466c3ae3afaSPatrick Mooney 	enum vmx_caps avail_caps = VMX_CAP_NONE;
4674c87aefeSPatrick Mooney 
468bf21cd93STycho Nightingale 	/* Check support for primary processor-based VM-execution controls */
469bf21cd93STycho Nightingale 	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
4702699b94cSPatrick Mooney 	    MSR_VMX_TRUE_PROCBASED_CTLS,
4712699b94cSPatrick Mooney 	    PROCBASED_CTLS_ONE_SETTING,
4722699b94cSPatrick Mooney 	    PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls);
473bf21cd93STycho Nightingale 	if (error) {
474bf21cd93STycho Nightingale 		printf("vmx_init: processor does not support desired primary "
4752699b94cSPatrick Mooney 		    "processor-based controls\n");
476bf21cd93STycho Nightingale 		return (error);
477bf21cd93STycho Nightingale 	}
478bf21cd93STycho Nightingale 
479bf21cd93STycho Nightingale 	/* Clear the processor-based ctl bits that are set on demand */
480bf21cd93STycho Nightingale 	procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING;
481bf21cd93STycho Nightingale 
482bf21cd93STycho Nightingale 	/* Check support for secondary processor-based VM-execution controls */
483bf21cd93STycho Nightingale 	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
4842699b94cSPatrick Mooney 	    MSR_VMX_PROCBASED_CTLS2,
4852699b94cSPatrick Mooney 	    PROCBASED_CTLS2_ONE_SETTING,
4862699b94cSPatrick Mooney 	    PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2);
487bf21cd93STycho Nightingale 	if (error) {
488bf21cd93STycho Nightingale 		printf("vmx_init: processor does not support desired secondary "
4892699b94cSPatrick Mooney 		    "processor-based controls\n");
490bf21cd93STycho Nightingale 		return (error);
491bf21cd93STycho Nightingale 	}
492bf21cd93STycho Nightingale 
493bf21cd93STycho Nightingale 	/* Check support for VPID */
4942699b94cSPatrick Mooney 	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
4952699b94cSPatrick Mooney 	    MSR_VMX_PROCBASED_CTLS2,
4962699b94cSPatrick Mooney 	    PROCBASED2_ENABLE_VPID,
4972699b94cSPatrick Mooney 	    0, &tmp);
498bf21cd93STycho Nightingale 	if (error == 0)
499bf21cd93STycho Nightingale 		procbased_ctls2 |= PROCBASED2_ENABLE_VPID;
500bf21cd93STycho Nightingale 
501bf21cd93STycho Nightingale 	/* Check support for pin-based VM-execution controls */
502bf21cd93STycho Nightingale 	error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
5032699b94cSPatrick Mooney 	    MSR_VMX_TRUE_PINBASED_CTLS,
5042699b94cSPatrick Mooney 	    PINBASED_CTLS_ONE_SETTING,
5052699b94cSPatrick Mooney 	    PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls);
506bf21cd93STycho Nightingale 	if (error) {
507bf21cd93STycho Nightingale 		printf("vmx_init: processor does not support desired "
5082699b94cSPatrick Mooney 		    "pin-based controls\n");
509bf21cd93STycho Nightingale 		return (error);
510bf21cd93STycho Nightingale 	}
511bf21cd93STycho Nightingale 
512bf21cd93STycho Nightingale 	/* Check support for VM-exit controls */
513bf21cd93STycho Nightingale 	error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS,
5142699b94cSPatrick Mooney 	    VM_EXIT_CTLS_ONE_SETTING,
5152699b94cSPatrick Mooney 	    VM_EXIT_CTLS_ZERO_SETTING,
5162699b94cSPatrick Mooney 	    &exit_ctls);
517bf21cd93STycho Nightingale 	if (error) {
518bf21cd93STycho Nightingale 		printf("vmx_init: processor does not support desired "
519bf21cd93STycho Nightingale 		    "exit controls\n");
520bf21cd93STycho Nightingale 		return (error);
521bf21cd93STycho Nightingale 	}
522bf21cd93STycho Nightingale 
523bf21cd93STycho Nightingale 	/* Check support for VM-entry controls */
524bf21cd93STycho Nightingale 	error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS,
526bf21cd93STycho Nightingale 	    &entry_ctls);
527bf21cd93STycho Nightingale 	if (error) {
528bf21cd93STycho Nightingale 		printf("vmx_init: processor does not support desired "
529bf21cd93STycho Nightingale 		    "entry controls\n");
530bf21cd93STycho Nightingale 		return (error);
531bf21cd93STycho Nightingale 	}
532bf21cd93STycho Nightingale 
533bf21cd93STycho Nightingale 	/*
534bf21cd93STycho Nightingale 	 * Check support for optional features by testing them
535bf21cd93STycho Nightingale 	 * as individual bits
536bf21cd93STycho Nightingale 	 */
537bf21cd93STycho Nightingale 	cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
5382699b94cSPatrick Mooney 	    MSR_VMX_TRUE_PROCBASED_CTLS,
5392699b94cSPatrick Mooney 	    PROCBASED_HLT_EXITING, 0,
5402699b94cSPatrick Mooney 	    &tmp) == 0);
541bf21cd93STycho Nightingale 
542bf21cd93STycho Nightingale 	cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
5432699b94cSPatrick Mooney 	    MSR_VMX_PROCBASED_CTLS,
5442699b94cSPatrick Mooney 	    PROCBASED_MTF, 0,
5452699b94cSPatrick Mooney 	    &tmp) == 0);
546bf21cd93STycho Nightingale 
547bf21cd93STycho Nightingale 	cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
5482699b94cSPatrick Mooney 	    MSR_VMX_TRUE_PROCBASED_CTLS,
5492699b94cSPatrick Mooney 	    PROCBASED_PAUSE_EXITING, 0,
5502699b94cSPatrick Mooney 	    &tmp) == 0);
551bf21cd93STycho Nightingale 
5524c87aefeSPatrick Mooney 	cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
5544c87aefeSPatrick Mooney 	    &tmp) == 0);
5554c87aefeSPatrick Mooney 
5562699b94cSPatrick Mooney 	/*
5572699b94cSPatrick Mooney 	 * Check for APIC virtualization capabilities:
558c3ae3afaSPatrick Mooney 	 * - TPR shadowing
559c3ae3afaSPatrick Mooney 	 * - Full APICv (with or without x2APIC support)
560c3ae3afaSPatrick Mooney 	 * - Posted interrupt handling
561154972afSPatrick Mooney 	 */
562c3ae3afaSPatrick Mooney 	if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS,
563c3ae3afaSPatrick Mooney 	    PROCBASED_USE_TPR_SHADOW, 0, &tmp) == 0) {
564c3ae3afaSPatrick Mooney 		avail_caps |= VMX_CAP_TPR_SHADOW;
565c3ae3afaSPatrick Mooney 
566c3ae3afaSPatrick Mooney 		const uint32_t apicv_bits =
567c3ae3afaSPatrick Mooney 		    PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
569c3ae3afaSPatrick Mooney 		    PROCBASED2_VIRTUALIZE_X2APIC_MODE |
571c3ae3afaSPatrick Mooney 		if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
572c3ae3afaSPatrick Mooney 		    MSR_VMX_PROCBASED_CTLS2, apicv_bits, 0, &tmp) == 0) {
573c3ae3afaSPatrick Mooney 			avail_caps |= VMX_CAP_APICV;
574154972afSPatrick Mooney 
5754c87aefeSPatrick Mooney 			/*
576c3ae3afaSPatrick Mooney 			 * It may make sense in the future to differentiate
577c3ae3afaSPatrick Mooney 			 * hardware (or software) configurations with APICv but
578c3ae3afaSPatrick Mooney 			 * no support for accelerating x2APIC mode.
5794c87aefeSPatrick Mooney 			 */
580c3ae3afaSPatrick Mooney 			avail_caps |= VMX_CAP_APICV_X2APIC;
581c3ae3afaSPatrick Mooney 
582c3ae3afaSPatrick Mooney 			error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
583c3ae3afaSPatrick Mooney 			    MSR_VMX_TRUE_PINBASED_CTLS,
584c3ae3afaSPatrick Mooney 			    PINBASED_POSTED_INTERRUPT, 0, &tmp);
585c3ae3afaSPatrick Mooney 			if (error == 0) {
586c3ae3afaSPatrick Mooney 				/*
587c3ae3afaSPatrick Mooney 				 * If the PSM-provided interfaces for requesting
588c3ae3afaSPatrick Mooney 				 * and using a PIR IPI vector are present, use
589c3ae3afaSPatrick Mooney 				 * them for posted interrupts.
590c3ae3afaSPatrick Mooney 				 */
591c3ae3afaSPatrick Mooney 				if (psm_get_pir_ipivect != NULL &&
592c3ae3afaSPatrick Mooney 				    psm_send_pir_ipi != NULL) {
593c3ae3afaSPatrick Mooney 					pirvec = psm_get_pir_ipivect();
594c3ae3afaSPatrick Mooney 					avail_caps |= VMX_CAP_APICV_PIR;
595c3ae3afaSPatrick Mooney 				}
5964c87aefeSPatrick Mooney 			}
5974c87aefeSPatrick Mooney 		}
5984c87aefeSPatrick Mooney 	}
5994c87aefeSPatrick Mooney 
6000153d828SPatrick Mooney 	/*
6010153d828SPatrick Mooney 	 * Check for necessary EPT capabilities
6020153d828SPatrick Mooney 	 *
6030153d828SPatrick Mooney 	 * TODO: Properly handle when IA32_VMX_EPT_VPID_HW_AD is missing and the
6040153d828SPatrick Mooney 	 * hypervisor intends to utilize dirty page tracking.
6050153d828SPatrick Mooney 	 */
6060153d828SPatrick Mooney 	uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP);
6070153d828SPatrick Mooney 	if ((ept_caps & EPT_CAPS_REQUIRED) != EPT_CAPS_REQUIRED) {
6080153d828SPatrick Mooney 		cmn_err(CE_WARN, "!Inadequate EPT capabilities: %lx", ept_caps);
6090153d828SPatrick Mooney 		return (EINVAL);
610bf21cd93STycho Nightingale 	}
611bf21cd93STycho Nightingale 
6124c87aefeSPatrick Mooney #ifdef __FreeBSD__
6134c87aefeSPatrick Mooney 	guest_l1d_flush = (cpu_ia32_arch_caps &
6144c87aefeSPatrick Mooney 	    IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0;
6154c87aefeSPatrick Mooney 	TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush);
6164c87aefeSPatrick Mooney 
6174c87aefeSPatrick Mooney 	/*
6184c87aefeSPatrick Mooney 	 * L1D cache flush is enabled.  Use IA32_FLUSH_CMD MSR when
6194c87aefeSPatrick Mooney 	 * available.  Otherwise fall back to the software flush
6204c87aefeSPatrick Mooney 	 * method which loads enough data from the kernel text to
6214c87aefeSPatrick Mooney 	 * flush existing L1D content, both on VMX entry and on NMI
6224c87aefeSPatrick Mooney 	 * return.
6234c87aefeSPatrick Mooney 	 */
6244c87aefeSPatrick Mooney 	if (guest_l1d_flush) {
6254c87aefeSPatrick Mooney 		if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) {
6264c87aefeSPatrick Mooney 			guest_l1d_flush_sw = 1;
6274c87aefeSPatrick Mooney 			TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw",
6284c87aefeSPatrick Mooney 			    &guest_l1d_flush_sw);
6294c87aefeSPatrick Mooney 		}
6304c87aefeSPatrick Mooney 		if (guest_l1d_flush_sw) {
6314c87aefeSPatrick Mooney 			if (nmi_flush_l1d_sw <= 1)
6324c87aefeSPatrick Mooney 				nmi_flush_l1d_sw = 1;
6334c87aefeSPatrick Mooney 		} else {
6344c87aefeSPatrick Mooney 			msr_load_list[0].index = MSR_IA32_FLUSH_CMD;
6354c87aefeSPatrick Mooney 			msr_load_list[0].val = IA32_FLUSH_CMD_L1D;
6364c87aefeSPatrick Mooney 		}
6374c87aefeSPatrick Mooney 	}
6384c87aefeSPatrick Mooney #else
6394c87aefeSPatrick Mooney 	/* L1D flushing is taken care of by smt_acquire() and friends */
6404c87aefeSPatrick Mooney 	guest_l1d_flush = 0;
6414c87aefeSPatrick Mooney #endif /* __FreeBSD__ */
6424c87aefeSPatrick Mooney 
643bf21cd93STycho Nightingale 	/*
644bf21cd93STycho Nightingale 	 * Stash the cr0 and cr4 bits that must be fixed to 0 or 1
645bf21cd93STycho Nightingale 	 */
646bf21cd93STycho Nightingale 	fixed0 = rdmsr(MSR_VMX_CR0_FIXED0);
647bf21cd93STycho Nightingale 	fixed1 = rdmsr(MSR_VMX_CR0_FIXED1);
648bf21cd93STycho Nightingale 	cr0_ones_mask = fixed0 & fixed1;
649bf21cd93STycho Nightingale 	cr0_zeros_mask = ~fixed0 & ~fixed1;
650bf21cd93STycho Nightingale 
651bf21cd93STycho Nightingale 	/*
652c3ae3afaSPatrick Mooney 	 * Since Unrestricted Guest was already verified present, CR0_PE and
653c3ae3afaSPatrick Mooney 	 * CR0_PG are allowed to be set to zero in VMX non-root operation
654bf21cd93STycho Nightingale 	 */
655c3ae3afaSPatrick Mooney 	cr0_ones_mask &= ~(CR0_PG | CR0_PE);
656bf21cd93STycho Nightingale 
657bf21cd93STycho Nightingale 	/*
658bf21cd93STycho Nightingale 	 * Do not allow the guest to set CR0_NW or CR0_CD.
659bf21cd93STycho Nightingale 	 */
660bf21cd93STycho Nightingale 	cr0_zeros_mask |= (CR0_NW | CR0_CD);
661bf21cd93STycho Nightingale 
662bf21cd93STycho Nightingale 	fixed0 = rdmsr(MSR_VMX_CR4_FIXED0);
663bf21cd93STycho Nightingale 	fixed1 = rdmsr(MSR_VMX_CR4_FIXED1);
664bf21cd93STycho Nightingale 	cr4_ones_mask = fixed0 & fixed1;
665bf21cd93STycho Nightingale 	cr4_zeros_mask = ~fixed0 & ~fixed1;
666bf21cd93STycho Nightingale 
667bf21cd93STycho Nightingale 	vmx_msr_init();
668bf21cd93STycho Nightingale 
669c3ae3afaSPatrick Mooney 	vmx_capabilities = avail_caps;
670bf21cd93STycho Nightingale 	vmx_initialized = 1;
671bf21cd93STycho Nightingale 
672bf21cd93STycho Nightingale 	return (0);
673bf21cd93STycho Nightingale }
674bf21cd93STycho Nightingale 
/*
 * Invoke the host interrupt service routine corresponding to 'vector'.
 *
 * The vector must fall within 32-255 (inclusive); anything else trips the
 * VERIFY.  vmx_call_isr() is handed the vector rebased by 32.
 */
static void
vmx_trigger_hostintr(int vector)
{
	VERIFY(vector >= 32 && vector <= 255);

	const int isr_index = vector - 32;

	vmx_call_isr(isr_index);
}
6814c87aefeSPatrick Mooney 
682bf21cd93STycho Nightingale static void *
vmx_vminit(struct vm * vm)6830153d828SPatrick Mooney vmx_vminit(struct vm *vm)
684bf21cd93STycho Nightingale {
685bf21cd93STycho Nightingale 	uint16_t vpid[VM_MAXCPU];
686007ca332SPatrick Mooney 	int i, error, datasel;
687bf21cd93STycho Nightingale 	struct vmx *vmx;
6884c87aefeSPatrick Mooney 	uint32_t exc_bitmap;
6894c87aefeSPatrick Mooney 	uint16_t maxcpus;
690c3ae3afaSPatrick Mooney 	uint32_t proc_ctls, proc2_ctls, pin_ctls;
6916b641d7aSPatrick Mooney 	uint64_t apic_access_pa = UINT64_MAX;
692bf21cd93STycho Nightingale 
6938130f8e1SPatrick Mooney 	vmx = kmem_zalloc(sizeof (struct vmx), KM_SLEEP);
6948130f8e1SPatrick Mooney 	VERIFY3U((uintptr_t)vmx & PAGE_MASK, ==, 0);
695bf21cd93STycho Nightingale 
6968130f8e1SPatrick Mooney 	vmx->vm = vm;
6970153d828SPatrick Mooney 	vmx->eptp = vmspace_table_root(vm_get_vmspace(vm));
6984c87aefeSPatrick Mooney 
699bf21cd93STycho Nightingale 	/*
700d1c02647SPatrick Mooney 	 * Clean up EP4TA-tagged guest-physical and combined mappings
701bf21cd93STycho Nightingale 	 *
702bf21cd93STycho Nightingale 	 * VMX transitions are not required to invalidate any guest physical
703bf21cd93STycho Nightingale 	 * mappings. So, it may be possible for stale guest physical mappings
704bf21cd93STycho Nightingale 	 * to be present in the processor TLBs.
705bf21cd93STycho Nightingale 	 *
706bf21cd93STycho Nightingale 	 * Combined mappings for this EP4TA are also invalidated for all VPIDs.
707bf21cd93STycho Nightingale 	 */
7080153d828SPatrick Mooney 	hma_vmx_invept_allcpus((uintptr_t)vmx->eptp);
709bf21cd93STycho Nightingale 
7106b641d7aSPatrick Mooney 	vmx_msr_bitmap_initialize(vmx);
711bf21cd93STycho Nightingale 
712bf21cd93STycho Nightingale 	vpid_alloc(vpid, VM_MAXCPU);
713bf21cd93STycho Nightingale 
714c3ae3afaSPatrick Mooney 	/* Grab the established defaults */
715c3ae3afaSPatrick Mooney 	proc_ctls = procbased_ctls;
716c3ae3afaSPatrick Mooney 	proc2_ctls = procbased_ctls2;
717c3ae3afaSPatrick Mooney 	pin_ctls = pinbased_ctls;
718c3ae3afaSPatrick Mooney 	/* For now, default to the available capabilities */
719c3ae3afaSPatrick Mooney 	vmx->vmx_caps = vmx_capabilities;
720c3ae3afaSPatrick Mooney 
721c3ae3afaSPatrick Mooney 	if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
722c3ae3afaSPatrick Mooney 		proc_ctls |= PROCBASED_USE_TPR_SHADOW;
723c3ae3afaSPatrick Mooney 		proc_ctls &= ~PROCBASED_CR8_LOAD_EXITING;
724c3ae3afaSPatrick Mooney 		proc_ctls &= ~PROCBASED_CR8_STORE_EXITING;
725c3ae3afaSPatrick Mooney 	}
726c3ae3afaSPatrick Mooney 	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
727c3ae3afaSPatrick Mooney 		ASSERT(vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW));
728c3ae3afaSPatrick Mooney 
729c3ae3afaSPatrick Mooney 		proc2_ctls |= (PROCBASED2_VIRTUALIZE_APIC_ACCESSES |
731c3ae3afaSPatrick Mooney 		    PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY);
732c3ae3afaSPatrick Mooney 
7336b641d7aSPatrick Mooney 		/*
7346b641d7aSPatrick Mooney 		 * Allocate a page of memory to back the APIC access address for
7356b641d7aSPatrick Mooney 		 * when APICv features are in use.  Guest MMIO accesses should
7366b641d7aSPatrick Mooney 		 * never actually reach this page, but rather be intercepted.
7376b641d7aSPatrick Mooney 		 */
7386b641d7aSPatrick Mooney 		vmx->apic_access_page = kmem_zalloc(PAGESIZE, KM_SLEEP);
7396b641d7aSPatrick Mooney 		VERIFY3U((uintptr_t)vmx->apic_access_page & PAGEOFFSET, ==, 0);
7406b641d7aSPatrick Mooney 		apic_access_pa = vtophys(vmx->apic_access_page);
7416b641d7aSPatrick Mooney 
7424c87aefeSPatrick Mooney 		error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE,
7436b641d7aSPatrick Mooney 		    apic_access_pa);
7444c87aefeSPatrick Mooney 		/* XXX this should really return an error to the caller */
7454c87aefeSPatrick Mooney 		KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error));
7464c87aefeSPatrick Mooney 	}
747c3ae3afaSPatrick Mooney 	if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
748c3ae3afaSPatrick Mooney 		ASSERT(vmx_cap_en(vmx, VMX_CAP_APICV));
749c3ae3afaSPatrick Mooney 
750c3ae3afaSPatrick Mooney 		pin_ctls |= PINBASED_POSTED_INTERRUPT;
751c3ae3afaSPatrick Mooney 	}
7524c87aefeSPatrick Mooney 
7534c87aefeSPatrick Mooney 	maxcpus = vm_get_maxcpus(vm);
754007ca332SPatrick Mooney 	datasel = vmm_get_host_datasel();
7554c87aefeSPatrick Mooney 	for (i = 0; i < maxcpus; i++) {
7564c87aefeSPatrick Mooney 		/*
7574c87aefeSPatrick Mooney 		 * Cache physical address lookups for various components which
7584c87aefeSPatrick Mooney 		 * may be required inside the critical_enter() section implied
7594c87aefeSPatrick Mooney 		 * by VMPTRLD() below.
7604c87aefeSPatrick Mooney 		 */
7616b641d7aSPatrick Mooney 		vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap[i]);
7624c87aefeSPatrick Mooney 		vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]);
7634c87aefeSPatrick Mooney 		vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]);
7644c87aefeSPatrick Mooney 
765007ca332SPatrick Mooney 		vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]);
766007ca332SPatrick Mooney 		vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]);
767bf21cd93STycho Nightingale 
768bf21cd93STycho Nightingale 		vmx_msr_guest_init(vmx, i);
769bf21cd93STycho Nightingale 
770007ca332SPatrick Mooney 		vmcs_load(vmx->vmcs_pa[i]);
771bf21cd93STycho Nightingale 
772007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat());
773007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer());
774007ca332SPatrick Mooney 
775007ca332SPatrick Mooney 		/* Load the control registers */
776007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0());
777007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE);
778007ca332SPatrick Mooney 
779007ca332SPatrick Mooney 		/* Load the segment selectors */
780007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel());
781007ca332SPatrick Mooney 
782007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_ES_SELECTOR, datasel);
783007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_SS_SELECTOR, datasel);
784007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_DS_SELECTOR, datasel);
785007ca332SPatrick Mooney 
786007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel());
787007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel());
788007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel());
789007ca332SPatrick Mooney 
790007ca332SPatrick Mooney 		/*
791007ca332SPatrick Mooney 		 * Configure host sysenter MSRs to be restored on VM exit.
7922699b94cSPatrick Mooney 		 * The thread-specific MSR_INTC_SEP_ESP value is loaded in
7932699b94cSPatrick Mooney 		 * vmx_run.
794007ca332SPatrick Mooney 		 */
795007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL);
796007ca332SPatrick Mooney 		vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP,
797007ca332SPatrick Mooney 		    rdmsr(MSR_SYSENTER_EIP_MSR));
798007ca332SPatrick Mooney 
799007ca332SPatrick Mooney 		/* instruction pointer */
800007ca332SPatrick Mooney 		if (no_flush_rsb) {
801007ca332SPatrick Mooney 			vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest);
802007ca332SPatrick Mooney 		} else {
803007ca332SPatrick Mooney 			vmcs_write(VMCS_HOST_RIP,
804007ca332SPatrick Mooney 			    (uint64_t)vmx_exit_guest_flush_rsb);
805007ca332SPatrick Mooney 		}
806c3ae3afaSPatrick Mooney 
807007ca332SPatrick Mooney 		/* link pointer */
808007ca332SPatrick Mooney 		vmcs_write(VMCS_LINK_POINTER, ~0);
809007ca332SPatrick Mooney 
810007ca332SPatrick Mooney 		vmcs_write(VMCS_EPTP, vmx->eptp);
811007ca332SPatrick Mooney 		vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls);
812007ca332SPatrick Mooney 		vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls);
813007ca332SPatrick Mooney 		vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls);
814007ca332SPatrick Mooney 		vmcs_write(VMCS_EXIT_CTLS, exit_ctls);
815007ca332SPatrick Mooney 		vmcs_write(VMCS_ENTRY_CTLS, entry_ctls);
816007ca332SPatrick Mooney 		vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa);
817007ca332SPatrick Mooney 		vmcs_write(VMCS_VPID, vpid[i]);
8184c87aefeSPatrick Mooney 
8194c87aefeSPatrick Mooney 		if (guest_l1d_flush && !guest_l1d_flush_sw) {
8200153d828SPatrick Mooney 			vmcs_write(VMCS_ENTRY_MSR_LOAD,
8210153d828SPatrick Mooney 			    vtophys(&msr_load_list[0]));
8224c87aefeSPatrick Mooney 			vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT,
8234c87aefeSPatrick Mooney 			    nitems(msr_load_list));
8244c87aefeSPatrick Mooney 			vmcs_write(VMCS_EXIT_MSR_STORE, 0);
8254c87aefeSPatrick Mooney 			vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0);
8264c87aefeSPatrick Mooney 		}
827bf21cd93STycho Nightingale 
8284c87aefeSPatrick Mooney 		/* exception bitmap */
8294c87aefeSPatrick Mooney 		if (vcpu_trace_exceptions(vm, i))
8304c87aefeSPatrick Mooney 			exc_bitmap = 0xffffffff;
8314c87aefeSPatrick Mooney 		else
8324c87aefeSPatrick Mooney 			exc_bitmap = 1 << IDT_MC;
833007ca332SPatrick Mooney 		vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap);
834bf21cd93STycho Nightingale 
8354c87aefeSPatrick Mooney 		vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1;
836007ca332SPatrick Mooney 		vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1);
837bf21cd93STycho Nightingale 
838c3ae3afaSPatrick Mooney 		if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
839007ca332SPatrick Mooney 			vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa);
840154972afSPatrick Mooney 		}
841154972afSPatrick Mooney 
842c3ae3afaSPatrick Mooney 		if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
8436b641d7aSPatrick Mooney 			vmcs_write(VMCS_APIC_ACCESS, apic_access_pa);
844007ca332SPatrick Mooney 			vmcs_write(VMCS_EOI_EXIT0, 0);
845007ca332SPatrick Mooney 			vmcs_write(VMCS_EOI_EXIT1, 0);
846007ca332SPatrick Mooney 			vmcs_write(VMCS_EOI_EXIT2, 0);
847007ca332SPatrick Mooney 			vmcs_write(VMCS_EOI_EXIT3, 0);
8484c87aefeSPatrick Mooney 		}
849c3ae3afaSPatrick Mooney 		if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
850007ca332SPatrick Mooney 			vmcs_write(VMCS_PIR_VECTOR, pirvec);
851007ca332SPatrick Mooney 			vmcs_write(VMCS_PIR_DESC, pir_desc_pa);
8524c87aefeSPatrick Mooney 		}
853007ca332SPatrick Mooney 
854007ca332SPatrick Mooney 		/*
855007ca332SPatrick Mooney 		 * Set up the CR0/4 masks and configure the read shadow state
856007ca332SPatrick Mooney 		 * to the power-on register value from the Intel Sys Arch.
857007ca332SPatrick Mooney 		 *  CR0 - 0x60000010
858007ca332SPatrick Mooney 		 *  CR4 - 0
859007ca332SPatrick Mooney 		 */
860007ca332SPatrick Mooney 		vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask);
861007ca332SPatrick Mooney 		vmcs_write(VMCS_CR0_SHADOW, 0x60000010);
862007ca332SPatrick Mooney 		vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask);
863007ca332SPatrick Mooney 		vmcs_write(VMCS_CR4_SHADOW, 0);
864007ca332SPatrick Mooney 
865007ca332SPatrick Mooney 		vmcs_clear(vmx->vmcs_pa[i]);
866bf21cd93STycho Nightingale 
8674c87aefeSPatrick Mooney 		vmx->cap[i].set = 0;
868c3ae3afaSPatrick Mooney 		vmx->cap[i].proc_ctls = proc_ctls;
869c3ae3afaSPatrick Mooney 		vmx->cap[i].proc_ctls2 = proc2_ctls;
870154972afSPatrick Mooney 		vmx->cap[i].exc_bitmap = exc_bitmap;
871bf21cd93STycho Nightingale 
8724c87aefeSPatrick Mooney 		vmx->state[i].nextrip = ~0;
8734c87aefeSPatrick Mooney 		vmx->state[i].lastcpu = NOCPU;
8744c87aefeSPatrick Mooney 		vmx->state[i].vpid = vpid[i];
875bf21cd93STycho Nightingale 	}
876bf21cd93STycho Nightingale 
877bf21cd93STycho Nightingale 	return (vmx);
878bf21cd93STycho Nightingale }
879bf21cd93STycho Nightingale 
880bf21cd93STycho Nightingale static int
vmx_handle_cpuid(struct vm * vm,int vcpu,struct vmxctx * vmxctx)881bf21cd93STycho Nightingale vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx)
882bf21cd93STycho Nightingale {
8834c87aefeSPatrick Mooney 	int handled;
884bf21cd93STycho Nightingale 
8853c5f2a9dSPatrick Mooney 	handled = x86_emulate_cpuid(vm, vcpu, (uint64_t *)&vmxctx->guest_rax,
8863c5f2a9dSPatrick Mooney 	    (uint64_t *)&vmxctx->guest_rbx, (uint64_t *)&vmxctx->guest_rcx,
8873c5f2a9dSPatrick Mooney 	    (uint64_t *)&vmxctx->guest_rdx);
888bf21cd93STycho Nightingale 	return (handled);
889bf21cd93STycho Nightingale }
890bf21cd93STycho Nightingale 
8914c87aefeSPatrick Mooney static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved");
8924c87aefeSPatrick Mooney static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done");
8934c87aefeSPatrick Mooney 
/* Invalidation type selectors accepted as the 'type' argument of invvpid() */
#define	INVVPID_TYPE_ADDRESS		0UL
#define	INVVPID_TYPE_SINGLE_CONTEXT	1UL
#define	INVVPID_TYPE_ALL_CONTEXTS	2UL
897007ca332SPatrick Mooney 
898007ca332SPatrick Mooney struct invvpid_desc {
899007ca332SPatrick Mooney 	uint16_t	vpid;
900007ca332SPatrick Mooney 	uint16_t	_res1;
901007ca332SPatrick Mooney 	uint32_t	_res2;
902007ca332SPatrick Mooney 	uint64_t	linear_addr;
903007ca332SPatrick Mooney };
9042699b94cSPatrick Mooney CTASSERT(sizeof (struct invvpid_desc) == 16);
905007ca332SPatrick Mooney 
906007ca332SPatrick Mooney static __inline void
invvpid(uint64_t type,struct invvpid_desc desc)907007ca332SPatrick Mooney invvpid(uint64_t type, struct invvpid_desc desc)
908007ca332SPatrick Mooney {
909007ca332SPatrick Mooney 	int error;
910007ca332SPatrick Mooney 
91170ae9a33SPatrick Mooney 	DTRACE_PROBE3(vmx__invvpid, uint64_t, type, uint16_t, desc.vpid,
91270ae9a33SPatrick Mooney 	    uint64_t, desc.linear_addr);
91370ae9a33SPatrick Mooney 
914007ca332SPatrick Mooney 	__asm __volatile("invvpid %[desc], %[type];"
915007ca332SPatrick Mooney 	    VMX_SET_ERROR_CODE_ASM
916007ca332SPatrick Mooney 	    : [error] "=r" (error)
917007ca332SPatrick Mooney 	    : [desc] "m" (desc), [type] "r" (type)
918007ca332SPatrick Mooney 	    : "memory");
919007ca332SPatrick Mooney 
92070ae9a33SPatrick Mooney 	if (error) {
921007ca332SPatrick Mooney 		panic("invvpid error %d", error);
92270ae9a33SPatrick Mooney 	}
923007ca332SPatrick Mooney }
924007ca332SPatrick Mooney 
9254c87aefeSPatrick Mooney /*
926d1c02647SPatrick Mooney  * Invalidate guest mappings identified by its VPID from the TLB.
927d1c02647SPatrick Mooney  *
928d1c02647SPatrick Mooney  * This is effectively a flush of the guest TLB, removing only "combined
929d1c02647SPatrick Mooney  * mappings" (to use the VMX parlance).  Actions which modify the EPT structures
930d1c02647SPatrick Mooney  * for the instance (such as unmapping GPAs) would require an 'invept' flush.
9314c87aefeSPatrick Mooney  */
93270ae9a33SPatrick Mooney static void
vmx_invvpid(struct vmx * vmx,int vcpu,int running)9330153d828SPatrick Mooney vmx_invvpid(struct vmx *vmx, int vcpu, int running)
934bf21cd93STycho Nightingale {
935bf21cd93STycho Nightingale 	struct vmxstate *vmxstate;
9360153d828SPatrick Mooney 	struct vmspace *vms;
937bf21cd93STycho Nightingale 
938bf21cd93STycho Nightingale 	vmxstate = &vmx->state[vcpu];
93970ae9a33SPatrick Mooney 	if (vmxstate->vpid == 0) {
940bf21cd93STycho Nightingale 		return;
94170ae9a33SPatrick Mooney 	}
942bf21cd93STycho Nightingale 
9434c87aefeSPatrick Mooney 	if (!running) {
9444c87aefeSPatrick Mooney 		/*
9454c87aefeSPatrick Mooney 		 * Set the 'lastcpu' to an invalid host cpu.
9464c87aefeSPatrick Mooney 		 *
9474c87aefeSPatrick Mooney 		 * This will invalidate TLB entries tagged with the vcpu's
9484c87aefeSPatrick Mooney 		 * vpid the next time it runs via vmx_set_pcpu_defaults().
9494c87aefeSPatrick Mooney 		 */
9504c87aefeSPatrick Mooney 		vmxstate->lastcpu = NOCPU;
9514c87aefeSPatrick Mooney 		return;
9524c87aefeSPatrick Mooney 	}
953bf21cd93STycho Nightingale 
954bf21cd93STycho Nightingale 	/*
9554c87aefeSPatrick Mooney 	 * Invalidate all mappings tagged with 'vpid'
956bf21cd93STycho Nightingale 	 *
957d1c02647SPatrick Mooney 	 * This is done when a vCPU moves between host CPUs, where there may be
958d1c02647SPatrick Mooney 	 * stale TLB entries for this VPID on the target, or if emulated actions
959d1c02647SPatrick Mooney 	 * in the guest CPU have incurred an explicit TLB flush.
960bf21cd93STycho Nightingale 	 */
96170ae9a33SPatrick Mooney 	vms = vm_get_vmspace(vmx->vm);
9620153d828SPatrick Mooney 	if (vmspace_table_gen(vms) == vmx->eptgen[curcpu]) {
96370ae9a33SPatrick Mooney 		struct invvpid_desc invvpid_desc = {
96470ae9a33SPatrick Mooney 			.vpid = vmxstate->vpid,
96570ae9a33SPatrick Mooney 			.linear_addr = 0,
96670ae9a33SPatrick Mooney 			._res1 = 0,
96770ae9a33SPatrick Mooney 			._res2 = 0,
96870ae9a33SPatrick Mooney 		};
96970ae9a33SPatrick Mooney 
970bf21cd93STycho Nightingale 		invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc);
9714c87aefeSPatrick Mooney 		vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_DONE, 1);
9724c87aefeSPatrick Mooney 	} else {
9734c87aefeSPatrick Mooney 		/*
974d1c02647SPatrick Mooney 		 * The INVVPID can be skipped if an INVEPT is going to be
975d1c02647SPatrick Mooney 		 * performed before entering the guest.  The INVEPT will
976d1c02647SPatrick Mooney 		 * invalidate combined mappings for the EP4TA associated with
977d1c02647SPatrick Mooney 		 * this guest, in all VPIDs.
9784c87aefeSPatrick Mooney 		 */
9794c87aefeSPatrick Mooney 		vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1);
980bf21cd93STycho Nightingale 	}
981bf21cd93STycho Nightingale }
982bf21cd93STycho Nightingale 
9830153d828SPatrick Mooney static __inline void
invept(uint64_t type,uint64_t eptp)9840153d828SPatrick Mooney invept(uint64_t type, uint64_t eptp)
9850153d828SPatrick Mooney {
9860153d828SPatrick Mooney 	int error;
9870153d828SPatrick Mooney 	struct invept_desc {
9880153d828SPatrick Mooney 		uint64_t eptp;
9890153d828SPatrick Mooney 		uint64_t _resv;
9900153d828SPatrick Mooney 	} desc = { eptp, 0 };
9910153d828SPatrick Mooney 
99270ae9a33SPatrick Mooney 	DTRACE_PROBE2(vmx__invept, uint64_t, type, uint64_t, eptp);
99370ae9a33SPatrick Mooney 
9940153d828SPatrick Mooney 	__asm __volatile("invept %[desc], %[type];"
9950153d828SPatrick Mooney 	    VMX_SET_ERROR_CODE_ASM
9960153d828SPatrick Mooney 	    : [error] "=r" (error)
9970153d828SPatrick Mooney 	    : [desc] "m" (desc), [type] "r" (type)
9980153d828SPatrick Mooney 	    : "memory");
9990153d828SPatrick Mooney 
10000153d828SPatrick Mooney 	if (error != 0) {
10010153d828SPatrick Mooney 		panic("invvpid error %d", error);
10020153d828SPatrick Mooney 	}
10030153d828SPatrick Mooney }
10040153d828SPatrick Mooney 
10054c87aefeSPatrick Mooney static void
vmx_set_pcpu_defaults(struct vmx * vmx,int vcpu)10060153d828SPatrick Mooney vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu)
1007bf21cd93STycho Nightingale {
10084c87aefeSPatrick Mooney 	struct vmxstate *vmxstate;
1009bf21cd93STycho Nightingale 
10104c87aefeSPatrick Mooney 	/*
10114c87aefeSPatrick Mooney 	 * Regardless of whether the VM appears to have migrated between CPUs,
10124c87aefeSPatrick Mooney 	 * save the host sysenter stack pointer.  As it points to the kernel
10134c87aefeSPatrick Mooney 	 * stack of each thread, the correct value must be maintained for every
10144c87aefeSPatrick Mooney 	 * trip into the critical section.
10154c87aefeSPatrick Mooney 	 */
10164c87aefeSPatrick Mooney 	vmcs_write(VMCS_HOST_IA32_SYSENTER_ESP, rdmsr(MSR_SYSENTER_ESP_MSR));
1017bf21cd93STycho Nightingale 
10184c87aefeSPatrick Mooney 	/*
10194c87aefeSPatrick Mooney 	 * Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or
10204c87aefeSPatrick Mooney 	 * migration between host CPUs with differing TSC values.
10214c87aefeSPatrick Mooney 	 */
1022007ca332SPatrick Mooney 	vmx_apply_tsc_adjust(vmx, vcpu);
10234c87aefeSPatrick Mooney 
10244c87aefeSPatrick Mooney 	vmxstate = &vmx->state[vcpu];
10254c87aefeSPatrick Mooney 	if (vmxstate->lastcpu == curcpu)
10264c87aefeSPatrick Mooney 		return;
10274c87aefeSPatrick Mooney 
10284c87aefeSPatrick Mooney 	vmxstate->lastcpu = curcpu;
10294c87aefeSPatrick Mooney 
10304c87aefeSPatrick Mooney 	vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1);
10314c87aefeSPatrick Mooney 
10324c87aefeSPatrick Mooney 	/* Load the per-CPU IDT address */
10334c87aefeSPatrick Mooney 	vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase());
10344c87aefeSPatrick Mooney 	vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase());
10354c87aefeSPatrick Mooney 	vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase());
10364c87aefeSPatrick Mooney 	vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase());
10370153d828SPatrick Mooney 	vmx_invvpid(vmx, vcpu, 1);
10384c87aefeSPatrick Mooney }
10394c87aefeSPatrick Mooney 
10404c87aefeSPatrick Mooney /*
10414c87aefeSPatrick Mooney  * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set.
10424c87aefeSPatrick Mooney  */
10444c87aefeSPatrick Mooney 
10454c87aefeSPatrick Mooney static __inline void
vmx_set_int_window_exiting(struct vmx * vmx,int vcpu)10464c87aefeSPatrick Mooney vmx_set_int_window_exiting(struct vmx *vmx, int vcpu)
1047bf21cd93STycho Nightingale {
1048bf21cd93STycho Nightingale 	if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) == 0) {
1049d4f59ae5SPatrick Mooney 		/* Enable interrupt window exiting */
1050bf21cd93STycho Nightingale 		vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING;
1051bf21cd93STycho Nightingale 		vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
1052bf21cd93STycho Nightingale 	}
1053bf21cd93STycho Nightingale }
1054bf21cd93STycho Nightingale 
10554c87aefeSPatrick Mooney static __inline void
vmx_clear_int_window_exiting(struct vmx * vmx,int vcpu)1056bf21cd93STycho Nightingale vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu)
1057bf21cd93STycho Nightingale {
1058bf21cd93STycho Nightingale 	KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0,
10599dc804b9SPatrick Mooney 	    ("intr_window_exiting not set: %x", vmx->cap[vcpu].proc_ctls));
1060d4f59ae5SPatrick Mooney 
1061d4f59ae5SPatrick Mooney 	/* Disable interrupt window exiting */
1062bf21cd93STycho Nightingale 	vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING;
1063bf21cd93STycho Nightingale 	vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
1064bf21cd93STycho Nightingale }
1065bf21cd93STycho Nightingale 
1066c74a40a5SPatrick Mooney static __inline bool
vmx_nmi_window_exiting(struct vmx * vmx,int vcpu)1067c74a40a5SPatrick Mooney vmx_nmi_window_exiting(struct vmx *vmx, int vcpu)
1068c74a40a5SPatrick Mooney {
1069c74a40a5SPatrick Mooney 	return ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0);
1070c74a40a5SPatrick Mooney }
1071c74a40a5SPatrick Mooney 
10724c87aefeSPatrick Mooney static __inline void
vmx_set_nmi_window_exiting(struct vmx * vmx,int vcpu)1073bf21cd93STycho Nightingale vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu)
1074bf21cd93STycho Nightingale {
1075c74a40a5SPatrick Mooney 	if (!vmx_nmi_window_exiting(vmx, vcpu)) {
1076bf21cd93STycho Nightingale 		vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING;
1077bf21cd93STycho Nightingale 		vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
1078bf21cd93STycho Nightingale 	}
1079bf21cd93STycho Nightingale }
1080bf21cd93STycho Nightingale 
10814c87aefeSPatrick Mooney static __inline void
vmx_clear_nmi_window_exiting(struct vmx * vmx,int vcpu)1082bf21cd93STycho Nightingale vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu)
1083bf21cd93STycho Nightingale {
1084c74a40a5SPatrick Mooney 	ASSERT(vmx_nmi_window_exiting(vmx, vcpu));
1085bf21cd93STycho Nightingale 	vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING;
1086bf21cd93STycho Nightingale 	vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls);
1087bf21cd93STycho Nightingale }
1088bf21cd93STycho Nightingale 
10894c87aefeSPatrick Mooney /*
10904c87aefeSPatrick Mooney  * Set the TSC adjustment, taking into account the offsets measured between
10914c87aefeSPatrick Mooney  * host physical CPUs.  This is required even if the guest has not set a TSC
10924c87aefeSPatrick Mooney  * offset since vCPUs inherit the TSC offset of whatever physical CPU it has
10934c87aefeSPatrick Mooney  * migrated onto.  Without this mitigation, un-synched host TSCs will convey
10944c87aefeSPatrick Mooney  * the appearance of TSC time-travel to the guest as its vCPUs migrate.
10954c87aefeSPatrick Mooney  */
1096007ca332SPatrick Mooney static void
vmx_apply_tsc_adjust(struct vmx * vmx,int vcpu)10974c87aefeSPatrick Mooney vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu)
10984c87aefeSPatrick Mooney {
10999250eb13SPatrick Mooney 	const uint64_t offset = vcpu_tsc_offset(vmx->vm, vcpu, true);
11004c87aefeSPatrick Mooney 
11014c87aefeSPatrick Mooney 	ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET);
11024c87aefeSPatrick Mooney 
11039250eb13SPatrick Mooney 	if (vmx->tsc_offset_active[vcpu] != offset) {
11049250eb13SPatrick Mooney 		vmcs_write(VMCS_TSC_OFFSET, offset);
11059250eb13SPatrick Mooney 		vmx->tsc_offset_active[vcpu] = offset;
11064c87aefeSPatrick Mooney 	}
11074c87aefeSPatrick Mooney }
1108bf21cd93STycho Nightingale 
11103d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_NMI		== VM_INTINFO_NMI);
11173d097f7dSPatrick Mooney 
11183d097f7dSPatrick Mooney static uint64_t
vmx_idtvec_to_intinfo(uint32_t info)11193d097f7dSPatrick Mooney vmx_idtvec_to_intinfo(uint32_t info)
11203d097f7dSPatrick Mooney {
11213d097f7dSPatrick Mooney 	ASSERT(info & VMCS_IDT_VEC_VALID);
11223d097f7dSPatrick Mooney 
11233d097f7dSPatrick Mooney 	const uint32_t type = info & VMCS_INTR_T_MASK;
11243d097f7dSPatrick Mooney 	const uint8_t vec = info & 0xff;
11253d097f7dSPatrick Mooney 
11263d097f7dSPatrick Mooney 	switch (type) {
11273d097f7dSPatrick Mooney 	case VMCS_INTR_T_HWINTR:
11283d097f7dSPatrick Mooney 	case VMCS_INTR_T_NMI:
11293d097f7dSPatrick Mooney 	case VMCS_INTR_T_HWEXCEPTION:
11303d097f7dSPatrick Mooney 	case VMCS_INTR_T_SWINTR:
11313d097f7dSPatrick Mooney 	case VMCS_INTR_T_PRIV_SWEXCEPTION:
11323d097f7dSPatrick Mooney 	case VMCS_INTR_T_SWEXCEPTION:
11333d097f7dSPatrick Mooney 		break;
11343d097f7dSPatrick Mooney 	default:
11353d097f7dSPatrick Mooney 		panic("unexpected event type 0x%03x", type);
11363d097f7dSPatrick Mooney 	}
11373d097f7dSPatrick Mooney 
11383d097f7dSPatrick Mooney 	uint64_t intinfo = VM_INTINFO_VALID | type | vec;
11393d097f7dSPatrick Mooney 	if (info & VMCS_IDT_VEC_ERRCODE_VALID) {
11403d097f7dSPatrick Mooney 		const uint32_t errcode = vmcs_read(VMCS_IDT_VECTORING_ERROR);
11413d097f7dSPatrick Mooney 		intinfo |= (uint64_t)errcode << 32;
11423d097f7dSPatrick Mooney 	}
11433d097f7dSPatrick Mooney 
11443d097f7dSPatrick Mooney 	return (intinfo);
11453d097f7dSPatrick Mooney }
11463d097f7dSPatrick Mooney 
11473d097f7dSPatrick Mooney static void
vmx_inject_intinfo(uint64_t info)11483d097f7dSPatrick Mooney vmx_inject_intinfo(uint64_t info)
11493d097f7dSPatrick Mooney {
11503d097f7dSPatrick Mooney 	ASSERT(VM_INTINFO_PENDING(info));
11513d097f7dSPatrick Mooney 	ASSERT0(info & VM_INTINFO_MASK_RSVD);
11523d097f7dSPatrick Mooney 
11533d097f7dSPatrick Mooney 	/*
11543d097f7dSPatrick Mooney 	 * The bhyve format matches that of the VMCS, which is ensured by the
11553d097f7dSPatrick Mooney 	 * CTASSERTs above.
11563d097f7dSPatrick Mooney 	 */
11573d097f7dSPatrick Mooney 	uint32_t inject = info;
11583d097f7dSPatrick Mooney 	switch (VM_INTINFO_VECTOR(info)) {
11593d097f7dSPatrick Mooney 	case IDT_BP:
11603d097f7dSPatrick Mooney 	case IDT_OF:
11613d097f7dSPatrick Mooney 		/*
11623d097f7dSPatrick Mooney 		 * VT-x requires #BP and #OF to be injected as software
11633d097f7dSPatrick Mooney 		 * exceptions.
11643d097f7dSPatrick Mooney 		 */
11653d097f7dSPatrick Mooney 		inject &= ~VMCS_INTR_T_MASK;
11663d097f7dSPatrick Mooney 		inject |= VMCS_INTR_T_SWEXCEPTION;
11673d097f7dSPatrick Mooney 		break;
11683d097f7dSPatrick Mooney 	default:
11693d097f7dSPatrick Mooney 		break;
11703d097f7dSPatrick Mooney 	}
11713d097f7dSPatrick Mooney 
11723d097f7dSPatrick Mooney 	if (VM_INTINFO_HAS_ERRCODE(info)) {
11733d097f7dSPatrick Mooney 		vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR,
11743d097f7dSPatrick Mooney 		    VM_INTINFO_ERRCODE(info));
11753d097f7dSPatrick Mooney 	}
11763d097f7dSPatrick Mooney 	vmcs_write(VMCS_ENTRY_INTR_INFO, inject);
11773d097f7dSPatrick Mooney }
11783d097f7dSPatrick Mooney 
/*
 * Guest interruptibility-state bits which preclude injection of an NMI
 * (NMI_BLOCKING) or of a maskable hardware interrupt (HWINTR_BLOCKING).
 *
 * NOTE(review): the second operand of NMI_BLOCKING and the definition of
 * HWINTR_BLOCKING were absent here; they are restored to match the upstream
 * bhyve VT-x code.  Confirm against the tree before relying on them.
 */
#define	NMI_BLOCKING	(VMCS_INTERRUPTIBILITY_NMI_BLOCKING |		\
			VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
#define	HWINTR_BLOCKING	(VMCS_INTERRUPTIBILITY_STI_BLOCKING |		\
			VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
1183bf21cd93STycho Nightingale 
1184bf21cd93STycho Nightingale static void
vmx_inject_nmi(struct vmx * vmx,int vcpu)1185bf21cd93STycho Nightingale vmx_inject_nmi(struct vmx *vmx, int vcpu)
1186bf21cd93STycho Nightingale {
1187c74a40a5SPatrick Mooney 	ASSERT0(vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & NMI_BLOCKING);
1188c74a40a5SPatrick Mooney 	ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);
1189bf21cd93STycho Nightingale 
1190bf21cd93STycho Nightingale 	/*
1191bf21cd93STycho Nightingale 	 * Inject the virtual NMI. The vector must be the NMI IDT entry
1192bf21cd93STycho Nightingale 	 * or the VMCS entry check will fail.
1193bf21cd93STycho Nightingale 	 */
1194c74a40a5SPatrick Mooney 	vmcs_write(VMCS_ENTRY_INTR_INFO,
1195c74a40a5SPatrick Mooney 	    IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID);
1196bf21cd93STycho Nightingale 
1197bf21cd93STycho Nightingale 	/* Clear the request */
1198bf21cd93STycho Nightingale 	vm_nmi_clear(vmx->vm, vcpu);
1199bf21cd93STycho Nightingale }
1200bf21cd93STycho Nightingale 
1201c74a40a5SPatrick Mooney /*
1202c74a40a5SPatrick Mooney  * Inject exceptions, NMIs, and ExtINTs.
1203c74a40a5SPatrick Mooney  *
1204c74a40a5SPatrick Mooney  * The logic behind these are complicated and may involve mutex contention, so
1205c74a40a5SPatrick Mooney  * the injection is performed without the protection of host CPU interrupts
1206c74a40a5SPatrick Mooney  * being disabled.  This means a racing notification could be "lost",
1207c74a40a5SPatrick Mooney  * necessitating a later call to vmx_inject_recheck() to close that window
1208c74a40a5SPatrick Mooney  * of opportunity.
1209c74a40a5SPatrick Mooney  */
1210c74a40a5SPatrick Mooney static enum event_inject_state
vmx_inject_events(struct vmx * vmx,int vcpu,uint64_t rip)1211c74a40a5SPatrick Mooney vmx_inject_events(struct vmx *vmx, int vcpu, uint64_t rip)
1212bf21cd93STycho Nightingale {
1213c74a40a5SPatrick Mooney 	uint64_t entryinfo;
1214bf21cd93STycho Nightingale 	uint32_t gi, info;
12154c87aefeSPatrick Mooney 	int vector;
1216c74a40a5SPatrick Mooney 	enum event_inject_state state;
12174c87aefeSPatrick Mooney 
12184c87aefeSPatrick Mooney 	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
12194c87aefeSPatrick Mooney 	info = vmcs_read(VMCS_ENTRY_INTR_INFO);
1220c74a40a5SPatrick Mooney 	state = EIS_CAN_INJECT;
12214c87aefeSPatrick Mooney 
1222c74a40a5SPatrick Mooney 	/* Clear any interrupt blocking if the guest %rip has changed */
1223c74a40a5SPatrick Mooney 	if (vmx->state[vcpu].nextrip != rip && (gi & HWINTR_BLOCKING) != 0) {
12244c87aefeSPatrick Mooney 		gi &= ~HWINTR_BLOCKING;
12254c87aefeSPatrick Mooney 		vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
12264c87aefeSPatrick Mooney 	}
12274c87aefeSPatrick Mooney 
12284c87aefeSPatrick Mooney 	/*
12294c87aefeSPatrick Mooney 	 * It could be that an interrupt is already pending for injection from
12304c87aefeSPatrick Mooney 	 * the VMCS.  This would be the case if the vCPU exited for conditions
12314c87aefeSPatrick Mooney 	 * such as an AST before a vm-entry delivered the injection.
12324c87aefeSPatrick Mooney 	 */
12334c87aefeSPatrick Mooney 	if ((info & VMCS_INTR_VALID) != 0) {
1234c74a40a5SPatrick Mooney 		return (EIS_EV_EXISTING | EIS_REQ_EXIT);
12354c87aefeSPatrick Mooney 	}
1236bf21cd93STycho Nightingale 
1237bf21cd93STycho Nightingale 	if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
12383d097f7dSPatrick Mooney 		vmx_inject_intinfo(entryinfo);
1239c74a40a5SPatrick Mooney 		state = EIS_EV_INJECTED;
12404c87aefeSPatrick Mooney 	}
12414c87aefeSPatrick Mooney 
12424c87aefeSPatrick Mooney 	if (vm_nmi_pending(vmx->vm, vcpu)) {
12434c87aefeSPatrick Mooney 		/*
1244c74a40a5SPatrick Mooney 		 * If there are no conditions blocking NMI injection then inject
1245c74a40a5SPatrick Mooney 		 * it directly here otherwise enable "NMI window exiting" to
1246c74a40a5SPatrick Mooney 		 * inject it as soon as we can.
12474c87aefeSPatrick Mooney 		 *
1248c74a40a5SPatrick Mooney 		 * According to the Intel manual, some CPUs do not allow NMI
1249c74a40a5SPatrick Mooney 		 * injection when STI_BLOCKING is active.  That check is
1250c74a40a5SPatrick Mooney 		 * enforced here, regardless of CPU capability.  If running on a
1251c74a40a5SPatrick Mooney 		 * CPU without such a restriction it will immediately exit and
1252c74a40a5SPatrick Mooney 		 * the NMI will be injected in the "NMI window exiting" handler.
12534c87aefeSPatrick Mooney 		 */
12544c87aefeSPatrick Mooney 		if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) {
1255c74a40a5SPatrick Mooney 			if (state == EIS_CAN_INJECT) {
1256c74a40a5SPatrick Mooney 				vmx_inject_nmi(vmx, vcpu);
1257c74a40a5SPatrick Mooney 				state = EIS_EV_INJECTED;
12584c87aefeSPatrick Mooney 			} else {
1259c74a40a5SPatrick Mooney 				return (state | EIS_REQ_EXIT);
12604c87aefeSPatrick Mooney 			}
12614c87aefeSPatrick Mooney 		} else {
12624c87aefeSPatrick Mooney 			vmx_set_nmi_window_exiting(vmx, vcpu);
12634c87aefeSPatrick Mooney 		}
12644c87aefeSPatrick Mooney 	}
12654c87aefeSPatrick Mooney 
12664c87aefeSPatrick Mooney 	if (vm_extint_pending(vmx->vm, vcpu)) {
1267c74a40a5SPatrick Mooney 		if (state != EIS_CAN_INJECT) {
1268c74a40a5SPatrick Mooney 			return (state | EIS_REQ_EXIT);
1269c74a40a5SPatrick Mooney 		}
1270c74a40a5SPatrick Mooney 		if ((gi & HWINTR_BLOCKING) != 0 ||
1271c74a40a5SPatrick Mooney 		    (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
1272c74a40a5SPatrick Mooney 			return (EIS_GI_BLOCK);
1273c74a40a5SPatrick Mooney 		}
1274c74a40a5SPatrick Mooney 
12754c87aefeSPatrick Mooney 		/* Ask the legacy pic for a vector to inject */
12764c87aefeSPatrick Mooney 		vatpic_pending_intr(vmx->vm, &vector);
12774c87aefeSPatrick Mooney 
12784c87aefeSPatrick Mooney 		/*
12794c87aefeSPatrick Mooney 		 * From the Intel SDM, Volume 3, Section "Maskable
12804c87aefeSPatrick Mooney 		 * Hardware Interrupts":
12814c87aefeSPatrick Mooney 		 * - maskable interrupt vectors [0,255] can be delivered
12824c87aefeSPatrick Mooney 		 *   through the INTR pin.
12834c87aefeSPatrick Mooney 		 */
12844c87aefeSPatrick Mooney 		KASSERT(vector >= 0 && vector <= 255,
12854c87aefeSPatrick Mooney 		    ("invalid vector %d from INTR", vector));
12864c87aefeSPatrick Mooney 
1287c74a40a5SPatrick Mooney 		/* Inject the interrupt */
1288c74a40a5SPatrick Mooney 		vmcs_write(VMCS_ENTRY_INTR_INFO,
1289c74a40a5SPatrick Mooney 		    VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);
12904c87aefeSPatrick Mooney 
1291c74a40a5SPatrick Mooney 		vm_extint_clear(vmx->vm, vcpu);
1292c74a40a5SPatrick Mooney 		vatpic_intr_accepted(vmx->vm, vector);
1293c74a40a5SPatrick Mooney 		state = EIS_EV_INJECTED;
12944c87aefeSPatrick Mooney 	}
1295c74a40a5SPatrick Mooney 
1296c74a40a5SPatrick Mooney 	return (state);
1297c74a40a5SPatrick Mooney }
1298c74a40a5SPatrick Mooney 
1299c74a40a5SPatrick Mooney /*
1300c74a40a5SPatrick Mooney  * Inject any interrupts pending on the vLAPIC.
1301c74a40a5SPatrick Mooney  *
1302c74a40a5SPatrick Mooney  * This is done with host CPU interrupts disabled so notification IPIs, either
1303c74a40a5SPatrick Mooney  * from the standard vCPU notification or APICv posted interrupts, will be
1304c74a40a5SPatrick Mooney  * queued on the host APIC and recognized when entering VMX context.
1305c74a40a5SPatrick Mooney  */
1306c74a40a5SPatrick Mooney static enum event_inject_state
vmx_inject_vlapic(struct vmx * vmx,int vcpu,struct vlapic * vlapic)1307c74a40a5SPatrick Mooney vmx_inject_vlapic(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
1308c74a40a5SPatrick Mooney {
1309c74a40a5SPatrick Mooney 	int vector;
1310c74a40a5SPatrick Mooney 
1311c74a40a5SPatrick Mooney 	if (!vlapic_pending_intr(vlapic, &vector)) {
1312c74a40a5SPatrick Mooney 		return (EIS_CAN_INJECT);
13134c87aefeSPatrick Mooney 	}
13144c87aefeSPatrick Mooney 
1315c74a40a5SPatrick Mooney 	/*
1316c74a40a5SPatrick Mooney 	 * From the Intel SDM, Volume 3, Section "Maskable
1317c74a40a5SPatrick Mooney 	 * Hardware Interrupts":
1318c74a40a5SPatrick Mooney 	 * - maskable interrupt vectors [16,255] can be delivered
1319c74a40a5SPatrick Mooney 	 *   through the local APIC.
13202699b94cSPatrick Mooney 	 */
1321c74a40a5SPatrick Mooney 	KASSERT(vector >= 16 && vector <= 255,
1322c74a40a5SPatrick Mooney 	    ("invalid vector %d from local APIC", vector));
13234c87aefeSPatrick Mooney 
1324c74a40a5SPatrick Mooney 	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
1325c74a40a5SPatrick Mooney 		uint16_t status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
1326c74a40a5SPatrick Mooney 		uint16_t status_new = (status_old & 0xff00) | vector;
13274c87aefeSPatrick Mooney 
13284c87aefeSPatrick Mooney 		/*
1329c74a40a5SPatrick Mooney 		 * The APICv state will have been synced into the vLAPIC
1330c74a40a5SPatrick Mooney 		 * as part of vlapic_pending_intr().  Prepare the VMCS
1331c74a40a5SPatrick Mooney 		 * for the to-be-injected pending interrupt.
13324c87aefeSPatrick Mooney 		 */
1333c74a40a5SPatrick Mooney 		if (status_new > status_old) {
1334c74a40a5SPatrick Mooney 			vmcs_write(VMCS_GUEST_INTR_STATUS, status_new);
1335c74a40a5SPatrick Mooney 		}
1336c74a40a5SPatrick Mooney 
1337c74a40a5SPatrick Mooney 		/*
1338c74a40a5SPatrick Mooney 		 * Ensure VMCS state regarding EOI traps is kept in sync
1339c74a40a5SPatrick Mooney 		 * with the TMRs in the vlapic.
1340c74a40a5SPatrick Mooney 		 */
1341c74a40a5SPatrick Mooney 		vmx_apicv_sync_tmr(vlapic);
1342c74a40a5SPatrick Mooney 
1343c74a40a5SPatrick Mooney 		/*
1344c74a40a5SPatrick Mooney 		 * The rest of the injection process for injecting the
1345c74a40a5SPatrick Mooney 		 * interrupt(s) is handled by APICv. It does not preclude other
1346c74a40a5SPatrick Mooney 		 * event injection from occurring.
1347c74a40a5SPatrick Mooney 		 */
1348c74a40a5SPatrick Mooney 		return (EIS_CAN_INJECT);
13494c87aefeSPatrick Mooney 	}
13504c87aefeSPatrick Mooney 
1351c74a40a5SPatrick Mooney 	ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);
13524c87aefeSPatrick Mooney 
1353c74a40a5SPatrick Mooney 	/* Does guest interruptability block injection? */
1354c74a40a5SPatrick Mooney 	if ((vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & HWINTR_BLOCKING) != 0 ||
1355c74a40a5SPatrick Mooney 	    (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
1356c74a40a5SPatrick Mooney 		return (EIS_GI_BLOCK);
1357c74a40a5SPatrick Mooney 	}
1358c74a40a5SPatrick Mooney 
1359c74a40a5SPatrick Mooney 	/* Inject the interrupt */
1360c74a40a5SPatrick Mooney 	vmcs_write(VMCS_ENTRY_INTR_INFO,
1361c74a40a5SPatrick Mooney 	    VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);
1362c74a40a5SPatrick Mooney 
1363c74a40a5SPatrick Mooney 	/* Update the Local APIC ISR */
1364c74a40a5SPatrick Mooney 	vlapic_intr_accepted(vlapic, vector);
1365c74a40a5SPatrick Mooney 
1366c74a40a5SPatrick Mooney 	return (EIS_EV_INJECTED);
1367c74a40a5SPatrick Mooney }
1368c74a40a5SPatrick Mooney 
1369c74a40a5SPatrick Mooney /*
1370c74a40a5SPatrick Mooney  * Re-check for events to be injected.
1371c74a40a5SPatrick Mooney  *
1372c74a40a5SPatrick Mooney  * Once host CPU interrupts are disabled, check for the presence of any events
1373c74a40a5SPatrick Mooney  * which require injection processing.  If an exit is required upon injection,
1374c74a40a5SPatrick Mooney  * or once the guest becomes interruptable, that will be configured too.
1375c74a40a5SPatrick Mooney  */
1376c74a40a5SPatrick Mooney static bool
vmx_inject_recheck(struct vmx * vmx,int vcpu,enum event_inject_state state)1377c74a40a5SPatrick Mooney vmx_inject_recheck(struct vmx *vmx, int vcpu, enum event_inject_state state)
1378c74a40a5SPatrick Mooney {
1379c74a40a5SPatrick Mooney 	if (state == EIS_CAN_INJECT) {
1380c74a40a5SPatrick Mooney 		if (vm_nmi_pending(vmx->vm, vcpu) &&
1381c74a40a5SPatrick Mooney 		    !vmx_nmi_window_exiting(vmx, vcpu)) {
1382c74a40a5SPatrick Mooney 			/* queued NMI not blocked by NMI-window-exiting */
1383c74a40a5SPatrick Mooney 			return (true);
1384c74a40a5SPatrick Mooney 		}
1385c74a40a5SPatrick Mooney 		if (vm_extint_pending(vmx->vm, vcpu)) {
1386c74a40a5SPatrick Mooney 			/* queued ExtINT not blocked by existing injection */
1387c74a40a5SPatrick Mooney 			return (true);
1388c74a40a5SPatrick Mooney 		}
1389c74a40a5SPatrick Mooney 	} else {
1390c74a40a5SPatrick Mooney 		if ((state & EIS_REQ_EXIT) != 0) {
1391c74a40a5SPatrick Mooney 			/*
1392c74a40a5SPatrick Mooney 			 * Use a self-IPI to force an immediate exit after
1393c74a40a5SPatrick Mooney 			 * event injection has occurred.
1394c74a40a5SPatrick Mooney 			 */
1395c74a40a5SPatrick Mooney 			poke_cpu(CPU->cpu_id);
1396c74a40a5SPatrick Mooney 		} else {
1397c74a40a5SPatrick Mooney 			/*
1398c74a40a5SPatrick Mooney 			 * If any event is being injected, an exit immediately
1399c74a40a5SPatrick Mooney 			 * upon becoming interruptable again will allow pending
1400c74a40a5SPatrick Mooney 			 * or newly queued events to be injected in a timely
1401c74a40a5SPatrick Mooney 			 * manner.
1402c74a40a5SPatrick Mooney 			 */
1403c74a40a5SPatrick Mooney 			vmx_set_int_window_exiting(vmx, vcpu);
1404c74a40a5SPatrick Mooney 		}
1405c74a40a5SPatrick Mooney 	}
1406c74a40a5SPatrick Mooney 	return (false);
14074c87aefeSPatrick Mooney }
1408bf21cd93STycho Nightingale 
1409bf21cd93STycho Nightingale /*
1410bf21cd93STycho Nightingale  * If the Virtual NMIs execution control is '1' then the logical processor
1411bf21cd93STycho Nightingale  * tracks virtual-NMI blocking in the Guest Interruptibility-state field of
1412bf21cd93STycho Nightingale  * the VMCS. An IRET instruction in VMX non-root operation will remove any
1413bf21cd93STycho Nightingale  * virtual-NMI blocking.
1414bf21cd93STycho Nightingale  *
1415bf21cd93STycho Nightingale  * This unblocking occurs even if the IRET causes a fault. In this case the
1416bf21cd93STycho Nightingale  * hypervisor needs to restore virtual-NMI blocking before resuming the guest.
1417bf21cd93STycho Nightingale  */
1418bf21cd93STycho Nightingale static void
vmx_restore_nmi_blocking(struct vmx * vmx,int vcpuid)1419bf21cd93STycho Nightingale vmx_restore_nmi_blocking(struct vmx *vmx, int vcpuid)
1420bf21cd93STycho Nightingale {
1421bf21cd93STycho Nightingale 	uint32_t gi;
1422bf21cd93STycho Nightingale 
1423bf21cd93STycho Nightingale 	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
1424bf21cd93STycho Nightingale 	gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
1425bf21cd93STycho Nightingale 	vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
1426bf21cd93STycho Nightingale }
1427bf21cd93STycho Nightingale 
1428bf21cd93STycho Nightingale static void
vmx_clear_nmi_blocking(struct vmx * vmx,int vcpuid)1429bf21cd93STycho Nightingale vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid)
1430bf21cd93STycho Nightingale {
1431bf21cd93STycho Nightingale 	uint32_t gi;
1432bf21cd93STycho Nightingale 
1433bf21cd93STycho Nightingale 	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
1434bf21cd93STycho Nightingale 	gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
1435bf21cd93STycho Nightingale 	vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
1436bf21cd93STycho Nightingale }
1437bf21cd93STycho Nightingale 
14384c87aefeSPatrick Mooney static void
vmx_assert_nmi_blocking(struct vmx * vmx,int vcpuid)14394c87aefeSPatrick Mooney vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid)
14404c87aefeSPatrick Mooney {
14414c87aefeSPatrick Mooney 	uint32_t gi;
14424c87aefeSPatrick Mooney 
14434c87aefeSPatrick Mooney 	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
14459dc804b9SPatrick Mooney 	    ("NMI blocking is not in effect %x", gi));
14464c87aefeSPatrick Mooney }
14474c87aefeSPatrick Mooney 
14484c87aefeSPatrick Mooney static int
vmx_emulate_xsetbv(struct vmx * vmx,int vcpu,struct vm_exit * vmexit)14494c87aefeSPatrick Mooney vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
14504c87aefeSPatrick Mooney {
14514c87aefeSPatrick Mooney 	struct vmxctx *vmxctx;
14524c87aefeSPatrick Mooney 	uint64_t xcrval;
14534c87aefeSPatrick Mooney 	const struct xsave_limits *limits;
14544c87aefeSPatrick Mooney 
14554c87aefeSPatrick Mooney 	vmxctx = &vmx->ctx[vcpu];
14564c87aefeSPatrick Mooney 	limits = vmm_get_xsave_limits();
14574c87aefeSPatrick Mooney 
14584c87aefeSPatrick Mooney 	/*
14594c87aefeSPatrick Mooney 	 * Note that the processor raises a GP# fault on its own if
14604c87aefeSPatrick Mooney 	 * xsetbv is executed for CPL != 0, so we do not have to
14614c87aefeSPatrick Mooney 	 * emulate that fault here.
14624c87aefeSPatrick Mooney 	 */
14634c87aefeSPatrick Mooney 
14644c87aefeSPatrick Mooney 	/* Only xcr0 is supported. */
14654c87aefeSPatrick Mooney 	if (vmxctx->guest_rcx != 0) {
14664c87aefeSPatrick Mooney 		vm_inject_gp(vmx->vm, vcpu);
14674c87aefeSPatrick Mooney 		return (HANDLED);
14684c87aefeSPatrick Mooney 	}
14694c87aefeSPatrick Mooney 
14704c87aefeSPatrick Mooney 	/* We only handle xcr0 if both the host and guest have XSAVE enabled. */
14712699b94cSPatrick Mooney 	if (!limits->xsave_enabled ||
14722699b94cSPatrick Mooney 	    !(vmcs_read(VMCS_GUEST_CR4) & CR4_XSAVE)) {
14734c87aefeSPatrick Mooney 		vm_inject_ud(vmx->vm, vcpu);
14744c87aefeSPatrick Mooney 		return (HANDLED);
14754c87aefeSPatrick Mooney 	}
14764c87aefeSPatrick Mooney 
14774c87aefeSPatrick Mooney 	xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff);
14784c87aefeSPatrick Mooney 	if ((xcrval & ~limits->xcr0_allowed) != 0) {
14794c87aefeSPatrick Mooney 		vm_inject_gp(vmx->vm, vcpu);
14804c87aefeSPatrick Mooney 		return (HANDLED);
14814c87aefeSPatrick Mooney 	}
14824c87aefeSPatrick Mooney 
14834c87aefeSPatrick Mooney 	if (!(xcrval & XFEATURE_ENABLED_X87)) {
14844c87aefeSPatrick Mooney 		vm_inject_gp(vmx->vm, vcpu);
14854c87aefeSPatrick Mooney 		return (HANDLED);
14864c87aefeSPatrick Mooney 	}
14874c87aefeSPatrick Mooney 
14884c87aefeSPatrick Mooney 	/* AVX (YMM_Hi128) requires SSE. */
14894c87aefeSPatrick Mooney 	if (xcrval & XFEATURE_ENABLED_AVX &&
14904c87aefeSPatrick Mooney 	    (xcrval & XFEATURE_AVX) != XFEATURE_AVX) {
14914c87aefeSPatrick Mooney 		vm_inject_gp(vmx->vm, vcpu);
14924c87aefeSPatrick Mooney 		return (HANDLED);
14934c87aefeSPatrick Mooney 	}
14944c87aefeSPatrick Mooney 
14954c87aefeSPatrick Mooney 	/*
14964c87aefeSPatrick Mooney 	 * AVX512 requires base AVX (YMM_Hi128) as well as OpMask,
14974c87aefeSPatrick Mooney 	 * ZMM_Hi256, and Hi16_ZMM.
14984c87aefeSPatrick Mooney 	 */
14994c87aefeSPatrick Mooney 	if (xcrval & XFEATURE_AVX512 &&
15004c87aefeSPatrick Mooney 	    (xcrval & (XFEATURE_AVX512 | XFEATURE_AVX)) !=
15014c87aefeSPatrick Mooney 	    (XFEATURE_AVX512 | XFEATURE_AVX)) {
15024c87aefeSPatrick Mooney 		vm_inject_gp(vmx->vm, vcpu);
15034c87aefeSPatrick Mooney 		return (HANDLED);
15044c87aefeSPatrick Mooney 	}
15054c87aefeSPatrick Mooney 
15064c87aefeSPatrick Mooney 	/*
15074c87aefeSPatrick Mooney 	 * Intel MPX requires both bound register state flags to be
15084c87aefeSPatrick Mooney 	 * set.
15094c87aefeSPatrick Mooney 	 */
15104c87aefeSPatrick Mooney 	if (((xcrval & XFEATURE_ENABLED_BNDREGS) != 0) !=
15114c87aefeSPatrick Mooney 	    ((xcrval & XFEATURE_ENABLED_BNDCSR) != 0)) {
15124c87aefeSPatrick Mooney 		vm_inject_gp(vmx->vm, vcpu);
15134c87aefeSPatrick Mooney 		return (HANDLED);
15144c87aefeSPatrick Mooney 	}
15154c87aefeSPatrick Mooney 
15164c87aefeSPatrick Mooney 	/*
15174c87aefeSPatrick Mooney 	 * This runs "inside" vmrun() with the guest's FPU state, so
15184c87aefeSPatrick Mooney 	 * modifying xcr0 directly modifies the guest's xcr0, not the
15194c87aefeSPatrick Mooney 	 * host's.
15204c87aefeSPatrick Mooney 	 */
15214c87aefeSPatrick Mooney 	load_xcr(0, xcrval);
15224c87aefeSPatrick Mooney 	return (HANDLED);
15234c87aefeSPatrick Mooney }
15244c87aefeSPatrick Mooney 
1525bf21cd93STycho Nightingale static uint64_t
vmx_get_guest_reg(struct vmx * vmx,int vcpu,int ident)1526bf21cd93STycho Nightingale vmx_get_guest_reg(struct vmx *vmx, int vcpu, int ident)
1527bf21cd93STycho Nightingale {
1528bf21cd93STycho Nightingale 	const struct vmxctx *vmxctx;
1529bf21cd93STycho Nightingale 
1530bf21cd93STycho Nightingale 	vmxctx = &vmx->ctx[vcpu];
1531bf21cd93STycho Nightingale 
1532bf21cd93STycho Nightingale 	switch (ident) {
1533bf21cd93STycho Nightingale 	case 0:
1534bf21cd93STycho Nightingale 		return (vmxctx->guest_rax);
1535bf21cd93STycho Nightingale 	case 1:
1536bf21cd93STycho Nightingale 		return (vmxctx->guest_rcx);
1537bf21cd93STycho Nightingale 	case 2:
1538bf21cd93STycho Nightingale 		return (vmxctx->guest_rdx);
1539bf21cd93STycho Nightingale 	case 3:
1540bf21cd93STycho Nightingale 		return (vmxctx->guest_rbx);
1541bf21cd93STycho Nightingale 	case 4:
1542bf21cd93STycho Nightingale 		return (vmcs_read(VMCS_GUEST_RSP));
1543bf21cd93STycho Nightingale 	case 5:
1544bf21cd93STycho Nightingale 		return (vmxctx->guest_rbp);
1545bf21cd93STycho Nightingale 	case 6:
1546bf21cd93STycho Nightingale 		return (vmxctx->guest_rsi);
1547bf21cd93STycho Nightingale 	case 7:
1548bf21cd93STycho Nightingale 		return (vmxctx->guest_rdi);
1549bf21cd93STycho Nightingale 	case 8:
1550bf21cd93STycho Nightingale 		return (vmxctx->guest_r8);
1551bf21cd93STycho Nightingale 	case 9:
1552bf21cd93STycho Nightingale 		return (vmxctx->guest_r9);
1553bf21cd93STycho Nightingale 	case 10:
1554bf21cd93STycho Nightingale 		return (vmxctx->guest_r10);
1555bf21cd93STycho Nightingale 	case 11:
1556bf21cd93STycho Nightingale 		return (vmxctx->guest_r11);
1557bf21cd93STycho Nightingale 	case 12:
1558bf21cd93STycho Nightingale 		return (vmxctx->guest_r12);
1559bf21cd93STycho Nightingale 	case 13:
1560bf21cd93STycho Nightingale 		return (vmxctx->guest_r13);
1561bf21cd93STycho Nightingale 	case 14:
1562bf21cd93STycho Nightingale 		return (vmxctx->guest_r14);
1563bf21cd93STycho Nightingale 	case 15:
1564bf21cd93STycho Nightingale 		return (vmxctx->guest_r15);