/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2018 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>

#include <sys/x86_archext.h>
#include <sys/smp_impldefs.h>
#include <sys/smt.h>
#include <sys/hma.h>
#include <sys/trap.h>
#include <sys/archsystm.h>

#include <machine/psl.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/reg.h>
#include <machine/segments.h>
#include <machine/specialreg.h>
#include <machine/vmparam.h>
#include <sys/vmm_vm.h>
#include <sys/vmm_kernel.h>

#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <sys/vmm_instruction_emul.h>
#include "vmm_lapic.h"
#include "vmm_host.h"
#include "vmm_ioport.h"
#include "vmm_ktr.h"
#include "vmm_stat.h"
#include "vatpic.h"
83bf21cd93STycho Nightingale #include "vlapic.h" 84bf21cd93STycho Nightingale #include "vlapic_priv.h" 85bf21cd93STycho Nightingale 864c87aefeSPatrick Mooney #include "vmcs.h" 87bf21cd93STycho Nightingale #include "vmx.h" 88bf21cd93STycho Nightingale #include "vmx_msr.h" 89bf21cd93STycho Nightingale #include "x86.h" 90bf21cd93STycho Nightingale #include "vmx_controls.h" 91bf21cd93STycho Nightingale 92bf21cd93STycho Nightingale #define PINBASED_CTLS_ONE_SETTING \ 93bf21cd93STycho Nightingale (PINBASED_EXTINT_EXITING | \ 942699b94cSPatrick Mooney PINBASED_NMI_EXITING | \ 952699b94cSPatrick Mooney PINBASED_VIRTUAL_NMI) 96bf21cd93STycho Nightingale #define PINBASED_CTLS_ZERO_SETTING 0 97bf21cd93STycho Nightingale 982699b94cSPatrick Mooney #define PROCBASED_CTLS_WINDOW_SETTING \ 99bf21cd93STycho Nightingale (PROCBASED_INT_WINDOW_EXITING | \ 1002699b94cSPatrick Mooney PROCBASED_NMI_WINDOW_EXITING) 101bf21cd93STycho Nightingale 1024c87aefeSPatrick Mooney /* We consider TSC offset a necessity for unsynched TSC handling */ 10384971882SPatrick Mooney #define PROCBASED_CTLS_ONE_SETTING \ 104bf21cd93STycho Nightingale (PROCBASED_SECONDARY_CONTROLS | \ 1052699b94cSPatrick Mooney PROCBASED_TSC_OFFSET | \ 1062699b94cSPatrick Mooney PROCBASED_MWAIT_EXITING | \ 1072699b94cSPatrick Mooney PROCBASED_MONITOR_EXITING | \ 1082699b94cSPatrick Mooney PROCBASED_IO_EXITING | \ 1092699b94cSPatrick Mooney PROCBASED_MSR_BITMAPS | \ 1102699b94cSPatrick Mooney PROCBASED_CTLS_WINDOW_SETTING | \ 1112699b94cSPatrick Mooney PROCBASED_CR8_LOAD_EXITING | \ 1122699b94cSPatrick Mooney PROCBASED_CR8_STORE_EXITING) 1134c87aefeSPatrick Mooney 114bf21cd93STycho Nightingale #define PROCBASED_CTLS_ZERO_SETTING \ 115bf21cd93STycho Nightingale (PROCBASED_CR3_LOAD_EXITING | \ 116bf21cd93STycho Nightingale PROCBASED_CR3_STORE_EXITING | \ 117bf21cd93STycho Nightingale PROCBASED_IO_BITMAPS) 118bf21cd93STycho Nightingale 119c3ae3afaSPatrick Mooney /* 120c3ae3afaSPatrick Mooney * EPT and Unrestricted Guest are 
considered necessities. The latter is not a 121c3ae3afaSPatrick Mooney * requirement on FreeBSD, where grub2-bhyve is used to load guests directly 122c3ae3afaSPatrick Mooney * without a bootrom starting in real mode. 123c3ae3afaSPatrick Mooney */ 124c3ae3afaSPatrick Mooney #define PROCBASED_CTLS2_ONE_SETTING \ 125c3ae3afaSPatrick Mooney (PROCBASED2_ENABLE_EPT | \ 126c3ae3afaSPatrick Mooney PROCBASED2_UNRESTRICTED_GUEST) 127bf21cd93STycho Nightingale #define PROCBASED_CTLS2_ZERO_SETTING 0 128bf21cd93STycho Nightingale 129bf21cd93STycho Nightingale #define VM_EXIT_CTLS_ONE_SETTING \ 1304c87aefeSPatrick Mooney (VM_EXIT_SAVE_DEBUG_CONTROLS | \ 1314c87aefeSPatrick Mooney VM_EXIT_HOST_LMA | \ 1324c87aefeSPatrick Mooney VM_EXIT_LOAD_PAT | \ 133bf21cd93STycho Nightingale VM_EXIT_SAVE_EFER | \ 134bf21cd93STycho Nightingale VM_EXIT_LOAD_EFER | \ 1354c87aefeSPatrick Mooney VM_EXIT_ACKNOWLEDGE_INTERRUPT) 136bf21cd93STycho Nightingale 1374c87aefeSPatrick Mooney #define VM_EXIT_CTLS_ZERO_SETTING 0 138bf21cd93STycho Nightingale 1394c87aefeSPatrick Mooney #define VM_ENTRY_CTLS_ONE_SETTING \ 1404c87aefeSPatrick Mooney (VM_ENTRY_LOAD_DEBUG_CONTROLS | \ 1414c87aefeSPatrick Mooney VM_ENTRY_LOAD_EFER) 142bf21cd93STycho Nightingale 143bf21cd93STycho Nightingale #define VM_ENTRY_CTLS_ZERO_SETTING \ 1444c87aefeSPatrick Mooney (VM_ENTRY_INTO_SMM | \ 145bf21cd93STycho Nightingale VM_ENTRY_DEACTIVATE_DUAL_MONITOR) 146bf21cd93STycho Nightingale 1470153d828SPatrick Mooney /* 1480153d828SPatrick Mooney * Cover the EPT capabilities used by bhyve at present: 1490153d828SPatrick Mooney * - 4-level page walks 1500153d828SPatrick Mooney * - write-back memory type 1510153d828SPatrick Mooney * - INVEPT operations (all types) 1520153d828SPatrick Mooney * - INVVPID operations (single-context only) 1530153d828SPatrick Mooney */ 1540153d828SPatrick Mooney #define EPT_CAPS_REQUIRED \ 1550153d828SPatrick Mooney (IA32_VMX_EPT_VPID_PWL4 | \ 1560153d828SPatrick Mooney IA32_VMX_EPT_VPID_TYPE_WB | \ 
1570153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVEPT | \ 1580153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVEPT_SINGLE | \ 1590153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVEPT_ALL | \ 1600153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVVPID | \ 1610153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVVPID_SINGLE) 1620153d828SPatrick Mooney 163bf21cd93STycho Nightingale #define HANDLED 1 164bf21cd93STycho Nightingale #define UNHANDLED 0 165bf21cd93STycho Nightingale 166bf21cd93STycho Nightingale static MALLOC_DEFINE(M_VMX, "vmx", "vmx"); 167bf21cd93STycho Nightingale static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); 168bf21cd93STycho Nightingale 169bf21cd93STycho Nightingale SYSCTL_DECL(_hw_vmm); 170154972afSPatrick Mooney SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 171154972afSPatrick Mooney NULL); 172bf21cd93STycho Nightingale 173bf21cd93STycho Nightingale static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; 174bf21cd93STycho Nightingale static uint32_t exit_ctls, entry_ctls; 175bf21cd93STycho Nightingale 176bf21cd93STycho Nightingale static uint64_t cr0_ones_mask, cr0_zeros_mask; 177bf21cd93STycho Nightingale 178bf21cd93STycho Nightingale static uint64_t cr4_ones_mask, cr4_zeros_mask; 179bf21cd93STycho Nightingale 180bf21cd93STycho Nightingale static int vmx_initialized; 181bf21cd93STycho Nightingale 1822699b94cSPatrick Mooney /* Do not flush RSB upon vmexit */ 183007ca332SPatrick Mooney static int no_flush_rsb; 184007ca332SPatrick Mooney 185bf21cd93STycho Nightingale /* 186bf21cd93STycho Nightingale * Optional capabilities 187bf21cd93STycho Nightingale */ 1884c87aefeSPatrick Mooney 1892699b94cSPatrick Mooney /* HLT triggers a VM-exit */ 190bf21cd93STycho Nightingale static int cap_halt_exit; 1914c87aefeSPatrick Mooney 1922699b94cSPatrick Mooney /* PAUSE triggers a VM-exit */ 193bf21cd93STycho Nightingale static int cap_pause_exit; 1944c87aefeSPatrick Mooney 1952699b94cSPatrick Mooney /* Monitor trap flag */ 196bf21cd93STycho Nightingale 
static int cap_monitor_trap; 1974c87aefeSPatrick Mooney 1982699b94cSPatrick Mooney /* Guests are allowed to use INVPCID */ 199bf21cd93STycho Nightingale static int cap_invpcid; 200bf21cd93STycho Nightingale 201c3ae3afaSPatrick Mooney /* Extra capabilities (VMX_CAP_*) beyond the minimum */ 202c3ae3afaSPatrick Mooney static enum vmx_caps vmx_capabilities; 203bf21cd93STycho Nightingale 2042699b94cSPatrick Mooney /* APICv posted interrupt vector */ 2054c87aefeSPatrick Mooney static int pirvec = -1; 206bf21cd93STycho Nightingale 2072699b94cSPatrick Mooney static uint_t vpid_alloc_failed; 208bf21cd93STycho Nightingale 209154972afSPatrick Mooney int guest_l1d_flush; 210154972afSPatrick Mooney int guest_l1d_flush_sw; 2114c87aefeSPatrick Mooney 212007ca332SPatrick Mooney /* MSR save region is composed of an array of 'struct msr_entry' */ 213007ca332SPatrick Mooney struct msr_entry { 214007ca332SPatrick Mooney uint32_t index; 215007ca332SPatrick Mooney uint32_t reserved; 216007ca332SPatrick Mooney uint64_t val; 217007ca332SPatrick Mooney }; 218007ca332SPatrick Mooney 2194c87aefeSPatrick Mooney static struct msr_entry msr_load_list[1] __aligned(16); 2204c87aefeSPatrick Mooney 2214c87aefeSPatrick Mooney /* 2224c87aefeSPatrick Mooney * The definitions of SDT probes for VMX. 
2234c87aefeSPatrick Mooney */ 2244c87aefeSPatrick Mooney 2252699b94cSPatrick Mooney /* BEGIN CSTYLED */ 2264c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, entry, 2274c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2284c87aefeSPatrick Mooney 2294c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, taskswitch, 2304c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "struct vm_task_switch *"); 2314c87aefeSPatrick Mooney 2324c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, craccess, 2334c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint64_t"); 2344c87aefeSPatrick Mooney 2354c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, rdmsr, 2364c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t"); 2374c87aefeSPatrick Mooney 2384c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, wrmsr, 2394c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t", "uint64_t"); 2404c87aefeSPatrick Mooney 2414c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, halt, 2424c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2434c87aefeSPatrick Mooney 2444c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, mtrap, 2454c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2464c87aefeSPatrick Mooney 2474c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, pause, 2484c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2494c87aefeSPatrick Mooney 2504c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, intrwindow, 2514c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2524c87aefeSPatrick Mooney 2534c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, interrupt, 2544c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t"); 2554c87aefeSPatrick Mooney 2564c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, nmiwindow, 2574c87aefeSPatrick Mooney "struct vmx *", "int", "struct 
vm_exit *"); 2584c87aefeSPatrick Mooney 2594c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, inout, 2604c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2614c87aefeSPatrick Mooney 2624c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, cpuid, 2634c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2644c87aefeSPatrick Mooney 2654c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, exception, 2664c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t", "int"); 2674c87aefeSPatrick Mooney 2684c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, nestedfault, 2694c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint64_t", "uint64_t"); 2704c87aefeSPatrick Mooney 2714c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, mmiofault, 2724c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint64_t"); 2734c87aefeSPatrick Mooney 2744c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, eoi, 2754c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2764c87aefeSPatrick Mooney 2774c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, apicaccess, 2784c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2794c87aefeSPatrick Mooney 2804c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, apicwrite, 2814c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "struct vlapic *"); 2824c87aefeSPatrick Mooney 2834c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, xsetbv, 2844c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2854c87aefeSPatrick Mooney 2864c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor, 2874c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2884c87aefeSPatrick Mooney 2894c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait, 2904c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2914c87aefeSPatrick Mooney 2924c87aefeSPatrick Mooney 
SDT_PROBE_DEFINE3(vmm, vmx, exit, vminsn, 2934c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2944c87aefeSPatrick Mooney 2954c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown, 2964c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t"); 2974c87aefeSPatrick Mooney 2984c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, return, 2994c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "int"); 3002699b94cSPatrick Mooney /* END CSTYLED */ 3014c87aefeSPatrick Mooney 302bf21cd93STycho Nightingale static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); 303bf21cd93STycho Nightingale static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); 304007ca332SPatrick Mooney static void vmx_apply_tsc_adjust(struct vmx *, int); 305c74a40a5SPatrick Mooney static void vmx_apicv_sync_tmr(struct vlapic *vlapic); 306c74a40a5SPatrick Mooney static void vmx_tpr_shadow_enter(struct vlapic *vlapic); 307c74a40a5SPatrick Mooney static void vmx_tpr_shadow_exit(struct vlapic *vlapic); 308bf21cd93STycho Nightingale 3096b641d7aSPatrick Mooney static void 3106b641d7aSPatrick Mooney vmx_allow_x2apic_msrs(struct vmx *vmx, int vcpuid) 311bf21cd93STycho Nightingale { 312bf21cd93STycho Nightingale /* 313bf21cd93STycho Nightingale * Allow readonly access to the following x2APIC MSRs from the guest. 
314bf21cd93STycho Nightingale */ 3156b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ID); 3166b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_VERSION); 3176b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LDR); 3186b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_SVR); 3196b641d7aSPatrick Mooney 3206b641d7aSPatrick Mooney for (uint_t i = 0; i < 8; i++) { 3216b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ISR0 + i); 3226b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_TMR0 + i); 3236b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_IRR0 + i); 3246b641d7aSPatrick Mooney } 3256b641d7aSPatrick Mooney 3266b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ESR); 3276b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_TIMER); 3286b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_THERMAL); 3296b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_PCINT); 3306b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT0); 3316b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT1); 3326b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_ERROR); 3336b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR_TIMER); 3346b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_DCR_TIMER); 3356b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR); 336bf21cd93STycho Nightingale 337bf21cd93STycho Nightingale /* 338bf21cd93STycho Nightingale * Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest. 339bf21cd93STycho Nightingale * 340bf21cd93STycho Nightingale * These registers get special treatment described in the section 341bf21cd93STycho Nightingale * "Virtualizing MSR-Based APIC Accesses". 
342bf21cd93STycho Nightingale */ 3436b641d7aSPatrick Mooney guest_msr_rw(vmx, vcpuid, MSR_APIC_TPR); 3446b641d7aSPatrick Mooney guest_msr_rw(vmx, vcpuid, MSR_APIC_EOI); 3456b641d7aSPatrick Mooney guest_msr_rw(vmx, vcpuid, MSR_APIC_SELF_IPI); 346bf21cd93STycho Nightingale } 347bf21cd93STycho Nightingale 3482699b94cSPatrick Mooney static ulong_t 3492699b94cSPatrick Mooney vmx_fix_cr0(ulong_t cr0) 350bf21cd93STycho Nightingale { 351bf21cd93STycho Nightingale return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); 352bf21cd93STycho Nightingale } 353bf21cd93STycho Nightingale 354bf0dcd3fSPatrick Mooney /* 355bf0dcd3fSPatrick Mooney * Given a live (VMCS-active) cr0 value, and its shadow counterpart, calculate 356bf0dcd3fSPatrick Mooney * the value observable from the guest. 357bf0dcd3fSPatrick Mooney */ 358bf0dcd3fSPatrick Mooney static ulong_t 359bf0dcd3fSPatrick Mooney vmx_unshadow_cr0(uint64_t cr0, uint64_t shadow) 360bf0dcd3fSPatrick Mooney { 361bf0dcd3fSPatrick Mooney return ((cr0 & ~cr0_ones_mask) | 362bf0dcd3fSPatrick Mooney (shadow & (cr0_zeros_mask | cr0_ones_mask))); 363bf0dcd3fSPatrick Mooney } 364bf0dcd3fSPatrick Mooney 3652699b94cSPatrick Mooney static ulong_t 3662699b94cSPatrick Mooney vmx_fix_cr4(ulong_t cr4) 367bf21cd93STycho Nightingale { 368bf21cd93STycho Nightingale return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); 369bf21cd93STycho Nightingale } 370bf21cd93STycho Nightingale 371bf0dcd3fSPatrick Mooney /* 372bf0dcd3fSPatrick Mooney * Given a live (VMCS-active) cr4 value, and its shadow counterpart, calculate 373bf0dcd3fSPatrick Mooney * the value observable from the guest. 
374bf0dcd3fSPatrick Mooney */ 375bf0dcd3fSPatrick Mooney static ulong_t 376bf0dcd3fSPatrick Mooney vmx_unshadow_cr4(uint64_t cr4, uint64_t shadow) 377bf0dcd3fSPatrick Mooney { 378bf0dcd3fSPatrick Mooney return ((cr4 & ~cr4_ones_mask) | 379bf0dcd3fSPatrick Mooney (shadow & (cr4_zeros_mask | cr4_ones_mask))); 380bf0dcd3fSPatrick Mooney } 381bf0dcd3fSPatrick Mooney 382bf21cd93STycho Nightingale static void 383bf21cd93STycho Nightingale vpid_free(int vpid) 384bf21cd93STycho Nightingale { 385bf21cd93STycho Nightingale if (vpid < 0 || vpid > 0xffff) 386bf21cd93STycho Nightingale panic("vpid_free: invalid vpid %d", vpid); 387bf21cd93STycho Nightingale 388bf21cd93STycho Nightingale /* 389bf21cd93STycho Nightingale * VPIDs [0,VM_MAXCPU] are special and are not allocated from 390bf21cd93STycho Nightingale * the unit number allocator. 391bf21cd93STycho Nightingale */ 392bf21cd93STycho Nightingale 393bf21cd93STycho Nightingale if (vpid > VM_MAXCPU) 3944c87aefeSPatrick Mooney hma_vmx_vpid_free((uint16_t)vpid); 395bf21cd93STycho Nightingale } 396bf21cd93STycho Nightingale 397bf21cd93STycho Nightingale static void 398bf21cd93STycho Nightingale vpid_alloc(uint16_t *vpid, int num) 399bf21cd93STycho Nightingale { 400bf21cd93STycho Nightingale int i, x; 401bf21cd93STycho Nightingale 402bf21cd93STycho Nightingale if (num <= 0 || num > VM_MAXCPU) 403bf21cd93STycho Nightingale panic("invalid number of vpids requested: %d", num); 404bf21cd93STycho Nightingale 405bf21cd93STycho Nightingale /* 406bf21cd93STycho Nightingale * If the "enable vpid" execution control is not enabled then the 407bf21cd93STycho Nightingale * VPID is required to be 0 for all vcpus. 
408bf21cd93STycho Nightingale */ 409bf21cd93STycho Nightingale if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) { 410bf21cd93STycho Nightingale for (i = 0; i < num; i++) 411bf21cd93STycho Nightingale vpid[i] = 0; 412bf21cd93STycho Nightingale return; 413bf21cd93STycho Nightingale } 414bf21cd93STycho Nightingale 415bf21cd93STycho Nightingale /* 416bf21cd93STycho Nightingale * Allocate a unique VPID for each vcpu from the unit number allocator. 417bf21cd93STycho Nightingale */ 418bf21cd93STycho Nightingale for (i = 0; i < num; i++) { 4194c87aefeSPatrick Mooney uint16_t tmp; 4204c87aefeSPatrick Mooney 4214c87aefeSPatrick Mooney tmp = hma_vmx_vpid_alloc(); 4224c87aefeSPatrick Mooney x = (tmp == 0) ? -1 : tmp; 423f703164bSPatrick Mooney 424bf21cd93STycho Nightingale if (x == -1) 425bf21cd93STycho Nightingale break; 426bf21cd93STycho Nightingale else 427bf21cd93STycho Nightingale vpid[i] = x; 428bf21cd93STycho Nightingale } 429bf21cd93STycho Nightingale 430bf21cd93STycho Nightingale if (i < num) { 431bf21cd93STycho Nightingale atomic_add_int(&vpid_alloc_failed, 1); 432bf21cd93STycho Nightingale 433bf21cd93STycho Nightingale /* 434bf21cd93STycho Nightingale * If the unit number allocator does not have enough unique 435bf21cd93STycho Nightingale * VPIDs then we need to allocate from the [1,VM_MAXCPU] range. 436bf21cd93STycho Nightingale * 437bf21cd93STycho Nightingale * These VPIDs are not be unique across VMs but this does not 438bf21cd93STycho Nightingale * affect correctness because the combined mappings are also 439bf21cd93STycho Nightingale * tagged with the EP4TA which is unique for each VM. 440bf21cd93STycho Nightingale * 441bf21cd93STycho Nightingale * It is still sub-optimal because the invvpid will invalidate 442bf21cd93STycho Nightingale * combined mappings for a particular VPID across all EP4TAs. 
443bf21cd93STycho Nightingale */ 444bf21cd93STycho Nightingale while (i-- > 0) 445bf21cd93STycho Nightingale vpid_free(vpid[i]); 446bf21cd93STycho Nightingale 447bf21cd93STycho Nightingale for (i = 0; i < num; i++) 448bf21cd93STycho Nightingale vpid[i] = i + 1; 449bf21cd93STycho Nightingale } 450bf21cd93STycho Nightingale } 451bf21cd93STycho Nightingale 452bf21cd93STycho Nightingale static int 4534c87aefeSPatrick Mooney vmx_cleanup(void) 454bf21cd93STycho Nightingale { 4554c87aefeSPatrick Mooney /* This is taken care of by the hma registration */ 4564c87aefeSPatrick Mooney return (0); 4574c87aefeSPatrick Mooney } 4584c87aefeSPatrick Mooney 4594c87aefeSPatrick Mooney static void 4604c87aefeSPatrick Mooney vmx_restore(void) 4614c87aefeSPatrick Mooney { 4624c87aefeSPatrick Mooney /* No-op on illumos */ 4634c87aefeSPatrick Mooney } 4644c87aefeSPatrick Mooney 4654c87aefeSPatrick Mooney static int 4660153d828SPatrick Mooney vmx_init(void) 4674c87aefeSPatrick Mooney { 468154972afSPatrick Mooney int error; 4694c87aefeSPatrick Mooney uint64_t fixed0, fixed1; 470c3ae3afaSPatrick Mooney uint32_t tmp; 471c3ae3afaSPatrick Mooney enum vmx_caps avail_caps = VMX_CAP_NONE; 4724c87aefeSPatrick Mooney 473bf21cd93STycho Nightingale /* Check support for primary processor-based VM-execution controls */ 474bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 4752699b94cSPatrick Mooney MSR_VMX_TRUE_PROCBASED_CTLS, 4762699b94cSPatrick Mooney PROCBASED_CTLS_ONE_SETTING, 4772699b94cSPatrick Mooney PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls); 478bf21cd93STycho Nightingale if (error) { 479bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired primary " 4802699b94cSPatrick Mooney "processor-based controls\n"); 481bf21cd93STycho Nightingale return (error); 482bf21cd93STycho Nightingale } 483bf21cd93STycho Nightingale 484bf21cd93STycho Nightingale /* Clear the processor-based ctl bits that are set on demand */ 485bf21cd93STycho Nightingale 
procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING; 486bf21cd93STycho Nightingale 487bf21cd93STycho Nightingale /* Check support for secondary processor-based VM-execution controls */ 488bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 4892699b94cSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, 4902699b94cSPatrick Mooney PROCBASED_CTLS2_ONE_SETTING, 4912699b94cSPatrick Mooney PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2); 492bf21cd93STycho Nightingale if (error) { 493bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired secondary " 4942699b94cSPatrick Mooney "processor-based controls\n"); 495bf21cd93STycho Nightingale return (error); 496bf21cd93STycho Nightingale } 497bf21cd93STycho Nightingale 498bf21cd93STycho Nightingale /* Check support for VPID */ 4992699b94cSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 5002699b94cSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, 5012699b94cSPatrick Mooney PROCBASED2_ENABLE_VPID, 5022699b94cSPatrick Mooney 0, &tmp); 503bf21cd93STycho Nightingale if (error == 0) 504bf21cd93STycho Nightingale procbased_ctls2 |= PROCBASED2_ENABLE_VPID; 505bf21cd93STycho Nightingale 506bf21cd93STycho Nightingale /* Check support for pin-based VM-execution controls */ 507bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 5082699b94cSPatrick Mooney MSR_VMX_TRUE_PINBASED_CTLS, 5092699b94cSPatrick Mooney PINBASED_CTLS_ONE_SETTING, 5102699b94cSPatrick Mooney PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls); 511bf21cd93STycho Nightingale if (error) { 512bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired " 5132699b94cSPatrick Mooney "pin-based controls\n"); 514bf21cd93STycho Nightingale return (error); 515bf21cd93STycho Nightingale } 516bf21cd93STycho Nightingale 517bf21cd93STycho Nightingale /* Check support for VM-exit controls */ 518bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS, 5192699b94cSPatrick Mooney 
VM_EXIT_CTLS_ONE_SETTING, 5202699b94cSPatrick Mooney VM_EXIT_CTLS_ZERO_SETTING, 5212699b94cSPatrick Mooney &exit_ctls); 522bf21cd93STycho Nightingale if (error) { 523bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired " 524bf21cd93STycho Nightingale "exit controls\n"); 525bf21cd93STycho Nightingale return (error); 526bf21cd93STycho Nightingale } 527bf21cd93STycho Nightingale 528bf21cd93STycho Nightingale /* Check support for VM-entry controls */ 529bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS, 530bf21cd93STycho Nightingale VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING, 531bf21cd93STycho Nightingale &entry_ctls); 532bf21cd93STycho Nightingale if (error) { 533bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired " 534bf21cd93STycho Nightingale "entry controls\n"); 535bf21cd93STycho Nightingale return (error); 536bf21cd93STycho Nightingale } 537bf21cd93STycho Nightingale 538bf21cd93STycho Nightingale /* 539bf21cd93STycho Nightingale * Check support for optional features by testing them 540bf21cd93STycho Nightingale * as individual bits 541bf21cd93STycho Nightingale */ 542bf21cd93STycho Nightingale cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 5432699b94cSPatrick Mooney MSR_VMX_TRUE_PROCBASED_CTLS, 5442699b94cSPatrick Mooney PROCBASED_HLT_EXITING, 0, 5452699b94cSPatrick Mooney &tmp) == 0); 546bf21cd93STycho Nightingale 547bf21cd93STycho Nightingale cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 5482699b94cSPatrick Mooney MSR_VMX_PROCBASED_CTLS, 5492699b94cSPatrick Mooney PROCBASED_MTF, 0, 5502699b94cSPatrick Mooney &tmp) == 0); 551bf21cd93STycho Nightingale 552bf21cd93STycho Nightingale cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 5532699b94cSPatrick Mooney MSR_VMX_TRUE_PROCBASED_CTLS, 5542699b94cSPatrick Mooney PROCBASED_PAUSE_EXITING, 0, 5552699b94cSPatrick Mooney &tmp) == 0); 556bf21cd93STycho Nightingale 
5574c87aefeSPatrick Mooney cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 5584c87aefeSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, 5594c87aefeSPatrick Mooney &tmp) == 0); 5604c87aefeSPatrick Mooney 5612699b94cSPatrick Mooney /* 5622699b94cSPatrick Mooney * Check for APIC virtualization capabilities: 563c3ae3afaSPatrick Mooney * - TPR shadowing 564c3ae3afaSPatrick Mooney * - Full APICv (with or without x2APIC support) 565c3ae3afaSPatrick Mooney * - Posted interrupt handling 566154972afSPatrick Mooney */ 567c3ae3afaSPatrick Mooney if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS, 568c3ae3afaSPatrick Mooney PROCBASED_USE_TPR_SHADOW, 0, &tmp) == 0) { 569c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_TPR_SHADOW; 570c3ae3afaSPatrick Mooney 571c3ae3afaSPatrick Mooney const uint32_t apicv_bits = 572c3ae3afaSPatrick Mooney PROCBASED2_VIRTUALIZE_APIC_ACCESSES | 573c3ae3afaSPatrick Mooney PROCBASED2_APIC_REGISTER_VIRTUALIZATION | 574c3ae3afaSPatrick Mooney PROCBASED2_VIRTUALIZE_X2APIC_MODE | 575c3ae3afaSPatrick Mooney PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY; 576c3ae3afaSPatrick Mooney if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 577c3ae3afaSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, apicv_bits, 0, &tmp) == 0) { 578c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_APICV; 579154972afSPatrick Mooney 5804c87aefeSPatrick Mooney /* 581c3ae3afaSPatrick Mooney * It may make sense in the future to differentiate 582c3ae3afaSPatrick Mooney * hardware (or software) configurations with APICv but 583c3ae3afaSPatrick Mooney * no support for accelerating x2APIC mode. 
5844c87aefeSPatrick Mooney */ 585c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_APICV_X2APIC; 586c3ae3afaSPatrick Mooney 587c3ae3afaSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 588c3ae3afaSPatrick Mooney MSR_VMX_TRUE_PINBASED_CTLS, 589c3ae3afaSPatrick Mooney PINBASED_POSTED_INTERRUPT, 0, &tmp); 590c3ae3afaSPatrick Mooney if (error == 0) { 591c3ae3afaSPatrick Mooney /* 592c3ae3afaSPatrick Mooney * If the PSM-provided interfaces for requesting 593c3ae3afaSPatrick Mooney * and using a PIR IPI vector are present, use 594c3ae3afaSPatrick Mooney * them for posted interrupts. 595c3ae3afaSPatrick Mooney */ 596c3ae3afaSPatrick Mooney if (psm_get_pir_ipivect != NULL && 597c3ae3afaSPatrick Mooney psm_send_pir_ipi != NULL) { 598c3ae3afaSPatrick Mooney pirvec = psm_get_pir_ipivect(); 599c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_APICV_PIR; 600c3ae3afaSPatrick Mooney } 6014c87aefeSPatrick Mooney } 6024c87aefeSPatrick Mooney } 6034c87aefeSPatrick Mooney } 6044c87aefeSPatrick Mooney 6050153d828SPatrick Mooney /* 6060153d828SPatrick Mooney * Check for necessary EPT capabilities 6070153d828SPatrick Mooney * 6080153d828SPatrick Mooney * TODO: Properly handle when IA32_VMX_EPT_VPID_HW_AD is missing and the 6090153d828SPatrick Mooney * hypervisor intends to utilize dirty page tracking. 
6100153d828SPatrick Mooney */ 6110153d828SPatrick Mooney uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 6120153d828SPatrick Mooney if ((ept_caps & EPT_CAPS_REQUIRED) != EPT_CAPS_REQUIRED) { 6130153d828SPatrick Mooney cmn_err(CE_WARN, "!Inadequate EPT capabilities: %lx", ept_caps); 6140153d828SPatrick Mooney return (EINVAL); 615bf21cd93STycho Nightingale } 616bf21cd93STycho Nightingale 6174c87aefeSPatrick Mooney #ifdef __FreeBSD__ 6184c87aefeSPatrick Mooney guest_l1d_flush = (cpu_ia32_arch_caps & 6194c87aefeSPatrick Mooney IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0; 6204c87aefeSPatrick Mooney TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush); 6214c87aefeSPatrick Mooney 6224c87aefeSPatrick Mooney /* 6234c87aefeSPatrick Mooney * L1D cache flush is enabled. Use IA32_FLUSH_CMD MSR when 6244c87aefeSPatrick Mooney * available. Otherwise fall back to the software flush 6254c87aefeSPatrick Mooney * method which loads enough data from the kernel text to 6264c87aefeSPatrick Mooney * flush existing L1D content, both on VMX entry and on NMI 6274c87aefeSPatrick Mooney * return. 
6284c87aefeSPatrick Mooney */ 6294c87aefeSPatrick Mooney if (guest_l1d_flush) { 6304c87aefeSPatrick Mooney if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) { 6314c87aefeSPatrick Mooney guest_l1d_flush_sw = 1; 6324c87aefeSPatrick Mooney TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw", 6334c87aefeSPatrick Mooney &guest_l1d_flush_sw); 6344c87aefeSPatrick Mooney } 6354c87aefeSPatrick Mooney if (guest_l1d_flush_sw) { 6364c87aefeSPatrick Mooney if (nmi_flush_l1d_sw <= 1) 6374c87aefeSPatrick Mooney nmi_flush_l1d_sw = 1; 6384c87aefeSPatrick Mooney } else { 6394c87aefeSPatrick Mooney msr_load_list[0].index = MSR_IA32_FLUSH_CMD; 6404c87aefeSPatrick Mooney msr_load_list[0].val = IA32_FLUSH_CMD_L1D; 6414c87aefeSPatrick Mooney } 6424c87aefeSPatrick Mooney } 6434c87aefeSPatrick Mooney #else 6444c87aefeSPatrick Mooney /* L1D flushing is taken care of by smt_acquire() and friends */ 6454c87aefeSPatrick Mooney guest_l1d_flush = 0; 6464c87aefeSPatrick Mooney #endif /* __FreeBSD__ */ 6474c87aefeSPatrick Mooney 648bf21cd93STycho Nightingale /* 649bf21cd93STycho Nightingale * Stash the cr0 and cr4 bits that must be fixed to 0 or 1 650bf21cd93STycho Nightingale */ 651bf21cd93STycho Nightingale fixed0 = rdmsr(MSR_VMX_CR0_FIXED0); 652bf21cd93STycho Nightingale fixed1 = rdmsr(MSR_VMX_CR0_FIXED1); 653bf21cd93STycho Nightingale cr0_ones_mask = fixed0 & fixed1; 654bf21cd93STycho Nightingale cr0_zeros_mask = ~fixed0 & ~fixed1; 655bf21cd93STycho Nightingale 656bf21cd93STycho Nightingale /* 657c3ae3afaSPatrick Mooney * Since Unrestricted Guest was already verified present, CR0_PE and 658c3ae3afaSPatrick Mooney * CR0_PG are allowed to be set to zero in VMX non-root operation 659bf21cd93STycho Nightingale */ 660c3ae3afaSPatrick Mooney cr0_ones_mask &= ~(CR0_PG | CR0_PE); 661bf21cd93STycho Nightingale 662bf21cd93STycho Nightingale /* 663bf21cd93STycho Nightingale * Do not allow the guest to set CR0_NW or CR0_CD. 
664bf21cd93STycho Nightingale */ 665bf21cd93STycho Nightingale cr0_zeros_mask |= (CR0_NW | CR0_CD); 666bf21cd93STycho Nightingale 667bf21cd93STycho Nightingale fixed0 = rdmsr(MSR_VMX_CR4_FIXED0); 668bf21cd93STycho Nightingale fixed1 = rdmsr(MSR_VMX_CR4_FIXED1); 669bf21cd93STycho Nightingale cr4_ones_mask = fixed0 & fixed1; 670bf21cd93STycho Nightingale cr4_zeros_mask = ~fixed0 & ~fixed1; 671bf21cd93STycho Nightingale 672bf21cd93STycho Nightingale vmx_msr_init(); 673bf21cd93STycho Nightingale 674c3ae3afaSPatrick Mooney vmx_capabilities = avail_caps; 675bf21cd93STycho Nightingale vmx_initialized = 1; 676bf21cd93STycho Nightingale 677bf21cd93STycho Nightingale return (0); 678bf21cd93STycho Nightingale } 679bf21cd93STycho Nightingale 6804c87aefeSPatrick Mooney static void 6814c87aefeSPatrick Mooney vmx_trigger_hostintr(int vector) 6824c87aefeSPatrick Mooney { 6834c87aefeSPatrick Mooney VERIFY(vector >= 32 && vector <= 255); 6844c87aefeSPatrick Mooney vmx_call_isr(vector - 32); 6854c87aefeSPatrick Mooney } 6864c87aefeSPatrick Mooney 687bf21cd93STycho Nightingale static void * 6880153d828SPatrick Mooney vmx_vminit(struct vm *vm) 689bf21cd93STycho Nightingale { 690bf21cd93STycho Nightingale uint16_t vpid[VM_MAXCPU]; 691007ca332SPatrick Mooney int i, error, datasel; 692bf21cd93STycho Nightingale struct vmx *vmx; 6934c87aefeSPatrick Mooney uint32_t exc_bitmap; 6944c87aefeSPatrick Mooney uint16_t maxcpus; 695c3ae3afaSPatrick Mooney uint32_t proc_ctls, proc2_ctls, pin_ctls; 6966b641d7aSPatrick Mooney uint64_t apic_access_pa = UINT64_MAX; 697bf21cd93STycho Nightingale 6982699b94cSPatrick Mooney vmx = malloc(sizeof (struct vmx), M_VMX, M_WAITOK | M_ZERO); 699bf21cd93STycho Nightingale if ((uintptr_t)vmx & PAGE_MASK) { 700bf21cd93STycho Nightingale panic("malloc of struct vmx not aligned on %d byte boundary", 7012699b94cSPatrick Mooney PAGE_SIZE); 702bf21cd93STycho Nightingale } 703bf21cd93STycho Nightingale vmx->vm = vm; 704bf21cd93STycho Nightingale 7050153d828SPatrick Mooney 
vmx->eptp = vmspace_table_root(vm_get_vmspace(vm)); 7064c87aefeSPatrick Mooney 707bf21cd93STycho Nightingale /* 708d1c02647SPatrick Mooney * Clean up EP4TA-tagged guest-physical and combined mappings 709bf21cd93STycho Nightingale * 710bf21cd93STycho Nightingale * VMX transitions are not required to invalidate any guest physical 711bf21cd93STycho Nightingale * mappings. So, it may be possible for stale guest physical mappings 712bf21cd93STycho Nightingale * to be present in the processor TLBs. 713bf21cd93STycho Nightingale * 714bf21cd93STycho Nightingale * Combined mappings for this EP4TA are also invalidated for all VPIDs. 715bf21cd93STycho Nightingale */ 7160153d828SPatrick Mooney hma_vmx_invept_allcpus((uintptr_t)vmx->eptp); 717bf21cd93STycho Nightingale 7186b641d7aSPatrick Mooney vmx_msr_bitmap_initialize(vmx); 719bf21cd93STycho Nightingale 720bf21cd93STycho Nightingale vpid_alloc(vpid, VM_MAXCPU); 721bf21cd93STycho Nightingale 722c3ae3afaSPatrick Mooney /* Grab the established defaults */ 723c3ae3afaSPatrick Mooney proc_ctls = procbased_ctls; 724c3ae3afaSPatrick Mooney proc2_ctls = procbased_ctls2; 725c3ae3afaSPatrick Mooney pin_ctls = pinbased_ctls; 726c3ae3afaSPatrick Mooney /* For now, default to the available capabilities */ 727c3ae3afaSPatrick Mooney vmx->vmx_caps = vmx_capabilities; 728c3ae3afaSPatrick Mooney 729c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) { 730c3ae3afaSPatrick Mooney proc_ctls |= PROCBASED_USE_TPR_SHADOW; 731c3ae3afaSPatrick Mooney proc_ctls &= ~PROCBASED_CR8_LOAD_EXITING; 732c3ae3afaSPatrick Mooney proc_ctls &= ~PROCBASED_CR8_STORE_EXITING; 733c3ae3afaSPatrick Mooney } 734c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV)) { 735c3ae3afaSPatrick Mooney ASSERT(vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)); 736c3ae3afaSPatrick Mooney 737c3ae3afaSPatrick Mooney proc2_ctls |= (PROCBASED2_VIRTUALIZE_APIC_ACCESSES | 738c3ae3afaSPatrick Mooney PROCBASED2_APIC_REGISTER_VIRTUALIZATION | 739c3ae3afaSPatrick Mooney 
PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY); 740c3ae3afaSPatrick Mooney 7416b641d7aSPatrick Mooney /* 7426b641d7aSPatrick Mooney * Allocate a page of memory to back the APIC access address for 7436b641d7aSPatrick Mooney * when APICv features are in use. Guest MMIO accesses should 7446b641d7aSPatrick Mooney * never actually reach this page, but rather be intercepted. 7456b641d7aSPatrick Mooney */ 7466b641d7aSPatrick Mooney vmx->apic_access_page = kmem_zalloc(PAGESIZE, KM_SLEEP); 7476b641d7aSPatrick Mooney VERIFY3U((uintptr_t)vmx->apic_access_page & PAGEOFFSET, ==, 0); 7486b641d7aSPatrick Mooney apic_access_pa = vtophys(vmx->apic_access_page); 7496b641d7aSPatrick Mooney 7504c87aefeSPatrick Mooney error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, 7516b641d7aSPatrick Mooney apic_access_pa); 7524c87aefeSPatrick Mooney /* XXX this should really return an error to the caller */ 7534c87aefeSPatrick Mooney KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error)); 7544c87aefeSPatrick Mooney } 755c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) { 756c3ae3afaSPatrick Mooney ASSERT(vmx_cap_en(vmx, VMX_CAP_APICV)); 757c3ae3afaSPatrick Mooney 758c3ae3afaSPatrick Mooney pin_ctls |= PINBASED_POSTED_INTERRUPT; 759c3ae3afaSPatrick Mooney } 7604c87aefeSPatrick Mooney 7614c87aefeSPatrick Mooney maxcpus = vm_get_maxcpus(vm); 762007ca332SPatrick Mooney datasel = vmm_get_host_datasel(); 7634c87aefeSPatrick Mooney for (i = 0; i < maxcpus; i++) { 7644c87aefeSPatrick Mooney /* 7654c87aefeSPatrick Mooney * Cache physical address lookups for various components which 7664c87aefeSPatrick Mooney * may be required inside the critical_enter() section implied 7674c87aefeSPatrick Mooney * by VMPTRLD() below. 
7684c87aefeSPatrick Mooney */ 7696b641d7aSPatrick Mooney vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap[i]); 7704c87aefeSPatrick Mooney vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]); 7714c87aefeSPatrick Mooney vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]); 7724c87aefeSPatrick Mooney 773007ca332SPatrick Mooney vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]); 774007ca332SPatrick Mooney vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]); 775bf21cd93STycho Nightingale 776bf21cd93STycho Nightingale vmx_msr_guest_init(vmx, i); 777bf21cd93STycho Nightingale 778007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[i]); 779bf21cd93STycho Nightingale 780007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat()); 781007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer()); 782007ca332SPatrick Mooney 783007ca332SPatrick Mooney /* Load the control registers */ 784007ca332SPatrick Mooney vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0()); 785007ca332SPatrick Mooney vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE); 786007ca332SPatrick Mooney 787007ca332SPatrick Mooney /* Load the segment selectors */ 788007ca332SPatrick Mooney vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel()); 789007ca332SPatrick Mooney 790007ca332SPatrick Mooney vmcs_write(VMCS_HOST_ES_SELECTOR, datasel); 791007ca332SPatrick Mooney vmcs_write(VMCS_HOST_SS_SELECTOR, datasel); 792007ca332SPatrick Mooney vmcs_write(VMCS_HOST_DS_SELECTOR, datasel); 793007ca332SPatrick Mooney 794007ca332SPatrick Mooney vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel()); 795007ca332SPatrick Mooney vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel()); 796007ca332SPatrick Mooney vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel()); 797007ca332SPatrick Mooney 798007ca332SPatrick Mooney /* 799007ca332SPatrick Mooney * Configure host sysenter MSRs to be restored on VM exit. 
8002699b94cSPatrick Mooney * The thread-specific MSR_INTC_SEP_ESP value is loaded in 8012699b94cSPatrick Mooney * vmx_run. 802007ca332SPatrick Mooney */ 803007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL); 804007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP, 805007ca332SPatrick Mooney rdmsr(MSR_SYSENTER_EIP_MSR)); 806007ca332SPatrick Mooney 807007ca332SPatrick Mooney /* instruction pointer */ 808007ca332SPatrick Mooney if (no_flush_rsb) { 809007ca332SPatrick Mooney vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest); 810007ca332SPatrick Mooney } else { 811007ca332SPatrick Mooney vmcs_write(VMCS_HOST_RIP, 812007ca332SPatrick Mooney (uint64_t)vmx_exit_guest_flush_rsb); 813007ca332SPatrick Mooney } 814c3ae3afaSPatrick Mooney 815007ca332SPatrick Mooney /* link pointer */ 816007ca332SPatrick Mooney vmcs_write(VMCS_LINK_POINTER, ~0); 817007ca332SPatrick Mooney 818007ca332SPatrick Mooney vmcs_write(VMCS_EPTP, vmx->eptp); 819007ca332SPatrick Mooney vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls); 820007ca332SPatrick Mooney vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); 821007ca332SPatrick Mooney vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); 822007ca332SPatrick Mooney vmcs_write(VMCS_EXIT_CTLS, exit_ctls); 823007ca332SPatrick Mooney vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); 824007ca332SPatrick Mooney vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa); 825007ca332SPatrick Mooney vmcs_write(VMCS_VPID, vpid[i]); 8264c87aefeSPatrick Mooney 8274c87aefeSPatrick Mooney if (guest_l1d_flush && !guest_l1d_flush_sw) { 8280153d828SPatrick Mooney vmcs_write(VMCS_ENTRY_MSR_LOAD, 8290153d828SPatrick Mooney vtophys(&msr_load_list[0])); 8304c87aefeSPatrick Mooney vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT, 8314c87aefeSPatrick Mooney nitems(msr_load_list)); 8324c87aefeSPatrick Mooney vmcs_write(VMCS_EXIT_MSR_STORE, 0); 8334c87aefeSPatrick Mooney vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0); 8344c87aefeSPatrick Mooney } 835bf21cd93STycho Nightingale 8364c87aefeSPatrick 
Mooney /* exception bitmap */ 8374c87aefeSPatrick Mooney if (vcpu_trace_exceptions(vm, i)) 8384c87aefeSPatrick Mooney exc_bitmap = 0xffffffff; 8394c87aefeSPatrick Mooney else 8404c87aefeSPatrick Mooney exc_bitmap = 1 << IDT_MC; 841007ca332SPatrick Mooney vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap); 842bf21cd93STycho Nightingale 8434c87aefeSPatrick Mooney vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; 844007ca332SPatrick Mooney vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); 845bf21cd93STycho Nightingale 846c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) { 847007ca332SPatrick Mooney vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa); 848154972afSPatrick Mooney } 849154972afSPatrick Mooney 850c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV)) { 8516b641d7aSPatrick Mooney vmcs_write(VMCS_APIC_ACCESS, apic_access_pa); 852007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT0, 0); 853007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT1, 0); 854007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT2, 0); 855007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT3, 0); 8564c87aefeSPatrick Mooney } 857c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) { 858007ca332SPatrick Mooney vmcs_write(VMCS_PIR_VECTOR, pirvec); 859007ca332SPatrick Mooney vmcs_write(VMCS_PIR_DESC, pir_desc_pa); 8604c87aefeSPatrick Mooney } 861007ca332SPatrick Mooney 862007ca332SPatrick Mooney /* 863007ca332SPatrick Mooney * Set up the CR0/4 masks and configure the read shadow state 864007ca332SPatrick Mooney * to the power-on register value from the Intel Sys Arch. 
865007ca332SPatrick Mooney * CR0 - 0x60000010 866007ca332SPatrick Mooney * CR4 - 0 867007ca332SPatrick Mooney */ 868007ca332SPatrick Mooney vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask); 869007ca332SPatrick Mooney vmcs_write(VMCS_CR0_SHADOW, 0x60000010); 870007ca332SPatrick Mooney vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask); 871007ca332SPatrick Mooney vmcs_write(VMCS_CR4_SHADOW, 0); 872007ca332SPatrick Mooney 873007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[i]); 874bf21cd93STycho Nightingale 8754c87aefeSPatrick Mooney vmx->cap[i].set = 0; 876c3ae3afaSPatrick Mooney vmx->cap[i].proc_ctls = proc_ctls; 877c3ae3afaSPatrick Mooney vmx->cap[i].proc_ctls2 = proc2_ctls; 878154972afSPatrick Mooney vmx->cap[i].exc_bitmap = exc_bitmap; 879bf21cd93STycho Nightingale 8804c87aefeSPatrick Mooney vmx->state[i].nextrip = ~0; 8814c87aefeSPatrick Mooney vmx->state[i].lastcpu = NOCPU; 8824c87aefeSPatrick Mooney vmx->state[i].vpid = vpid[i]; 883bf21cd93STycho Nightingale } 884bf21cd93STycho Nightingale 885bf21cd93STycho Nightingale return (vmx); 886bf21cd93STycho Nightingale } 887bf21cd93STycho Nightingale 888bf21cd93STycho Nightingale static int 889bf21cd93STycho Nightingale vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) 890bf21cd93STycho Nightingale { 8914c87aefeSPatrick Mooney int handled; 892bf21cd93STycho Nightingale 8933c5f2a9dSPatrick Mooney handled = x86_emulate_cpuid(vm, vcpu, (uint64_t *)&vmxctx->guest_rax, 8943c5f2a9dSPatrick Mooney (uint64_t *)&vmxctx->guest_rbx, (uint64_t *)&vmxctx->guest_rcx, 8953c5f2a9dSPatrick Mooney (uint64_t *)&vmxctx->guest_rdx); 896bf21cd93STycho Nightingale return (handled); 897bf21cd93STycho Nightingale } 898bf21cd93STycho Nightingale 8994c87aefeSPatrick Mooney static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); 9004c87aefeSPatrick Mooney static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done"); 9014c87aefeSPatrick Mooney 902007ca332SPatrick Mooney 
#define INVVPID_TYPE_ADDRESS 0UL 903007ca332SPatrick Mooney #define INVVPID_TYPE_SINGLE_CONTEXT 1UL 904007ca332SPatrick Mooney #define INVVPID_TYPE_ALL_CONTEXTS 2UL 905007ca332SPatrick Mooney 906007ca332SPatrick Mooney struct invvpid_desc { 907007ca332SPatrick Mooney uint16_t vpid; 908007ca332SPatrick Mooney uint16_t _res1; 909007ca332SPatrick Mooney uint32_t _res2; 910007ca332SPatrick Mooney uint64_t linear_addr; 911007ca332SPatrick Mooney }; 9122699b94cSPatrick Mooney CTASSERT(sizeof (struct invvpid_desc) == 16); 913007ca332SPatrick Mooney 914007ca332SPatrick Mooney static __inline void 915007ca332SPatrick Mooney invvpid(uint64_t type, struct invvpid_desc desc) 916007ca332SPatrick Mooney { 917007ca332SPatrick Mooney int error; 918007ca332SPatrick Mooney 91970ae9a33SPatrick Mooney DTRACE_PROBE3(vmx__invvpid, uint64_t, type, uint16_t, desc.vpid, 92070ae9a33SPatrick Mooney uint64_t, desc.linear_addr); 92170ae9a33SPatrick Mooney 922007ca332SPatrick Mooney __asm __volatile("invvpid %[desc], %[type];" 923007ca332SPatrick Mooney VMX_SET_ERROR_CODE_ASM 924007ca332SPatrick Mooney : [error] "=r" (error) 925007ca332SPatrick Mooney : [desc] "m" (desc), [type] "r" (type) 926007ca332SPatrick Mooney : "memory"); 927007ca332SPatrick Mooney 92870ae9a33SPatrick Mooney if (error) { 929007ca332SPatrick Mooney panic("invvpid error %d", error); 93070ae9a33SPatrick Mooney } 931007ca332SPatrick Mooney } 932007ca332SPatrick Mooney 9334c87aefeSPatrick Mooney /* 934d1c02647SPatrick Mooney * Invalidate guest mappings identified by its VPID from the TLB. 935d1c02647SPatrick Mooney * 936d1c02647SPatrick Mooney * This is effectively a flush of the guest TLB, removing only "combined 937d1c02647SPatrick Mooney * mappings" (to use the VMX parlance). Actions which modify the EPT structures 938d1c02647SPatrick Mooney * for the instance (such as unmapping GPAs) would require an 'invept' flush. 
9394c87aefeSPatrick Mooney */ 94070ae9a33SPatrick Mooney static void 9410153d828SPatrick Mooney vmx_invvpid(struct vmx *vmx, int vcpu, int running) 942bf21cd93STycho Nightingale { 943bf21cd93STycho Nightingale struct vmxstate *vmxstate; 9440153d828SPatrick Mooney struct vmspace *vms; 945bf21cd93STycho Nightingale 946bf21cd93STycho Nightingale vmxstate = &vmx->state[vcpu]; 94770ae9a33SPatrick Mooney if (vmxstate->vpid == 0) { 948bf21cd93STycho Nightingale return; 94970ae9a33SPatrick Mooney } 950bf21cd93STycho Nightingale 9514c87aefeSPatrick Mooney if (!running) { 9524c87aefeSPatrick Mooney /* 9534c87aefeSPatrick Mooney * Set the 'lastcpu' to an invalid host cpu. 9544c87aefeSPatrick Mooney * 9554c87aefeSPatrick Mooney * This will invalidate TLB entries tagged with the vcpu's 9564c87aefeSPatrick Mooney * vpid the next time it runs via vmx_set_pcpu_defaults(). 9574c87aefeSPatrick Mooney */ 9584c87aefeSPatrick Mooney vmxstate->lastcpu = NOCPU; 9594c87aefeSPatrick Mooney return; 9604c87aefeSPatrick Mooney } 961bf21cd93STycho Nightingale 962bf21cd93STycho Nightingale /* 9634c87aefeSPatrick Mooney * Invalidate all mappings tagged with 'vpid' 964bf21cd93STycho Nightingale * 965d1c02647SPatrick Mooney * This is done when a vCPU moves between host CPUs, where there may be 966d1c02647SPatrick Mooney * stale TLB entries for this VPID on the target, or if emulated actions 967d1c02647SPatrick Mooney * in the guest CPU have incurred an explicit TLB flush. 
968bf21cd93STycho Nightingale */ 96970ae9a33SPatrick Mooney vms = vm_get_vmspace(vmx->vm); 9700153d828SPatrick Mooney if (vmspace_table_gen(vms) == vmx->eptgen[curcpu]) { 97170ae9a33SPatrick Mooney struct invvpid_desc invvpid_desc = { 97270ae9a33SPatrick Mooney .vpid = vmxstate->vpid, 97370ae9a33SPatrick Mooney .linear_addr = 0, 97470ae9a33SPatrick Mooney ._res1 = 0, 97570ae9a33SPatrick Mooney ._res2 = 0, 97670ae9a33SPatrick Mooney }; 97770ae9a33SPatrick Mooney 978bf21cd93STycho Nightingale invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); 9794c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_DONE, 1); 9804c87aefeSPatrick Mooney } else { 9814c87aefeSPatrick Mooney /* 982d1c02647SPatrick Mooney * The INVVPID can be skipped if an INVEPT is going to be 983d1c02647SPatrick Mooney * performed before entering the guest. The INVEPT will 984d1c02647SPatrick Mooney * invalidate combined mappings for the EP4TA associated with 985d1c02647SPatrick Mooney * this guest, in all VPIDs. 
9864c87aefeSPatrick Mooney */ 9874c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1); 988bf21cd93STycho Nightingale } 989bf21cd93STycho Nightingale } 990bf21cd93STycho Nightingale 9910153d828SPatrick Mooney static __inline void 9920153d828SPatrick Mooney invept(uint64_t type, uint64_t eptp) 9930153d828SPatrick Mooney { 9940153d828SPatrick Mooney int error; 9950153d828SPatrick Mooney struct invept_desc { 9960153d828SPatrick Mooney uint64_t eptp; 9970153d828SPatrick Mooney uint64_t _resv; 9980153d828SPatrick Mooney } desc = { eptp, 0 }; 9990153d828SPatrick Mooney 100070ae9a33SPatrick Mooney DTRACE_PROBE2(vmx__invept, uint64_t, type, uint64_t, eptp); 100170ae9a33SPatrick Mooney 10020153d828SPatrick Mooney __asm __volatile("invept %[desc], %[type];" 10030153d828SPatrick Mooney VMX_SET_ERROR_CODE_ASM 10040153d828SPatrick Mooney : [error] "=r" (error) 10050153d828SPatrick Mooney : [desc] "m" (desc), [type] "r" (type) 10060153d828SPatrick Mooney : "memory"); 10070153d828SPatrick Mooney 10080153d828SPatrick Mooney if (error != 0) { 10090153d828SPatrick Mooney panic("invvpid error %d", error); 10100153d828SPatrick Mooney } 10110153d828SPatrick Mooney } 10120153d828SPatrick Mooney 10134c87aefeSPatrick Mooney static void 10140153d828SPatrick Mooney vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu) 1015bf21cd93STycho Nightingale { 10164c87aefeSPatrick Mooney struct vmxstate *vmxstate; 1017bf21cd93STycho Nightingale 10184c87aefeSPatrick Mooney /* 10194c87aefeSPatrick Mooney * Regardless of whether the VM appears to have migrated between CPUs, 10204c87aefeSPatrick Mooney * save the host sysenter stack pointer. As it points to the kernel 10214c87aefeSPatrick Mooney * stack of each thread, the correct value must be maintained for every 10224c87aefeSPatrick Mooney * trip into the critical section. 
10234c87aefeSPatrick Mooney */ 10244c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_IA32_SYSENTER_ESP, rdmsr(MSR_SYSENTER_ESP_MSR)); 1025bf21cd93STycho Nightingale 10264c87aefeSPatrick Mooney /* 10274c87aefeSPatrick Mooney * Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or 10284c87aefeSPatrick Mooney * migration between host CPUs with differing TSC values. 10294c87aefeSPatrick Mooney */ 1030007ca332SPatrick Mooney vmx_apply_tsc_adjust(vmx, vcpu); 10314c87aefeSPatrick Mooney 10324c87aefeSPatrick Mooney vmxstate = &vmx->state[vcpu]; 10334c87aefeSPatrick Mooney if (vmxstate->lastcpu == curcpu) 10344c87aefeSPatrick Mooney return; 10354c87aefeSPatrick Mooney 10364c87aefeSPatrick Mooney vmxstate->lastcpu = curcpu; 10374c87aefeSPatrick Mooney 10384c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); 10394c87aefeSPatrick Mooney 10404c87aefeSPatrick Mooney /* Load the per-CPU IDT address */ 10414c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase()); 10424c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); 10434c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); 10444c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); 10450153d828SPatrick Mooney vmx_invvpid(vmx, vcpu, 1); 10464c87aefeSPatrick Mooney } 10474c87aefeSPatrick Mooney 10484c87aefeSPatrick Mooney /* 10494c87aefeSPatrick Mooney * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set. 
10504c87aefeSPatrick Mooney */ 10514c87aefeSPatrick Mooney CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0); 10524c87aefeSPatrick Mooney 10534c87aefeSPatrick Mooney static __inline void 10544c87aefeSPatrick Mooney vmx_set_int_window_exiting(struct vmx *vmx, int vcpu) 1055bf21cd93STycho Nightingale { 1056bf21cd93STycho Nightingale 1057bf21cd93STycho Nightingale if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) == 0) { 1058bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING; 1059bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1060bf21cd93STycho Nightingale VCPU_CTR0(vmx->vm, vcpu, "Enabling interrupt window exiting"); 1061bf21cd93STycho Nightingale } 1062bf21cd93STycho Nightingale } 1063bf21cd93STycho Nightingale 10644c87aefeSPatrick Mooney static __inline void 1065bf21cd93STycho Nightingale vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) 1066bf21cd93STycho Nightingale { 1067bf21cd93STycho Nightingale 1068bf21cd93STycho Nightingale KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, 10699dc804b9SPatrick Mooney ("intr_window_exiting not set: %x", vmx->cap[vcpu].proc_ctls)); 1070bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; 1071bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1072bf21cd93STycho Nightingale VCPU_CTR0(vmx->vm, vcpu, "Disabling interrupt window exiting"); 1073bf21cd93STycho Nightingale } 1074bf21cd93STycho Nightingale 1075c74a40a5SPatrick Mooney static __inline bool 1076c74a40a5SPatrick Mooney vmx_nmi_window_exiting(struct vmx *vmx, int vcpu) 1077c74a40a5SPatrick Mooney { 1078c74a40a5SPatrick Mooney return ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0); 1079c74a40a5SPatrick Mooney } 1080c74a40a5SPatrick Mooney 10814c87aefeSPatrick Mooney static __inline void 1082bf21cd93STycho Nightingale 
vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu) 1083bf21cd93STycho Nightingale { 1084c74a40a5SPatrick Mooney if (!vmx_nmi_window_exiting(vmx, vcpu)) { 1085bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING; 1086bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1087bf21cd93STycho Nightingale } 1088bf21cd93STycho Nightingale } 1089bf21cd93STycho Nightingale 10904c87aefeSPatrick Mooney static __inline void 1091bf21cd93STycho Nightingale vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) 1092bf21cd93STycho Nightingale { 1093c74a40a5SPatrick Mooney ASSERT(vmx_nmi_window_exiting(vmx, vcpu)); 1094bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; 1095bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1096bf21cd93STycho Nightingale } 1097bf21cd93STycho Nightingale 10984c87aefeSPatrick Mooney /* 10994c87aefeSPatrick Mooney * Set the TSC adjustment, taking into account the offsets measured between 11004c87aefeSPatrick Mooney * host physical CPUs. This is required even if the guest has not set a TSC 11014c87aefeSPatrick Mooney * offset since vCPUs inherit the TSC offset of whatever physical CPU it has 11024c87aefeSPatrick Mooney * migrated onto. Without this mitigation, un-synched host TSCs will convey 11034c87aefeSPatrick Mooney * the appearance of TSC time-travel to the guest as its vCPUs migrate. 
11044c87aefeSPatrick Mooney */ 1105007ca332SPatrick Mooney static void 11064c87aefeSPatrick Mooney vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu) 11074c87aefeSPatrick Mooney { 11089250eb13SPatrick Mooney const uint64_t offset = vcpu_tsc_offset(vmx->vm, vcpu, true); 11094c87aefeSPatrick Mooney 11104c87aefeSPatrick Mooney ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET); 11114c87aefeSPatrick Mooney 11129250eb13SPatrick Mooney if (vmx->tsc_offset_active[vcpu] != offset) { 11139250eb13SPatrick Mooney vmcs_write(VMCS_TSC_OFFSET, offset); 11149250eb13SPatrick Mooney vmx->tsc_offset_active[vcpu] = offset; 11154c87aefeSPatrick Mooney } 11164c87aefeSPatrick Mooney } 1117bf21cd93STycho Nightingale 1118*3d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_HWINTR == VM_INTINFO_HWINTR); 1119*3d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_NMI == VM_INTINFO_NMI); 1120*3d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_HWEXCEPTION == VM_INTINFO_HWEXCP); 1121*3d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_SWINTR == VM_INTINFO_SWINTR); 1122*3d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_PRIV_SWEXCEPTION == VM_INTINFO_RESV5); 1123*3d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_SWEXCEPTION == VM_INTINFO_RESV6); 1124*3d097f7dSPatrick Mooney CTASSERT(VMCS_IDT_VEC_ERRCODE_VALID == VM_INTINFO_DEL_ERRCODE); 1125*3d097f7dSPatrick Mooney CTASSERT(VMCS_INTR_T_MASK == VM_INTINFO_MASK_TYPE); 1126*3d097f7dSPatrick Mooney 1127*3d097f7dSPatrick Mooney static uint64_t 1128*3d097f7dSPatrick Mooney vmx_idtvec_to_intinfo(uint32_t info) 1129*3d097f7dSPatrick Mooney { 1130*3d097f7dSPatrick Mooney ASSERT(info & VMCS_IDT_VEC_VALID); 1131*3d097f7dSPatrick Mooney 1132*3d097f7dSPatrick Mooney const uint32_t type = info & VMCS_INTR_T_MASK; 1133*3d097f7dSPatrick Mooney const uint8_t vec = info & 0xff; 1134*3d097f7dSPatrick Mooney 1135*3d097f7dSPatrick Mooney switch (type) { 1136*3d097f7dSPatrick Mooney case VMCS_INTR_T_HWINTR: 1137*3d097f7dSPatrick Mooney case VMCS_INTR_T_NMI: 1138*3d097f7dSPatrick Mooney case 
VMCS_INTR_T_HWEXCEPTION: 1139*3d097f7dSPatrick Mooney case VMCS_INTR_T_SWINTR: 1140*3d097f7dSPatrick Mooney case VMCS_INTR_T_PRIV_SWEXCEPTION: 1141*3d097f7dSPatrick Mooney case VMCS_INTR_T_SWEXCEPTION: 1142*3d097f7dSPatrick Mooney break; 1143*3d097f7dSPatrick Mooney default: 1144*3d097f7dSPatrick Mooney panic("unexpected event type 0x%03x", type); 1145*3d097f7dSPatrick Mooney } 1146*3d097f7dSPatrick Mooney 1147*3d097f7dSPatrick Mooney uint64_t intinfo = VM_INTINFO_VALID | type | vec; 1148*3d097f7dSPatrick Mooney if (info & VMCS_IDT_VEC_ERRCODE_VALID) { 1149*3d097f7dSPatrick Mooney const uint32_t errcode = vmcs_read(VMCS_IDT_VECTORING_ERROR); 1150*3d097f7dSPatrick Mooney intinfo |= (uint64_t)errcode << 32; 1151*3d097f7dSPatrick Mooney } 1152*3d097f7dSPatrick Mooney 1153*3d097f7dSPatrick Mooney return (intinfo); 1154*3d097f7dSPatrick Mooney } 1155*3d097f7dSPatrick Mooney 1156*3d097f7dSPatrick Mooney static void 1157*3d097f7dSPatrick Mooney vmx_inject_intinfo(uint64_t info) 1158*3d097f7dSPatrick Mooney { 1159*3d097f7dSPatrick Mooney ASSERT(VM_INTINFO_PENDING(info)); 1160*3d097f7dSPatrick Mooney ASSERT0(info & VM_INTINFO_MASK_RSVD); 1161*3d097f7dSPatrick Mooney 1162*3d097f7dSPatrick Mooney /* 1163*3d097f7dSPatrick Mooney * The bhyve format matches that of the VMCS, which is ensured by the 1164*3d097f7dSPatrick Mooney * CTASSERTs above. 1165*3d097f7dSPatrick Mooney */ 1166*3d097f7dSPatrick Mooney uint32_t inject = info; 1167*3d097f7dSPatrick Mooney switch (VM_INTINFO_VECTOR(info)) { 1168*3d097f7dSPatrick Mooney case IDT_BP: 1169*3d097f7dSPatrick Mooney case IDT_OF: 1170*3d097f7dSPatrick Mooney /* 1171*3d097f7dSPatrick Mooney * VT-x requires #BP and #OF to be injected as software 1172*3d097f7dSPatrick Mooney * exceptions. 
1173*3d097f7dSPatrick Mooney */ 1174*3d097f7dSPatrick Mooney inject &= ~VMCS_INTR_T_MASK; 1175*3d097f7dSPatrick Mooney inject |= VMCS_INTR_T_SWEXCEPTION; 1176*3d097f7dSPatrick Mooney break; 1177*3d097f7dSPatrick Mooney default: 1178*3d097f7dSPatrick Mooney break; 1179*3d097f7dSPatrick Mooney } 1180*3d097f7dSPatrick Mooney 1181*3d097f7dSPatrick Mooney if (VM_INTINFO_HAS_ERRCODE(info)) { 1182*3d097f7dSPatrick Mooney vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR, 1183*3d097f7dSPatrick Mooney VM_INTINFO_ERRCODE(info)); 1184*3d097f7dSPatrick Mooney } 1185*3d097f7dSPatrick Mooney vmcs_write(VMCS_ENTRY_INTR_INFO, inject); 1186*3d097f7dSPatrick Mooney } 1187*3d097f7dSPatrick Mooney 1188bf21cd93STycho Nightingale #define NMI_BLOCKING (VMCS_INTERRUPTIBILITY_NMI_BLOCKING | \ 11892699b94cSPatrick Mooney VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) 1190bf21cd93STycho Nightingale #define HWINTR_BLOCKING (VMCS_INTERRUPTIBILITY_STI_BLOCKING | \ 11912699b94cSPatrick Mooney VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING) 1192bf21cd93STycho Nightingale 1193bf21cd93STycho Nightingale static void 1194bf21cd93STycho Nightingale vmx_inject_nmi(struct vmx *vmx, int vcpu) 1195bf21cd93STycho Nightingale { 1196c74a40a5SPatrick Mooney ASSERT0(vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & NMI_BLOCKING); 1197c74a40a5SPatrick Mooney ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID); 1198bf21cd93STycho Nightingale 1199bf21cd93STycho Nightingale /* 1200bf21cd93STycho Nightingale * Inject the virtual NMI. The vector must be the NMI IDT entry 1201bf21cd93STycho Nightingale * or the VMCS entry check will fail. 
1202bf21cd93STycho Nightingale */ 1203c74a40a5SPatrick Mooney vmcs_write(VMCS_ENTRY_INTR_INFO, 1204c74a40a5SPatrick Mooney IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID); 1205bf21cd93STycho Nightingale 1206bf21cd93STycho Nightingale /* Clear the request */ 1207bf21cd93STycho Nightingale vm_nmi_clear(vmx->vm, vcpu); 1208bf21cd93STycho Nightingale } 1209bf21cd93STycho Nightingale 1210c74a40a5SPatrick Mooney /* 1211c74a40a5SPatrick Mooney * Inject exceptions, NMIs, and ExtINTs. 1212c74a40a5SPatrick Mooney * 1213c74a40a5SPatrick Mooney * The logic behind these are complicated and may involve mutex contention, so 1214c74a40a5SPatrick Mooney * the injection is performed without the protection of host CPU interrupts 1215c74a40a5SPatrick Mooney * being disabled. This means a racing notification could be "lost", 1216c74a40a5SPatrick Mooney * necessitating a later call to vmx_inject_recheck() to close that window 1217c74a40a5SPatrick Mooney * of opportunity. 1218c74a40a5SPatrick Mooney */ 1219c74a40a5SPatrick Mooney static enum event_inject_state 1220c74a40a5SPatrick Mooney vmx_inject_events(struct vmx *vmx, int vcpu, uint64_t rip) 1221bf21cd93STycho Nightingale { 1222c74a40a5SPatrick Mooney uint64_t entryinfo; 1223bf21cd93STycho Nightingale uint32_t gi, info; 12244c87aefeSPatrick Mooney int vector; 1225c74a40a5SPatrick Mooney enum event_inject_state state; 12264c87aefeSPatrick Mooney 12274c87aefeSPatrick Mooney gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 12284c87aefeSPatrick Mooney info = vmcs_read(VMCS_ENTRY_INTR_INFO); 1229c74a40a5SPatrick Mooney state = EIS_CAN_INJECT; 12304c87aefeSPatrick Mooney 1231c74a40a5SPatrick Mooney /* Clear any interrupt blocking if the guest %rip has changed */ 1232c74a40a5SPatrick Mooney if (vmx->state[vcpu].nextrip != rip && (gi & HWINTR_BLOCKING) != 0) { 12334c87aefeSPatrick Mooney gi &= ~HWINTR_BLOCKING; 12344c87aefeSPatrick Mooney vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); 12354c87aefeSPatrick Mooney } 12364c87aefeSPatrick Mooney 
12374c87aefeSPatrick Mooney /* 12384c87aefeSPatrick Mooney * It could be that an interrupt is already pending for injection from 12394c87aefeSPatrick Mooney * the VMCS. This would be the case if the vCPU exited for conditions 12404c87aefeSPatrick Mooney * such as an AST before a vm-entry delivered the injection. 12414c87aefeSPatrick Mooney */ 12424c87aefeSPatrick Mooney if ((info & VMCS_INTR_VALID) != 0) { 1243c74a40a5SPatrick Mooney return (EIS_EV_EXISTING | EIS_REQ_EXIT); 12444c87aefeSPatrick Mooney } 1245bf21cd93STycho Nightingale 1246bf21cd93STycho Nightingale if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) { 1247*3d097f7dSPatrick Mooney vmx_inject_intinfo(entryinfo); 1248c74a40a5SPatrick Mooney state = EIS_EV_INJECTED; 12494c87aefeSPatrick Mooney } 12504c87aefeSPatrick Mooney 12514c87aefeSPatrick Mooney if (vm_nmi_pending(vmx->vm, vcpu)) { 12524c87aefeSPatrick Mooney /* 1253c74a40a5SPatrick Mooney * If there are no conditions blocking NMI injection then inject 1254c74a40a5SPatrick Mooney * it directly here otherwise enable "NMI window exiting" to 1255c74a40a5SPatrick Mooney * inject it as soon as we can. 12564c87aefeSPatrick Mooney * 1257c74a40a5SPatrick Mooney * According to the Intel manual, some CPUs do not allow NMI 1258c74a40a5SPatrick Mooney * injection when STI_BLOCKING is active. That check is 1259c74a40a5SPatrick Mooney * enforced here, regardless of CPU capability. If running on a 1260c74a40a5SPatrick Mooney * CPU without such a restriction it will immediately exit and 1261c74a40a5SPatrick Mooney * the NMI will be injected in the "NMI window exiting" handler. 
12624c87aefeSPatrick Mooney */ 12634c87aefeSPatrick Mooney if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) { 1264c74a40a5SPatrick Mooney if (state == EIS_CAN_INJECT) { 1265c74a40a5SPatrick Mooney vmx_inject_nmi(vmx, vcpu); 1266c74a40a5SPatrick Mooney state = EIS_EV_INJECTED; 12674c87aefeSPatrick Mooney } else { 1268c74a40a5SPatrick Mooney return (state | EIS_REQ_EXIT); 12694c87aefeSPatrick Mooney } 12704c87aefeSPatrick Mooney } else { 12714c87aefeSPatrick Mooney vmx_set_nmi_window_exiting(vmx, vcpu); 12724c87aefeSPatrick Mooney } 12734c87aefeSPatrick Mooney } 12744c87aefeSPatrick Mooney 12754c87aefeSPatrick Mooney if (vm_extint_pending(vmx->vm, vcpu)) { 1276c74a40a5SPatrick Mooney if (state != EIS_CAN_INJECT) { 1277c74a40a5SPatrick Mooney return (state | EIS_REQ_EXIT); 1278c74a40a5SPatrick Mooney } 1279c74a40a5SPatrick Mooney if ((gi & HWINTR_BLOCKING) != 0 || 1280c74a40a5SPatrick Mooney (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) { 1281c74a40a5SPatrick Mooney return (EIS_GI_BLOCK); 1282c74a40a5SPatrick Mooney } 1283c74a40a5SPatrick Mooney 12844c87aefeSPatrick Mooney /* Ask the legacy pic for a vector to inject */ 12854c87aefeSPatrick Mooney vatpic_pending_intr(vmx->vm, &vector); 12864c87aefeSPatrick Mooney 12874c87aefeSPatrick Mooney /* 12884c87aefeSPatrick Mooney * From the Intel SDM, Volume 3, Section "Maskable 12894c87aefeSPatrick Mooney * Hardware Interrupts": 12904c87aefeSPatrick Mooney * - maskable interrupt vectors [0,255] can be delivered 12914c87aefeSPatrick Mooney * through the INTR pin. 
12924c87aefeSPatrick Mooney */ 12934c87aefeSPatrick Mooney KASSERT(vector >= 0 && vector <= 255, 12944c87aefeSPatrick Mooney ("invalid vector %d from INTR", vector)); 12954c87aefeSPatrick Mooney 1296c74a40a5SPatrick Mooney /* Inject the interrupt */ 1297c74a40a5SPatrick Mooney vmcs_write(VMCS_ENTRY_INTR_INFO, 1298c74a40a5SPatrick Mooney VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector); 12994c87aefeSPatrick Mooney 1300c74a40a5SPatrick Mooney vm_extint_clear(vmx->vm, vcpu); 1301c74a40a5SPatrick Mooney vatpic_intr_accepted(vmx->vm, vector); 1302c74a40a5SPatrick Mooney state = EIS_EV_INJECTED; 13034c87aefeSPatrick Mooney } 1304c74a40a5SPatrick Mooney 1305c74a40a5SPatrick Mooney return (state); 1306c74a40a5SPatrick Mooney } 1307c74a40a5SPatrick Mooney 1308c74a40a5SPatrick Mooney /* 1309c74a40a5SPatrick Mooney * Inject any interrupts pending on the vLAPIC. 1310c74a40a5SPatrick Mooney * 1311c74a40a5SPatrick Mooney * This is done with host CPU interrupts disabled so notification IPIs, either 1312c74a40a5SPatrick Mooney * from the standard vCPU notification or APICv posted interrupts, will be 1313c74a40a5SPatrick Mooney * queued on the host APIC and recognized when entering VMX context. 1314c74a40a5SPatrick Mooney */ 1315c74a40a5SPatrick Mooney static enum event_inject_state 1316c74a40a5SPatrick Mooney vmx_inject_vlapic(struct vmx *vmx, int vcpu, struct vlapic *vlapic) 1317c74a40a5SPatrick Mooney { 1318c74a40a5SPatrick Mooney int vector; 1319c74a40a5SPatrick Mooney 1320c74a40a5SPatrick Mooney if (!vlapic_pending_intr(vlapic, &vector)) { 1321c74a40a5SPatrick Mooney return (EIS_CAN_INJECT); 13224c87aefeSPatrick Mooney } 13234c87aefeSPatrick Mooney 1324c74a40a5SPatrick Mooney /* 1325c74a40a5SPatrick Mooney * From the Intel SDM, Volume 3, Section "Maskable 1326c74a40a5SPatrick Mooney * Hardware Interrupts": 1327c74a40a5SPatrick Mooney * - maskable interrupt vectors [16,255] can be delivered 1328c74a40a5SPatrick Mooney * through the local APIC. 
13292699b94cSPatrick Mooney */ 1330c74a40a5SPatrick Mooney KASSERT(vector >= 16 && vector <= 255, 1331c74a40a5SPatrick Mooney ("invalid vector %d from local APIC", vector)); 13324c87aefeSPatrick Mooney 1333c74a40a5SPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV)) { 1334c74a40a5SPatrick Mooney uint16_t status_old = vmcs_read(VMCS_GUEST_INTR_STATUS); 1335c74a40a5SPatrick Mooney uint16_t status_new = (status_old & 0xff00) | vector; 13364c87aefeSPatrick Mooney 13374c87aefeSPatrick Mooney /* 1338c74a40a5SPatrick Mooney * The APICv state will have been synced into the vLAPIC 1339c74a40a5SPatrick Mooney * as part of vlapic_pending_intr(). Prepare the VMCS 1340c74a40a5SPatrick Mooney * for the to-be-injected pending interrupt. 13414c87aefeSPatrick Mooney */ 1342c74a40a5SPatrick Mooney if (status_new > status_old) { 1343c74a40a5SPatrick Mooney vmcs_write(VMCS_GUEST_INTR_STATUS, status_new); 1344c74a40a5SPatrick Mooney VCPU_CTR2(vlapic->vm, vlapic->vcpuid, 1345c74a40a5SPatrick Mooney "vmx_inject_interrupts: guest_intr_status " 1346c74a40a5SPatrick Mooney "changed from 0x%04x to 0x%04x", 1347c74a40a5SPatrick Mooney status_old, status_new); 1348c74a40a5SPatrick Mooney } 1349c74a40a5SPatrick Mooney 1350c74a40a5SPatrick Mooney /* 1351c74a40a5SPatrick Mooney * Ensure VMCS state regarding EOI traps is kept in sync 1352c74a40a5SPatrick Mooney * with the TMRs in the vlapic. 1353c74a40a5SPatrick Mooney */ 1354c74a40a5SPatrick Mooney vmx_apicv_sync_tmr(vlapic); 1355c74a40a5SPatrick Mooney 1356c74a40a5SPatrick Mooney /* 1357c74a40a5SPatrick Mooney * The rest of the injection process for injecting the 1358c74a40a5SPatrick Mooney * interrupt(s) is handled by APICv. It does not preclude other 1359c74a40a5SPatrick Mooney * event injection from occurring. 
1360c74a40a5SPatrick Mooney */ 1361c74a40a5SPatrick Mooney return (EIS_CAN_INJECT); 13624c87aefeSPatrick Mooney } 13634c87aefeSPatrick Mooney 1364c74a40a5SPatrick Mooney ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID); 13654c87aefeSPatrick Mooney 1366c74a40a5SPatrick Mooney /* Does guest interruptability block injection? */ 1367c74a40a5SPatrick Mooney if ((vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & HWINTR_BLOCKING) != 0 || 1368c74a40a5SPatrick Mooney (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) { 1369c74a40a5SPatrick Mooney return (EIS_GI_BLOCK); 1370c74a40a5SPatrick Mooney } 1371c74a40a5SPatrick Mooney 1372c74a40a5SPatrick Mooney /* Inject the interrupt */ 1373c74a40a5SPatrick Mooney vmcs_write(VMCS_ENTRY_INTR_INFO, 1374c74a40a5SPatrick Mooney VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector); 1375c74a40a5SPatrick Mooney 1376c74a40a5SPatrick Mooney /* Update the Local APIC ISR */ 1377c74a40a5SPatrick Mooney vlapic_intr_accepted(vlapic, vector); 1378c74a40a5SPatrick Mooney 1379c74a40a5SPatrick Mooney return (EIS_EV_INJECTED); 1380c74a40a5SPatrick Mooney } 1381c74a40a5SPatrick Mooney 1382c74a40a5SPatrick Mooney /* 1383c74a40a5SPatrick Mooney * Re-check for events to be injected. 1384c74a40a5SPatrick Mooney * 1385c74a40a5SPatrick Mooney * Once host CPU interrupts are disabled, check for the presence of any events 1386c74a40a5SPatrick Mooney * which require injection processing. If an exit is required upon injection, 1387c74a40a5SPatrick Mooney * or once the guest becomes interruptable, that will be configured too. 
1388c74a40a5SPatrick Mooney */ 1389c74a40a5SPatrick Mooney static bool 1390c74a40a5SPatrick Mooney vmx_inject_recheck(struct vmx *vmx, int vcpu, enum event_inject_state state) 1391c74a40a5SPatrick Mooney { 1392c74a40a5SPatrick Mooney if (state == EIS_CAN_INJECT) { 1393c74a40a5SPatrick Mooney if (vm_nmi_pending(vmx->vm, vcpu) && 1394c74a40a5SPatrick Mooney !vmx_nmi_window_exiting(vmx, vcpu)) { 1395c74a40a5SPatrick Mooney /* queued NMI not blocked by NMI-window-exiting */ 1396c74a40a5SPatrick Mooney return (true); 1397c74a40a5SPatrick Mooney } 1398c74a40a5SPatrick Mooney if (vm_extint_pending(vmx->vm, vcpu)) { 1399c74a40a5SPatrick Mooney /* queued ExtINT not blocked by existing injection */ 1400c74a40a5SPatrick Mooney return (true); 1401c74a40a5SPatrick Mooney } 1402c74a40a5SPatrick Mooney } else { 1403c74a40a5SPatrick Mooney if ((state & EIS_REQ_EXIT) != 0) { 1404c74a40a5SPatrick Mooney /* 1405c74a40a5SPatrick Mooney * Use a self-IPI to force an immediate exit after 1406c74a40a5SPatrick Mooney * event injection has occurred. 1407c74a40a5SPatrick Mooney */ 1408c74a40a5SPatrick Mooney poke_cpu(CPU->cpu_id); 1409c74a40a5SPatrick Mooney } else { 1410c74a40a5SPatrick Mooney /* 1411c74a40a5SPatrick Mooney * If any event is being injected, an exit immediately 1412c74a40a5SPatrick Mooney * upon becoming interruptable again will allow pending 1413c74a40a5SPatrick Mooney * or newly queued events to be injected in a timely 1414c74a40a5SPatrick Mooney * manner. 
1415c74a40a5SPatrick Mooney */ 1416c74a40a5SPatrick Mooney vmx_set_int_window_exiting(vmx, vcpu); 1417c74a40a5SPatrick Mooney } 1418c74a40a5SPatrick Mooney } 1419c74a40a5SPatrick Mooney return (false); 14204c87aefeSPatrick Mooney } 1421bf21cd93STycho Nightingale 1422bf21cd93STycho Nightingale /* 1423bf21cd93STycho Nightingale * If the Virtual NMIs execution control is '1' then the logical processor 1424bf21cd93STycho Nightingale * tracks virtual-NMI blocking in the Guest Interruptibility-state field of 1425bf21cd93STycho Nightingale * the VMCS. An IRET instruction in VMX non-root operation will remove any 1426bf21cd93STycho Nightingale * virtual-NMI blocking. 1427bf21cd93STycho Nightingale * 1428bf21cd93STycho Nightingale * This unblocking occurs even if the IRET causes a fault. In this case the 1429bf21cd93STycho Nightingale * hypervisor needs to restore virtual-NMI blocking before resuming the guest. 1430bf21cd93STycho Nightingale */ 1431bf21cd93STycho Nightingale static void 1432bf21cd93STycho Nightingale vmx_restore_nmi_blocking(struct vmx *vmx, int vcpuid) 1433bf21cd93STycho Nightingale { 1434bf21cd93STycho Nightingale uint32_t gi; 1435bf21cd93STycho Nightingale 1436bf21cd93STycho Nightingale VCPU_CTR0(vmx->vm, vcpuid, "Restore Virtual-NMI blocking"); 1437bf21cd93STycho Nightingale gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 1438bf21cd93STycho Nightingale gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING; 1439bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); 1440bf21cd93STycho Nightingale } 1441bf21cd93STycho Nightingale 1442bf21cd93STycho Nightingale static void 1443bf21cd93STycho Nightingale vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid) 1444bf21cd93STycho Nightingale { 1445bf21cd93STycho Nightingale uint32_t gi; 1446bf21cd93STycho Nightingale 1447bf21cd93STycho Nightingale VCPU_CTR0(vmx->vm, vcpuid, "Clear Virtual-NMI blocking"); 1448bf21cd93STycho Nightingale gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 1449bf21cd93STycho Nightingale gi 
&= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING; 1450bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); 1451bf21cd93STycho Nightingale } 1452bf21cd93STycho Nightingale 14534c87aefeSPatrick Mooney static void 14544c87aefeSPatrick Mooney vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid) 14554c87aefeSPatrick Mooney { 14564c87aefeSPatrick Mooney uint32_t gi; 14574c87aefeSPatrick Mooney 14584c87aefeSPatrick Mooney gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 14594c87aefeSPatrick Mooney KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING, 14609dc804b9SPatrick Mooney ("NMI blocking is not in effect %x", gi)); 14614c87aefeSPatrick Mooney } 14624c87aefeSPatrick Mooney 14634c87aefeSPatrick Mooney static int 14644c87aefeSPatrick Mooney vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) 14654c87aefeSPatrick Mooney { 14664c87aefeSPatrick Mooney struct vmxctx *vmxctx; 14674c87aefeSPatrick Mooney uint64_t xcrval; 14684c87aefeSPatrick Mooney const struct xsave_limits *limits; 14694c87aefeSPatrick Mooney 14704c87aefeSPatrick Mooney vmxctx = &vmx->ctx[vcpu]; 14714c87aefeSPatrick Mooney limits = vmm_get_xsave_limits(); 14724c87aefeSPatrick Mooney 14734c87aefeSPatrick Mooney /* 14744c87aefeSPatrick Mooney * Note that the processor raises a GP# fault on its own if 14754c87aefeSPatrick Mooney * xsetbv is executed for CPL != 0, so we do not have to 14764c87aefeSPatrick Mooney * emulate that fault here. 14774c87aefeSPatrick Mooney */ 14784c87aefeSPatrick Mooney 14794c87aefeSPatrick Mooney /* Only xcr0 is supported. */ 14804c87aefeSPatrick Mooney if (vmxctx->guest_rcx != 0) { 14814c87aefeSPatrick Mooney vm_inject_gp(vmx->vm, vcpu); 14824c87aefeSPatrick Mooney return (HANDLED); 14834c87aefeSPatrick Mooney } 14844c87aefeSPatrick Mooney 14854c87aefeSPatrick Mooney /* We only handle xcr0 if both the host and guest have XSAVE enabled. 
*/ 14862699b94cSPatrick Mooney if (!limits->xsave_enabled || 14872699b94cSPatrick Mooney !(vmcs_read(VMCS_GUEST_CR4) & CR4_XSAVE)) { 14884c87aefeSPatrick Mooney vm_inject_ud(vmx->vm, vcpu); 14894c87aefeSPatrick Mooney return (HANDLED); 14904c87aefeSPatrick Mooney } 14914c87aefeSPatrick Mooney 14924c87aefeSPatrick Mooney xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff); 14934c87aefeSPatrick Mooney if ((xcrval & ~limits->xcr0_allowed) != 0) { 14944c87aefeSPatrick Mooney vm_inject_gp(vmx->vm, vcpu); 14954c87aefeSPatrick Mooney return (HANDLED); 14964c87aefeSPatrick Mooney } 14974c87aefeSPatrick Mooney 14984c87aefeSPatrick Mooney if (!(xcrval & XFEATURE_ENABLED_X87)) { 14994c87aefeSPatrick Mooney vm_inject_gp(vmx->vm, vcpu); 15004c87aefeSPatrick Mooney return (HANDLED); 15014c87aefeSPatrick Mooney } 15024c87aefeSPatrick Mooney 15034c87aefeSPatrick Mooney /* AVX (YMM_Hi128) requires SSE. */ 15044c87aefeSPatrick Mooney if (xcrval & XFEATURE_ENABLED_AVX && 15054c87aefeSPatrick Mooney (xcrval & XFEATURE_AVX) != XFEATURE_AVX) { 15064c87aefeSPatrick Mooney vm_inject_gp(vmx->vm, vcpu); 15074c87aefeSPatrick Mooney return (HANDLED); 15084c87aefeSPatrick Mooney } 15094c87aefeSPatrick Mooney 15104c87aefeSPatrick Mooney /* 15114c87aefeSPatrick Mooney * AVX512 requires base AVX (YMM_Hi128) as well as OpMask, 15124c87aefeSPatrick Mooney * ZMM_Hi256, and Hi16_ZMM. 15134c87aefeSPatrick Mooney */ 15144c87aefeSPatrick Mooney if (xcrval & XFEATURE_AVX512 && 15154c87aefeSPatrick Mooney (xcrval & (XFEATURE_AVX512 | XFEATURE_AVX)) != 15164c87aefeSPatrick Mooney (XFEATURE_AVX512 | XFEATURE_AVX)) { 15174c87aefeSPatrick Mooney vm_inject_gp(vmx->vm, vcpu); 15184c87aefeSPatrick Mooney return (HANDLED); 15194c87aefeSPatrick Mooney } 15204c87aefeSPatrick Mooney 15214c87aefeSPatrick Mooney /* 15224c87aefeSPatrick Mooney * Intel MPX requires both bound register state flags to be 15234c87aefeSPatrick Mooney * set. 
15244c87aefeSPatrick Mooney */ 15254c87aefeSPatrick Mooney if (((xcrval & XFEATURE_ENABLED_BNDREGS) != 0) != 15264c87aefeSPatrick Mooney ((xcrval & XFEATURE_ENABLED_BNDCSR) != 0)) { 15274c87aefeSPatrick Mooney vm_inject_gp(vmx->vm, vcpu); 15284c87aefeSPatrick Mooney return (HANDLED); 15294c87aefeSPatrick Mooney } 15304c87aefeSPatrick Mooney 15314c87aefeSPatrick Mooney /* 15324c87aefeSPatrick Mooney * This runs "inside" vmrun() with the guest's FPU state, so 15334c87aefeSPatrick Mooney * modifying xcr0 directly modifies the guest's xcr0, not the 15344c87aefeSPatrick Mooney * host's. 15354c87aefeSPatrick Mooney */ 15364c87aefeSPatrick Mooney load_xcr(0, xcrval); 15374c87aefeSPatrick Mooney return (HANDLED); 15384c87aefeSPatrick Mooney } 15394c87aefeSPatrick Mooney 1540bf21cd93STycho Nightingale static uint64_t 1541bf21cd93STycho Nightingale vmx_get_guest_reg(struct vmx *vmx, int vcpu, int ident) 1542bf21cd93STycho Nightingale { 1543bf21cd93STycho Nightingale const struct vmxctx *vmxctx; 1544bf21cd93STycho Nightingale 1545bf21cd93STycho Nightingale vmxctx = &vmx->ctx[vcpu]; 1546bf21cd93STycho Nightingale 1547bf21cd93STycho Nightingale switch (ident) { 1548bf21cd93STycho Nightingale case 0: 1549bf21cd93STycho Nightingale return (vmxctx->guest_rax); 1550bf21cd93STycho Nightingale case 1: 1551bf21cd93STycho Nightingale return (vmxctx->guest_rcx); 1552bf21cd93STycho Nightingale case 2: 1553bf21cd93STycho Nightingale return (vmxctx->guest_rdx); 1554bf21cd93STycho Nightingale case 3: 1555bf21cd93STycho Nightingale return (vmxctx->guest_rbx); 1556bf21cd93STycho Nightingale case 4: 1557bf21cd93STycho Nightingale return (vmcs_read(VMCS_GUEST_RSP)); 1558bf21cd93STycho Nightingale case 5: 1559bf21cd93STycho Nightingale return (vmxctx->guest_rbp); 1560bf21cd93STycho Nightingale case 6: 1561bf21cd93STycho Nightingale return (vmxctx->guest_rsi); 1562bf21cd93STycho Nightingale case 7: 1563bf21cd93STycho Nightingale return (vmxctx->guest_rdi); 1564bf21cd93STycho Nightingale case 8: 
1565bf21cd93STycho Nightingale return (vmxctx->guest_r8); 1566bf21cd93STycho Nightingale case 9: 1567bf21cd93STycho Nightingale return (vmxctx->guest_r9); 1568bf21cd93STycho Nightingale case 10: 1569bf21cd93STycho Nightingale return (vmxctx->guest_r10); 1570bf21cd93STycho Nightingale case 11: 1571bf21cd93STycho Nightingale return (vmxctx->guest_r11); 1572bf21cd93STycho Nightingale case 12: 1573bf21cd93STycho Nightingale return (vmxctx->guest_r12); 1574bf21cd93STycho Nightingale case 13: 1575bf21cd93STycho Nightingale return (vmxctx->guest_r13); 1576bf21cd93STycho Nightingale case 14: 1577bf21cd93STycho Nightingale return (vmxctx->guest_r14); 1578bf21cd93STycho Nightingale case 15: 1579bf21cd93STycho Nightingale return (vmxctx->guest_r15); 1580bf21cd93STycho Nightingale default: 1581bf21cd93STycho Nightingale panic("invalid vmx register %d", ident); 1582bf21cd93STycho Nightingale } 1583bf21cd93STycho Nightingale } 1584bf21cd93STycho Nightingale 1585bf21cd93STycho Nightingale static void 1586bf21cd93STycho Nightingale vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval) 1587bf21cd93STycho Nightingale { 1588bf21cd93STycho Nightingale struct vmxctx *vmxctx; 1589bf21cd93STycho Nightingale 1590bf21cd93STycho Nightingale vmxctx = &vmx->ctx[vcpu]; 1591bf21cd93STycho Nightingale 1592bf21cd93STycho Nightingale switch (ident) { 1593bf21cd93STycho Nightingale case 0: 1594bf21cd93STycho Nightingale vmxctx->guest_rax = regval; 1595bf21cd93STycho Nightingale break; 1596bf21cd93STycho Nightingale case 1: 1597bf21cd93STycho Nightingale vmxctx->guest_rcx = regval; 1598bf21cd93STycho Nightingale break; 1599bf21cd93STycho Nightingale case 2: 1600bf21cd93STycho Nightingale vmxctx->guest_rdx = regval; 1601bf21cd93STycho Nightingale break; 1602bf21cd93STycho Nightingale case 3: 1603bf21cd93STycho Nightingale vmxctx->guest_rbx = regval; 1604bf21cd93STycho Nightingale break; 1605bf21cd93STycho Nightingale case 4: 1606bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_RSP, 
regval); 1607bf21cd93STycho Nightingale break; 1608bf21cd93STycho Nightingale case 5: 1609bf21cd93STycho Nightingale vmxctx->guest_rbp = regval; 1610bf21cd93STycho Nightingale break; 1611bf21cd93STycho Nightingale case 6: 1612bf21cd93STycho Nightingale vmxctx->guest_rsi = regval; 1613bf21cd93STycho Nightingale break; 1614bf21cd93STycho Nightingale case 7: 1615bf21cd93STycho Nightingale vmxctx->guest_rdi = regval; 1616bf21cd93STycho Nightingale break; 1617bf21cd93STycho Nightingale case 8: 1618bf21cd93STycho Nightingale vmxctx->guest_r8 = regval; 1619bf21cd93STycho Nightingale break; 1620bf21cd93STycho Nightingale case 9: 1621bf21cd93STycho Nightingale vmxctx->guest_r9 = regval; 1622bf21cd93STycho Nightingale break; 1623bf21cd93STycho Nightingale case 10: 1624bf21cd93STycho Nightingale vmxctx->guest_r10 = regval; 1625bf21cd93STycho Nightingale break; 1626bf21cd93STycho Nightingale case 11: 1627bf21cd93STycho Nightingale vmxctx->guest_r11 = regval; 1628bf21cd93STycho Nightingale break; 1629bf21cd93STycho Nightingale case 12: 1630bf21cd93STycho Nightingale vmxctx->guest_r12 = regval; 1631bf21cd93STycho Nightingale break; 1632bf21cd93STycho Nightingale case 13: 1633bf21cd93STycho Nightingale vmxctx->guest_r13 = regval; 1634bf21cd93STycho Nightingale break; 1635bf21cd93STycho Nightingale case 14: 1636bf21cd93STycho Nightingale vmxctx->guest_r14 = regval; 1637bf21cd93STycho Nightingale break; 1638bf21cd93STycho Nightingale case 15: 1639bf21cd93STycho Nightingale vmxctx->guest_r15 = regval; 1640bf21cd93STycho Nightingale break; 1641bf21cd93STycho Nightingale default: 1642bf21cd93STycho Nightingale panic("invalid vmx register %d", ident); 1643bf21cd93STycho Nightingale } 1644bf21cd93STycho Nightingale } 1645bf21cd93STycho Nightingale 1646bf21cd93STycho Nightingale static int 1647bf21cd93STycho Nightingale vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual) 1648bf21cd93STycho Nightingale { 1649bf21cd93STycho Nightingale uint64_t crval, regval; 
1650bf21cd93STycho Nightingale 1651bf21cd93STycho Nightingale /* We only handle mov to %cr0 at this time */ 1652bf21cd93STycho Nightingale if ((exitqual & 0xf0) != 0x00) 1653bf21cd93STycho Nightingale return (UNHANDLED); 1654bf21cd93STycho Nightingale 1655bf21cd93STycho Nightingale regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf); 1656bf21cd93STycho Nightingale 1657bf21cd93STycho Nightingale vmcs_write(VMCS_CR0_SHADOW, regval); 1658bf21cd93STycho Nightingale 1659bf21cd93STycho Nightingale crval = regval | cr0_ones_mask; 1660bf21cd93STycho Nightingale crval &= ~cr0_zeros_mask; 1661bf0dcd3fSPatrick Mooney 1662bf0dcd3fSPatrick Mooney const uint64_t old = vmcs_read(VMCS_GUEST_CR0); 1663bf0dcd3fSPatrick Mooney const uint64_t diff = crval ^ old; 1664bf0dcd3fSPatrick Mooney /* Flush the TLB if the paging or write-protect bits are changing */ 1665bf0dcd3fSPatrick Mooney if ((diff & CR0_PG) != 0 || (diff & CR0_WP) != 0) { 16660153d828SPatrick Mooney vmx_invvpid(vmx, vcpu, 1); 1667bf0dcd3fSPatrick Mooney } 1668bf0dcd3fSPatrick Mooney 1669bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_CR0, crval); 1670bf21cd93STycho Nightingale 1671bf21cd93STycho Nightingale if (regval & CR0_PG) { 1672bf21cd93STycho Nightingale uint64_t efer, entry_ctls; 1673bf21cd93STycho Nightingale 1674bf21cd93STycho Nightingale /* 1675bf21cd93STycho Nightingale * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and 1676bf21cd93STycho Nightingale * the "IA-32e mode guest" bit in VM-entry control must be 1677bf21cd93STycho Nightingale * equal. 
1678bf21cd93STycho Nightingale */ 1679bf21cd93STycho Nightingale efer = vmcs_read(VMCS_GUEST_IA32_EFER); 1680bf21cd93STycho Nightingale if (efer & EFER_LME) { 1681bf21cd93STycho Nightingale efer |= EFER_LMA; 1682bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_IA32_EFER, efer); 1683bf21cd93STycho Nightingale entry_ctls = vmcs_read(VMCS_ENTRY_CTLS); 1684bf21cd93STycho Nightingale entry_ctls |= VM_ENTRY_GUEST_LMA; 1685bf21cd93STycho Nightingale vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); 1686bf21cd93STycho Nightingale } 1687bf21cd93STycho Nightingale } 1688bf21cd93STycho Nightingale 1689bf21cd93STycho Nightingale return (HANDLED); 1690bf21cd93STycho Nightingale } 1691bf21cd93STycho Nightingale 1692bf21cd93STycho Nightingale static int 1693bf21cd93STycho Nightingale vmx_emulate_cr4_access(struct vmx *vmx, int vcpu, uint64_t exitqual) 1694bf21cd93STycho Nightingale { 1695bf21cd93STycho Nightingale uint64_t crval, regval; 1696bf21cd93STycho Nightingale 1697bf21cd93STycho Nightingale /* We only handle mov to %cr4 at this time */ 1698bf21cd93STycho Nightingale if ((exitqual & 0xf0) != 0x00) 1699bf21cd93STycho Nightingale return (UNHANDLED); 1700bf21cd93STycho Nightingale 1701bf21cd93STycho Nightingale regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf); 1702bf21cd93STycho Nightingale 1703bf21cd93STycho Nightingale vmcs_write(VMCS_CR4_SHADOW, regval); 1704bf21cd93STycho Nightingale 1705bf21cd93STycho Nightingale crval = regval | cr4_ones_mask; 1706bf21cd93STycho Nightingale crval &= ~cr4_zeros_mask; 1707bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_CR4, crval); 1708bf21cd93STycho Nightingale 1709bf21cd93STycho Nightingale return (HANDLED); 1710bf21cd93STycho Nightingale } 1711bf21cd93STycho Nightingale 1712bf21cd93STycho Nightingale static int 1713bf21cd93STycho Nightingale vmx_emulate_cr8_access(struct vmx *vmx, int vcpu, uint64_t exitqual) 1714bf21cd93STycho Nightingale { 1715bf21cd93STycho Nightingale struct vlapic *vlapic; 1716bf21cd93STycho Nightingale 
uint64_t cr8; 1717bf21cd93STycho Nightingale int regnum; 1718bf21cd93STycho Nightingale 1719bf21cd93STycho Nightingale /* We only handle mov %cr8 to/from a register at this time. */ 1720bf21cd93STycho Nightingale if ((exitqual & 0xe0) != 0x00) { 1721bf21cd93STycho Nightingale return (UNHANDLED); 1722bf21cd93STycho Nightingale } 1723bf21cd93STycho Nightingale 1724bf21cd93STycho Nightingale vlapic = vm_lapic(vmx->vm, vcpu); 1725bf21cd93STycho Nightingale regnum = (exitqual >> 8) & 0xf; 1726bf21cd93STycho Nightingale if (exitqual & 0x10) { 1727bf21cd93STycho Nightingale cr8 = vlapic_get_cr8(vlapic); 1728bf21cd93STycho Nightingale vmx_set_guest_reg(vmx, vcpu, regnum, cr8); 1729bf21cd93STycho Nightingale } else { 1730bf21cd93STycho Nightingale cr8 = vmx_get_guest_reg(vmx, vcpu, regnum); 1731bf21cd93STycho Nightingale vlapic_set_cr8(vlapic, cr8); 1732bf21cd93STycho Nightingale } 1733bf21cd93STycho Nightingale 1734bf21cd93STycho Nightingale return (HANDLED); 1735bf21cd93STycho Nightingale } 1736bf21cd93STycho Nightingale 1737bf21cd93STycho Nightingale /* 1738bf21cd93STycho Nightingale * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL 1739bf21cd93STycho Nightingale */ 1740bf21cd93STycho Nightingale static int 1741bf21cd93STycho Nightingale vmx_cpl(void) 1742bf21cd93STycho Nightingale { 1743bf21cd93STycho Nightingale uint32_t ssar; 1744bf21cd93STycho Nightingale 1745bf21cd93STycho Nightingale ssar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS); 1746bf21cd93STycho Nightingale return ((ssar >> 5) & 0x3); 1747bf21cd93STycho Nightingale } 1748bf21cd93STycho Nightingale 1749bf21cd93STycho Nightingale static enum vm_cpu_mode 1750bf21cd93STycho Nightingale vmx_cpu_mode(void) 1751bf21cd93STycho Nightingale { 1752bf21cd93STycho Nightingale uint32_t csar; 1753bf21cd93STycho Nightingale 1754bf21cd93STycho Nightingale if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LMA) { 1755bf21cd93STycho Nightingale csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS); 1756bf21cd93STycho 
Nightingale if (csar & 0x2000) 1757bf21cd93STycho Nightingale return (CPU_MODE_64BIT); /* CS.L = 1 */ 1758bf21cd93STycho Nightingale else 1759bf21cd93STycho Nightingale return (CPU_MODE_COMPATIBILITY); 1760bf21cd93STycho Nightingale } else if (vmcs_read(VMCS_GUEST_CR0) & CR0_PE) { 1761bf21cd93STycho Nightingale return (CPU_MODE_PROTECTED); 1762bf21cd93STycho Nightingale } else { 1763bf21cd93STycho Nightingale return (CPU_MODE_REAL); 1764bf21cd93STycho Nightingale } 1765bf21cd93STycho Nightingale } 1766bf21cd93STycho Nightingale 1767bf21cd93STycho Nightingale static enum vm_paging_mode 1768bf21cd93STycho Nightingale vmx_paging_mode(void) 1769bf21cd93STycho Nightingale { 1770bf21cd93STycho Nightingale 1771bf21cd93STycho Nightingale if (!(vmcs_read(VMCS_GUEST_CR0) & CR0_PG)) 1772bf21cd93STycho Nightingale return (PAGING_MODE_FLAT); 1773bf21cd93STycho Nightingale if (!(vmcs_read(VMCS_GUEST_CR4) & CR4_PAE)) 1774bf21cd93STycho Nightingale return (PAGING_MODE_32); 1775bf21cd93STycho Nightingale if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LME) 1776bf21cd93STycho Nightingale return (PAGING_MODE_64); 1777bf21cd93STycho Nightingale else 1778bf21cd93STycho Nightingale return (PAGING_MODE_PAE); 1779bf21cd93STycho Nightingale } 1780bf21cd93STycho Nightingale 1781bf21cd93STycho Nightingale static void 1782bf21cd93STycho Nightingale vmx_paging_info(struct vm_guest_paging *paging) 1783bf21cd93STycho Nightingale { 1784*3d097f7dSPatrick Mooney paging->cr3 = vmcs_read(VMCS_GUEST_CR3); 1785bf21cd93STycho Nightingale paging->cpl = vmx_cpl(); 1786bf21cd93STycho Nightingale paging->cpu_mode = vmx_cpu_mode(); 1787bf21cd93STycho Nightingale paging->paging_mode = vmx_paging_mode(); 1788bf21cd93STycho Nightingale } 1789bf21cd93STycho Nightingale 1790bf21cd93STycho Nightingale static void 1791e0c0d44eSPatrick Mooney vmexit_mmio_emul(struct vm_exit *vmexit, struct vie *vie, uint64_t gpa, 1792e0c0d44eSPatrick Mooney uint64_t gla) 1793bf21cd93STycho Nightingale { 1794e0c0d44eSPatrick Mooney struct 
vm_guest_paging paging; 1795bf21cd93STycho Nightingale uint32_t csar; 1796bf21cd93STycho Nightingale 1797e0c0d44eSPatrick Mooney vmexit->exitcode = VM_EXITCODE_MMIO_EMUL; 17984c87aefeSPatrick Mooney vmexit->inst_length = 0; 1799e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.gpa = gpa; 1800e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.gla = gla; 1801e0c0d44eSPatrick Mooney vmx_paging_info(&paging); 1802e0c0d44eSPatrick Mooney 1803e0c0d44eSPatrick Mooney switch (paging.cpu_mode) { 1804bf21cd93STycho Nightingale case CPU_MODE_REAL: 1805e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); 1806e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.cs_d = 0; 1807bf21cd93STycho Nightingale break; 1808bf21cd93STycho Nightingale case CPU_MODE_PROTECTED: 1809bf21cd93STycho Nightingale case CPU_MODE_COMPATIBILITY: 1810e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE); 1811bf21cd93STycho Nightingale csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS); 1812e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.cs_d = SEG_DESC_DEF32(csar); 1813bf21cd93STycho Nightingale break; 1814bf21cd93STycho Nightingale default: 1815e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.cs_base = 0; 1816e0c0d44eSPatrick Mooney vmexit->u.mmio_emul.cs_d = 0; 1817bf21cd93STycho Nightingale break; 1818bf21cd93STycho Nightingale } 1819e0c0d44eSPatrick Mooney 1820e0c0d44eSPatrick Mooney vie_init_mmio(vie, NULL, 0, &paging, gpa); 1821e0c0d44eSPatrick Mooney } 1822e0c0d44eSPatrick Mooney 1823e0c0d44eSPatrick Mooney static void 1824e0c0d44eSPatrick Mooney vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual, 1825e0c0d44eSPatrick Mooney uint32_t eax) 1826e0c0d44eSPatrick Mooney { 1827e0c0d44eSPatrick Mooney struct vm_guest_paging paging; 1828e0c0d44eSPatrick Mooney struct vm_inout *inout; 1829e0c0d44eSPatrick Mooney 1830e0c0d44eSPatrick Mooney inout = &vmexit->u.inout; 1831e0c0d44eSPatrick Mooney 1832e0c0d44eSPatrick Mooney inout->bytes = (qual & 0x7) + 1; 
1833e0c0d44eSPatrick Mooney inout->flags = 0; 1834e0c0d44eSPatrick Mooney inout->flags |= (qual & 0x8) ? INOUT_IN : 0; 1835e0c0d44eSPatrick Mooney inout->flags |= (qual & 0x10) ? INOUT_STR : 0; 1836e0c0d44eSPatrick Mooney inout->flags |= (qual & 0x20) ? INOUT_REP : 0; 1837e0c0d44eSPatrick Mooney inout->port = (uint16_t)(qual >> 16); 1838e0c0d44eSPatrick Mooney inout->eax = eax; 1839e0c0d44eSPatrick Mooney if (inout->flags & INOUT_STR) { 1840e0c0d44eSPatrick Mooney uint64_t inst_info; 1841e0c0d44eSPatrick Mooney 1842e0c0d44eSPatrick Mooney inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO); 1843e0c0d44eSPatrick Mooney 1844e0c0d44eSPatrick Mooney /* 1845d92a2ce7SPatrick Mooney * According to the SDM, bits 9:7 encode the address size of the 1846d92a2ce7SPatrick Mooney * ins/outs operation, but only values 0/1/2 are expected, 1847d92a2ce7SPatrick Mooney * corresponding to 16/32/64 bit sizes. 1848e0c0d44eSPatrick Mooney */ 1849d92a2ce7SPatrick Mooney inout->addrsize = 2 << BITX(inst_info, 9, 7); 1850e0c0d44eSPatrick Mooney VERIFY(inout->addrsize == 2 || inout->addrsize == 4 || 1851e0c0d44eSPatrick Mooney inout->addrsize == 8); 1852e0c0d44eSPatrick Mooney 1853e0c0d44eSPatrick Mooney if (inout->flags & INOUT_IN) { 1854e0c0d44eSPatrick Mooney /* 1855e0c0d44eSPatrick Mooney * The bits describing the segment in INSTRUCTION_INFO 1856e0c0d44eSPatrick Mooney * are not defined for ins, leaving it to system 1857e0c0d44eSPatrick Mooney * software to assume %es (encoded as 0) 1858e0c0d44eSPatrick Mooney */ 1859e0c0d44eSPatrick Mooney inout->segment = 0; 1860e0c0d44eSPatrick Mooney } else { 1861e0c0d44eSPatrick Mooney /* 1862e0c0d44eSPatrick Mooney * Bits 15-17 encode the segment for OUTS. 1863e0c0d44eSPatrick Mooney * This value follows the standard x86 segment order. 
1864e0c0d44eSPatrick Mooney */ 1865e0c0d44eSPatrick Mooney inout->segment = (inst_info >> 15) & 0x7; 1866e0c0d44eSPatrick Mooney } 1867e0c0d44eSPatrick Mooney } 1868e0c0d44eSPatrick Mooney 1869e0c0d44eSPatrick Mooney vmexit->exitcode = VM_EXITCODE_INOUT; 1870e0c0d44eSPatrick Mooney vmx_paging_info(&paging); 1871e0c0d44eSPatrick Mooney vie_init_inout(vie, inout, vmexit->inst_length, &paging); 1872e0c0d44eSPatrick Mooney 1873e0c0d44eSPatrick Mooney /* The in/out emulation will handle advancing %rip */ 1874e0c0d44eSPatrick Mooney vmexit->inst_length = 0; 1875bf21cd93STycho Nightingale } 1876bf21cd93STycho Nightingale 1877bf21cd93STycho Nightingale static int 1878bf21cd93STycho Nightingale ept_fault_type(uint64_t ept_qual) 1879bf21cd93STycho Nightingale { 1880bf21cd93STycho Nightingale int fault_type; 1881bf21cd93STycho Nightingale 1882bf21cd93STycho Nightingale if (ept_qual & EPT_VIOLATION_DATA_WRITE) 1883cf409e3fSDan Cross fault_type = PROT_WRITE; 1884bf21cd93STycho Nightingale else if (ept_qual & EPT_VIOLATION_INST_FETCH) 1885cf409e3fSDan Cross fault_type = PROT_EXEC; 1886bf21cd93STycho Nightingale else 1887cf409e3fSDan Cross fault_type = PROT_READ; 1888bf21cd93STycho Nightingale 1889bf21cd93STycho Nightingale return (fault_type); 1890bf21cd93STycho Nightingale } 1891bf21cd93STycho Nightingale 189284659b24SMichael Zeller static bool 1893bf21cd93STycho Nightingale ept_emulation_fault(uint64_t ept_qual) 1894bf21cd93STycho Nightingale { 1895bf21cd93STycho Nightingale int read, write; 1896bf21cd93STycho Nightingale 1897bf21cd93STycho Nightingale /* EPT fault on an instruction fetch doesn't make sense here */ 1898bf21cd93STycho Nightingale if (ept_qual & EPT_VIOLATION_INST_FETCH) 189984659b24SMichael Zeller return (false); 1900bf21cd93STycho Nightingale 1901bf21cd93STycho Nightingale /* EPT fault must be a read fault or a write fault */ 1902bf21cd93STycho Nightingale read = ept_qual & EPT_VIOLATION_DATA_READ ? 
1 : 0; 1903bf21cd93STycho Nightingale write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0; 1904bf21cd93STycho Nightingale if ((read | write) == 0) 190584659b24SMichael Zeller return (false); 1906bf21cd93STycho Nightingale 1907bf21cd93STycho Nightingale /* 1908bf21cd93STycho Nightingale * The EPT violation must have been caused by accessing a 1909bf21cd93STycho Nightingale * guest-physical address that is a translation of a guest-linear 1910bf21cd93STycho Nightingale * address. 1911bf21cd93STycho Nightingale */ 1912bf21cd93STycho Nightingale if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 || 1913bf21cd93STycho Nightingale (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) { 191484659b24SMichael Zeller return (false); 1915bf21cd93STycho Nightingale } 1916bf21cd93STycho Nightingale 191784659b24SMichael Zeller return (true); 1918bf21cd93STycho Nightingale } 1919bf21cd93STycho Nightingale 19204c87aefeSPatrick Mooney static __inline int 19214c87aefeSPatrick Mooney apic_access_virtualization(struct vmx *vmx, int vcpuid) 19224c87aefeSPatrick Mooney { 19234c87aefeSPatrick Mooney uint32_t proc_ctls2; 19244c87aefeSPatrick Mooney 19254c87aefeSPatrick Mooney proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 19264c87aefeSPatrick Mooney return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) ? 1 : 0); 19274c87aefeSPatrick Mooney } 19284c87aefeSPatrick Mooney 19294c87aefeSPatrick Mooney static __inline int 19304c87aefeSPatrick Mooney x2apic_virtualization(struct vmx *vmx, int vcpuid) 19314c87aefeSPatrick Mooney { 19324c87aefeSPatrick Mooney uint32_t proc_ctls2; 19334c87aefeSPatrick Mooney 19344c87aefeSPatrick Mooney proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 19354c87aefeSPatrick Mooney return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE) ? 
1 : 0); 19364c87aefeSPatrick Mooney } 19374c87aefeSPatrick Mooney 19384c87aefeSPatrick Mooney static int 19394c87aefeSPatrick Mooney vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, 19404c87aefeSPatrick Mooney uint64_t qual) 19414c87aefeSPatrick Mooney { 1942d2f938fdSPatrick Mooney const uint_t offset = APIC_WRITE_OFFSET(qual); 19434c87aefeSPatrick Mooney 19444c87aefeSPatrick Mooney if (!apic_access_virtualization(vmx, vcpuid)) { 19454c87aefeSPatrick Mooney /* 19464c87aefeSPatrick Mooney * In general there should not be any APIC write VM-exits 19474c87aefeSPatrick Mooney * unless APIC-access virtualization is enabled. 19484c87aefeSPatrick Mooney * 19494c87aefeSPatrick Mooney * However self-IPI virtualization can legitimately trigger 19504c87aefeSPatrick Mooney * an APIC-write VM-exit so treat it specially. 19514c87aefeSPatrick Mooney */ 19524c87aefeSPatrick Mooney if (x2apic_virtualization(vmx, vcpuid) && 19534c87aefeSPatrick Mooney offset == APIC_OFFSET_SELF_IPI) { 1954d2f938fdSPatrick Mooney const uint32_t *apic_regs = 1955d2f938fdSPatrick Mooney (uint32_t *)(vlapic->apic_page); 1956d2f938fdSPatrick Mooney const uint32_t vector = 1957d2f938fdSPatrick Mooney apic_regs[APIC_OFFSET_SELF_IPI / 4]; 1958d2f938fdSPatrick Mooney 19594c87aefeSPatrick Mooney vlapic_self_ipi_handler(vlapic, vector); 19604c87aefeSPatrick Mooney return (HANDLED); 19614c87aefeSPatrick Mooney } else 19624c87aefeSPatrick Mooney return (UNHANDLED); 19634c87aefeSPatrick Mooney } 19644c87aefeSPatrick Mooney 19654c87aefeSPatrick Mooney switch (offset) { 19664c87aefeSPatrick Mooney case APIC_OFFSET_ID: 19674c87aefeSPatrick Mooney vlapic_id_write_handler(vlapic); 19684c87aefeSPatrick Mooney break; 19694c87aefeSPatrick Mooney case APIC_OFFSET_LDR: 19704c87aefeSPatrick Mooney vlapic_ldr_write_handler(vlapic); 19714c87aefeSPatrick Mooney break; 19724c87aefeSPatrick Mooney case APIC_OFFSET_DFR: 19734c87aefeSPatrick Mooney vlapic_dfr_write_handler(vlapic); 19744c87aefeSPatrick 
Mooney break; 19754c87aefeSPatrick Mooney case APIC_OFFSET_SVR: 19764c87aefeSPatrick Mooney vlapic_svr_write_handler(vlapic); 19774c87aefeSPatrick Mooney break; 19784c87aefeSPatrick Mooney case APIC_OFFSET_ESR: 19794c87aefeSPatrick Mooney vlapic_esr_write_handler(vlapic); 19804c87aefeSPatrick Mooney break; 19814c87aefeSPatrick Mooney case APIC_OFFSET_ICR_LOW: 1982d2f938fdSPatrick Mooney vlapic_icrlo_write_handler(vlapic); 19834c87aefeSPatrick Mooney break; 19844c87aefeSPatrick Mooney case APIC_OFFSET_CMCI_LVT: 19854c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 19864c87aefeSPatrick Mooney vlapic_lvt_write_handler(vlapic, offset); 19874c87aefeSPatrick Mooney break; 19884c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_ICR: 19894c87aefeSPatrick Mooney vlapic_icrtmr_write_handler(vlapic); 19904c87aefeSPatrick Mooney break; 19914c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_DCR: 19924c87aefeSPatrick Mooney vlapic_dcr_write_handler(vlapic); 19934c87aefeSPatrick Mooney break; 19944c87aefeSPatrick Mooney default: 1995d2f938fdSPatrick Mooney return (UNHANDLED); 19964c87aefeSPatrick Mooney } 1997d2f938fdSPatrick Mooney return (HANDLED); 19984c87aefeSPatrick Mooney } 19994c87aefeSPatrick Mooney 20004c87aefeSPatrick Mooney static bool 20014c87aefeSPatrick Mooney apic_access_fault(struct vmx *vmx, int vcpuid, uint64_t gpa) 20024c87aefeSPatrick Mooney { 20034c87aefeSPatrick Mooney 20044c87aefeSPatrick Mooney if (apic_access_virtualization(vmx, vcpuid) && 20054c87aefeSPatrick Mooney (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE)) 20064c87aefeSPatrick Mooney return (true); 20074c87aefeSPatrick Mooney else 20084c87aefeSPatrick Mooney return (false); 20094c87aefeSPatrick Mooney } 20104c87aefeSPatrick Mooney 20114c87aefeSPatrick Mooney static int 20124c87aefeSPatrick Mooney vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) 20134c87aefeSPatrick Mooney { 20144c87aefeSPatrick Mooney uint64_t qual; 
20154c87aefeSPatrick Mooney int access_type, offset, allowed; 2016e0c0d44eSPatrick Mooney struct vie *vie; 20174c87aefeSPatrick Mooney 20184c87aefeSPatrick Mooney if (!apic_access_virtualization(vmx, vcpuid)) 20194c87aefeSPatrick Mooney return (UNHANDLED); 20204c87aefeSPatrick Mooney 20214c87aefeSPatrick Mooney qual = vmexit->u.vmx.exit_qualification; 20224c87aefeSPatrick Mooney access_type = APIC_ACCESS_TYPE(qual); 20234c87aefeSPatrick Mooney offset = APIC_ACCESS_OFFSET(qual); 20244c87aefeSPatrick Mooney 20254c87aefeSPatrick Mooney allowed = 0; 20264c87aefeSPatrick Mooney if (access_type == 0) { 20274c87aefeSPatrick Mooney /* 20284c87aefeSPatrick Mooney * Read data access to the following registers is expected. 20294c87aefeSPatrick Mooney */ 20304c87aefeSPatrick Mooney switch (offset) { 20314c87aefeSPatrick Mooney case APIC_OFFSET_APR: 20324c87aefeSPatrick Mooney case APIC_OFFSET_PPR: 20334c87aefeSPatrick Mooney case APIC_OFFSET_RRR: 20344c87aefeSPatrick Mooney case APIC_OFFSET_CMCI_LVT: 20354c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_CCR: 20364c87aefeSPatrick Mooney allowed = 1; 20374c87aefeSPatrick Mooney break; 20384c87aefeSPatrick Mooney default: 20394c87aefeSPatrick Mooney break; 20404c87aefeSPatrick Mooney } 20414c87aefeSPatrick Mooney } else if (access_type == 1) { 20424c87aefeSPatrick Mooney /* 20434c87aefeSPatrick Mooney * Write data access to the following registers is expected. 20444c87aefeSPatrick Mooney */ 20454c87aefeSPatrick Mooney switch (offset) { 20464c87aefeSPatrick Mooney case APIC_OFFSET_VER: 20474c87aefeSPatrick Mooney case APIC_OFFSET_APR: 20484c87aefeSPatrick Mooney case APIC_OFFSET_PPR: 20494c87aefeSPatrick Mooney case APIC_OFFSET_RRR: 20504c87aefeSPatrick Mooney case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 20514c87aefeSPatrick Mooney case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 20524c87aefeSPatrick Mooney case APIC_OFFSET_IRR0 ... 
APIC_OFFSET_IRR7: 20534c87aefeSPatrick Mooney case APIC_OFFSET_CMCI_LVT: 20544c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_CCR: 20554c87aefeSPatrick Mooney allowed = 1; 20564c87aefeSPatrick Mooney break; 20574c87aefeSPatrick Mooney default: 20584c87aefeSPatrick Mooney break; 20594c87aefeSPatrick Mooney } 20604c87aefeSPatrick Mooney } 20614c87aefeSPatrick Mooney 20624c87aefeSPatrick Mooney if (allowed) { 2063e0c0d44eSPatrick Mooney vie = vm_vie_ctx(vmx->vm, vcpuid); 2064e0c0d44eSPatrick Mooney vmexit_mmio_emul(vmexit, vie, DEFAULT_APIC_BASE + offset, 20654c87aefeSPatrick Mooney VIE_INVALID_GLA); 20664c87aefeSPatrick Mooney } 20674c87aefeSPatrick Mooney 20684c87aefeSPatrick Mooney /* 20694c87aefeSPatrick Mooney * Regardless of whether the APIC-access is allowed this handler 20704c87aefeSPatrick Mooney * always returns UNHANDLED: 20714c87aefeSPatrick Mooney * - if the access is allowed then it is handled by emulating the 20724c87aefeSPatrick Mooney * instruction that caused the VM-exit (outside the critical section) 20734c87aefeSPatrick Mooney * - if the access is not allowed then it will be converted to an 20744c87aefeSPatrick Mooney * exitcode of VM_EXITCODE_VMX and will be dealt with in userland. 
20754c87aefeSPatrick Mooney */ 20764c87aefeSPatrick Mooney return (UNHANDLED); 20774c87aefeSPatrick Mooney } 20784c87aefeSPatrick Mooney 20794c87aefeSPatrick Mooney static enum task_switch_reason 20804c87aefeSPatrick Mooney vmx_task_switch_reason(uint64_t qual) 20814c87aefeSPatrick Mooney { 20824c87aefeSPatrick Mooney int reason; 20834c87aefeSPatrick Mooney 20844c87aefeSPatrick Mooney reason = (qual >> 30) & 0x3; 20854c87aefeSPatrick Mooney switch (reason) { 20864c87aefeSPatrick Mooney case 0: 20874c87aefeSPatrick Mooney return (TSR_CALL); 20884c87aefeSPatrick Mooney case 1: 20894c87aefeSPatrick Mooney return (TSR_IRET); 20904c87aefeSPatrick Mooney case 2: 20914c87aefeSPatrick Mooney return (TSR_JMP); 20924c87aefeSPatrick Mooney case 3: 20934c87aefeSPatrick Mooney return (TSR_IDT_GATE); 20944c87aefeSPatrick Mooney default: 20954c87aefeSPatrick Mooney panic("%s: invalid reason %d", __func__, reason); 20964c87aefeSPatrick Mooney } 20974c87aefeSPatrick Mooney } 20984c87aefeSPatrick Mooney 2099bf21cd93STycho Nightingale static int 2100d2f938fdSPatrick Mooney vmx_handle_msr(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit, 2101d2f938fdSPatrick Mooney bool is_wrmsr) 2102bf21cd93STycho Nightingale { 2103d2f938fdSPatrick Mooney struct vmxctx *vmxctx = &vmx->ctx[vcpuid]; 2104d2f938fdSPatrick Mooney const uint32_t ecx = vmxctx->guest_rcx; 2105d2f938fdSPatrick Mooney vm_msr_result_t res; 2106d2f938fdSPatrick Mooney uint64_t val = 0; 2107bf21cd93STycho Nightingale 2108d2f938fdSPatrick Mooney if (is_wrmsr) { 2109d2f938fdSPatrick Mooney vmm_stat_incr(vmx->vm, vcpuid, VMEXIT_WRMSR, 1); 2110d2f938fdSPatrick Mooney val = vmxctx->guest_rdx << 32 | (uint32_t)vmxctx->guest_rax; 2111bf21cd93STycho Nightingale 2112d2f938fdSPatrick Mooney if (vlapic_owned_msr(ecx)) { 2113d2f938fdSPatrick Mooney struct vlapic *vlapic = vm_lapic(vmx->vm, vcpuid); 2114bf21cd93STycho Nightingale 2115d2f938fdSPatrick Mooney res = vlapic_wrmsr(vlapic, ecx, val); 2116d2f938fdSPatrick Mooney } else { 
2117d2f938fdSPatrick Mooney res = vmx_wrmsr(vmx, vcpuid, ecx, val); 2118d2f938fdSPatrick Mooney } 2119d2f938fdSPatrick Mooney } else { 2120d2f938fdSPatrick Mooney vmm_stat_incr(vmx->vm, vcpuid, VMEXIT_RDMSR, 1); 2121bf21cd93STycho Nightingale 2122d2f938fdSPatrick Mooney if (vlapic_owned_msr(ecx)) { 2123d2f938fdSPatrick Mooney struct vlapic *vlapic = vm_lapic(vmx->vm, vcpuid); 2124bf21cd93STycho Nightingale 2125d2f938fdSPatrick Mooney res = vlapic_rdmsr(vlapic, ecx, &val); 2126d2f938fdSPatrick Mooney } else { 2127d2f938fdSPatrick Mooney res = vmx_rdmsr(vmx, vcpuid, ecx, &val); 2128d2f938fdSPatrick Mooney } 2129bf21cd93STycho Nightingale } 2130bf21cd93STycho Nightingale 2131d2f938fdSPatrick Mooney switch (res) { 2132d2f938fdSPatrick Mooney case VMR_OK: 2133d2f938fdSPatrick Mooney /* Store rdmsr result in the appropriate registers */ 2134d2f938fdSPatrick Mooney if (!is_wrmsr) { 2135d2f938fdSPatrick Mooney vmxctx->guest_rax = (uint32_t)val; 2136d2f938fdSPatrick Mooney vmxctx->guest_rdx = val >> 32; 2137d2f938fdSPatrick Mooney } 2138d2f938fdSPatrick Mooney return (HANDLED); 2139d2f938fdSPatrick Mooney case VMR_GP: 2140d2f938fdSPatrick Mooney vm_inject_gp(vmx->vm, vcpuid); 2141d2f938fdSPatrick Mooney return (HANDLED); 2142d2f938fdSPatrick Mooney case VMR_UNHANLDED: 2143d2f938fdSPatrick Mooney vmexit->exitcode = is_wrmsr ? 
2144d2f938fdSPatrick Mooney VM_EXITCODE_WRMSR : VM_EXITCODE_RDMSR; 2145d2f938fdSPatrick Mooney vmexit->u.msr.code = ecx; 2146d2f938fdSPatrick Mooney vmexit->u.msr.wval = val; 2147d2f938fdSPatrick Mooney return (UNHANDLED); 2148d2f938fdSPatrick Mooney default: 2149d2f938fdSPatrick Mooney panic("unexpected msr result %u\n", res); 2150d2f938fdSPatrick Mooney } 2151bf21cd93STycho Nightingale } 2152bf21cd93STycho Nightingale 2153bf21cd93STycho Nightingale static int 2154bf21cd93STycho Nightingale vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) 2155bf21cd93STycho Nightingale { 2156e0c0d44eSPatrick Mooney int error, errcode, errcode_valid, handled; 2157bf21cd93STycho Nightingale struct vmxctx *vmxctx; 2158e0c0d44eSPatrick Mooney struct vie *vie; 21594c87aefeSPatrick Mooney struct vlapic *vlapic; 21604c87aefeSPatrick Mooney struct vm_task_switch *ts; 2161*3d097f7dSPatrick Mooney uint32_t idtvec_info, intr_info; 21624c87aefeSPatrick Mooney uint32_t intr_type, intr_vec, reason; 2163*3d097f7dSPatrick Mooney uint64_t qual, gpa; 2164bf21cd93STycho Nightingale 2165bf21cd93STycho Nightingale CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); 2166bf21cd93STycho Nightingale CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_NMI_EXITING) != 0); 2167bf21cd93STycho Nightingale 2168bf21cd93STycho Nightingale handled = UNHANDLED; 2169bf21cd93STycho Nightingale vmxctx = &vmx->ctx[vcpu]; 21704c87aefeSPatrick Mooney 2171bf21cd93STycho Nightingale qual = vmexit->u.vmx.exit_qualification; 21724c87aefeSPatrick Mooney reason = vmexit->u.vmx.exit_reason; 2173bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_BOGUS; 2174bf21cd93STycho Nightingale 2175bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1); 21764c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpu, vmexit); 21774c87aefeSPatrick Mooney 21784c87aefeSPatrick Mooney /* 21794c87aefeSPatrick Mooney * VM-entry failures during or after loading guest state. 
21804c87aefeSPatrick Mooney * 21814c87aefeSPatrick Mooney * These VM-exits are uncommon but must be handled specially 21824c87aefeSPatrick Mooney * as most VM-exit fields are not populated as usual. 21834c87aefeSPatrick Mooney */ 2184f703164bSPatrick Mooney if (reason == EXIT_REASON_MCE_DURING_ENTRY) { 21854c87aefeSPatrick Mooney VCPU_CTR0(vmx->vm, vcpu, "Handling MCE during VM-entry"); 21864c87aefeSPatrick Mooney vmm_call_trap(T_MCE); 21874c87aefeSPatrick Mooney return (1); 21884c87aefeSPatrick Mooney } 2189bf21cd93STycho Nightingale 21904c87aefeSPatrick Mooney /* 21914c87aefeSPatrick Mooney * VM exits that can be triggered during event delivery need to 21924c87aefeSPatrick Mooney * be handled specially by re-injecting the event if the IDT 21934c87aefeSPatrick Mooney * vectoring information field's valid bit is set. 21944c87aefeSPatrick Mooney * 21954c87aefeSPatrick Mooney * See "Information for VM Exits During Event Delivery" in Intel SDM 21964c87aefeSPatrick Mooney * for details. 21974c87aefeSPatrick Mooney */ 2198*3d097f7dSPatrick Mooney idtvec_info = vmcs_read(VMCS_IDT_VECTORING_INFO); 21994c87aefeSPatrick Mooney if (idtvec_info & VMCS_IDT_VEC_VALID) { 2200*3d097f7dSPatrick Mooney /* Record exit intinfo */ 2201*3d097f7dSPatrick Mooney VERIFY0(vm_exit_intinfo(vmx->vm, vcpu, 2202*3d097f7dSPatrick Mooney vmx_idtvec_to_intinfo(idtvec_info))); 22034c87aefeSPatrick Mooney 22044c87aefeSPatrick Mooney /* 22054c87aefeSPatrick Mooney * If 'virtual NMIs' are being used and the VM-exit 22064c87aefeSPatrick Mooney * happened while injecting an NMI during the previous 22074c87aefeSPatrick Mooney * VM-entry, then clear "blocking by NMI" in the 22084c87aefeSPatrick Mooney * Guest Interruptibility-State so the NMI can be 22094c87aefeSPatrick Mooney * reinjected on the subsequent VM-entry. 
22104c87aefeSPatrick Mooney * 22114c87aefeSPatrick Mooney * However, if the NMI was being delivered through a task 22124c87aefeSPatrick Mooney * gate, then the new task must start execution with NMIs 22134c87aefeSPatrick Mooney * blocked so don't clear NMI blocking in this case. 22144c87aefeSPatrick Mooney */ 22154c87aefeSPatrick Mooney intr_type = idtvec_info & VMCS_INTR_T_MASK; 22164c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_NMI) { 22174c87aefeSPatrick Mooney if (reason != EXIT_REASON_TASK_SWITCH) 22184c87aefeSPatrick Mooney vmx_clear_nmi_blocking(vmx, vcpu); 22194c87aefeSPatrick Mooney else 22204c87aefeSPatrick Mooney vmx_assert_nmi_blocking(vmx, vcpu); 22214c87aefeSPatrick Mooney } 22224c87aefeSPatrick Mooney 22234c87aefeSPatrick Mooney /* 22244c87aefeSPatrick Mooney * Update VM-entry instruction length if the event being 22254c87aefeSPatrick Mooney * delivered was a software interrupt or software exception. 22264c87aefeSPatrick Mooney */ 22274c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_SWINTR || 22284c87aefeSPatrick Mooney intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION || 22294c87aefeSPatrick Mooney intr_type == VMCS_INTR_T_SWEXCEPTION) { 22304c87aefeSPatrick Mooney vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); 22314c87aefeSPatrick Mooney } 22324c87aefeSPatrick Mooney } 22334c87aefeSPatrick Mooney 22344c87aefeSPatrick Mooney switch (reason) { 223583b49c54SPatrick Mooney case EXIT_REASON_TRIPLE_FAULT: 223683b49c54SPatrick Mooney (void) vm_suspend(vmx->vm, VM_SUSPEND_TRIPLEFAULT); 223783b49c54SPatrick Mooney handled = HANDLED; 223883b49c54SPatrick Mooney break; 22394c87aefeSPatrick Mooney case EXIT_REASON_TASK_SWITCH: 22404c87aefeSPatrick Mooney ts = &vmexit->u.task_switch; 22414c87aefeSPatrick Mooney ts->tsssel = qual & 0xffff; 22424c87aefeSPatrick Mooney ts->reason = vmx_task_switch_reason(qual); 22434c87aefeSPatrick Mooney ts->ext = 0; 22444c87aefeSPatrick Mooney ts->errcode_valid = 0; 22454c87aefeSPatrick Mooney 
vmx_paging_info(&ts->paging); 22464c87aefeSPatrick Mooney /* 22474c87aefeSPatrick Mooney * If the task switch was due to a CALL, JMP, IRET, software 22484c87aefeSPatrick Mooney * interrupt (INT n) or software exception (INT3, INTO), 22494c87aefeSPatrick Mooney * then the saved %rip references the instruction that caused 22504c87aefeSPatrick Mooney * the task switch. The instruction length field in the VMCS 22514c87aefeSPatrick Mooney * is valid in this case. 22524c87aefeSPatrick Mooney * 22534c87aefeSPatrick Mooney * In all other cases (e.g., NMI, hardware exception) the 22544c87aefeSPatrick Mooney * saved %rip is one that would have been saved in the old TSS 22554c87aefeSPatrick Mooney * had the task switch completed normally so the instruction 22564c87aefeSPatrick Mooney * length field is not needed in this case and is explicitly 22574c87aefeSPatrick Mooney * set to 0. 22584c87aefeSPatrick Mooney */ 22594c87aefeSPatrick Mooney if (ts->reason == TSR_IDT_GATE) { 22604c87aefeSPatrick Mooney KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, 22619dc804b9SPatrick Mooney ("invalid idtvec_info %x for IDT task switch", 22624c87aefeSPatrick Mooney idtvec_info)); 22634c87aefeSPatrick Mooney intr_type = idtvec_info & VMCS_INTR_T_MASK; 22644c87aefeSPatrick Mooney if (intr_type != VMCS_INTR_T_SWINTR && 22654c87aefeSPatrick Mooney intr_type != VMCS_INTR_T_SWEXCEPTION && 22664c87aefeSPatrick Mooney intr_type != VMCS_INTR_T_PRIV_SWEXCEPTION) { 22674c87aefeSPatrick Mooney /* Task switch triggered by external event */ 22684c87aefeSPatrick Mooney ts->ext = 1; 22694c87aefeSPatrick Mooney vmexit->inst_length = 0; 22704c87aefeSPatrick Mooney if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { 22714c87aefeSPatrick Mooney ts->errcode_valid = 1; 2272*3d097f7dSPatrick Mooney ts->errcode = 2273*3d097f7dSPatrick Mooney vmcs_read(VMCS_IDT_VECTORING_ERROR); 22744c87aefeSPatrick Mooney } 22754c87aefeSPatrick Mooney } 22764c87aefeSPatrick Mooney } 22774c87aefeSPatrick Mooney vmexit->exitcode = 
VM_EXITCODE_TASK_SWITCH; 22784c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpu, vmexit, ts); 22794c87aefeSPatrick Mooney VCPU_CTR4(vmx->vm, vcpu, "task switch reason %d, tss 0x%04x, " 22804c87aefeSPatrick Mooney "%s errcode 0x%016lx", ts->reason, ts->tsssel, 22814c87aefeSPatrick Mooney ts->ext ? "external" : "internal", 22824c87aefeSPatrick Mooney ((uint64_t)ts->errcode << 32) | ts->errcode_valid); 22834c87aefeSPatrick Mooney break; 2284bf21cd93STycho Nightingale case EXIT_REASON_CR_ACCESS: 2285bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1); 22864c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpu, vmexit, qual); 2287bf21cd93STycho Nightingale switch (qual & 0xf) { 2288bf21cd93STycho Nightingale case 0: 2289bf21cd93STycho Nightingale handled = vmx_emulate_cr0_access(vmx, vcpu, qual); 2290bf21cd93STycho Nightingale break; 2291bf21cd93STycho Nightingale case 4: 2292bf21cd93STycho Nightingale handled = vmx_emulate_cr4_access(vmx, vcpu, qual); 2293bf21cd93STycho Nightingale break; 2294bf21cd93STycho Nightingale case 8: 2295bf21cd93STycho Nightingale handled = vmx_emulate_cr8_access(vmx, vcpu, qual); 2296bf21cd93STycho Nightingale break; 2297bf21cd93STycho Nightingale } 2298bf21cd93STycho Nightingale break; 2299bf21cd93STycho Nightingale case EXIT_REASON_RDMSR: 2300bf21cd93STycho Nightingale case EXIT_REASON_WRMSR: 2301d2f938fdSPatrick Mooney handled = vmx_handle_msr(vmx, vcpu, vmexit, 2302d2f938fdSPatrick Mooney reason == EXIT_REASON_WRMSR); 2303bf21cd93STycho Nightingale break; 2304bf21cd93STycho Nightingale case EXIT_REASON_HLT: 2305bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); 23064c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit); 2307bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_HLT; 2308bf21cd93STycho Nightingale vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS); 2309bf21cd93STycho Nightingale break; 2310bf21cd93STycho 
Nightingale case EXIT_REASON_MTF: 2311bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); 23124c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpu, vmexit); 2313bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_MTRAP; 23144c87aefeSPatrick Mooney vmexit->inst_length = 0; 2315bf21cd93STycho Nightingale break; 2316bf21cd93STycho Nightingale case EXIT_REASON_PAUSE: 2317bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); 23184c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpu, vmexit); 2319bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_PAUSE; 2320bf21cd93STycho Nightingale break; 2321bf21cd93STycho Nightingale case EXIT_REASON_INTR_WINDOW: 2322bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); 23234c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpu, vmexit); 2324bf21cd93STycho Nightingale vmx_clear_int_window_exiting(vmx, vcpu); 2325bf21cd93STycho Nightingale return (1); 2326bf21cd93STycho Nightingale case EXIT_REASON_EXT_INTR: 2327bf21cd93STycho Nightingale /* 2328bf21cd93STycho Nightingale * External interrupts serve only to cause VM exits and allow 2329bf21cd93STycho Nightingale * the host interrupt handler to run. 2330bf21cd93STycho Nightingale * 2331bf21cd93STycho Nightingale * If this external interrupt triggers a virtual interrupt 2332bf21cd93STycho Nightingale * to a VM, then that state will be recorded by the 2333bf21cd93STycho Nightingale * host interrupt handler in the VM's softc. We will inject 2334bf21cd93STycho Nightingale * this virtual interrupt during the subsequent VM enter. 
2335bf21cd93STycho Nightingale */ 2336bf21cd93STycho Nightingale intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); 23374c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, interrupt, 23384c87aefeSPatrick Mooney vmx, vcpu, vmexit, intr_info); 2339bf21cd93STycho Nightingale 2340bf21cd93STycho Nightingale /* 2341bf21cd93STycho Nightingale * XXX: Ignore this exit if VMCS_INTR_VALID is not set. 2342bf21cd93STycho Nightingale * This appears to be a bug in VMware Fusion? 2343bf21cd93STycho Nightingale */ 2344bf21cd93STycho Nightingale if (!(intr_info & VMCS_INTR_VALID)) 2345bf21cd93STycho Nightingale return (1); 2346bf21cd93STycho Nightingale KASSERT((intr_info & VMCS_INTR_VALID) != 0 && 2347bf21cd93STycho Nightingale (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR, 23489dc804b9SPatrick Mooney ("VM exit interruption info invalid: %x", intr_info)); 2349bf21cd93STycho Nightingale vmx_trigger_hostintr(intr_info & 0xff); 2350bf21cd93STycho Nightingale 2351bf21cd93STycho Nightingale /* 2352bf21cd93STycho Nightingale * This is special. We want to treat this as an 'handled' 2353bf21cd93STycho Nightingale * VM-exit but not increment the instruction pointer. 
2354bf21cd93STycho Nightingale */ 2355bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1); 2356bf21cd93STycho Nightingale return (1); 2357bf21cd93STycho Nightingale case EXIT_REASON_NMI_WINDOW: 23584c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpu, vmexit); 2359bf21cd93STycho Nightingale /* Exit to allow the pending virtual NMI to be injected */ 2360bf21cd93STycho Nightingale if (vm_nmi_pending(vmx->vm, vcpu)) 2361bf21cd93STycho Nightingale vmx_inject_nmi(vmx, vcpu); 2362bf21cd93STycho Nightingale vmx_clear_nmi_window_exiting(vmx, vcpu); 2363bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1); 2364bf21cd93STycho Nightingale return (1); 2365bf21cd93STycho Nightingale case EXIT_REASON_INOUT: 2366bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1); 2367e0c0d44eSPatrick Mooney vie = vm_vie_ctx(vmx->vm, vcpu); 2368e0c0d44eSPatrick Mooney vmexit_inout(vmexit, vie, qual, (uint32_t)vmxctx->guest_rax); 23694c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit); 2370bf21cd93STycho Nightingale break; 2371bf21cd93STycho Nightingale case EXIT_REASON_CPUID: 2372bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1); 23734c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, cpuid, vmx, vcpu, vmexit); 2374bf21cd93STycho Nightingale handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); 2375bf21cd93STycho Nightingale break; 2376bf21cd93STycho Nightingale case EXIT_REASON_EXCEPTION: 2377bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1); 2378bf21cd93STycho Nightingale intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); 2379bf21cd93STycho Nightingale KASSERT((intr_info & VMCS_INTR_VALID) != 0, 23809dc804b9SPatrick Mooney ("VM exit interruption info invalid: %x", intr_info)); 23814c87aefeSPatrick Mooney 23824c87aefeSPatrick Mooney intr_vec = intr_info & 0xff; 23834c87aefeSPatrick Mooney intr_type = intr_info & VMCS_INTR_T_MASK; 
2384bf21cd93STycho Nightingale 2385bf21cd93STycho Nightingale /* 2386bf21cd93STycho Nightingale * If Virtual NMIs control is 1 and the VM-exit is due to a 2387bf21cd93STycho Nightingale * fault encountered during the execution of IRET then we must 2388bf21cd93STycho Nightingale * restore the state of "virtual-NMI blocking" before resuming 2389bf21cd93STycho Nightingale * the guest. 2390bf21cd93STycho Nightingale * 2391bf21cd93STycho Nightingale * See "Resuming Guest Software after Handling an Exception". 23924c87aefeSPatrick Mooney * See "Information for VM Exits Due to Vectored Events". 2393bf21cd93STycho Nightingale */ 2394bf21cd93STycho Nightingale if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && 23954c87aefeSPatrick Mooney (intr_vec != IDT_DF) && 2396bf21cd93STycho Nightingale (intr_info & EXIT_QUAL_NMIUDTI) != 0) 2397bf21cd93STycho Nightingale vmx_restore_nmi_blocking(vmx, vcpu); 2398bf21cd93STycho Nightingale 2399bf21cd93STycho Nightingale /* 2400bf21cd93STycho Nightingale * The NMI has already been handled in vmx_exit_handle_nmi(). 2401bf21cd93STycho Nightingale */ 24024c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_NMI) 2403bf21cd93STycho Nightingale return (1); 24044c87aefeSPatrick Mooney 24054c87aefeSPatrick Mooney /* 24064c87aefeSPatrick Mooney * Call the machine check handler by hand. Also don't reflect 24074c87aefeSPatrick Mooney * the machine check back into the guest. 24084c87aefeSPatrick Mooney */ 24094c87aefeSPatrick Mooney if (intr_vec == IDT_MC) { 24104c87aefeSPatrick Mooney VCPU_CTR0(vmx->vm, vcpu, "Vectoring to MCE handler"); 24114c87aefeSPatrick Mooney vmm_call_trap(T_MCE); 24124c87aefeSPatrick Mooney return (1); 24134c87aefeSPatrick Mooney } 24144c87aefeSPatrick Mooney 2415154972afSPatrick Mooney /* 2416154972afSPatrick Mooney * If the hypervisor has requested user exits for 2417154972afSPatrick Mooney * debug exceptions, bounce them out to userland. 
2418154972afSPatrick Mooney */ 24192699b94cSPatrick Mooney if (intr_type == VMCS_INTR_T_SWEXCEPTION && 24202699b94cSPatrick Mooney intr_vec == IDT_BP && 2421154972afSPatrick Mooney (vmx->cap[vcpu].set & (1 << VM_CAP_BPT_EXIT))) { 2422154972afSPatrick Mooney vmexit->exitcode = VM_EXITCODE_BPT; 2423154972afSPatrick Mooney vmexit->u.bpt.inst_length = vmexit->inst_length; 2424154972afSPatrick Mooney vmexit->inst_length = 0; 2425154972afSPatrick Mooney break; 2426154972afSPatrick Mooney } 2427154972afSPatrick Mooney 24284c87aefeSPatrick Mooney if (intr_vec == IDT_PF) { 2429007ca332SPatrick Mooney vmxctx->guest_cr2 = qual; 24304c87aefeSPatrick Mooney } 24314c87aefeSPatrick Mooney 24324c87aefeSPatrick Mooney /* 24334c87aefeSPatrick Mooney * Software exceptions exhibit trap-like behavior. This in 24344c87aefeSPatrick Mooney * turn requires populating the VM-entry instruction length 24354c87aefeSPatrick Mooney * so that the %rip in the trap frame is past the INT3/INTO 24364c87aefeSPatrick Mooney * instruction. 
24374c87aefeSPatrick Mooney */ 24384c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_SWEXCEPTION) 24394c87aefeSPatrick Mooney vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); 24404c87aefeSPatrick Mooney 24414c87aefeSPatrick Mooney /* Reflect all other exceptions back into the guest */ 24424c87aefeSPatrick Mooney errcode_valid = errcode = 0; 24434c87aefeSPatrick Mooney if (intr_info & VMCS_INTR_DEL_ERRCODE) { 24444c87aefeSPatrick Mooney errcode_valid = 1; 24454c87aefeSPatrick Mooney errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE); 24464c87aefeSPatrick Mooney } 24479dc804b9SPatrick Mooney VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%x into " 24484c87aefeSPatrick Mooney "the guest", intr_vec, errcode); 24494c87aefeSPatrick Mooney SDT_PROBE5(vmm, vmx, exit, exception, 24504c87aefeSPatrick Mooney vmx, vcpu, vmexit, intr_vec, errcode); 24514c87aefeSPatrick Mooney error = vm_inject_exception(vmx->vm, vcpu, intr_vec, 24524c87aefeSPatrick Mooney errcode_valid, errcode, 0); 24534c87aefeSPatrick Mooney KASSERT(error == 0, ("%s: vm_inject_exception error %d", 24544c87aefeSPatrick Mooney __func__, error)); 24554c87aefeSPatrick Mooney return (1); 24564c87aefeSPatrick Mooney 2457bf21cd93STycho Nightingale case EXIT_REASON_EPT_FAULT: 24584c87aefeSPatrick Mooney /* 24594c87aefeSPatrick Mooney * If 'gpa' lies within the address space allocated to 24604c87aefeSPatrick Mooney * memory then this must be a nested page fault otherwise 24614c87aefeSPatrick Mooney * this must be an instruction that accesses MMIO space. 
24624c87aefeSPatrick Mooney */ 2463*3d097f7dSPatrick Mooney gpa = vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS); 24644c87aefeSPatrick Mooney if (vm_mem_allocated(vmx->vm, vcpu, gpa) || 24654c87aefeSPatrick Mooney apic_access_fault(vmx, vcpu, gpa)) { 24664c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_PAGING; 24674c87aefeSPatrick Mooney vmexit->inst_length = 0; 24684c87aefeSPatrick Mooney vmexit->u.paging.gpa = gpa; 24694c87aefeSPatrick Mooney vmexit->u.paging.fault_type = ept_fault_type(qual); 24704c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); 24714c87aefeSPatrick Mooney SDT_PROBE5(vmm, vmx, exit, nestedfault, 24724c87aefeSPatrick Mooney vmx, vcpu, vmexit, gpa, qual); 24734c87aefeSPatrick Mooney } else if (ept_emulation_fault(qual)) { 2474e0c0d44eSPatrick Mooney vie = vm_vie_ctx(vmx->vm, vcpu); 2475*3d097f7dSPatrick Mooney vmexit_mmio_emul(vmexit, vie, gpa, 2476*3d097f7dSPatrick Mooney vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); 2477e0c0d44eSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MMIO_EMUL, 1); 24784c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, mmiofault, 24794c87aefeSPatrick Mooney vmx, vcpu, vmexit, gpa); 2480bf21cd93STycho Nightingale } 24814c87aefeSPatrick Mooney /* 24824c87aefeSPatrick Mooney * If Virtual NMIs control is 1 and the VM-exit is due to an 24834c87aefeSPatrick Mooney * EPT fault during the execution of IRET then we must restore 24844c87aefeSPatrick Mooney * the state of "virtual-NMI blocking" before resuming. 24854c87aefeSPatrick Mooney * 24864c87aefeSPatrick Mooney * See description of "NMI unblocking due to IRET" in 24874c87aefeSPatrick Mooney * "Exit Qualification for EPT Violations". 
24884c87aefeSPatrick Mooney */ 24894c87aefeSPatrick Mooney if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && 24904c87aefeSPatrick Mooney (qual & EXIT_QUAL_NMIUDTI) != 0) 24914c87aefeSPatrick Mooney vmx_restore_nmi_blocking(vmx, vcpu); 24924c87aefeSPatrick Mooney break; 24934c87aefeSPatrick Mooney case EXIT_REASON_VIRTUALIZED_EOI: 24944c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI; 24954c87aefeSPatrick Mooney vmexit->u.ioapic_eoi.vector = qual & 0xFF; 24964c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpu, vmexit); 24974c87aefeSPatrick Mooney vmexit->inst_length = 0; /* trap-like */ 24984c87aefeSPatrick Mooney break; 24994c87aefeSPatrick Mooney case EXIT_REASON_APIC_ACCESS: 25004c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpu, vmexit); 25014c87aefeSPatrick Mooney handled = vmx_handle_apic_access(vmx, vcpu, vmexit); 25024c87aefeSPatrick Mooney break; 25034c87aefeSPatrick Mooney case EXIT_REASON_APIC_WRITE: 25044c87aefeSPatrick Mooney /* 25054c87aefeSPatrick Mooney * APIC-write VM exit is trap-like so the %rip is already 25064c87aefeSPatrick Mooney * pointing to the next instruction. 
25074c87aefeSPatrick Mooney */ 25084c87aefeSPatrick Mooney vmexit->inst_length = 0; 25094c87aefeSPatrick Mooney vlapic = vm_lapic(vmx->vm, vcpu); 25104c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, apicwrite, 25114c87aefeSPatrick Mooney vmx, vcpu, vmexit, vlapic); 25124c87aefeSPatrick Mooney handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual); 25134c87aefeSPatrick Mooney break; 25144c87aefeSPatrick Mooney case EXIT_REASON_XSETBV: 25154c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpu, vmexit); 25164c87aefeSPatrick Mooney handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit); 25174c87aefeSPatrick Mooney break; 25184c87aefeSPatrick Mooney case EXIT_REASON_MONITOR: 25194c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpu, vmexit); 25204c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_MONITOR; 25214c87aefeSPatrick Mooney break; 25224c87aefeSPatrick Mooney case EXIT_REASON_MWAIT: 25234c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit); 25244c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_MWAIT; 25254c87aefeSPatrick Mooney break; 2526154972afSPatrick Mooney case EXIT_REASON_TPR: 2527154972afSPatrick Mooney vlapic = vm_lapic(vmx->vm, vcpu); 2528154972afSPatrick Mooney vlapic_sync_tpr(vlapic); 2529154972afSPatrick Mooney vmexit->inst_length = 0; 2530154972afSPatrick Mooney handled = HANDLED; 2531154972afSPatrick Mooney break; 25324c87aefeSPatrick Mooney case EXIT_REASON_VMCALL: 25334c87aefeSPatrick Mooney case EXIT_REASON_VMCLEAR: 25344c87aefeSPatrick Mooney case EXIT_REASON_VMLAUNCH: 25354c87aefeSPatrick Mooney case EXIT_REASON_VMPTRLD: 25364c87aefeSPatrick Mooney case EXIT_REASON_VMPTRST: 25374c87aefeSPatrick Mooney case EXIT_REASON_VMREAD: 25384c87aefeSPatrick Mooney case EXIT_REASON_VMRESUME: 25394c87aefeSPatrick Mooney case EXIT_REASON_VMWRITE: 25404c87aefeSPatrick Mooney case EXIT_REASON_VMXOFF: 25414c87aefeSPatrick Mooney case EXIT_REASON_VMXON: 25424c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, 
exit, vminsn, vmx, vcpu, vmexit); 25434c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_VMINSN; 2544bf21cd93STycho Nightingale break; 2545bf21cd93STycho Nightingale default: 25464c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, unknown, 25474c87aefeSPatrick Mooney vmx, vcpu, vmexit, reason); 2548bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); 2549bf21cd93STycho Nightingale break; 2550bf21cd93STycho Nightingale } 2551bf21cd93STycho Nightingale 2552bf21cd93STycho Nightingale if (handled) { 2553bf21cd93STycho Nightingale /* 2554bf21cd93STycho Nightingale * It is possible that control is returned to userland 2555bf21cd93STycho Nightingale * even though we were able to handle the VM exit in the 2556bf21cd93STycho Nightingale * kernel. 2557bf21cd93STycho Nightingale * 2558bf21cd93STycho Nightingale * In such a case we want to make sure that the userland 2559bf21cd93STycho Nightingale * restarts guest execution at the instruction *after* 2560bf21cd93STycho Nightingale * the one we just processed. Therefore we update the 2561bf21cd93STycho Nightingale * guest rip in the VMCS and in 'vmexit'. 2562bf21cd93STycho Nightingale */ 2563bf21cd93STycho Nightingale vmexit->rip += vmexit->inst_length; 2564bf21cd93STycho Nightingale vmexit->inst_length = 0; 25654c87aefeSPatrick Mooney vmcs_write(VMCS_GUEST_RIP, vmexit->rip); 2566bf21cd93STycho Nightingale } else { 2567bf21cd93STycho Nightingale if (vmexit->exitcode == VM_EXITCODE_BOGUS) { 2568bf21cd93STycho Nightingale /* 2569bf21cd93STycho Nightingale * If this VM exit was not claimed by anybody then 2570bf21cd93STycho Nightingale * treat it as a generic VMX exit. 
2571bf21cd93STycho Nightingale */ 2572bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_VMX; 2573bf21cd93STycho Nightingale vmexit->u.vmx.status = VM_SUCCESS; 2574bf21cd93STycho Nightingale vmexit->u.vmx.inst_type = 0; 2575bf21cd93STycho Nightingale vmexit->u.vmx.inst_error = 0; 2576bf21cd93STycho Nightingale } else { 2577bf21cd93STycho Nightingale /* 2578bf21cd93STycho Nightingale * The exitcode and collateral have been populated. 2579bf21cd93STycho Nightingale * The VM exit will be processed further in userland. 2580bf21cd93STycho Nightingale */ 2581bf21cd93STycho Nightingale } 2582bf21cd93STycho Nightingale } 25834c87aefeSPatrick Mooney 25844c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, return, 25854c87aefeSPatrick Mooney vmx, vcpu, vmexit, handled); 25864c87aefeSPatrick Mooney return (handled); 25874c87aefeSPatrick Mooney } 25884c87aefeSPatrick Mooney 25894c87aefeSPatrick Mooney static void 25904c87aefeSPatrick Mooney vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) 25914c87aefeSPatrick Mooney { 25924c87aefeSPatrick Mooney 25934c87aefeSPatrick Mooney KASSERT(vmxctx->inst_fail_status != VM_SUCCESS, 25944c87aefeSPatrick Mooney ("vmx_exit_inst_error: invalid inst_fail_status %d", 25954c87aefeSPatrick Mooney vmxctx->inst_fail_status)); 25964c87aefeSPatrick Mooney 25974c87aefeSPatrick Mooney vmexit->inst_length = 0; 25984c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_VMX; 25994c87aefeSPatrick Mooney vmexit->u.vmx.status = vmxctx->inst_fail_status; 2600*3d097f7dSPatrick Mooney vmexit->u.vmx.inst_error = vmcs_read(VMCS_INSTRUCTION_ERROR); 26014c87aefeSPatrick Mooney vmexit->u.vmx.exit_reason = ~0; 26024c87aefeSPatrick Mooney vmexit->u.vmx.exit_qualification = ~0; 26034c87aefeSPatrick Mooney 26044c87aefeSPatrick Mooney switch (rc) { 26054c87aefeSPatrick Mooney case VMX_VMRESUME_ERROR: 26064c87aefeSPatrick Mooney case VMX_VMLAUNCH_ERROR: 26074c87aefeSPatrick Mooney case VMX_INVEPT_ERROR: 26084c87aefeSPatrick Mooney case 
VMX_VMWRITE_ERROR: 26094c87aefeSPatrick Mooney vmexit->u.vmx.inst_type = rc; 26104c87aefeSPatrick Mooney break; 26114c87aefeSPatrick Mooney default: 26124c87aefeSPatrick Mooney panic("vm_exit_inst_error: vmx_enter_guest returned %d", rc); 26134c87aefeSPatrick Mooney } 26144c87aefeSPatrick Mooney } 26154c87aefeSPatrick Mooney 26164c87aefeSPatrick Mooney /* 26174c87aefeSPatrick Mooney * If the NMI-exiting VM execution control is set to '1' then an NMI in 26184c87aefeSPatrick Mooney * non-root operation causes a VM-exit. NMI blocking is in effect so it is 26194c87aefeSPatrick Mooney * sufficient to simply vector to the NMI handler via a software interrupt. 26204c87aefeSPatrick Mooney * However, this must be done before maskable interrupts are enabled 26214c87aefeSPatrick Mooney * otherwise the "iret" issued by an interrupt handler will incorrectly 26224c87aefeSPatrick Mooney * clear NMI blocking. 26234c87aefeSPatrick Mooney */ 26244c87aefeSPatrick Mooney static __inline void 26250153d828SPatrick Mooney vmx_exit_handle_possible_nmi(struct vm_exit *vmexit) 26264c87aefeSPatrick Mooney { 26270153d828SPatrick Mooney ASSERT(!interrupts_enabled()); 26284c87aefeSPatrick Mooney 26290153d828SPatrick Mooney if (vmexit->u.vmx.exit_reason == EXIT_REASON_EXCEPTION) { 26300153d828SPatrick Mooney uint32_t intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); 26310153d828SPatrick Mooney ASSERT(intr_info & VMCS_INTR_VALID); 26324c87aefeSPatrick Mooney 26330153d828SPatrick Mooney if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { 26340153d828SPatrick Mooney ASSERT3U(intr_info & 0xff, ==, IDT_NMI); 26350153d828SPatrick Mooney vmm_call_trap(T_NMIFLT); 26360153d828SPatrick Mooney } 26374c87aefeSPatrick Mooney } 26384c87aefeSPatrick Mooney } 26394c87aefeSPatrick Mooney 26404c87aefeSPatrick Mooney static __inline void 26414c87aefeSPatrick Mooney vmx_dr_enter_guest(struct vmxctx *vmxctx) 26424c87aefeSPatrick Mooney { 2643db8733f5SPatrick Mooney uint64_t rflags; 26444c87aefeSPatrick Mooney 
26454c87aefeSPatrick Mooney /* Save host control debug registers. */ 26464c87aefeSPatrick Mooney vmxctx->host_dr7 = rdr7(); 26474c87aefeSPatrick Mooney vmxctx->host_debugctl = rdmsr(MSR_DEBUGCTLMSR); 26484c87aefeSPatrick Mooney 26494c87aefeSPatrick Mooney /* 26504c87aefeSPatrick Mooney * Disable debugging in DR7 and DEBUGCTL to avoid triggering 26514c87aefeSPatrick Mooney * exceptions in the host based on the guest DRx values. The 26524c87aefeSPatrick Mooney * guest DR7 and DEBUGCTL are saved/restored in the VMCS. 26534c87aefeSPatrick Mooney */ 26544c87aefeSPatrick Mooney load_dr7(0); 26554c87aefeSPatrick Mooney wrmsr(MSR_DEBUGCTLMSR, 0); 26564c87aefeSPatrick Mooney 26574c87aefeSPatrick Mooney /* 26584c87aefeSPatrick Mooney * Disable single stepping the kernel to avoid corrupting the 26594c87aefeSPatrick Mooney * guest DR6. A debugger might still be able to corrupt the 26604c87aefeSPatrick Mooney * guest DR6 by setting a breakpoint after this point and then 26614c87aefeSPatrick Mooney * single stepping. 26624c87aefeSPatrick Mooney */ 26634c87aefeSPatrick Mooney rflags = read_rflags(); 26644c87aefeSPatrick Mooney vmxctx->host_tf = rflags & PSL_T; 26654c87aefeSPatrick Mooney write_rflags(rflags & ~PSL_T); 26664c87aefeSPatrick Mooney 26674c87aefeSPatrick Mooney /* Save host debug registers. */ 26684c87aefeSPatrick Mooney vmxctx->host_dr0 = rdr0(); 26694c87aefeSPatrick Mooney vmxctx->host_dr1 = rdr1(); 26704c87aefeSPatrick Mooney vmxctx->host_dr2 = rdr2(); 26714c87aefeSPatrick Mooney vmxctx->host_dr3 = rdr3(); 26724c87aefeSPatrick Mooney vmxctx->host_dr6 = rdr6(); 26734c87aefeSPatrick Mooney 26744c87aefeSPatrick Mooney /* Restore guest debug registers. 
*/ 26754c87aefeSPatrick Mooney load_dr0(vmxctx->guest_dr0); 26764c87aefeSPatrick Mooney load_dr1(vmxctx->guest_dr1); 26774c87aefeSPatrick Mooney load_dr2(vmxctx->guest_dr2); 26784c87aefeSPatrick Mooney load_dr3(vmxctx->guest_dr3); 26794c87aefeSPatrick Mooney load_dr6(vmxctx->guest_dr6); 26804c87aefeSPatrick Mooney } 26814c87aefeSPatrick Mooney 26824c87aefeSPatrick Mooney static __inline void 26834c87aefeSPatrick Mooney vmx_dr_leave_guest(struct vmxctx *vmxctx) 26844c87aefeSPatrick Mooney { 26854c87aefeSPatrick Mooney 26864c87aefeSPatrick Mooney /* Save guest debug registers. */ 26874c87aefeSPatrick Mooney vmxctx->guest_dr0 = rdr0(); 26884c87aefeSPatrick Mooney vmxctx->guest_dr1 = rdr1(); 26894c87aefeSPatrick Mooney vmxctx->guest_dr2 = rdr2(); 26904c87aefeSPatrick Mooney vmxctx->guest_dr3 = rdr3(); 26914c87aefeSPatrick Mooney vmxctx->guest_dr6 = rdr6(); 26924c87aefeSPatrick Mooney 26934c87aefeSPatrick Mooney /* 26944c87aefeSPatrick Mooney * Restore host debug registers. Restore DR7, DEBUGCTL, and 26954c87aefeSPatrick Mooney * PSL_T last. 
26964c87aefeSPatrick Mooney */ 26974c87aefeSPatrick Mooney load_dr0(vmxctx->host_dr0); 26984c87aefeSPatrick Mooney load_dr1(vmxctx->host_dr1); 26994c87aefeSPatrick Mooney load_dr2(vmxctx->host_dr2); 27004c87aefeSPatrick Mooney load_dr3(vmxctx->host_dr3); 27014c87aefeSPatrick Mooney load_dr6(vmxctx->host_dr6); 27024c87aefeSPatrick Mooney wrmsr(MSR_DEBUGCTLMSR, vmxctx->host_debugctl); 27034c87aefeSPatrick Mooney load_dr7(vmxctx->host_dr7); 27044c87aefeSPatrick Mooney write_rflags(read_rflags() | vmxctx->host_tf); 2705bf21cd93STycho Nightingale } 2706bf21cd93STycho Nightingale 2707bf21cd93STycho Nightingale static int 27080153d828SPatrick Mooney vmx_run(void *arg, int vcpu, uint64_t rip) 2709bf21cd93STycho Nightingale { 27104c87aefeSPatrick Mooney int rc, handled, launched; 2711bf21cd93STycho Nightingale struct vmx *vmx; 2712bf21cd93STycho Nightingale struct vm *vm; 2713bf21cd93STycho Nightingale struct vmxctx *vmxctx; 2714007ca332SPatrick Mooney uintptr_t vmcs_pa; 2715bf21cd93STycho Nightingale struct vm_exit *vmexit; 2716bf21cd93STycho Nightingale struct vlapic *vlapic; 27174c87aefeSPatrick Mooney uint32_t exit_reason; 2718c74a40a5SPatrick Mooney bool tpr_shadow_active; 27190153d828SPatrick Mooney vm_client_t *vmc; 27204c87aefeSPatrick Mooney 2721bf21cd93STycho Nightingale vmx = arg; 2722bf21cd93STycho Nightingale vm = vmx->vm; 2723007ca332SPatrick Mooney vmcs_pa = vmx->vmcs_pa[vcpu]; 2724bf21cd93STycho Nightingale vmxctx = &vmx->ctx[vcpu]; 2725bf21cd93STycho Nightingale vlapic = vm_lapic(vm, vcpu); 27264c87aefeSPatrick Mooney vmexit = vm_exitinfo(vm, vcpu); 27270153d828SPatrick Mooney vmc = vm_get_vmclient(vm, vcpu); 27284c87aefeSPatrick Mooney launched = 0; 2729c74a40a5SPatrick Mooney tpr_shadow_active = vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) && 2730c74a40a5SPatrick Mooney !vmx_cap_en(vmx, VMX_CAP_APICV) && 2731c74a40a5SPatrick Mooney (vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0; 2732bf21cd93STycho Nightingale 2733bf21cd93STycho Nightingale 
vmx_msr_guest_enter(vmx, vcpu); 2734bf21cd93STycho Nightingale 2735007ca332SPatrick Mooney vmcs_load(vmcs_pa); 2736bf21cd93STycho Nightingale 27374c87aefeSPatrick Mooney VERIFY(vmx->vmcs_state[vcpu] == VS_NONE && curthread->t_preempt != 0); 27384c87aefeSPatrick Mooney vmx->vmcs_state[vcpu] = VS_LOADED; 27394c87aefeSPatrick Mooney 2740bf21cd93STycho Nightingale /* 2741bf21cd93STycho Nightingale * XXX 2742bf21cd93STycho Nightingale * We do this every time because we may setup the virtual machine 2743bf21cd93STycho Nightingale * from a different process than the one that actually runs it. 2744bf21cd93STycho Nightingale * 2745bf21cd93STycho Nightingale * If the life of a virtual machine was spent entirely in the context 27464c87aefeSPatrick Mooney * of a single process we could do this once in vmx_vminit(). 2747bf21cd93STycho Nightingale */ 2748bf21cd93STycho Nightingale vmcs_write(VMCS_HOST_CR3, rcr3()); 2749bf21cd93STycho Nightingale 2750bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_RIP, rip); 27510153d828SPatrick Mooney vmx_set_pcpu_defaults(vmx, vcpu); 2752bf21cd93STycho Nightingale do { 2753c74a40a5SPatrick Mooney enum event_inject_state inject_state; 27540153d828SPatrick Mooney uint64_t eptgen; 2755c74a40a5SPatrick Mooney 2756*3d097f7dSPatrick Mooney ASSERT3U(vmcs_read(VMCS_GUEST_RIP), ==, rip); 27574c87aefeSPatrick Mooney 27584c87aefeSPatrick Mooney handled = UNHANDLED; 2759c74a40a5SPatrick Mooney 2760c74a40a5SPatrick Mooney /* 2761c74a40a5SPatrick Mooney * Perform initial event/exception/interrupt injection before 2762c74a40a5SPatrick Mooney * host CPU interrupts are disabled. 2763c74a40a5SPatrick Mooney */ 2764c74a40a5SPatrick Mooney inject_state = vmx_inject_events(vmx, vcpu, rip); 2765c74a40a5SPatrick Mooney 27664c87aefeSPatrick Mooney /* 27674c87aefeSPatrick Mooney * Interrupts are disabled from this point on until the 27684c87aefeSPatrick Mooney * guest starts executing. 
This is done for the following 27694c87aefeSPatrick Mooney * reasons: 27704c87aefeSPatrick Mooney * 27714c87aefeSPatrick Mooney * If an AST is asserted on this thread after the check below, 27724c87aefeSPatrick Mooney * then the IPI_AST notification will not be lost, because it 27734c87aefeSPatrick Mooney * will cause a VM exit due to external interrupt as soon as 27744c87aefeSPatrick Mooney * the guest state is loaded. 27754c87aefeSPatrick Mooney * 2776c74a40a5SPatrick Mooney * A posted interrupt after vmx_inject_vlapic() will not be 2777c74a40a5SPatrick Mooney * "lost" because it will be held pending in the host APIC 2778c74a40a5SPatrick Mooney * because interrupts are disabled. The pending interrupt will 2779c74a40a5SPatrick Mooney * be recognized as soon as the guest state is loaded. 27804c87aefeSPatrick Mooney * 27810153d828SPatrick Mooney * The same reasoning applies to the IPI generated by vmspace 27820153d828SPatrick Mooney * invalidation. 27834c87aefeSPatrick Mooney */ 27844c87aefeSPatrick Mooney disable_intr(); 2785c74a40a5SPatrick Mooney 2786c74a40a5SPatrick Mooney /* 2787c74a40a5SPatrick Mooney * If not precluded by existing events, inject any interrupt 2788c74a40a5SPatrick Mooney * pending on the vLAPIC. As a lock-less operation, it is safe 2789c74a40a5SPatrick Mooney * (and prudent) to perform with host CPU interrupts disabled. 2790c74a40a5SPatrick Mooney */ 2791c74a40a5SPatrick Mooney if (inject_state == EIS_CAN_INJECT) { 2792c74a40a5SPatrick Mooney inject_state = vmx_inject_vlapic(vmx, vcpu, vlapic); 27934c87aefeSPatrick Mooney } 27944c87aefeSPatrick Mooney 27954c87aefeSPatrick Mooney /* 27962606939dSPatrick Mooney * Check for vCPU bail-out conditions. This must be done after 27972606939dSPatrick Mooney * vmx_inject_events() to detect a triple-fault condition. 
27984c87aefeSPatrick Mooney */ 27992606939dSPatrick Mooney if (vcpu_entry_bailout_checks(vmx->vm, vcpu, rip)) { 28004c87aefeSPatrick Mooney enable_intr(); 28014c87aefeSPatrick Mooney break; 28024c87aefeSPatrick Mooney } 28034c87aefeSPatrick Mooney 28042606939dSPatrick Mooney if (vcpu_run_state_pending(vm, vcpu)) { 28054c87aefeSPatrick Mooney enable_intr(); 28062606939dSPatrick Mooney vm_exit_run_state(vmx->vm, vcpu, rip); 28074c87aefeSPatrick Mooney break; 28084c87aefeSPatrick Mooney } 28094c87aefeSPatrick Mooney 2810c74a40a5SPatrick Mooney /* 2811c74a40a5SPatrick Mooney * If subsequent activity queued events which require injection 2812c74a40a5SPatrick Mooney * handling, take another lap to handle them. 2813c74a40a5SPatrick Mooney */ 2814c74a40a5SPatrick Mooney if (vmx_inject_recheck(vmx, vcpu, inject_state)) { 2815c74a40a5SPatrick Mooney enable_intr(); 2816c74a40a5SPatrick Mooney handled = HANDLED; 2817c74a40a5SPatrick Mooney continue; 2818c74a40a5SPatrick Mooney } 2819c74a40a5SPatrick Mooney 28204c87aefeSPatrick Mooney if ((rc = smt_acquire()) != 1) { 28214c87aefeSPatrick Mooney enable_intr(); 28224c87aefeSPatrick Mooney vmexit->rip = rip; 28234c87aefeSPatrick Mooney vmexit->inst_length = 0; 28244c87aefeSPatrick Mooney if (rc == -1) { 28254c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_HT; 28264c87aefeSPatrick Mooney } else { 28274c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_BOGUS; 28284c87aefeSPatrick Mooney handled = HANDLED; 2829bf21cd93STycho Nightingale } 2830bf21cd93STycho Nightingale break; 2831bf21cd93STycho Nightingale } 28324c87aefeSPatrick Mooney 28334c87aefeSPatrick Mooney /* 28344c87aefeSPatrick Mooney * If this thread has gone off-cpu due to mutex operations 28354c87aefeSPatrick Mooney * during vmx_run, the VMCS will have been unloaded, forcing a 28364c87aefeSPatrick Mooney * re-VMLAUNCH as opposed to VMRESUME. 
28374c87aefeSPatrick Mooney */ 28384c87aefeSPatrick Mooney launched = (vmx->vmcs_state[vcpu] & VS_LAUNCHED) != 0; 28394c87aefeSPatrick Mooney /* 28404c87aefeSPatrick Mooney * Restoration of the GDT limit is taken care of by 28414c87aefeSPatrick Mooney * vmx_savectx(). Since the maximum practical index for the 28424c87aefeSPatrick Mooney * IDT is 255, restoring its limits from the post-VMX-exit 28434c87aefeSPatrick Mooney * default of 0xffff is not a concern. 28444c87aefeSPatrick Mooney * 28454c87aefeSPatrick Mooney * Only 64-bit hypervisor callers are allowed, which forgoes 28464c87aefeSPatrick Mooney * the need to restore any LDT descriptor. Toss an error to 28474c87aefeSPatrick Mooney * anyone attempting to break that rule. 28484c87aefeSPatrick Mooney */ 28494c87aefeSPatrick Mooney if (curproc->p_model != DATAMODEL_LP64) { 28504c87aefeSPatrick Mooney smt_release(); 28514c87aefeSPatrick Mooney enable_intr(); 28524c87aefeSPatrick Mooney bzero(vmexit, sizeof (*vmexit)); 28534c87aefeSPatrick Mooney vmexit->rip = rip; 28544c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_VMX; 28554c87aefeSPatrick Mooney vmexit->u.vmx.status = VM_FAIL_INVALID; 28564c87aefeSPatrick Mooney handled = UNHANDLED; 28574c87aefeSPatrick Mooney break; 28584c87aefeSPatrick Mooney } 28594c87aefeSPatrick Mooney 2860c74a40a5SPatrick Mooney if (tpr_shadow_active) { 2861c74a40a5SPatrick Mooney vmx_tpr_shadow_enter(vlapic); 2862154972afSPatrick Mooney } 2863154972afSPatrick Mooney 28640153d828SPatrick Mooney /* 28650153d828SPatrick Mooney * Indicate activation of vmspace (EPT) table just prior to VMX 28660153d828SPatrick Mooney * entry, checking for the necessity of an invept invalidation. 
28670153d828SPatrick Mooney */ 28680153d828SPatrick Mooney eptgen = vmc_table_enter(vmc); 2869d1c02647SPatrick Mooney if (vmx->eptgen[curcpu] != eptgen) { 28700153d828SPatrick Mooney /* 2871d1c02647SPatrick Mooney * VMspace generation does not match what was previously 2872d1c02647SPatrick Mooney * used on this host CPU, so all mappings associated 2873d1c02647SPatrick Mooney * with this EP4TA must be invalidated. 28740153d828SPatrick Mooney */ 28750153d828SPatrick Mooney invept(1, vmx->eptp); 2876d1c02647SPatrick Mooney vmx->eptgen[curcpu] = eptgen; 28770153d828SPatrick Mooney } 28780153d828SPatrick Mooney 287959460b49SPatrick Mooney vcpu_ustate_change(vm, vcpu, VU_RUN); 28804c87aefeSPatrick Mooney vmx_dr_enter_guest(vmxctx); 28810153d828SPatrick Mooney 28820153d828SPatrick Mooney /* Perform VMX entry */ 28834c87aefeSPatrick Mooney rc = vmx_enter_guest(vmxctx, vmx, launched); 28840153d828SPatrick Mooney 28854c87aefeSPatrick Mooney vmx_dr_leave_guest(vmxctx); 288659460b49SPatrick Mooney vcpu_ustate_change(vm, vcpu, VU_EMU_KERN); 28874c87aefeSPatrick Mooney 28884c87aefeSPatrick Mooney vmx->vmcs_state[vcpu] |= VS_LAUNCHED; 28894c87aefeSPatrick Mooney smt_release(); 28904c87aefeSPatrick Mooney 2891c74a40a5SPatrick Mooney if (tpr_shadow_active) { 2892c74a40a5SPatrick Mooney vmx_tpr_shadow_exit(vlapic); 2893c74a40a5SPatrick Mooney } 2894c74a40a5SPatrick Mooney 28954c87aefeSPatrick Mooney /* Collect some information for VM exit processing */ 2896*3d097f7dSPatrick Mooney vmexit->rip = rip = vmcs_read(VMCS_GUEST_RIP); 2897*3d097f7dSPatrick Mooney vmexit->inst_length = vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH); 2898*3d097f7dSPatrick Mooney vmexit->u.vmx.exit_reason = exit_reason = 2899*3d097f7dSPatrick Mooney (vmcs_read(VMCS_EXIT_REASON) & BASIC_EXIT_REASON_MASK); 2900*3d097f7dSPatrick Mooney vmexit->u.vmx.exit_qualification = 2901*3d097f7dSPatrick Mooney vmcs_read(VMCS_EXIT_QUALIFICATION); 2902bf21cd93STycho Nightingale /* Update 'nextrip' */ 2903bf21cd93STycho Nightingale 
vmx->state[vcpu].nextrip = rip; 2904bf21cd93STycho Nightingale 29054c87aefeSPatrick Mooney if (rc == VMX_GUEST_VMEXIT) { 29060153d828SPatrick Mooney vmx_exit_handle_possible_nmi(vmexit); 29070153d828SPatrick Mooney } 29080153d828SPatrick Mooney enable_intr(); 29090153d828SPatrick Mooney vmc_table_exit(vmc); 29100153d828SPatrick Mooney 29110153d828SPatrick Mooney if (rc == VMX_GUEST_VMEXIT) { 29124c87aefeSPatrick Mooney handled = vmx_exit_process(vmx, vcpu, vmexit); 29134c87aefeSPatrick Mooney } else { 29144c87aefeSPatrick Mooney vmx_exit_inst_error(vmxctx, rc, vmexit); 2915bf21cd93STycho Nightingale } 29162699b94cSPatrick Mooney DTRACE_PROBE3(vmm__vexit, int, vcpu, uint64_t, rip, 29172699b94cSPatrick Mooney uint32_t, exit_reason); 29184c87aefeSPatrick Mooney rip = vmexit->rip; 2919bf21cd93STycho Nightingale } while (handled); 2920bf21cd93STycho Nightingale 29212606939dSPatrick Mooney /* If a VM exit has been handled then the exitcode must be BOGUS */ 29222606939dSPatrick Mooney if (handled && vmexit->exitcode != VM_EXITCODE_BOGUS) { 29232606939dSPatrick Mooney panic("Non-BOGUS exitcode (%d) unexpected for handled VM exit", 29242606939dSPatrick Mooney vmexit->exitcode); 2925bf21cd93STycho Nightingale } 2926bf21cd93STycho Nightingale 29274c87aefeSPatrick Mooney VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d", 2928bf21cd93STycho Nightingale vmexit->exitcode); 2929bf21cd93STycho Nightingale 2930007ca332SPatrick Mooney vmcs_clear(vmcs_pa); 2931bf21cd93STycho Nightingale vmx_msr_guest_exit(vmx, vcpu); 2932bf21cd93STycho Nightingale 29334c87aefeSPatrick Mooney VERIFY(vmx->vmcs_state != VS_NONE && curthread->t_preempt != 0); 29344c87aefeSPatrick Mooney vmx->vmcs_state[vcpu] = VS_NONE; 2935bf21cd93STycho Nightingale 29364c87aefeSPatrick Mooney return (0); 2937bf21cd93STycho Nightingale } 2938bf21cd93STycho Nightingale 2939bf21cd93STycho Nightingale static void 2940bf21cd93STycho Nightingale vmx_vmcleanup(void *arg) 2941bf21cd93STycho Nightingale { 
29424c87aefeSPatrick Mooney int i; 2943bf21cd93STycho Nightingale struct vmx *vmx = arg; 29444c87aefeSPatrick Mooney uint16_t maxcpus; 2945bf21cd93STycho Nightingale 29466b641d7aSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV)) { 2947e0994bd2SPatrick Mooney (void) vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); 29486b641d7aSPatrick Mooney kmem_free(vmx->apic_access_page, PAGESIZE); 29496b641d7aSPatrick Mooney } else { 29506b641d7aSPatrick Mooney VERIFY3P(vmx->apic_access_page, ==, NULL); 29516b641d7aSPatrick Mooney } 29526b641d7aSPatrick Mooney 29536b641d7aSPatrick Mooney vmx_msr_bitmap_destroy(vmx); 2954bf21cd93STycho Nightingale 29554c87aefeSPatrick Mooney maxcpus = vm_get_maxcpus(vmx->vm); 29564c87aefeSPatrick Mooney for (i = 0; i < maxcpus; i++) 29574c87aefeSPatrick Mooney vpid_free(vmx->state[i].vpid); 2958bf21cd93STycho Nightingale 2959bf21cd93STycho Nightingale free(vmx, M_VMX); 2960bf21cd93STycho Nightingale } 2961bf21cd93STycho Nightingale 2962db8733f5SPatrick Mooney static uint64_t * 2963bf21cd93STycho Nightingale vmxctx_regptr(struct vmxctx *vmxctx, int reg) 2964bf21cd93STycho Nightingale { 2965bf21cd93STycho Nightingale switch (reg) { 2966bf21cd93STycho Nightingale case VM_REG_GUEST_RAX: 2967bf21cd93STycho Nightingale return (&vmxctx->guest_rax); 2968bf21cd93STycho Nightingale case VM_REG_GUEST_RBX: 2969bf21cd93STycho Nightingale return (&vmxctx->guest_rbx); 2970bf21cd93STycho Nightingale case VM_REG_GUEST_RCX: 2971bf21cd93STycho Nightingale return (&vmxctx->guest_rcx); 2972bf21cd93STycho Nightingale case VM_REG_GUEST_RDX: 2973bf21cd93STycho Nightingale return (&vmxctx->guest_rdx); 2974bf21cd93STycho Nightingale case VM_REG_GUEST_RSI: 2975bf21cd93STycho Nightingale return (&vmxctx->guest_rsi); 2976bf21cd93STycho Nightingale case VM_REG_GUEST_RDI: 2977bf21cd93STycho Nightingale return (&vmxctx->guest_rdi); 2978bf21cd93STycho Nightingale case VM_REG_GUEST_RBP: 2979bf21cd93STycho Nightingale return (&vmxctx->guest_rbp); 2980bf21cd93STycho 
Nightingale case VM_REG_GUEST_R8: 2981bf21cd93STycho Nightingale return (&vmxctx->guest_r8); 2982bf21cd93STycho Nightingale case VM_REG_GUEST_R9: 2983bf21cd93STycho Nightingale return (&vmxctx->guest_r9); 2984bf21cd93STycho Nightingale case VM_REG_GUEST_R10: 2985bf21cd93STycho Nightingale return (&vmxctx->guest_r10); 2986bf21cd93STycho Nightingale case VM_REG_GUEST_R11: 2987bf21cd93STycho Nightingale return (&vmxctx->guest_r11); 2988bf21cd93STycho Nightingale case VM_REG_GUEST_R12: 2989bf21cd93STycho Nightingale return (&vmxctx->guest_r12); 2990bf21cd93STycho Nightingale case VM_REG_GUEST_R13: 2991bf21cd93STycho Nightingale return (&vmxctx->guest_r13); 2992bf21cd93STycho Nightingale case VM_REG_GUEST_R14: 2993bf21cd93STycho Nightingale return (&vmxctx->guest_r14); 2994bf21cd93STycho Nightingale case VM_REG_GUEST_R15: 2995bf21cd93STycho Nightingale return (&vmxctx->guest_r15); 2996bf21cd93STycho Nightingale case VM_REG_GUEST_CR2: 2997bf21cd93STycho Nightingale return (&vmxctx->guest_cr2); 29984c87aefeSPatrick Mooney case VM_REG_GUEST_DR0: 29994c87aefeSPatrick Mooney return (&vmxctx->guest_dr0); 30004c87aefeSPatrick Mooney case VM_REG_GUEST_DR1: 30014c87aefeSPatrick Mooney return (&vmxctx->guest_dr1); 30024c87aefeSPatrick Mooney case VM_REG_GUEST_DR2: 30034c87aefeSPatrick Mooney return (&vmxctx->guest_dr2); 30044c87aefeSPatrick Mooney case VM_REG_GUEST_DR3: 30054c87aefeSPatrick Mooney return (&vmxctx->guest_dr3); 30064c87aefeSPatrick Mooney case VM_REG_GUEST_DR6: 30074c87aefeSPatrick Mooney return (&vmxctx->guest_dr6); 3008bf21cd93STycho Nightingale default: 3009bf21cd93STycho Nightingale break; 3010bf21cd93STycho Nightingale } 3011bf21cd93STycho Nightingale return (NULL); 3012bf21cd93STycho Nightingale } 3013bf21cd93STycho Nightingale 3014bf21cd93STycho Nightingale static int 3015007ca332SPatrick Mooney vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) 3016bf21cd93STycho Nightingale { 3017007ca332SPatrick Mooney int running, hostcpu, err; 
3018007ca332SPatrick Mooney struct vmx *vmx = arg; 3019db8733f5SPatrick Mooney uint64_t *regp; 3020bf21cd93STycho Nightingale 3021007ca332SPatrick Mooney running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3022007ca332SPatrick Mooney if (running && hostcpu != curcpu) 3023007ca332SPatrick Mooney panic("vmx_getreg: %s%d is running", vm_name(vmx->vm), vcpu); 3024bf21cd93STycho Nightingale 3025007ca332SPatrick Mooney /* VMCS access not required for ctx reads */ 3026007ca332SPatrick Mooney if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { 3027007ca332SPatrick Mooney *retval = *regp; 3028bf21cd93STycho Nightingale return (0); 30294c87aefeSPatrick Mooney } 30304c87aefeSPatrick Mooney 3031007ca332SPatrick Mooney if (!running) { 3032007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 30334c87aefeSPatrick Mooney } 3034bf21cd93STycho Nightingale 3035bf0dcd3fSPatrick Mooney err = 0; 3036007ca332SPatrick Mooney if (reg == VM_REG_GUEST_INTR_SHADOW) { 3037007ca332SPatrick Mooney uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 3038007ca332SPatrick Mooney *retval = (gi & HWINTR_BLOCKING) ? 
1 : 0; 3039007ca332SPatrick Mooney } else { 3040007ca332SPatrick Mooney uint32_t encoding; 3041bf21cd93STycho Nightingale 3042007ca332SPatrick Mooney encoding = vmcs_field_encoding(reg); 3043bf0dcd3fSPatrick Mooney switch (encoding) { 3044bf0dcd3fSPatrick Mooney case VMCS_GUEST_CR0: 3045bf0dcd3fSPatrick Mooney /* Take the shadow bits into account */ 3046bf0dcd3fSPatrick Mooney *retval = vmx_unshadow_cr0(vmcs_read(encoding), 3047bf0dcd3fSPatrick Mooney vmcs_read(VMCS_CR0_SHADOW)); 3048bf0dcd3fSPatrick Mooney break; 3049bf0dcd3fSPatrick Mooney case VMCS_GUEST_CR4: 3050bf0dcd3fSPatrick Mooney /* Take the shadow bits into account */ 3051bf0dcd3fSPatrick Mooney *retval = vmx_unshadow_cr4(vmcs_read(encoding), 3052bf0dcd3fSPatrick Mooney vmcs_read(VMCS_CR4_SHADOW)); 3053bf0dcd3fSPatrick Mooney break; 3054bf0dcd3fSPatrick Mooney case VMCS_INVALID_ENCODING: 3055bf0dcd3fSPatrick Mooney err = EINVAL; 3056bf0dcd3fSPatrick Mooney break; 3057bf0dcd3fSPatrick Mooney default: 3058007ca332SPatrick Mooney *retval = vmcs_read(encoding); 3059bf0dcd3fSPatrick Mooney break; 3060007ca332SPatrick Mooney } 3061bf21cd93STycho Nightingale } 3062bf21cd93STycho Nightingale 3063007ca332SPatrick Mooney if (!running) { 3064007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3065007ca332SPatrick Mooney } 3066bf21cd93STycho Nightingale 3067007ca332SPatrick Mooney return (err); 3068bf21cd93STycho Nightingale } 3069bf21cd93STycho Nightingale 3070bf21cd93STycho Nightingale static int 3071bf21cd93STycho Nightingale vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) 3072bf21cd93STycho Nightingale { 3073007ca332SPatrick Mooney int running, hostcpu, error; 3074bf21cd93STycho Nightingale struct vmx *vmx = arg; 3075db8733f5SPatrick Mooney uint64_t *regp; 3076bf21cd93STycho Nightingale 3077bf21cd93STycho Nightingale running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3078bf21cd93STycho Nightingale if (running && hostcpu != curcpu) 3079bf21cd93STycho Nightingale panic("vmx_setreg: %s%d is running", 
vm_name(vmx->vm), vcpu); 3080bf21cd93STycho Nightingale 3081007ca332SPatrick Mooney /* VMCS access not required for ctx writes */ 3082007ca332SPatrick Mooney if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { 3083007ca332SPatrick Mooney *regp = val; 3084bf21cd93STycho Nightingale return (0); 3085007ca332SPatrick Mooney } 3086bf21cd93STycho Nightingale 3087007ca332SPatrick Mooney if (!running) { 3088007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3089007ca332SPatrick Mooney } 3090bf21cd93STycho Nightingale 3091007ca332SPatrick Mooney if (reg == VM_REG_GUEST_INTR_SHADOW) { 3092007ca332SPatrick Mooney if (val != 0) { 3093bf21cd93STycho Nightingale /* 3094007ca332SPatrick Mooney * Forcing the vcpu into an interrupt shadow is not 3095007ca332SPatrick Mooney * presently supported. 30964c87aefeSPatrick Mooney */ 3097007ca332SPatrick Mooney error = EINVAL; 3098007ca332SPatrick Mooney } else { 3099007ca332SPatrick Mooney uint64_t gi; 3100007ca332SPatrick Mooney 3101007ca332SPatrick Mooney gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 3102007ca332SPatrick Mooney gi &= ~HWINTR_BLOCKING; 3103007ca332SPatrick Mooney vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); 3104007ca332SPatrick Mooney error = 0; 3105bf21cd93STycho Nightingale } 3106007ca332SPatrick Mooney } else { 3107007ca332SPatrick Mooney uint32_t encoding; 31084c87aefeSPatrick Mooney 3109007ca332SPatrick Mooney error = 0; 3110007ca332SPatrick Mooney encoding = vmcs_field_encoding(reg); 3111007ca332SPatrick Mooney switch (encoding) { 3112007ca332SPatrick Mooney case VMCS_GUEST_IA32_EFER: 3113007ca332SPatrick Mooney /* 3114007ca332SPatrick Mooney * If the "load EFER" VM-entry control is 1 then the 3115007ca332SPatrick Mooney * value of EFER.LMA must be identical to "IA-32e mode 3116007ca332SPatrick Mooney * guest" bit in the VM-entry control. 
3117007ca332SPatrick Mooney */ 3118007ca332SPatrick Mooney if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) { 3119007ca332SPatrick Mooney uint64_t ctls; 3120007ca332SPatrick Mooney 3121007ca332SPatrick Mooney ctls = vmcs_read(VMCS_ENTRY_CTLS); 3122007ca332SPatrick Mooney if (val & EFER_LMA) { 3123007ca332SPatrick Mooney ctls |= VM_ENTRY_GUEST_LMA; 3124007ca332SPatrick Mooney } else { 3125007ca332SPatrick Mooney ctls &= ~VM_ENTRY_GUEST_LMA; 3126007ca332SPatrick Mooney } 3127007ca332SPatrick Mooney vmcs_write(VMCS_ENTRY_CTLS, ctls); 3128007ca332SPatrick Mooney } 3129007ca332SPatrick Mooney vmcs_write(encoding, val); 3130007ca332SPatrick Mooney break; 3131007ca332SPatrick Mooney case VMCS_GUEST_CR0: 3132007ca332SPatrick Mooney /* 3133007ca332SPatrick Mooney * The guest is not allowed to modify certain bits in 3134007ca332SPatrick Mooney * %cr0 and %cr4. To maintain the illusion of full 3135007ca332SPatrick Mooney * control, they have shadow versions which contain the 3136007ca332SPatrick Mooney * guest-perceived (via reads from the register) values 3137007ca332SPatrick Mooney * as opposed to the guest-effective values. 3138007ca332SPatrick Mooney * 3139007ca332SPatrick Mooney * This is detailed in the SDM: Vol. 3 Ch. 24.6.6. 3140007ca332SPatrick Mooney */ 3141007ca332SPatrick Mooney vmcs_write(VMCS_CR0_SHADOW, val); 3142007ca332SPatrick Mooney vmcs_write(encoding, vmx_fix_cr0(val)); 3143007ca332SPatrick Mooney break; 3144007ca332SPatrick Mooney case VMCS_GUEST_CR4: 3145007ca332SPatrick Mooney /* See above for detail on %cr4 shadowing */ 3146007ca332SPatrick Mooney vmcs_write(VMCS_CR4_SHADOW, val); 3147007ca332SPatrick Mooney vmcs_write(encoding, vmx_fix_cr4(val)); 3148007ca332SPatrick Mooney break; 3149007ca332SPatrick Mooney case VMCS_GUEST_CR3: 3150007ca332SPatrick Mooney vmcs_write(encoding, val); 31514c87aefeSPatrick Mooney /* 31524c87aefeSPatrick Mooney * Invalidate the guest vcpu's TLB mappings to emulate 31534c87aefeSPatrick Mooney * the behavior of updating %cr3. 
31544c87aefeSPatrick Mooney * 31554c87aefeSPatrick Mooney * XXX the processor retains global mappings when %cr3 31564c87aefeSPatrick Mooney * is updated but vmx_invvpid() does not. 31574c87aefeSPatrick Mooney */ 31580153d828SPatrick Mooney vmx_invvpid(vmx, vcpu, running); 3159007ca332SPatrick Mooney break; 3160007ca332SPatrick Mooney case VMCS_INVALID_ENCODING: 3161007ca332SPatrick Mooney error = EINVAL; 3162007ca332SPatrick Mooney break; 3163007ca332SPatrick Mooney default: 3164007ca332SPatrick Mooney vmcs_write(encoding, val); 3165007ca332SPatrick Mooney break; 31664c87aefeSPatrick Mooney } 3167bf21cd93STycho Nightingale } 3168bf21cd93STycho Nightingale 3169007ca332SPatrick Mooney if (!running) { 3170007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3171007ca332SPatrick Mooney } 3172007ca332SPatrick Mooney 3173bf21cd93STycho Nightingale return (error); 3174bf21cd93STycho Nightingale } 3175bf21cd93STycho Nightingale 3176bf21cd93STycho Nightingale static int 3177007ca332SPatrick Mooney vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) 3178bf21cd93STycho Nightingale { 3179bf21cd93STycho Nightingale int hostcpu, running; 3180bf21cd93STycho Nightingale struct vmx *vmx = arg; 3181007ca332SPatrick Mooney uint32_t base, limit, access; 3182bf21cd93STycho Nightingale 3183bf21cd93STycho Nightingale running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3184bf21cd93STycho Nightingale if (running && hostcpu != curcpu) 3185bf21cd93STycho Nightingale panic("vmx_getdesc: %s%d is running", vm_name(vmx->vm), vcpu); 3186bf21cd93STycho Nightingale 3187007ca332SPatrick Mooney if (!running) { 3188007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3189007ca332SPatrick Mooney } 3190007ca332SPatrick Mooney 3191007ca332SPatrick Mooney vmcs_seg_desc_encoding(seg, &base, &limit, &access); 3192007ca332SPatrick Mooney desc->base = vmcs_read(base); 3193007ca332SPatrick Mooney desc->limit = vmcs_read(limit); 3194007ca332SPatrick Mooney if (access != VMCS_INVALID_ENCODING) 
{ 3195007ca332SPatrick Mooney desc->access = vmcs_read(access); 3196007ca332SPatrick Mooney } else { 3197007ca332SPatrick Mooney desc->access = 0; 3198007ca332SPatrick Mooney } 3199007ca332SPatrick Mooney 3200007ca332SPatrick Mooney if (!running) { 3201007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3202007ca332SPatrick Mooney } 3203007ca332SPatrick Mooney return (0); 3204bf21cd93STycho Nightingale } 3205bf21cd93STycho Nightingale 3206bf21cd93STycho Nightingale static int 32072606939dSPatrick Mooney vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc) 3208bf21cd93STycho Nightingale { 3209bf21cd93STycho Nightingale int hostcpu, running; 3210bf21cd93STycho Nightingale struct vmx *vmx = arg; 3211007ca332SPatrick Mooney uint32_t base, limit, access; 3212bf21cd93STycho Nightingale 3213bf21cd93STycho Nightingale running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3214bf21cd93STycho Nightingale if (running && hostcpu != curcpu) 3215bf21cd93STycho Nightingale panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu); 3216bf21cd93STycho Nightingale 3217007ca332SPatrick Mooney if (!running) { 3218007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3219007ca332SPatrick Mooney } 3220007ca332SPatrick Mooney 3221007ca332SPatrick Mooney vmcs_seg_desc_encoding(seg, &base, &limit, &access); 3222007ca332SPatrick Mooney vmcs_write(base, desc->base); 3223007ca332SPatrick Mooney vmcs_write(limit, desc->limit); 3224007ca332SPatrick Mooney if (access != VMCS_INVALID_ENCODING) { 3225007ca332SPatrick Mooney vmcs_write(access, desc->access); 3226007ca332SPatrick Mooney } 3227007ca332SPatrick Mooney 3228007ca332SPatrick Mooney if (!running) { 3229007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3230007ca332SPatrick Mooney } 3231007ca332SPatrick Mooney return (0); 3232bf21cd93STycho Nightingale } 3233bf21cd93STycho Nightingale 3234bf21cd93STycho Nightingale static int 3235bf21cd93STycho Nightingale vmx_getcap(void *arg, int vcpu, int type, int *retval) 
3236bf21cd93STycho Nightingale { 3237bf21cd93STycho Nightingale struct vmx *vmx = arg; 3238bf21cd93STycho Nightingale int vcap; 3239bf21cd93STycho Nightingale int ret; 3240bf21cd93STycho Nightingale 3241bf21cd93STycho Nightingale ret = ENOENT; 3242bf21cd93STycho Nightingale 3243bf21cd93STycho Nightingale vcap = vmx->cap[vcpu].set; 3244bf21cd93STycho Nightingale 3245bf21cd93STycho Nightingale switch (type) { 3246bf21cd93STycho Nightingale case VM_CAP_HALT_EXIT: 3247bf21cd93STycho Nightingale if (cap_halt_exit) 3248bf21cd93STycho Nightingale ret = 0; 3249bf21cd93STycho Nightingale break; 3250bf21cd93STycho Nightingale case VM_CAP_PAUSE_EXIT: 3251bf21cd93STycho Nightingale if (cap_pause_exit) 3252bf21cd93STycho Nightingale ret = 0; 3253bf21cd93STycho Nightingale break; 3254bf21cd93STycho Nightingale case VM_CAP_MTRAP_EXIT: 3255bf21cd93STycho Nightingale if (cap_monitor_trap) 3256bf21cd93STycho Nightingale ret = 0; 3257bf21cd93STycho Nightingale break; 32584c87aefeSPatrick Mooney case VM_CAP_ENABLE_INVPCID: 32594c87aefeSPatrick Mooney if (cap_invpcid) 32604c87aefeSPatrick Mooney ret = 0; 32614c87aefeSPatrick Mooney break; 3262154972afSPatrick Mooney case VM_CAP_BPT_EXIT: 3263154972afSPatrick Mooney ret = 0; 3264154972afSPatrick Mooney break; 3265bf21cd93STycho Nightingale default: 3266bf21cd93STycho Nightingale break; 3267bf21cd93STycho Nightingale } 3268bf21cd93STycho Nightingale 3269bf21cd93STycho Nightingale if (ret == 0) 3270bf21cd93STycho Nightingale *retval = (vcap & (1 << type)) ? 
1 : 0; 3271bf21cd93STycho Nightingale 3272bf21cd93STycho Nightingale return (ret); 3273bf21cd93STycho Nightingale } 3274bf21cd93STycho Nightingale 3275bf21cd93STycho Nightingale static int 3276bf21cd93STycho Nightingale vmx_setcap(void *arg, int vcpu, int type, int val) 3277bf21cd93STycho Nightingale { 3278bf21cd93STycho Nightingale struct vmx *vmx = arg; 3279007ca332SPatrick Mooney uint32_t baseval, reg, flag; 3280bf21cd93STycho Nightingale uint32_t *pptr; 3281bf21cd93STycho Nightingale int error; 3282bf21cd93STycho Nightingale 3283007ca332SPatrick Mooney error = ENOENT; 3284bf21cd93STycho Nightingale pptr = NULL; 3285bf21cd93STycho Nightingale 3286bf21cd93STycho Nightingale switch (type) { 3287bf21cd93STycho Nightingale case VM_CAP_HALT_EXIT: 3288bf21cd93STycho Nightingale if (cap_halt_exit) { 3289007ca332SPatrick Mooney error = 0; 3290bf21cd93STycho Nightingale pptr = &vmx->cap[vcpu].proc_ctls; 3291bf21cd93STycho Nightingale baseval = *pptr; 3292bf21cd93STycho Nightingale flag = PROCBASED_HLT_EXITING; 3293bf21cd93STycho Nightingale reg = VMCS_PRI_PROC_BASED_CTLS; 3294bf21cd93STycho Nightingale } 3295bf21cd93STycho Nightingale break; 3296bf21cd93STycho Nightingale case VM_CAP_MTRAP_EXIT: 3297bf21cd93STycho Nightingale if (cap_monitor_trap) { 3298007ca332SPatrick Mooney error = 0; 3299bf21cd93STycho Nightingale pptr = &vmx->cap[vcpu].proc_ctls; 3300bf21cd93STycho Nightingale baseval = *pptr; 3301bf21cd93STycho Nightingale flag = PROCBASED_MTF; 3302bf21cd93STycho Nightingale reg = VMCS_PRI_PROC_BASED_CTLS; 3303bf21cd93STycho Nightingale } 3304bf21cd93STycho Nightingale break; 3305bf21cd93STycho Nightingale case VM_CAP_PAUSE_EXIT: 3306bf21cd93STycho Nightingale if (cap_pause_exit) { 3307007ca332SPatrick Mooney error = 0; 3308bf21cd93STycho Nightingale pptr = &vmx->cap[vcpu].proc_ctls; 3309bf21cd93STycho Nightingale baseval = *pptr; 3310bf21cd93STycho Nightingale flag = PROCBASED_PAUSE_EXITING; 3311bf21cd93STycho Nightingale reg = VMCS_PRI_PROC_BASED_CTLS; 
3312bf21cd93STycho Nightingale } 3313bf21cd93STycho Nightingale break; 33144c87aefeSPatrick Mooney case VM_CAP_ENABLE_INVPCID: 33154c87aefeSPatrick Mooney if (cap_invpcid) { 3316007ca332SPatrick Mooney error = 0; 33174c87aefeSPatrick Mooney pptr = &vmx->cap[vcpu].proc_ctls2; 33184c87aefeSPatrick Mooney baseval = *pptr; 33194c87aefeSPatrick Mooney flag = PROCBASED2_ENABLE_INVPCID; 33204c87aefeSPatrick Mooney reg = VMCS_SEC_PROC_BASED_CTLS; 33214c87aefeSPatrick Mooney } 33224c87aefeSPatrick Mooney break; 3323154972afSPatrick Mooney case VM_CAP_BPT_EXIT: 3324007ca332SPatrick Mooney error = 0; 3325154972afSPatrick Mooney 3326154972afSPatrick Mooney /* Don't change the bitmap if we are tracing all exceptions. */ 3327154972afSPatrick Mooney if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { 3328154972afSPatrick Mooney pptr = &vmx->cap[vcpu].exc_bitmap; 3329154972afSPatrick Mooney baseval = *pptr; 3330154972afSPatrick Mooney flag = (1 << IDT_BP); 3331154972afSPatrick Mooney reg = VMCS_EXCEPTION_BITMAP; 3332154972afSPatrick Mooney } 3333154972afSPatrick Mooney break; 3334bf21cd93STycho Nightingale default: 3335bf21cd93STycho Nightingale break; 3336bf21cd93STycho Nightingale } 3337bf21cd93STycho Nightingale 3338007ca332SPatrick Mooney if (error != 0) { 3339007ca332SPatrick Mooney return (error); 3340007ca332SPatrick Mooney } 3341154972afSPatrick Mooney 3342154972afSPatrick Mooney if (pptr != NULL) { 3343bf21cd93STycho Nightingale if (val) { 3344bf21cd93STycho Nightingale baseval |= flag; 3345bf21cd93STycho Nightingale } else { 3346bf21cd93STycho Nightingale baseval &= ~flag; 3347bf21cd93STycho Nightingale } 3348007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3349007ca332SPatrick Mooney vmcs_write(reg, baseval); 3350007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3351bf21cd93STycho Nightingale 3352154972afSPatrick Mooney /* 3353154972afSPatrick Mooney * Update optional stored flags, and record 3354154972afSPatrick Mooney * setting 3355154972afSPatrick Mooney */ 
3356154972afSPatrick Mooney *pptr = baseval; 3357bf21cd93STycho Nightingale } 3358bf21cd93STycho Nightingale 3359154972afSPatrick Mooney if (val) { 3360154972afSPatrick Mooney vmx->cap[vcpu].set |= (1 << type); 3361154972afSPatrick Mooney } else { 3362154972afSPatrick Mooney vmx->cap[vcpu].set &= ~(1 << type); 3363154972afSPatrick Mooney } 3364154972afSPatrick Mooney 3365154972afSPatrick Mooney return (0); 3366bf21cd93STycho Nightingale } 3367bf21cd93STycho Nightingale 3368bf21cd93STycho Nightingale struct vlapic_vtx { 3369bf21cd93STycho Nightingale struct vlapic vlapic; 3370c74a40a5SPatrick Mooney 3371c74a40a5SPatrick Mooney /* Align to the nearest cacheline */ 3372c74a40a5SPatrick Mooney uint8_t _pad[64 - (sizeof (struct vlapic) % 64)]; 3373c74a40a5SPatrick Mooney 3374c74a40a5SPatrick Mooney /* TMR handling state for posted interrupts */ 3375c74a40a5SPatrick Mooney uint32_t tmr_active[8]; 3376c74a40a5SPatrick Mooney uint32_t pending_level[8]; 3377c74a40a5SPatrick Mooney uint32_t pending_edge[8]; 3378c74a40a5SPatrick Mooney 3379bf21cd93STycho Nightingale struct pir_desc *pir_desc; 3380bf21cd93STycho Nightingale struct vmx *vmx; 33812699b94cSPatrick Mooney uint_t pending_prio; 3382c74a40a5SPatrick Mooney boolean_t tmr_sync; 3383bf21cd93STycho Nightingale }; 3384bf21cd93STycho Nightingale 33852699b94cSPatrick Mooney CTASSERT((offsetof(struct vlapic_vtx, tmr_active) & 63) == 0); 33864c87aefeSPatrick Mooney 33872699b94cSPatrick Mooney #define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4)) 3388bf21cd93STycho Nightingale 3389c74a40a5SPatrick Mooney static vcpu_notify_t 3390c74a40a5SPatrick Mooney vmx_apicv_set_ready(struct vlapic *vlapic, int vector, bool level) 3391bf21cd93STycho Nightingale { 3392bf21cd93STycho Nightingale struct vlapic_vtx *vlapic_vtx; 3393bf21cd93STycho Nightingale struct pir_desc *pir_desc; 3394c74a40a5SPatrick Mooney uint32_t mask, tmrval; 3395c74a40a5SPatrick Mooney int idx; 3396c74a40a5SPatrick Mooney vcpu_notify_t notify = VCPU_NOTIFY_NONE; 
3397bf21cd93STycho Nightingale 3398bf21cd93STycho Nightingale vlapic_vtx = (struct vlapic_vtx *)vlapic; 3399bf21cd93STycho Nightingale pir_desc = vlapic_vtx->pir_desc; 3400c74a40a5SPatrick Mooney idx = vector / 32; 3401c74a40a5SPatrick Mooney mask = 1UL << (vector % 32); 3402bf21cd93STycho Nightingale 3403bf21cd93STycho Nightingale /* 3404c74a40a5SPatrick Mooney * If the currently asserted TMRs do not match the state requested by 3405c74a40a5SPatrick Mooney * the incoming interrupt, an exit will be required to reconcile those 3406c74a40a5SPatrick Mooney * bits in the APIC page. This will keep the vLAPIC behavior in line 3407c74a40a5SPatrick Mooney * with the architecturally defined expectations. 3408c74a40a5SPatrick Mooney * 3409c74a40a5SPatrick Mooney * If actors of mixed types (edge and level) are racing against the same 3410c74a40a5SPatrick Mooney * vector (toggling its TMR bit back and forth), the results could 3411c74a40a5SPatrick Mooney * inconsistent. Such circumstances are considered a rare edge case and 3412c74a40a5SPatrick Mooney * are never expected to be found in the wild. 
3413bf21cd93STycho Nightingale */ 3414c74a40a5SPatrick Mooney tmrval = atomic_load_acq_int(&vlapic_vtx->tmr_active[idx]); 3415c74a40a5SPatrick Mooney if (!level) { 3416c74a40a5SPatrick Mooney if ((tmrval & mask) != 0) { 3417c74a40a5SPatrick Mooney /* Edge-triggered interrupt needs TMR de-asserted */ 3418c74a40a5SPatrick Mooney atomic_set_int(&vlapic_vtx->pending_edge[idx], mask); 3419c74a40a5SPatrick Mooney atomic_store_rel_long(&pir_desc->pending, 1); 3420c74a40a5SPatrick Mooney return (VCPU_NOTIFY_EXIT); 3421c74a40a5SPatrick Mooney } 3422c74a40a5SPatrick Mooney } else { 3423c74a40a5SPatrick Mooney if ((tmrval & mask) == 0) { 3424c74a40a5SPatrick Mooney /* Level-triggered interrupt needs TMR asserted */ 3425c74a40a5SPatrick Mooney atomic_set_int(&vlapic_vtx->pending_level[idx], mask); 3426c74a40a5SPatrick Mooney atomic_store_rel_long(&pir_desc->pending, 1); 3427c74a40a5SPatrick Mooney return (VCPU_NOTIFY_EXIT); 3428c74a40a5SPatrick Mooney } 3429c74a40a5SPatrick Mooney } 3430c74a40a5SPatrick Mooney 3431c74a40a5SPatrick Mooney /* 3432c74a40a5SPatrick Mooney * If the interrupt request does not require manipulation of the TMRs 3433c74a40a5SPatrick Mooney * for delivery, set it in PIR descriptor. It cannot be inserted into 3434c74a40a5SPatrick Mooney * the APIC page while the vCPU might be running. 3435c74a40a5SPatrick Mooney */ 3436c74a40a5SPatrick Mooney atomic_set_int(&pir_desc->pir[idx], mask); 34374c87aefeSPatrick Mooney 34384c87aefeSPatrick Mooney /* 34394c87aefeSPatrick Mooney * A notification is required whenever the 'pending' bit makes a 34404c87aefeSPatrick Mooney * transition from 0->1. 34414c87aefeSPatrick Mooney * 34424c87aefeSPatrick Mooney * Even if the 'pending' bit is already asserted, notification about 34434c87aefeSPatrick Mooney * the incoming interrupt may still be necessary. 
For example, if a 34444c87aefeSPatrick Mooney * vCPU is HLTed with a high PPR, a low priority interrupt would cause 34454c87aefeSPatrick Mooney * the 0->1 'pending' transition with a notification, but the vCPU 34464c87aefeSPatrick Mooney * would ignore the interrupt for the time being. The same vCPU would 34474c87aefeSPatrick Mooney * need to then be notified if a high-priority interrupt arrived which 34484c87aefeSPatrick Mooney * satisfied the PPR. 34494c87aefeSPatrick Mooney * 34504c87aefeSPatrick Mooney * The priorities of interrupts injected while 'pending' is asserted 34514c87aefeSPatrick Mooney * are tracked in a custom bitfield 'pending_prio'. Should the 34524c87aefeSPatrick Mooney * to-be-injected interrupt exceed the priorities already present, the 34534c87aefeSPatrick Mooney * notification is sent. The priorities recorded in 'pending_prio' are 34544c87aefeSPatrick Mooney * cleared whenever the 'pending' bit makes another 0->1 transition. 34554c87aefeSPatrick Mooney */ 34564c87aefeSPatrick Mooney if (atomic_cmpset_long(&pir_desc->pending, 0, 1) != 0) { 3457c74a40a5SPatrick Mooney notify = VCPU_NOTIFY_APIC; 34584c87aefeSPatrick Mooney vlapic_vtx->pending_prio = 0; 34594c87aefeSPatrick Mooney } else { 34602699b94cSPatrick Mooney const uint_t old_prio = vlapic_vtx->pending_prio; 34612699b94cSPatrick Mooney const uint_t prio_bit = VPR_PRIO_BIT(vector & APIC_TPR_INT); 34624c87aefeSPatrick Mooney 34634c87aefeSPatrick Mooney if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) { 34644c87aefeSPatrick Mooney atomic_set_int(&vlapic_vtx->pending_prio, prio_bit); 3465c74a40a5SPatrick Mooney notify = VCPU_NOTIFY_APIC; 34664c87aefeSPatrick Mooney } 34674c87aefeSPatrick Mooney } 3468bf21cd93STycho Nightingale 3469bf21cd93STycho Nightingale return (notify); 3470bf21cd93STycho Nightingale } 3471bf21cd93STycho Nightingale 3472c74a40a5SPatrick Mooney static void 3473c74a40a5SPatrick Mooney vmx_apicv_accepted(struct vlapic *vlapic, int vector) 3474bf21cd93STycho Nightingale 
{ 3475bf21cd93STycho Nightingale /* 3476c74a40a5SPatrick Mooney * When APICv is enabled for an instance, the traditional interrupt 3477c74a40a5SPatrick Mooney * injection method (populating ENTRY_INTR_INFO in the VMCS) is not 3478c74a40a5SPatrick Mooney * used and the CPU does the heavy lifting of virtual interrupt 3479c74a40a5SPatrick Mooney * delivery. For that reason vmx_intr_accepted() should never be called 3480c74a40a5SPatrick Mooney * when APICv is enabled. 3481bf21cd93STycho Nightingale */ 3482c74a40a5SPatrick Mooney panic("vmx_intr_accepted: not expected to be called"); 3483bf21cd93STycho Nightingale } 3484bf21cd93STycho Nightingale 3485bf21cd93STycho Nightingale static void 3486c74a40a5SPatrick Mooney vmx_apicv_sync_tmr(struct vlapic *vlapic) 3487bf21cd93STycho Nightingale { 3488c74a40a5SPatrick Mooney struct vlapic_vtx *vlapic_vtx; 3489c74a40a5SPatrick Mooney const uint32_t *tmrs; 3490bf21cd93STycho Nightingale 3491c74a40a5SPatrick Mooney vlapic_vtx = (struct vlapic_vtx *)vlapic; 3492c74a40a5SPatrick Mooney tmrs = &vlapic_vtx->tmr_active[0]; 3493bf21cd93STycho Nightingale 3494c74a40a5SPatrick Mooney if (!vlapic_vtx->tmr_sync) { 3495c74a40a5SPatrick Mooney return; 3496c74a40a5SPatrick Mooney } 3497c74a40a5SPatrick Mooney 3498c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)tmrs[1] << 32) | tmrs[0]); 3499c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)tmrs[3] << 32) | tmrs[2]); 3500c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)tmrs[5] << 32) | tmrs[4]); 3501c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)tmrs[7] << 32) | tmrs[6]); 3502c74a40a5SPatrick Mooney vlapic_vtx->tmr_sync = B_FALSE; 35034c87aefeSPatrick Mooney } 35044c87aefeSPatrick Mooney 35054c87aefeSPatrick Mooney static void 3506154972afSPatrick Mooney vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) 3507154972afSPatrick Mooney { 3508154972afSPatrick Mooney struct vmx *vmx; 3509154972afSPatrick Mooney uint32_t proc_ctls; 
3510154972afSPatrick Mooney int vcpuid; 3511154972afSPatrick Mooney 3512154972afSPatrick Mooney vcpuid = vlapic->vcpuid; 3513154972afSPatrick Mooney vmx = ((struct vlapic_vtx *)vlapic)->vmx; 3514154972afSPatrick Mooney 3515154972afSPatrick Mooney proc_ctls = vmx->cap[vcpuid].proc_ctls; 3516154972afSPatrick Mooney proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; 3517154972afSPatrick Mooney proc_ctls |= PROCBASED_CR8_LOAD_EXITING; 3518154972afSPatrick Mooney proc_ctls |= PROCBASED_CR8_STORE_EXITING; 3519154972afSPatrick Mooney vmx->cap[vcpuid].proc_ctls = proc_ctls; 3520154972afSPatrick Mooney 3521007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpuid]); 3522154972afSPatrick Mooney vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); 3523007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpuid]); 3524154972afSPatrick Mooney } 3525154972afSPatrick Mooney 3526154972afSPatrick Mooney static void 3527154972afSPatrick Mooney vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) 3528bf21cd93STycho Nightingale { 3529bf21cd93STycho Nightingale struct vmx *vmx; 35304c87aefeSPatrick Mooney uint32_t proc_ctls2; 35316b641d7aSPatrick Mooney int vcpuid; 3532bf21cd93STycho Nightingale 35334c87aefeSPatrick Mooney vcpuid = vlapic->vcpuid; 35344c87aefeSPatrick Mooney vmx = ((struct vlapic_vtx *)vlapic)->vmx; 3535bf21cd93STycho Nightingale 35364c87aefeSPatrick Mooney proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 35374c87aefeSPatrick Mooney KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0, 35389dc804b9SPatrick Mooney ("%s: invalid proc_ctls2 %x", __func__, proc_ctls2)); 35394c87aefeSPatrick Mooney 35404c87aefeSPatrick Mooney proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES; 35414c87aefeSPatrick Mooney proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE; 35424c87aefeSPatrick Mooney vmx->cap[vcpuid].proc_ctls2 = proc_ctls2; 3543bf21cd93STycho Nightingale 3544007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpuid]); 35454c87aefeSPatrick Mooney vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2); 
	vmcs_clear(vmx->vmcs_pa[vcpuid]);

	vmx_allow_x2apic_msrs(vmx, vcpuid);
}

/*
 * Send a posted-interrupt notification IPI to the host CPU currently running
 * the target vCPU.  Installed as the vlapic 'post_intr' hook when APICv
 * posted interrupts (VMX_CAP_APICV_PIR) are available (see vmx_vlapic_init).
 */
static void
vmx_apicv_notify(struct vlapic *vlapic, int hostcpu)
{
	psm_send_pir_ipi(hostcpu);
}

/*
 * Synchronize software-posted interrupt state into the virtual APIC page:
 * fold the pending level-triggered, edge-triggered, and posted (PIR) vector
 * bits into the IRR, updating the TMR to match each vector's trigger mode.
 * Installed as the vlapic 'sync_state' hook when APICv is enabled.
 */
static void
vmx_apicv_sync(struct vlapic *vlapic)
{
	struct vlapic_vtx *vlapic_vtx;
	struct pir_desc *pir_desc;
	struct LAPIC *lapic;
	uint_t i;

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	pir_desc = vlapic_vtx->pir_desc;
	lapic = vlapic->apic_page;

	/*
	 * Fast path: if nothing was posted since the last sync, there is no
	 * work to do.  The 'pending' flag is cleared (1 -> 0) before the
	 * per-bank bits are consumed below, so a concurrent producer that
	 * posts a new vector will re-set the flag and force another sync.
	 */
	if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) {
		return;
	}

	vlapic_vtx->pending_prio = 0;

	/* Make sure the invalid (0-15) vectors are not set */
	ASSERT0(vlapic_vtx->pending_level[0] & 0xffff);
	ASSERT0(vlapic_vtx->pending_edge[0] & 0xffff);
	ASSERT0(pir_desc->pir[0] & 0xffff);

	for (i = 0; i <= 7; i++) {
		/*
		 * The 32-bit TMR and IRR registers in the LAPIC page are laid
		 * out on 16-byte boundaries, hence the stride of 4 uint32_t
		 * per 32-vector bank.
		 */
		uint32_t *tmrp = &lapic->tmr0 + (i * 4);
		uint32_t *irrp = &lapic->irr0 + (i * 4);

		const uint32_t pending_level =
		    atomic_readandclear_int(&vlapic_vtx->pending_level[i]);
		const uint32_t pending_edge =
		    atomic_readandclear_int(&vlapic_vtx->pending_edge[i]);
		const uint32_t pending_inject =
		    atomic_readandclear_int(&pir_desc->pir[i]);

		if (pending_level != 0) {
			/*
			 * Level-triggered interrupts assert their corresponding
			 * bit in the TMR when queued in IRR.
			 */
			*tmrp |= pending_level;
			*irrp |= pending_level;
		}
		if (pending_edge != 0) {
			/*
			 * When queuing an edge-triggered interrupt in IRR, the
			 * corresponding bit in the TMR is cleared.
			 */
			*tmrp &= ~pending_edge;
			*irrp |= pending_edge;
		}
		if (pending_inject != 0) {
			/*
			 * Interrupts which do not require a change to the TMR
			 * (because it already matches the necessary state) can
			 * simply be queued in IRR.
			 */
			*irrp |= pending_inject;
		}

		if (*tmrp != vlapic_vtx->tmr_active[i]) {
			/* Check if VMX EOI triggers require updating. */
			vlapic_vtx->tmr_active[i] = *tmrp;
			vlapic_vtx->tmr_sync = B_TRUE;
		}
	}
}

static void
vmx_tpr_shadow_enter(struct vlapic *vlapic)
{
	/*
	 * When TPR shadowing is enabled, VMX will initiate a guest exit if its
	 * TPR falls below a threshold priority.  That threshold is set to the
	 * current TPR priority, since guest interrupt status should be
	 * re-evaluated if its TPR is set lower.
	 */
	vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic));
}

static void
vmx_tpr_shadow_exit(struct vlapic *vlapic)
{
	/*
	 * Unlike full APICv, where changes to the TPR are reflected in the PPR,
	 * with TPR shadowing, that duty is relegated to the VMM.  Upon exit,
	 * the PPR is updated to reflect any change in the TPR here.
	 */
	vlapic_sync_tpr(vlapic);
}

/*
 * Allocate and initialize the vlapic (with its VMX-specific vlapic_vtx
 * extensions) for a vCPU, wiring up accelerated ops according to which
 * hardware capabilities (TPR shadowing, APICv, posted interrupts) are
 * enabled for this VM.  Note the ordering: full-APICv x2APIC handling
 * (vmx_enable_x2apic_mode_vid) overrides the TPR-shadow variant when
 * VMX_CAP_APICV is present.
 */
static struct vlapic *
vmx_vlapic_init(void *arg, int vcpuid)
{
	struct vmx *vmx;
	struct vlapic *vlapic;
	struct vlapic_vtx *vlapic_vtx;

	vmx = arg;

	vlapic = malloc(sizeof (struct vlapic_vtx), M_VLAPIC,
	    M_WAITOK | M_ZERO);
	vlapic->vm = vmx->vm;
	vlapic->vcpuid = vcpuid;
	vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid];

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid];
	vlapic_vtx->vmx = vmx;

	if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts;
	}
	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
		vlapic->ops.set_intr_ready = vmx_apicv_set_ready;
		vlapic->ops.sync_state = vmx_apicv_sync;
		vlapic->ops.intr_accepted = vmx_apicv_accepted;
		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_vid;

		if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
			vlapic->ops.post_intr = vmx_apicv_notify;
		}
	}

	vlapic_init(vlapic);

	return (vlapic);
}

/* Tear down and free a vlapic created by vmx_vlapic_init(). */
static void
vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic)
{

	vlapic_cleanup(vlapic);
	free(vlapic, M_VLAPIC);
}

/*
 * Save per-vCPU VMX hardware context (the 'vmsavectx' op).  If this vCPU's
 * VMCS is loaded, VMCLEAR it and restore host MSR state.
 */
static void
vmx_savectx(void *arg, int vcpu)
{
	struct vmx *vmx = arg;

	if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
		vmcs_clear(vmx->vmcs_pa[vcpu]);
		vmx_msr_guest_exit(vmx, vcpu);
		/*
		 * Having VMCLEARed the VMCS, it can no longer be re-entered
		 * with VMRESUME, but must be VMLAUNCHed again.
		 */
		vmx->vmcs_state[vcpu] &= ~VS_LAUNCHED;
	}

	/*
	 * NOTE(review): presumably undoes the GDTR limit clobbering that
	 * VM exits perform on the host descriptor state -- confirm against
	 * the reset_gdtr_limit() definition.
	 */
	reset_gdtr_limit();
}

/*
 * Restore per-vCPU VMX hardware context (the 'vmrestorectx' op), reloading
 * guest MSR state and the VMCS if it was loaded at save time.  The vCPU must
 * not be in the VS_LAUNCHED state, since the VMCS was VMCLEARed on save.
 */
static void
vmx_restorectx(void *arg, int vcpu)
{
	struct vmx *vmx = arg;

	ASSERT0(vmx->vmcs_state[vcpu] & VS_LAUNCHED);

	if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
		vmx_msr_guest_enter(vmx, vcpu);
		vmcs_load(vmx->vmcs_pa[vcpu]);
	}
}

/* Intel VMX implementation of the ops consumed by the generic vmm layer. */
struct vmm_ops vmm_ops_intel = {
	.init		= vmx_init,
	.cleanup	= vmx_cleanup,
	.resume		= vmx_restore,

	.vminit		= vmx_vminit,
	.vmrun		= vmx_run,
	.vmcleanup	= vmx_vmcleanup,
	.vmgetreg	= vmx_getreg,
	.vmsetreg	= vmx_setreg,
	.vmgetdesc	= vmx_getdesc,
	.vmsetdesc	= vmx_setdesc,
	.vmgetcap	= vmx_getcap,
	.vmsetcap	= vmx_setcap,
	.vlapic_init	= vmx_vlapic_init,
	.vlapic_cleanup	= vmx_vlapic_cleanup,

	.vmsavectx	= vmx_savectx,
	.vmrestorectx	= vmx_restorectx,
};

/*
 * Side-effect free HW validation derived from checks in vmx_init.
 *
 * Returns 0 when the processor supports every VMX control setting this
 * implementation requires; otherwise returns the error from vmx_set_ctlreg()
 * with *msg pointing at a static string describing the missing capability.
 */
int
vmx_x86_supported(const char **msg)
{
	int error;
	uint32_t tmp;

	ASSERT(msg != NULL);

	/* Check support for primary processor-based VM-execution controls */
	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS,
	    MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_CTLS_ONE_SETTING,
	    PROCBASED_CTLS_ZERO_SETTING, &tmp);
	if (error) {
		*msg = "processor does not support desired primary "
		    "processor-based controls";
		return (error);
	}

	/* Check support for secondary processor-based VM-execution controls */
	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2,
	    MSR_VMX_PROCBASED_CTLS2, PROCBASED_CTLS2_ONE_SETTING,
	    PROCBASED_CTLS2_ZERO_SETTING, &tmp);
	if (error) {
		*msg = "processor does not support desired secondary "
		    "processor-based controls";
		return (error);
	}

	/* Check support for pin-based VM-execution controls */
	error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS,
	    MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_CTLS_ONE_SETTING,
	    PINBASED_CTLS_ZERO_SETTING, &tmp);
	if (error) {
		*msg = "processor does not support desired pin-based controls";
		return (error);
	}

	/* Check support for VM-exit controls */
	error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS,
	    VM_EXIT_CTLS_ONE_SETTING, VM_EXIT_CTLS_ZERO_SETTING, &tmp);
	if (error) {
		*msg = "processor does not support desired exit controls";
		return (error);
	}

	/* Check support for VM-entry controls */
	error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS,
	    VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING, &tmp);
	if (error) {
		*msg = "processor does not support desired entry controls";
		return (error);
	}

	/* Unrestricted guest is nominally optional, but not for us. */
	error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2,
	    PROCBASED2_UNRESTRICTED_GUEST, 0, &tmp);
	if (error) {
		*msg = "processor does not support desired unrestricted guest "
		    "controls";
		return (error);
	}

	return (0);
}