/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 * Copyright (c) 2018 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2018 Joyent, Inc.
4383b49c54SPatrick Mooney * Copyright 2022 Oxide Computer Company 44bf21cd93STycho Nightingale */ 45bf21cd93STycho Nightingale 46bf21cd93STycho Nightingale #include <sys/cdefs.h> 474c87aefeSPatrick Mooney __FBSDID("$FreeBSD$"); 48bf21cd93STycho Nightingale 49bf21cd93STycho Nightingale #include <sys/param.h> 50bf21cd93STycho Nightingale #include <sys/systm.h> 51bf21cd93STycho Nightingale #include <sys/kernel.h> 52bf21cd93STycho Nightingale #include <sys/malloc.h> 53bf21cd93STycho Nightingale #include <sys/pcpu.h> 54bf21cd93STycho Nightingale #include <sys/proc.h> 55bf21cd93STycho Nightingale #include <sys/sysctl.h> 56bf21cd93STycho Nightingale 574c87aefeSPatrick Mooney #include <sys/x86_archext.h> 584c87aefeSPatrick Mooney #include <sys/smp_impldefs.h> 594c87aefeSPatrick Mooney #include <sys/smt.h> 604c87aefeSPatrick Mooney #include <sys/hma.h> 614c87aefeSPatrick Mooney #include <sys/trap.h> 620153d828SPatrick Mooney #include <sys/archsystm.h> 634c87aefeSPatrick Mooney 64bf21cd93STycho Nightingale #include <machine/psl.h> 65bf21cd93STycho Nightingale #include <machine/cpufunc.h> 66bf21cd93STycho Nightingale #include <machine/md_var.h> 674c87aefeSPatrick Mooney #include <machine/reg.h> 68bf21cd93STycho Nightingale #include <machine/segments.h> 69bf21cd93STycho Nightingale #include <machine/specialreg.h> 70bf21cd93STycho Nightingale #include <machine/vmparam.h> 71cf409e3fSDan Cross #include <sys/vmm_vm.h> 72d2f938fdSPatrick Mooney #include <sys/vmm_kernel.h> 73bf21cd93STycho Nightingale 74bf21cd93STycho Nightingale #include <machine/vmm.h> 75bf21cd93STycho Nightingale #include <machine/vmm_dev.h> 76e0c0d44eSPatrick Mooney #include <sys/vmm_instruction_emul.h> 77bf21cd93STycho Nightingale #include "vmm_lapic.h" 78bf21cd93STycho Nightingale #include "vmm_host.h" 79bf21cd93STycho Nightingale #include "vmm_ioport.h" 80bf21cd93STycho Nightingale #include "vmm_stat.h" 81bf21cd93STycho Nightingale #include "vatpic.h" 82bf21cd93STycho Nightingale #include "vlapic.h" 
83bf21cd93STycho Nightingale #include "vlapic_priv.h" 84bf21cd93STycho Nightingale 854c87aefeSPatrick Mooney #include "vmcs.h" 86bf21cd93STycho Nightingale #include "vmx.h" 87bf21cd93STycho Nightingale #include "vmx_msr.h" 88bf21cd93STycho Nightingale #include "x86.h" 89bf21cd93STycho Nightingale #include "vmx_controls.h" 90bf21cd93STycho Nightingale 91bf21cd93STycho Nightingale #define PINBASED_CTLS_ONE_SETTING \ 92bf21cd93STycho Nightingale (PINBASED_EXTINT_EXITING | \ 932699b94cSPatrick Mooney PINBASED_NMI_EXITING | \ 942699b94cSPatrick Mooney PINBASED_VIRTUAL_NMI) 95bf21cd93STycho Nightingale #define PINBASED_CTLS_ZERO_SETTING 0 96bf21cd93STycho Nightingale 972699b94cSPatrick Mooney #define PROCBASED_CTLS_WINDOW_SETTING \ 98bf21cd93STycho Nightingale (PROCBASED_INT_WINDOW_EXITING | \ 992699b94cSPatrick Mooney PROCBASED_NMI_WINDOW_EXITING) 100bf21cd93STycho Nightingale 1014c87aefeSPatrick Mooney /* We consider TSC offset a necessity for unsynched TSC handling */ 10284971882SPatrick Mooney #define PROCBASED_CTLS_ONE_SETTING \ 103bf21cd93STycho Nightingale (PROCBASED_SECONDARY_CONTROLS | \ 1042699b94cSPatrick Mooney PROCBASED_TSC_OFFSET | \ 1052699b94cSPatrick Mooney PROCBASED_MWAIT_EXITING | \ 1062699b94cSPatrick Mooney PROCBASED_MONITOR_EXITING | \ 1072699b94cSPatrick Mooney PROCBASED_IO_EXITING | \ 1082699b94cSPatrick Mooney PROCBASED_MSR_BITMAPS | \ 1092699b94cSPatrick Mooney PROCBASED_CTLS_WINDOW_SETTING | \ 1102699b94cSPatrick Mooney PROCBASED_CR8_LOAD_EXITING | \ 1112699b94cSPatrick Mooney PROCBASED_CR8_STORE_EXITING) 1124c87aefeSPatrick Mooney 113bf21cd93STycho Nightingale #define PROCBASED_CTLS_ZERO_SETTING \ 114bf21cd93STycho Nightingale (PROCBASED_CR3_LOAD_EXITING | \ 115bf21cd93STycho Nightingale PROCBASED_CR3_STORE_EXITING | \ 116bf21cd93STycho Nightingale PROCBASED_IO_BITMAPS) 117bf21cd93STycho Nightingale 118c3ae3afaSPatrick Mooney /* 119c3ae3afaSPatrick Mooney * EPT and Unrestricted Guest are considered necessities. 
The latter is not a 120c3ae3afaSPatrick Mooney * requirement on FreeBSD, where grub2-bhyve is used to load guests directly 121c3ae3afaSPatrick Mooney * without a bootrom starting in real mode. 122c3ae3afaSPatrick Mooney */ 123c3ae3afaSPatrick Mooney #define PROCBASED_CTLS2_ONE_SETTING \ 124c3ae3afaSPatrick Mooney (PROCBASED2_ENABLE_EPT | \ 125c3ae3afaSPatrick Mooney PROCBASED2_UNRESTRICTED_GUEST) 126bf21cd93STycho Nightingale #define PROCBASED_CTLS2_ZERO_SETTING 0 127bf21cd93STycho Nightingale 128bf21cd93STycho Nightingale #define VM_EXIT_CTLS_ONE_SETTING \ 1294c87aefeSPatrick Mooney (VM_EXIT_SAVE_DEBUG_CONTROLS | \ 1304c87aefeSPatrick Mooney VM_EXIT_HOST_LMA | \ 1314c87aefeSPatrick Mooney VM_EXIT_LOAD_PAT | \ 132bf21cd93STycho Nightingale VM_EXIT_SAVE_EFER | \ 133bf21cd93STycho Nightingale VM_EXIT_LOAD_EFER | \ 1344c87aefeSPatrick Mooney VM_EXIT_ACKNOWLEDGE_INTERRUPT) 135bf21cd93STycho Nightingale 1364c87aefeSPatrick Mooney #define VM_EXIT_CTLS_ZERO_SETTING 0 137bf21cd93STycho Nightingale 1384c87aefeSPatrick Mooney #define VM_ENTRY_CTLS_ONE_SETTING \ 1394c87aefeSPatrick Mooney (VM_ENTRY_LOAD_DEBUG_CONTROLS | \ 1404c87aefeSPatrick Mooney VM_ENTRY_LOAD_EFER) 141bf21cd93STycho Nightingale 142bf21cd93STycho Nightingale #define VM_ENTRY_CTLS_ZERO_SETTING \ 1434c87aefeSPatrick Mooney (VM_ENTRY_INTO_SMM | \ 144bf21cd93STycho Nightingale VM_ENTRY_DEACTIVATE_DUAL_MONITOR) 145bf21cd93STycho Nightingale 1460153d828SPatrick Mooney /* 1470153d828SPatrick Mooney * Cover the EPT capabilities used by bhyve at present: 1480153d828SPatrick Mooney * - 4-level page walks 1490153d828SPatrick Mooney * - write-back memory type 1500153d828SPatrick Mooney * - INVEPT operations (all types) 1510153d828SPatrick Mooney * - INVVPID operations (single-context only) 1520153d828SPatrick Mooney */ 1530153d828SPatrick Mooney #define EPT_CAPS_REQUIRED \ 1540153d828SPatrick Mooney (IA32_VMX_EPT_VPID_PWL4 | \ 1550153d828SPatrick Mooney IA32_VMX_EPT_VPID_TYPE_WB | \ 1560153d828SPatrick Mooney 
IA32_VMX_EPT_VPID_INVEPT | \ 1570153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVEPT_SINGLE | \ 1580153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVEPT_ALL | \ 1590153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVVPID | \ 1600153d828SPatrick Mooney IA32_VMX_EPT_VPID_INVVPID_SINGLE) 1610153d828SPatrick Mooney 162bf21cd93STycho Nightingale #define HANDLED 1 163bf21cd93STycho Nightingale #define UNHANDLED 0 164bf21cd93STycho Nightingale 165bf21cd93STycho Nightingale static MALLOC_DEFINE(M_VMX, "vmx", "vmx"); 166bf21cd93STycho Nightingale static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); 167bf21cd93STycho Nightingale 168bf21cd93STycho Nightingale SYSCTL_DECL(_hw_vmm); 169154972afSPatrick Mooney SYSCTL_NODE(_hw_vmm, OID_AUTO, vmx, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 170154972afSPatrick Mooney NULL); 171bf21cd93STycho Nightingale 172bf21cd93STycho Nightingale static uint32_t pinbased_ctls, procbased_ctls, procbased_ctls2; 173bf21cd93STycho Nightingale static uint32_t exit_ctls, entry_ctls; 174bf21cd93STycho Nightingale 175bf21cd93STycho Nightingale static uint64_t cr0_ones_mask, cr0_zeros_mask; 176bf21cd93STycho Nightingale 177bf21cd93STycho Nightingale static uint64_t cr4_ones_mask, cr4_zeros_mask; 178bf21cd93STycho Nightingale 179bf21cd93STycho Nightingale static int vmx_initialized; 180bf21cd93STycho Nightingale 1812699b94cSPatrick Mooney /* Do not flush RSB upon vmexit */ 182007ca332SPatrick Mooney static int no_flush_rsb; 183007ca332SPatrick Mooney 184bf21cd93STycho Nightingale /* 185bf21cd93STycho Nightingale * Optional capabilities 186bf21cd93STycho Nightingale */ 1874c87aefeSPatrick Mooney 1882699b94cSPatrick Mooney /* HLT triggers a VM-exit */ 189bf21cd93STycho Nightingale static int cap_halt_exit; 1904c87aefeSPatrick Mooney 1912699b94cSPatrick Mooney /* PAUSE triggers a VM-exit */ 192bf21cd93STycho Nightingale static int cap_pause_exit; 1934c87aefeSPatrick Mooney 1942699b94cSPatrick Mooney /* Monitor trap flag */ 195bf21cd93STycho Nightingale static int cap_monitor_trap; 
1964c87aefeSPatrick Mooney 1972699b94cSPatrick Mooney /* Guests are allowed to use INVPCID */ 198bf21cd93STycho Nightingale static int cap_invpcid; 199bf21cd93STycho Nightingale 200c3ae3afaSPatrick Mooney /* Extra capabilities (VMX_CAP_*) beyond the minimum */ 201c3ae3afaSPatrick Mooney static enum vmx_caps vmx_capabilities; 202bf21cd93STycho Nightingale 2032699b94cSPatrick Mooney /* APICv posted interrupt vector */ 2044c87aefeSPatrick Mooney static int pirvec = -1; 205bf21cd93STycho Nightingale 2062699b94cSPatrick Mooney static uint_t vpid_alloc_failed; 207bf21cd93STycho Nightingale 208154972afSPatrick Mooney int guest_l1d_flush; 209154972afSPatrick Mooney int guest_l1d_flush_sw; 2104c87aefeSPatrick Mooney 211007ca332SPatrick Mooney /* MSR save region is composed of an array of 'struct msr_entry' */ 212007ca332SPatrick Mooney struct msr_entry { 213007ca332SPatrick Mooney uint32_t index; 214007ca332SPatrick Mooney uint32_t reserved; 215007ca332SPatrick Mooney uint64_t val; 216007ca332SPatrick Mooney }; 217007ca332SPatrick Mooney 2184c87aefeSPatrick Mooney static struct msr_entry msr_load_list[1] __aligned(16); 2194c87aefeSPatrick Mooney 2204c87aefeSPatrick Mooney /* 2214c87aefeSPatrick Mooney * The definitions of SDT probes for VMX. 
2224c87aefeSPatrick Mooney */ 2234c87aefeSPatrick Mooney 2242699b94cSPatrick Mooney /* BEGIN CSTYLED */ 2254c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, entry, 2264c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2274c87aefeSPatrick Mooney 2284c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, taskswitch, 2294c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "struct vm_task_switch *"); 2304c87aefeSPatrick Mooney 2314c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, craccess, 2324c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint64_t"); 2334c87aefeSPatrick Mooney 2344c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, rdmsr, 2354c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t"); 2364c87aefeSPatrick Mooney 2374c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, wrmsr, 2384c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t", "uint64_t"); 2394c87aefeSPatrick Mooney 2404c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, halt, 2414c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2424c87aefeSPatrick Mooney 2434c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, mtrap, 2444c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2454c87aefeSPatrick Mooney 2464c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, pause, 2474c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2484c87aefeSPatrick Mooney 2494c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, intrwindow, 2504c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2514c87aefeSPatrick Mooney 2524c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, interrupt, 2534c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t"); 2544c87aefeSPatrick Mooney 2554c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, nmiwindow, 2564c87aefeSPatrick Mooney "struct vmx *", "int", "struct 
vm_exit *"); 2574c87aefeSPatrick Mooney 2584c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, inout, 2594c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2604c87aefeSPatrick Mooney 2614c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, cpuid, 2624c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2634c87aefeSPatrick Mooney 2644c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, exception, 2654c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t", "int"); 2664c87aefeSPatrick Mooney 2674c87aefeSPatrick Mooney SDT_PROBE_DEFINE5(vmm, vmx, exit, nestedfault, 2684c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint64_t", "uint64_t"); 2694c87aefeSPatrick Mooney 2704c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, mmiofault, 2714c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint64_t"); 2724c87aefeSPatrick Mooney 2734c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, eoi, 2744c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2754c87aefeSPatrick Mooney 2764c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, apicaccess, 2774c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2784c87aefeSPatrick Mooney 2794c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, apicwrite, 2804c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "struct vlapic *"); 2814c87aefeSPatrick Mooney 2824c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, xsetbv, 2834c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2844c87aefeSPatrick Mooney 2854c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, monitor, 2864c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2874c87aefeSPatrick Mooney 2884c87aefeSPatrick Mooney SDT_PROBE_DEFINE3(vmm, vmx, exit, mwait, 2894c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2904c87aefeSPatrick Mooney 2914c87aefeSPatrick Mooney 
SDT_PROBE_DEFINE3(vmm, vmx, exit, vminsn, 2924c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *"); 2934c87aefeSPatrick Mooney 2944c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, unknown, 2954c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "uint32_t"); 2964c87aefeSPatrick Mooney 2974c87aefeSPatrick Mooney SDT_PROBE_DEFINE4(vmm, vmx, exit, return, 2984c87aefeSPatrick Mooney "struct vmx *", "int", "struct vm_exit *", "int"); 2992699b94cSPatrick Mooney /* END CSTYLED */ 3004c87aefeSPatrick Mooney 301bf21cd93STycho Nightingale static int vmx_getdesc(void *arg, int vcpu, int reg, struct seg_desc *desc); 302bf21cd93STycho Nightingale static int vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval); 303007ca332SPatrick Mooney static void vmx_apply_tsc_adjust(struct vmx *, int); 304c74a40a5SPatrick Mooney static void vmx_apicv_sync_tmr(struct vlapic *vlapic); 305c74a40a5SPatrick Mooney static void vmx_tpr_shadow_enter(struct vlapic *vlapic); 306c74a40a5SPatrick Mooney static void vmx_tpr_shadow_exit(struct vlapic *vlapic); 307bf21cd93STycho Nightingale 3086b641d7aSPatrick Mooney static void 3096b641d7aSPatrick Mooney vmx_allow_x2apic_msrs(struct vmx *vmx, int vcpuid) 310bf21cd93STycho Nightingale { 311bf21cd93STycho Nightingale /* 312bf21cd93STycho Nightingale * Allow readonly access to the following x2APIC MSRs from the guest. 
313bf21cd93STycho Nightingale */ 3146b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ID); 3156b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_VERSION); 3166b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LDR); 3176b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_SVR); 3186b641d7aSPatrick Mooney 3196b641d7aSPatrick Mooney for (uint_t i = 0; i < 8; i++) { 3206b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ISR0 + i); 3216b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_TMR0 + i); 3226b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_IRR0 + i); 3236b641d7aSPatrick Mooney } 3246b641d7aSPatrick Mooney 3256b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ESR); 3266b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_TIMER); 3276b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_THERMAL); 3286b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_PCINT); 3296b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT0); 3306b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_LINT1); 3316b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_LVT_ERROR); 3326b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR_TIMER); 3336b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_DCR_TIMER); 3346b641d7aSPatrick Mooney guest_msr_ro(vmx, vcpuid, MSR_APIC_ICR); 335bf21cd93STycho Nightingale 336bf21cd93STycho Nightingale /* 337bf21cd93STycho Nightingale * Allow TPR, EOI and SELF_IPI MSRs to be read and written by the guest. 338bf21cd93STycho Nightingale * 339bf21cd93STycho Nightingale * These registers get special treatment described in the section 340bf21cd93STycho Nightingale * "Virtualizing MSR-Based APIC Accesses". 
341bf21cd93STycho Nightingale */ 3426b641d7aSPatrick Mooney guest_msr_rw(vmx, vcpuid, MSR_APIC_TPR); 3436b641d7aSPatrick Mooney guest_msr_rw(vmx, vcpuid, MSR_APIC_EOI); 3446b641d7aSPatrick Mooney guest_msr_rw(vmx, vcpuid, MSR_APIC_SELF_IPI); 345bf21cd93STycho Nightingale } 346bf21cd93STycho Nightingale 3472699b94cSPatrick Mooney static ulong_t 3482699b94cSPatrick Mooney vmx_fix_cr0(ulong_t cr0) 349bf21cd93STycho Nightingale { 350bf21cd93STycho Nightingale return ((cr0 | cr0_ones_mask) & ~cr0_zeros_mask); 351bf21cd93STycho Nightingale } 352bf21cd93STycho Nightingale 353bf0dcd3fSPatrick Mooney /* 354bf0dcd3fSPatrick Mooney * Given a live (VMCS-active) cr0 value, and its shadow counterpart, calculate 355bf0dcd3fSPatrick Mooney * the value observable from the guest. 356bf0dcd3fSPatrick Mooney */ 357bf0dcd3fSPatrick Mooney static ulong_t 358bf0dcd3fSPatrick Mooney vmx_unshadow_cr0(uint64_t cr0, uint64_t shadow) 359bf0dcd3fSPatrick Mooney { 360bf0dcd3fSPatrick Mooney return ((cr0 & ~cr0_ones_mask) | 361bf0dcd3fSPatrick Mooney (shadow & (cr0_zeros_mask | cr0_ones_mask))); 362bf0dcd3fSPatrick Mooney } 363bf0dcd3fSPatrick Mooney 3642699b94cSPatrick Mooney static ulong_t 3652699b94cSPatrick Mooney vmx_fix_cr4(ulong_t cr4) 366bf21cd93STycho Nightingale { 367bf21cd93STycho Nightingale return ((cr4 | cr4_ones_mask) & ~cr4_zeros_mask); 368bf21cd93STycho Nightingale } 369bf21cd93STycho Nightingale 370bf0dcd3fSPatrick Mooney /* 371bf0dcd3fSPatrick Mooney * Given a live (VMCS-active) cr4 value, and its shadow counterpart, calculate 372bf0dcd3fSPatrick Mooney * the value observable from the guest. 
373bf0dcd3fSPatrick Mooney */ 374bf0dcd3fSPatrick Mooney static ulong_t 375bf0dcd3fSPatrick Mooney vmx_unshadow_cr4(uint64_t cr4, uint64_t shadow) 376bf0dcd3fSPatrick Mooney { 377bf0dcd3fSPatrick Mooney return ((cr4 & ~cr4_ones_mask) | 378bf0dcd3fSPatrick Mooney (shadow & (cr4_zeros_mask | cr4_ones_mask))); 379bf0dcd3fSPatrick Mooney } 380bf0dcd3fSPatrick Mooney 381bf21cd93STycho Nightingale static void 382bf21cd93STycho Nightingale vpid_free(int vpid) 383bf21cd93STycho Nightingale { 384bf21cd93STycho Nightingale if (vpid < 0 || vpid > 0xffff) 385bf21cd93STycho Nightingale panic("vpid_free: invalid vpid %d", vpid); 386bf21cd93STycho Nightingale 387bf21cd93STycho Nightingale /* 388bf21cd93STycho Nightingale * VPIDs [0,VM_MAXCPU] are special and are not allocated from 389bf21cd93STycho Nightingale * the unit number allocator. 390bf21cd93STycho Nightingale */ 391bf21cd93STycho Nightingale 392bf21cd93STycho Nightingale if (vpid > VM_MAXCPU) 3934c87aefeSPatrick Mooney hma_vmx_vpid_free((uint16_t)vpid); 394bf21cd93STycho Nightingale } 395bf21cd93STycho Nightingale 396bf21cd93STycho Nightingale static void 397bf21cd93STycho Nightingale vpid_alloc(uint16_t *vpid, int num) 398bf21cd93STycho Nightingale { 399bf21cd93STycho Nightingale int i, x; 400bf21cd93STycho Nightingale 401bf21cd93STycho Nightingale if (num <= 0 || num > VM_MAXCPU) 402bf21cd93STycho Nightingale panic("invalid number of vpids requested: %d", num); 403bf21cd93STycho Nightingale 404bf21cd93STycho Nightingale /* 405bf21cd93STycho Nightingale * If the "enable vpid" execution control is not enabled then the 406bf21cd93STycho Nightingale * VPID is required to be 0 for all vcpus. 
407bf21cd93STycho Nightingale */ 408bf21cd93STycho Nightingale if ((procbased_ctls2 & PROCBASED2_ENABLE_VPID) == 0) { 409bf21cd93STycho Nightingale for (i = 0; i < num; i++) 410bf21cd93STycho Nightingale vpid[i] = 0; 411bf21cd93STycho Nightingale return; 412bf21cd93STycho Nightingale } 413bf21cd93STycho Nightingale 414bf21cd93STycho Nightingale /* 415bf21cd93STycho Nightingale * Allocate a unique VPID for each vcpu from the unit number allocator. 416bf21cd93STycho Nightingale */ 417bf21cd93STycho Nightingale for (i = 0; i < num; i++) { 4184c87aefeSPatrick Mooney uint16_t tmp; 4194c87aefeSPatrick Mooney 4204c87aefeSPatrick Mooney tmp = hma_vmx_vpid_alloc(); 4214c87aefeSPatrick Mooney x = (tmp == 0) ? -1 : tmp; 422f703164bSPatrick Mooney 423bf21cd93STycho Nightingale if (x == -1) 424bf21cd93STycho Nightingale break; 425bf21cd93STycho Nightingale else 426bf21cd93STycho Nightingale vpid[i] = x; 427bf21cd93STycho Nightingale } 428bf21cd93STycho Nightingale 429bf21cd93STycho Nightingale if (i < num) { 430bf21cd93STycho Nightingale atomic_add_int(&vpid_alloc_failed, 1); 431bf21cd93STycho Nightingale 432bf21cd93STycho Nightingale /* 433bf21cd93STycho Nightingale * If the unit number allocator does not have enough unique 434bf21cd93STycho Nightingale * VPIDs then we need to allocate from the [1,VM_MAXCPU] range. 435bf21cd93STycho Nightingale * 436bf21cd93STycho Nightingale * These VPIDs are not be unique across VMs but this does not 437bf21cd93STycho Nightingale * affect correctness because the combined mappings are also 438bf21cd93STycho Nightingale * tagged with the EP4TA which is unique for each VM. 439bf21cd93STycho Nightingale * 440bf21cd93STycho Nightingale * It is still sub-optimal because the invvpid will invalidate 441bf21cd93STycho Nightingale * combined mappings for a particular VPID across all EP4TAs. 
442bf21cd93STycho Nightingale */ 443bf21cd93STycho Nightingale while (i-- > 0) 444bf21cd93STycho Nightingale vpid_free(vpid[i]); 445bf21cd93STycho Nightingale 446bf21cd93STycho Nightingale for (i = 0; i < num; i++) 447bf21cd93STycho Nightingale vpid[i] = i + 1; 448bf21cd93STycho Nightingale } 449bf21cd93STycho Nightingale } 450bf21cd93STycho Nightingale 451bf21cd93STycho Nightingale static int 4524c87aefeSPatrick Mooney vmx_cleanup(void) 453bf21cd93STycho Nightingale { 4544c87aefeSPatrick Mooney /* This is taken care of by the hma registration */ 4554c87aefeSPatrick Mooney return (0); 4564c87aefeSPatrick Mooney } 4574c87aefeSPatrick Mooney 4584c87aefeSPatrick Mooney static void 4594c87aefeSPatrick Mooney vmx_restore(void) 4604c87aefeSPatrick Mooney { 4614c87aefeSPatrick Mooney /* No-op on illumos */ 4624c87aefeSPatrick Mooney } 4634c87aefeSPatrick Mooney 4644c87aefeSPatrick Mooney static int 4650153d828SPatrick Mooney vmx_init(void) 4664c87aefeSPatrick Mooney { 467154972afSPatrick Mooney int error; 4684c87aefeSPatrick Mooney uint64_t fixed0, fixed1; 469c3ae3afaSPatrick Mooney uint32_t tmp; 470c3ae3afaSPatrick Mooney enum vmx_caps avail_caps = VMX_CAP_NONE; 4714c87aefeSPatrick Mooney 472bf21cd93STycho Nightingale /* Check support for primary processor-based VM-execution controls */ 473bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 4742699b94cSPatrick Mooney MSR_VMX_TRUE_PROCBASED_CTLS, 4752699b94cSPatrick Mooney PROCBASED_CTLS_ONE_SETTING, 4762699b94cSPatrick Mooney PROCBASED_CTLS_ZERO_SETTING, &procbased_ctls); 477bf21cd93STycho Nightingale if (error) { 478bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired primary " 4792699b94cSPatrick Mooney "processor-based controls\n"); 480bf21cd93STycho Nightingale return (error); 481bf21cd93STycho Nightingale } 482bf21cd93STycho Nightingale 483bf21cd93STycho Nightingale /* Clear the processor-based ctl bits that are set on demand */ 484bf21cd93STycho Nightingale 
procbased_ctls &= ~PROCBASED_CTLS_WINDOW_SETTING; 485bf21cd93STycho Nightingale 486bf21cd93STycho Nightingale /* Check support for secondary processor-based VM-execution controls */ 487bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 4882699b94cSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, 4892699b94cSPatrick Mooney PROCBASED_CTLS2_ONE_SETTING, 4902699b94cSPatrick Mooney PROCBASED_CTLS2_ZERO_SETTING, &procbased_ctls2); 491bf21cd93STycho Nightingale if (error) { 492bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired secondary " 4932699b94cSPatrick Mooney "processor-based controls\n"); 494bf21cd93STycho Nightingale return (error); 495bf21cd93STycho Nightingale } 496bf21cd93STycho Nightingale 497bf21cd93STycho Nightingale /* Check support for VPID */ 4982699b94cSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 4992699b94cSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, 5002699b94cSPatrick Mooney PROCBASED2_ENABLE_VPID, 5012699b94cSPatrick Mooney 0, &tmp); 502bf21cd93STycho Nightingale if (error == 0) 503bf21cd93STycho Nightingale procbased_ctls2 |= PROCBASED2_ENABLE_VPID; 504bf21cd93STycho Nightingale 505bf21cd93STycho Nightingale /* Check support for pin-based VM-execution controls */ 506bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 5072699b94cSPatrick Mooney MSR_VMX_TRUE_PINBASED_CTLS, 5082699b94cSPatrick Mooney PINBASED_CTLS_ONE_SETTING, 5092699b94cSPatrick Mooney PINBASED_CTLS_ZERO_SETTING, &pinbased_ctls); 510bf21cd93STycho Nightingale if (error) { 511bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired " 5122699b94cSPatrick Mooney "pin-based controls\n"); 513bf21cd93STycho Nightingale return (error); 514bf21cd93STycho Nightingale } 515bf21cd93STycho Nightingale 516bf21cd93STycho Nightingale /* Check support for VM-exit controls */ 517bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS, 5182699b94cSPatrick Mooney 
VM_EXIT_CTLS_ONE_SETTING, 5192699b94cSPatrick Mooney VM_EXIT_CTLS_ZERO_SETTING, 5202699b94cSPatrick Mooney &exit_ctls); 521bf21cd93STycho Nightingale if (error) { 522bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired " 523bf21cd93STycho Nightingale "exit controls\n"); 524bf21cd93STycho Nightingale return (error); 525bf21cd93STycho Nightingale } 526bf21cd93STycho Nightingale 527bf21cd93STycho Nightingale /* Check support for VM-entry controls */ 528bf21cd93STycho Nightingale error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS, 529bf21cd93STycho Nightingale VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING, 530bf21cd93STycho Nightingale &entry_ctls); 531bf21cd93STycho Nightingale if (error) { 532bf21cd93STycho Nightingale printf("vmx_init: processor does not support desired " 533bf21cd93STycho Nightingale "entry controls\n"); 534bf21cd93STycho Nightingale return (error); 535bf21cd93STycho Nightingale } 536bf21cd93STycho Nightingale 537bf21cd93STycho Nightingale /* 538bf21cd93STycho Nightingale * Check support for optional features by testing them 539bf21cd93STycho Nightingale * as individual bits 540bf21cd93STycho Nightingale */ 541bf21cd93STycho Nightingale cap_halt_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 5422699b94cSPatrick Mooney MSR_VMX_TRUE_PROCBASED_CTLS, 5432699b94cSPatrick Mooney PROCBASED_HLT_EXITING, 0, 5442699b94cSPatrick Mooney &tmp) == 0); 545bf21cd93STycho Nightingale 546bf21cd93STycho Nightingale cap_monitor_trap = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 5472699b94cSPatrick Mooney MSR_VMX_PROCBASED_CTLS, 5482699b94cSPatrick Mooney PROCBASED_MTF, 0, 5492699b94cSPatrick Mooney &tmp) == 0); 550bf21cd93STycho Nightingale 551bf21cd93STycho Nightingale cap_pause_exit = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 5522699b94cSPatrick Mooney MSR_VMX_TRUE_PROCBASED_CTLS, 5532699b94cSPatrick Mooney PROCBASED_PAUSE_EXITING, 0, 5542699b94cSPatrick Mooney &tmp) == 0); 555bf21cd93STycho Nightingale 
5564c87aefeSPatrick Mooney cap_invpcid = (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 5574c87aefeSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, PROCBASED2_ENABLE_INVPCID, 0, 5584c87aefeSPatrick Mooney &tmp) == 0); 5594c87aefeSPatrick Mooney 5602699b94cSPatrick Mooney /* 5612699b94cSPatrick Mooney * Check for APIC virtualization capabilities: 562c3ae3afaSPatrick Mooney * - TPR shadowing 563c3ae3afaSPatrick Mooney * - Full APICv (with or without x2APIC support) 564c3ae3afaSPatrick Mooney * - Posted interrupt handling 565154972afSPatrick Mooney */ 566c3ae3afaSPatrick Mooney if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, MSR_VMX_TRUE_PROCBASED_CTLS, 567c3ae3afaSPatrick Mooney PROCBASED_USE_TPR_SHADOW, 0, &tmp) == 0) { 568c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_TPR_SHADOW; 569c3ae3afaSPatrick Mooney 570c3ae3afaSPatrick Mooney const uint32_t apicv_bits = 571c3ae3afaSPatrick Mooney PROCBASED2_VIRTUALIZE_APIC_ACCESSES | 572c3ae3afaSPatrick Mooney PROCBASED2_APIC_REGISTER_VIRTUALIZATION | 573c3ae3afaSPatrick Mooney PROCBASED2_VIRTUALIZE_X2APIC_MODE | 574c3ae3afaSPatrick Mooney PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY; 575c3ae3afaSPatrick Mooney if (vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 576c3ae3afaSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, apicv_bits, 0, &tmp) == 0) { 577c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_APICV; 578154972afSPatrick Mooney 5794c87aefeSPatrick Mooney /* 580c3ae3afaSPatrick Mooney * It may make sense in the future to differentiate 581c3ae3afaSPatrick Mooney * hardware (or software) configurations with APICv but 582c3ae3afaSPatrick Mooney * no support for accelerating x2APIC mode. 
5834c87aefeSPatrick Mooney */ 584c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_APICV_X2APIC; 585c3ae3afaSPatrick Mooney 586c3ae3afaSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 587c3ae3afaSPatrick Mooney MSR_VMX_TRUE_PINBASED_CTLS, 588c3ae3afaSPatrick Mooney PINBASED_POSTED_INTERRUPT, 0, &tmp); 589c3ae3afaSPatrick Mooney if (error == 0) { 590c3ae3afaSPatrick Mooney /* 591c3ae3afaSPatrick Mooney * If the PSM-provided interfaces for requesting 592c3ae3afaSPatrick Mooney * and using a PIR IPI vector are present, use 593c3ae3afaSPatrick Mooney * them for posted interrupts. 594c3ae3afaSPatrick Mooney */ 595c3ae3afaSPatrick Mooney if (psm_get_pir_ipivect != NULL && 596c3ae3afaSPatrick Mooney psm_send_pir_ipi != NULL) { 597c3ae3afaSPatrick Mooney pirvec = psm_get_pir_ipivect(); 598c3ae3afaSPatrick Mooney avail_caps |= VMX_CAP_APICV_PIR; 599c3ae3afaSPatrick Mooney } 6004c87aefeSPatrick Mooney } 6014c87aefeSPatrick Mooney } 6024c87aefeSPatrick Mooney } 6034c87aefeSPatrick Mooney 6040153d828SPatrick Mooney /* 6050153d828SPatrick Mooney * Check for necessary EPT capabilities 6060153d828SPatrick Mooney * 6070153d828SPatrick Mooney * TODO: Properly handle when IA32_VMX_EPT_VPID_HW_AD is missing and the 6080153d828SPatrick Mooney * hypervisor intends to utilize dirty page tracking. 
6090153d828SPatrick Mooney */ 6100153d828SPatrick Mooney uint64_t ept_caps = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 6110153d828SPatrick Mooney if ((ept_caps & EPT_CAPS_REQUIRED) != EPT_CAPS_REQUIRED) { 6120153d828SPatrick Mooney cmn_err(CE_WARN, "!Inadequate EPT capabilities: %lx", ept_caps); 6130153d828SPatrick Mooney return (EINVAL); 614bf21cd93STycho Nightingale } 615bf21cd93STycho Nightingale 6164c87aefeSPatrick Mooney #ifdef __FreeBSD__ 6174c87aefeSPatrick Mooney guest_l1d_flush = (cpu_ia32_arch_caps & 6184c87aefeSPatrick Mooney IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY) == 0; 6194c87aefeSPatrick Mooney TUNABLE_INT_FETCH("hw.vmm.l1d_flush", &guest_l1d_flush); 6204c87aefeSPatrick Mooney 6214c87aefeSPatrick Mooney /* 6224c87aefeSPatrick Mooney * L1D cache flush is enabled. Use IA32_FLUSH_CMD MSR when 6234c87aefeSPatrick Mooney * available. Otherwise fall back to the software flush 6244c87aefeSPatrick Mooney * method which loads enough data from the kernel text to 6254c87aefeSPatrick Mooney * flush existing L1D content, both on VMX entry and on NMI 6264c87aefeSPatrick Mooney * return. 
6274c87aefeSPatrick Mooney */ 6284c87aefeSPatrick Mooney if (guest_l1d_flush) { 6294c87aefeSPatrick Mooney if ((cpu_stdext_feature3 & CPUID_STDEXT3_L1D_FLUSH) == 0) { 6304c87aefeSPatrick Mooney guest_l1d_flush_sw = 1; 6314c87aefeSPatrick Mooney TUNABLE_INT_FETCH("hw.vmm.l1d_flush_sw", 6324c87aefeSPatrick Mooney &guest_l1d_flush_sw); 6334c87aefeSPatrick Mooney } 6344c87aefeSPatrick Mooney if (guest_l1d_flush_sw) { 6354c87aefeSPatrick Mooney if (nmi_flush_l1d_sw <= 1) 6364c87aefeSPatrick Mooney nmi_flush_l1d_sw = 1; 6374c87aefeSPatrick Mooney } else { 6384c87aefeSPatrick Mooney msr_load_list[0].index = MSR_IA32_FLUSH_CMD; 6394c87aefeSPatrick Mooney msr_load_list[0].val = IA32_FLUSH_CMD_L1D; 6404c87aefeSPatrick Mooney } 6414c87aefeSPatrick Mooney } 6424c87aefeSPatrick Mooney #else 6434c87aefeSPatrick Mooney /* L1D flushing is taken care of by smt_acquire() and friends */ 6444c87aefeSPatrick Mooney guest_l1d_flush = 0; 6454c87aefeSPatrick Mooney #endif /* __FreeBSD__ */ 6464c87aefeSPatrick Mooney 647bf21cd93STycho Nightingale /* 648bf21cd93STycho Nightingale * Stash the cr0 and cr4 bits that must be fixed to 0 or 1 649bf21cd93STycho Nightingale */ 650bf21cd93STycho Nightingale fixed0 = rdmsr(MSR_VMX_CR0_FIXED0); 651bf21cd93STycho Nightingale fixed1 = rdmsr(MSR_VMX_CR0_FIXED1); 652bf21cd93STycho Nightingale cr0_ones_mask = fixed0 & fixed1; 653bf21cd93STycho Nightingale cr0_zeros_mask = ~fixed0 & ~fixed1; 654bf21cd93STycho Nightingale 655bf21cd93STycho Nightingale /* 656c3ae3afaSPatrick Mooney * Since Unrestricted Guest was already verified present, CR0_PE and 657c3ae3afaSPatrick Mooney * CR0_PG are allowed to be set to zero in VMX non-root operation 658bf21cd93STycho Nightingale */ 659c3ae3afaSPatrick Mooney cr0_ones_mask &= ~(CR0_PG | CR0_PE); 660bf21cd93STycho Nightingale 661bf21cd93STycho Nightingale /* 662bf21cd93STycho Nightingale * Do not allow the guest to set CR0_NW or CR0_CD. 
663bf21cd93STycho Nightingale */ 664bf21cd93STycho Nightingale cr0_zeros_mask |= (CR0_NW | CR0_CD); 665bf21cd93STycho Nightingale 666bf21cd93STycho Nightingale fixed0 = rdmsr(MSR_VMX_CR4_FIXED0); 667bf21cd93STycho Nightingale fixed1 = rdmsr(MSR_VMX_CR4_FIXED1); 668bf21cd93STycho Nightingale cr4_ones_mask = fixed0 & fixed1; 669bf21cd93STycho Nightingale cr4_zeros_mask = ~fixed0 & ~fixed1; 670bf21cd93STycho Nightingale 671bf21cd93STycho Nightingale vmx_msr_init(); 672bf21cd93STycho Nightingale 673c3ae3afaSPatrick Mooney vmx_capabilities = avail_caps; 674bf21cd93STycho Nightingale vmx_initialized = 1; 675bf21cd93STycho Nightingale 676bf21cd93STycho Nightingale return (0); 677bf21cd93STycho Nightingale } 678bf21cd93STycho Nightingale 6794c87aefeSPatrick Mooney static void 6804c87aefeSPatrick Mooney vmx_trigger_hostintr(int vector) 6814c87aefeSPatrick Mooney { 6824c87aefeSPatrick Mooney VERIFY(vector >= 32 && vector <= 255); 6834c87aefeSPatrick Mooney vmx_call_isr(vector - 32); 6844c87aefeSPatrick Mooney } 6854c87aefeSPatrick Mooney 686bf21cd93STycho Nightingale static void * 6870153d828SPatrick Mooney vmx_vminit(struct vm *vm) 688bf21cd93STycho Nightingale { 689bf21cd93STycho Nightingale uint16_t vpid[VM_MAXCPU]; 690007ca332SPatrick Mooney int i, error, datasel; 691bf21cd93STycho Nightingale struct vmx *vmx; 6924c87aefeSPatrick Mooney uint32_t exc_bitmap; 6934c87aefeSPatrick Mooney uint16_t maxcpus; 694c3ae3afaSPatrick Mooney uint32_t proc_ctls, proc2_ctls, pin_ctls; 6956b641d7aSPatrick Mooney uint64_t apic_access_pa = UINT64_MAX; 696bf21cd93STycho Nightingale 6972699b94cSPatrick Mooney vmx = malloc(sizeof (struct vmx), M_VMX, M_WAITOK | M_ZERO); 698bf21cd93STycho Nightingale if ((uintptr_t)vmx & PAGE_MASK) { 699bf21cd93STycho Nightingale panic("malloc of struct vmx not aligned on %d byte boundary", 7002699b94cSPatrick Mooney PAGE_SIZE); 701bf21cd93STycho Nightingale } 702bf21cd93STycho Nightingale vmx->vm = vm; 703bf21cd93STycho Nightingale 7040153d828SPatrick Mooney 
vmx->eptp = vmspace_table_root(vm_get_vmspace(vm)); 7054c87aefeSPatrick Mooney 706bf21cd93STycho Nightingale /* 707d1c02647SPatrick Mooney * Clean up EP4TA-tagged guest-physical and combined mappings 708bf21cd93STycho Nightingale * 709bf21cd93STycho Nightingale * VMX transitions are not required to invalidate any guest physical 710bf21cd93STycho Nightingale * mappings. So, it may be possible for stale guest physical mappings 711bf21cd93STycho Nightingale * to be present in the processor TLBs. 712bf21cd93STycho Nightingale * 713bf21cd93STycho Nightingale * Combined mappings for this EP4TA are also invalidated for all VPIDs. 714bf21cd93STycho Nightingale */ 7150153d828SPatrick Mooney hma_vmx_invept_allcpus((uintptr_t)vmx->eptp); 716bf21cd93STycho Nightingale 7176b641d7aSPatrick Mooney vmx_msr_bitmap_initialize(vmx); 718bf21cd93STycho Nightingale 719bf21cd93STycho Nightingale vpid_alloc(vpid, VM_MAXCPU); 720bf21cd93STycho Nightingale 721c3ae3afaSPatrick Mooney /* Grab the established defaults */ 722c3ae3afaSPatrick Mooney proc_ctls = procbased_ctls; 723c3ae3afaSPatrick Mooney proc2_ctls = procbased_ctls2; 724c3ae3afaSPatrick Mooney pin_ctls = pinbased_ctls; 725c3ae3afaSPatrick Mooney /* For now, default to the available capabilities */ 726c3ae3afaSPatrick Mooney vmx->vmx_caps = vmx_capabilities; 727c3ae3afaSPatrick Mooney 728c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) { 729c3ae3afaSPatrick Mooney proc_ctls |= PROCBASED_USE_TPR_SHADOW; 730c3ae3afaSPatrick Mooney proc_ctls &= ~PROCBASED_CR8_LOAD_EXITING; 731c3ae3afaSPatrick Mooney proc_ctls &= ~PROCBASED_CR8_STORE_EXITING; 732c3ae3afaSPatrick Mooney } 733c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV)) { 734c3ae3afaSPatrick Mooney ASSERT(vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)); 735c3ae3afaSPatrick Mooney 736c3ae3afaSPatrick Mooney proc2_ctls |= (PROCBASED2_VIRTUALIZE_APIC_ACCESSES | 737c3ae3afaSPatrick Mooney PROCBASED2_APIC_REGISTER_VIRTUALIZATION | 738c3ae3afaSPatrick Mooney 
PROCBASED2_VIRTUAL_INTERRUPT_DELIVERY); 739c3ae3afaSPatrick Mooney 7406b641d7aSPatrick Mooney /* 7416b641d7aSPatrick Mooney * Allocate a page of memory to back the APIC access address for 7426b641d7aSPatrick Mooney * when APICv features are in use. Guest MMIO accesses should 7436b641d7aSPatrick Mooney * never actually reach this page, but rather be intercepted. 7446b641d7aSPatrick Mooney */ 7456b641d7aSPatrick Mooney vmx->apic_access_page = kmem_zalloc(PAGESIZE, KM_SLEEP); 7466b641d7aSPatrick Mooney VERIFY3U((uintptr_t)vmx->apic_access_page & PAGEOFFSET, ==, 0); 7476b641d7aSPatrick Mooney apic_access_pa = vtophys(vmx->apic_access_page); 7486b641d7aSPatrick Mooney 7494c87aefeSPatrick Mooney error = vm_map_mmio(vm, DEFAULT_APIC_BASE, PAGE_SIZE, 7506b641d7aSPatrick Mooney apic_access_pa); 7514c87aefeSPatrick Mooney /* XXX this should really return an error to the caller */ 7524c87aefeSPatrick Mooney KASSERT(error == 0, ("vm_map_mmio(apicbase) error %d", error)); 7534c87aefeSPatrick Mooney } 754c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) { 755c3ae3afaSPatrick Mooney ASSERT(vmx_cap_en(vmx, VMX_CAP_APICV)); 756c3ae3afaSPatrick Mooney 757c3ae3afaSPatrick Mooney pin_ctls |= PINBASED_POSTED_INTERRUPT; 758c3ae3afaSPatrick Mooney } 7594c87aefeSPatrick Mooney 7604c87aefeSPatrick Mooney maxcpus = vm_get_maxcpus(vm); 761007ca332SPatrick Mooney datasel = vmm_get_host_datasel(); 7624c87aefeSPatrick Mooney for (i = 0; i < maxcpus; i++) { 7634c87aefeSPatrick Mooney /* 7644c87aefeSPatrick Mooney * Cache physical address lookups for various components which 7654c87aefeSPatrick Mooney * may be required inside the critical_enter() section implied 7664c87aefeSPatrick Mooney * by VMPTRLD() below. 
7674c87aefeSPatrick Mooney */ 7686b641d7aSPatrick Mooney vm_paddr_t msr_bitmap_pa = vtophys(vmx->msr_bitmap[i]); 7694c87aefeSPatrick Mooney vm_paddr_t apic_page_pa = vtophys(&vmx->apic_page[i]); 7704c87aefeSPatrick Mooney vm_paddr_t pir_desc_pa = vtophys(&vmx->pir_desc[i]); 7714c87aefeSPatrick Mooney 772007ca332SPatrick Mooney vmx->vmcs_pa[i] = (uintptr_t)vtophys(&vmx->vmcs[i]); 773007ca332SPatrick Mooney vmcs_initialize(&vmx->vmcs[i], vmx->vmcs_pa[i]); 774bf21cd93STycho Nightingale 775bf21cd93STycho Nightingale vmx_msr_guest_init(vmx, i); 776bf21cd93STycho Nightingale 777007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[i]); 778bf21cd93STycho Nightingale 779007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_PAT, vmm_get_host_pat()); 780007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_EFER, vmm_get_host_efer()); 781007ca332SPatrick Mooney 782007ca332SPatrick Mooney /* Load the control registers */ 783007ca332SPatrick Mooney vmcs_write(VMCS_HOST_CR0, vmm_get_host_cr0()); 784007ca332SPatrick Mooney vmcs_write(VMCS_HOST_CR4, vmm_get_host_cr4() | CR4_VMXE); 785007ca332SPatrick Mooney 786007ca332SPatrick Mooney /* Load the segment selectors */ 787007ca332SPatrick Mooney vmcs_write(VMCS_HOST_CS_SELECTOR, vmm_get_host_codesel()); 788007ca332SPatrick Mooney 789007ca332SPatrick Mooney vmcs_write(VMCS_HOST_ES_SELECTOR, datasel); 790007ca332SPatrick Mooney vmcs_write(VMCS_HOST_SS_SELECTOR, datasel); 791007ca332SPatrick Mooney vmcs_write(VMCS_HOST_DS_SELECTOR, datasel); 792007ca332SPatrick Mooney 793007ca332SPatrick Mooney vmcs_write(VMCS_HOST_FS_SELECTOR, vmm_get_host_fssel()); 794007ca332SPatrick Mooney vmcs_write(VMCS_HOST_GS_SELECTOR, vmm_get_host_gssel()); 795007ca332SPatrick Mooney vmcs_write(VMCS_HOST_TR_SELECTOR, vmm_get_host_tsssel()); 796007ca332SPatrick Mooney 797007ca332SPatrick Mooney /* 798007ca332SPatrick Mooney * Configure host sysenter MSRs to be restored on VM exit. 
7992699b94cSPatrick Mooney * The thread-specific MSR_INTC_SEP_ESP value is loaded in 8002699b94cSPatrick Mooney * vmx_run. 801007ca332SPatrick Mooney */ 802007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_SYSENTER_CS, KCS_SEL); 803007ca332SPatrick Mooney vmcs_write(VMCS_HOST_IA32_SYSENTER_EIP, 804007ca332SPatrick Mooney rdmsr(MSR_SYSENTER_EIP_MSR)); 805007ca332SPatrick Mooney 806007ca332SPatrick Mooney /* instruction pointer */ 807007ca332SPatrick Mooney if (no_flush_rsb) { 808007ca332SPatrick Mooney vmcs_write(VMCS_HOST_RIP, (uint64_t)vmx_exit_guest); 809007ca332SPatrick Mooney } else { 810007ca332SPatrick Mooney vmcs_write(VMCS_HOST_RIP, 811007ca332SPatrick Mooney (uint64_t)vmx_exit_guest_flush_rsb); 812007ca332SPatrick Mooney } 813c3ae3afaSPatrick Mooney 814007ca332SPatrick Mooney /* link pointer */ 815007ca332SPatrick Mooney vmcs_write(VMCS_LINK_POINTER, ~0); 816007ca332SPatrick Mooney 817007ca332SPatrick Mooney vmcs_write(VMCS_EPTP, vmx->eptp); 818007ca332SPatrick Mooney vmcs_write(VMCS_PIN_BASED_CTLS, pin_ctls); 819007ca332SPatrick Mooney vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); 820007ca332SPatrick Mooney vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc2_ctls); 821007ca332SPatrick Mooney vmcs_write(VMCS_EXIT_CTLS, exit_ctls); 822007ca332SPatrick Mooney vmcs_write(VMCS_ENTRY_CTLS, entry_ctls); 823007ca332SPatrick Mooney vmcs_write(VMCS_MSR_BITMAP, msr_bitmap_pa); 824007ca332SPatrick Mooney vmcs_write(VMCS_VPID, vpid[i]); 8254c87aefeSPatrick Mooney 8264c87aefeSPatrick Mooney if (guest_l1d_flush && !guest_l1d_flush_sw) { 8270153d828SPatrick Mooney vmcs_write(VMCS_ENTRY_MSR_LOAD, 8280153d828SPatrick Mooney vtophys(&msr_load_list[0])); 8294c87aefeSPatrick Mooney vmcs_write(VMCS_ENTRY_MSR_LOAD_COUNT, 8304c87aefeSPatrick Mooney nitems(msr_load_list)); 8314c87aefeSPatrick Mooney vmcs_write(VMCS_EXIT_MSR_STORE, 0); 8324c87aefeSPatrick Mooney vmcs_write(VMCS_EXIT_MSR_STORE_COUNT, 0); 8334c87aefeSPatrick Mooney } 834bf21cd93STycho Nightingale 8354c87aefeSPatrick 
Mooney /* exception bitmap */ 8364c87aefeSPatrick Mooney if (vcpu_trace_exceptions(vm, i)) 8374c87aefeSPatrick Mooney exc_bitmap = 0xffffffff; 8384c87aefeSPatrick Mooney else 8394c87aefeSPatrick Mooney exc_bitmap = 1 << IDT_MC; 840007ca332SPatrick Mooney vmcs_write(VMCS_EXCEPTION_BITMAP, exc_bitmap); 841bf21cd93STycho Nightingale 8424c87aefeSPatrick Mooney vmx->ctx[i].guest_dr6 = DBREG_DR6_RESERVED1; 843007ca332SPatrick Mooney vmcs_write(VMCS_GUEST_DR7, DBREG_DR7_RESERVED1); 844bf21cd93STycho Nightingale 845c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) { 846007ca332SPatrick Mooney vmcs_write(VMCS_VIRTUAL_APIC, apic_page_pa); 847154972afSPatrick Mooney } 848154972afSPatrick Mooney 849c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV)) { 8506b641d7aSPatrick Mooney vmcs_write(VMCS_APIC_ACCESS, apic_access_pa); 851007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT0, 0); 852007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT1, 0); 853007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT2, 0); 854007ca332SPatrick Mooney vmcs_write(VMCS_EOI_EXIT3, 0); 8554c87aefeSPatrick Mooney } 856c3ae3afaSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) { 857007ca332SPatrick Mooney vmcs_write(VMCS_PIR_VECTOR, pirvec); 858007ca332SPatrick Mooney vmcs_write(VMCS_PIR_DESC, pir_desc_pa); 8594c87aefeSPatrick Mooney } 860007ca332SPatrick Mooney 861007ca332SPatrick Mooney /* 862007ca332SPatrick Mooney * Set up the CR0/4 masks and configure the read shadow state 863007ca332SPatrick Mooney * to the power-on register value from the Intel Sys Arch. 
864007ca332SPatrick Mooney * CR0 - 0x60000010 865007ca332SPatrick Mooney * CR4 - 0 866007ca332SPatrick Mooney */ 867007ca332SPatrick Mooney vmcs_write(VMCS_CR0_MASK, cr0_ones_mask | cr0_zeros_mask); 868007ca332SPatrick Mooney vmcs_write(VMCS_CR0_SHADOW, 0x60000010); 869007ca332SPatrick Mooney vmcs_write(VMCS_CR4_MASK, cr4_ones_mask | cr4_zeros_mask); 870007ca332SPatrick Mooney vmcs_write(VMCS_CR4_SHADOW, 0); 871007ca332SPatrick Mooney 872007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[i]); 873bf21cd93STycho Nightingale 8744c87aefeSPatrick Mooney vmx->cap[i].set = 0; 875c3ae3afaSPatrick Mooney vmx->cap[i].proc_ctls = proc_ctls; 876c3ae3afaSPatrick Mooney vmx->cap[i].proc_ctls2 = proc2_ctls; 877154972afSPatrick Mooney vmx->cap[i].exc_bitmap = exc_bitmap; 878bf21cd93STycho Nightingale 8794c87aefeSPatrick Mooney vmx->state[i].nextrip = ~0; 8804c87aefeSPatrick Mooney vmx->state[i].lastcpu = NOCPU; 8814c87aefeSPatrick Mooney vmx->state[i].vpid = vpid[i]; 882bf21cd93STycho Nightingale } 883bf21cd93STycho Nightingale 884bf21cd93STycho Nightingale return (vmx); 885bf21cd93STycho Nightingale } 886bf21cd93STycho Nightingale 887bf21cd93STycho Nightingale static int 888bf21cd93STycho Nightingale vmx_handle_cpuid(struct vm *vm, int vcpu, struct vmxctx *vmxctx) 889bf21cd93STycho Nightingale { 8904c87aefeSPatrick Mooney int handled; 891bf21cd93STycho Nightingale 8923c5f2a9dSPatrick Mooney handled = x86_emulate_cpuid(vm, vcpu, (uint64_t *)&vmxctx->guest_rax, 8933c5f2a9dSPatrick Mooney (uint64_t *)&vmxctx->guest_rbx, (uint64_t *)&vmxctx->guest_rcx, 8943c5f2a9dSPatrick Mooney (uint64_t *)&vmxctx->guest_rdx); 895bf21cd93STycho Nightingale return (handled); 896bf21cd93STycho Nightingale } 897bf21cd93STycho Nightingale 8984c87aefeSPatrick Mooney static VMM_STAT_INTEL(VCPU_INVVPID_SAVED, "Number of vpid invalidations saved"); 8994c87aefeSPatrick Mooney static VMM_STAT_INTEL(VCPU_INVVPID_DONE, "Number of vpid invalidations done"); 9004c87aefeSPatrick Mooney 901007ca332SPatrick Mooney 
#define INVVPID_TYPE_ADDRESS 0UL 902007ca332SPatrick Mooney #define INVVPID_TYPE_SINGLE_CONTEXT 1UL 903007ca332SPatrick Mooney #define INVVPID_TYPE_ALL_CONTEXTS 2UL 904007ca332SPatrick Mooney 905007ca332SPatrick Mooney struct invvpid_desc { 906007ca332SPatrick Mooney uint16_t vpid; 907007ca332SPatrick Mooney uint16_t _res1; 908007ca332SPatrick Mooney uint32_t _res2; 909007ca332SPatrick Mooney uint64_t linear_addr; 910007ca332SPatrick Mooney }; 9112699b94cSPatrick Mooney CTASSERT(sizeof (struct invvpid_desc) == 16); 912007ca332SPatrick Mooney 913007ca332SPatrick Mooney static __inline void 914007ca332SPatrick Mooney invvpid(uint64_t type, struct invvpid_desc desc) 915007ca332SPatrick Mooney { 916007ca332SPatrick Mooney int error; 917007ca332SPatrick Mooney 91870ae9a33SPatrick Mooney DTRACE_PROBE3(vmx__invvpid, uint64_t, type, uint16_t, desc.vpid, 91970ae9a33SPatrick Mooney uint64_t, desc.linear_addr); 92070ae9a33SPatrick Mooney 921007ca332SPatrick Mooney __asm __volatile("invvpid %[desc], %[type];" 922007ca332SPatrick Mooney VMX_SET_ERROR_CODE_ASM 923007ca332SPatrick Mooney : [error] "=r" (error) 924007ca332SPatrick Mooney : [desc] "m" (desc), [type] "r" (type) 925007ca332SPatrick Mooney : "memory"); 926007ca332SPatrick Mooney 92770ae9a33SPatrick Mooney if (error) { 928007ca332SPatrick Mooney panic("invvpid error %d", error); 92970ae9a33SPatrick Mooney } 930007ca332SPatrick Mooney } 931007ca332SPatrick Mooney 9324c87aefeSPatrick Mooney /* 933d1c02647SPatrick Mooney * Invalidate guest mappings identified by its VPID from the TLB. 934d1c02647SPatrick Mooney * 935d1c02647SPatrick Mooney * This is effectively a flush of the guest TLB, removing only "combined 936d1c02647SPatrick Mooney * mappings" (to use the VMX parlance). Actions which modify the EPT structures 937d1c02647SPatrick Mooney * for the instance (such as unmapping GPAs) would require an 'invept' flush. 
9384c87aefeSPatrick Mooney */ 93970ae9a33SPatrick Mooney static void 9400153d828SPatrick Mooney vmx_invvpid(struct vmx *vmx, int vcpu, int running) 941bf21cd93STycho Nightingale { 942bf21cd93STycho Nightingale struct vmxstate *vmxstate; 9430153d828SPatrick Mooney struct vmspace *vms; 944bf21cd93STycho Nightingale 945bf21cd93STycho Nightingale vmxstate = &vmx->state[vcpu]; 94670ae9a33SPatrick Mooney if (vmxstate->vpid == 0) { 947bf21cd93STycho Nightingale return; 94870ae9a33SPatrick Mooney } 949bf21cd93STycho Nightingale 9504c87aefeSPatrick Mooney if (!running) { 9514c87aefeSPatrick Mooney /* 9524c87aefeSPatrick Mooney * Set the 'lastcpu' to an invalid host cpu. 9534c87aefeSPatrick Mooney * 9544c87aefeSPatrick Mooney * This will invalidate TLB entries tagged with the vcpu's 9554c87aefeSPatrick Mooney * vpid the next time it runs via vmx_set_pcpu_defaults(). 9564c87aefeSPatrick Mooney */ 9574c87aefeSPatrick Mooney vmxstate->lastcpu = NOCPU; 9584c87aefeSPatrick Mooney return; 9594c87aefeSPatrick Mooney } 960bf21cd93STycho Nightingale 961bf21cd93STycho Nightingale /* 9624c87aefeSPatrick Mooney * Invalidate all mappings tagged with 'vpid' 963bf21cd93STycho Nightingale * 964d1c02647SPatrick Mooney * This is done when a vCPU moves between host CPUs, where there may be 965d1c02647SPatrick Mooney * stale TLB entries for this VPID on the target, or if emulated actions 966d1c02647SPatrick Mooney * in the guest CPU have incurred an explicit TLB flush. 
967bf21cd93STycho Nightingale */ 96870ae9a33SPatrick Mooney vms = vm_get_vmspace(vmx->vm); 9690153d828SPatrick Mooney if (vmspace_table_gen(vms) == vmx->eptgen[curcpu]) { 97070ae9a33SPatrick Mooney struct invvpid_desc invvpid_desc = { 97170ae9a33SPatrick Mooney .vpid = vmxstate->vpid, 97270ae9a33SPatrick Mooney .linear_addr = 0, 97370ae9a33SPatrick Mooney ._res1 = 0, 97470ae9a33SPatrick Mooney ._res2 = 0, 97570ae9a33SPatrick Mooney }; 97670ae9a33SPatrick Mooney 977bf21cd93STycho Nightingale invvpid(INVVPID_TYPE_SINGLE_CONTEXT, invvpid_desc); 9784c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_DONE, 1); 9794c87aefeSPatrick Mooney } else { 9804c87aefeSPatrick Mooney /* 981d1c02647SPatrick Mooney * The INVVPID can be skipped if an INVEPT is going to be 982d1c02647SPatrick Mooney * performed before entering the guest. The INVEPT will 983d1c02647SPatrick Mooney * invalidate combined mappings for the EP4TA associated with 984d1c02647SPatrick Mooney * this guest, in all VPIDs. 
9854c87aefeSPatrick Mooney */ 9864c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VCPU_INVVPID_SAVED, 1); 987bf21cd93STycho Nightingale } 988bf21cd93STycho Nightingale } 989bf21cd93STycho Nightingale 9900153d828SPatrick Mooney static __inline void 9910153d828SPatrick Mooney invept(uint64_t type, uint64_t eptp) 9920153d828SPatrick Mooney { 9930153d828SPatrick Mooney int error; 9940153d828SPatrick Mooney struct invept_desc { 9950153d828SPatrick Mooney uint64_t eptp; 9960153d828SPatrick Mooney uint64_t _resv; 9970153d828SPatrick Mooney } desc = { eptp, 0 }; 9980153d828SPatrick Mooney 99970ae9a33SPatrick Mooney DTRACE_PROBE2(vmx__invept, uint64_t, type, uint64_t, eptp); 100070ae9a33SPatrick Mooney 10010153d828SPatrick Mooney __asm __volatile("invept %[desc], %[type];" 10020153d828SPatrick Mooney VMX_SET_ERROR_CODE_ASM 10030153d828SPatrick Mooney : [error] "=r" (error) 10040153d828SPatrick Mooney : [desc] "m" (desc), [type] "r" (type) 10050153d828SPatrick Mooney : "memory"); 10060153d828SPatrick Mooney 10070153d828SPatrick Mooney if (error != 0) { 10080153d828SPatrick Mooney panic("invvpid error %d", error); 10090153d828SPatrick Mooney } 10100153d828SPatrick Mooney } 10110153d828SPatrick Mooney 10124c87aefeSPatrick Mooney static void 10130153d828SPatrick Mooney vmx_set_pcpu_defaults(struct vmx *vmx, int vcpu) 1014bf21cd93STycho Nightingale { 10154c87aefeSPatrick Mooney struct vmxstate *vmxstate; 1016bf21cd93STycho Nightingale 10174c87aefeSPatrick Mooney /* 10184c87aefeSPatrick Mooney * Regardless of whether the VM appears to have migrated between CPUs, 10194c87aefeSPatrick Mooney * save the host sysenter stack pointer. As it points to the kernel 10204c87aefeSPatrick Mooney * stack of each thread, the correct value must be maintained for every 10214c87aefeSPatrick Mooney * trip into the critical section. 
10224c87aefeSPatrick Mooney */ 10234c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_IA32_SYSENTER_ESP, rdmsr(MSR_SYSENTER_ESP_MSR)); 1024bf21cd93STycho Nightingale 10254c87aefeSPatrick Mooney /* 10264c87aefeSPatrick Mooney * Perform any needed TSC_OFFSET adjustment based on TSC_MSR writes or 10274c87aefeSPatrick Mooney * migration between host CPUs with differing TSC values. 10284c87aefeSPatrick Mooney */ 1029007ca332SPatrick Mooney vmx_apply_tsc_adjust(vmx, vcpu); 10304c87aefeSPatrick Mooney 10314c87aefeSPatrick Mooney vmxstate = &vmx->state[vcpu]; 10324c87aefeSPatrick Mooney if (vmxstate->lastcpu == curcpu) 10334c87aefeSPatrick Mooney return; 10344c87aefeSPatrick Mooney 10354c87aefeSPatrick Mooney vmxstate->lastcpu = curcpu; 10364c87aefeSPatrick Mooney 10374c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VCPU_MIGRATIONS, 1); 10384c87aefeSPatrick Mooney 10394c87aefeSPatrick Mooney /* Load the per-CPU IDT address */ 10404c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_IDTR_BASE, vmm_get_host_idtrbase()); 10414c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_TR_BASE, vmm_get_host_trbase()); 10424c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_GDTR_BASE, vmm_get_host_gdtrbase()); 10434c87aefeSPatrick Mooney vmcs_write(VMCS_HOST_GS_BASE, vmm_get_host_gsbase()); 10440153d828SPatrick Mooney vmx_invvpid(vmx, vcpu, 1); 10454c87aefeSPatrick Mooney } 10464c87aefeSPatrick Mooney 10474c87aefeSPatrick Mooney /* 10484c87aefeSPatrick Mooney * We depend on 'procbased_ctls' to have the Interrupt Window Exiting bit set. 
10494c87aefeSPatrick Mooney */ 10504c87aefeSPatrick Mooney CTASSERT((PROCBASED_CTLS_ONE_SETTING & PROCBASED_INT_WINDOW_EXITING) != 0); 10514c87aefeSPatrick Mooney 10524c87aefeSPatrick Mooney static __inline void 10534c87aefeSPatrick Mooney vmx_set_int_window_exiting(struct vmx *vmx, int vcpu) 1054bf21cd93STycho Nightingale { 1055bf21cd93STycho Nightingale if ((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) == 0) { 1056*d4f59ae5SPatrick Mooney /* Enable interrupt window exiting */ 1057bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls |= PROCBASED_INT_WINDOW_EXITING; 1058bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1059bf21cd93STycho Nightingale } 1060bf21cd93STycho Nightingale } 1061bf21cd93STycho Nightingale 10624c87aefeSPatrick Mooney static __inline void 1063bf21cd93STycho Nightingale vmx_clear_int_window_exiting(struct vmx *vmx, int vcpu) 1064bf21cd93STycho Nightingale { 1065bf21cd93STycho Nightingale KASSERT((vmx->cap[vcpu].proc_ctls & PROCBASED_INT_WINDOW_EXITING) != 0, 10669dc804b9SPatrick Mooney ("intr_window_exiting not set: %x", vmx->cap[vcpu].proc_ctls)); 1067*d4f59ae5SPatrick Mooney 1068*d4f59ae5SPatrick Mooney /* Disable interrupt window exiting */ 1069bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls &= ~PROCBASED_INT_WINDOW_EXITING; 1070bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1071bf21cd93STycho Nightingale } 1072bf21cd93STycho Nightingale 1073c74a40a5SPatrick Mooney static __inline bool 1074c74a40a5SPatrick Mooney vmx_nmi_window_exiting(struct vmx *vmx, int vcpu) 1075c74a40a5SPatrick Mooney { 1076c74a40a5SPatrick Mooney return ((vmx->cap[vcpu].proc_ctls & PROCBASED_NMI_WINDOW_EXITING) != 0); 1077c74a40a5SPatrick Mooney } 1078c74a40a5SPatrick Mooney 10794c87aefeSPatrick Mooney static __inline void 1080bf21cd93STycho Nightingale vmx_set_nmi_window_exiting(struct vmx *vmx, int vcpu) 1081bf21cd93STycho Nightingale { 1082c74a40a5SPatrick Mooney if 
(!vmx_nmi_window_exiting(vmx, vcpu)) { 1083bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls |= PROCBASED_NMI_WINDOW_EXITING; 1084bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1085bf21cd93STycho Nightingale } 1086bf21cd93STycho Nightingale } 1087bf21cd93STycho Nightingale 10884c87aefeSPatrick Mooney static __inline void 1089bf21cd93STycho Nightingale vmx_clear_nmi_window_exiting(struct vmx *vmx, int vcpu) 1090bf21cd93STycho Nightingale { 1091c74a40a5SPatrick Mooney ASSERT(vmx_nmi_window_exiting(vmx, vcpu)); 1092bf21cd93STycho Nightingale vmx->cap[vcpu].proc_ctls &= ~PROCBASED_NMI_WINDOW_EXITING; 1093bf21cd93STycho Nightingale vmcs_write(VMCS_PRI_PROC_BASED_CTLS, vmx->cap[vcpu].proc_ctls); 1094bf21cd93STycho Nightingale } 1095bf21cd93STycho Nightingale 10964c87aefeSPatrick Mooney /* 10974c87aefeSPatrick Mooney * Set the TSC adjustment, taking into account the offsets measured between 10984c87aefeSPatrick Mooney * host physical CPUs. This is required even if the guest has not set a TSC 10994c87aefeSPatrick Mooney * offset since vCPUs inherit the TSC offset of whatever physical CPU it has 11004c87aefeSPatrick Mooney * migrated onto. Without this mitigation, un-synched host TSCs will convey 11014c87aefeSPatrick Mooney * the appearance of TSC time-travel to the guest as its vCPUs migrate. 
 */
static void
vmx_apply_tsc_adjust(struct vmx *vmx, int vcpu)
{
	const uint64_t offset = vcpu_tsc_offset(vmx->vm, vcpu, true);

	/* TSC offsetting must be active for this vCPU before we apply one */
	ASSERT(vmx->cap[vcpu].proc_ctls & PROCBASED_TSC_OFFSET);

	/*
	 * Only perform the (relatively expensive) vmcs_write if the offset
	 * differs from what was last loaded into this vCPU's VMCS.
	 */
	if (vmx->tsc_offset_active[vcpu] != offset) {
		vmcs_write(VMCS_TSC_OFFSET, offset);
		vmx->tsc_offset_active[vcpu] = offset;
	}
}

/*
 * The bhyve intinfo event format is designed to match the hardware VMCS
 * interruption-information encoding.  These assertions keep the two in sync
 * so values can be passed between them without any bit shuffling.
 */
CTASSERT(VMCS_INTR_T_HWINTR == VM_INTINFO_HWINTR);
CTASSERT(VMCS_INTR_T_NMI == VM_INTINFO_NMI);
CTASSERT(VMCS_INTR_T_HWEXCEPTION == VM_INTINFO_HWEXCP);
CTASSERT(VMCS_INTR_T_SWINTR == VM_INTINFO_SWINTR);
CTASSERT(VMCS_INTR_T_PRIV_SWEXCEPTION == VM_INTINFO_RESV5);
CTASSERT(VMCS_INTR_T_SWEXCEPTION == VM_INTINFO_RESV6);
CTASSERT(VMCS_IDT_VEC_ERRCODE_VALID == VM_INTINFO_DEL_ERRCODE);
CTASSERT(VMCS_INTR_T_MASK == VM_INTINFO_MASK_TYPE);

/*
 * Translate IDT-vectoring information (as read from the VMCS at VM exit)
 * into the bhyve intinfo format, pulling in the associated error code from
 * the VMCS when one accompanied the event.  Panics on event types not
 * expected from the hardware.
 */
static uint64_t
vmx_idtvec_to_intinfo(uint32_t info)
{
	ASSERT(info & VMCS_IDT_VEC_VALID);

	const uint32_t type = info & VMCS_INTR_T_MASK;
	const uint8_t vec = info & 0xff;

	switch (type) {
	case VMCS_INTR_T_HWINTR:
	case VMCS_INTR_T_NMI:
	case VMCS_INTR_T_HWEXCEPTION:
	case VMCS_INTR_T_SWINTR:
	case VMCS_INTR_T_PRIV_SWEXCEPTION:
	case VMCS_INTR_T_SWEXCEPTION:
		break;
	default:
		panic("unexpected event type 0x%03x", type);
	}

	uint64_t intinfo = VM_INTINFO_VALID | type | vec;
	if (info & VMCS_IDT_VEC_ERRCODE_VALID) {
		const uint32_t errcode = vmcs_read(VMCS_IDT_VECTORING_ERROR);
		intinfo |= (uint64_t)errcode << 32;
	}

	return (intinfo);
}

/*
 * Load a pending event (in bhyve intinfo format) into the VMCS entry fields
 * so it is injected into the guest on the next VM entry.
 */
static void
vmx_inject_intinfo(uint64_t info)
{
	ASSERT(VM_INTINFO_PENDING(info));
	ASSERT0(info & VM_INTINFO_MASK_RSVD);

	/*
	 * The bhyve format matches that of the VMCS, which is ensured by the
	 * CTASSERTs above.
	 */
	uint32_t inject = info;
	switch (VM_INTINFO_VECTOR(info)) {
	case IDT_BP:
	case IDT_OF:
		/*
		 * VT-x requires #BP and #OF to be injected as software
		 * exceptions.
		 */
		inject &= ~VMCS_INTR_T_MASK;
		inject |= VMCS_INTR_T_SWEXCEPTION;
		break;
	default:
		break;
	}

	if (VM_INTINFO_HAS_ERRCODE(info)) {
		vmcs_write(VMCS_ENTRY_EXCEPTION_ERROR,
		    VM_INTINFO_ERRCODE(info));
	}
	vmcs_write(VMCS_ENTRY_INTR_INFO, inject);
}

/* Guest-interruptibility bits which block NMI or ExtINT/hw-intr injection */
#define	NMI_BLOCKING	(VMCS_INTERRUPTIBILITY_NMI_BLOCKING |		\
			VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)
#define	HWINTR_BLOCKING	(VMCS_INTERRUPTIBILITY_STI_BLOCKING |		\
			VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)

/*
 * Queue an NMI for injection on the next VM entry and clear the pending
 * request.  Caller must have verified that injection is not blocked and that
 * no other event already occupies the entry-interruption field.
 */
static void
vmx_inject_nmi(struct vmx *vmx, int vcpu)
{
	ASSERT0(vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & NMI_BLOCKING);
	ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);

	/*
	 * Inject the virtual NMI. The vector must be the NMI IDT entry
	 * or the VMCS entry check will fail.
	 */
	vmcs_write(VMCS_ENTRY_INTR_INFO,
	    IDT_NMI | VMCS_INTR_T_NMI | VMCS_INTR_VALID);

	/* Clear the request */
	vm_nmi_clear(vmx->vm, vcpu);
}

/*
 * Inject exceptions, NMIs, and ExtINTs.
 *
 * The logic behind these are complicated and may involve mutex contention, so
 * the injection is performed without the protection of host CPU interrupts
 * being disabled.  This means a racing notification could be "lost",
 * necessitating a later call to vmx_inject_recheck() to close that window
 * of opportunity.
 */
static enum event_inject_state
vmx_inject_events(struct vmx *vmx, int vcpu, uint64_t rip)
{
	uint64_t entryinfo;
	uint32_t gi, info;
	int vector;
	enum event_inject_state state;

	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
	info = vmcs_read(VMCS_ENTRY_INTR_INFO);
	state = EIS_CAN_INJECT;

	/* Clear any interrupt blocking if the guest %rip has changed */
	if (vmx->state[vcpu].nextrip != rip && (gi & HWINTR_BLOCKING) != 0) {
		gi &= ~HWINTR_BLOCKING;
		vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
	}

	/*
	 * It could be that an interrupt is already pending for injection from
	 * the VMCS.  This would be the case if the vCPU exited for conditions
	 * such as an AST before a vm-entry delivered the injection.
	 */
	if ((info & VMCS_INTR_VALID) != 0) {
		return (EIS_EV_EXISTING | EIS_REQ_EXIT);
	}

	if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
		vmx_inject_intinfo(entryinfo);
		state = EIS_EV_INJECTED;
	}

	if (vm_nmi_pending(vmx->vm, vcpu)) {
		/*
		 * If there are no conditions blocking NMI injection then inject
		 * it directly here otherwise enable "NMI window exiting" to
		 * inject it as soon as we can.
		 *
		 * According to the Intel manual, some CPUs do not allow NMI
		 * injection when STI_BLOCKING is active.  That check is
		 * enforced here, regardless of CPU capability.  If running on a
		 * CPU without such a restriction it will immediately exit and
		 * the NMI will be injected in the "NMI window exiting" handler.
		 */
		if ((gi & (HWINTR_BLOCKING | NMI_BLOCKING)) == 0) {
			if (state == EIS_CAN_INJECT) {
				vmx_inject_nmi(vmx, vcpu);
				state = EIS_EV_INJECTED;
			} else {
				return (state | EIS_REQ_EXIT);
			}
		} else {
			vmx_set_nmi_window_exiting(vmx, vcpu);
		}
	}

	if (vm_extint_pending(vmx->vm, vcpu)) {
		if (state != EIS_CAN_INJECT) {
			return (state | EIS_REQ_EXIT);
		}
		if ((gi & HWINTR_BLOCKING) != 0 ||
		    (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
			return (EIS_GI_BLOCK);
		}

		/* Ask the legacy pic for a vector to inject */
		vatpic_pending_intr(vmx->vm, &vector);

		/*
		 * From the Intel SDM, Volume 3, Section "Maskable
		 * Hardware Interrupts":
		 * - maskable interrupt vectors [0,255] can be delivered
		 *   through the INTR pin.
		 */
		KASSERT(vector >= 0 && vector <= 255,
		    ("invalid vector %d from INTR", vector));

		/* Inject the interrupt */
		vmcs_write(VMCS_ENTRY_INTR_INFO,
		    VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);

		vm_extint_clear(vmx->vm, vcpu);
		vatpic_intr_accepted(vmx->vm, vector);
		state = EIS_EV_INJECTED;
	}

	return (state);
}

/*
 * Inject any interrupts pending on the vLAPIC.
 *
 * This is done with host CPU interrupts disabled so notification IPIs, either
 * from the standard vCPU notification or APICv posted interrupts, will be
 * queued on the host APIC and recognized when entering VMX context.
 */
static enum event_inject_state
vmx_inject_vlapic(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
{
	int vector;

	if (!vlapic_pending_intr(vlapic, &vector)) {
		return (EIS_CAN_INJECT);
	}

	/*
	 * From the Intel SDM, Volume 3, Section "Maskable
	 * Hardware Interrupts":
	 * - maskable interrupt vectors [16,255] can be delivered
	 *   through the local APIC.
	 */
	KASSERT(vector >= 16 && vector <= 255,
	    ("invalid vector %d from local APIC", vector));

	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
		uint16_t status_old = vmcs_read(VMCS_GUEST_INTR_STATUS);
		uint16_t status_new = (status_old & 0xff00) | vector;

		/*
		 * The APICv state will have been synced into the vLAPIC
		 * as part of vlapic_pending_intr().  Prepare the VMCS
		 * for the to-be-injected pending interrupt.
		 */
		if (status_new > status_old) {
			vmcs_write(VMCS_GUEST_INTR_STATUS, status_new);
		}

		/*
		 * Ensure VMCS state regarding EOI traps is kept in sync
		 * with the TMRs in the vlapic.
		 */
		vmx_apicv_sync_tmr(vlapic);

		/*
		 * The rest of the injection process for injecting the
		 * interrupt(s) is handled by APICv. It does not preclude other
		 * event injection from occurring.
		 */
		return (EIS_CAN_INJECT);
	}

	ASSERT0(vmcs_read(VMCS_ENTRY_INTR_INFO) & VMCS_INTR_VALID);

	/* Does guest interruptibility block injection? */
	if ((vmcs_read(VMCS_GUEST_INTERRUPTIBILITY) & HWINTR_BLOCKING) != 0 ||
	    (vmcs_read(VMCS_GUEST_RFLAGS) & PSL_I) == 0) {
		return (EIS_GI_BLOCK);
	}

	/* Inject the interrupt */
	vmcs_write(VMCS_ENTRY_INTR_INFO,
	    VMCS_INTR_T_HWINTR | VMCS_INTR_VALID | vector);

	/* Update the Local APIC ISR */
	vlapic_intr_accepted(vlapic, vector);

	return (EIS_EV_INJECTED);
}

/*
 * Re-check for events to be injected.
 *
 * Once host CPU interrupts are disabled, check for the presence of any events
 * which require injection processing.  If an exit is required upon injection,
 * or once the guest becomes interruptable, that will be configured too.
 */
static bool
vmx_inject_recheck(struct vmx *vmx, int vcpu, enum event_inject_state state)
{
	if (state == EIS_CAN_INJECT) {
		if (vm_nmi_pending(vmx->vm, vcpu) &&
		    !vmx_nmi_window_exiting(vmx, vcpu)) {
			/* queued NMI not blocked by NMI-window-exiting */
			return (true);
		}
		if (vm_extint_pending(vmx->vm, vcpu)) {
			/* queued ExtINT not blocked by existing injection */
			return (true);
		}
	} else {
		if ((state & EIS_REQ_EXIT) != 0) {
			/*
			 * Use a self-IPI to force an immediate exit after
			 * event injection has occurred.
			 */
			poke_cpu(CPU->cpu_id);
		} else {
			/*
			 * If any event is being injected, an exit immediately
			 * upon becoming interruptable again will allow pending
			 * or newly queued events to be injected in a timely
			 * manner.
			 */
			vmx_set_int_window_exiting(vmx, vcpu);
		}
	}
	return (false);
}

/*
 * If the Virtual NMIs execution control is '1' then the logical processor
 * tracks virtual-NMI blocking in the Guest Interruptibility-state field of
 * the VMCS.  An IRET instruction in VMX non-root operation will remove any
 * virtual-NMI blocking.
 *
 * This unblocking occurs even if the IRET causes a fault. In this case the
 * hypervisor needs to restore virtual-NMI blocking before resuming the guest.
 */
static void
vmx_restore_nmi_blocking(struct vmx *vmx, int vcpuid)
{
	uint32_t gi;

	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
	gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
	vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
}

/* Clear the virtual-NMI blocking bit in the guest interruptibility state. */
static void
vmx_clear_nmi_blocking(struct vmx *vmx, int vcpuid)
{
	uint32_t gi;

	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
	gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING;
	vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
}
/* Assert (via KASSERT) that virtual-NMI blocking is currently in effect. */
static void
vmx_assert_nmi_blocking(struct vmx *vmx, int vcpuid)
{
	uint32_t gi;

	gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
	KASSERT(gi & VMCS_INTERRUPTIBILITY_NMI_BLOCKING,
	    ("NMI blocking is not in effect %x", gi));
}

/*
 * Emulate the xsetbv instruction on behalf of the guest, validating the
 * requested xcr0 contents and injecting #UD/#GP as architecturally
 * appropriate.  Always returns HANDLED (even injected faults are handled
 * from the host's perspective).
 */
static int
vmx_emulate_xsetbv(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
	struct vmxctx *vmxctx;
	uint64_t xcrval;
	const struct xsave_limits *limits;

	vmxctx = &vmx->ctx[vcpu];
	limits = vmm_get_xsave_limits();

	/*
	 * Note that the processor raises a GP# fault on its own if
	 * xsetbv is executed for CPL != 0, so we do not have to
	 * emulate that fault here.
	 */

	/* Only xcr0 is supported. */
	if (vmxctx->guest_rcx != 0) {
		vm_inject_gp(vmx->vm, vcpu);
		return (HANDLED);
	}

	/* We only handle xcr0 if both the host and guest have XSAVE enabled. */
	if (!limits->xsave_enabled ||
	    !(vmcs_read(VMCS_GUEST_CR4) & CR4_XSAVE)) {
		vm_inject_ud(vmx->vm, vcpu);
		return (HANDLED);
	}

	/* Requested xcr0 value is assembled from guest %edx:%eax */
	xcrval = vmxctx->guest_rdx << 32 | (vmxctx->guest_rax & 0xffffffff);
	if ((xcrval & ~limits->xcr0_allowed) != 0) {
		vm_inject_gp(vmx->vm, vcpu);
		return (HANDLED);
	}

	/* The x87 state bit must always be set in xcr0 */
	if (!(xcrval & XFEATURE_ENABLED_X87)) {
		vm_inject_gp(vmx->vm, vcpu);
		return (HANDLED);
	}

	/* AVX (YMM_Hi128) requires SSE. */
	if (xcrval & XFEATURE_ENABLED_AVX &&
	    (xcrval & XFEATURE_AVX) != XFEATURE_AVX) {
		vm_inject_gp(vmx->vm, vcpu);
		return (HANDLED);
	}

	/*
	 * AVX512 requires base AVX (YMM_Hi128) as well as OpMask,
	 * ZMM_Hi256, and Hi16_ZMM.
	 */
	if (xcrval & XFEATURE_AVX512 &&
	    (xcrval & (XFEATURE_AVX512 | XFEATURE_AVX)) !=
	    (XFEATURE_AVX512 | XFEATURE_AVX)) {
		vm_inject_gp(vmx->vm, vcpu);
		return (HANDLED);
	}

	/*
	 * Intel MPX requires both bound register state flags to be
	 * set.
	 */
	if (((xcrval & XFEATURE_ENABLED_BNDREGS) != 0) !=
	    ((xcrval & XFEATURE_ENABLED_BNDCSR) != 0)) {
		vm_inject_gp(vmx->vm, vcpu);
		return (HANDLED);
	}

	/*
	 * This runs "inside" vmrun() with the guest's FPU state, so
	 * modifying xcr0 directly modifies the guest's xcr0, not the
	 * host's.
	 */
	load_xcr(0, xcrval);
	return (HANDLED);
}

/*
 * Fetch a guest general-purpose register by its instruction-encoding
 * identifier (0-15, the standard x86 register numbering).  %rsp (ident 4)
 * lives in the VMCS rather than in the vmxctx register file.
 */
static uint64_t
vmx_get_guest_reg(struct vmx *vmx, int vcpu, int ident)
{
	const struct vmxctx *vmxctx;

	vmxctx = &vmx->ctx[vcpu];

	switch (ident) {
	case 0:
		return (vmxctx->guest_rax);
	case 1:
		return (vmxctx->guest_rcx);
	case 2:
		return (vmxctx->guest_rdx);
	case 3:
		return (vmxctx->guest_rbx);
	case 4:
		return (vmcs_read(VMCS_GUEST_RSP));
	case 5:
		return (vmxctx->guest_rbp);
	case 6:
		return (vmxctx->guest_rsi);
	case 7:
		return (vmxctx->guest_rdi);
	case 8:
		return (vmxctx->guest_r8);
	case 9:
		return (vmxctx->guest_r9);
	case 10:
		return (vmxctx->guest_r10);
	case 11:
		return (vmxctx->guest_r11);
	case 12:
		return (vmxctx->guest_r12);
	case 13:
		return (vmxctx->guest_r13);
	case 14:
		return (vmxctx->guest_r14);
	case 15:
		return (vmxctx->guest_r15);
	default:
		panic("invalid vmx register %d", ident);
	}
}

/*
 * Store a value into a guest general-purpose register by its
 * instruction-encoding identifier.  Counterpart to vmx_get_guest_reg();
 * %rsp (ident 4) is written to the VMCS rather than the vmxctx.
 */
static void
vmx_set_guest_reg(struct vmx *vmx, int vcpu, int ident, uint64_t regval)
{
	struct vmxctx *vmxctx;

	vmxctx = &vmx->ctx[vcpu];

	switch (ident) {
	case 0:
		vmxctx->guest_rax = regval;
		break;
	case 1:
		vmxctx->guest_rcx = regval;
		break;
	case 2:
		vmxctx->guest_rdx = regval;
		break;
	case 3:
		vmxctx->guest_rbx = regval;
		break;
	case 4:
		vmcs_write(VMCS_GUEST_RSP, regval);
		break;
	case 5:
		vmxctx->guest_rbp = regval;
		break;
	case 6:
		vmxctx->guest_rsi = regval;
		break;
	case 7:
		vmxctx->guest_rdi = regval;
		break;
	case 8:
		vmxctx->guest_r8 = regval;
		break;
	case 9:
		vmxctx->guest_r9 = regval;
		break;
	case 10:
		vmxctx->guest_r10 = regval;
		break;
	case 11:
		vmxctx->guest_r11 = regval;
		break;
	case 12:
		vmxctx->guest_r12 = regval;
		break;
	case 13:
		vmxctx->guest_r13 = regval;
		break;
	case 14:
		vmxctx->guest_r14 = regval;
		break;
	case 15:
		vmxctx->guest_r15 = regval;
		break;
	default:
		panic("invalid vmx register %d", ident);
	}
}

/*
 * Handle a CR0-access exit: apply the guest's requested value through the
 * CR0 shadow and the ones/zeros masks, and keep EFER.LMA and the VM-entry
 * "IA-32e mode guest" control consistent when paging is enabled.
 */
static int
vmx_emulate_cr0_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
{
	uint64_t crval, regval;

	/* We only handle mov to %cr0 at this time */
	if ((exitqual & 0xf0) != 0x00)
		return (UNHANDLED);

	regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf);

	/* The shadow holds the value the guest believes it wrote */
	vmcs_write(VMCS_CR0_SHADOW, regval);

	crval = regval | cr0_ones_mask;
	crval &= ~cr0_zeros_mask;

	const uint64_t old = vmcs_read(VMCS_GUEST_CR0);
	const uint64_t diff = crval ^ old;
	/* Flush the TLB if the paging or write-protect bits are changing */
	if ((diff & CR0_PG) != 0 || (diff & CR0_WP) != 0) {
		vmx_invvpid(vmx, vcpu, 1);
	}

	vmcs_write(VMCS_GUEST_CR0, crval);

	if (regval & CR0_PG) {
		uint64_t efer, entry_ctls;

		/*
		 * If CR0.PG is 1 and EFER.LME is 1 then EFER.LMA and
		 * the "IA-32e mode guest" bit in VM-entry control must be
		 * equal.
		 */
		efer = vmcs_read(VMCS_GUEST_IA32_EFER);
		if (efer & EFER_LME) {
			efer |= EFER_LMA;
			vmcs_write(VMCS_GUEST_IA32_EFER, efer);
			entry_ctls = vmcs_read(VMCS_ENTRY_CTLS);
			entry_ctls |= VM_ENTRY_GUEST_LMA;
			vmcs_write(VMCS_ENTRY_CTLS, entry_ctls);
		}
	}

	return (HANDLED);
}

/*
 * Handle a CR4-access exit: apply the guest's requested value through the
 * CR4 shadow and the ones/zeros masks.
 */
static int
vmx_emulate_cr4_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
{
	uint64_t crval, regval;

	/* We only handle mov to %cr4 at this time */
	if ((exitqual & 0xf0) != 0x00)
		return (UNHANDLED);

	regval = vmx_get_guest_reg(vmx, vcpu, (exitqual >> 8) & 0xf);

	vmcs_write(VMCS_CR4_SHADOW, regval);

	crval = regval | cr4_ones_mask;
	crval &= ~cr4_zeros_mask;
	vmcs_write(VMCS_GUEST_CR4, crval);

	return (HANDLED);
}

/*
 * Handle a CR8-access exit by forwarding reads/writes to the vLAPIC's TPR.
 * Bit 4 of the exit qualification distinguishes "mov from %cr8" (set) from
 * "mov to %cr8" (clear).
 */
static int
vmx_emulate_cr8_access(struct vmx *vmx, int vcpu, uint64_t exitqual)
{
	struct vlapic *vlapic;
	uint64_t cr8;
	int regnum;

	/* We only handle mov %cr8 to/from a register at this time. */
	if ((exitqual & 0xe0) != 0x00) {
		return (UNHANDLED);
	}

	vlapic = vm_lapic(vmx->vm, vcpu);
	regnum = (exitqual >> 8) & 0xf;
	if (exitqual & 0x10) {
		cr8 = vlapic_get_cr8(vlapic);
		vmx_set_guest_reg(vmx, vcpu, regnum, cr8);
	} else {
		cr8 = vmx_get_guest_reg(vmx, vcpu, regnum);
		vlapic_set_cr8(vlapic, cr8);
	}

	return (HANDLED);
}

/*
 * From section "Guest Register State" in the Intel SDM: CPL = SS.DPL
 */
static int
vmx_cpl(void)
{
	uint32_t ssar;

	ssar = vmcs_read(VMCS_GUEST_SS_ACCESS_RIGHTS);
	return ((ssar >> 5) & 0x3);
}

/*
 * Derive the guest CPU mode from EFER.LMA, CS.L, and CR0.PE as held in the
 * current VMCS.
 */
static enum vm_cpu_mode
vmx_cpu_mode(void)
{
	uint32_t csar;

	if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LMA) {
		csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS);
		if (csar & 0x2000)
			return (CPU_MODE_64BIT);	/* CS.L = 1 */
		else
			return (CPU_MODE_COMPATIBILITY);
	} else if (vmcs_read(VMCS_GUEST_CR0) & CR0_PE) {
		return (CPU_MODE_PROTECTED);
	} else {
		return (CPU_MODE_REAL);
	}
}

/*
 * Derive the guest paging mode from CR0.PG, CR4.PAE, and EFER.LME as held
 * in the current VMCS.
 */
static enum vm_paging_mode
vmx_paging_mode(void)
{

	if (!(vmcs_read(VMCS_GUEST_CR0) & CR0_PG))
		return (PAGING_MODE_FLAT);
	if (!(vmcs_read(VMCS_GUEST_CR4) & CR4_PAE))
		return (PAGING_MODE_32);
	if (vmcs_read(VMCS_GUEST_IA32_EFER) & EFER_LME)
		return (PAGING_MODE_64);
	else
		return (PAGING_MODE_PAE);
}

/* Snapshot the guest paging state (cr3, CPL, CPU/paging mode) from the VMCS */
static void
vmx_paging_info(struct vm_guest_paging *paging)
{
	paging->cr3 = vmcs_read(VMCS_GUEST_CR3);
	paging->cpl = vmx_cpl();
	paging->cpu_mode = vmx_cpu_mode();
	paging->paging_mode = vmx_paging_mode();
}

/*
 * Populate a vm_exit with the details needed for userspace MMIO emulation
 * (GPA/GLA, %cs base and default-operand-size) and initialize the
 * instruction-emulation context.
 */
static void
vmexit_mmio_emul(struct vm_exit *vmexit, struct vie *vie, uint64_t gpa,
    uint64_t gla)
{
	struct vm_guest_paging paging;
	uint32_t csar;

	vmexit->exitcode = VM_EXITCODE_MMIO_EMUL;
	vmexit->inst_length = 0;
	vmexit->u.mmio_emul.gpa = gpa;
	vmexit->u.mmio_emul.gla = gla;
	vmx_paging_info(&paging);

	switch (paging.cpu_mode) {
	case CPU_MODE_REAL:
		vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
		vmexit->u.mmio_emul.cs_d = 0;
		break;
	case CPU_MODE_PROTECTED:
	case CPU_MODE_COMPATIBILITY:
		vmexit->u.mmio_emul.cs_base = vmcs_read(VMCS_GUEST_CS_BASE);
		csar = vmcs_read(VMCS_GUEST_CS_ACCESS_RIGHTS);
		vmexit->u.mmio_emul.cs_d = SEG_DESC_DEF32(csar);
		break;
	default:
		vmexit->u.mmio_emul.cs_base = 0;
		vmexit->u.mmio_emul.cs_d = 0;
		break;
	}

	vie_init_mmio(vie, NULL, 0, &paging, gpa);
}

/*
 * Decode an in/out exit qualification into the vm_inout description used for
 * port-I/O emulation.  String variants additionally need the address size
 * and segment from the exit instruction-information field.
 */
static void
vmexit_inout(struct vm_exit *vmexit, struct vie *vie, uint64_t qual,
    uint32_t eax)
{
	struct vm_guest_paging paging;
	struct vm_inout *inout;

	inout = &vmexit->u.inout;

	inout->bytes = (qual & 0x7) + 1;
	inout->flags = 0;
	inout->flags |= (qual & 0x8) ? INOUT_IN : 0;
	inout->flags |= (qual & 0x10) ? INOUT_STR : 0;
	inout->flags |= (qual & 0x20) ? INOUT_REP : 0;
	inout->port = (uint16_t)(qual >> 16);
	inout->eax = eax;
	if (inout->flags & INOUT_STR) {
		uint64_t inst_info;

		inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);

		/*
		 * According to the SDM, bits 9:7 encode the address size of the
		 * ins/outs operation, but only values 0/1/2 are expected,
		 * corresponding to 16/32/64 bit sizes.
		 */
		inout->addrsize = 2 << BITX(inst_info, 9, 7);
		VERIFY(inout->addrsize == 2 || inout->addrsize == 4 ||
		    inout->addrsize == 8);

		if (inout->flags & INOUT_IN) {
			/*
			 * The bits describing the segment in INSTRUCTION_INFO
			 * are not defined for ins, leaving it to system
			 * software to assume %es (encoded as 0)
			 */
			inout->segment = 0;
		} else {
			/*
			 * Bits 15-17 encode the segment for OUTS.
			 * This value follows the standard x86 segment order.
1856e0c0d44eSPatrick Mooney */ 1857e0c0d44eSPatrick Mooney inout->segment = (inst_info >> 15) & 0x7; 1858e0c0d44eSPatrick Mooney } 1859e0c0d44eSPatrick Mooney } 1860e0c0d44eSPatrick Mooney 1861e0c0d44eSPatrick Mooney vmexit->exitcode = VM_EXITCODE_INOUT; 1862e0c0d44eSPatrick Mooney vmx_paging_info(&paging); 1863e0c0d44eSPatrick Mooney vie_init_inout(vie, inout, vmexit->inst_length, &paging); 1864e0c0d44eSPatrick Mooney 1865e0c0d44eSPatrick Mooney /* The in/out emulation will handle advancing %rip */ 1866e0c0d44eSPatrick Mooney vmexit->inst_length = 0; 1867bf21cd93STycho Nightingale } 1868bf21cd93STycho Nightingale 1869bf21cd93STycho Nightingale static int 1870bf21cd93STycho Nightingale ept_fault_type(uint64_t ept_qual) 1871bf21cd93STycho Nightingale { 1872bf21cd93STycho Nightingale int fault_type; 1873bf21cd93STycho Nightingale 1874bf21cd93STycho Nightingale if (ept_qual & EPT_VIOLATION_DATA_WRITE) 1875cf409e3fSDan Cross fault_type = PROT_WRITE; 1876bf21cd93STycho Nightingale else if (ept_qual & EPT_VIOLATION_INST_FETCH) 1877cf409e3fSDan Cross fault_type = PROT_EXEC; 1878bf21cd93STycho Nightingale else 1879cf409e3fSDan Cross fault_type = PROT_READ; 1880bf21cd93STycho Nightingale 1881bf21cd93STycho Nightingale return (fault_type); 1882bf21cd93STycho Nightingale } 1883bf21cd93STycho Nightingale 188484659b24SMichael Zeller static bool 1885bf21cd93STycho Nightingale ept_emulation_fault(uint64_t ept_qual) 1886bf21cd93STycho Nightingale { 1887bf21cd93STycho Nightingale int read, write; 1888bf21cd93STycho Nightingale 1889bf21cd93STycho Nightingale /* EPT fault on an instruction fetch doesn't make sense here */ 1890bf21cd93STycho Nightingale if (ept_qual & EPT_VIOLATION_INST_FETCH) 189184659b24SMichael Zeller return (false); 1892bf21cd93STycho Nightingale 1893bf21cd93STycho Nightingale /* EPT fault must be a read fault or a write fault */ 1894bf21cd93STycho Nightingale read = ept_qual & EPT_VIOLATION_DATA_READ ? 
1 : 0; 1895bf21cd93STycho Nightingale write = ept_qual & EPT_VIOLATION_DATA_WRITE ? 1 : 0; 1896bf21cd93STycho Nightingale if ((read | write) == 0) 189784659b24SMichael Zeller return (false); 1898bf21cd93STycho Nightingale 1899bf21cd93STycho Nightingale /* 1900bf21cd93STycho Nightingale * The EPT violation must have been caused by accessing a 1901bf21cd93STycho Nightingale * guest-physical address that is a translation of a guest-linear 1902bf21cd93STycho Nightingale * address. 1903bf21cd93STycho Nightingale */ 1904bf21cd93STycho Nightingale if ((ept_qual & EPT_VIOLATION_GLA_VALID) == 0 || 1905bf21cd93STycho Nightingale (ept_qual & EPT_VIOLATION_XLAT_VALID) == 0) { 190684659b24SMichael Zeller return (false); 1907bf21cd93STycho Nightingale } 1908bf21cd93STycho Nightingale 190984659b24SMichael Zeller return (true); 1910bf21cd93STycho Nightingale } 1911bf21cd93STycho Nightingale 19124c87aefeSPatrick Mooney static __inline int 19134c87aefeSPatrick Mooney apic_access_virtualization(struct vmx *vmx, int vcpuid) 19144c87aefeSPatrick Mooney { 19154c87aefeSPatrick Mooney uint32_t proc_ctls2; 19164c87aefeSPatrick Mooney 19174c87aefeSPatrick Mooney proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 19184c87aefeSPatrick Mooney return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) ? 1 : 0); 19194c87aefeSPatrick Mooney } 19204c87aefeSPatrick Mooney 19214c87aefeSPatrick Mooney static __inline int 19224c87aefeSPatrick Mooney x2apic_virtualization(struct vmx *vmx, int vcpuid) 19234c87aefeSPatrick Mooney { 19244c87aefeSPatrick Mooney uint32_t proc_ctls2; 19254c87aefeSPatrick Mooney 19264c87aefeSPatrick Mooney proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 19274c87aefeSPatrick Mooney return ((proc_ctls2 & PROCBASED2_VIRTUALIZE_X2APIC_MODE) ? 
1 : 0); 19284c87aefeSPatrick Mooney } 19294c87aefeSPatrick Mooney 19304c87aefeSPatrick Mooney static int 19314c87aefeSPatrick Mooney vmx_handle_apic_write(struct vmx *vmx, int vcpuid, struct vlapic *vlapic, 19324c87aefeSPatrick Mooney uint64_t qual) 19334c87aefeSPatrick Mooney { 1934d2f938fdSPatrick Mooney const uint_t offset = APIC_WRITE_OFFSET(qual); 19354c87aefeSPatrick Mooney 19364c87aefeSPatrick Mooney if (!apic_access_virtualization(vmx, vcpuid)) { 19374c87aefeSPatrick Mooney /* 19384c87aefeSPatrick Mooney * In general there should not be any APIC write VM-exits 19394c87aefeSPatrick Mooney * unless APIC-access virtualization is enabled. 19404c87aefeSPatrick Mooney * 19414c87aefeSPatrick Mooney * However self-IPI virtualization can legitimately trigger 19424c87aefeSPatrick Mooney * an APIC-write VM-exit so treat it specially. 19434c87aefeSPatrick Mooney */ 19444c87aefeSPatrick Mooney if (x2apic_virtualization(vmx, vcpuid) && 19454c87aefeSPatrick Mooney offset == APIC_OFFSET_SELF_IPI) { 1946d2f938fdSPatrick Mooney const uint32_t *apic_regs = 1947d2f938fdSPatrick Mooney (uint32_t *)(vlapic->apic_page); 1948d2f938fdSPatrick Mooney const uint32_t vector = 1949d2f938fdSPatrick Mooney apic_regs[APIC_OFFSET_SELF_IPI / 4]; 1950d2f938fdSPatrick Mooney 19514c87aefeSPatrick Mooney vlapic_self_ipi_handler(vlapic, vector); 19524c87aefeSPatrick Mooney return (HANDLED); 19534c87aefeSPatrick Mooney } else 19544c87aefeSPatrick Mooney return (UNHANDLED); 19554c87aefeSPatrick Mooney } 19564c87aefeSPatrick Mooney 19574c87aefeSPatrick Mooney switch (offset) { 19584c87aefeSPatrick Mooney case APIC_OFFSET_ID: 19594c87aefeSPatrick Mooney vlapic_id_write_handler(vlapic); 19604c87aefeSPatrick Mooney break; 19614c87aefeSPatrick Mooney case APIC_OFFSET_LDR: 19624c87aefeSPatrick Mooney vlapic_ldr_write_handler(vlapic); 19634c87aefeSPatrick Mooney break; 19644c87aefeSPatrick Mooney case APIC_OFFSET_DFR: 19654c87aefeSPatrick Mooney vlapic_dfr_write_handler(vlapic); 19664c87aefeSPatrick 
Mooney break; 19674c87aefeSPatrick Mooney case APIC_OFFSET_SVR: 19684c87aefeSPatrick Mooney vlapic_svr_write_handler(vlapic); 19694c87aefeSPatrick Mooney break; 19704c87aefeSPatrick Mooney case APIC_OFFSET_ESR: 19714c87aefeSPatrick Mooney vlapic_esr_write_handler(vlapic); 19724c87aefeSPatrick Mooney break; 19734c87aefeSPatrick Mooney case APIC_OFFSET_ICR_LOW: 1974d2f938fdSPatrick Mooney vlapic_icrlo_write_handler(vlapic); 19754c87aefeSPatrick Mooney break; 19764c87aefeSPatrick Mooney case APIC_OFFSET_CMCI_LVT: 19774c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: 19784c87aefeSPatrick Mooney vlapic_lvt_write_handler(vlapic, offset); 19794c87aefeSPatrick Mooney break; 19804c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_ICR: 19814c87aefeSPatrick Mooney vlapic_icrtmr_write_handler(vlapic); 19824c87aefeSPatrick Mooney break; 19834c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_DCR: 19844c87aefeSPatrick Mooney vlapic_dcr_write_handler(vlapic); 19854c87aefeSPatrick Mooney break; 19864c87aefeSPatrick Mooney default: 1987d2f938fdSPatrick Mooney return (UNHANDLED); 19884c87aefeSPatrick Mooney } 1989d2f938fdSPatrick Mooney return (HANDLED); 19904c87aefeSPatrick Mooney } 19914c87aefeSPatrick Mooney 19924c87aefeSPatrick Mooney static bool 19934c87aefeSPatrick Mooney apic_access_fault(struct vmx *vmx, int vcpuid, uint64_t gpa) 19944c87aefeSPatrick Mooney { 19954c87aefeSPatrick Mooney 19964c87aefeSPatrick Mooney if (apic_access_virtualization(vmx, vcpuid) && 19974c87aefeSPatrick Mooney (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE)) 19984c87aefeSPatrick Mooney return (true); 19994c87aefeSPatrick Mooney else 20004c87aefeSPatrick Mooney return (false); 20014c87aefeSPatrick Mooney } 20024c87aefeSPatrick Mooney 20034c87aefeSPatrick Mooney static int 20044c87aefeSPatrick Mooney vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit) 20054c87aefeSPatrick Mooney { 20064c87aefeSPatrick Mooney uint64_t qual; 
20074c87aefeSPatrick Mooney int access_type, offset, allowed; 2008e0c0d44eSPatrick Mooney struct vie *vie; 20094c87aefeSPatrick Mooney 20104c87aefeSPatrick Mooney if (!apic_access_virtualization(vmx, vcpuid)) 20114c87aefeSPatrick Mooney return (UNHANDLED); 20124c87aefeSPatrick Mooney 20134c87aefeSPatrick Mooney qual = vmexit->u.vmx.exit_qualification; 20144c87aefeSPatrick Mooney access_type = APIC_ACCESS_TYPE(qual); 20154c87aefeSPatrick Mooney offset = APIC_ACCESS_OFFSET(qual); 20164c87aefeSPatrick Mooney 20174c87aefeSPatrick Mooney allowed = 0; 20184c87aefeSPatrick Mooney if (access_type == 0) { 20194c87aefeSPatrick Mooney /* 20204c87aefeSPatrick Mooney * Read data access to the following registers is expected. 20214c87aefeSPatrick Mooney */ 20224c87aefeSPatrick Mooney switch (offset) { 20234c87aefeSPatrick Mooney case APIC_OFFSET_APR: 20244c87aefeSPatrick Mooney case APIC_OFFSET_PPR: 20254c87aefeSPatrick Mooney case APIC_OFFSET_RRR: 20264c87aefeSPatrick Mooney case APIC_OFFSET_CMCI_LVT: 20274c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_CCR: 20284c87aefeSPatrick Mooney allowed = 1; 20294c87aefeSPatrick Mooney break; 20304c87aefeSPatrick Mooney default: 20314c87aefeSPatrick Mooney break; 20324c87aefeSPatrick Mooney } 20334c87aefeSPatrick Mooney } else if (access_type == 1) { 20344c87aefeSPatrick Mooney /* 20354c87aefeSPatrick Mooney * Write data access to the following registers is expected. 20364c87aefeSPatrick Mooney */ 20374c87aefeSPatrick Mooney switch (offset) { 20384c87aefeSPatrick Mooney case APIC_OFFSET_VER: 20394c87aefeSPatrick Mooney case APIC_OFFSET_APR: 20404c87aefeSPatrick Mooney case APIC_OFFSET_PPR: 20414c87aefeSPatrick Mooney case APIC_OFFSET_RRR: 20424c87aefeSPatrick Mooney case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: 20434c87aefeSPatrick Mooney case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: 20444c87aefeSPatrick Mooney case APIC_OFFSET_IRR0 ... 
APIC_OFFSET_IRR7: 20454c87aefeSPatrick Mooney case APIC_OFFSET_CMCI_LVT: 20464c87aefeSPatrick Mooney case APIC_OFFSET_TIMER_CCR: 20474c87aefeSPatrick Mooney allowed = 1; 20484c87aefeSPatrick Mooney break; 20494c87aefeSPatrick Mooney default: 20504c87aefeSPatrick Mooney break; 20514c87aefeSPatrick Mooney } 20524c87aefeSPatrick Mooney } 20534c87aefeSPatrick Mooney 20544c87aefeSPatrick Mooney if (allowed) { 2055e0c0d44eSPatrick Mooney vie = vm_vie_ctx(vmx->vm, vcpuid); 2056e0c0d44eSPatrick Mooney vmexit_mmio_emul(vmexit, vie, DEFAULT_APIC_BASE + offset, 20574c87aefeSPatrick Mooney VIE_INVALID_GLA); 20584c87aefeSPatrick Mooney } 20594c87aefeSPatrick Mooney 20604c87aefeSPatrick Mooney /* 20614c87aefeSPatrick Mooney * Regardless of whether the APIC-access is allowed this handler 20624c87aefeSPatrick Mooney * always returns UNHANDLED: 20634c87aefeSPatrick Mooney * - if the access is allowed then it is handled by emulating the 20644c87aefeSPatrick Mooney * instruction that caused the VM-exit (outside the critical section) 20654c87aefeSPatrick Mooney * - if the access is not allowed then it will be converted to an 20664c87aefeSPatrick Mooney * exitcode of VM_EXITCODE_VMX and will be dealt with in userland. 
20674c87aefeSPatrick Mooney */ 20684c87aefeSPatrick Mooney return (UNHANDLED); 20694c87aefeSPatrick Mooney } 20704c87aefeSPatrick Mooney 20714c87aefeSPatrick Mooney static enum task_switch_reason 20724c87aefeSPatrick Mooney vmx_task_switch_reason(uint64_t qual) 20734c87aefeSPatrick Mooney { 20744c87aefeSPatrick Mooney int reason; 20754c87aefeSPatrick Mooney 20764c87aefeSPatrick Mooney reason = (qual >> 30) & 0x3; 20774c87aefeSPatrick Mooney switch (reason) { 20784c87aefeSPatrick Mooney case 0: 20794c87aefeSPatrick Mooney return (TSR_CALL); 20804c87aefeSPatrick Mooney case 1: 20814c87aefeSPatrick Mooney return (TSR_IRET); 20824c87aefeSPatrick Mooney case 2: 20834c87aefeSPatrick Mooney return (TSR_JMP); 20844c87aefeSPatrick Mooney case 3: 20854c87aefeSPatrick Mooney return (TSR_IDT_GATE); 20864c87aefeSPatrick Mooney default: 20874c87aefeSPatrick Mooney panic("%s: invalid reason %d", __func__, reason); 20884c87aefeSPatrick Mooney } 20894c87aefeSPatrick Mooney } 20904c87aefeSPatrick Mooney 2091bf21cd93STycho Nightingale static int 2092d2f938fdSPatrick Mooney vmx_handle_msr(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit, 2093d2f938fdSPatrick Mooney bool is_wrmsr) 2094bf21cd93STycho Nightingale { 2095d2f938fdSPatrick Mooney struct vmxctx *vmxctx = &vmx->ctx[vcpuid]; 2096d2f938fdSPatrick Mooney const uint32_t ecx = vmxctx->guest_rcx; 2097d2f938fdSPatrick Mooney vm_msr_result_t res; 2098d2f938fdSPatrick Mooney uint64_t val = 0; 2099bf21cd93STycho Nightingale 2100d2f938fdSPatrick Mooney if (is_wrmsr) { 2101d2f938fdSPatrick Mooney vmm_stat_incr(vmx->vm, vcpuid, VMEXIT_WRMSR, 1); 2102d2f938fdSPatrick Mooney val = vmxctx->guest_rdx << 32 | (uint32_t)vmxctx->guest_rax; 2103bf21cd93STycho Nightingale 2104d2f938fdSPatrick Mooney if (vlapic_owned_msr(ecx)) { 2105d2f938fdSPatrick Mooney struct vlapic *vlapic = vm_lapic(vmx->vm, vcpuid); 2106bf21cd93STycho Nightingale 2107d2f938fdSPatrick Mooney res = vlapic_wrmsr(vlapic, ecx, val); 2108d2f938fdSPatrick Mooney } else { 
2109d2f938fdSPatrick Mooney res = vmx_wrmsr(vmx, vcpuid, ecx, val); 2110d2f938fdSPatrick Mooney } 2111d2f938fdSPatrick Mooney } else { 2112d2f938fdSPatrick Mooney vmm_stat_incr(vmx->vm, vcpuid, VMEXIT_RDMSR, 1); 2113bf21cd93STycho Nightingale 2114d2f938fdSPatrick Mooney if (vlapic_owned_msr(ecx)) { 2115d2f938fdSPatrick Mooney struct vlapic *vlapic = vm_lapic(vmx->vm, vcpuid); 2116bf21cd93STycho Nightingale 2117d2f938fdSPatrick Mooney res = vlapic_rdmsr(vlapic, ecx, &val); 2118d2f938fdSPatrick Mooney } else { 2119d2f938fdSPatrick Mooney res = vmx_rdmsr(vmx, vcpuid, ecx, &val); 2120d2f938fdSPatrick Mooney } 2121bf21cd93STycho Nightingale } 2122bf21cd93STycho Nightingale 2123d2f938fdSPatrick Mooney switch (res) { 2124d2f938fdSPatrick Mooney case VMR_OK: 2125d2f938fdSPatrick Mooney /* Store rdmsr result in the appropriate registers */ 2126d2f938fdSPatrick Mooney if (!is_wrmsr) { 2127d2f938fdSPatrick Mooney vmxctx->guest_rax = (uint32_t)val; 2128d2f938fdSPatrick Mooney vmxctx->guest_rdx = val >> 32; 2129d2f938fdSPatrick Mooney } 2130d2f938fdSPatrick Mooney return (HANDLED); 2131d2f938fdSPatrick Mooney case VMR_GP: 2132d2f938fdSPatrick Mooney vm_inject_gp(vmx->vm, vcpuid); 2133d2f938fdSPatrick Mooney return (HANDLED); 2134d2f938fdSPatrick Mooney case VMR_UNHANLDED: 2135d2f938fdSPatrick Mooney vmexit->exitcode = is_wrmsr ? 
2136d2f938fdSPatrick Mooney VM_EXITCODE_WRMSR : VM_EXITCODE_RDMSR; 2137d2f938fdSPatrick Mooney vmexit->u.msr.code = ecx; 2138d2f938fdSPatrick Mooney vmexit->u.msr.wval = val; 2139d2f938fdSPatrick Mooney return (UNHANDLED); 2140d2f938fdSPatrick Mooney default: 2141d2f938fdSPatrick Mooney panic("unexpected msr result %u\n", res); 2142d2f938fdSPatrick Mooney } 2143bf21cd93STycho Nightingale } 2144bf21cd93STycho Nightingale 2145bf21cd93STycho Nightingale static int 2146bf21cd93STycho Nightingale vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) 2147bf21cd93STycho Nightingale { 2148e0c0d44eSPatrick Mooney int error, errcode, errcode_valid, handled; 2149bf21cd93STycho Nightingale struct vmxctx *vmxctx; 2150e0c0d44eSPatrick Mooney struct vie *vie; 21514c87aefeSPatrick Mooney struct vlapic *vlapic; 21524c87aefeSPatrick Mooney struct vm_task_switch *ts; 21533d097f7dSPatrick Mooney uint32_t idtvec_info, intr_info; 21544c87aefeSPatrick Mooney uint32_t intr_type, intr_vec, reason; 21553d097f7dSPatrick Mooney uint64_t qual, gpa; 2156bf21cd93STycho Nightingale 2157bf21cd93STycho Nightingale CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_VIRTUAL_NMI) != 0); 2158bf21cd93STycho Nightingale CTASSERT((PINBASED_CTLS_ONE_SETTING & PINBASED_NMI_EXITING) != 0); 2159bf21cd93STycho Nightingale 2160bf21cd93STycho Nightingale handled = UNHANDLED; 2161bf21cd93STycho Nightingale vmxctx = &vmx->ctx[vcpu]; 21624c87aefeSPatrick Mooney 2163bf21cd93STycho Nightingale qual = vmexit->u.vmx.exit_qualification; 21644c87aefeSPatrick Mooney reason = vmexit->u.vmx.exit_reason; 2165bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_BOGUS; 2166bf21cd93STycho Nightingale 2167bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1); 21684c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, entry, vmx, vcpu, vmexit); 21694c87aefeSPatrick Mooney 21704c87aefeSPatrick Mooney /* 21714c87aefeSPatrick Mooney * VM-entry failures during or after loading guest state. 
21724c87aefeSPatrick Mooney * 21734c87aefeSPatrick Mooney * These VM-exits are uncommon but must be handled specially 21744c87aefeSPatrick Mooney * as most VM-exit fields are not populated as usual. 21754c87aefeSPatrick Mooney */ 2176f703164bSPatrick Mooney if (reason == EXIT_REASON_MCE_DURING_ENTRY) { 21774c87aefeSPatrick Mooney vmm_call_trap(T_MCE); 21784c87aefeSPatrick Mooney return (1); 21794c87aefeSPatrick Mooney } 2180bf21cd93STycho Nightingale 21814c87aefeSPatrick Mooney /* 21824c87aefeSPatrick Mooney * VM exits that can be triggered during event delivery need to 21834c87aefeSPatrick Mooney * be handled specially by re-injecting the event if the IDT 21844c87aefeSPatrick Mooney * vectoring information field's valid bit is set. 21854c87aefeSPatrick Mooney * 21864c87aefeSPatrick Mooney * See "Information for VM Exits During Event Delivery" in Intel SDM 21874c87aefeSPatrick Mooney * for details. 21884c87aefeSPatrick Mooney */ 21893d097f7dSPatrick Mooney idtvec_info = vmcs_read(VMCS_IDT_VECTORING_INFO); 21904c87aefeSPatrick Mooney if (idtvec_info & VMCS_IDT_VEC_VALID) { 21913d097f7dSPatrick Mooney /* Record exit intinfo */ 21923d097f7dSPatrick Mooney VERIFY0(vm_exit_intinfo(vmx->vm, vcpu, 21933d097f7dSPatrick Mooney vmx_idtvec_to_intinfo(idtvec_info))); 21944c87aefeSPatrick Mooney 21954c87aefeSPatrick Mooney /* 21964c87aefeSPatrick Mooney * If 'virtual NMIs' are being used and the VM-exit 21974c87aefeSPatrick Mooney * happened while injecting an NMI during the previous 21984c87aefeSPatrick Mooney * VM-entry, then clear "blocking by NMI" in the 21994c87aefeSPatrick Mooney * Guest Interruptibility-State so the NMI can be 22004c87aefeSPatrick Mooney * reinjected on the subsequent VM-entry. 22014c87aefeSPatrick Mooney * 22024c87aefeSPatrick Mooney * However, if the NMI was being delivered through a task 22034c87aefeSPatrick Mooney * gate, then the new task must start execution with NMIs 22044c87aefeSPatrick Mooney * blocked so don't clear NMI blocking in this case. 
22054c87aefeSPatrick Mooney */ 22064c87aefeSPatrick Mooney intr_type = idtvec_info & VMCS_INTR_T_MASK; 22074c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_NMI) { 22084c87aefeSPatrick Mooney if (reason != EXIT_REASON_TASK_SWITCH) 22094c87aefeSPatrick Mooney vmx_clear_nmi_blocking(vmx, vcpu); 22104c87aefeSPatrick Mooney else 22114c87aefeSPatrick Mooney vmx_assert_nmi_blocking(vmx, vcpu); 22124c87aefeSPatrick Mooney } 22134c87aefeSPatrick Mooney 22144c87aefeSPatrick Mooney /* 22154c87aefeSPatrick Mooney * Update VM-entry instruction length if the event being 22164c87aefeSPatrick Mooney * delivered was a software interrupt or software exception. 22174c87aefeSPatrick Mooney */ 22184c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_SWINTR || 22194c87aefeSPatrick Mooney intr_type == VMCS_INTR_T_PRIV_SWEXCEPTION || 22204c87aefeSPatrick Mooney intr_type == VMCS_INTR_T_SWEXCEPTION) { 22214c87aefeSPatrick Mooney vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); 22224c87aefeSPatrick Mooney } 22234c87aefeSPatrick Mooney } 22244c87aefeSPatrick Mooney 22254c87aefeSPatrick Mooney switch (reason) { 222683b49c54SPatrick Mooney case EXIT_REASON_TRIPLE_FAULT: 222783b49c54SPatrick Mooney (void) vm_suspend(vmx->vm, VM_SUSPEND_TRIPLEFAULT); 222883b49c54SPatrick Mooney handled = HANDLED; 222983b49c54SPatrick Mooney break; 22304c87aefeSPatrick Mooney case EXIT_REASON_TASK_SWITCH: 22314c87aefeSPatrick Mooney ts = &vmexit->u.task_switch; 22324c87aefeSPatrick Mooney ts->tsssel = qual & 0xffff; 22334c87aefeSPatrick Mooney ts->reason = vmx_task_switch_reason(qual); 22344c87aefeSPatrick Mooney ts->ext = 0; 22354c87aefeSPatrick Mooney ts->errcode_valid = 0; 22364c87aefeSPatrick Mooney vmx_paging_info(&ts->paging); 22374c87aefeSPatrick Mooney /* 22384c87aefeSPatrick Mooney * If the task switch was due to a CALL, JMP, IRET, software 22394c87aefeSPatrick Mooney * interrupt (INT n) or software exception (INT3, INTO), 22404c87aefeSPatrick Mooney * then the saved %rip references the 
instruction that caused 22414c87aefeSPatrick Mooney * the task switch. The instruction length field in the VMCS 22424c87aefeSPatrick Mooney * is valid in this case. 22434c87aefeSPatrick Mooney * 22444c87aefeSPatrick Mooney * In all other cases (e.g., NMI, hardware exception) the 22454c87aefeSPatrick Mooney * saved %rip is one that would have been saved in the old TSS 22464c87aefeSPatrick Mooney * had the task switch completed normally so the instruction 22474c87aefeSPatrick Mooney * length field is not needed in this case and is explicitly 22484c87aefeSPatrick Mooney * set to 0. 22494c87aefeSPatrick Mooney */ 22504c87aefeSPatrick Mooney if (ts->reason == TSR_IDT_GATE) { 22514c87aefeSPatrick Mooney KASSERT(idtvec_info & VMCS_IDT_VEC_VALID, 22529dc804b9SPatrick Mooney ("invalid idtvec_info %x for IDT task switch", 22534c87aefeSPatrick Mooney idtvec_info)); 22544c87aefeSPatrick Mooney intr_type = idtvec_info & VMCS_INTR_T_MASK; 22554c87aefeSPatrick Mooney if (intr_type != VMCS_INTR_T_SWINTR && 22564c87aefeSPatrick Mooney intr_type != VMCS_INTR_T_SWEXCEPTION && 22574c87aefeSPatrick Mooney intr_type != VMCS_INTR_T_PRIV_SWEXCEPTION) { 22584c87aefeSPatrick Mooney /* Task switch triggered by external event */ 22594c87aefeSPatrick Mooney ts->ext = 1; 22604c87aefeSPatrick Mooney vmexit->inst_length = 0; 22614c87aefeSPatrick Mooney if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { 22624c87aefeSPatrick Mooney ts->errcode_valid = 1; 22633d097f7dSPatrick Mooney ts->errcode = 22643d097f7dSPatrick Mooney vmcs_read(VMCS_IDT_VECTORING_ERROR); 22654c87aefeSPatrick Mooney } 22664c87aefeSPatrick Mooney } 22674c87aefeSPatrick Mooney } 22684c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_TASK_SWITCH; 22694c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, taskswitch, vmx, vcpu, vmexit, ts); 22704c87aefeSPatrick Mooney break; 2271bf21cd93STycho Nightingale case EXIT_REASON_CR_ACCESS: 2272bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1); 22734c87aefeSPatrick 
Mooney SDT_PROBE4(vmm, vmx, exit, craccess, vmx, vcpu, vmexit, qual); 2274bf21cd93STycho Nightingale switch (qual & 0xf) { 2275bf21cd93STycho Nightingale case 0: 2276bf21cd93STycho Nightingale handled = vmx_emulate_cr0_access(vmx, vcpu, qual); 2277bf21cd93STycho Nightingale break; 2278bf21cd93STycho Nightingale case 4: 2279bf21cd93STycho Nightingale handled = vmx_emulate_cr4_access(vmx, vcpu, qual); 2280bf21cd93STycho Nightingale break; 2281bf21cd93STycho Nightingale case 8: 2282bf21cd93STycho Nightingale handled = vmx_emulate_cr8_access(vmx, vcpu, qual); 2283bf21cd93STycho Nightingale break; 2284bf21cd93STycho Nightingale } 2285bf21cd93STycho Nightingale break; 2286bf21cd93STycho Nightingale case EXIT_REASON_RDMSR: 2287bf21cd93STycho Nightingale case EXIT_REASON_WRMSR: 2288d2f938fdSPatrick Mooney handled = vmx_handle_msr(vmx, vcpu, vmexit, 2289d2f938fdSPatrick Mooney reason == EXIT_REASON_WRMSR); 2290bf21cd93STycho Nightingale break; 2291bf21cd93STycho Nightingale case EXIT_REASON_HLT: 2292bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1); 22934c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, halt, vmx, vcpu, vmexit); 2294bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_HLT; 2295bf21cd93STycho Nightingale vmexit->u.hlt.rflags = vmcs_read(VMCS_GUEST_RFLAGS); 2296bf21cd93STycho Nightingale break; 2297bf21cd93STycho Nightingale case EXIT_REASON_MTF: 2298bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1); 22994c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, mtrap, vmx, vcpu, vmexit); 2300bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_MTRAP; 23014c87aefeSPatrick Mooney vmexit->inst_length = 0; 2302bf21cd93STycho Nightingale break; 2303bf21cd93STycho Nightingale case EXIT_REASON_PAUSE: 2304bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1); 23054c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, pause, vmx, vcpu, vmexit); 2306bf21cd93STycho Nightingale vmexit->exitcode = 
VM_EXITCODE_PAUSE; 2307bf21cd93STycho Nightingale break; 2308bf21cd93STycho Nightingale case EXIT_REASON_INTR_WINDOW: 2309bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INTR_WINDOW, 1); 23104c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, intrwindow, vmx, vcpu, vmexit); 2311bf21cd93STycho Nightingale vmx_clear_int_window_exiting(vmx, vcpu); 2312bf21cd93STycho Nightingale return (1); 2313bf21cd93STycho Nightingale case EXIT_REASON_EXT_INTR: 2314bf21cd93STycho Nightingale /* 2315bf21cd93STycho Nightingale * External interrupts serve only to cause VM exits and allow 2316bf21cd93STycho Nightingale * the host interrupt handler to run. 2317bf21cd93STycho Nightingale * 2318bf21cd93STycho Nightingale * If this external interrupt triggers a virtual interrupt 2319bf21cd93STycho Nightingale * to a VM, then that state will be recorded by the 2320bf21cd93STycho Nightingale * host interrupt handler in the VM's softc. We will inject 2321bf21cd93STycho Nightingale * this virtual interrupt during the subsequent VM enter. 2322bf21cd93STycho Nightingale */ 2323bf21cd93STycho Nightingale intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); 23244c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, interrupt, 23254c87aefeSPatrick Mooney vmx, vcpu, vmexit, intr_info); 2326bf21cd93STycho Nightingale 2327bf21cd93STycho Nightingale /* 2328bf21cd93STycho Nightingale * XXX: Ignore this exit if VMCS_INTR_VALID is not set. 2329bf21cd93STycho Nightingale * This appears to be a bug in VMware Fusion? 
2330bf21cd93STycho Nightingale */ 2331bf21cd93STycho Nightingale if (!(intr_info & VMCS_INTR_VALID)) 2332bf21cd93STycho Nightingale return (1); 2333bf21cd93STycho Nightingale KASSERT((intr_info & VMCS_INTR_VALID) != 0 && 2334bf21cd93STycho Nightingale (intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_HWINTR, 23359dc804b9SPatrick Mooney ("VM exit interruption info invalid: %x", intr_info)); 2336bf21cd93STycho Nightingale vmx_trigger_hostintr(intr_info & 0xff); 2337bf21cd93STycho Nightingale 2338bf21cd93STycho Nightingale /* 2339bf21cd93STycho Nightingale * This is special. We want to treat this as an 'handled' 2340bf21cd93STycho Nightingale * VM-exit but not increment the instruction pointer. 2341bf21cd93STycho Nightingale */ 2342bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXTINT, 1); 2343bf21cd93STycho Nightingale return (1); 2344bf21cd93STycho Nightingale case EXIT_REASON_NMI_WINDOW: 23454c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, nmiwindow, vmx, vcpu, vmexit); 2346bf21cd93STycho Nightingale /* Exit to allow the pending virtual NMI to be injected */ 2347bf21cd93STycho Nightingale if (vm_nmi_pending(vmx->vm, vcpu)) 2348bf21cd93STycho Nightingale vmx_inject_nmi(vmx, vcpu); 2349bf21cd93STycho Nightingale vmx_clear_nmi_window_exiting(vmx, vcpu); 2350bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NMI_WINDOW, 1); 2351bf21cd93STycho Nightingale return (1); 2352bf21cd93STycho Nightingale case EXIT_REASON_INOUT: 2353bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_INOUT, 1); 2354e0c0d44eSPatrick Mooney vie = vm_vie_ctx(vmx->vm, vcpu); 2355e0c0d44eSPatrick Mooney vmexit_inout(vmexit, vie, qual, (uint32_t)vmxctx->guest_rax); 23564c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, inout, vmx, vcpu, vmexit); 2357bf21cd93STycho Nightingale break; 2358bf21cd93STycho Nightingale case EXIT_REASON_CPUID: 2359bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CPUID, 1); 23604c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, 
exit, cpuid, vmx, vcpu, vmexit); 2361bf21cd93STycho Nightingale handled = vmx_handle_cpuid(vmx->vm, vcpu, vmxctx); 2362bf21cd93STycho Nightingale break; 2363bf21cd93STycho Nightingale case EXIT_REASON_EXCEPTION: 2364bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_EXCEPTION, 1); 2365bf21cd93STycho Nightingale intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); 2366bf21cd93STycho Nightingale KASSERT((intr_info & VMCS_INTR_VALID) != 0, 23679dc804b9SPatrick Mooney ("VM exit interruption info invalid: %x", intr_info)); 23684c87aefeSPatrick Mooney 23694c87aefeSPatrick Mooney intr_vec = intr_info & 0xff; 23704c87aefeSPatrick Mooney intr_type = intr_info & VMCS_INTR_T_MASK; 2371bf21cd93STycho Nightingale 2372bf21cd93STycho Nightingale /* 2373bf21cd93STycho Nightingale * If Virtual NMIs control is 1 and the VM-exit is due to a 2374bf21cd93STycho Nightingale * fault encountered during the execution of IRET then we must 2375bf21cd93STycho Nightingale * restore the state of "virtual-NMI blocking" before resuming 2376bf21cd93STycho Nightingale * the guest. 2377bf21cd93STycho Nightingale * 2378bf21cd93STycho Nightingale * See "Resuming Guest Software after Handling an Exception". 23794c87aefeSPatrick Mooney * See "Information for VM Exits Due to Vectored Events". 2380bf21cd93STycho Nightingale */ 2381bf21cd93STycho Nightingale if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && 23824c87aefeSPatrick Mooney (intr_vec != IDT_DF) && 2383bf21cd93STycho Nightingale (intr_info & EXIT_QUAL_NMIUDTI) != 0) 2384bf21cd93STycho Nightingale vmx_restore_nmi_blocking(vmx, vcpu); 2385bf21cd93STycho Nightingale 2386bf21cd93STycho Nightingale /* 2387bf21cd93STycho Nightingale * The NMI has already been handled in vmx_exit_handle_nmi(). 
2388bf21cd93STycho Nightingale */ 23894c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_NMI) 2390bf21cd93STycho Nightingale return (1); 23914c87aefeSPatrick Mooney 23924c87aefeSPatrick Mooney /* 23934c87aefeSPatrick Mooney * Call the machine check handler by hand. Also don't reflect 23944c87aefeSPatrick Mooney * the machine check back into the guest. 23954c87aefeSPatrick Mooney */ 23964c87aefeSPatrick Mooney if (intr_vec == IDT_MC) { 23974c87aefeSPatrick Mooney vmm_call_trap(T_MCE); 23984c87aefeSPatrick Mooney return (1); 23994c87aefeSPatrick Mooney } 24004c87aefeSPatrick Mooney 2401154972afSPatrick Mooney /* 2402154972afSPatrick Mooney * If the hypervisor has requested user exits for 2403154972afSPatrick Mooney * debug exceptions, bounce them out to userland. 2404154972afSPatrick Mooney */ 24052699b94cSPatrick Mooney if (intr_type == VMCS_INTR_T_SWEXCEPTION && 24062699b94cSPatrick Mooney intr_vec == IDT_BP && 2407154972afSPatrick Mooney (vmx->cap[vcpu].set & (1 << VM_CAP_BPT_EXIT))) { 2408154972afSPatrick Mooney vmexit->exitcode = VM_EXITCODE_BPT; 2409154972afSPatrick Mooney vmexit->u.bpt.inst_length = vmexit->inst_length; 2410154972afSPatrick Mooney vmexit->inst_length = 0; 2411154972afSPatrick Mooney break; 2412154972afSPatrick Mooney } 2413154972afSPatrick Mooney 24144c87aefeSPatrick Mooney if (intr_vec == IDT_PF) { 2415007ca332SPatrick Mooney vmxctx->guest_cr2 = qual; 24164c87aefeSPatrick Mooney } 24174c87aefeSPatrick Mooney 24184c87aefeSPatrick Mooney /* 24194c87aefeSPatrick Mooney * Software exceptions exhibit trap-like behavior. This in 24204c87aefeSPatrick Mooney * turn requires populating the VM-entry instruction length 24214c87aefeSPatrick Mooney * so that the %rip in the trap frame is past the INT3/INTO 24224c87aefeSPatrick Mooney * instruction. 
24234c87aefeSPatrick Mooney */ 24244c87aefeSPatrick Mooney if (intr_type == VMCS_INTR_T_SWEXCEPTION) 24254c87aefeSPatrick Mooney vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length); 24264c87aefeSPatrick Mooney 24274c87aefeSPatrick Mooney /* Reflect all other exceptions back into the guest */ 24284c87aefeSPatrick Mooney errcode_valid = errcode = 0; 24294c87aefeSPatrick Mooney if (intr_info & VMCS_INTR_DEL_ERRCODE) { 24304c87aefeSPatrick Mooney errcode_valid = 1; 24314c87aefeSPatrick Mooney errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE); 24324c87aefeSPatrick Mooney } 24334c87aefeSPatrick Mooney SDT_PROBE5(vmm, vmx, exit, exception, 24344c87aefeSPatrick Mooney vmx, vcpu, vmexit, intr_vec, errcode); 24354c87aefeSPatrick Mooney error = vm_inject_exception(vmx->vm, vcpu, intr_vec, 24364c87aefeSPatrick Mooney errcode_valid, errcode, 0); 24374c87aefeSPatrick Mooney KASSERT(error == 0, ("%s: vm_inject_exception error %d", 24384c87aefeSPatrick Mooney __func__, error)); 24394c87aefeSPatrick Mooney return (1); 24404c87aefeSPatrick Mooney 2441bf21cd93STycho Nightingale case EXIT_REASON_EPT_FAULT: 24424c87aefeSPatrick Mooney /* 24434c87aefeSPatrick Mooney * If 'gpa' lies within the address space allocated to 24444c87aefeSPatrick Mooney * memory then this must be a nested page fault otherwise 24454c87aefeSPatrick Mooney * this must be an instruction that accesses MMIO space. 
24464c87aefeSPatrick Mooney */ 24473d097f7dSPatrick Mooney gpa = vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS); 24484c87aefeSPatrick Mooney if (vm_mem_allocated(vmx->vm, vcpu, gpa) || 24494c87aefeSPatrick Mooney apic_access_fault(vmx, vcpu, gpa)) { 24504c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_PAGING; 24514c87aefeSPatrick Mooney vmexit->inst_length = 0; 24524c87aefeSPatrick Mooney vmexit->u.paging.gpa = gpa; 24534c87aefeSPatrick Mooney vmexit->u.paging.fault_type = ept_fault_type(qual); 24544c87aefeSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1); 24554c87aefeSPatrick Mooney SDT_PROBE5(vmm, vmx, exit, nestedfault, 24564c87aefeSPatrick Mooney vmx, vcpu, vmexit, gpa, qual); 24574c87aefeSPatrick Mooney } else if (ept_emulation_fault(qual)) { 2458e0c0d44eSPatrick Mooney vie = vm_vie_ctx(vmx->vm, vcpu); 24593d097f7dSPatrick Mooney vmexit_mmio_emul(vmexit, vie, gpa, 24603d097f7dSPatrick Mooney vmcs_read(VMCS_GUEST_LINEAR_ADDRESS)); 2461e0c0d44eSPatrick Mooney vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MMIO_EMUL, 1); 24624c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, mmiofault, 24634c87aefeSPatrick Mooney vmx, vcpu, vmexit, gpa); 2464bf21cd93STycho Nightingale } 24654c87aefeSPatrick Mooney /* 24664c87aefeSPatrick Mooney * If Virtual NMIs control is 1 and the VM-exit is due to an 24674c87aefeSPatrick Mooney * EPT fault during the execution of IRET then we must restore 24684c87aefeSPatrick Mooney * the state of "virtual-NMI blocking" before resuming. 24694c87aefeSPatrick Mooney * 24704c87aefeSPatrick Mooney * See description of "NMI unblocking due to IRET" in 24714c87aefeSPatrick Mooney * "Exit Qualification for EPT Violations". 
24724c87aefeSPatrick Mooney */ 24734c87aefeSPatrick Mooney if ((idtvec_info & VMCS_IDT_VEC_VALID) == 0 && 24744c87aefeSPatrick Mooney (qual & EXIT_QUAL_NMIUDTI) != 0) 24754c87aefeSPatrick Mooney vmx_restore_nmi_blocking(vmx, vcpu); 24764c87aefeSPatrick Mooney break; 24774c87aefeSPatrick Mooney case EXIT_REASON_VIRTUALIZED_EOI: 24784c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_IOAPIC_EOI; 24794c87aefeSPatrick Mooney vmexit->u.ioapic_eoi.vector = qual & 0xFF; 24804c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, eoi, vmx, vcpu, vmexit); 24814c87aefeSPatrick Mooney vmexit->inst_length = 0; /* trap-like */ 24824c87aefeSPatrick Mooney break; 24834c87aefeSPatrick Mooney case EXIT_REASON_APIC_ACCESS: 24844c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, apicaccess, vmx, vcpu, vmexit); 24854c87aefeSPatrick Mooney handled = vmx_handle_apic_access(vmx, vcpu, vmexit); 24864c87aefeSPatrick Mooney break; 24874c87aefeSPatrick Mooney case EXIT_REASON_APIC_WRITE: 24884c87aefeSPatrick Mooney /* 24894c87aefeSPatrick Mooney * APIC-write VM exit is trap-like so the %rip is already 24904c87aefeSPatrick Mooney * pointing to the next instruction. 
24914c87aefeSPatrick Mooney */ 24924c87aefeSPatrick Mooney vmexit->inst_length = 0; 24934c87aefeSPatrick Mooney vlapic = vm_lapic(vmx->vm, vcpu); 24944c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, apicwrite, 24954c87aefeSPatrick Mooney vmx, vcpu, vmexit, vlapic); 24964c87aefeSPatrick Mooney handled = vmx_handle_apic_write(vmx, vcpu, vlapic, qual); 24974c87aefeSPatrick Mooney break; 24984c87aefeSPatrick Mooney case EXIT_REASON_XSETBV: 24994c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, xsetbv, vmx, vcpu, vmexit); 25004c87aefeSPatrick Mooney handled = vmx_emulate_xsetbv(vmx, vcpu, vmexit); 25014c87aefeSPatrick Mooney break; 25024c87aefeSPatrick Mooney case EXIT_REASON_MONITOR: 25034c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, monitor, vmx, vcpu, vmexit); 25044c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_MONITOR; 25054c87aefeSPatrick Mooney break; 25064c87aefeSPatrick Mooney case EXIT_REASON_MWAIT: 25074c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, exit, mwait, vmx, vcpu, vmexit); 25084c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_MWAIT; 25094c87aefeSPatrick Mooney break; 2510154972afSPatrick Mooney case EXIT_REASON_TPR: 2511154972afSPatrick Mooney vlapic = vm_lapic(vmx->vm, vcpu); 2512154972afSPatrick Mooney vlapic_sync_tpr(vlapic); 2513154972afSPatrick Mooney vmexit->inst_length = 0; 2514154972afSPatrick Mooney handled = HANDLED; 2515154972afSPatrick Mooney break; 25164c87aefeSPatrick Mooney case EXIT_REASON_VMCALL: 25174c87aefeSPatrick Mooney case EXIT_REASON_VMCLEAR: 25184c87aefeSPatrick Mooney case EXIT_REASON_VMLAUNCH: 25194c87aefeSPatrick Mooney case EXIT_REASON_VMPTRLD: 25204c87aefeSPatrick Mooney case EXIT_REASON_VMPTRST: 25214c87aefeSPatrick Mooney case EXIT_REASON_VMREAD: 25224c87aefeSPatrick Mooney case EXIT_REASON_VMRESUME: 25234c87aefeSPatrick Mooney case EXIT_REASON_VMWRITE: 25244c87aefeSPatrick Mooney case EXIT_REASON_VMXOFF: 25254c87aefeSPatrick Mooney case EXIT_REASON_VMXON: 25264c87aefeSPatrick Mooney SDT_PROBE3(vmm, vmx, 
exit, vminsn, vmx, vcpu, vmexit); 25274c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_VMINSN; 2528bf21cd93STycho Nightingale break; 2529bf21cd93STycho Nightingale default: 25304c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, unknown, 25314c87aefeSPatrick Mooney vmx, vcpu, vmexit, reason); 2532bf21cd93STycho Nightingale vmm_stat_incr(vmx->vm, vcpu, VMEXIT_UNKNOWN, 1); 2533bf21cd93STycho Nightingale break; 2534bf21cd93STycho Nightingale } 2535bf21cd93STycho Nightingale 2536bf21cd93STycho Nightingale if (handled) { 2537bf21cd93STycho Nightingale /* 2538bf21cd93STycho Nightingale * It is possible that control is returned to userland 2539bf21cd93STycho Nightingale * even though we were able to handle the VM exit in the 2540bf21cd93STycho Nightingale * kernel. 2541bf21cd93STycho Nightingale * 2542bf21cd93STycho Nightingale * In such a case we want to make sure that the userland 2543bf21cd93STycho Nightingale * restarts guest execution at the instruction *after* 2544bf21cd93STycho Nightingale * the one we just processed. Therefore we update the 2545bf21cd93STycho Nightingale * guest rip in the VMCS and in 'vmexit'. 2546bf21cd93STycho Nightingale */ 2547bf21cd93STycho Nightingale vmexit->rip += vmexit->inst_length; 2548bf21cd93STycho Nightingale vmexit->inst_length = 0; 25494c87aefeSPatrick Mooney vmcs_write(VMCS_GUEST_RIP, vmexit->rip); 2550bf21cd93STycho Nightingale } else { 2551bf21cd93STycho Nightingale if (vmexit->exitcode == VM_EXITCODE_BOGUS) { 2552bf21cd93STycho Nightingale /* 2553bf21cd93STycho Nightingale * If this VM exit was not claimed by anybody then 2554bf21cd93STycho Nightingale * treat it as a generic VMX exit. 
2555bf21cd93STycho Nightingale */ 2556bf21cd93STycho Nightingale vmexit->exitcode = VM_EXITCODE_VMX; 2557bf21cd93STycho Nightingale vmexit->u.vmx.status = VM_SUCCESS; 2558bf21cd93STycho Nightingale vmexit->u.vmx.inst_type = 0; 2559bf21cd93STycho Nightingale vmexit->u.vmx.inst_error = 0; 2560bf21cd93STycho Nightingale } else { 2561bf21cd93STycho Nightingale /* 2562bf21cd93STycho Nightingale * The exitcode and collateral have been populated. 2563bf21cd93STycho Nightingale * The VM exit will be processed further in userland. 2564bf21cd93STycho Nightingale */ 2565bf21cd93STycho Nightingale } 2566bf21cd93STycho Nightingale } 25674c87aefeSPatrick Mooney 25684c87aefeSPatrick Mooney SDT_PROBE4(vmm, vmx, exit, return, 25694c87aefeSPatrick Mooney vmx, vcpu, vmexit, handled); 25704c87aefeSPatrick Mooney return (handled); 25714c87aefeSPatrick Mooney } 25724c87aefeSPatrick Mooney 25734c87aefeSPatrick Mooney static void 25744c87aefeSPatrick Mooney vmx_exit_inst_error(struct vmxctx *vmxctx, int rc, struct vm_exit *vmexit) 25754c87aefeSPatrick Mooney { 25764c87aefeSPatrick Mooney 25774c87aefeSPatrick Mooney KASSERT(vmxctx->inst_fail_status != VM_SUCCESS, 25784c87aefeSPatrick Mooney ("vmx_exit_inst_error: invalid inst_fail_status %d", 25794c87aefeSPatrick Mooney vmxctx->inst_fail_status)); 25804c87aefeSPatrick Mooney 25814c87aefeSPatrick Mooney vmexit->inst_length = 0; 25824c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_VMX; 25834c87aefeSPatrick Mooney vmexit->u.vmx.status = vmxctx->inst_fail_status; 25843d097f7dSPatrick Mooney vmexit->u.vmx.inst_error = vmcs_read(VMCS_INSTRUCTION_ERROR); 25854c87aefeSPatrick Mooney vmexit->u.vmx.exit_reason = ~0; 25864c87aefeSPatrick Mooney vmexit->u.vmx.exit_qualification = ~0; 25874c87aefeSPatrick Mooney 25884c87aefeSPatrick Mooney switch (rc) { 25894c87aefeSPatrick Mooney case VMX_VMRESUME_ERROR: 25904c87aefeSPatrick Mooney case VMX_VMLAUNCH_ERROR: 25914c87aefeSPatrick Mooney case VMX_INVEPT_ERROR: 25924c87aefeSPatrick Mooney case 
VMX_VMWRITE_ERROR: 25934c87aefeSPatrick Mooney vmexit->u.vmx.inst_type = rc; 25944c87aefeSPatrick Mooney break; 25954c87aefeSPatrick Mooney default: 25964c87aefeSPatrick Mooney panic("vm_exit_inst_error: vmx_enter_guest returned %d", rc); 25974c87aefeSPatrick Mooney } 25984c87aefeSPatrick Mooney } 25994c87aefeSPatrick Mooney 26004c87aefeSPatrick Mooney /* 26014c87aefeSPatrick Mooney * If the NMI-exiting VM execution control is set to '1' then an NMI in 26024c87aefeSPatrick Mooney * non-root operation causes a VM-exit. NMI blocking is in effect so it is 26034c87aefeSPatrick Mooney * sufficient to simply vector to the NMI handler via a software interrupt. 26044c87aefeSPatrick Mooney * However, this must be done before maskable interrupts are enabled 26054c87aefeSPatrick Mooney * otherwise the "iret" issued by an interrupt handler will incorrectly 26064c87aefeSPatrick Mooney * clear NMI blocking. 26074c87aefeSPatrick Mooney */ 26084c87aefeSPatrick Mooney static __inline void 26090153d828SPatrick Mooney vmx_exit_handle_possible_nmi(struct vm_exit *vmexit) 26104c87aefeSPatrick Mooney { 26110153d828SPatrick Mooney ASSERT(!interrupts_enabled()); 26124c87aefeSPatrick Mooney 26130153d828SPatrick Mooney if (vmexit->u.vmx.exit_reason == EXIT_REASON_EXCEPTION) { 26140153d828SPatrick Mooney uint32_t intr_info = vmcs_read(VMCS_EXIT_INTR_INFO); 26150153d828SPatrick Mooney ASSERT(intr_info & VMCS_INTR_VALID); 26164c87aefeSPatrick Mooney 26170153d828SPatrick Mooney if ((intr_info & VMCS_INTR_T_MASK) == VMCS_INTR_T_NMI) { 26180153d828SPatrick Mooney ASSERT3U(intr_info & 0xff, ==, IDT_NMI); 26190153d828SPatrick Mooney vmm_call_trap(T_NMIFLT); 26200153d828SPatrick Mooney } 26214c87aefeSPatrick Mooney } 26224c87aefeSPatrick Mooney } 26234c87aefeSPatrick Mooney 26244c87aefeSPatrick Mooney static __inline void 26254c87aefeSPatrick Mooney vmx_dr_enter_guest(struct vmxctx *vmxctx) 26264c87aefeSPatrick Mooney { 2627db8733f5SPatrick Mooney uint64_t rflags; 26284c87aefeSPatrick Mooney 
26294c87aefeSPatrick Mooney /* Save host control debug registers. */ 26304c87aefeSPatrick Mooney vmxctx->host_dr7 = rdr7(); 26314c87aefeSPatrick Mooney vmxctx->host_debugctl = rdmsr(MSR_DEBUGCTLMSR); 26324c87aefeSPatrick Mooney 26334c87aefeSPatrick Mooney /* 26344c87aefeSPatrick Mooney * Disable debugging in DR7 and DEBUGCTL to avoid triggering 26354c87aefeSPatrick Mooney * exceptions in the host based on the guest DRx values. The 26364c87aefeSPatrick Mooney * guest DR7 and DEBUGCTL are saved/restored in the VMCS. 26374c87aefeSPatrick Mooney */ 26384c87aefeSPatrick Mooney load_dr7(0); 26394c87aefeSPatrick Mooney wrmsr(MSR_DEBUGCTLMSR, 0); 26404c87aefeSPatrick Mooney 26414c87aefeSPatrick Mooney /* 26424c87aefeSPatrick Mooney * Disable single stepping the kernel to avoid corrupting the 26434c87aefeSPatrick Mooney * guest DR6. A debugger might still be able to corrupt the 26444c87aefeSPatrick Mooney * guest DR6 by setting a breakpoint after this point and then 26454c87aefeSPatrick Mooney * single stepping. 26464c87aefeSPatrick Mooney */ 26474c87aefeSPatrick Mooney rflags = read_rflags(); 26484c87aefeSPatrick Mooney vmxctx->host_tf = rflags & PSL_T; 26494c87aefeSPatrick Mooney write_rflags(rflags & ~PSL_T); 26504c87aefeSPatrick Mooney 26514c87aefeSPatrick Mooney /* Save host debug registers. */ 26524c87aefeSPatrick Mooney vmxctx->host_dr0 = rdr0(); 26534c87aefeSPatrick Mooney vmxctx->host_dr1 = rdr1(); 26544c87aefeSPatrick Mooney vmxctx->host_dr2 = rdr2(); 26554c87aefeSPatrick Mooney vmxctx->host_dr3 = rdr3(); 26564c87aefeSPatrick Mooney vmxctx->host_dr6 = rdr6(); 26574c87aefeSPatrick Mooney 26584c87aefeSPatrick Mooney /* Restore guest debug registers. 
*/ 26594c87aefeSPatrick Mooney load_dr0(vmxctx->guest_dr0); 26604c87aefeSPatrick Mooney load_dr1(vmxctx->guest_dr1); 26614c87aefeSPatrick Mooney load_dr2(vmxctx->guest_dr2); 26624c87aefeSPatrick Mooney load_dr3(vmxctx->guest_dr3); 26634c87aefeSPatrick Mooney load_dr6(vmxctx->guest_dr6); 26644c87aefeSPatrick Mooney } 26654c87aefeSPatrick Mooney 26664c87aefeSPatrick Mooney static __inline void 26674c87aefeSPatrick Mooney vmx_dr_leave_guest(struct vmxctx *vmxctx) 26684c87aefeSPatrick Mooney { 26694c87aefeSPatrick Mooney 26704c87aefeSPatrick Mooney /* Save guest debug registers. */ 26714c87aefeSPatrick Mooney vmxctx->guest_dr0 = rdr0(); 26724c87aefeSPatrick Mooney vmxctx->guest_dr1 = rdr1(); 26734c87aefeSPatrick Mooney vmxctx->guest_dr2 = rdr2(); 26744c87aefeSPatrick Mooney vmxctx->guest_dr3 = rdr3(); 26754c87aefeSPatrick Mooney vmxctx->guest_dr6 = rdr6(); 26764c87aefeSPatrick Mooney 26774c87aefeSPatrick Mooney /* 26784c87aefeSPatrick Mooney * Restore host debug registers. Restore DR7, DEBUGCTL, and 26794c87aefeSPatrick Mooney * PSL_T last. 
26804c87aefeSPatrick Mooney */ 26814c87aefeSPatrick Mooney load_dr0(vmxctx->host_dr0); 26824c87aefeSPatrick Mooney load_dr1(vmxctx->host_dr1); 26834c87aefeSPatrick Mooney load_dr2(vmxctx->host_dr2); 26844c87aefeSPatrick Mooney load_dr3(vmxctx->host_dr3); 26854c87aefeSPatrick Mooney load_dr6(vmxctx->host_dr6); 26864c87aefeSPatrick Mooney wrmsr(MSR_DEBUGCTLMSR, vmxctx->host_debugctl); 26874c87aefeSPatrick Mooney load_dr7(vmxctx->host_dr7); 26884c87aefeSPatrick Mooney write_rflags(read_rflags() | vmxctx->host_tf); 2689bf21cd93STycho Nightingale } 2690bf21cd93STycho Nightingale 2691bf21cd93STycho Nightingale static int 26920153d828SPatrick Mooney vmx_run(void *arg, int vcpu, uint64_t rip) 2693bf21cd93STycho Nightingale { 26944c87aefeSPatrick Mooney int rc, handled, launched; 2695bf21cd93STycho Nightingale struct vmx *vmx; 2696bf21cd93STycho Nightingale struct vm *vm; 2697bf21cd93STycho Nightingale struct vmxctx *vmxctx; 2698007ca332SPatrick Mooney uintptr_t vmcs_pa; 2699bf21cd93STycho Nightingale struct vm_exit *vmexit; 2700bf21cd93STycho Nightingale struct vlapic *vlapic; 27014c87aefeSPatrick Mooney uint32_t exit_reason; 2702c74a40a5SPatrick Mooney bool tpr_shadow_active; 27030153d828SPatrick Mooney vm_client_t *vmc; 27044c87aefeSPatrick Mooney 2705bf21cd93STycho Nightingale vmx = arg; 2706bf21cd93STycho Nightingale vm = vmx->vm; 2707007ca332SPatrick Mooney vmcs_pa = vmx->vmcs_pa[vcpu]; 2708bf21cd93STycho Nightingale vmxctx = &vmx->ctx[vcpu]; 2709bf21cd93STycho Nightingale vlapic = vm_lapic(vm, vcpu); 27104c87aefeSPatrick Mooney vmexit = vm_exitinfo(vm, vcpu); 27110153d828SPatrick Mooney vmc = vm_get_vmclient(vm, vcpu); 27124c87aefeSPatrick Mooney launched = 0; 2713c74a40a5SPatrick Mooney tpr_shadow_active = vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW) && 2714c74a40a5SPatrick Mooney !vmx_cap_en(vmx, VMX_CAP_APICV) && 2715c74a40a5SPatrick Mooney (vmx->cap[vcpu].proc_ctls & PROCBASED_USE_TPR_SHADOW) != 0; 2716bf21cd93STycho Nightingale 2717bf21cd93STycho Nightingale 
vmx_msr_guest_enter(vmx, vcpu); 2718bf21cd93STycho Nightingale 2719007ca332SPatrick Mooney vmcs_load(vmcs_pa); 2720bf21cd93STycho Nightingale 27214c87aefeSPatrick Mooney VERIFY(vmx->vmcs_state[vcpu] == VS_NONE && curthread->t_preempt != 0); 27224c87aefeSPatrick Mooney vmx->vmcs_state[vcpu] = VS_LOADED; 27234c87aefeSPatrick Mooney 2724bf21cd93STycho Nightingale /* 2725bf21cd93STycho Nightingale * XXX 2726bf21cd93STycho Nightingale * We do this every time because we may setup the virtual machine 2727bf21cd93STycho Nightingale * from a different process than the one that actually runs it. 2728bf21cd93STycho Nightingale * 2729bf21cd93STycho Nightingale * If the life of a virtual machine was spent entirely in the context 27304c87aefeSPatrick Mooney * of a single process we could do this once in vmx_vminit(). 2731bf21cd93STycho Nightingale */ 2732bf21cd93STycho Nightingale vmcs_write(VMCS_HOST_CR3, rcr3()); 2733bf21cd93STycho Nightingale 2734bf21cd93STycho Nightingale vmcs_write(VMCS_GUEST_RIP, rip); 27350153d828SPatrick Mooney vmx_set_pcpu_defaults(vmx, vcpu); 2736bf21cd93STycho Nightingale do { 2737c74a40a5SPatrick Mooney enum event_inject_state inject_state; 27380153d828SPatrick Mooney uint64_t eptgen; 2739c74a40a5SPatrick Mooney 27403d097f7dSPatrick Mooney ASSERT3U(vmcs_read(VMCS_GUEST_RIP), ==, rip); 27414c87aefeSPatrick Mooney 27424c87aefeSPatrick Mooney handled = UNHANDLED; 2743c74a40a5SPatrick Mooney 2744c74a40a5SPatrick Mooney /* 2745c74a40a5SPatrick Mooney * Perform initial event/exception/interrupt injection before 2746c74a40a5SPatrick Mooney * host CPU interrupts are disabled. 2747c74a40a5SPatrick Mooney */ 2748c74a40a5SPatrick Mooney inject_state = vmx_inject_events(vmx, vcpu, rip); 2749c74a40a5SPatrick Mooney 27504c87aefeSPatrick Mooney /* 27514c87aefeSPatrick Mooney * Interrupts are disabled from this point on until the 27524c87aefeSPatrick Mooney * guest starts executing. 
This is done for the following 27534c87aefeSPatrick Mooney * reasons: 27544c87aefeSPatrick Mooney * 27554c87aefeSPatrick Mooney * If an AST is asserted on this thread after the check below, 27564c87aefeSPatrick Mooney * then the IPI_AST notification will not be lost, because it 27574c87aefeSPatrick Mooney * will cause a VM exit due to external interrupt as soon as 27584c87aefeSPatrick Mooney * the guest state is loaded. 27594c87aefeSPatrick Mooney * 2760c74a40a5SPatrick Mooney * A posted interrupt after vmx_inject_vlapic() will not be 2761c74a40a5SPatrick Mooney * "lost" because it will be held pending in the host APIC 2762c74a40a5SPatrick Mooney * because interrupts are disabled. The pending interrupt will 2763c74a40a5SPatrick Mooney * be recognized as soon as the guest state is loaded. 27644c87aefeSPatrick Mooney * 27650153d828SPatrick Mooney * The same reasoning applies to the IPI generated by vmspace 27660153d828SPatrick Mooney * invalidation. 27674c87aefeSPatrick Mooney */ 27684c87aefeSPatrick Mooney disable_intr(); 2769c74a40a5SPatrick Mooney 2770c74a40a5SPatrick Mooney /* 2771c74a40a5SPatrick Mooney * If not precluded by existing events, inject any interrupt 2772c74a40a5SPatrick Mooney * pending on the vLAPIC. As a lock-less operation, it is safe 2773c74a40a5SPatrick Mooney * (and prudent) to perform with host CPU interrupts disabled. 2774c74a40a5SPatrick Mooney */ 2775c74a40a5SPatrick Mooney if (inject_state == EIS_CAN_INJECT) { 2776c74a40a5SPatrick Mooney inject_state = vmx_inject_vlapic(vmx, vcpu, vlapic); 27774c87aefeSPatrick Mooney } 27784c87aefeSPatrick Mooney 27794c87aefeSPatrick Mooney /* 27802606939dSPatrick Mooney * Check for vCPU bail-out conditions. This must be done after 27812606939dSPatrick Mooney * vmx_inject_events() to detect a triple-fault condition. 
27824c87aefeSPatrick Mooney */ 27832606939dSPatrick Mooney if (vcpu_entry_bailout_checks(vmx->vm, vcpu, rip)) { 27844c87aefeSPatrick Mooney enable_intr(); 27854c87aefeSPatrick Mooney break; 27864c87aefeSPatrick Mooney } 27874c87aefeSPatrick Mooney 27882606939dSPatrick Mooney if (vcpu_run_state_pending(vm, vcpu)) { 27894c87aefeSPatrick Mooney enable_intr(); 27902606939dSPatrick Mooney vm_exit_run_state(vmx->vm, vcpu, rip); 27914c87aefeSPatrick Mooney break; 27924c87aefeSPatrick Mooney } 27934c87aefeSPatrick Mooney 2794c74a40a5SPatrick Mooney /* 2795c74a40a5SPatrick Mooney * If subsequent activity queued events which require injection 2796c74a40a5SPatrick Mooney * handling, take another lap to handle them. 2797c74a40a5SPatrick Mooney */ 2798c74a40a5SPatrick Mooney if (vmx_inject_recheck(vmx, vcpu, inject_state)) { 2799c74a40a5SPatrick Mooney enable_intr(); 2800c74a40a5SPatrick Mooney handled = HANDLED; 2801c74a40a5SPatrick Mooney continue; 2802c74a40a5SPatrick Mooney } 2803c74a40a5SPatrick Mooney 28044c87aefeSPatrick Mooney if ((rc = smt_acquire()) != 1) { 28054c87aefeSPatrick Mooney enable_intr(); 28064c87aefeSPatrick Mooney vmexit->rip = rip; 28074c87aefeSPatrick Mooney vmexit->inst_length = 0; 28084c87aefeSPatrick Mooney if (rc == -1) { 28094c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_HT; 28104c87aefeSPatrick Mooney } else { 28114c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_BOGUS; 28124c87aefeSPatrick Mooney handled = HANDLED; 2813bf21cd93STycho Nightingale } 2814bf21cd93STycho Nightingale break; 2815bf21cd93STycho Nightingale } 28164c87aefeSPatrick Mooney 28174c87aefeSPatrick Mooney /* 28184c87aefeSPatrick Mooney * If this thread has gone off-cpu due to mutex operations 28194c87aefeSPatrick Mooney * during vmx_run, the VMCS will have been unloaded, forcing a 28204c87aefeSPatrick Mooney * re-VMLAUNCH as opposed to VMRESUME. 
28214c87aefeSPatrick Mooney */ 28224c87aefeSPatrick Mooney launched = (vmx->vmcs_state[vcpu] & VS_LAUNCHED) != 0; 28234c87aefeSPatrick Mooney /* 28244c87aefeSPatrick Mooney * Restoration of the GDT limit is taken care of by 28254c87aefeSPatrick Mooney * vmx_savectx(). Since the maximum practical index for the 28264c87aefeSPatrick Mooney * IDT is 255, restoring its limits from the post-VMX-exit 28274c87aefeSPatrick Mooney * default of 0xffff is not a concern. 28284c87aefeSPatrick Mooney * 28294c87aefeSPatrick Mooney * Only 64-bit hypervisor callers are allowed, which forgoes 28304c87aefeSPatrick Mooney * the need to restore any LDT descriptor. Toss an error to 28314c87aefeSPatrick Mooney * anyone attempting to break that rule. 28324c87aefeSPatrick Mooney */ 28334c87aefeSPatrick Mooney if (curproc->p_model != DATAMODEL_LP64) { 28344c87aefeSPatrick Mooney smt_release(); 28354c87aefeSPatrick Mooney enable_intr(); 28364c87aefeSPatrick Mooney bzero(vmexit, sizeof (*vmexit)); 28374c87aefeSPatrick Mooney vmexit->rip = rip; 28384c87aefeSPatrick Mooney vmexit->exitcode = VM_EXITCODE_VMX; 28394c87aefeSPatrick Mooney vmexit->u.vmx.status = VM_FAIL_INVALID; 28404c87aefeSPatrick Mooney handled = UNHANDLED; 28414c87aefeSPatrick Mooney break; 28424c87aefeSPatrick Mooney } 28434c87aefeSPatrick Mooney 2844c74a40a5SPatrick Mooney if (tpr_shadow_active) { 2845c74a40a5SPatrick Mooney vmx_tpr_shadow_enter(vlapic); 2846154972afSPatrick Mooney } 2847154972afSPatrick Mooney 28480153d828SPatrick Mooney /* 28490153d828SPatrick Mooney * Indicate activation of vmspace (EPT) table just prior to VMX 28500153d828SPatrick Mooney * entry, checking for the necessity of an invept invalidation. 
28510153d828SPatrick Mooney */ 28520153d828SPatrick Mooney eptgen = vmc_table_enter(vmc); 2853d1c02647SPatrick Mooney if (vmx->eptgen[curcpu] != eptgen) { 28540153d828SPatrick Mooney /* 2855d1c02647SPatrick Mooney * VMspace generation does not match what was previously 2856d1c02647SPatrick Mooney * used on this host CPU, so all mappings associated 2857d1c02647SPatrick Mooney * with this EP4TA must be invalidated. 28580153d828SPatrick Mooney */ 28590153d828SPatrick Mooney invept(1, vmx->eptp); 2860d1c02647SPatrick Mooney vmx->eptgen[curcpu] = eptgen; 28610153d828SPatrick Mooney } 28620153d828SPatrick Mooney 286359460b49SPatrick Mooney vcpu_ustate_change(vm, vcpu, VU_RUN); 28644c87aefeSPatrick Mooney vmx_dr_enter_guest(vmxctx); 28650153d828SPatrick Mooney 28660153d828SPatrick Mooney /* Perform VMX entry */ 28674c87aefeSPatrick Mooney rc = vmx_enter_guest(vmxctx, vmx, launched); 28680153d828SPatrick Mooney 28694c87aefeSPatrick Mooney vmx_dr_leave_guest(vmxctx); 287059460b49SPatrick Mooney vcpu_ustate_change(vm, vcpu, VU_EMU_KERN); 28714c87aefeSPatrick Mooney 28724c87aefeSPatrick Mooney vmx->vmcs_state[vcpu] |= VS_LAUNCHED; 28734c87aefeSPatrick Mooney smt_release(); 28744c87aefeSPatrick Mooney 2875c74a40a5SPatrick Mooney if (tpr_shadow_active) { 2876c74a40a5SPatrick Mooney vmx_tpr_shadow_exit(vlapic); 2877c74a40a5SPatrick Mooney } 2878c74a40a5SPatrick Mooney 28794c87aefeSPatrick Mooney /* Collect some information for VM exit processing */ 28803d097f7dSPatrick Mooney vmexit->rip = rip = vmcs_read(VMCS_GUEST_RIP); 28813d097f7dSPatrick Mooney vmexit->inst_length = vmcs_read(VMCS_EXIT_INSTRUCTION_LENGTH); 28823d097f7dSPatrick Mooney vmexit->u.vmx.exit_reason = exit_reason = 28833d097f7dSPatrick Mooney (vmcs_read(VMCS_EXIT_REASON) & BASIC_EXIT_REASON_MASK); 28843d097f7dSPatrick Mooney vmexit->u.vmx.exit_qualification = 28853d097f7dSPatrick Mooney vmcs_read(VMCS_EXIT_QUALIFICATION); 2886bf21cd93STycho Nightingale /* Update 'nextrip' */ 2887bf21cd93STycho Nightingale 
vmx->state[vcpu].nextrip = rip; 2888bf21cd93STycho Nightingale 28894c87aefeSPatrick Mooney if (rc == VMX_GUEST_VMEXIT) { 28900153d828SPatrick Mooney vmx_exit_handle_possible_nmi(vmexit); 28910153d828SPatrick Mooney } 28920153d828SPatrick Mooney enable_intr(); 28930153d828SPatrick Mooney vmc_table_exit(vmc); 28940153d828SPatrick Mooney 28950153d828SPatrick Mooney if (rc == VMX_GUEST_VMEXIT) { 28964c87aefeSPatrick Mooney handled = vmx_exit_process(vmx, vcpu, vmexit); 28974c87aefeSPatrick Mooney } else { 28984c87aefeSPatrick Mooney vmx_exit_inst_error(vmxctx, rc, vmexit); 2899bf21cd93STycho Nightingale } 29002699b94cSPatrick Mooney DTRACE_PROBE3(vmm__vexit, int, vcpu, uint64_t, rip, 29012699b94cSPatrick Mooney uint32_t, exit_reason); 29024c87aefeSPatrick Mooney rip = vmexit->rip; 2903bf21cd93STycho Nightingale } while (handled); 2904bf21cd93STycho Nightingale 29052606939dSPatrick Mooney /* If a VM exit has been handled then the exitcode must be BOGUS */ 29062606939dSPatrick Mooney if (handled && vmexit->exitcode != VM_EXITCODE_BOGUS) { 29072606939dSPatrick Mooney panic("Non-BOGUS exitcode (%d) unexpected for handled VM exit", 29082606939dSPatrick Mooney vmexit->exitcode); 2909bf21cd93STycho Nightingale } 2910bf21cd93STycho Nightingale 2911007ca332SPatrick Mooney vmcs_clear(vmcs_pa); 2912bf21cd93STycho Nightingale vmx_msr_guest_exit(vmx, vcpu); 2913bf21cd93STycho Nightingale 29144c87aefeSPatrick Mooney VERIFY(vmx->vmcs_state != VS_NONE && curthread->t_preempt != 0); 29154c87aefeSPatrick Mooney vmx->vmcs_state[vcpu] = VS_NONE; 2916bf21cd93STycho Nightingale 29174c87aefeSPatrick Mooney return (0); 2918bf21cd93STycho Nightingale } 2919bf21cd93STycho Nightingale 2920bf21cd93STycho Nightingale static void 2921bf21cd93STycho Nightingale vmx_vmcleanup(void *arg) 2922bf21cd93STycho Nightingale { 29234c87aefeSPatrick Mooney int i; 2924bf21cd93STycho Nightingale struct vmx *vmx = arg; 29254c87aefeSPatrick Mooney uint16_t maxcpus; 2926bf21cd93STycho Nightingale 
29276b641d7aSPatrick Mooney if (vmx_cap_en(vmx, VMX_CAP_APICV)) { 2928e0994bd2SPatrick Mooney (void) vm_unmap_mmio(vmx->vm, DEFAULT_APIC_BASE, PAGE_SIZE); 29296b641d7aSPatrick Mooney kmem_free(vmx->apic_access_page, PAGESIZE); 29306b641d7aSPatrick Mooney } else { 29316b641d7aSPatrick Mooney VERIFY3P(vmx->apic_access_page, ==, NULL); 29326b641d7aSPatrick Mooney } 29336b641d7aSPatrick Mooney 29346b641d7aSPatrick Mooney vmx_msr_bitmap_destroy(vmx); 2935bf21cd93STycho Nightingale 29364c87aefeSPatrick Mooney maxcpus = vm_get_maxcpus(vmx->vm); 29374c87aefeSPatrick Mooney for (i = 0; i < maxcpus; i++) 29384c87aefeSPatrick Mooney vpid_free(vmx->state[i].vpid); 2939bf21cd93STycho Nightingale 2940bf21cd93STycho Nightingale free(vmx, M_VMX); 2941bf21cd93STycho Nightingale } 2942bf21cd93STycho Nightingale 2943db8733f5SPatrick Mooney static uint64_t * 2944bf21cd93STycho Nightingale vmxctx_regptr(struct vmxctx *vmxctx, int reg) 2945bf21cd93STycho Nightingale { 2946bf21cd93STycho Nightingale switch (reg) { 2947bf21cd93STycho Nightingale case VM_REG_GUEST_RAX: 2948bf21cd93STycho Nightingale return (&vmxctx->guest_rax); 2949bf21cd93STycho Nightingale case VM_REG_GUEST_RBX: 2950bf21cd93STycho Nightingale return (&vmxctx->guest_rbx); 2951bf21cd93STycho Nightingale case VM_REG_GUEST_RCX: 2952bf21cd93STycho Nightingale return (&vmxctx->guest_rcx); 2953bf21cd93STycho Nightingale case VM_REG_GUEST_RDX: 2954bf21cd93STycho Nightingale return (&vmxctx->guest_rdx); 2955bf21cd93STycho Nightingale case VM_REG_GUEST_RSI: 2956bf21cd93STycho Nightingale return (&vmxctx->guest_rsi); 2957bf21cd93STycho Nightingale case VM_REG_GUEST_RDI: 2958bf21cd93STycho Nightingale return (&vmxctx->guest_rdi); 2959bf21cd93STycho Nightingale case VM_REG_GUEST_RBP: 2960bf21cd93STycho Nightingale return (&vmxctx->guest_rbp); 2961bf21cd93STycho Nightingale case VM_REG_GUEST_R8: 2962bf21cd93STycho Nightingale return (&vmxctx->guest_r8); 2963bf21cd93STycho Nightingale case VM_REG_GUEST_R9: 2964bf21cd93STycho 
Nightingale return (&vmxctx->guest_r9); 2965bf21cd93STycho Nightingale case VM_REG_GUEST_R10: 2966bf21cd93STycho Nightingale return (&vmxctx->guest_r10); 2967bf21cd93STycho Nightingale case VM_REG_GUEST_R11: 2968bf21cd93STycho Nightingale return (&vmxctx->guest_r11); 2969bf21cd93STycho Nightingale case VM_REG_GUEST_R12: 2970bf21cd93STycho Nightingale return (&vmxctx->guest_r12); 2971bf21cd93STycho Nightingale case VM_REG_GUEST_R13: 2972bf21cd93STycho Nightingale return (&vmxctx->guest_r13); 2973bf21cd93STycho Nightingale case VM_REG_GUEST_R14: 2974bf21cd93STycho Nightingale return (&vmxctx->guest_r14); 2975bf21cd93STycho Nightingale case VM_REG_GUEST_R15: 2976bf21cd93STycho Nightingale return (&vmxctx->guest_r15); 2977bf21cd93STycho Nightingale case VM_REG_GUEST_CR2: 2978bf21cd93STycho Nightingale return (&vmxctx->guest_cr2); 29794c87aefeSPatrick Mooney case VM_REG_GUEST_DR0: 29804c87aefeSPatrick Mooney return (&vmxctx->guest_dr0); 29814c87aefeSPatrick Mooney case VM_REG_GUEST_DR1: 29824c87aefeSPatrick Mooney return (&vmxctx->guest_dr1); 29834c87aefeSPatrick Mooney case VM_REG_GUEST_DR2: 29844c87aefeSPatrick Mooney return (&vmxctx->guest_dr2); 29854c87aefeSPatrick Mooney case VM_REG_GUEST_DR3: 29864c87aefeSPatrick Mooney return (&vmxctx->guest_dr3); 29874c87aefeSPatrick Mooney case VM_REG_GUEST_DR6: 29884c87aefeSPatrick Mooney return (&vmxctx->guest_dr6); 2989bf21cd93STycho Nightingale default: 2990bf21cd93STycho Nightingale break; 2991bf21cd93STycho Nightingale } 2992bf21cd93STycho Nightingale return (NULL); 2993bf21cd93STycho Nightingale } 2994bf21cd93STycho Nightingale 2995bf21cd93STycho Nightingale static int 2996007ca332SPatrick Mooney vmx_getreg(void *arg, int vcpu, int reg, uint64_t *retval) 2997bf21cd93STycho Nightingale { 2998007ca332SPatrick Mooney int running, hostcpu, err; 2999007ca332SPatrick Mooney struct vmx *vmx = arg; 3000db8733f5SPatrick Mooney uint64_t *regp; 3001bf21cd93STycho Nightingale 3002007ca332SPatrick Mooney running = 
vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3003007ca332SPatrick Mooney if (running && hostcpu != curcpu) 3004*d4f59ae5SPatrick Mooney panic("vmx_getreg: %d is running", vcpu); 3005bf21cd93STycho Nightingale 3006007ca332SPatrick Mooney /* VMCS access not required for ctx reads */ 3007007ca332SPatrick Mooney if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { 3008007ca332SPatrick Mooney *retval = *regp; 3009bf21cd93STycho Nightingale return (0); 30104c87aefeSPatrick Mooney } 30114c87aefeSPatrick Mooney 3012007ca332SPatrick Mooney if (!running) { 3013007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 30144c87aefeSPatrick Mooney } 3015bf21cd93STycho Nightingale 3016bf0dcd3fSPatrick Mooney err = 0; 3017007ca332SPatrick Mooney if (reg == VM_REG_GUEST_INTR_SHADOW) { 3018007ca332SPatrick Mooney uint64_t gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 3019007ca332SPatrick Mooney *retval = (gi & HWINTR_BLOCKING) ? 1 : 0; 3020007ca332SPatrick Mooney } else { 3021007ca332SPatrick Mooney uint32_t encoding; 3022bf21cd93STycho Nightingale 3023007ca332SPatrick Mooney encoding = vmcs_field_encoding(reg); 3024bf0dcd3fSPatrick Mooney switch (encoding) { 3025bf0dcd3fSPatrick Mooney case VMCS_GUEST_CR0: 3026bf0dcd3fSPatrick Mooney /* Take the shadow bits into account */ 3027bf0dcd3fSPatrick Mooney *retval = vmx_unshadow_cr0(vmcs_read(encoding), 3028bf0dcd3fSPatrick Mooney vmcs_read(VMCS_CR0_SHADOW)); 3029bf0dcd3fSPatrick Mooney break; 3030bf0dcd3fSPatrick Mooney case VMCS_GUEST_CR4: 3031bf0dcd3fSPatrick Mooney /* Take the shadow bits into account */ 3032bf0dcd3fSPatrick Mooney *retval = vmx_unshadow_cr4(vmcs_read(encoding), 3033bf0dcd3fSPatrick Mooney vmcs_read(VMCS_CR4_SHADOW)); 3034bf0dcd3fSPatrick Mooney break; 3035bf0dcd3fSPatrick Mooney case VMCS_INVALID_ENCODING: 3036bf0dcd3fSPatrick Mooney err = EINVAL; 3037bf0dcd3fSPatrick Mooney break; 3038bf0dcd3fSPatrick Mooney default: 3039007ca332SPatrick Mooney *retval = vmcs_read(encoding); 3040bf0dcd3fSPatrick Mooney break; 
3041007ca332SPatrick Mooney } 3042bf21cd93STycho Nightingale } 3043bf21cd93STycho Nightingale 3044007ca332SPatrick Mooney if (!running) { 3045007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3046007ca332SPatrick Mooney } 3047bf21cd93STycho Nightingale 3048007ca332SPatrick Mooney return (err); 3049bf21cd93STycho Nightingale } 3050bf21cd93STycho Nightingale 3051bf21cd93STycho Nightingale static int 3052bf21cd93STycho Nightingale vmx_setreg(void *arg, int vcpu, int reg, uint64_t val) 3053bf21cd93STycho Nightingale { 3054007ca332SPatrick Mooney int running, hostcpu, error; 3055bf21cd93STycho Nightingale struct vmx *vmx = arg; 3056db8733f5SPatrick Mooney uint64_t *regp; 3057bf21cd93STycho Nightingale 3058bf21cd93STycho Nightingale running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3059bf21cd93STycho Nightingale if (running && hostcpu != curcpu) 3060*d4f59ae5SPatrick Mooney panic("vmx_setreg: %d is running", vcpu); 3061bf21cd93STycho Nightingale 3062007ca332SPatrick Mooney /* VMCS access not required for ctx writes */ 3063007ca332SPatrick Mooney if ((regp = vmxctx_regptr(&vmx->ctx[vcpu], reg)) != NULL) { 3064007ca332SPatrick Mooney *regp = val; 3065bf21cd93STycho Nightingale return (0); 3066007ca332SPatrick Mooney } 3067bf21cd93STycho Nightingale 3068007ca332SPatrick Mooney if (!running) { 3069007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3070007ca332SPatrick Mooney } 3071bf21cd93STycho Nightingale 3072007ca332SPatrick Mooney if (reg == VM_REG_GUEST_INTR_SHADOW) { 3073007ca332SPatrick Mooney if (val != 0) { 3074bf21cd93STycho Nightingale /* 3075007ca332SPatrick Mooney * Forcing the vcpu into an interrupt shadow is not 3076007ca332SPatrick Mooney * presently supported. 
30774c87aefeSPatrick Mooney */ 3078007ca332SPatrick Mooney error = EINVAL; 3079007ca332SPatrick Mooney } else { 3080007ca332SPatrick Mooney uint64_t gi; 3081007ca332SPatrick Mooney 3082007ca332SPatrick Mooney gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY); 3083007ca332SPatrick Mooney gi &= ~HWINTR_BLOCKING; 3084007ca332SPatrick Mooney vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi); 3085007ca332SPatrick Mooney error = 0; 3086bf21cd93STycho Nightingale } 3087007ca332SPatrick Mooney } else { 3088007ca332SPatrick Mooney uint32_t encoding; 30894c87aefeSPatrick Mooney 3090007ca332SPatrick Mooney error = 0; 3091007ca332SPatrick Mooney encoding = vmcs_field_encoding(reg); 3092007ca332SPatrick Mooney switch (encoding) { 3093007ca332SPatrick Mooney case VMCS_GUEST_IA32_EFER: 3094007ca332SPatrick Mooney /* 3095007ca332SPatrick Mooney * If the "load EFER" VM-entry control is 1 then the 3096007ca332SPatrick Mooney * value of EFER.LMA must be identical to "IA-32e mode 3097007ca332SPatrick Mooney * guest" bit in the VM-entry control. 3098007ca332SPatrick Mooney */ 3099007ca332SPatrick Mooney if ((entry_ctls & VM_ENTRY_LOAD_EFER) != 0) { 3100007ca332SPatrick Mooney uint64_t ctls; 3101007ca332SPatrick Mooney 3102007ca332SPatrick Mooney ctls = vmcs_read(VMCS_ENTRY_CTLS); 3103007ca332SPatrick Mooney if (val & EFER_LMA) { 3104007ca332SPatrick Mooney ctls |= VM_ENTRY_GUEST_LMA; 3105007ca332SPatrick Mooney } else { 3106007ca332SPatrick Mooney ctls &= ~VM_ENTRY_GUEST_LMA; 3107007ca332SPatrick Mooney } 3108007ca332SPatrick Mooney vmcs_write(VMCS_ENTRY_CTLS, ctls); 3109007ca332SPatrick Mooney } 3110007ca332SPatrick Mooney vmcs_write(encoding, val); 3111007ca332SPatrick Mooney break; 3112007ca332SPatrick Mooney case VMCS_GUEST_CR0: 3113007ca332SPatrick Mooney /* 3114007ca332SPatrick Mooney * The guest is not allowed to modify certain bits in 3115007ca332SPatrick Mooney * %cr0 and %cr4. 
To maintain the illusion of full 3116007ca332SPatrick Mooney * control, they have shadow versions which contain the 3117007ca332SPatrick Mooney * guest-perceived (via reads from the register) values 3118007ca332SPatrick Mooney * as opposed to the guest-effective values. 3119007ca332SPatrick Mooney * 3120007ca332SPatrick Mooney * This is detailed in the SDM: Vol. 3 Ch. 24.6.6. 3121007ca332SPatrick Mooney */ 3122007ca332SPatrick Mooney vmcs_write(VMCS_CR0_SHADOW, val); 3123007ca332SPatrick Mooney vmcs_write(encoding, vmx_fix_cr0(val)); 3124007ca332SPatrick Mooney break; 3125007ca332SPatrick Mooney case VMCS_GUEST_CR4: 3126007ca332SPatrick Mooney /* See above for detail on %cr4 shadowing */ 3127007ca332SPatrick Mooney vmcs_write(VMCS_CR4_SHADOW, val); 3128007ca332SPatrick Mooney vmcs_write(encoding, vmx_fix_cr4(val)); 3129007ca332SPatrick Mooney break; 3130007ca332SPatrick Mooney case VMCS_GUEST_CR3: 3131007ca332SPatrick Mooney vmcs_write(encoding, val); 31324c87aefeSPatrick Mooney /* 31334c87aefeSPatrick Mooney * Invalidate the guest vcpu's TLB mappings to emulate 31344c87aefeSPatrick Mooney * the behavior of updating %cr3. 31354c87aefeSPatrick Mooney * 31364c87aefeSPatrick Mooney * XXX the processor retains global mappings when %cr3 31374c87aefeSPatrick Mooney * is updated but vmx_invvpid() does not. 
31384c87aefeSPatrick Mooney */ 31390153d828SPatrick Mooney vmx_invvpid(vmx, vcpu, running); 3140007ca332SPatrick Mooney break; 3141007ca332SPatrick Mooney case VMCS_INVALID_ENCODING: 3142007ca332SPatrick Mooney error = EINVAL; 3143007ca332SPatrick Mooney break; 3144007ca332SPatrick Mooney default: 3145007ca332SPatrick Mooney vmcs_write(encoding, val); 3146007ca332SPatrick Mooney break; 31474c87aefeSPatrick Mooney } 3148bf21cd93STycho Nightingale } 3149bf21cd93STycho Nightingale 3150007ca332SPatrick Mooney if (!running) { 3151007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3152007ca332SPatrick Mooney } 3153007ca332SPatrick Mooney 3154bf21cd93STycho Nightingale return (error); 3155bf21cd93STycho Nightingale } 3156bf21cd93STycho Nightingale 3157bf21cd93STycho Nightingale static int 3158007ca332SPatrick Mooney vmx_getdesc(void *arg, int vcpu, int seg, struct seg_desc *desc) 3159bf21cd93STycho Nightingale { 3160bf21cd93STycho Nightingale int hostcpu, running; 3161bf21cd93STycho Nightingale struct vmx *vmx = arg; 3162007ca332SPatrick Mooney uint32_t base, limit, access; 3163bf21cd93STycho Nightingale 3164bf21cd93STycho Nightingale running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3165bf21cd93STycho Nightingale if (running && hostcpu != curcpu) 3166*d4f59ae5SPatrick Mooney panic("vmx_getdesc: %d is running", vcpu); 3167bf21cd93STycho Nightingale 3168007ca332SPatrick Mooney if (!running) { 3169007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3170007ca332SPatrick Mooney } 3171007ca332SPatrick Mooney 3172007ca332SPatrick Mooney vmcs_seg_desc_encoding(seg, &base, &limit, &access); 3173007ca332SPatrick Mooney desc->base = vmcs_read(base); 3174007ca332SPatrick Mooney desc->limit = vmcs_read(limit); 3175007ca332SPatrick Mooney if (access != VMCS_INVALID_ENCODING) { 3176007ca332SPatrick Mooney desc->access = vmcs_read(access); 3177007ca332SPatrick Mooney } else { 3178007ca332SPatrick Mooney desc->access = 0; 3179007ca332SPatrick Mooney } 3180007ca332SPatrick 
Mooney 3181007ca332SPatrick Mooney if (!running) { 3182007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3183007ca332SPatrick Mooney } 3184007ca332SPatrick Mooney return (0); 3185bf21cd93STycho Nightingale } 3186bf21cd93STycho Nightingale 3187bf21cd93STycho Nightingale static int 31882606939dSPatrick Mooney vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc) 3189bf21cd93STycho Nightingale { 3190bf21cd93STycho Nightingale int hostcpu, running; 3191bf21cd93STycho Nightingale struct vmx *vmx = arg; 3192007ca332SPatrick Mooney uint32_t base, limit, access; 3193bf21cd93STycho Nightingale 3194bf21cd93STycho Nightingale running = vcpu_is_running(vmx->vm, vcpu, &hostcpu); 3195bf21cd93STycho Nightingale if (running && hostcpu != curcpu) 3196*d4f59ae5SPatrick Mooney panic("vmx_setdesc: %d is running", vcpu); 3197bf21cd93STycho Nightingale 3198007ca332SPatrick Mooney if (!running) { 3199007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3200007ca332SPatrick Mooney } 3201007ca332SPatrick Mooney 3202007ca332SPatrick Mooney vmcs_seg_desc_encoding(seg, &base, &limit, &access); 3203007ca332SPatrick Mooney vmcs_write(base, desc->base); 3204007ca332SPatrick Mooney vmcs_write(limit, desc->limit); 3205007ca332SPatrick Mooney if (access != VMCS_INVALID_ENCODING) { 3206007ca332SPatrick Mooney vmcs_write(access, desc->access); 3207007ca332SPatrick Mooney } 3208007ca332SPatrick Mooney 3209007ca332SPatrick Mooney if (!running) { 3210007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3211007ca332SPatrick Mooney } 3212007ca332SPatrick Mooney return (0); 3213bf21cd93STycho Nightingale } 3214bf21cd93STycho Nightingale 3215bf21cd93STycho Nightingale static int 3216bf21cd93STycho Nightingale vmx_getcap(void *arg, int vcpu, int type, int *retval) 3217bf21cd93STycho Nightingale { 3218bf21cd93STycho Nightingale struct vmx *vmx = arg; 3219bf21cd93STycho Nightingale int vcap; 3220bf21cd93STycho Nightingale int ret; 3221bf21cd93STycho Nightingale 3222bf21cd93STycho 
Nightingale ret = ENOENT; 3223bf21cd93STycho Nightingale 3224bf21cd93STycho Nightingale vcap = vmx->cap[vcpu].set; 3225bf21cd93STycho Nightingale 3226bf21cd93STycho Nightingale switch (type) { 3227bf21cd93STycho Nightingale case VM_CAP_HALT_EXIT: 3228bf21cd93STycho Nightingale if (cap_halt_exit) 3229bf21cd93STycho Nightingale ret = 0; 3230bf21cd93STycho Nightingale break; 3231bf21cd93STycho Nightingale case VM_CAP_PAUSE_EXIT: 3232bf21cd93STycho Nightingale if (cap_pause_exit) 3233bf21cd93STycho Nightingale ret = 0; 3234bf21cd93STycho Nightingale break; 3235bf21cd93STycho Nightingale case VM_CAP_MTRAP_EXIT: 3236bf21cd93STycho Nightingale if (cap_monitor_trap) 3237bf21cd93STycho Nightingale ret = 0; 3238bf21cd93STycho Nightingale break; 32394c87aefeSPatrick Mooney case VM_CAP_ENABLE_INVPCID: 32404c87aefeSPatrick Mooney if (cap_invpcid) 32414c87aefeSPatrick Mooney ret = 0; 32424c87aefeSPatrick Mooney break; 3243154972afSPatrick Mooney case VM_CAP_BPT_EXIT: 3244154972afSPatrick Mooney ret = 0; 3245154972afSPatrick Mooney break; 3246bf21cd93STycho Nightingale default: 3247bf21cd93STycho Nightingale break; 3248bf21cd93STycho Nightingale } 3249bf21cd93STycho Nightingale 3250bf21cd93STycho Nightingale if (ret == 0) 3251bf21cd93STycho Nightingale *retval = (vcap & (1 << type)) ? 
1 : 0; 3252bf21cd93STycho Nightingale 3253bf21cd93STycho Nightingale return (ret); 3254bf21cd93STycho Nightingale } 3255bf21cd93STycho Nightingale 3256bf21cd93STycho Nightingale static int 3257bf21cd93STycho Nightingale vmx_setcap(void *arg, int vcpu, int type, int val) 3258bf21cd93STycho Nightingale { 3259bf21cd93STycho Nightingale struct vmx *vmx = arg; 3260007ca332SPatrick Mooney uint32_t baseval, reg, flag; 3261bf21cd93STycho Nightingale uint32_t *pptr; 3262bf21cd93STycho Nightingale int error; 3263bf21cd93STycho Nightingale 3264007ca332SPatrick Mooney error = ENOENT; 3265bf21cd93STycho Nightingale pptr = NULL; 3266bf21cd93STycho Nightingale 3267bf21cd93STycho Nightingale switch (type) { 3268bf21cd93STycho Nightingale case VM_CAP_HALT_EXIT: 3269bf21cd93STycho Nightingale if (cap_halt_exit) { 3270007ca332SPatrick Mooney error = 0; 3271bf21cd93STycho Nightingale pptr = &vmx->cap[vcpu].proc_ctls; 3272bf21cd93STycho Nightingale baseval = *pptr; 3273bf21cd93STycho Nightingale flag = PROCBASED_HLT_EXITING; 3274bf21cd93STycho Nightingale reg = VMCS_PRI_PROC_BASED_CTLS; 3275bf21cd93STycho Nightingale } 3276bf21cd93STycho Nightingale break; 3277bf21cd93STycho Nightingale case VM_CAP_MTRAP_EXIT: 3278bf21cd93STycho Nightingale if (cap_monitor_trap) { 3279007ca332SPatrick Mooney error = 0; 3280bf21cd93STycho Nightingale pptr = &vmx->cap[vcpu].proc_ctls; 3281bf21cd93STycho Nightingale baseval = *pptr; 3282bf21cd93STycho Nightingale flag = PROCBASED_MTF; 3283bf21cd93STycho Nightingale reg = VMCS_PRI_PROC_BASED_CTLS; 3284bf21cd93STycho Nightingale } 3285bf21cd93STycho Nightingale break; 3286bf21cd93STycho Nightingale case VM_CAP_PAUSE_EXIT: 3287bf21cd93STycho Nightingale if (cap_pause_exit) { 3288007ca332SPatrick Mooney error = 0; 3289bf21cd93STycho Nightingale pptr = &vmx->cap[vcpu].proc_ctls; 3290bf21cd93STycho Nightingale baseval = *pptr; 3291bf21cd93STycho Nightingale flag = PROCBASED_PAUSE_EXITING; 3292bf21cd93STycho Nightingale reg = VMCS_PRI_PROC_BASED_CTLS; 
3293bf21cd93STycho Nightingale } 3294bf21cd93STycho Nightingale break; 32954c87aefeSPatrick Mooney case VM_CAP_ENABLE_INVPCID: 32964c87aefeSPatrick Mooney if (cap_invpcid) { 3297007ca332SPatrick Mooney error = 0; 32984c87aefeSPatrick Mooney pptr = &vmx->cap[vcpu].proc_ctls2; 32994c87aefeSPatrick Mooney baseval = *pptr; 33004c87aefeSPatrick Mooney flag = PROCBASED2_ENABLE_INVPCID; 33014c87aefeSPatrick Mooney reg = VMCS_SEC_PROC_BASED_CTLS; 33024c87aefeSPatrick Mooney } 33034c87aefeSPatrick Mooney break; 3304154972afSPatrick Mooney case VM_CAP_BPT_EXIT: 3305007ca332SPatrick Mooney error = 0; 3306154972afSPatrick Mooney 3307154972afSPatrick Mooney /* Don't change the bitmap if we are tracing all exceptions. */ 3308154972afSPatrick Mooney if (vmx->cap[vcpu].exc_bitmap != 0xffffffff) { 3309154972afSPatrick Mooney pptr = &vmx->cap[vcpu].exc_bitmap; 3310154972afSPatrick Mooney baseval = *pptr; 3311154972afSPatrick Mooney flag = (1 << IDT_BP); 3312154972afSPatrick Mooney reg = VMCS_EXCEPTION_BITMAP; 3313154972afSPatrick Mooney } 3314154972afSPatrick Mooney break; 3315bf21cd93STycho Nightingale default: 3316bf21cd93STycho Nightingale break; 3317bf21cd93STycho Nightingale } 3318bf21cd93STycho Nightingale 3319007ca332SPatrick Mooney if (error != 0) { 3320007ca332SPatrick Mooney return (error); 3321007ca332SPatrick Mooney } 3322154972afSPatrick Mooney 3323154972afSPatrick Mooney if (pptr != NULL) { 3324bf21cd93STycho Nightingale if (val) { 3325bf21cd93STycho Nightingale baseval |= flag; 3326bf21cd93STycho Nightingale } else { 3327bf21cd93STycho Nightingale baseval &= ~flag; 3328bf21cd93STycho Nightingale } 3329007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpu]); 3330007ca332SPatrick Mooney vmcs_write(reg, baseval); 3331007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpu]); 3332bf21cd93STycho Nightingale 3333154972afSPatrick Mooney /* 3334154972afSPatrick Mooney * Update optional stored flags, and record 3335154972afSPatrick Mooney * setting 3336154972afSPatrick Mooney */ 
3337154972afSPatrick Mooney *pptr = baseval; 3338bf21cd93STycho Nightingale } 3339bf21cd93STycho Nightingale 3340154972afSPatrick Mooney if (val) { 3341154972afSPatrick Mooney vmx->cap[vcpu].set |= (1 << type); 3342154972afSPatrick Mooney } else { 3343154972afSPatrick Mooney vmx->cap[vcpu].set &= ~(1 << type); 3344154972afSPatrick Mooney } 3345154972afSPatrick Mooney 3346154972afSPatrick Mooney return (0); 3347bf21cd93STycho Nightingale } 3348bf21cd93STycho Nightingale 3349bf21cd93STycho Nightingale struct vlapic_vtx { 3350bf21cd93STycho Nightingale struct vlapic vlapic; 3351c74a40a5SPatrick Mooney 3352c74a40a5SPatrick Mooney /* Align to the nearest cacheline */ 3353c74a40a5SPatrick Mooney uint8_t _pad[64 - (sizeof (struct vlapic) % 64)]; 3354c74a40a5SPatrick Mooney 3355c74a40a5SPatrick Mooney /* TMR handling state for posted interrupts */ 3356c74a40a5SPatrick Mooney uint32_t tmr_active[8]; 3357c74a40a5SPatrick Mooney uint32_t pending_level[8]; 3358c74a40a5SPatrick Mooney uint32_t pending_edge[8]; 3359c74a40a5SPatrick Mooney 3360bf21cd93STycho Nightingale struct pir_desc *pir_desc; 3361bf21cd93STycho Nightingale struct vmx *vmx; 33622699b94cSPatrick Mooney uint_t pending_prio; 3363c74a40a5SPatrick Mooney boolean_t tmr_sync; 3364bf21cd93STycho Nightingale }; 3365bf21cd93STycho Nightingale 33662699b94cSPatrick Mooney CTASSERT((offsetof(struct vlapic_vtx, tmr_active) & 63) == 0); 33674c87aefeSPatrick Mooney 33682699b94cSPatrick Mooney #define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4)) 3369bf21cd93STycho Nightingale 3370c74a40a5SPatrick Mooney static vcpu_notify_t 3371c74a40a5SPatrick Mooney vmx_apicv_set_ready(struct vlapic *vlapic, int vector, bool level) 3372bf21cd93STycho Nightingale { 3373bf21cd93STycho Nightingale struct vlapic_vtx *vlapic_vtx; 3374bf21cd93STycho Nightingale struct pir_desc *pir_desc; 3375c74a40a5SPatrick Mooney uint32_t mask, tmrval; 3376c74a40a5SPatrick Mooney int idx; 3377c74a40a5SPatrick Mooney vcpu_notify_t notify = VCPU_NOTIFY_NONE; 
3378bf21cd93STycho Nightingale 3379bf21cd93STycho Nightingale vlapic_vtx = (struct vlapic_vtx *)vlapic; 3380bf21cd93STycho Nightingale pir_desc = vlapic_vtx->pir_desc; 3381c74a40a5SPatrick Mooney idx = vector / 32; 3382c74a40a5SPatrick Mooney mask = 1UL << (vector % 32); 3383bf21cd93STycho Nightingale 3384bf21cd93STycho Nightingale /* 3385c74a40a5SPatrick Mooney * If the currently asserted TMRs do not match the state requested by 3386c74a40a5SPatrick Mooney * the incoming interrupt, an exit will be required to reconcile those 3387c74a40a5SPatrick Mooney * bits in the APIC page. This will keep the vLAPIC behavior in line 3388c74a40a5SPatrick Mooney * with the architecturally defined expectations. 3389c74a40a5SPatrick Mooney * 3390c74a40a5SPatrick Mooney * If actors of mixed types (edge and level) are racing against the same 3391c74a40a5SPatrick Mooney * vector (toggling its TMR bit back and forth), the results could 3392c74a40a5SPatrick Mooney * inconsistent. Such circumstances are considered a rare edge case and 3393c74a40a5SPatrick Mooney * are never expected to be found in the wild. 
3394bf21cd93STycho Nightingale */ 3395c74a40a5SPatrick Mooney tmrval = atomic_load_acq_int(&vlapic_vtx->tmr_active[idx]); 3396c74a40a5SPatrick Mooney if (!level) { 3397c74a40a5SPatrick Mooney if ((tmrval & mask) != 0) { 3398c74a40a5SPatrick Mooney /* Edge-triggered interrupt needs TMR de-asserted */ 3399c74a40a5SPatrick Mooney atomic_set_int(&vlapic_vtx->pending_edge[idx], mask); 3400c74a40a5SPatrick Mooney atomic_store_rel_long(&pir_desc->pending, 1); 3401c74a40a5SPatrick Mooney return (VCPU_NOTIFY_EXIT); 3402c74a40a5SPatrick Mooney } 3403c74a40a5SPatrick Mooney } else { 3404c74a40a5SPatrick Mooney if ((tmrval & mask) == 0) { 3405c74a40a5SPatrick Mooney /* Level-triggered interrupt needs TMR asserted */ 3406c74a40a5SPatrick Mooney atomic_set_int(&vlapic_vtx->pending_level[idx], mask); 3407c74a40a5SPatrick Mooney atomic_store_rel_long(&pir_desc->pending, 1); 3408c74a40a5SPatrick Mooney return (VCPU_NOTIFY_EXIT); 3409c74a40a5SPatrick Mooney } 3410c74a40a5SPatrick Mooney } 3411c74a40a5SPatrick Mooney 3412c74a40a5SPatrick Mooney /* 3413c74a40a5SPatrick Mooney * If the interrupt request does not require manipulation of the TMRs 3414c74a40a5SPatrick Mooney * for delivery, set it in PIR descriptor. It cannot be inserted into 3415c74a40a5SPatrick Mooney * the APIC page while the vCPU might be running. 3416c74a40a5SPatrick Mooney */ 3417c74a40a5SPatrick Mooney atomic_set_int(&pir_desc->pir[idx], mask); 34184c87aefeSPatrick Mooney 34194c87aefeSPatrick Mooney /* 34204c87aefeSPatrick Mooney * A notification is required whenever the 'pending' bit makes a 34214c87aefeSPatrick Mooney * transition from 0->1. 34224c87aefeSPatrick Mooney * 34234c87aefeSPatrick Mooney * Even if the 'pending' bit is already asserted, notification about 34244c87aefeSPatrick Mooney * the incoming interrupt may still be necessary. 
For example, if a 34254c87aefeSPatrick Mooney * vCPU is HLTed with a high PPR, a low priority interrupt would cause 34264c87aefeSPatrick Mooney * the 0->1 'pending' transition with a notification, but the vCPU 34274c87aefeSPatrick Mooney * would ignore the interrupt for the time being. The same vCPU would 34284c87aefeSPatrick Mooney * need to then be notified if a high-priority interrupt arrived which 34294c87aefeSPatrick Mooney * satisfied the PPR. 34304c87aefeSPatrick Mooney * 34314c87aefeSPatrick Mooney * The priorities of interrupts injected while 'pending' is asserted 34324c87aefeSPatrick Mooney * are tracked in a custom bitfield 'pending_prio'. Should the 34334c87aefeSPatrick Mooney * to-be-injected interrupt exceed the priorities already present, the 34344c87aefeSPatrick Mooney * notification is sent. The priorities recorded in 'pending_prio' are 34354c87aefeSPatrick Mooney * cleared whenever the 'pending' bit makes another 0->1 transition. 34364c87aefeSPatrick Mooney */ 34374c87aefeSPatrick Mooney if (atomic_cmpset_long(&pir_desc->pending, 0, 1) != 0) { 3438c74a40a5SPatrick Mooney notify = VCPU_NOTIFY_APIC; 34394c87aefeSPatrick Mooney vlapic_vtx->pending_prio = 0; 34404c87aefeSPatrick Mooney } else { 34412699b94cSPatrick Mooney const uint_t old_prio = vlapic_vtx->pending_prio; 34422699b94cSPatrick Mooney const uint_t prio_bit = VPR_PRIO_BIT(vector & APIC_TPR_INT); 34434c87aefeSPatrick Mooney 34444c87aefeSPatrick Mooney if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) { 34454c87aefeSPatrick Mooney atomic_set_int(&vlapic_vtx->pending_prio, prio_bit); 3446c74a40a5SPatrick Mooney notify = VCPU_NOTIFY_APIC; 34474c87aefeSPatrick Mooney } 34484c87aefeSPatrick Mooney } 3449bf21cd93STycho Nightingale 3450bf21cd93STycho Nightingale return (notify); 3451bf21cd93STycho Nightingale } 3452bf21cd93STycho Nightingale 3453c74a40a5SPatrick Mooney static void 3454c74a40a5SPatrick Mooney vmx_apicv_accepted(struct vlapic *vlapic, int vector) 3455bf21cd93STycho Nightingale 
{ 3456bf21cd93STycho Nightingale /* 3457c74a40a5SPatrick Mooney * When APICv is enabled for an instance, the traditional interrupt 3458c74a40a5SPatrick Mooney * injection method (populating ENTRY_INTR_INFO in the VMCS) is not 3459c74a40a5SPatrick Mooney * used and the CPU does the heavy lifting of virtual interrupt 3460c74a40a5SPatrick Mooney * delivery. For that reason vmx_intr_accepted() should never be called 3461c74a40a5SPatrick Mooney * when APICv is enabled. 3462bf21cd93STycho Nightingale */ 3463c74a40a5SPatrick Mooney panic("vmx_intr_accepted: not expected to be called"); 3464bf21cd93STycho Nightingale } 3465bf21cd93STycho Nightingale 3466bf21cd93STycho Nightingale static void 3467c74a40a5SPatrick Mooney vmx_apicv_sync_tmr(struct vlapic *vlapic) 3468bf21cd93STycho Nightingale { 3469c74a40a5SPatrick Mooney struct vlapic_vtx *vlapic_vtx; 3470c74a40a5SPatrick Mooney const uint32_t *tmrs; 3471bf21cd93STycho Nightingale 3472c74a40a5SPatrick Mooney vlapic_vtx = (struct vlapic_vtx *)vlapic; 3473c74a40a5SPatrick Mooney tmrs = &vlapic_vtx->tmr_active[0]; 3474bf21cd93STycho Nightingale 3475c74a40a5SPatrick Mooney if (!vlapic_vtx->tmr_sync) { 3476c74a40a5SPatrick Mooney return; 3477c74a40a5SPatrick Mooney } 3478c74a40a5SPatrick Mooney 3479c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT0, ((uint64_t)tmrs[1] << 32) | tmrs[0]); 3480c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT1, ((uint64_t)tmrs[3] << 32) | tmrs[2]); 3481c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT2, ((uint64_t)tmrs[5] << 32) | tmrs[4]); 3482c74a40a5SPatrick Mooney vmcs_write(VMCS_EOI_EXIT3, ((uint64_t)tmrs[7] << 32) | tmrs[6]); 3483c74a40a5SPatrick Mooney vlapic_vtx->tmr_sync = B_FALSE; 34844c87aefeSPatrick Mooney } 34854c87aefeSPatrick Mooney 34864c87aefeSPatrick Mooney static void 3487154972afSPatrick Mooney vmx_enable_x2apic_mode_ts(struct vlapic *vlapic) 3488154972afSPatrick Mooney { 3489154972afSPatrick Mooney struct vmx *vmx; 3490154972afSPatrick Mooney uint32_t proc_ctls; 
3491154972afSPatrick Mooney int vcpuid; 3492154972afSPatrick Mooney 3493154972afSPatrick Mooney vcpuid = vlapic->vcpuid; 3494154972afSPatrick Mooney vmx = ((struct vlapic_vtx *)vlapic)->vmx; 3495154972afSPatrick Mooney 3496154972afSPatrick Mooney proc_ctls = vmx->cap[vcpuid].proc_ctls; 3497154972afSPatrick Mooney proc_ctls &= ~PROCBASED_USE_TPR_SHADOW; 3498154972afSPatrick Mooney proc_ctls |= PROCBASED_CR8_LOAD_EXITING; 3499154972afSPatrick Mooney proc_ctls |= PROCBASED_CR8_STORE_EXITING; 3500154972afSPatrick Mooney vmx->cap[vcpuid].proc_ctls = proc_ctls; 3501154972afSPatrick Mooney 3502007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpuid]); 3503154972afSPatrick Mooney vmcs_write(VMCS_PRI_PROC_BASED_CTLS, proc_ctls); 3504007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpuid]); 3505154972afSPatrick Mooney } 3506154972afSPatrick Mooney 3507154972afSPatrick Mooney static void 3508154972afSPatrick Mooney vmx_enable_x2apic_mode_vid(struct vlapic *vlapic) 3509bf21cd93STycho Nightingale { 3510bf21cd93STycho Nightingale struct vmx *vmx; 35114c87aefeSPatrick Mooney uint32_t proc_ctls2; 35126b641d7aSPatrick Mooney int vcpuid; 3513bf21cd93STycho Nightingale 35144c87aefeSPatrick Mooney vcpuid = vlapic->vcpuid; 35154c87aefeSPatrick Mooney vmx = ((struct vlapic_vtx *)vlapic)->vmx; 3516bf21cd93STycho Nightingale 35174c87aefeSPatrick Mooney proc_ctls2 = vmx->cap[vcpuid].proc_ctls2; 35184c87aefeSPatrick Mooney KASSERT((proc_ctls2 & PROCBASED2_VIRTUALIZE_APIC_ACCESSES) != 0, 35199dc804b9SPatrick Mooney ("%s: invalid proc_ctls2 %x", __func__, proc_ctls2)); 35204c87aefeSPatrick Mooney 35214c87aefeSPatrick Mooney proc_ctls2 &= ~PROCBASED2_VIRTUALIZE_APIC_ACCESSES; 35224c87aefeSPatrick Mooney proc_ctls2 |= PROCBASED2_VIRTUALIZE_X2APIC_MODE; 35234c87aefeSPatrick Mooney vmx->cap[vcpuid].proc_ctls2 = proc_ctls2; 3524bf21cd93STycho Nightingale 3525007ca332SPatrick Mooney vmcs_load(vmx->vmcs_pa[vcpuid]); 35264c87aefeSPatrick Mooney vmcs_write(VMCS_SEC_PROC_BASED_CTLS, proc_ctls2); 
3527007ca332SPatrick Mooney vmcs_clear(vmx->vmcs_pa[vcpuid]); 35284c87aefeSPatrick Mooney 35296b641d7aSPatrick Mooney vmx_allow_x2apic_msrs(vmx, vcpuid); 3530bf21cd93STycho Nightingale } 3531bf21cd93STycho Nightingale 3532bf21cd93STycho Nightingale static void 3533c74a40a5SPatrick Mooney vmx_apicv_notify(struct vlapic *vlapic, int hostcpu) 3534bf21cd93STycho Nightingale { 35354c87aefeSPatrick Mooney psm_send_pir_ipi(hostcpu); 3536bf21cd93STycho Nightingale } 3537bf21cd93STycho Nightingale 3538bf21cd93STycho Nightingale static void 3539c74a40a5SPatrick Mooney vmx_apicv_sync(struct vlapic *vlapic) 3540bf21cd93STycho Nightingale { 3541bf21cd93STycho Nightingale struct vlapic_vtx *vlapic_vtx; 3542bf21cd93STycho Nightingale struct pir_desc *pir_desc; 3543bf21cd93STycho Nightingale struct LAPIC *lapic; 3544c74a40a5SPatrick Mooney uint_t i; 3545bf21cd93STycho Nightingale 3546bf21cd93STycho Nightingale vlapic_vtx = (struct vlapic_vtx *)vlapic; 3547bf21cd93STycho Nightingale pir_desc = vlapic_vtx->pir_desc; 3548c74a40a5SPatrick Mooney lapic = vlapic->apic_page; 3549c74a40a5SPatrick Mooney 3550bf21cd93STycho Nightingale if (atomic_cmpset_long(&pir_desc->pending, 1, 0) == 0) { 3551bf21cd93STycho Nightingale return; 3552bf21cd93STycho Nightingale } 3553bf21cd93STycho Nightingale 3554c74a40a5SPatrick Mooney vlapic_vtx->pending_prio = 0; 3555bf21cd93STycho Nightingale 3556c74a40a5SPatrick Mooney /* Make sure the invalid (0-15) vectors are not set */ 3557c74a40a5SPatrick Mooney ASSERT0(vlapic_vtx->pending_level[0] & 0xffff); 3558c74a40a5SPatrick Mooney ASSERT0(vlapic_vtx->pending_edge[0] & 0xffff); 3559c74a40a5SPatrick Mooney ASSERT0(pir_desc->pir[0] & 0xffff); 3560bf21cd93STycho Nightingale 3561c74a40a5SPatrick Mooney for (i = 0; i <= 7; i++) { 3562c74a40a5SPatrick Mooney uint32_t *tmrp = &lapic->tmr0 + (i * 4); 3563c74a40a5SPatrick Mooney uint32_t *irrp = &lapic->irr0 + (i * 4); 3564bf21cd93STycho Nightingale 3565c74a40a5SPatrick Mooney const uint32_t pending_level = 
3566c74a40a5SPatrick Mooney atomic_readandclear_int(&vlapic_vtx->pending_level[i]); 3567c74a40a5SPatrick Mooney const uint32_t pending_edge = 3568c74a40a5SPatrick Mooney atomic_readandclear_int(&vlapic_vtx->pending_edge[i]); 3569c74a40a5SPatrick Mooney const uint32_t pending_inject = 3570c74a40a5SPatrick Mooney atomic_readandclear_int(&pir_desc->pir[i]); 3571c74a40a5SPatrick Mooney 3572c74a40a5SPatrick Mooney if (pending_level != 0) { 3573c74a40a5SPatrick Mooney /* 3574c74a40a5SPatrick Mooney * Level-triggered interrupts assert their corresponding 3575c74a40a5SPatrick Mooney * bit in the TMR when queued in IRR. 3576c74a40a5SPatrick Mooney */ 3577c74a40a5SPatrick Mooney *tmrp |= pending_level; 3578c74a40a5SPatrick Mooney *irrp |= pending_level; 3579c74a40a5SPatrick Mooney } 3580c74a40a5SPatrick Mooney if (pending_edge != 0) { 3581c74a40a5SPatrick Mooney /* 3582c74a40a5SPatrick Mooney * When queuing an edge-triggered interrupt in IRR, the 3583c74a40a5SPatrick Mooney * corresponding bit in the TMR is cleared. 3584c74a40a5SPatrick Mooney */ 3585c74a40a5SPatrick Mooney *tmrp &= ~pending_edge; 3586c74a40a5SPatrick Mooney *irrp |= pending_edge; 3587c74a40a5SPatrick Mooney } 3588c74a40a5SPatrick Mooney if (pending_inject != 0) { 3589c74a40a5SPatrick Mooney /* 3590c74a40a5SPatrick Mooney * Interrupts which do not require a change to the TMR 3591c74a40a5SPatrick Mooney * (because it already matches the necessary state) can 3592c74a40a5SPatrick Mooney * simply be queued in IRR. 3593c74a40a5SPatrick Mooney */ 3594c74a40a5SPatrick Mooney *irrp |= pending_inject; 3595c74a40a5SPatrick Mooney } 3596bf21cd93STycho Nightingale 3597c74a40a5SPatrick Mooney if (*tmrp != vlapic_vtx->tmr_active[i]) { 3598c74a40a5SPatrick Mooney /* Check if VMX EOI triggers require updating. 
*/ 3599c74a40a5SPatrick Mooney vlapic_vtx->tmr_active[i] = *tmrp; 3600c74a40a5SPatrick Mooney vlapic_vtx->tmr_sync = B_TRUE; 3601c74a40a5SPatrick Mooney } 3602bf21cd93STycho Nightingale } 3603c74a40a5SPatrick Mooney } 3604bf21cd93STycho Nightingale 3605c74a40a5SPatrick Mooney static void 3606c74a40a5SPatrick Mooney vmx_tpr_shadow_enter(struct vlapic *vlapic) 3607c74a40a5SPatrick Mooney { 3608c74a40a5SPatrick Mooney /* 3609c74a40a5SPatrick Mooney * When TPR shadowing is enabled, VMX will initiate a guest exit if its 3610c74a40a5SPatrick Mooney * TPR falls below a threshold priority. That threshold is set to the 3611c74a40a5SPatrick Mooney * current TPR priority, since guest interrupt status should be 3612c74a40a5SPatrick Mooney * re-evaluated if its TPR is set lower. 3613c74a40a5SPatrick Mooney */ 3614c74a40a5SPatrick Mooney vmcs_write(VMCS_TPR_THRESHOLD, vlapic_get_cr8(vlapic)); 3615c74a40a5SPatrick Mooney } 3616bf21cd93STycho Nightingale 3617c74a40a5SPatrick Mooney static void 3618c74a40a5SPatrick Mooney vmx_tpr_shadow_exit(struct vlapic *vlapic) 3619c74a40a5SPatrick Mooney { 3620bf21cd93STycho Nightingale /* 3621c74a40a5SPatrick Mooney * Unlike full APICv, where changes to the TPR are reflected in the PPR, 3622c74a40a5SPatrick Mooney * with TPR shadowing, that duty is relegated to the VMM. Upon exit, 3623c74a40a5SPatrick Mooney * the PPR is updated to reflect any change in the TPR here. 
 */
	vlapic_sync_tpr(vlapic);
}

/*
 * Allocate and initialize the VMX-flavored vlapic for a vCPU.
 *
 * The ops vectors are populated according to the hardware capabilities that
 * were enabled for this vmx instance: TPR shadowing, APICv (virtual interrupt
 * delivery), and posted interrupts.  Returns the embedded struct vlapic of
 * the newly allocated struct vlapic_vtx.
 */
static struct vlapic *
vmx_vlapic_init(void *arg, int vcpuid)
{
	struct vmx *vmx;
	struct vlapic *vlapic;
	struct vlapic_vtx *vlapic_vtx;

	vmx = arg;

	/* struct vlapic is the leading member of struct vlapic_vtx. */
	vlapic = malloc(sizeof (struct vlapic_vtx), M_VLAPIC,
	    M_WAITOK | M_ZERO);
	vlapic->vm = vmx->vm;
	vlapic->vcpuid = vcpuid;
	vlapic->apic_page = (struct LAPIC *)&vmx->apic_page[vcpuid];

	vlapic_vtx = (struct vlapic_vtx *)vlapic;
	vlapic_vtx->pir_desc = &vmx->pir_desc[vcpuid];
	vlapic_vtx->vmx = vmx;

	if (vmx_cap_en(vmx, VMX_CAP_TPR_SHADOW)) {
		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_ts;
	}
	if (vmx_cap_en(vmx, VMX_CAP_APICV)) {
		vlapic->ops.set_intr_ready = vmx_apicv_set_ready;
		vlapic->ops.sync_state = vmx_apicv_sync;
		vlapic->ops.intr_accepted = vmx_apicv_accepted;
		/* APICv overrides the TPR-shadow x2apic handler, if set. */
		vlapic->ops.enable_x2apic_mode = vmx_enable_x2apic_mode_vid;

		if (vmx_cap_en(vmx, VMX_CAP_APICV_PIR)) {
			vlapic->ops.post_intr = vmx_apicv_notify;
		}
	}

	vlapic_init(vlapic);

	return (vlapic);
}

/* Tear down and free a vlapic created by vmx_vlapic_init(). */
static void
vmx_vlapic_cleanup(void *arg, struct vlapic *vlapic)
{

	vlapic_cleanup(vlapic);
	free(vlapic, M_VLAPIC);
}

/*
 * Save per-vCPU VMX context when the thread is switched off-CPU: VMCLEAR the
 * VMCS and restore host MSR state if the VMCS was loaded.
 */
static void
vmx_savectx(void *arg, int vcpu)
{
	struct vmx *vmx = arg;

	if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
		vmcs_clear(vmx->vmcs_pa[vcpu]);
		vmx_msr_guest_exit(vmx, vcpu);
		/*
		 * Having VMCLEARed the VMCS, it can no longer be re-entered
		 * with VMRESUME, but must be VMLAUNCHed again.
		 */
		vmx->vmcs_state[vcpu] &= ~VS_LAUNCHED;
	}

	reset_gdtr_limit();
}

/*
 * Restore per-vCPU VMX context when the thread is switched back on-CPU:
 * re-enter guest MSR state and VMPTRLD the VMCS if it had been loaded.
 */
static void
vmx_restorectx(void *arg, int vcpu)
{
	struct vmx *vmx = arg;

	/* vmx_savectx() must have cleared VS_LAUNCHED for this vCPU. */
	ASSERT0(vmx->vmcs_state[vcpu] & VS_LAUNCHED);

	if ((vmx->vmcs_state[vcpu] & VS_LOADED) != 0) {
		vmx_msr_guest_enter(vmx, vcpu);
		vmcs_load(vmx->vmcs_pa[vcpu]);
	}
}

/* Intel VT-x backend entry points for the VMM framework. */
struct vmm_ops vmm_ops_intel = {
	.init		= vmx_init,
	.cleanup	= vmx_cleanup,
	.resume		= vmx_restore,

	.vminit		= vmx_vminit,
	.vmrun		= vmx_run,
	.vmcleanup	= vmx_vmcleanup,
	.vmgetreg	= vmx_getreg,
	.vmsetreg	= vmx_setreg,
	.vmgetdesc	= vmx_getdesc,
	.vmsetdesc	= vmx_setdesc,
	.vmgetcap	= vmx_getcap,
	.vmsetcap	= vmx_setcap,
	.vlapic_init	= vmx_vlapic_init,
	.vlapic_cleanup	= vmx_vlapic_cleanup,

	.vmsavectx	= vmx_savectx,
	.vmrestorectx	= vmx_restorectx,
};

/* Side-effect free HW validation derived from checks in vmx_init.
*/ 37274c87aefeSPatrick Mooney int 37284c87aefeSPatrick Mooney vmx_x86_supported(const char **msg) 37294c87aefeSPatrick Mooney { 37304c87aefeSPatrick Mooney int error; 37314c87aefeSPatrick Mooney uint32_t tmp; 37324c87aefeSPatrick Mooney 37334c87aefeSPatrick Mooney ASSERT(msg != NULL); 37344c87aefeSPatrick Mooney 37354c87aefeSPatrick Mooney /* Check support for primary processor-based VM-execution controls */ 37364c87aefeSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS, 37374c87aefeSPatrick Mooney MSR_VMX_TRUE_PROCBASED_CTLS, PROCBASED_CTLS_ONE_SETTING, 37384c87aefeSPatrick Mooney PROCBASED_CTLS_ZERO_SETTING, &tmp); 37394c87aefeSPatrick Mooney if (error) { 37404c87aefeSPatrick Mooney *msg = "processor does not support desired primary " 37414c87aefeSPatrick Mooney "processor-based controls"; 37424c87aefeSPatrick Mooney return (error); 37434c87aefeSPatrick Mooney } 37444c87aefeSPatrick Mooney 37454c87aefeSPatrick Mooney /* Check support for secondary processor-based VM-execution controls */ 37464c87aefeSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, 37474c87aefeSPatrick Mooney MSR_VMX_PROCBASED_CTLS2, PROCBASED_CTLS2_ONE_SETTING, 37484c87aefeSPatrick Mooney PROCBASED_CTLS2_ZERO_SETTING, &tmp); 37494c87aefeSPatrick Mooney if (error) { 37504c87aefeSPatrick Mooney *msg = "processor does not support desired secondary " 37514c87aefeSPatrick Mooney "processor-based controls"; 37524c87aefeSPatrick Mooney return (error); 37534c87aefeSPatrick Mooney } 37544c87aefeSPatrick Mooney 37554c87aefeSPatrick Mooney /* Check support for pin-based VM-execution controls */ 37564c87aefeSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PINBASED_CTLS, 37574c87aefeSPatrick Mooney MSR_VMX_TRUE_PINBASED_CTLS, PINBASED_CTLS_ONE_SETTING, 37584c87aefeSPatrick Mooney PINBASED_CTLS_ZERO_SETTING, &tmp); 37594c87aefeSPatrick Mooney if (error) { 37604c87aefeSPatrick Mooney *msg = "processor does not support desired pin-based controls"; 37614c87aefeSPatrick Mooney return 
(error); 37624c87aefeSPatrick Mooney } 37634c87aefeSPatrick Mooney 37644c87aefeSPatrick Mooney /* Check support for VM-exit controls */ 37654c87aefeSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_EXIT_CTLS, MSR_VMX_TRUE_EXIT_CTLS, 37664c87aefeSPatrick Mooney VM_EXIT_CTLS_ONE_SETTING, VM_EXIT_CTLS_ZERO_SETTING, &tmp); 37674c87aefeSPatrick Mooney if (error) { 37684c87aefeSPatrick Mooney *msg = "processor does not support desired exit controls"; 37694c87aefeSPatrick Mooney return (error); 37704c87aefeSPatrick Mooney } 37714c87aefeSPatrick Mooney 37724c87aefeSPatrick Mooney /* Check support for VM-entry controls */ 37734c87aefeSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_ENTRY_CTLS, MSR_VMX_TRUE_ENTRY_CTLS, 37744c87aefeSPatrick Mooney VM_ENTRY_CTLS_ONE_SETTING, VM_ENTRY_CTLS_ZERO_SETTING, &tmp); 37754c87aefeSPatrick Mooney if (error) { 37764c87aefeSPatrick Mooney *msg = "processor does not support desired entry controls"; 37774c87aefeSPatrick Mooney return (error); 37784c87aefeSPatrick Mooney } 37794c87aefeSPatrick Mooney 37804c87aefeSPatrick Mooney /* Unrestricted guest is nominally optional, but not for us. */ 37814c87aefeSPatrick Mooney error = vmx_set_ctlreg(MSR_VMX_PROCBASED_CTLS2, MSR_VMX_PROCBASED_CTLS2, 37824c87aefeSPatrick Mooney PROCBASED2_UNRESTRICTED_GUEST, 0, &tmp); 37834c87aefeSPatrick Mooney if (error) { 37844c87aefeSPatrick Mooney *msg = "processor does not support desired unrestricted guest " 37854c87aefeSPatrick Mooney "controls"; 37864c87aefeSPatrick Mooney return (error); 37874c87aefeSPatrick Mooney } 37884c87aefeSPatrick Mooney 37894c87aefeSPatrick Mooney return (0); 37904c87aefeSPatrick Mooney } 3791