/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 NetApp, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. * * Copyright 2015 Pluribus Networks Inc. * Copyright 2019 Joyent, Inc. * Copyright 2022 Oxide Computer Company */ #ifndef _VMM_H_ #define _VMM_H_ enum vm_suspend_how { VM_SUSPEND_NONE, VM_SUSPEND_RESET, VM_SUSPEND_POWEROFF, VM_SUSPEND_HALT, VM_SUSPEND_TRIPLEFAULT, VM_SUSPEND_LAST }; /* * Identifiers for architecturally defined registers. */ enum vm_reg_name { VM_REG_GUEST_RAX, VM_REG_GUEST_RBX, VM_REG_GUEST_RCX, VM_REG_GUEST_RDX, VM_REG_GUEST_RSI, VM_REG_GUEST_RDI, VM_REG_GUEST_RBP, VM_REG_GUEST_R8, VM_REG_GUEST_R9, VM_REG_GUEST_R10, VM_REG_GUEST_R11, VM_REG_GUEST_R12, VM_REG_GUEST_R13, VM_REG_GUEST_R14, VM_REG_GUEST_R15, VM_REG_GUEST_CR0, VM_REG_GUEST_CR3, VM_REG_GUEST_CR4, VM_REG_GUEST_DR7, VM_REG_GUEST_RSP, VM_REG_GUEST_RIP, VM_REG_GUEST_RFLAGS, VM_REG_GUEST_ES, VM_REG_GUEST_CS, VM_REG_GUEST_SS, VM_REG_GUEST_DS, VM_REG_GUEST_FS, VM_REG_GUEST_GS, VM_REG_GUEST_LDTR, VM_REG_GUEST_TR, VM_REG_GUEST_IDTR, VM_REG_GUEST_GDTR, VM_REG_GUEST_EFER, VM_REG_GUEST_CR2, VM_REG_GUEST_PDPTE0, VM_REG_GUEST_PDPTE1, VM_REG_GUEST_PDPTE2, VM_REG_GUEST_PDPTE3, VM_REG_GUEST_INTR_SHADOW, VM_REG_GUEST_DR0, VM_REG_GUEST_DR1, VM_REG_GUEST_DR2, VM_REG_GUEST_DR3, VM_REG_GUEST_DR6, VM_REG_GUEST_ENTRY_INST_LENGTH, VM_REG_GUEST_XCR0, VM_REG_LAST }; enum x2apic_state { X2APIC_DISABLED, X2APIC_ENABLED, X2APIC_STATE_LAST }; #define VM_INTINFO_MASK_VECTOR 0xffUL #define VM_INTINFO_MASK_TYPE 0x700UL #define VM_INTINFO_MASK_RSVD 0x7ffff000UL #define VM_INTINFO_SHIFT_ERRCODE 32 #define VM_INTINFO_VECTOR(val) ((val) & VM_INTINFO_MASK_VECTOR) #define VM_INTINFO_TYPE(val) ((val) & VM_INTINFO_MASK_TYPE) #define VM_INTINFO_ERRCODE(val) ((val) >> VM_INTINFO_SHIFT_ERRCODE) #define VM_INTINFO_PENDING(val) (((val) & VM_INTINFO_VALID) != 0) #define VM_INTINFO_HAS_ERRCODE(val) (((val) & VM_INTINFO_DEL_ERRCODE) != 0) #define VM_INTINFO_VALID (1UL << 31) #define VM_INTINFO_DEL_ERRCODE (1UL << 11) #define VM_INTINFO_HWINTR (0 << 8) #define VM_INTINFO_NMI (2 << 8) #define VM_INTINFO_HWEXCP (3 << 8) #define VM_INTINFO_SWINTR (4 << 8) /* Reserved for CPU (read: Intel) specific types */ #define VM_INTINFO_RESV1 (1 << 8) #define VM_INTINFO_RESV5 (5 << 8) #define VM_INTINFO_RESV6 (6 << 8) #define VM_INTINFO_RESV7 (7 << 8) /* * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does. * To simplify structure definitions, an arbitrary limit has been chosen. * This same limit is used for memory segment names */ #define VM_MAX_NAMELEN 128 #define VM_MAX_SEG_NAMELEN 128 #ifdef _KERNEL #define VM_MAXCPU 32 /* maximum virtual cpus */ #endif /* * Identifiers for optional vmm capabilities */ enum vm_cap_type { VM_CAP_HALT_EXIT, VM_CAP_MTRAP_EXIT, VM_CAP_PAUSE_EXIT, VM_CAP_ENABLE_INVPCID, VM_CAP_BPT_EXIT, VM_CAP_MAX }; enum vmx_caps { VMX_CAP_NONE = 0, VMX_CAP_TPR_SHADOW = (1UL << 0), VMX_CAP_APICV = (1UL << 1), VMX_CAP_APICV_X2APIC = (1UL << 2), VMX_CAP_APICV_PIR = (1UL << 3), }; enum vm_intr_trigger { EDGE_TRIGGER, LEVEL_TRIGGER }; /* * The 'access' field has the format specified in Table 21-2 of the Intel * Architecture Manual vol 3b. * * XXX The contents of the 'access' field are architecturally defined except * bit 16 - Segment Unusable. */ struct seg_desc { uint64_t base; uint32_t limit; uint32_t access; }; #define SEG_DESC_TYPE(access) ((access) & 0x001f) #define SEG_DESC_DPL_MASK 0x3 #define SEG_DESC_DPL_SHIFT 5 #define SEG_DESC_DPL(access) \ (((access) >> SEG_DESC_DPL_SHIFT) & SEG_DESC_DPL_MASK) #define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0) #define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0) #define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0) #define SEG_DESC_UNUSABLE(access) (((access) & 0x10000) ? 1 : 0) enum vm_cpu_mode { CPU_MODE_REAL, CPU_MODE_PROTECTED, CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */ CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */ }; enum vm_paging_mode { PAGING_MODE_FLAT, PAGING_MODE_32, PAGING_MODE_PAE, PAGING_MODE_64, }; struct vm_guest_paging { uint64_t cr3; int cpl; enum vm_cpu_mode cpu_mode; enum vm_paging_mode paging_mode; }; enum vm_exitcode { VM_EXITCODE_INOUT, VM_EXITCODE_VMX, VM_EXITCODE_BOGUS, VM_EXITCODE_RDMSR, VM_EXITCODE_WRMSR, VM_EXITCODE_HLT, VM_EXITCODE_MTRAP, VM_EXITCODE_PAUSE, VM_EXITCODE_PAGING, VM_EXITCODE_INST_EMUL, VM_EXITCODE_RUN_STATE, VM_EXITCODE_MMIO_EMUL, VM_EXITCODE_DEPRECATED, /* formerly RUNBLOCK */ VM_EXITCODE_IOAPIC_EOI, VM_EXITCODE_SUSPENDED, VM_EXITCODE_MMIO, VM_EXITCODE_TASK_SWITCH, VM_EXITCODE_MONITOR, VM_EXITCODE_MWAIT, VM_EXITCODE_SVM, VM_EXITCODE_DEPRECATED2, /* formerly REQIDLE */ VM_EXITCODE_DEBUG, VM_EXITCODE_VMINSN, VM_EXITCODE_BPT, VM_EXITCODE_HT, VM_EXITCODE_MAX }; enum inout_flags { INOUT_IN = (1U << 0), /* direction: 'in' when set, else 'out' */ /* * The following flags are used only for in-kernel emulation logic and * are not exposed to userspace. */ INOUT_STR = (1U << 1), /* ins/outs operation */ INOUT_REP = (1U << 2), /* 'rep' prefix present on instruction */ }; struct vm_inout { uint32_t eax; uint16_t port; uint8_t bytes; /* 1 or 2 or 4 */ uint8_t flags; /* see: inout_flags */ /* * The address size and segment are relevant to INS/OUTS operations. * Userspace is not concerned with them since the in-kernel emulation * handles those specific aspects. */ uint8_t addrsize; uint8_t segment; }; struct vm_mmio { uint8_t bytes; /* 1/2/4/8 bytes */ uint8_t read; /* read: 1, write: 0 */ uint16_t _pad[3]; uint64_t gpa; uint64_t data; }; enum task_switch_reason { TSR_CALL, TSR_IRET, TSR_JMP, TSR_IDT_GATE, /* task gate in IDT */ }; struct vm_task_switch { uint16_t tsssel; /* new TSS selector */ int ext; /* task switch due to external event */ uint32_t errcode; int errcode_valid; /* push 'errcode' on the new stack */ enum task_switch_reason reason; struct vm_guest_paging paging; }; enum vcpu_run_state { VRS_HALT = 0, VRS_INIT = (1 << 0), VRS_RUN = (1 << 1), VRS_PEND_INIT = (1 << 14), VRS_PEND_SIPI = (1 << 15), }; #define VRS_MASK_VALID(v) \ ((v) & (VRS_INIT | VRS_RUN | VRS_PEND_SIPI | VRS_PEND_SIPI)) #define VRS_IS_VALID(v) ((v) == VRS_MASK_VALID(v)) struct vm_exit { enum vm_exitcode exitcode; int inst_length; /* 0 means unknown */ uint64_t rip; union { struct vm_inout inout; struct vm_mmio mmio; struct { uint64_t gpa; int fault_type; } paging; /* * Kernel-internal MMIO decoding and emulation. * Userspace should not expect to see this, but rather a * VM_EXITCODE_MMIO with the above 'mmio' context. */ struct { uint64_t gpa; uint64_t gla; uint64_t cs_base; int cs_d; /* CS.D */ } mmio_emul; struct { uint8_t inst[15]; uint8_t num_valid; } inst_emul; /* * VMX specific payload. Used when there is no "better" * exitcode to represent the VM-exit. */ struct { int status; /* vmx inst status */ /* * 'exit_reason' and 'exit_qualification' are valid * only if 'status' is zero. */ uint32_t exit_reason; uint64_t exit_qualification; /* * 'inst_error' and 'inst_type' are valid * only if 'status' is non-zero. */ int inst_type; int inst_error; } vmx; /* * SVM specific payload. */ struct { uint64_t exitcode; uint64_t exitinfo1; uint64_t exitinfo2; } svm; struct { int inst_length; } bpt; struct { uint32_t code; /* ecx value */ uint64_t wval; } msr; struct { uint64_t rflags; } hlt; struct { int vector; } ioapic_eoi; struct { enum vm_suspend_how how; /* * Source vcpuid for suspend status. Typically -1, * except for triple-fault events which occur on a * specific faulting vCPU. */ int source; /* * When suspend status was set on VM, measured in * nanoseconds since VM boot. */ uint64_t when; } suspended; struct vm_task_switch task_switch; } u; }; enum vm_entry_cmds { VEC_DEFAULT = 0, VEC_DISCARD_INSTR, /* discard inst emul state */ VEC_FULFILL_MMIO, /* entry includes result for mmio emul */ VEC_FULFILL_INOUT, /* entry includes result for inout emul */ /* Below are flags which can be combined with the above commands: */ /* * Exit to userspace when vCPU is in consistent state: when any pending * instruction emulation tasks have been completed and committed to the * architecturally defined state. */ VEC_FLAG_EXIT_CONSISTENT = 1 << 31, }; struct vm_entry { int cpuid; uint_t cmd; /* see: vm_entry_cmds */ void *exit_data; union { struct vm_inout inout; struct vm_mmio mmio; } u; }; int vm_restart_instruction(void *vm, int vcpuid); enum vm_create_flags { /* * Allocate guest memory segments from existing reservoir capacity, * rather than attempting to create transient allocations. */ VCF_RESERVOIR_MEM = (1 << 0), /* * Enable dirty page tracking for the guest. */ VCF_TRACK_DIRTY = (1 << 1), }; /* * Describes an entry for `cpuid` emulation. * Used internally by bhyve (kernel) in addition to exposed ioctl(2) interface. */ struct vcpu_cpuid_entry { uint32_t vce_function; uint32_t vce_index; uint32_t vce_flags; uint32_t vce_eax; uint32_t vce_ebx; uint32_t vce_ecx; uint32_t vce_edx; uint32_t _pad; }; /* * Defined flags for vcpu_cpuid_entry`vce_flags are below. */ /* Use index (ecx) input value when matching entry */ #define VCE_FLAG_MATCH_INDEX (1 << 0) /* All valid flacts for vcpu_cpuid_entry`vce_flags */ #define VCE_FLAGS_VALID VCE_FLAG_MATCH_INDEX /* * Defined flags for vcpu_cpuid configuration are below. * These are used by both the ioctl(2) interface via vm_vcpu_cpuid_config and * internally in the kernel vmm. */ /* Use legacy hard-coded cpuid masking tables applied to the host CPU */ #define VCC_FLAG_LEGACY_HANDLING (1 << 0) /* * Emulate Intel-style fallback behavior (emit highest "standard" entry) if the * queried function/index do not match. If not set, emulate AMD-style, where * all zeroes are returned in such cases. */ #define VCC_FLAG_INTEL_FALLBACK (1 << 1) /* All valid flacts for vm_vcpu_cpuid_config`vvcc_flags */ #define VCC_FLAGS_VALID \ (VCC_FLAG_LEGACY_HANDLING | VCC_FLAG_INTEL_FALLBACK) /* Maximum vcpu_cpuid_entry records per vCPU */ #define VMM_MAX_CPUID_ENTRIES 256 #endif /* _VMM_H_ */