1*4c87aefeSPatrick Mooney /*- 2*4c87aefeSPatrick Mooney * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3*4c87aefeSPatrick Mooney * 4*4c87aefeSPatrick Mooney * Copyright (c) 2014 Neel Natu <neel@freebsd.org> 5*4c87aefeSPatrick Mooney * All rights reserved. 6*4c87aefeSPatrick Mooney * 7*4c87aefeSPatrick Mooney * Redistribution and use in source and binary forms, with or without 8*4c87aefeSPatrick Mooney * modification, are permitted provided that the following conditions 9*4c87aefeSPatrick Mooney * are met: 10*4c87aefeSPatrick Mooney * 1. Redistributions of source code must retain the above copyright 11*4c87aefeSPatrick Mooney * notice, this list of conditions and the following disclaimer. 12*4c87aefeSPatrick Mooney * 2. Redistributions in binary form must reproduce the above copyright 13*4c87aefeSPatrick Mooney * notice, this list of conditions and the following disclaimer in the 14*4c87aefeSPatrick Mooney * documentation and/or other materials provided with the distribution. 15*4c87aefeSPatrick Mooney * 16*4c87aefeSPatrick Mooney * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 17*4c87aefeSPatrick Mooney * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18*4c87aefeSPatrick Mooney * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19*4c87aefeSPatrick Mooney * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20*4c87aefeSPatrick Mooney * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21*4c87aefeSPatrick Mooney * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22*4c87aefeSPatrick Mooney * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23*4c87aefeSPatrick Mooney * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24*4c87aefeSPatrick Mooney * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25*4c87aefeSPatrick Mooney * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26*4c87aefeSPatrick Mooney * SUCH DAMAGE. 27*4c87aefeSPatrick Mooney */ 28*4c87aefeSPatrick Mooney 29*4c87aefeSPatrick Mooney #include <sys/cdefs.h> 30*4c87aefeSPatrick Mooney __FBSDID("$FreeBSD$"); 31*4c87aefeSPatrick Mooney 32*4c87aefeSPatrick Mooney #include <sys/param.h> 33*4c87aefeSPatrick Mooney #include <sys/_iovec.h> 34*4c87aefeSPatrick Mooney #include <sys/mman.h> 35*4c87aefeSPatrick Mooney 36*4c87aefeSPatrick Mooney #include <x86/psl.h> 37*4c87aefeSPatrick Mooney #include <x86/segments.h> 38*4c87aefeSPatrick Mooney #include <x86/specialreg.h> 39*4c87aefeSPatrick Mooney #include <machine/vmm.h> 40*4c87aefeSPatrick Mooney #include <machine/vmm_instruction_emul.h> 41*4c87aefeSPatrick Mooney 42*4c87aefeSPatrick Mooney #include <assert.h> 43*4c87aefeSPatrick Mooney #include <errno.h> 44*4c87aefeSPatrick Mooney #include <stdbool.h> 45*4c87aefeSPatrick Mooney #include <stdio.h> 46*4c87aefeSPatrick Mooney #include <stdlib.h> 47*4c87aefeSPatrick Mooney 48*4c87aefeSPatrick Mooney #include <vmmapi.h> 49*4c87aefeSPatrick Mooney 50*4c87aefeSPatrick Mooney #include "bhyverun.h" 51*4c87aefeSPatrick Mooney 52*4c87aefeSPatrick Mooney /* 53*4c87aefeSPatrick Mooney * Using 'struct i386tss' is tempting but causes myriad sign extension 54*4c87aefeSPatrick Mooney * issues because all of its fields are defined as signed integers. 55*4c87aefeSPatrick Mooney */ 56*4c87aefeSPatrick Mooney struct tss32 { 57*4c87aefeSPatrick Mooney uint16_t tss_link; 58*4c87aefeSPatrick Mooney uint16_t rsvd1; 59*4c87aefeSPatrick Mooney uint32_t tss_esp0; 60*4c87aefeSPatrick Mooney uint16_t tss_ss0; 61*4c87aefeSPatrick Mooney uint16_t rsvd2; 62*4c87aefeSPatrick Mooney uint32_t tss_esp1; 63*4c87aefeSPatrick Mooney uint16_t tss_ss1; 64*4c87aefeSPatrick Mooney uint16_t rsvd3; 65*4c87aefeSPatrick Mooney uint32_t tss_esp2; 66*4c87aefeSPatrick Mooney uint16_t tss_ss2; 67*4c87aefeSPatrick Mooney uint16_t rsvd4; 68*4c87aefeSPatrick Mooney uint32_t tss_cr3; 69*4c87aefeSPatrick Mooney uint32_t tss_eip; 70*4c87aefeSPatrick Mooney uint32_t tss_eflags; 71*4c87aefeSPatrick Mooney uint32_t tss_eax; 72*4c87aefeSPatrick Mooney uint32_t tss_ecx; 73*4c87aefeSPatrick Mooney uint32_t tss_edx; 74*4c87aefeSPatrick Mooney uint32_t tss_ebx; 75*4c87aefeSPatrick Mooney uint32_t tss_esp; 76*4c87aefeSPatrick Mooney uint32_t tss_ebp; 77*4c87aefeSPatrick Mooney uint32_t tss_esi; 78*4c87aefeSPatrick Mooney uint32_t tss_edi; 79*4c87aefeSPatrick Mooney uint16_t tss_es; 80*4c87aefeSPatrick Mooney uint16_t rsvd5; 81*4c87aefeSPatrick Mooney uint16_t tss_cs; 82*4c87aefeSPatrick Mooney uint16_t rsvd6; 83*4c87aefeSPatrick Mooney uint16_t tss_ss; 84*4c87aefeSPatrick Mooney uint16_t rsvd7; 85*4c87aefeSPatrick Mooney uint16_t tss_ds; 86*4c87aefeSPatrick Mooney uint16_t rsvd8; 87*4c87aefeSPatrick Mooney uint16_t tss_fs; 88*4c87aefeSPatrick Mooney uint16_t rsvd9; 89*4c87aefeSPatrick Mooney uint16_t tss_gs; 90*4c87aefeSPatrick Mooney uint16_t rsvd10; 91*4c87aefeSPatrick Mooney uint16_t tss_ldt; 92*4c87aefeSPatrick Mooney uint16_t rsvd11; 93*4c87aefeSPatrick Mooney uint16_t tss_trap; 94*4c87aefeSPatrick Mooney uint16_t tss_iomap; 95*4c87aefeSPatrick Mooney }; 96*4c87aefeSPatrick Mooney static_assert(sizeof(struct tss32) == 104, "compile-time assertion failed"); 97*4c87aefeSPatrick Mooney 98*4c87aefeSPatrick Mooney #define SEL_START(sel) (((sel) & ~0x7)) 99*4c87aefeSPatrick Mooney #define SEL_LIMIT(sel) (((sel) | 0x7)) 100*4c87aefeSPatrick Mooney #define TSS_BUSY(type) (((type) & 0x2) != 0) 101*4c87aefeSPatrick Mooney 102*4c87aefeSPatrick Mooney static uint64_t 103*4c87aefeSPatrick Mooney GETREG(struct vmctx *ctx, int vcpu, int reg) 104*4c87aefeSPatrick Mooney { 105*4c87aefeSPatrick Mooney uint64_t val; 106*4c87aefeSPatrick Mooney int error; 107*4c87aefeSPatrick Mooney 108*4c87aefeSPatrick Mooney error = vm_get_register(ctx, vcpu, reg, &val); 109*4c87aefeSPatrick Mooney assert(error == 0); 110*4c87aefeSPatrick Mooney return (val); 111*4c87aefeSPatrick Mooney } 112*4c87aefeSPatrick Mooney 113*4c87aefeSPatrick Mooney static void 114*4c87aefeSPatrick Mooney SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val) 115*4c87aefeSPatrick Mooney { 116*4c87aefeSPatrick Mooney int error; 117*4c87aefeSPatrick Mooney 118*4c87aefeSPatrick Mooney error = vm_set_register(ctx, vcpu, reg, val); 119*4c87aefeSPatrick Mooney assert(error == 0); 120*4c87aefeSPatrick Mooney } 121*4c87aefeSPatrick Mooney 122*4c87aefeSPatrick Mooney static struct seg_desc 123*4c87aefeSPatrick Mooney usd_to_seg_desc(struct user_segment_descriptor *usd) 124*4c87aefeSPatrick Mooney { 125*4c87aefeSPatrick Mooney struct seg_desc seg_desc; 126*4c87aefeSPatrick Mooney 127*4c87aefeSPatrick Mooney seg_desc.base = (u_int)USD_GETBASE(usd); 128*4c87aefeSPatrick Mooney if (usd->sd_gran) 129*4c87aefeSPatrick Mooney seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff; 130*4c87aefeSPatrick Mooney else 131*4c87aefeSPatrick Mooney seg_desc.limit = (u_int)USD_GETLIMIT(usd); 132*4c87aefeSPatrick Mooney seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7; 133*4c87aefeSPatrick Mooney seg_desc.access |= usd->sd_xx << 12; 134*4c87aefeSPatrick Mooney seg_desc.access |= usd->sd_def32 << 14; 135*4c87aefeSPatrick Mooney seg_desc.access |= usd->sd_gran << 15; 136*4c87aefeSPatrick Mooney 137*4c87aefeSPatrick Mooney return (seg_desc); 138*4c87aefeSPatrick Mooney } 139*4c87aefeSPatrick Mooney 140*4c87aefeSPatrick Mooney /* 141*4c87aefeSPatrick Mooney * Inject an exception with an error code that is a segment selector. 142*4c87aefeSPatrick Mooney * The format of the error code is described in section 6.13, "Error Code", 143*4c87aefeSPatrick Mooney * Intel SDM volume 3. 144*4c87aefeSPatrick Mooney * 145*4c87aefeSPatrick Mooney * Bit 0 (EXT) denotes whether the exception occurred during delivery 146*4c87aefeSPatrick Mooney * of an external event like an interrupt. 147*4c87aefeSPatrick Mooney * 148*4c87aefeSPatrick Mooney * Bit 1 (IDT) indicates whether the selector points to a gate descriptor 149*4c87aefeSPatrick Mooney * in the IDT. 150*4c87aefeSPatrick Mooney * 151*4c87aefeSPatrick Mooney * Bit 2(GDT/LDT) has the usual interpretation of Table Indicator (TI). 152*4c87aefeSPatrick Mooney */ 153*4c87aefeSPatrick Mooney static void 154*4c87aefeSPatrick Mooney sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext) 155*4c87aefeSPatrick Mooney { 156*4c87aefeSPatrick Mooney /* 157*4c87aefeSPatrick Mooney * Bit 2 from the selector is retained as-is in the error code. 158*4c87aefeSPatrick Mooney * 159*4c87aefeSPatrick Mooney * Bit 1 can be safely cleared because none of the selectors 160*4c87aefeSPatrick Mooney * encountered during task switch emulation refer to a task 161*4c87aefeSPatrick Mooney * gate in the IDT. 162*4c87aefeSPatrick Mooney * 163*4c87aefeSPatrick Mooney * Bit 0 is set depending on the value of 'ext'. 164*4c87aefeSPatrick Mooney */ 165*4c87aefeSPatrick Mooney sel &= ~0x3; 166*4c87aefeSPatrick Mooney if (ext) 167*4c87aefeSPatrick Mooney sel |= 0x1; 168*4c87aefeSPatrick Mooney vm_inject_fault(ctx, vcpu, vector, 1, sel); 169*4c87aefeSPatrick Mooney } 170*4c87aefeSPatrick Mooney 171*4c87aefeSPatrick Mooney /* 172*4c87aefeSPatrick Mooney * Return 0 if the selector 'sel' in within the limits of the GDT/LDT 173*4c87aefeSPatrick Mooney * and non-zero otherwise. 174*4c87aefeSPatrick Mooney */ 175*4c87aefeSPatrick Mooney static int 176*4c87aefeSPatrick Mooney desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel) 177*4c87aefeSPatrick Mooney { 178*4c87aefeSPatrick Mooney uint64_t base; 179*4c87aefeSPatrick Mooney uint32_t limit, access; 180*4c87aefeSPatrick Mooney int error, reg; 181*4c87aefeSPatrick Mooney 182*4c87aefeSPatrick Mooney reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR; 183*4c87aefeSPatrick Mooney error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access); 184*4c87aefeSPatrick Mooney assert(error == 0); 185*4c87aefeSPatrick Mooney 186*4c87aefeSPatrick Mooney if (reg == VM_REG_GUEST_LDTR) { 187*4c87aefeSPatrick Mooney if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access)) 188*4c87aefeSPatrick Mooney return (-1); 189*4c87aefeSPatrick Mooney } 190*4c87aefeSPatrick Mooney 191*4c87aefeSPatrick Mooney if (limit < SEL_LIMIT(sel)) 192*4c87aefeSPatrick Mooney return (-1); 193*4c87aefeSPatrick Mooney else 194*4c87aefeSPatrick Mooney return (0); 195*4c87aefeSPatrick Mooney } 196*4c87aefeSPatrick Mooney 197*4c87aefeSPatrick Mooney /* 198*4c87aefeSPatrick Mooney * Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced 199*4c87aefeSPatrick Mooney * by the selector 'sel'. 200*4c87aefeSPatrick Mooney * 201*4c87aefeSPatrick Mooney * Returns 0 on success. 202*4c87aefeSPatrick Mooney * Returns 1 if an exception was injected into the guest. 203*4c87aefeSPatrick Mooney * Returns -1 otherwise. 204*4c87aefeSPatrick Mooney */ 205*4c87aefeSPatrick Mooney static int 206*4c87aefeSPatrick Mooney desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, 207*4c87aefeSPatrick Mooney uint16_t sel, struct user_segment_descriptor *desc, bool doread, 208*4c87aefeSPatrick Mooney int *faultptr) 209*4c87aefeSPatrick Mooney { 210*4c87aefeSPatrick Mooney struct iovec iov[2]; 211*4c87aefeSPatrick Mooney uint64_t base; 212*4c87aefeSPatrick Mooney uint32_t limit, access; 213*4c87aefeSPatrick Mooney int error, reg; 214*4c87aefeSPatrick Mooney 215*4c87aefeSPatrick Mooney reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR; 216*4c87aefeSPatrick Mooney error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access); 217*4c87aefeSPatrick Mooney assert(error == 0); 218*4c87aefeSPatrick Mooney assert(limit >= SEL_LIMIT(sel)); 219*4c87aefeSPatrick Mooney 220*4c87aefeSPatrick Mooney error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel), 221*4c87aefeSPatrick Mooney sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov), 222*4c87aefeSPatrick Mooney faultptr); 223*4c87aefeSPatrick Mooney if (error || *faultptr) 224*4c87aefeSPatrick Mooney return (error); 225*4c87aefeSPatrick Mooney 226*4c87aefeSPatrick Mooney if (doread) 227*4c87aefeSPatrick Mooney vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc)); 228*4c87aefeSPatrick Mooney else 229*4c87aefeSPatrick Mooney vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc)); 230*4c87aefeSPatrick Mooney return (0); 231*4c87aefeSPatrick Mooney } 232*4c87aefeSPatrick Mooney 233*4c87aefeSPatrick Mooney static int 234*4c87aefeSPatrick Mooney desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, 235*4c87aefeSPatrick Mooney uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) 236*4c87aefeSPatrick Mooney { 237*4c87aefeSPatrick Mooney return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr)); 238*4c87aefeSPatrick Mooney } 239*4c87aefeSPatrick Mooney 240*4c87aefeSPatrick Mooney static int 241*4c87aefeSPatrick Mooney desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, 242*4c87aefeSPatrick Mooney uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) 243*4c87aefeSPatrick Mooney { 244*4c87aefeSPatrick Mooney return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr)); 245*4c87aefeSPatrick Mooney } 246*4c87aefeSPatrick Mooney 247*4c87aefeSPatrick Mooney /* 248*4c87aefeSPatrick Mooney * Read the TSS descriptor referenced by 'sel' into 'desc'. 249*4c87aefeSPatrick Mooney * 250*4c87aefeSPatrick Mooney * Returns 0 on success. 251*4c87aefeSPatrick Mooney * Returns 1 if an exception was injected into the guest. 252*4c87aefeSPatrick Mooney * Returns -1 otherwise. 253*4c87aefeSPatrick Mooney */ 254*4c87aefeSPatrick Mooney static int 255*4c87aefeSPatrick Mooney read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, 256*4c87aefeSPatrick Mooney uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) 257*4c87aefeSPatrick Mooney { 258*4c87aefeSPatrick Mooney struct vm_guest_paging sup_paging; 259*4c87aefeSPatrick Mooney int error; 260*4c87aefeSPatrick Mooney 261*4c87aefeSPatrick Mooney assert(!ISLDT(sel)); 262*4c87aefeSPatrick Mooney assert(IDXSEL(sel) != 0); 263*4c87aefeSPatrick Mooney 264*4c87aefeSPatrick Mooney /* Fetch the new TSS descriptor */ 265*4c87aefeSPatrick Mooney if (desc_table_limit_check(ctx, vcpu, sel)) { 266*4c87aefeSPatrick Mooney if (ts->reason == TSR_IRET) 267*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 268*4c87aefeSPatrick Mooney else 269*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext); 270*4c87aefeSPatrick Mooney return (1); 271*4c87aefeSPatrick Mooney } 272*4c87aefeSPatrick Mooney 273*4c87aefeSPatrick Mooney sup_paging = ts->paging; 274*4c87aefeSPatrick Mooney sup_paging.cpl = 0; /* implicit supervisor mode */ 275*4c87aefeSPatrick Mooney error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr); 276*4c87aefeSPatrick Mooney return (error); 277*4c87aefeSPatrick Mooney } 278*4c87aefeSPatrick Mooney 279*4c87aefeSPatrick Mooney static bool 280*4c87aefeSPatrick Mooney code_desc(int sd_type) 281*4c87aefeSPatrick Mooney { 282*4c87aefeSPatrick Mooney /* code descriptor */ 283*4c87aefeSPatrick Mooney return ((sd_type & 0x18) == 0x18); 284*4c87aefeSPatrick Mooney } 285*4c87aefeSPatrick Mooney 286*4c87aefeSPatrick Mooney static bool 287*4c87aefeSPatrick Mooney stack_desc(int sd_type) 288*4c87aefeSPatrick Mooney { 289*4c87aefeSPatrick Mooney /* writable data descriptor */ 290*4c87aefeSPatrick Mooney return ((sd_type & 0x1A) == 0x12); 291*4c87aefeSPatrick Mooney } 292*4c87aefeSPatrick Mooney 293*4c87aefeSPatrick Mooney static bool 294*4c87aefeSPatrick Mooney data_desc(int sd_type) 295*4c87aefeSPatrick Mooney { 296*4c87aefeSPatrick Mooney /* data descriptor or a readable code descriptor */ 297*4c87aefeSPatrick Mooney return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A); 298*4c87aefeSPatrick Mooney } 299*4c87aefeSPatrick Mooney 300*4c87aefeSPatrick Mooney static bool 301*4c87aefeSPatrick Mooney ldt_desc(int sd_type) 302*4c87aefeSPatrick Mooney { 303*4c87aefeSPatrick Mooney 304*4c87aefeSPatrick Mooney return (sd_type == SDT_SYSLDT); 305*4c87aefeSPatrick Mooney } 306*4c87aefeSPatrick Mooney 307*4c87aefeSPatrick Mooney /* 308*4c87aefeSPatrick Mooney * Validate the descriptor 'seg_desc' associated with 'segment'. 309*4c87aefeSPatrick Mooney */ 310*4c87aefeSPatrick Mooney static int 311*4c87aefeSPatrick Mooney validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, 312*4c87aefeSPatrick Mooney int segment, struct seg_desc *seg_desc, int *faultptr) 313*4c87aefeSPatrick Mooney { 314*4c87aefeSPatrick Mooney struct vm_guest_paging sup_paging; 315*4c87aefeSPatrick Mooney struct user_segment_descriptor usd; 316*4c87aefeSPatrick Mooney int error, idtvec; 317*4c87aefeSPatrick Mooney int cpl, dpl, rpl; 318*4c87aefeSPatrick Mooney uint16_t sel, cs; 319*4c87aefeSPatrick Mooney bool ldtseg, codeseg, stackseg, dataseg, conforming; 320*4c87aefeSPatrick Mooney 321*4c87aefeSPatrick Mooney ldtseg = codeseg = stackseg = dataseg = false; 322*4c87aefeSPatrick Mooney switch (segment) { 323*4c87aefeSPatrick Mooney case VM_REG_GUEST_LDTR: 324*4c87aefeSPatrick Mooney ldtseg = true; 325*4c87aefeSPatrick Mooney break; 326*4c87aefeSPatrick Mooney case VM_REG_GUEST_CS: 327*4c87aefeSPatrick Mooney codeseg = true; 328*4c87aefeSPatrick Mooney break; 329*4c87aefeSPatrick Mooney case VM_REG_GUEST_SS: 330*4c87aefeSPatrick Mooney stackseg = true; 331*4c87aefeSPatrick Mooney break; 332*4c87aefeSPatrick Mooney case VM_REG_GUEST_DS: 333*4c87aefeSPatrick Mooney case VM_REG_GUEST_ES: 334*4c87aefeSPatrick Mooney case VM_REG_GUEST_FS: 335*4c87aefeSPatrick Mooney case VM_REG_GUEST_GS: 336*4c87aefeSPatrick Mooney dataseg = true; 337*4c87aefeSPatrick Mooney break; 338*4c87aefeSPatrick Mooney default: 339*4c87aefeSPatrick Mooney assert(0); 340*4c87aefeSPatrick Mooney } 341*4c87aefeSPatrick Mooney 342*4c87aefeSPatrick Mooney /* Get the segment selector */ 343*4c87aefeSPatrick Mooney sel = GETREG(ctx, vcpu, segment); 344*4c87aefeSPatrick Mooney 345*4c87aefeSPatrick Mooney /* LDT selector must point into the GDT */ 346*4c87aefeSPatrick Mooney if (ldtseg && ISLDT(sel)) { 347*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 348*4c87aefeSPatrick Mooney return (1); 349*4c87aefeSPatrick Mooney } 350*4c87aefeSPatrick Mooney 351*4c87aefeSPatrick Mooney /* Descriptor table limit check */ 352*4c87aefeSPatrick Mooney if (desc_table_limit_check(ctx, vcpu, sel)) { 353*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 354*4c87aefeSPatrick Mooney return (1); 355*4c87aefeSPatrick Mooney } 356*4c87aefeSPatrick Mooney 357*4c87aefeSPatrick Mooney /* NULL selector */ 358*4c87aefeSPatrick Mooney if (IDXSEL(sel) == 0) { 359*4c87aefeSPatrick Mooney /* Code and stack segment selectors cannot be NULL */ 360*4c87aefeSPatrick Mooney if (codeseg || stackseg) { 361*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 362*4c87aefeSPatrick Mooney return (1); 363*4c87aefeSPatrick Mooney } 364*4c87aefeSPatrick Mooney seg_desc->base = 0; 365*4c87aefeSPatrick Mooney seg_desc->limit = 0; 366*4c87aefeSPatrick Mooney seg_desc->access = 0x10000; /* unusable */ 367*4c87aefeSPatrick Mooney return (0); 368*4c87aefeSPatrick Mooney } 369*4c87aefeSPatrick Mooney 370*4c87aefeSPatrick Mooney /* Read the descriptor from the GDT/LDT */ 371*4c87aefeSPatrick Mooney sup_paging = ts->paging; 372*4c87aefeSPatrick Mooney sup_paging.cpl = 0; /* implicit supervisor mode */ 373*4c87aefeSPatrick Mooney error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr); 374*4c87aefeSPatrick Mooney if (error || *faultptr) 375*4c87aefeSPatrick Mooney return (error); 376*4c87aefeSPatrick Mooney 377*4c87aefeSPatrick Mooney /* Verify that the descriptor type is compatible with the segment */ 378*4c87aefeSPatrick Mooney if ((ldtseg && !ldt_desc(usd.sd_type)) || 379*4c87aefeSPatrick Mooney (codeseg && !code_desc(usd.sd_type)) || 380*4c87aefeSPatrick Mooney (dataseg && !data_desc(usd.sd_type)) || 381*4c87aefeSPatrick Mooney (stackseg && !stack_desc(usd.sd_type))) { 382*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 383*4c87aefeSPatrick Mooney return (1); 384*4c87aefeSPatrick Mooney } 385*4c87aefeSPatrick Mooney 386*4c87aefeSPatrick Mooney /* Segment must be marked present */ 387*4c87aefeSPatrick Mooney if (!usd.sd_p) { 388*4c87aefeSPatrick Mooney if (ldtseg) 389*4c87aefeSPatrick Mooney idtvec = IDT_TS; 390*4c87aefeSPatrick Mooney else if (stackseg) 391*4c87aefeSPatrick Mooney idtvec = IDT_SS; 392*4c87aefeSPatrick Mooney else 393*4c87aefeSPatrick Mooney idtvec = IDT_NP; 394*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, idtvec, sel, ts->ext); 395*4c87aefeSPatrick Mooney return (1); 396*4c87aefeSPatrick Mooney } 397*4c87aefeSPatrick Mooney 398*4c87aefeSPatrick Mooney cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS); 399*4c87aefeSPatrick Mooney cpl = cs & SEL_RPL_MASK; 400*4c87aefeSPatrick Mooney rpl = sel & SEL_RPL_MASK; 401*4c87aefeSPatrick Mooney dpl = usd.sd_dpl; 402*4c87aefeSPatrick Mooney 403*4c87aefeSPatrick Mooney if (stackseg && (rpl != cpl || dpl != cpl)) { 404*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 405*4c87aefeSPatrick Mooney return (1); 406*4c87aefeSPatrick Mooney } 407*4c87aefeSPatrick Mooney 408*4c87aefeSPatrick Mooney if (codeseg) { 409*4c87aefeSPatrick Mooney conforming = (usd.sd_type & 0x4) ? true : false; 410*4c87aefeSPatrick Mooney if ((conforming && (cpl < dpl)) || 411*4c87aefeSPatrick Mooney (!conforming && (cpl != dpl))) { 412*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 413*4c87aefeSPatrick Mooney return (1); 414*4c87aefeSPatrick Mooney } 415*4c87aefeSPatrick Mooney } 416*4c87aefeSPatrick Mooney 417*4c87aefeSPatrick Mooney if (dataseg) { 418*4c87aefeSPatrick Mooney /* 419*4c87aefeSPatrick Mooney * A data segment is always non-conforming except when it's 420*4c87aefeSPatrick Mooney * descriptor is a readable, conforming code segment. 421*4c87aefeSPatrick Mooney */ 422*4c87aefeSPatrick Mooney if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0) 423*4c87aefeSPatrick Mooney conforming = true; 424*4c87aefeSPatrick Mooney else 425*4c87aefeSPatrick Mooney conforming = false; 426*4c87aefeSPatrick Mooney 427*4c87aefeSPatrick Mooney if (!conforming && (rpl > dpl || cpl > dpl)) { 428*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext); 429*4c87aefeSPatrick Mooney return (1); 430*4c87aefeSPatrick Mooney } 431*4c87aefeSPatrick Mooney } 432*4c87aefeSPatrick Mooney *seg_desc = usd_to_seg_desc(&usd); 433*4c87aefeSPatrick Mooney return (0); 434*4c87aefeSPatrick Mooney } 435*4c87aefeSPatrick Mooney 436*4c87aefeSPatrick Mooney static void 437*4c87aefeSPatrick Mooney tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch, 438*4c87aefeSPatrick Mooney uint32_t eip, struct tss32 *tss, struct iovec *iov) 439*4c87aefeSPatrick Mooney { 440*4c87aefeSPatrick Mooney 441*4c87aefeSPatrick Mooney /* General purpose registers */ 442*4c87aefeSPatrick Mooney tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX); 443*4c87aefeSPatrick Mooney tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX); 444*4c87aefeSPatrick Mooney tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX); 445*4c87aefeSPatrick Mooney tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX); 446*4c87aefeSPatrick Mooney tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP); 447*4c87aefeSPatrick Mooney tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP); 448*4c87aefeSPatrick Mooney tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI); 449*4c87aefeSPatrick Mooney tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI); 450*4c87aefeSPatrick Mooney 451*4c87aefeSPatrick Mooney /* Segment selectors */ 452*4c87aefeSPatrick Mooney tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES); 453*4c87aefeSPatrick Mooney tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS); 454*4c87aefeSPatrick Mooney tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS); 455*4c87aefeSPatrick Mooney tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS); 456*4c87aefeSPatrick Mooney tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS); 457*4c87aefeSPatrick Mooney tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS); 458*4c87aefeSPatrick Mooney 459*4c87aefeSPatrick Mooney /* eflags and eip */ 460*4c87aefeSPatrick Mooney tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); 461*4c87aefeSPatrick Mooney if (task_switch->reason == TSR_IRET) 462*4c87aefeSPatrick Mooney tss->tss_eflags &= ~PSL_NT; 463*4c87aefeSPatrick Mooney tss->tss_eip = eip; 464*4c87aefeSPatrick Mooney 465*4c87aefeSPatrick Mooney /* Copy updated old TSS into guest memory */ 466*4c87aefeSPatrick Mooney vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32)); 467*4c87aefeSPatrick Mooney } 468*4c87aefeSPatrick Mooney 469*4c87aefeSPatrick Mooney static void 470*4c87aefeSPatrick Mooney update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd) 471*4c87aefeSPatrick Mooney { 472*4c87aefeSPatrick Mooney int error; 473*4c87aefeSPatrick Mooney 474*4c87aefeSPatrick Mooney error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access); 475*4c87aefeSPatrick Mooney assert(error == 0); 476*4c87aefeSPatrick Mooney } 477*4c87aefeSPatrick Mooney 478*4c87aefeSPatrick Mooney /* 479*4c87aefeSPatrick Mooney * Update the vcpu registers to reflect the state of the new task. 480*4c87aefeSPatrick Mooney */ 481*4c87aefeSPatrick Mooney static int 482*4c87aefeSPatrick Mooney tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, 483*4c87aefeSPatrick Mooney uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr) 484*4c87aefeSPatrick Mooney { 485*4c87aefeSPatrick Mooney struct seg_desc seg_desc, seg_desc2; 486*4c87aefeSPatrick Mooney uint64_t *pdpte, maxphyaddr, reserved; 487*4c87aefeSPatrick Mooney uint32_t eflags; 488*4c87aefeSPatrick Mooney int error, i; 489*4c87aefeSPatrick Mooney bool nested; 490*4c87aefeSPatrick Mooney 491*4c87aefeSPatrick Mooney nested = false; 492*4c87aefeSPatrick Mooney if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) { 493*4c87aefeSPatrick Mooney tss->tss_link = ot_sel; 494*4c87aefeSPatrick Mooney nested = true; 495*4c87aefeSPatrick Mooney } 496*4c87aefeSPatrick Mooney 497*4c87aefeSPatrick Mooney eflags = tss->tss_eflags; 498*4c87aefeSPatrick Mooney if (nested) 499*4c87aefeSPatrick Mooney eflags |= PSL_NT; 500*4c87aefeSPatrick Mooney 501*4c87aefeSPatrick Mooney /* LDTR */ 502*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt); 503*4c87aefeSPatrick Mooney 504*4c87aefeSPatrick Mooney /* PBDR */ 505*4c87aefeSPatrick Mooney if (ts->paging.paging_mode != PAGING_MODE_FLAT) { 506*4c87aefeSPatrick Mooney if (ts->paging.paging_mode == PAGING_MODE_PAE) { 507*4c87aefeSPatrick Mooney /* 508*4c87aefeSPatrick Mooney * XXX Assuming 36-bit MAXPHYADDR. 509*4c87aefeSPatrick Mooney */ 510*4c87aefeSPatrick Mooney maxphyaddr = (1UL << 36) - 1; 511*4c87aefeSPatrick Mooney pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32); 512*4c87aefeSPatrick Mooney for (i = 0; i < 4; i++) { 513*4c87aefeSPatrick Mooney /* Check reserved bits if the PDPTE is valid */ 514*4c87aefeSPatrick Mooney if (!(pdpte[i] & 0x1)) 515*4c87aefeSPatrick Mooney continue; 516*4c87aefeSPatrick Mooney /* 517*4c87aefeSPatrick Mooney * Bits 2:1, 8:5 and bits above the processor's 518*4c87aefeSPatrick Mooney * maximum physical address are reserved. 519*4c87aefeSPatrick Mooney */ 520*4c87aefeSPatrick Mooney reserved = ~maxphyaddr | 0x1E6; 521*4c87aefeSPatrick Mooney if (pdpte[i] & reserved) { 522*4c87aefeSPatrick Mooney vm_inject_gp(ctx, vcpu); 523*4c87aefeSPatrick Mooney return (1); 524*4c87aefeSPatrick Mooney } 525*4c87aefeSPatrick Mooney } 526*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]); 527*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]); 528*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]); 529*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]); 530*4c87aefeSPatrick Mooney } 531*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3); 532*4c87aefeSPatrick Mooney ts->paging.cr3 = tss->tss_cr3; 533*4c87aefeSPatrick Mooney } 534*4c87aefeSPatrick Mooney 535*4c87aefeSPatrick Mooney /* eflags and eip */ 536*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags); 537*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip); 538*4c87aefeSPatrick Mooney 539*4c87aefeSPatrick Mooney /* General purpose registers */ 540*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax); 541*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx); 542*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx); 543*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx); 544*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp); 545*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp); 546*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi); 547*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi); 548*4c87aefeSPatrick Mooney 549*4c87aefeSPatrick Mooney /* Segment selectors */ 550*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es); 551*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs); 552*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss); 553*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds); 554*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs); 555*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs); 556*4c87aefeSPatrick Mooney 557*4c87aefeSPatrick Mooney /* 558*4c87aefeSPatrick Mooney * If this is a nested task then write out the new TSS to update 559*4c87aefeSPatrick Mooney * the previous link field. 560*4c87aefeSPatrick Mooney */ 561*4c87aefeSPatrick Mooney if (nested) 562*4c87aefeSPatrick Mooney vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss)); 563*4c87aefeSPatrick Mooney 564*4c87aefeSPatrick Mooney /* Validate segment descriptors */ 565*4c87aefeSPatrick Mooney error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc, 566*4c87aefeSPatrick Mooney faultptr); 567*4c87aefeSPatrick Mooney if (error || *faultptr) 568*4c87aefeSPatrick Mooney return (error); 569*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc); 570*4c87aefeSPatrick Mooney 571*4c87aefeSPatrick Mooney /* 572*4c87aefeSPatrick Mooney * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3. 573*4c87aefeSPatrick Mooney * 574*4c87aefeSPatrick Mooney * The SS and CS attribute checks on VM-entry are inter-dependent so 575*4c87aefeSPatrick Mooney * we need to make sure that both segments are valid before updating 576*4c87aefeSPatrick Mooney * either of them. This ensures that the VMCS state can pass the 577*4c87aefeSPatrick Mooney * VM-entry checks so the guest can handle any exception injected 578*4c87aefeSPatrick Mooney * during task switch emulation. 579*4c87aefeSPatrick Mooney */ 580*4c87aefeSPatrick Mooney error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc, 581*4c87aefeSPatrick Mooney faultptr); 582*4c87aefeSPatrick Mooney if (error || *faultptr) 583*4c87aefeSPatrick Mooney return (error); 584*4c87aefeSPatrick Mooney 585*4c87aefeSPatrick Mooney error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2, 586*4c87aefeSPatrick Mooney faultptr); 587*4c87aefeSPatrick Mooney if (error || *faultptr) 588*4c87aefeSPatrick Mooney return (error); 589*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc); 590*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2); 591*4c87aefeSPatrick Mooney ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK; 592*4c87aefeSPatrick Mooney 593*4c87aefeSPatrick Mooney error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc, 594*4c87aefeSPatrick Mooney faultptr); 595*4c87aefeSPatrick Mooney if (error || *faultptr) 596*4c87aefeSPatrick Mooney return (error); 597*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc); 598*4c87aefeSPatrick Mooney 599*4c87aefeSPatrick Mooney error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc, 600*4c87aefeSPatrick Mooney faultptr); 601*4c87aefeSPatrick Mooney if (error || *faultptr) 602*4c87aefeSPatrick Mooney return (error); 603*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc); 604*4c87aefeSPatrick Mooney 605*4c87aefeSPatrick Mooney error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc, 606*4c87aefeSPatrick Mooney faultptr); 607*4c87aefeSPatrick Mooney if (error || *faultptr) 608*4c87aefeSPatrick Mooney return (error); 609*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc); 610*4c87aefeSPatrick Mooney 611*4c87aefeSPatrick Mooney error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc, 612*4c87aefeSPatrick Mooney faultptr); 613*4c87aefeSPatrick Mooney if (error || *faultptr) 614*4c87aefeSPatrick Mooney return (error); 615*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc); 616*4c87aefeSPatrick Mooney 617*4c87aefeSPatrick Mooney return (0); 618*4c87aefeSPatrick Mooney } 619*4c87aefeSPatrick Mooney 620*4c87aefeSPatrick Mooney /* 621*4c87aefeSPatrick Mooney * Push an error code on the stack of the new task. This is needed if the 622*4c87aefeSPatrick Mooney * task switch was triggered by a hardware exception that causes an error 623*4c87aefeSPatrick Mooney * code to be saved (e.g. #PF). 624*4c87aefeSPatrick Mooney */ 625*4c87aefeSPatrick Mooney static int 626*4c87aefeSPatrick Mooney push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, 627*4c87aefeSPatrick Mooney int task_type, uint32_t errcode, int *faultptr) 628*4c87aefeSPatrick Mooney { 629*4c87aefeSPatrick Mooney struct iovec iov[2]; 630*4c87aefeSPatrick Mooney struct seg_desc seg_desc; 631*4c87aefeSPatrick Mooney int stacksize, bytes, error; 632*4c87aefeSPatrick Mooney uint64_t gla, cr0, rflags; 633*4c87aefeSPatrick Mooney uint32_t esp; 634*4c87aefeSPatrick Mooney uint16_t stacksel; 635*4c87aefeSPatrick Mooney 636*4c87aefeSPatrick Mooney *faultptr = 0; 637*4c87aefeSPatrick Mooney 638*4c87aefeSPatrick Mooney cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); 639*4c87aefeSPatrick Mooney rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); 640*4c87aefeSPatrick Mooney stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS); 641*4c87aefeSPatrick Mooney 642*4c87aefeSPatrick Mooney error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base, 643*4c87aefeSPatrick Mooney &seg_desc.limit, &seg_desc.access); 644*4c87aefeSPatrick Mooney assert(error == 0); 645*4c87aefeSPatrick Mooney 646*4c87aefeSPatrick Mooney /* 647*4c87aefeSPatrick Mooney * Section "Error Code" in the Intel SDM vol 3: the error code is 648*4c87aefeSPatrick Mooney * pushed on the stack as a doubleword or word (depending on the 649*4c87aefeSPatrick Mooney * default interrupt, trap or task gate size). 650*4c87aefeSPatrick Mooney */ 651*4c87aefeSPatrick Mooney if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS) 652*4c87aefeSPatrick Mooney bytes = 4; 653*4c87aefeSPatrick Mooney else 654*4c87aefeSPatrick Mooney bytes = 2; 655*4c87aefeSPatrick Mooney 656*4c87aefeSPatrick Mooney /* 657*4c87aefeSPatrick Mooney * PUSH instruction from Intel SDM vol 2: the 'B' flag in the 658*4c87aefeSPatrick Mooney * stack-segment descriptor determines the size of the stack 659*4c87aefeSPatrick Mooney * pointer outside of 64-bit mode. 660*4c87aefeSPatrick Mooney */ 661*4c87aefeSPatrick Mooney if (SEG_DESC_DEF32(seg_desc.access)) 662*4c87aefeSPatrick Mooney stacksize = 4; 663*4c87aefeSPatrick Mooney else 664*4c87aefeSPatrick Mooney stacksize = 2; 665*4c87aefeSPatrick Mooney 666*4c87aefeSPatrick Mooney esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP); 667*4c87aefeSPatrick Mooney esp -= bytes; 668*4c87aefeSPatrick Mooney 669*4c87aefeSPatrick Mooney if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, 670*4c87aefeSPatrick Mooney &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) { 671*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_SS, stacksel, 1); 672*4c87aefeSPatrick Mooney *faultptr = 1; 673*4c87aefeSPatrick Mooney return (0); 674*4c87aefeSPatrick Mooney } 675*4c87aefeSPatrick Mooney 676*4c87aefeSPatrick Mooney if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { 677*4c87aefeSPatrick Mooney vm_inject_ac(ctx, vcpu, 1); 678*4c87aefeSPatrick Mooney *faultptr = 1; 679*4c87aefeSPatrick Mooney return (0); 680*4c87aefeSPatrick Mooney } 681*4c87aefeSPatrick Mooney 682*4c87aefeSPatrick Mooney error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE, 683*4c87aefeSPatrick Mooney iov, nitems(iov), faultptr); 684*4c87aefeSPatrick Mooney if (error || *faultptr) 685*4c87aefeSPatrick Mooney return (error); 686*4c87aefeSPatrick Mooney 687*4c87aefeSPatrick Mooney vm_copyout(ctx, vcpu, &errcode, iov, bytes); 688*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp); 689*4c87aefeSPatrick Mooney return (0); 690*4c87aefeSPatrick Mooney } 691*4c87aefeSPatrick Mooney 692*4c87aefeSPatrick Mooney /* 693*4c87aefeSPatrick Mooney * Evaluate return value from helper functions and potentially return to 694*4c87aefeSPatrick Mooney * the VM run loop. 695*4c87aefeSPatrick Mooney */ 696*4c87aefeSPatrick Mooney #define CHKERR(error,fault) \ 697*4c87aefeSPatrick Mooney do { \ 698*4c87aefeSPatrick Mooney assert((error == 0) || (error == EFAULT)); \ 699*4c87aefeSPatrick Mooney if (error) \ 700*4c87aefeSPatrick Mooney return (VMEXIT_ABORT); \ 701*4c87aefeSPatrick Mooney else if (fault) \ 702*4c87aefeSPatrick Mooney return (VMEXIT_CONTINUE); \ 703*4c87aefeSPatrick Mooney } while (0) 704*4c87aefeSPatrick Mooney 705*4c87aefeSPatrick Mooney int 706*4c87aefeSPatrick Mooney vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) 707*4c87aefeSPatrick Mooney { 708*4c87aefeSPatrick Mooney struct seg_desc nt; 709*4c87aefeSPatrick Mooney struct tss32 oldtss, newtss; 710*4c87aefeSPatrick Mooney struct vm_task_switch *task_switch; 711*4c87aefeSPatrick Mooney struct vm_guest_paging *paging, sup_paging; 712*4c87aefeSPatrick Mooney struct user_segment_descriptor nt_desc, ot_desc; 713*4c87aefeSPatrick Mooney struct iovec nt_iov[2], ot_iov[2]; 714*4c87aefeSPatrick Mooney uint64_t cr0, ot_base; 715*4c87aefeSPatrick Mooney uint32_t eip, ot_lim, access; 716*4c87aefeSPatrick Mooney int error, ext, fault, minlimit, nt_type, ot_type, vcpu; 717*4c87aefeSPatrick Mooney enum task_switch_reason reason; 718*4c87aefeSPatrick Mooney uint16_t nt_sel, ot_sel; 719*4c87aefeSPatrick Mooney 720*4c87aefeSPatrick Mooney task_switch = &vmexit->u.task_switch; 721*4c87aefeSPatrick Mooney nt_sel = task_switch->tsssel; 722*4c87aefeSPatrick Mooney ext = vmexit->u.task_switch.ext; 723*4c87aefeSPatrick Mooney reason = vmexit->u.task_switch.reason; 724*4c87aefeSPatrick Mooney paging = &vmexit->u.task_switch.paging; 725*4c87aefeSPatrick Mooney vcpu = *pvcpu; 726*4c87aefeSPatrick Mooney 727*4c87aefeSPatrick Mooney assert(paging->cpu_mode == CPU_MODE_PROTECTED); 728*4c87aefeSPatrick Mooney 729*4c87aefeSPatrick Mooney /* 730*4c87aefeSPatrick Mooney * Calculate the instruction pointer to store in the old TSS. 731*4c87aefeSPatrick Mooney */ 732*4c87aefeSPatrick Mooney eip = vmexit->rip + vmexit->inst_length; 733*4c87aefeSPatrick Mooney 734*4c87aefeSPatrick Mooney /* 735*4c87aefeSPatrick Mooney * Section 4.6, "Access Rights" in Intel SDM Vol 3. 736*4c87aefeSPatrick Mooney * The following page table accesses are implicitly supervisor mode: 737*4c87aefeSPatrick Mooney * - accesses to GDT or LDT to load segment descriptors 738*4c87aefeSPatrick Mooney * - accesses to the task state segment during task switch 739*4c87aefeSPatrick Mooney */ 740*4c87aefeSPatrick Mooney sup_paging = *paging; 741*4c87aefeSPatrick Mooney sup_paging.cpl = 0; /* implicit supervisor mode */ 742*4c87aefeSPatrick Mooney 743*4c87aefeSPatrick Mooney /* Fetch the new TSS descriptor */ 744*4c87aefeSPatrick Mooney error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc, 745*4c87aefeSPatrick Mooney &fault); 746*4c87aefeSPatrick Mooney CHKERR(error, fault); 747*4c87aefeSPatrick Mooney 748*4c87aefeSPatrick Mooney nt = usd_to_seg_desc(&nt_desc); 749*4c87aefeSPatrick Mooney 750*4c87aefeSPatrick Mooney /* Verify the type of the new TSS */ 751*4c87aefeSPatrick Mooney nt_type = SEG_DESC_TYPE(nt.access); 752*4c87aefeSPatrick Mooney if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS && 753*4c87aefeSPatrick Mooney nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) { 754*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); 755*4c87aefeSPatrick Mooney goto done; 756*4c87aefeSPatrick Mooney } 757*4c87aefeSPatrick Mooney 758*4c87aefeSPatrick Mooney /* TSS descriptor must have present bit set */ 759*4c87aefeSPatrick Mooney if (!SEG_DESC_PRESENT(nt.access)) { 760*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext); 761*4c87aefeSPatrick Mooney goto done; 762*4c87aefeSPatrick Mooney } 763*4c87aefeSPatrick Mooney 764*4c87aefeSPatrick Mooney /* 765*4c87aefeSPatrick Mooney * TSS must have a minimum length of 104 bytes for a 32-bit TSS and 766*4c87aefeSPatrick Mooney * 44 bytes for a 16-bit TSS. 767*4c87aefeSPatrick Mooney */ 768*4c87aefeSPatrick Mooney if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS) 769*4c87aefeSPatrick Mooney minlimit = 104 - 1; 770*4c87aefeSPatrick Mooney else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) 771*4c87aefeSPatrick Mooney minlimit = 44 - 1; 772*4c87aefeSPatrick Mooney else 773*4c87aefeSPatrick Mooney minlimit = 0; 774*4c87aefeSPatrick Mooney 775*4c87aefeSPatrick Mooney assert(minlimit > 0); 776*4c87aefeSPatrick Mooney if (nt.limit < minlimit) { 777*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); 778*4c87aefeSPatrick Mooney goto done; 779*4c87aefeSPatrick Mooney } 780*4c87aefeSPatrick Mooney 781*4c87aefeSPatrick Mooney /* TSS must be busy if task switch is due to IRET */ 782*4c87aefeSPatrick Mooney if (reason == TSR_IRET && !TSS_BUSY(nt_type)) { 783*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext); 784*4c87aefeSPatrick Mooney goto done; 785*4c87aefeSPatrick Mooney } 786*4c87aefeSPatrick Mooney 787*4c87aefeSPatrick Mooney /* 788*4c87aefeSPatrick Mooney * TSS must be available (not busy) if task switch reason is 789*4c87aefeSPatrick Mooney * CALL, JMP, exception or interrupt. 790*4c87aefeSPatrick Mooney */ 791*4c87aefeSPatrick Mooney if (reason != TSR_IRET && TSS_BUSY(nt_type)) { 792*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext); 793*4c87aefeSPatrick Mooney goto done; 794*4c87aefeSPatrick Mooney } 795*4c87aefeSPatrick Mooney 796*4c87aefeSPatrick Mooney /* Fetch the new TSS */ 797*4c87aefeSPatrick Mooney error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, 798*4c87aefeSPatrick Mooney PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault); 799*4c87aefeSPatrick Mooney CHKERR(error, fault); 800*4c87aefeSPatrick Mooney vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1); 801*4c87aefeSPatrick Mooney 802*4c87aefeSPatrick Mooney /* Get the old TSS selector from the guest's task register */ 803*4c87aefeSPatrick Mooney ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR); 804*4c87aefeSPatrick Mooney if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) { 805*4c87aefeSPatrick Mooney /* 806*4c87aefeSPatrick Mooney * This might happen if a task switch was attempted without 807*4c87aefeSPatrick Mooney * ever loading the task register with LTR. In this case the 808*4c87aefeSPatrick Mooney * TR would contain the values from power-on: 809*4c87aefeSPatrick Mooney * (sel = 0, base = 0, limit = 0xffff). 810*4c87aefeSPatrick Mooney */ 811*4c87aefeSPatrick Mooney sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext); 812*4c87aefeSPatrick Mooney goto done; 813*4c87aefeSPatrick Mooney } 814*4c87aefeSPatrick Mooney 815*4c87aefeSPatrick Mooney /* Get the old TSS base and limit from the guest's task register */ 816*4c87aefeSPatrick Mooney error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim, 817*4c87aefeSPatrick Mooney &access); 818*4c87aefeSPatrick Mooney assert(error == 0); 819*4c87aefeSPatrick Mooney assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access)); 820*4c87aefeSPatrick Mooney ot_type = SEG_DESC_TYPE(access); 821*4c87aefeSPatrick Mooney assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY); 822*4c87aefeSPatrick Mooney 823*4c87aefeSPatrick Mooney /* Fetch the old TSS descriptor */ 824*4c87aefeSPatrick Mooney error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc, 825*4c87aefeSPatrick Mooney &fault); 826*4c87aefeSPatrick Mooney CHKERR(error, fault); 827*4c87aefeSPatrick Mooney 828*4c87aefeSPatrick Mooney /* Get the old TSS */ 829*4c87aefeSPatrick Mooney error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, 830*4c87aefeSPatrick Mooney PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault); 831*4c87aefeSPatrick Mooney CHKERR(error, fault); 832*4c87aefeSPatrick Mooney vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1); 833*4c87aefeSPatrick Mooney 834*4c87aefeSPatrick Mooney /* 835*4c87aefeSPatrick Mooney * Clear the busy bit in the old TSS descriptor if the task switch 836*4c87aefeSPatrick Mooney * due to an IRET or JMP instruction. 837*4c87aefeSPatrick Mooney */ 838*4c87aefeSPatrick Mooney if (reason == TSR_IRET || reason == TSR_JMP) { 839*4c87aefeSPatrick Mooney ot_desc.sd_type &= ~0x2; 840*4c87aefeSPatrick Mooney error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel, 841*4c87aefeSPatrick Mooney &ot_desc, &fault); 842*4c87aefeSPatrick Mooney CHKERR(error, fault); 843*4c87aefeSPatrick Mooney } 844*4c87aefeSPatrick Mooney 845*4c87aefeSPatrick Mooney if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) { 846*4c87aefeSPatrick Mooney fprintf(stderr, "Task switch to 16-bit TSS not supported\n"); 847*4c87aefeSPatrick Mooney return (VMEXIT_ABORT); 848*4c87aefeSPatrick Mooney } 849*4c87aefeSPatrick Mooney 850*4c87aefeSPatrick Mooney /* Save processor state in old TSS */ 851*4c87aefeSPatrick Mooney tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov); 852*4c87aefeSPatrick Mooney 853*4c87aefeSPatrick Mooney /* 854*4c87aefeSPatrick Mooney * If the task switch was triggered for any reason other than IRET 855*4c87aefeSPatrick Mooney * then set the busy bit in the new TSS descriptor. 856*4c87aefeSPatrick Mooney */ 857*4c87aefeSPatrick Mooney if (reason != TSR_IRET) { 858*4c87aefeSPatrick Mooney nt_desc.sd_type |= 0x2; 859*4c87aefeSPatrick Mooney error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel, 860*4c87aefeSPatrick Mooney &nt_desc, &fault); 861*4c87aefeSPatrick Mooney CHKERR(error, fault); 862*4c87aefeSPatrick Mooney } 863*4c87aefeSPatrick Mooney 864*4c87aefeSPatrick Mooney /* Update task register to point at the new TSS */ 865*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel); 866*4c87aefeSPatrick Mooney 867*4c87aefeSPatrick Mooney /* Update the hidden descriptor state of the task register */ 868*4c87aefeSPatrick Mooney nt = usd_to_seg_desc(&nt_desc); 869*4c87aefeSPatrick Mooney update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt); 870*4c87aefeSPatrick Mooney 871*4c87aefeSPatrick Mooney /* Set CR0.TS */ 872*4c87aefeSPatrick Mooney cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); 873*4c87aefeSPatrick Mooney SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS); 874*4c87aefeSPatrick Mooney 875*4c87aefeSPatrick Mooney /* 876*4c87aefeSPatrick Mooney * We are now committed to the task switch. Any exceptions encountered 877*4c87aefeSPatrick Mooney * after this point will be handled in the context of the new task and 878*4c87aefeSPatrick Mooney * the saved instruction pointer will belong to the new task. 879*4c87aefeSPatrick Mooney */ 880*4c87aefeSPatrick Mooney error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip); 881*4c87aefeSPatrick Mooney assert(error == 0); 882*4c87aefeSPatrick Mooney 883*4c87aefeSPatrick Mooney /* Load processor state from new TSS */ 884*4c87aefeSPatrick Mooney error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov, 885*4c87aefeSPatrick Mooney &fault); 886*4c87aefeSPatrick Mooney CHKERR(error, fault); 887*4c87aefeSPatrick Mooney 888*4c87aefeSPatrick Mooney /* 889*4c87aefeSPatrick Mooney * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception 890*4c87aefeSPatrick Mooney * caused an error code to be generated, this error code is copied 891*4c87aefeSPatrick Mooney * to the stack of the new task. 892*4c87aefeSPatrick Mooney */ 893*4c87aefeSPatrick Mooney if (task_switch->errcode_valid) { 894*4c87aefeSPatrick Mooney assert(task_switch->ext); 895*4c87aefeSPatrick Mooney assert(task_switch->reason == TSR_IDT_GATE); 896*4c87aefeSPatrick Mooney error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type, 897*4c87aefeSPatrick Mooney task_switch->errcode, &fault); 898*4c87aefeSPatrick Mooney CHKERR(error, fault); 899*4c87aefeSPatrick Mooney } 900*4c87aefeSPatrick Mooney 901*4c87aefeSPatrick Mooney /* 902*4c87aefeSPatrick Mooney * Treatment of virtual-NMI blocking if NMI is delivered through 903*4c87aefeSPatrick Mooney * a task gate. 904*4c87aefeSPatrick Mooney * 905*4c87aefeSPatrick Mooney * Section "Architectural State Before A VM Exit", Intel SDM, Vol3: 906*4c87aefeSPatrick Mooney * If the virtual NMIs VM-execution control is 1, VM entry injects 907*4c87aefeSPatrick Mooney * an NMI, and delivery of the NMI causes a task switch that causes 908*4c87aefeSPatrick Mooney * a VM exit, virtual-NMI blocking is in effect before the VM exit 909*4c87aefeSPatrick Mooney * commences. 910*4c87aefeSPatrick Mooney * 911*4c87aefeSPatrick Mooney * Thus, virtual-NMI blocking is in effect at the time of the task 912*4c87aefeSPatrick Mooney * switch VM exit. 913*4c87aefeSPatrick Mooney */ 914*4c87aefeSPatrick Mooney 915*4c87aefeSPatrick Mooney /* 916*4c87aefeSPatrick Mooney * Treatment of virtual-NMI unblocking on IRET from NMI handler task. 917*4c87aefeSPatrick Mooney * 918*4c87aefeSPatrick Mooney * Section "Changes to Instruction Behavior in VMX Non-Root Operation" 919*4c87aefeSPatrick Mooney * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking. 920*4c87aefeSPatrick Mooney * This unblocking of virtual-NMI occurs even if IRET causes a fault. 921*4c87aefeSPatrick Mooney * 922*4c87aefeSPatrick Mooney * Thus, virtual-NMI blocking is cleared at the time of the task switch 923*4c87aefeSPatrick Mooney * VM exit. 924*4c87aefeSPatrick Mooney */ 925*4c87aefeSPatrick Mooney 926*4c87aefeSPatrick Mooney /* 927*4c87aefeSPatrick Mooney * If the task switch was triggered by an event delivered through 928*4c87aefeSPatrick Mooney * the IDT then extinguish the pending event from the vcpu's 929*4c87aefeSPatrick Mooney * exitintinfo. 930*4c87aefeSPatrick Mooney */ 931*4c87aefeSPatrick Mooney if (task_switch->reason == TSR_IDT_GATE) { 932*4c87aefeSPatrick Mooney error = vm_set_intinfo(ctx, vcpu, 0); 933*4c87aefeSPatrick Mooney assert(error == 0); 934*4c87aefeSPatrick Mooney } 935*4c87aefeSPatrick Mooney 936*4c87aefeSPatrick Mooney /* 937*4c87aefeSPatrick Mooney * XXX should inject debug exception if 'T' bit is 1 938*4c87aefeSPatrick Mooney */ 939*4c87aefeSPatrick Mooney done: 940*4c87aefeSPatrick Mooney return (VMEXIT_CONTINUE); 941*4c87aefeSPatrick Mooney } 942