1/*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2011 NetApp, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 *
28 * $FreeBSD$
29 */
30
31#include "opt_bhyve_snapshot.h"
32#include "opt_ddb.h"
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD$");
36
37#include <sys/param.h>
38#include <sys/sysctl.h>
39#include <sys/systm.h>
40#include <sys/pcpu.h>
41
42#include <vm/vm.h>
43#include <vm/pmap.h>
44
45#include <machine/segments.h>
46#include <machine/vmm.h>
47#include <machine/vmm_snapshot.h>
48#include "vmm_host.h"
49#include "vmx_cpufunc.h"
50#include "vmcs.h"
51#include "ept.h"
52#include "vmx.h"
53
54#ifdef DDB
55#include <ddb/ddb.h>
56#endif
57
SYSCTL_DECL(_hw_vmm_vmx);

/*
 * Read/write integer knob exported under the _hw_vmm_vmx sysctl node.
 *
 * vmcs_init() consults this value when it programs VMCS_HOST_RIP: a non-zero
 * value selects vmx_exit_guest, zero (the default) selects
 * vmx_exit_guest_flush_rsb.  The value is only sampled at the time
 * vmcs_init() runs for a given VMCS.
 */
static int no_flush_rsb;
SYSCTL_INT(_hw_vmm_vmx, OID_AUTO, no_flush_rsb, CTLFLAG_RW,
    &no_flush_rsb, 0, "Do not flush RSB upon vmexit");
63
64static uint64_t
65vmcs_fix_regval(uint32_t encoding, uint64_t val)
66{
67
68	switch (encoding) {
69	case VMCS_GUEST_CR0:
70		val = vmx_fix_cr0(val);
71		break;
72	case VMCS_GUEST_CR4:
73		val = vmx_fix_cr4(val);
74		break;
75	default:
76		break;
77	}
78	return (val);
79}
80
81static uint32_t
82vmcs_field_encoding(int ident)
83{
84	switch (ident) {
85	case VM_REG_GUEST_CR0:
86		return (VMCS_GUEST_CR0);
87	case VM_REG_GUEST_CR3:
88		return (VMCS_GUEST_CR3);
89	case VM_REG_GUEST_CR4:
90		return (VMCS_GUEST_CR4);
91	case VM_REG_GUEST_DR7:
92		return (VMCS_GUEST_DR7);
93	case VM_REG_GUEST_RSP:
94		return (VMCS_GUEST_RSP);
95	case VM_REG_GUEST_RIP:
96		return (VMCS_GUEST_RIP);
97	case VM_REG_GUEST_RFLAGS:
98		return (VMCS_GUEST_RFLAGS);
99	case VM_REG_GUEST_ES:
100		return (VMCS_GUEST_ES_SELECTOR);
101	case VM_REG_GUEST_CS:
102		return (VMCS_GUEST_CS_SELECTOR);
103	case VM_REG_GUEST_SS:
104		return (VMCS_GUEST_SS_SELECTOR);
105	case VM_REG_GUEST_DS:
106		return (VMCS_GUEST_DS_SELECTOR);
107	case VM_REG_GUEST_FS:
108		return (VMCS_GUEST_FS_SELECTOR);
109	case VM_REG_GUEST_GS:
110		return (VMCS_GUEST_GS_SELECTOR);
111	case VM_REG_GUEST_TR:
112		return (VMCS_GUEST_TR_SELECTOR);
113	case VM_REG_GUEST_LDTR:
114		return (VMCS_GUEST_LDTR_SELECTOR);
115	case VM_REG_GUEST_EFER:
116		return (VMCS_GUEST_IA32_EFER);
117	case VM_REG_GUEST_PDPTE0:
118		return (VMCS_GUEST_PDPTE0);
119	case VM_REG_GUEST_PDPTE1:
120		return (VMCS_GUEST_PDPTE1);
121	case VM_REG_GUEST_PDPTE2:
122		return (VMCS_GUEST_PDPTE2);
123	case VM_REG_GUEST_PDPTE3:
124		return (VMCS_GUEST_PDPTE3);
125	case VM_REG_GUEST_ENTRY_INST_LENGTH:
126		return (VMCS_ENTRY_INST_LENGTH);
127	default:
128		return (-1);
129	}
130
131}
132
133static int
134vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
135{
136
137	switch (seg) {
138	case VM_REG_GUEST_ES:
139		*base = VMCS_GUEST_ES_BASE;
140		*lim = VMCS_GUEST_ES_LIMIT;
141		*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
142		break;
143	case VM_REG_GUEST_CS:
144		*base = VMCS_GUEST_CS_BASE;
145		*lim = VMCS_GUEST_CS_LIMIT;
146		*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
147		break;
148	case VM_REG_GUEST_SS:
149		*base = VMCS_GUEST_SS_BASE;
150		*lim = VMCS_GUEST_SS_LIMIT;
151		*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
152		break;
153	case VM_REG_GUEST_DS:
154		*base = VMCS_GUEST_DS_BASE;
155		*lim = VMCS_GUEST_DS_LIMIT;
156		*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
157		break;
158	case VM_REG_GUEST_FS:
159		*base = VMCS_GUEST_FS_BASE;
160		*lim = VMCS_GUEST_FS_LIMIT;
161		*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
162		break;
163	case VM_REG_GUEST_GS:
164		*base = VMCS_GUEST_GS_BASE;
165		*lim = VMCS_GUEST_GS_LIMIT;
166		*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
167		break;
168	case VM_REG_GUEST_TR:
169		*base = VMCS_GUEST_TR_BASE;
170		*lim = VMCS_GUEST_TR_LIMIT;
171		*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
172		break;
173	case VM_REG_GUEST_LDTR:
174		*base = VMCS_GUEST_LDTR_BASE;
175		*lim = VMCS_GUEST_LDTR_LIMIT;
176		*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
177		break;
178	case VM_REG_GUEST_IDTR:
179		*base = VMCS_GUEST_IDTR_BASE;
180		*lim = VMCS_GUEST_IDTR_LIMIT;
181		*acc = VMCS_INVALID_ENCODING;
182		break;
183	case VM_REG_GUEST_GDTR:
184		*base = VMCS_GUEST_GDTR_BASE;
185		*lim = VMCS_GUEST_GDTR_LIMIT;
186		*acc = VMCS_INVALID_ENCODING;
187		break;
188	default:
189		return (EINVAL);
190	}
191
192	return (0);
193}
194
/*
 * Read the guest register named by 'ident' from 'vmcs' into '*retval'.
 *
 * A non-negative 'ident' is a VM_REG_GUEST_* identifier and is translated
 * by vmcs_field_encoding().  A negative 'ident' bypasses the translation:
 * the sign bit is stripped and the remainder is used directly as the VMCS
 * field encoding, which gives access to vmx-specific state.  The sign bit
 * is free for this purpose because the upper 16 bits of a VMCS encoding
 * are reserved (i.e. zero).
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the read
 * and released with VMCLEAR() afterwards; otherwise neither is done.
 *
 * Returns EINVAL if 'ident' cannot be translated, else the vmread() result.
 */
int
vmcs_getreg(struct vmcs *vmcs, int running, int ident, uint64_t *retval)
{
	uint32_t field;
	int rc;

	if (ident >= 0)
		field = vmcs_field_encoding(ident);
	else
		field = ident & 0x7fffffff;

	if (field == (uint32_t)-1)
		return (EINVAL);

	if (running)
		return (vmread(field, retval));

	VMPTRLD(vmcs);
	rc = vmread(field, retval);
	VMCLEAR(vmcs);

	return (rc);
}
226
/*
 * Write 'val' to the guest register named by 'ident' in 'vmcs'.
 *
 * A non-negative 'ident' is translated by vmcs_field_encoding(); a negative
 * 'ident' has its sign bit stripped and is used directly as the VMCS field
 * encoding.  The value is passed through vmcs_fix_regval() before it is
 * written.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the write
 * and released with VMCLEAR() afterwards; otherwise neither is done.
 *
 * Returns EINVAL if 'ident' cannot be translated, else the vmwrite() result.
 */
int
vmcs_setreg(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	uint32_t field;
	int rc;

	if (ident >= 0)
		field = vmcs_field_encoding(ident);
	else
		field = ident & 0x7fffffff;

	if (field == (uint32_t)-1)
		return (EINVAL);

	val = vmcs_fix_regval(field, val);

	if (running)
		return (vmwrite(field, val));

	VMPTRLD(vmcs);
	rc = vmwrite(field, val);
	VMCLEAR(vmcs);

	return (rc);
}
253
254int
255vmcs_setdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
256{
257	int error;
258	uint32_t base, limit, access;
259
260	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
261	if (error != 0)
262		panic("vmcs_setdesc: invalid segment register %d", seg);
263
264	if (!running)
265		VMPTRLD(vmcs);
266	if ((error = vmwrite(base, desc->base)) != 0)
267		goto done;
268
269	if ((error = vmwrite(limit, desc->limit)) != 0)
270		goto done;
271
272	if (access != VMCS_INVALID_ENCODING) {
273		if ((error = vmwrite(access, desc->access)) != 0)
274			goto done;
275	}
276done:
277	if (!running)
278		VMCLEAR(vmcs);
279	return (error);
280}
281
282int
283vmcs_getdesc(struct vmcs *vmcs, int running, int seg, struct seg_desc *desc)
284{
285	int error;
286	uint32_t base, limit, access;
287	uint64_t u64;
288
289	error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
290	if (error != 0)
291		panic("vmcs_getdesc: invalid segment register %d", seg);
292
293	if (!running)
294		VMPTRLD(vmcs);
295	if ((error = vmread(base, &u64)) != 0)
296		goto done;
297	desc->base = u64;
298
299	if ((error = vmread(limit, &u64)) != 0)
300		goto done;
301	desc->limit = u64;
302
303	if (access != VMCS_INVALID_ENCODING) {
304		if ((error = vmread(access, &u64)) != 0)
305			goto done;
306		desc->access = u64;
307	}
308done:
309	if (!running)
310		VMCLEAR(vmcs);
311	return (error);
312}
313
314int
315vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
316{
317	int error;
318
319	VMPTRLD(vmcs);
320
321	/*
322	 * Guest MSRs are saved in the VM-exit MSR-store area.
323	 * Guest MSRs are loaded from the VM-entry MSR-load area.
324	 * Both areas point to the same location in memory.
325	 */
326	if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
327		goto done;
328	if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
329		goto done;
330
331	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
332		goto done;
333	if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
334		goto done;
335
336	error = 0;
337done:
338	VMCLEAR(vmcs);
339	return (error);
340}
341
342int
343vmcs_init(struct vmcs *vmcs)
344{
345	int error, codesel, datasel, tsssel;
346	u_long cr0, cr4, efer;
347	uint64_t pat, fsbase, idtrbase;
348
349	codesel = vmm_get_host_codesel();
350	datasel = vmm_get_host_datasel();
351	tsssel = vmm_get_host_tsssel();
352
353	/*
354	 * Make sure we have a "current" VMCS to work with.
355	 */
356	VMPTRLD(vmcs);
357
358	/* Host state */
359
360	/* Initialize host IA32_PAT MSR */
361	pat = vmm_get_host_pat();
362	if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
363		goto done;
364
365	/* Load the IA32_EFER MSR */
366	efer = vmm_get_host_efer();
367	if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
368		goto done;
369
370	/* Load the control registers */
371
372	cr0 = vmm_get_host_cr0();
373	if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
374		goto done;
375
376	cr4 = vmm_get_host_cr4() | CR4_VMXE;
377	if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
378		goto done;
379
380	/* Load the segment selectors */
381	if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
382		goto done;
383
384	if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
385		goto done;
386
387	if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
388		goto done;
389
390	if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
391		goto done;
392
393	if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
394		goto done;
395
396	if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
397		goto done;
398
399	if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
400		goto done;
401
402	/*
403	 * Load the Base-Address for %fs and idtr.
404	 *
405	 * Note that we exclude %gs, tss and gdtr here because their base
406	 * address is pcpu specific.
407	 */
408	fsbase = vmm_get_host_fsbase();
409	if ((error = vmwrite(VMCS_HOST_FS_BASE, fsbase)) != 0)
410		goto done;
411
412	idtrbase = vmm_get_host_idtrbase();
413	if ((error = vmwrite(VMCS_HOST_IDTR_BASE, idtrbase)) != 0)
414		goto done;
415
416	/* instruction pointer */
417	if (no_flush_rsb) {
418		if ((error = vmwrite(VMCS_HOST_RIP,
419		    (u_long)vmx_exit_guest)) != 0)
420			goto done;
421	} else {
422		if ((error = vmwrite(VMCS_HOST_RIP,
423		    (u_long)vmx_exit_guest_flush_rsb)) != 0)
424			goto done;
425	}
426
427	/* link pointer */
428	if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
429		goto done;
430done:
431	VMCLEAR(vmcs);
432	return (error);
433}
434
435#ifdef BHYVE_SNAPSHOT
/*
 * Read the VMCS field whose encoding is 'ident' into '*val'.
 *
 * Unlike vmcs_getreg(), 'ident' is handed to vmread() as is, without any
 * translation or validation.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the read
 * and released with VMCLEAR() afterwards.  Returns the vmread() result.
 */
int
vmcs_getany(struct vmcs *vmcs, int running, int ident, uint64_t *val)
{
	int rc;

	if (running)
		return (vmread(ident, val));

	VMPTRLD(vmcs);
	rc = vmread(ident, val);
	VMCLEAR(vmcs);

	return (rc);
}
451
/*
 * Write 'val' to the VMCS field whose encoding is 'ident'.
 *
 * Unlike vmcs_setreg(), 'ident' is handed to vmwrite() as is and 'val' is
 * written without any adjustment.
 *
 * When 'running' is zero the VMCS is loaded with VMPTRLD() before the write
 * and released with VMCLEAR() afterwards.  Returns the vmwrite() result.
 */
int
vmcs_setany(struct vmcs *vmcs, int running, int ident, uint64_t val)
{
	int rc;

	if (running)
		return (vmwrite(ident, val));

	VMPTRLD(vmcs);
	rc = vmwrite(ident, val);
	VMCLEAR(vmcs);

	return (rc);
}
467
468int
469vmcs_snapshot_reg(struct vmcs *vmcs, int running, int ident,
470		  struct vm_snapshot_meta *meta)
471{
472	int ret;
473	uint64_t val;
474
475	if (meta->op == VM_SNAPSHOT_SAVE) {
476		ret = vmcs_getreg(vmcs, running, ident, &val);
477		if (ret != 0)
478			goto done;
479
480		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
481	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
482		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
483
484		ret = vmcs_setreg(vmcs, running, ident, val);
485		if (ret != 0)
486			goto done;
487	} else {
488		ret = EINVAL;
489		goto done;
490	}
491
492done:
493	return (ret);
494}
495
496int
497vmcs_snapshot_desc(struct vmcs *vmcs, int running, int seg,
498		   struct vm_snapshot_meta *meta)
499{
500	int ret;
501	struct seg_desc desc;
502
503	if (meta->op == VM_SNAPSHOT_SAVE) {
504		ret = vmcs_getdesc(vmcs, running, seg, &desc);
505		if (ret != 0)
506			goto done;
507
508		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
509		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
510		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
511	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
512		SNAPSHOT_VAR_OR_LEAVE(desc.base, meta, ret, done);
513		SNAPSHOT_VAR_OR_LEAVE(desc.limit, meta, ret, done);
514		SNAPSHOT_VAR_OR_LEAVE(desc.access, meta, ret, done);
515
516		ret = vmcs_setdesc(vmcs, running, seg, &desc);
517		if (ret != 0)
518			goto done;
519	} else {
520		ret = EINVAL;
521		goto done;
522	}
523
524done:
525	return (ret);
526}
527
528int
529vmcs_snapshot_any(struct vmcs *vmcs, int running, int ident,
530		  struct vm_snapshot_meta *meta)
531{
532	int ret;
533	uint64_t val;
534
535	if (meta->op == VM_SNAPSHOT_SAVE) {
536		ret = vmcs_getany(vmcs, running, ident, &val);
537		if (ret != 0)
538			goto done;
539
540		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
541	} else if (meta->op == VM_SNAPSHOT_RESTORE) {
542		SNAPSHOT_VAR_OR_LEAVE(val, meta, ret, done);
543
544		ret = vmcs_setany(vmcs, running, ident, val);
545		if (ret != 0)
546			goto done;
547	} else {
548		ret = EINVAL;
549		goto done;
550	}
551
552done:
553	return (ret);
554}
555#endif
556
557#ifdef DDB
558extern int vmxon_enabled[];
559
560DB_SHOW_COMMAND(vmcs, db_show_vmcs)
561{
562	uint64_t cur_vmcs, val;
563	uint32_t exit;
564
565	if (!vmxon_enabled[curcpu]) {
566		db_printf("VMX not enabled\n");
567		return;
568	}
569
570	if (have_addr) {
571		db_printf("Only current VMCS supported\n");
572		return;
573	}
574
575	vmptrst(&cur_vmcs);
576	if (cur_vmcs == VMCS_INITIAL) {
577		db_printf("No current VM context\n");
578		return;
579	}
580	db_printf("VMCS: %jx\n", cur_vmcs);
581	db_printf("VPID: %lu\n", vmcs_read(VMCS_VPID));
582	db_printf("Activity: ");
583	val = vmcs_read(VMCS_GUEST_ACTIVITY);
584	switch (val) {
585	case 0:
586		db_printf("Active");
587		break;
588	case 1:
589		db_printf("HLT");
590		break;
591	case 2:
592		db_printf("Shutdown");
593		break;
594	case 3:
595		db_printf("Wait for SIPI");
596		break;
597	default:
598		db_printf("Unknown: %#lx", val);
599	}
600	db_printf("\n");
601	exit = vmcs_read(VMCS_EXIT_REASON);
602	if (exit & 0x80000000)
603		db_printf("Entry Failure Reason: %u\n", exit & 0xffff);
604	else
605		db_printf("Exit Reason: %u\n", exit & 0xffff);
606	db_printf("Qualification: %#lx\n", vmcs_exit_qualification());
607	db_printf("Guest Linear Address: %#lx\n",
608	    vmcs_read(VMCS_GUEST_LINEAR_ADDRESS));
609	switch (exit & 0x8000ffff) {
610	case EXIT_REASON_EXCEPTION:
611	case EXIT_REASON_EXT_INTR:
612		val = vmcs_read(VMCS_EXIT_INTR_INFO);
613		db_printf("Interrupt Type: ");
614		switch (val >> 8 & 0x7) {
615		case 0:
616			db_printf("external");
617			break;
618		case 2:
619			db_printf("NMI");
620			break;
621		case 3:
622			db_printf("HW exception");
623			break;
624		case 4:
625			db_printf("SW exception");
626			break;
627		default:
628			db_printf("?? %lu", val >> 8 & 0x7);
629			break;
630		}
631		db_printf("  Vector: %lu", val & 0xff);
632		if (val & 0x800)
633			db_printf("  Error Code: %lx",
634			    vmcs_read(VMCS_EXIT_INTR_ERRCODE));
635		db_printf("\n");
636		break;
637	case EXIT_REASON_EPT_FAULT:
638	case EXIT_REASON_EPT_MISCONFIG:
639		db_printf("Guest Physical Address: %#lx\n",
640		    vmcs_read(VMCS_GUEST_PHYSICAL_ADDRESS));
641		break;
642	}
643	db_printf("VM-instruction error: %#lx\n", vmcs_instruction_error());
644}
645#endif
646