xref: /illumos-gate/usr/src/lib/libvmm/libvmm.c (revision 32640292)
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 * Copyright 2023 OmniOS Community Edition (OmniOSce) Association.
 */

/*
 * Library for native code to access bhyve VMs, without the need to use
 * FreeBSD compat headers.
 */
229c3024a3SHans Rosenfeld 
239c3024a3SHans Rosenfeld #include <sys/param.h>
249c3024a3SHans Rosenfeld #include <sys/list.h>
259c3024a3SHans Rosenfeld #include <sys/stddef.h>
269c3024a3SHans Rosenfeld #include <sys/mman.h>
279c3024a3SHans Rosenfeld #include <sys/kdi_regs.h>
289c3024a3SHans Rosenfeld #include <sys/sysmacros.h>
299c3024a3SHans Rosenfeld #include <sys/controlregs.h>
309c3024a3SHans Rosenfeld #include <sys/note.h>
319c3024a3SHans Rosenfeld #include <sys/debug.h>
329c3024a3SHans Rosenfeld #include <errno.h>
339c3024a3SHans Rosenfeld #include <stdlib.h>
349c3024a3SHans Rosenfeld #include <strings.h>
359c3024a3SHans Rosenfeld #include <unistd.h>
369c3024a3SHans Rosenfeld #include <assert.h>
379c3024a3SHans Rosenfeld 
389c3024a3SHans Rosenfeld #include <machine/vmm.h>
399c3024a3SHans Rosenfeld #include <vmmapi.h>
409c3024a3SHans Rosenfeld 
419c3024a3SHans Rosenfeld #include <libvmm.h>
429c3024a3SHans Rosenfeld 
439c3024a3SHans Rosenfeld typedef struct vmm_memseg vmm_memseg_t;
449c3024a3SHans Rosenfeld 
459c3024a3SHans Rosenfeld #define	VMM_MEMSEG_DEVMEM	0x1
469c3024a3SHans Rosenfeld 
479c3024a3SHans Rosenfeld struct vmm_memseg {
489c3024a3SHans Rosenfeld 	list_node_t vms_list;
499c3024a3SHans Rosenfeld 	int vms_segid;
509c3024a3SHans Rosenfeld 	int vms_prot;
519c3024a3SHans Rosenfeld 	int vms_flags;
529c3024a3SHans Rosenfeld 	uintptr_t vms_gpa;
539c3024a3SHans Rosenfeld 	off_t vms_segoff;
549c3024a3SHans Rosenfeld 	size_t vms_seglen;
559c3024a3SHans Rosenfeld 	size_t vms_maplen;
569c3024a3SHans Rosenfeld 	char vms_name[64];
579c3024a3SHans Rosenfeld };
589c3024a3SHans Rosenfeld 
599c3024a3SHans Rosenfeld struct vmm {
609c3024a3SHans Rosenfeld 	struct vmctx *vmm_ctx;
619c3024a3SHans Rosenfeld 	list_t vmm_memlist;
629c3024a3SHans Rosenfeld 	char *vmm_mem;
639c3024a3SHans Rosenfeld 	size_t vmm_memsize;
649c3024a3SHans Rosenfeld 	size_t vmm_ncpu;
65*32640292SAndy Fiddaman 	struct vcpu **vmm_vcpu;
669c3024a3SHans Rosenfeld };
679c3024a3SHans Rosenfeld 
689c3024a3SHans Rosenfeld 
699c3024a3SHans Rosenfeld /*
709c3024a3SHans Rosenfeld  * This code relies on two assumptions:
719c3024a3SHans Rosenfeld  * - CPUs are never removed from the "active set", not even when suspended.
729c3024a3SHans Rosenfeld  *   A CPU being active just means that it has been used by the guest OS.
739c3024a3SHans Rosenfeld  * - The CPU numbering is consecutive.
749c3024a3SHans Rosenfeld  */
759c3024a3SHans Rosenfeld static void
vmm_update_ncpu(vmm_t * vmm)769c3024a3SHans Rosenfeld vmm_update_ncpu(vmm_t *vmm)
779c3024a3SHans Rosenfeld {
789c3024a3SHans Rosenfeld 	cpuset_t cpuset;
799c3024a3SHans Rosenfeld 
809c3024a3SHans Rosenfeld 	assert(vm_active_cpus(vmm->vmm_ctx, &cpuset) == 0);
819c3024a3SHans Rosenfeld 
829c3024a3SHans Rosenfeld 	for (vmm->vmm_ncpu = 0;
839c3024a3SHans Rosenfeld 	    CPU_ISSET(vmm->vmm_ncpu, &cpuset) == 1;
849c3024a3SHans Rosenfeld 	    vmm->vmm_ncpu++)
859c3024a3SHans Rosenfeld 		;
869c3024a3SHans Rosenfeld }
879c3024a3SHans Rosenfeld 
889c3024a3SHans Rosenfeld vmm_t *
vmm_open_vm(const char * name)899c3024a3SHans Rosenfeld vmm_open_vm(const char *name)
909c3024a3SHans Rosenfeld {
919c3024a3SHans Rosenfeld 	vmm_t *vmm = NULL;
92*32640292SAndy Fiddaman 	int _errno;
93*32640292SAndy Fiddaman 	int i;
949c3024a3SHans Rosenfeld 
959c3024a3SHans Rosenfeld 	vmm = malloc(sizeof (vmm_t));
969c3024a3SHans Rosenfeld 	if (vmm == NULL)
979c3024a3SHans Rosenfeld 		return (NULL);
989c3024a3SHans Rosenfeld 
999c3024a3SHans Rosenfeld 	bzero(vmm, sizeof (vmm_t));
1009c3024a3SHans Rosenfeld 	vmm->vmm_mem = MAP_FAILED;
1019c3024a3SHans Rosenfeld 
1029c3024a3SHans Rosenfeld 	list_create(&vmm->vmm_memlist, sizeof (vmm_memseg_t),
1039c3024a3SHans Rosenfeld 	    offsetof(vmm_memseg_t, vms_list));
1049c3024a3SHans Rosenfeld 
1059c3024a3SHans Rosenfeld 	vmm->vmm_ctx = vm_open(name);
1069c3024a3SHans Rosenfeld 	if (vmm->vmm_ctx == NULL) {
107*32640292SAndy Fiddaman 		list_destroy(&vmm->vmm_memlist);
1089c3024a3SHans Rosenfeld 		free(vmm);
1099c3024a3SHans Rosenfeld 		return (NULL);
1109c3024a3SHans Rosenfeld 	}
1119c3024a3SHans Rosenfeld 
1129c3024a3SHans Rosenfeld 	vmm_update_ncpu(vmm);
1139c3024a3SHans Rosenfeld 
1149c3024a3SHans Rosenfeld 	/*
1159c3024a3SHans Rosenfeld 	 * If we open a VM that has just been created we may see a state
1169c3024a3SHans Rosenfeld 	 * where it has no CPUs configured yet. We'll just wait for 10ms
1179c3024a3SHans Rosenfeld 	 * and retry until we get a non-zero CPU count.
1189c3024a3SHans Rosenfeld 	 */
1199c3024a3SHans Rosenfeld 	if (vmm->vmm_ncpu == 0) {
1209c3024a3SHans Rosenfeld 		do {
1219c3024a3SHans Rosenfeld 			(void) usleep(10000);
1229c3024a3SHans Rosenfeld 			vmm_update_ncpu(vmm);
1239c3024a3SHans Rosenfeld 		} while (vmm->vmm_ncpu == 0);
1249c3024a3SHans Rosenfeld 	}
1259c3024a3SHans Rosenfeld 
126*32640292SAndy Fiddaman 	vmm->vmm_vcpu = calloc(vmm->vmm_ncpu, sizeof (struct vcpu *));
127*32640292SAndy Fiddaman 	if (vmm->vmm_vcpu == NULL)
128*32640292SAndy Fiddaman 		goto fail;
129*32640292SAndy Fiddaman 	for (i = 0; i < vmm->vmm_ncpu; i++) {
130*32640292SAndy Fiddaman 		vmm->vmm_vcpu[i] = vm_vcpu_open(vmm->vmm_ctx, i);
131*32640292SAndy Fiddaman 		if (vmm->vmm_vcpu[i] == NULL) {
132*32640292SAndy Fiddaman 			_errno = errno;
133*32640292SAndy Fiddaman 			while (i-- >= 0)
134*32640292SAndy Fiddaman 				vm_vcpu_close(vmm->vmm_vcpu[i]);
135*32640292SAndy Fiddaman 			free(vmm->vmm_vcpu);
136*32640292SAndy Fiddaman 			errno = _errno;
137*32640292SAndy Fiddaman 			goto fail;
138*32640292SAndy Fiddaman 		}
139*32640292SAndy Fiddaman 	}
140*32640292SAndy Fiddaman 
1419c3024a3SHans Rosenfeld 	return (vmm);
142*32640292SAndy Fiddaman 
143*32640292SAndy Fiddaman fail:
144*32640292SAndy Fiddaman 	_errno = errno;
145*32640292SAndy Fiddaman 	vmm_close_vm(vmm);
146*32640292SAndy Fiddaman 	errno = _errno;
147*32640292SAndy Fiddaman 
148*32640292SAndy Fiddaman 	return (NULL);
1499c3024a3SHans Rosenfeld }
1509c3024a3SHans Rosenfeld 
1519c3024a3SHans Rosenfeld void
vmm_close_vm(vmm_t * vmm)1529c3024a3SHans Rosenfeld vmm_close_vm(vmm_t *vmm)
1539c3024a3SHans Rosenfeld {
154*32640292SAndy Fiddaman 	uint_t i;
155*32640292SAndy Fiddaman 
1569c3024a3SHans Rosenfeld 	vmm_unmap(vmm);
1579c3024a3SHans Rosenfeld 
158*32640292SAndy Fiddaman 	for (i = 0; i < vmm->vmm_ncpu; i++)
159*32640292SAndy Fiddaman 		vm_vcpu_close(vmm->vmm_vcpu[i]);
160*32640292SAndy Fiddaman 	free(vmm->vmm_vcpu);
161*32640292SAndy Fiddaman 
1629c3024a3SHans Rosenfeld 	list_destroy(&vmm->vmm_memlist);
1639c3024a3SHans Rosenfeld 
1649c3024a3SHans Rosenfeld 	if (vmm->vmm_ctx != NULL)
1659c3024a3SHans Rosenfeld 		vm_close(vmm->vmm_ctx);
1669c3024a3SHans Rosenfeld 
1679c3024a3SHans Rosenfeld 	free(vmm);
1689c3024a3SHans Rosenfeld }
1699c3024a3SHans Rosenfeld 
1709c3024a3SHans Rosenfeld static vmm_memseg_t *
vmm_get_memseg(vmm_t * vmm,uintptr_t gpa)1719c3024a3SHans Rosenfeld vmm_get_memseg(vmm_t *vmm, uintptr_t gpa)
1729c3024a3SHans Rosenfeld {
1739c3024a3SHans Rosenfeld 	vmm_memseg_t ms, *ret;
1749c3024a3SHans Rosenfeld 	int error, flags;
1759c3024a3SHans Rosenfeld 
1769c3024a3SHans Rosenfeld 	bzero(&ms, sizeof (vmm_memseg_t));
1779c3024a3SHans Rosenfeld 	ms.vms_gpa = gpa;
1789c3024a3SHans Rosenfeld 	error = vm_mmap_getnext(vmm->vmm_ctx, &ms.vms_gpa, &ms.vms_segid,
1799c3024a3SHans Rosenfeld 	    &ms.vms_segoff, &ms.vms_maplen, &ms.vms_prot, &flags);
1809c3024a3SHans Rosenfeld 	if (error)
1819c3024a3SHans Rosenfeld 		return (NULL);
1829c3024a3SHans Rosenfeld 
1839c3024a3SHans Rosenfeld 	error = vm_get_memseg(vmm->vmm_ctx, ms.vms_segid, &ms.vms_seglen,
1849c3024a3SHans Rosenfeld 	    ms.vms_name, sizeof (ms.vms_name));
1859c3024a3SHans Rosenfeld 	if (error)
1869c3024a3SHans Rosenfeld 		return (NULL);
1879c3024a3SHans Rosenfeld 
1889c3024a3SHans Rosenfeld 	/*
1899c3024a3SHans Rosenfeld 	 * Regular memory segments don't have a name, but devmem segments do.
1909c3024a3SHans Rosenfeld 	 * We can use that information to set the DEVMEM flag if necessary.
1919c3024a3SHans Rosenfeld 	 */
1929c3024a3SHans Rosenfeld 	ms.vms_flags = ms.vms_name[0] != '\0' ? VMM_MEMSEG_DEVMEM : 0;
1939c3024a3SHans Rosenfeld 
1949c3024a3SHans Rosenfeld 	ret = malloc(sizeof (vmm_memseg_t));
1959c3024a3SHans Rosenfeld 	if (ret == NULL)
1969c3024a3SHans Rosenfeld 		return (NULL);
1979c3024a3SHans Rosenfeld 
1989c3024a3SHans Rosenfeld 	*ret = ms;
1999c3024a3SHans Rosenfeld 
2009c3024a3SHans Rosenfeld 	return (ret);
2019c3024a3SHans Rosenfeld }
2029c3024a3SHans Rosenfeld 
2039c3024a3SHans Rosenfeld int
vmm_map(vmm_t * vmm,boolean_t writable)2049c3024a3SHans Rosenfeld vmm_map(vmm_t *vmm, boolean_t writable)
2059c3024a3SHans Rosenfeld {
2069c3024a3SHans Rosenfeld 	uintptr_t last_gpa = 0;
2079c3024a3SHans Rosenfeld 	vmm_memseg_t *ms;
2089c3024a3SHans Rosenfeld 	int prot_write = writable ? PROT_WRITE : 0;
2099c3024a3SHans Rosenfeld 
2109c3024a3SHans Rosenfeld 	if (vmm->vmm_mem != MAP_FAILED) {
2119c3024a3SHans Rosenfeld 		errno = EINVAL;
2129c3024a3SHans Rosenfeld 		return (-1);
2139c3024a3SHans Rosenfeld 	}
2149c3024a3SHans Rosenfeld 
2159c3024a3SHans Rosenfeld 	assert(list_is_empty(&vmm->vmm_memlist));
2169c3024a3SHans Rosenfeld 
2179c3024a3SHans Rosenfeld 	for (;;) {
2189c3024a3SHans Rosenfeld 		ms = vmm_get_memseg(vmm, last_gpa);
2199c3024a3SHans Rosenfeld 
2209c3024a3SHans Rosenfeld 		if (ms == NULL)
2219c3024a3SHans Rosenfeld 			break;
2229c3024a3SHans Rosenfeld 
2239c3024a3SHans Rosenfeld 		last_gpa = ms->vms_gpa + ms->vms_maplen;
2249c3024a3SHans Rosenfeld 		list_insert_tail(&vmm->vmm_memlist, ms);
2259c3024a3SHans Rosenfeld 	}
2269c3024a3SHans Rosenfeld 
2279c3024a3SHans Rosenfeld 	vmm->vmm_mem = mmap(NULL, last_gpa, PROT_NONE,
2289c3024a3SHans Rosenfeld 	    MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);
2299c3024a3SHans Rosenfeld 
2309c3024a3SHans Rosenfeld 	if (vmm->vmm_mem == MAP_FAILED)
2319c3024a3SHans Rosenfeld 		goto fail;
2329c3024a3SHans Rosenfeld 
2339c3024a3SHans Rosenfeld 	for (ms = list_head(&vmm->vmm_memlist);
2349c3024a3SHans Rosenfeld 	    ms != NULL;
2359c3024a3SHans Rosenfeld 	    ms = list_next(&vmm->vmm_memlist, ms)) {
236c3d209caSPatrick Mooney 		off_t mapoff;
237c3d209caSPatrick Mooney 
238c3d209caSPatrick Mooney 		if ((ms->vms_flags & VMM_MEMSEG_DEVMEM) == 0) {
239c3d209caSPatrick Mooney 			/*
240c3d209caSPatrick Mooney 			 * sysmem segments will be located at an offset
241c3d209caSPatrick Mooney 			 * equivalent to their GPA.
242c3d209caSPatrick Mooney 			 */
243c3d209caSPatrick Mooney 			mapoff = ms->vms_gpa;
244c3d209caSPatrick Mooney 		} else {
245c3d209caSPatrick Mooney 			/*
246c3d209caSPatrick Mooney 			 * devmem segments are located in a special region away
247c3d209caSPatrick Mooney 			 * from the normal GPA space.
248c3d209caSPatrick Mooney 			 */
249c3d209caSPatrick Mooney 			if (vm_get_devmem_offset(vmm->vmm_ctx, ms->vms_segid,
250c3d209caSPatrick Mooney 			    &mapoff) != 0) {
251c3d209caSPatrick Mooney 				goto fail;
252c3d209caSPatrick Mooney 			}
253c3d209caSPatrick Mooney 		}
254c3d209caSPatrick Mooney 
255c3d209caSPatrick Mooney 		/*
256c3d209caSPatrick Mooney 		 * While 'mapoff' points to the front of the segment, the actual
257c3d209caSPatrick Mooney 		 * mapping may be at some offset beyond that.
258c3d209caSPatrick Mooney 		 */
259c3d209caSPatrick Mooney 		VERIFY(ms->vms_segoff >= 0);
260c3d209caSPatrick Mooney 		mapoff += ms->vms_segoff;
2619c3024a3SHans Rosenfeld 
2629c3024a3SHans Rosenfeld 		vmm->vmm_memsize += ms->vms_maplen;
2639c3024a3SHans Rosenfeld 
2649c3024a3SHans Rosenfeld 		if (mmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen,
2659c3024a3SHans Rosenfeld 		    PROT_READ | prot_write, MAP_SHARED | MAP_FIXED,
2669c3024a3SHans Rosenfeld 		    vm_get_device_fd(vmm->vmm_ctx), mapoff) == MAP_FAILED)
2679c3024a3SHans Rosenfeld 			goto fail;
2689c3024a3SHans Rosenfeld 	}
2699c3024a3SHans Rosenfeld 
2709c3024a3SHans Rosenfeld 	return (0);
2719c3024a3SHans Rosenfeld 
2729c3024a3SHans Rosenfeld fail:
2739c3024a3SHans Rosenfeld 	vmm_unmap(vmm);
2749c3024a3SHans Rosenfeld 
2759c3024a3SHans Rosenfeld 	return (-1);
2769c3024a3SHans Rosenfeld }
2779c3024a3SHans Rosenfeld 
2789c3024a3SHans Rosenfeld void
vmm_unmap(vmm_t * vmm)2799c3024a3SHans Rosenfeld vmm_unmap(vmm_t *vmm)
2809c3024a3SHans Rosenfeld {
2819c3024a3SHans Rosenfeld 	while (!list_is_empty(&vmm->vmm_memlist)) {
2829c3024a3SHans Rosenfeld 		vmm_memseg_t *ms = list_remove_head(&vmm->vmm_memlist);
2839c3024a3SHans Rosenfeld 
2849c3024a3SHans Rosenfeld 		if (vmm->vmm_mem != MAP_FAILED) {
2859c3024a3SHans Rosenfeld 			(void) munmap(vmm->vmm_mem + ms->vms_gpa,
2869c3024a3SHans Rosenfeld 			    ms->vms_maplen);
2879c3024a3SHans Rosenfeld 		}
2889c3024a3SHans Rosenfeld 
2899c3024a3SHans Rosenfeld 		free(ms);
2909c3024a3SHans Rosenfeld 	}
2919c3024a3SHans Rosenfeld 
2929c3024a3SHans Rosenfeld 	if (vmm->vmm_mem != MAP_FAILED)
2939c3024a3SHans Rosenfeld 		(void) munmap(vmm->vmm_mem, vmm->vmm_memsize);
2949c3024a3SHans Rosenfeld 
2959c3024a3SHans Rosenfeld 	vmm->vmm_mem = MAP_FAILED;
2969c3024a3SHans Rosenfeld 	vmm->vmm_memsize = 0;
2979c3024a3SHans Rosenfeld }
2989c3024a3SHans Rosenfeld 
2999c3024a3SHans Rosenfeld ssize_t
vmm_pread(vmm_t * vmm,void * buf,size_t len,uintptr_t addr)3009c3024a3SHans Rosenfeld vmm_pread(vmm_t *vmm, void *buf, size_t len, uintptr_t addr)
3019c3024a3SHans Rosenfeld {
3029c3024a3SHans Rosenfeld 	ssize_t count = 0;
3039c3024a3SHans Rosenfeld 	vmm_memseg_t *ms;
3049c3024a3SHans Rosenfeld 	ssize_t res = len;
3059c3024a3SHans Rosenfeld 
3069c3024a3SHans Rosenfeld 	for (ms = list_head(&vmm->vmm_memlist);
3079c3024a3SHans Rosenfeld 	    ms != NULL && len != 0;
3089c3024a3SHans Rosenfeld 	    ms = list_next(&vmm->vmm_memlist, ms)) {
3099c3024a3SHans Rosenfeld 
3109c3024a3SHans Rosenfeld 		if (addr >= ms->vms_gpa &&
3119c3024a3SHans Rosenfeld 		    addr < ms->vms_gpa + ms->vms_maplen) {
3129c3024a3SHans Rosenfeld 			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
3139c3024a3SHans Rosenfeld 
3149c3024a3SHans Rosenfeld 			if (res < 0)
3159c3024a3SHans Rosenfeld 				res = 0;
3169c3024a3SHans Rosenfeld 
3179c3024a3SHans Rosenfeld 			bcopy(vmm->vmm_mem + addr, buf, len - res);
3189c3024a3SHans Rosenfeld 			count += len - res;
3199c3024a3SHans Rosenfeld 			addr += len - res;
3209c3024a3SHans Rosenfeld 			len = res;
3219c3024a3SHans Rosenfeld 		}
3229c3024a3SHans Rosenfeld 	}
3239c3024a3SHans Rosenfeld 
3249c3024a3SHans Rosenfeld 	if (res)
3259c3024a3SHans Rosenfeld 		errno = EFAULT;
3269c3024a3SHans Rosenfeld 	else
3279c3024a3SHans Rosenfeld 		errno = 0;
3289c3024a3SHans Rosenfeld 
3299c3024a3SHans Rosenfeld 	return (count);
3309c3024a3SHans Rosenfeld }
3319c3024a3SHans Rosenfeld 
3329c3024a3SHans Rosenfeld ssize_t
vmm_pwrite(vmm_t * vmm,const void * buf,size_t len,uintptr_t addr)3339c3024a3SHans Rosenfeld vmm_pwrite(vmm_t *vmm, const void *buf, size_t len, uintptr_t addr)
3349c3024a3SHans Rosenfeld {
3359c3024a3SHans Rosenfeld 	ssize_t count = 0;
3369c3024a3SHans Rosenfeld 	vmm_memseg_t *ms;
3379c3024a3SHans Rosenfeld 	ssize_t res = len;
3389c3024a3SHans Rosenfeld 
3399c3024a3SHans Rosenfeld 	for (ms = list_head(&vmm->vmm_memlist);
3409c3024a3SHans Rosenfeld 	    ms != NULL;
3419c3024a3SHans Rosenfeld 	    ms = list_next(&vmm->vmm_memlist, ms)) {
3429c3024a3SHans Rosenfeld 		if (addr >= ms->vms_gpa &&
3439c3024a3SHans Rosenfeld 		    addr < ms->vms_gpa + ms->vms_maplen) {
3449c3024a3SHans Rosenfeld 			res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
3459c3024a3SHans Rosenfeld 
3469c3024a3SHans Rosenfeld 			if (res < 0)
3479c3024a3SHans Rosenfeld 				res = 0;
3489c3024a3SHans Rosenfeld 
3499c3024a3SHans Rosenfeld 			bcopy(buf, vmm->vmm_mem + addr, len - res);
3509c3024a3SHans Rosenfeld 			count += len - res;
3519c3024a3SHans Rosenfeld 			addr += len - res;
3529c3024a3SHans Rosenfeld 			len = res;
3539c3024a3SHans Rosenfeld 		}
3549c3024a3SHans Rosenfeld 	}
3559c3024a3SHans Rosenfeld 
3569c3024a3SHans Rosenfeld 	if (res)
3579c3024a3SHans Rosenfeld 		errno = EFAULT;
3589c3024a3SHans Rosenfeld 	else
3599c3024a3SHans Rosenfeld 		errno = 0;
3609c3024a3SHans Rosenfeld 
3619c3024a3SHans Rosenfeld 	return (count);
3629c3024a3SHans Rosenfeld }
3639c3024a3SHans Rosenfeld 
3649c3024a3SHans Rosenfeld size_t
vmm_ncpu(vmm_t * vmm)3659c3024a3SHans Rosenfeld vmm_ncpu(vmm_t *vmm)
3669c3024a3SHans Rosenfeld {
3679c3024a3SHans Rosenfeld 	return (vmm->vmm_ncpu);
3689c3024a3SHans Rosenfeld }
3699c3024a3SHans Rosenfeld 
3709c3024a3SHans Rosenfeld size_t
vmm_memsize(vmm_t * vmm)3719c3024a3SHans Rosenfeld vmm_memsize(vmm_t *vmm)
3729c3024a3SHans Rosenfeld {
3739c3024a3SHans Rosenfeld 	return (vmm->vmm_memsize);
3749c3024a3SHans Rosenfeld }
3759c3024a3SHans Rosenfeld 
3769c3024a3SHans Rosenfeld int
vmm_cont(vmm_t * vmm)3779c3024a3SHans Rosenfeld vmm_cont(vmm_t *vmm)
3789c3024a3SHans Rosenfeld {
379*32640292SAndy Fiddaman 	return (vm_resume_all_cpus(vmm->vmm_ctx));
3809c3024a3SHans Rosenfeld }
3819c3024a3SHans Rosenfeld 
3829c3024a3SHans Rosenfeld int
vmm_step(vmm_t * vmm,int vcpuid)383*32640292SAndy Fiddaman vmm_step(vmm_t *vmm, int vcpuid)
3849c3024a3SHans Rosenfeld {
3859c3024a3SHans Rosenfeld 	cpuset_t cpuset;
3869c3024a3SHans Rosenfeld 	int ret;
3879c3024a3SHans Rosenfeld 
388*32640292SAndy Fiddaman 	if (vcpuid >= vmm->vmm_ncpu) {
3899c3024a3SHans Rosenfeld 		errno = EINVAL;
3909c3024a3SHans Rosenfeld 		return (-1);
3919c3024a3SHans Rosenfeld 	}
3929c3024a3SHans Rosenfeld 
393*32640292SAndy Fiddaman 	ret = vm_set_capability(vmm->vmm_vcpu[vcpuid], VM_CAP_MTRAP_EXIT, 1);
3949c3024a3SHans Rosenfeld 	if (ret != 0)
3959c3024a3SHans Rosenfeld 		return (-1);
3969c3024a3SHans Rosenfeld 
397*32640292SAndy Fiddaman 	assert(vm_resume_cpu(vmm->vmm_vcpu[vcpuid]) == 0);
3989c3024a3SHans Rosenfeld 
3999c3024a3SHans Rosenfeld 	do {
4009c3024a3SHans Rosenfeld 		(void) vm_debug_cpus(vmm->vmm_ctx, &cpuset);
401*32640292SAndy Fiddaman 	} while (!CPU_ISSET(vcpuid, &cpuset));
4029c3024a3SHans Rosenfeld 
403*32640292SAndy Fiddaman 	(void) vm_set_capability(vmm->vmm_vcpu[vcpuid], VM_CAP_MTRAP_EXIT, 0);
4049c3024a3SHans Rosenfeld 
4059c3024a3SHans Rosenfeld 	return (ret);
4069c3024a3SHans Rosenfeld }
4079c3024a3SHans Rosenfeld 
4089c3024a3SHans Rosenfeld int
vmm_stop(vmm_t * vmm)4099c3024a3SHans Rosenfeld vmm_stop(vmm_t *vmm)
4109c3024a3SHans Rosenfeld {
411*32640292SAndy Fiddaman 	int ret = vm_suspend_all_cpus(vmm->vmm_ctx);
4129c3024a3SHans Rosenfeld 
4139c3024a3SHans Rosenfeld 	if (ret == 0)
4149c3024a3SHans Rosenfeld 		vmm_update_ncpu(vmm);
4159c3024a3SHans Rosenfeld 
4169c3024a3SHans Rosenfeld 	return (ret);
4179c3024a3SHans Rosenfeld }
4189c3024a3SHans Rosenfeld 
4199c3024a3SHans Rosenfeld /*
4209c3024a3SHans Rosenfeld  * Mapping of KDI-defined registers to vmmapi-defined registers.
4219c3024a3SHans Rosenfeld  * Registers not known to vmmapi use VM_REG_LAST, which is invalid and
4229c3024a3SHans Rosenfeld  * causes an error in vm_{get,set}_register_set().
4239c3024a3SHans Rosenfeld  *
4249c3024a3SHans Rosenfeld  * This array must be kept in sync with the definitions in kdi_regs.h.
4259c3024a3SHans Rosenfeld  */
4269c3024a3SHans Rosenfeld static int vmm_kdi_regmap[] = {
4279c3024a3SHans Rosenfeld 	VM_REG_LAST,		/* KDIREG_SAVFP */
4289c3024a3SHans Rosenfeld 	VM_REG_LAST,		/* KDIREG_SAVPC */
4299c3024a3SHans Rosenfeld 	VM_REG_GUEST_RDI,	/* KDIREG_RDI */
4309c3024a3SHans Rosenfeld 	VM_REG_GUEST_RSI,	/* KDIREG_RSI */
4319c3024a3SHans Rosenfeld 	VM_REG_GUEST_RDX,	/* KDIREG_RDX */
4329c3024a3SHans Rosenfeld 	VM_REG_GUEST_RCX,	/* KDIREG_RCX */
4339c3024a3SHans Rosenfeld 	VM_REG_GUEST_R8,	/* KDIREG_R8 */
4349c3024a3SHans Rosenfeld 	VM_REG_GUEST_R9,	/* KDIREG_R9 */
4359c3024a3SHans Rosenfeld 	VM_REG_GUEST_RAX,	/* KDIREG_RAX */
4369c3024a3SHans Rosenfeld 	VM_REG_GUEST_RBX,	/* KDIREG_RBX */
4379c3024a3SHans Rosenfeld 	VM_REG_GUEST_RBP,	/* KDIREG_RBP */
4389c3024a3SHans Rosenfeld 	VM_REG_GUEST_R10,	/* KDIREG_R10 */
4399c3024a3SHans Rosenfeld 	VM_REG_GUEST_R11,	/* KDIREG_R11 */
4409c3024a3SHans Rosenfeld 	VM_REG_GUEST_R12,	/* KDIREG_R12 */
4419c3024a3SHans Rosenfeld 	VM_REG_GUEST_R13,	/* KDIREG_R13 */
4429c3024a3SHans Rosenfeld 	VM_REG_GUEST_R14,	/* KDIREG_R14 */
4439c3024a3SHans Rosenfeld 	VM_REG_GUEST_R15,	/* KDIREG_R15 */
4449c3024a3SHans Rosenfeld 	VM_REG_LAST,		/* KDIREG_FSBASE */
4459c3024a3SHans Rosenfeld 	VM_REG_LAST,		/* KDIREG_GSBASE */
4469c3024a3SHans Rosenfeld 	VM_REG_LAST,		/* KDIREG_KGSBASE */
4479c3024a3SHans Rosenfeld 	VM_REG_GUEST_CR2,	/* KDIREG_CR2 */
4489c3024a3SHans Rosenfeld 	VM_REG_GUEST_CR3,	/* KDIREG_CR3 */
4499c3024a3SHans Rosenfeld 	VM_REG_GUEST_DS,	/* KDIREG_DS */
4509c3024a3SHans Rosenfeld 	VM_REG_GUEST_ES,	/* KDIREG_ES */
4519c3024a3SHans Rosenfeld 	VM_REG_GUEST_FS,	/* KDIREG_FS */
4529c3024a3SHans Rosenfeld 	VM_REG_GUEST_GS,	/* KDIREG_GS */
4539c3024a3SHans Rosenfeld 	VM_REG_LAST,		/* KDIREG_TRAPNO */
4549c3024a3SHans Rosenfeld 	VM_REG_LAST,		/* KDIREG_ERR */
4559c3024a3SHans Rosenfeld 	VM_REG_GUEST_RIP,	/* KDIREG_RIP */
4569c3024a3SHans Rosenfeld 	VM_REG_GUEST_CS,	/* KDIREG_CS */
4579c3024a3SHans Rosenfeld 	VM_REG_GUEST_RFLAGS,	/* KDIREG_RFLAGS */
4589c3024a3SHans Rosenfeld 	VM_REG_GUEST_RSP,	/* KDIREG_RSP */
4599c3024a3SHans Rosenfeld 	VM_REG_GUEST_SS		/* KDIREG_SS */
4609c3024a3SHans Rosenfeld };
4619c3024a3SHans Rosenfeld CTASSERT(ARRAY_SIZE(vmm_kdi_regmap) == KDIREG_NGREG);
4629c3024a3SHans Rosenfeld 
4639c3024a3SHans Rosenfeld /*
4649c3024a3SHans Rosenfeld  * Mapping of libvmm-defined registers to vmmapi-defined registers.
4659c3024a3SHans Rosenfeld  *
4669c3024a3SHans Rosenfeld  * This array must be kept in sync with the definitions in libvmm.h
4679c3024a3SHans Rosenfeld  */
4689c3024a3SHans Rosenfeld static int vmm_sys_regmap[] = {
4699c3024a3SHans Rosenfeld 	VM_REG_GUEST_CR0,	/* VMM_REG_CR0 */
4709c3024a3SHans Rosenfeld 	VM_REG_GUEST_CR2,	/* VMM_REG_CR2 */
4719c3024a3SHans Rosenfeld 	VM_REG_GUEST_CR3,	/* VMM_REG_CR3 */
4729c3024a3SHans Rosenfeld 	VM_REG_GUEST_CR4,	/* VMM_REG_CR4 */
4739c3024a3SHans Rosenfeld 	VM_REG_GUEST_DR0,	/* VMM_REG_DR0 */
4749c3024a3SHans Rosenfeld 	VM_REG_GUEST_DR1,	/* VMM_REG_DR1 */
4759c3024a3SHans Rosenfeld 	VM_REG_GUEST_DR2,	/* VMM_REG_DR2 */
4769c3024a3SHans Rosenfeld 	VM_REG_GUEST_DR3,	/* VMM_REG_DR3 */
4779c3024a3SHans Rosenfeld 	VM_REG_GUEST_DR6,	/* VMM_REG_DR6 */
4789c3024a3SHans Rosenfeld 	VM_REG_GUEST_DR7,	/* VMM_REG_DR7 */
4799c3024a3SHans Rosenfeld 	VM_REG_GUEST_EFER,	/* VMM_REG_EFER */
4809c3024a3SHans Rosenfeld 	VM_REG_GUEST_PDPTE0,	/* VMM_REG_PDPTE0 */
4819c3024a3SHans Rosenfeld 	VM_REG_GUEST_PDPTE1,	/* VMM_REG_PDPTE1 */
4829c3024a3SHans Rosenfeld 	VM_REG_GUEST_PDPTE2,	/* VMM_REG_PDPTE2 */
4839c3024a3SHans Rosenfeld 	VM_REG_GUEST_PDPTE3,	/* VMM_REG_PDPTE3 */
4849c3024a3SHans Rosenfeld 	VM_REG_GUEST_INTR_SHADOW, /* VMM_REG_INTR_SHADOW */
4859c3024a3SHans Rosenfeld };
4869c3024a3SHans Rosenfeld 
4879c3024a3SHans Rosenfeld /*
4889c3024a3SHans Rosenfeld  * Mapping of libvmm-defined descriptors to vmmapi-defined descriptors.
4899c3024a3SHans Rosenfeld  *
4909c3024a3SHans Rosenfeld  * This array must be kept in sync with the definitions in libvmm.h
4919c3024a3SHans Rosenfeld  */
4929c3024a3SHans Rosenfeld static int vmm_descmap[] = {
4939c3024a3SHans Rosenfeld 	VM_REG_GUEST_GDTR,
4949c3024a3SHans Rosenfeld 	VM_REG_GUEST_LDTR,
4959c3024a3SHans Rosenfeld 	VM_REG_GUEST_IDTR,
4969c3024a3SHans Rosenfeld 	VM_REG_GUEST_TR,
4979c3024a3SHans Rosenfeld 	VM_REG_GUEST_CS,
4989c3024a3SHans Rosenfeld 	VM_REG_GUEST_DS,
4999c3024a3SHans Rosenfeld 	VM_REG_GUEST_ES,
5009c3024a3SHans Rosenfeld 	VM_REG_GUEST_FS,
5019c3024a3SHans Rosenfeld 	VM_REG_GUEST_GS,
5029c3024a3SHans Rosenfeld 	VM_REG_GUEST_SS
5039c3024a3SHans Rosenfeld };
5049c3024a3SHans Rosenfeld 
5059c3024a3SHans Rosenfeld static int
vmm_mapreg(int reg)5069c3024a3SHans Rosenfeld vmm_mapreg(int reg)
5079c3024a3SHans Rosenfeld {
5089c3024a3SHans Rosenfeld 	errno = 0;
5099c3024a3SHans Rosenfeld 
5109c3024a3SHans Rosenfeld 	if (reg < 0)
5119c3024a3SHans Rosenfeld 		goto fail;
5129c3024a3SHans Rosenfeld 
5139c3024a3SHans Rosenfeld 	if (reg < KDIREG_NGREG)
5149c3024a3SHans Rosenfeld 		return (vmm_kdi_regmap[reg]);
5159c3024a3SHans Rosenfeld 
5169c3024a3SHans Rosenfeld 	if (reg >= VMM_REG_OFFSET &&
5179c3024a3SHans Rosenfeld 	    reg < VMM_REG_OFFSET + ARRAY_SIZE(vmm_sys_regmap))
5189c3024a3SHans Rosenfeld 		return (vmm_sys_regmap[reg - VMM_REG_OFFSET]);
5199c3024a3SHans Rosenfeld 
5209c3024a3SHans Rosenfeld fail:
5219c3024a3SHans Rosenfeld 	errno = EINVAL;
5229c3024a3SHans Rosenfeld 	return (VM_REG_LAST);
5239c3024a3SHans Rosenfeld }
5249c3024a3SHans Rosenfeld 
5259c3024a3SHans Rosenfeld static int
vmm_mapdesc(int desc)5269c3024a3SHans Rosenfeld vmm_mapdesc(int desc)
5279c3024a3SHans Rosenfeld {
5289c3024a3SHans Rosenfeld 	errno = 0;
5299c3024a3SHans Rosenfeld 
5309c3024a3SHans Rosenfeld 	if (desc >= VMM_DESC_OFFSET &&
5319c3024a3SHans Rosenfeld 	    desc < VMM_DESC_OFFSET + ARRAY_SIZE(vmm_descmap))
5329c3024a3SHans Rosenfeld 		return (vmm_descmap[desc - VMM_DESC_OFFSET]);
5339c3024a3SHans Rosenfeld 
5349c3024a3SHans Rosenfeld 	errno = EINVAL;
5359c3024a3SHans Rosenfeld 	return (VM_REG_LAST);
5369c3024a3SHans Rosenfeld }
5379c3024a3SHans Rosenfeld 
5389c3024a3SHans Rosenfeld int
vmm_getreg(vmm_t * vmm,int vcpuid,int reg,uint64_t * val)539*32640292SAndy Fiddaman vmm_getreg(vmm_t *vmm, int vcpuid, int reg, uint64_t *val)
5409c3024a3SHans Rosenfeld {
5419c3024a3SHans Rosenfeld 	reg = vmm_mapreg(reg);
5429c3024a3SHans Rosenfeld 
5439c3024a3SHans Rosenfeld 	if (reg == VM_REG_LAST)
5449c3024a3SHans Rosenfeld 		return (-1);
5459c3024a3SHans Rosenfeld 
546*32640292SAndy Fiddaman 	return (vm_get_register(vmm->vmm_vcpu[vcpuid], reg, val));
5479c3024a3SHans Rosenfeld }
5489c3024a3SHans Rosenfeld 
5499c3024a3SHans Rosenfeld int
vmm_setreg(vmm_t * vmm,int vcpuid,int reg,uint64_t val)550*32640292SAndy Fiddaman vmm_setreg(vmm_t *vmm, int vcpuid, int reg, uint64_t val)
5519c3024a3SHans Rosenfeld {
5529c3024a3SHans Rosenfeld 	reg = vmm_mapreg(reg);
5539c3024a3SHans Rosenfeld 
5549c3024a3SHans Rosenfeld 	if (reg == VM_REG_LAST)
5559c3024a3SHans Rosenfeld 		return (-1);
5569c3024a3SHans Rosenfeld 
557*32640292SAndy Fiddaman 	return (vm_set_register(vmm->vmm_vcpu[vcpuid], reg, val));
5589c3024a3SHans Rosenfeld }
5599c3024a3SHans Rosenfeld 
5609c3024a3SHans Rosenfeld int
vmm_get_regset(vmm_t * vmm,int vcpuid,size_t nregs,const int * regnums,uint64_t * regvals)561*32640292SAndy Fiddaman vmm_get_regset(vmm_t *vmm, int vcpuid, size_t nregs, const int *regnums,
5629c3024a3SHans Rosenfeld     uint64_t *regvals)
5639c3024a3SHans Rosenfeld {
5649c3024a3SHans Rosenfeld 	int *vm_regnums;
5659c3024a3SHans Rosenfeld 	int i;
5669c3024a3SHans Rosenfeld 	int ret = -1;
5679c3024a3SHans Rosenfeld 
5689c3024a3SHans Rosenfeld 	vm_regnums = malloc(sizeof (int) * nregs);
5699c3024a3SHans Rosenfeld 	if (vm_regnums == NULL)
5709c3024a3SHans Rosenfeld 		return (ret);
5719c3024a3SHans Rosenfeld 
5729c3024a3SHans Rosenfeld 	for (i = 0; i != nregs; i++) {
5739c3024a3SHans Rosenfeld 		vm_regnums[i] = vmm_mapreg(regnums[i]);
5749c3024a3SHans Rosenfeld 		if (vm_regnums[i] == VM_REG_LAST)
5759c3024a3SHans Rosenfeld 			goto fail;
5769c3024a3SHans Rosenfeld 	}
5779c3024a3SHans Rosenfeld 
578*32640292SAndy Fiddaman 	ret = vm_get_register_set(vmm->vmm_vcpu[vcpuid], nregs, vm_regnums,
5799c3024a3SHans Rosenfeld 	    regvals);
5809c3024a3SHans Rosenfeld 
5819c3024a3SHans Rosenfeld fail:
5829c3024a3SHans Rosenfeld 	free(vm_regnums);
5839c3024a3SHans Rosenfeld 	return (ret);
5849c3024a3SHans Rosenfeld }
5859c3024a3SHans Rosenfeld 
5869c3024a3SHans Rosenfeld int
vmm_set_regset(vmm_t * vmm,int vcpuid,size_t nregs,const int * regnums,uint64_t * regvals)587*32640292SAndy Fiddaman vmm_set_regset(vmm_t *vmm, int vcpuid, size_t nregs, const int *regnums,
5889c3024a3SHans Rosenfeld     uint64_t *regvals)
5899c3024a3SHans Rosenfeld {
5909c3024a3SHans Rosenfeld 	int *vm_regnums;
5919c3024a3SHans Rosenfeld 	int i;
5929c3024a3SHans Rosenfeld 	int ret = -1;
5939c3024a3SHans Rosenfeld 
5949c3024a3SHans Rosenfeld 	vm_regnums = malloc(sizeof (int) * nregs);
5959c3024a3SHans Rosenfeld 	if (vm_regnums == NULL)
5969c3024a3SHans Rosenfeld 		return (ret);
5979c3024a3SHans Rosenfeld 
5989c3024a3SHans Rosenfeld 	for (i = 0; i != nregs; i++) {
5999c3024a3SHans Rosenfeld 		vm_regnums[i] = vmm_mapreg(regnums[i]);
6009c3024a3SHans Rosenfeld 		if (vm_regnums[i] == VM_REG_LAST)
6019c3024a3SHans Rosenfeld 			goto fail;
6029c3024a3SHans Rosenfeld 	}
6039c3024a3SHans Rosenfeld 
604*32640292SAndy Fiddaman 	ret = vm_set_register_set(vmm->vmm_vcpu[vcpuid], nregs, vm_regnums,
6059c3024a3SHans Rosenfeld 	    regvals);
6069c3024a3SHans Rosenfeld 
6079c3024a3SHans Rosenfeld fail:
6089c3024a3SHans Rosenfeld 	free(vm_regnums);
6099c3024a3SHans Rosenfeld 	return (ret);
6109c3024a3SHans Rosenfeld }
6119c3024a3SHans Rosenfeld 
6129c3024a3SHans Rosenfeld int
vmm_get_desc(vmm_t * vmm,int vcpuid,int desc,vmm_desc_t * vd)613*32640292SAndy Fiddaman vmm_get_desc(vmm_t *vmm, int vcpuid, int desc, vmm_desc_t *vd)
6149c3024a3SHans Rosenfeld {
6159c3024a3SHans Rosenfeld 	desc = vmm_mapdesc(desc);
6169c3024a3SHans Rosenfeld 	if (desc == VM_REG_LAST)
6179c3024a3SHans Rosenfeld 		return (-1);
6189c3024a3SHans Rosenfeld 
619*32640292SAndy Fiddaman 	return (vm_get_desc(vmm->vmm_vcpu[vcpuid], desc, &vd->vd_base,
620*32640292SAndy Fiddaman 	    &vd->vd_lim,
6219c3024a3SHans Rosenfeld 	    &vd->vd_acc));
6229c3024a3SHans Rosenfeld }
6239c3024a3SHans Rosenfeld 
6249c3024a3SHans Rosenfeld int
vmm_set_desc(vmm_t * vmm,int vcpuid,int desc,vmm_desc_t * vd)625*32640292SAndy Fiddaman vmm_set_desc(vmm_t *vmm, int vcpuid, int desc, vmm_desc_t *vd)
6269c3024a3SHans Rosenfeld {
6279c3024a3SHans Rosenfeld 	desc = vmm_mapdesc(desc);
6289c3024a3SHans Rosenfeld 	if (desc == VM_REG_LAST)
6299c3024a3SHans Rosenfeld 		return (-1);
6309c3024a3SHans Rosenfeld 
631*32640292SAndy Fiddaman 	return (vm_set_desc(vmm->vmm_vcpu[vcpuid], desc, vd->vd_base,
632*32640292SAndy Fiddaman 	    vd->vd_lim, vd->vd_acc));
6339c3024a3SHans Rosenfeld }
6349c3024a3SHans Rosenfeld 
/*
 * MMU-related register values used during address translation.
 * The member order has to match the register order in vmm_mmu_regnum[].
 */
typedef struct vmm_mmu vmm_mmu_t;

struct vmm_mmu {
	uint64_t vm_cr0;	/* %cr0 */
	uint64_t vm_cr3;	/* %cr3 */
	uint64_t vm_cr4;	/* %cr4 */
	uint64_t vm_efer;	/* EFER */
};
6459c3024a3SHans Rosenfeld 
6469c3024a3SHans Rosenfeld static const int vmm_mmu_regnum[] = {
6479c3024a3SHans Rosenfeld 	VMM_REG_CR0,
6489c3024a3SHans Rosenfeld 	VMM_REG_CR3,
6499c3024a3SHans Rosenfeld 	VMM_REG_CR4,
6509c3024a3SHans Rosenfeld 	VMM_REG_EFER
6519c3024a3SHans Rosenfeld };
6529c3024a3SHans Rosenfeld 
/* Page table entry bits. */
#define	X86_PTE_P		(1ULL << 0)
#define	X86_PTE_PS		(1ULL << 7)

/* Physical address bits of a page table entry, and the base page size. */
#define	X86_PTE_PHYSMASK	0x000ffffffffff000ULL
#define	X86_PAGE_SHIFT		12
#define	X86_PAGE_SIZE		(1ULL << X86_PAGE_SHIFT)

/* Segment descriptor access bits. */
#define	X86_SEG_CODE_DATA	(1ULL << 4)
#define	X86_SEG_PRESENT		(1ULL << 7)
#define	X86_SEG_LONG		(1ULL << 13)
#define	X86_SEG_BIG		(1ULL << 14)
#define	X86_SEG_GRANULARITY	(1ULL << 15)
#define	X86_SEG_UNUSABLE	(1ULL << 16)

/* Bits that are set in a usable segment, and the mask to test them with. */
#define	X86_SEG_USABLE		(X86_SEG_PRESENT | X86_SEG_CODE_DATA)
#define	X86_SEG_USABLE_MASK	(X86_SEG_UNUSABLE | X86_SEG_USABLE)
6699c3024a3SHans Rosenfeld 
6709c3024a3SHans Rosenfeld /*
6719c3024a3SHans Rosenfeld  * vmm_pte2paddr:
6729c3024a3SHans Rosenfeld  *
6739c3024a3SHans Rosenfeld  * Recursively calculate the physical address from a virtual address,
6749c3024a3SHans Rosenfeld  * starting at the given PTE level using the given PTE.
6759c3024a3SHans Rosenfeld  */
6769c3024a3SHans Rosenfeld static int
vmm_pte2paddr(vmm_t * vmm,uint64_t pte,boolean_t ia32,int level,uint64_t vaddr,uint64_t * paddr)6779c3024a3SHans Rosenfeld vmm_pte2paddr(vmm_t *vmm, uint64_t pte, boolean_t ia32, int level,
6789c3024a3SHans Rosenfeld     uint64_t vaddr, uint64_t *paddr)
6799c3024a3SHans Rosenfeld {
6809c3024a3SHans Rosenfeld 	int pte_size = ia32 ? sizeof (uint32_t) : sizeof (uint64_t);
6819c3024a3SHans Rosenfeld 	int off_bits = ia32 ? 10 : 9;
6829c3024a3SHans Rosenfeld 	boolean_t hugepage = B_FALSE;
6839c3024a3SHans Rosenfeld 	uint64_t offset;
6849c3024a3SHans Rosenfeld 	uint64_t off_mask, off_shift;
6859c3024a3SHans Rosenfeld 
6869c3024a3SHans Rosenfeld 	if (level < 4 && (pte & X86_PTE_P) == 0) {
6879c3024a3SHans Rosenfeld 		errno = EFAULT;
6889c3024a3SHans Rosenfeld 		return (-1);
6899c3024a3SHans Rosenfeld 	}
6909c3024a3SHans Rosenfeld 
6919c3024a3SHans Rosenfeld 	off_shift = X86_PAGE_SHIFT + off_bits * level;
6929c3024a3SHans Rosenfeld 	off_mask = (1ULL << off_shift) - 1;
6939c3024a3SHans Rosenfeld 
6949c3024a3SHans Rosenfeld 	offset = vaddr & off_mask;
6959c3024a3SHans Rosenfeld 
6969c3024a3SHans Rosenfeld 	if ((level == 1 || level == 2) && (pte & X86_PTE_PS) != 0) {
6979c3024a3SHans Rosenfeld 		hugepage = B_TRUE;
6989c3024a3SHans Rosenfeld 	} else {
6999c3024a3SHans Rosenfeld 		if (level > 0) {
7009c3024a3SHans Rosenfeld 			offset >>= off_shift - off_bits;
7019c3024a3SHans Rosenfeld 			offset <<= X86_PAGE_SHIFT - off_bits;
7029c3024a3SHans Rosenfeld 		}
7039c3024a3SHans Rosenfeld 		off_mask = 0xfff;
7049c3024a3SHans Rosenfeld 	}
7059c3024a3SHans Rosenfeld 
7069c3024a3SHans Rosenfeld 	*paddr = (pte & X86_PTE_PHYSMASK & ~off_mask) + offset;
7079c3024a3SHans Rosenfeld 
7089c3024a3SHans Rosenfeld 	if (level == 0 || hugepage)
7099c3024a3SHans Rosenfeld 		return (0);
7109c3024a3SHans Rosenfeld 
7119c3024a3SHans Rosenfeld 	pte = 0;
7129c3024a3SHans Rosenfeld 	if (vmm_pread(vmm, &pte,  pte_size, *paddr) != pte_size)
7139c3024a3SHans Rosenfeld 		return (-1);
7149c3024a3SHans Rosenfeld 	return (vmm_pte2paddr(vmm, pte, ia32, level - 1, vaddr, paddr));
7159c3024a3SHans Rosenfeld }
7169c3024a3SHans Rosenfeld 
7179c3024a3SHans Rosenfeld static vmm_mode_t
vmm_vcpu_mmu_mode(vmm_t * vmm,int vcpuid __unused,vmm_mmu_t * mmu)718*32640292SAndy Fiddaman vmm_vcpu_mmu_mode(vmm_t *vmm, int vcpuid __unused, vmm_mmu_t *mmu)
7199c3024a3SHans Rosenfeld {
7209c3024a3SHans Rosenfeld 	if ((mmu->vm_cr0 & CR0_PE) == 0)
7219c3024a3SHans Rosenfeld 		return (VMM_MODE_REAL);
7229c3024a3SHans Rosenfeld 	else if ((mmu->vm_cr4 & CR4_PAE) == 0)
7239c3024a3SHans Rosenfeld 		return (VMM_MODE_PROT);
7249c3024a3SHans Rosenfeld 	else if ((mmu->vm_efer & AMD_EFER_LME) == 0)
7259c3024a3SHans Rosenfeld 		return (VMM_MODE_PAE);
7269c3024a3SHans Rosenfeld 	else
7279c3024a3SHans Rosenfeld 		return (VMM_MODE_LONG);
7289c3024a3SHans Rosenfeld }
7299c3024a3SHans Rosenfeld 
7309c3024a3SHans Rosenfeld vmm_mode_t
vmm_vcpu_mode(vmm_t * vmm,int vcpuid)731*32640292SAndy Fiddaman vmm_vcpu_mode(vmm_t *vmm, int vcpuid)
7329c3024a3SHans Rosenfeld {
7339c3024a3SHans Rosenfeld 	vmm_mmu_t mmu = { 0 };
7349c3024a3SHans Rosenfeld 
735*32640292SAndy Fiddaman 	if (vmm_get_regset(vmm, vcpuid, ARRAY_SIZE(vmm_mmu_regnum),
7369c3024a3SHans Rosenfeld 	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
7379c3024a3SHans Rosenfeld 		return (VMM_MODE_UNKNOWN);
7389c3024a3SHans Rosenfeld 
739*32640292SAndy Fiddaman 	return (vmm_vcpu_mmu_mode(vmm, vcpuid, &mmu));
7409c3024a3SHans Rosenfeld }
7419c3024a3SHans Rosenfeld 
7429c3024a3SHans Rosenfeld vmm_isa_t
vmm_vcpu_isa(vmm_t * vmm,int vcpuid)743*32640292SAndy Fiddaman vmm_vcpu_isa(vmm_t *vmm, int vcpuid)
7449c3024a3SHans Rosenfeld {
7459c3024a3SHans Rosenfeld 	vmm_desc_t cs;
7469c3024a3SHans Rosenfeld 
747*32640292SAndy Fiddaman 	if (vmm_get_desc(vmm, vcpuid, VMM_DESC_CS, &cs) != 0)
7489c3024a3SHans Rosenfeld 		return (VMM_ISA_UNKNOWN);
7499c3024a3SHans Rosenfeld 
7509c3024a3SHans Rosenfeld 	switch (cs.vd_acc & (X86_SEG_BIG | X86_SEG_LONG)) {
7519c3024a3SHans Rosenfeld 	case 0x0:		/* 16b code segment */
7529c3024a3SHans Rosenfeld 		return (VMM_ISA_16);
7539c3024a3SHans Rosenfeld 	case X86_SEG_LONG:	/* 64b code segment */
7549c3024a3SHans Rosenfeld 		return (VMM_ISA_64);
7559c3024a3SHans Rosenfeld 	case X86_SEG_BIG:	/* 32b code segment */
7569c3024a3SHans Rosenfeld 		return (VMM_ISA_32);
7579c3024a3SHans Rosenfeld 	}
7589c3024a3SHans Rosenfeld 
7599c3024a3SHans Rosenfeld 	return (VMM_ISA_UNKNOWN);
7609c3024a3SHans Rosenfeld }
7619c3024a3SHans Rosenfeld 
7629c3024a3SHans Rosenfeld /*
7639c3024a3SHans Rosenfeld  * vmm_vtol:
7649c3024a3SHans Rosenfeld  *
7659c3024a3SHans Rosenfeld  * Translate a virtual address to a physical address on a certain vCPU,
7669c3024a3SHans Rosenfeld  * using the specified segment register or descriptor according to the mode.
7679c3024a3SHans Rosenfeld  *
7689c3024a3SHans Rosenfeld  */
7699c3024a3SHans Rosenfeld int
vmm_vtol(vmm_t * vmm,int vcpuid,int seg,uint64_t vaddr,uint64_t * laddr)770*32640292SAndy Fiddaman vmm_vtol(vmm_t *vmm, int vcpuid, int seg, uint64_t vaddr, uint64_t *laddr)
7719c3024a3SHans Rosenfeld {
7729c3024a3SHans Rosenfeld 	vmm_desc_t desc;
7739c3024a3SHans Rosenfeld 	uint64_t limit;
7749c3024a3SHans Rosenfeld 
775*32640292SAndy Fiddaman 	if (vmm_get_desc(vmm, vcpuid, seg, &desc) != 0)
7769c3024a3SHans Rosenfeld 		return (-1);
7779c3024a3SHans Rosenfeld 
778*32640292SAndy Fiddaman 	switch (vmm_vcpu_mode(vmm, vcpuid)) {
7799c3024a3SHans Rosenfeld 	case VMM_MODE_REAL:
7809c3024a3SHans Rosenfeld 		if (seg == VMM_DESC_FS || seg == VMM_DESC_GS)
7819c3024a3SHans Rosenfeld 			goto fault;
7829c3024a3SHans Rosenfeld 		/* FALLTHRU */
7839c3024a3SHans Rosenfeld 	case VMM_MODE_PROT:
7849c3024a3SHans Rosenfeld 	case VMM_MODE_PAE:
7859c3024a3SHans Rosenfeld 		if ((desc.vd_acc & X86_SEG_USABLE_MASK) != X86_SEG_USABLE)
7869c3024a3SHans Rosenfeld 			/* unusable, system segment, or not present */
7879c3024a3SHans Rosenfeld 			goto fault;
7889c3024a3SHans Rosenfeld 
7899c3024a3SHans Rosenfeld 		limit = desc.vd_lim;
7909c3024a3SHans Rosenfeld 		if (desc.vd_acc & X86_SEG_GRANULARITY)
7919c3024a3SHans Rosenfeld 			limit *= 4096;
7929c3024a3SHans Rosenfeld 
7939c3024a3SHans Rosenfeld 		if (vaddr > limit)
7949c3024a3SHans Rosenfeld 			goto fault;
7959c3024a3SHans Rosenfeld 		/* FALLTHRU */
7969c3024a3SHans Rosenfeld 	case VMM_MODE_LONG:
7979c3024a3SHans Rosenfeld 		*laddr = desc.vd_base + vaddr;
7989c3024a3SHans Rosenfeld 		return (0);
7999c3024a3SHans Rosenfeld 
8009c3024a3SHans Rosenfeld 	default:
8019c3024a3SHans Rosenfeld 	fault:
8029c3024a3SHans Rosenfeld 		errno = EFAULT;
8039c3024a3SHans Rosenfeld 		return (-1);
8049c3024a3SHans Rosenfeld 	}
8059c3024a3SHans Rosenfeld 
8069c3024a3SHans Rosenfeld }
8079c3024a3SHans Rosenfeld 
8089c3024a3SHans Rosenfeld /*
8099c3024a3SHans Rosenfeld  * vmm_vtop:
8109c3024a3SHans Rosenfeld  *
8119c3024a3SHans Rosenfeld  * Translate a virtual address to a guest physical address on a certain vCPU,
8129c3024a3SHans Rosenfeld  * according to the mode the vCPU is in.
8139c3024a3SHans Rosenfeld  */
8149c3024a3SHans Rosenfeld int
vmm_vtop(vmm_t * vmm,int vcpuid,int seg,uint64_t vaddr,uint64_t * paddr)815*32640292SAndy Fiddaman vmm_vtop(vmm_t *vmm, int vcpuid, int seg, uint64_t vaddr, uint64_t *paddr)
8169c3024a3SHans Rosenfeld {
8179c3024a3SHans Rosenfeld 	vmm_mmu_t mmu = { 0 };
8189c3024a3SHans Rosenfeld 	int ret = 0;
8199c3024a3SHans Rosenfeld 
820*32640292SAndy Fiddaman 	if (vmm_vtol(vmm, vcpuid, seg, vaddr, &vaddr) != 0)
8219c3024a3SHans Rosenfeld 		return (-1);
8229c3024a3SHans Rosenfeld 
823*32640292SAndy Fiddaman 	if (vmm_get_regset(vmm, vcpuid, ARRAY_SIZE(vmm_mmu_regnum),
8249c3024a3SHans Rosenfeld 	    vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
8259c3024a3SHans Rosenfeld 		return (-1);
8269c3024a3SHans Rosenfeld 
8279c3024a3SHans Rosenfeld 	if ((mmu.vm_cr0 & CR0_PG) == 0) {
8289c3024a3SHans Rosenfeld 		/* no paging, physical equals virtual */
8299c3024a3SHans Rosenfeld 		*paddr = vaddr;
8309c3024a3SHans Rosenfeld 		return (0);
8319c3024a3SHans Rosenfeld 	}
8329c3024a3SHans Rosenfeld 
833*32640292SAndy Fiddaman 	switch (vmm_vcpu_mmu_mode(vmm, vcpuid, &mmu)) {
8349c3024a3SHans Rosenfeld 	case VMM_MODE_PROT:
8359c3024a3SHans Rosenfeld 		/* protected mode, no PAE: 2-level paging, 32bit PTEs */
8369c3024a3SHans Rosenfeld 		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_TRUE, 2, vaddr, paddr);
8379c3024a3SHans Rosenfeld 		break;
8389c3024a3SHans Rosenfeld 	case VMM_MODE_PAE:
8399c3024a3SHans Rosenfeld 		/* protected mode with PAE: 3-level paging, 64bit PTEs */
8409c3024a3SHans Rosenfeld 		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 3, vaddr, paddr);
8419c3024a3SHans Rosenfeld 		break;
8429c3024a3SHans Rosenfeld 	case VMM_MODE_LONG:
8439c3024a3SHans Rosenfeld 		/* long mode: 4-level paging, 64bit PTEs */
8449c3024a3SHans Rosenfeld 		ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 4, vaddr, paddr);
8459c3024a3SHans Rosenfeld 		break;
8469c3024a3SHans Rosenfeld 	default:
8479c3024a3SHans Rosenfeld 		ret = -1;
8489c3024a3SHans Rosenfeld 	}
8499c3024a3SHans Rosenfeld 
8509c3024a3SHans Rosenfeld 	return (ret);
8519c3024a3SHans Rosenfeld }
8529c3024a3SHans Rosenfeld 
8539c3024a3SHans Rosenfeld ssize_t
vmm_vread(vmm_t * vmm,int vcpuid,int seg,void * buf,size_t len,uintptr_t addr)854*32640292SAndy Fiddaman vmm_vread(vmm_t *vmm, int vcpuid, int seg, void *buf, size_t len, uintptr_t
855*32640292SAndy Fiddaman     addr)
8569c3024a3SHans Rosenfeld {
8579c3024a3SHans Rosenfeld 	ssize_t res = 0;
8589c3024a3SHans Rosenfeld 	uint64_t paddr;
8599c3024a3SHans Rosenfeld 	size_t plen;
8609c3024a3SHans Rosenfeld 	uint64_t boundary;
8619c3024a3SHans Rosenfeld 
8629c3024a3SHans Rosenfeld 	while (len != 0) {
863*32640292SAndy Fiddaman 		if (vmm_vtop(vmm, vcpuid, seg, addr, &paddr) != 0) {
8649c3024a3SHans Rosenfeld 			errno = EFAULT;
8659c3024a3SHans Rosenfeld 			return (0);
8669c3024a3SHans Rosenfeld 		}
8679c3024a3SHans Rosenfeld 
8689c3024a3SHans Rosenfeld 		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
8699c3024a3SHans Rosenfeld 		if (addr + len > boundary)
8709c3024a3SHans Rosenfeld 			plen = boundary - addr;
8719c3024a3SHans Rosenfeld 		else
8729c3024a3SHans Rosenfeld 			plen = len;
8739c3024a3SHans Rosenfeld 
8749c3024a3SHans Rosenfeld 		if (vmm_pread(vmm, buf, plen, paddr) != plen)
8759c3024a3SHans Rosenfeld 			return (0);
8769c3024a3SHans Rosenfeld 		len -= plen;
8779c3024a3SHans Rosenfeld 		addr += plen;
8789c3024a3SHans Rosenfeld 		buf += plen;
8799c3024a3SHans Rosenfeld 		res += plen;
8809c3024a3SHans Rosenfeld 	}
8819c3024a3SHans Rosenfeld 
8829c3024a3SHans Rosenfeld 	return (res);
8839c3024a3SHans Rosenfeld }
8849c3024a3SHans Rosenfeld 
8859c3024a3SHans Rosenfeld ssize_t
vmm_vwrite(vmm_t * vmm,int vcpuid,int seg,const void * buf,size_t len,uintptr_t addr)886*32640292SAndy Fiddaman vmm_vwrite(vmm_t *vmm, int vcpuid, int seg, const void *buf, size_t len,
8879c3024a3SHans Rosenfeld     uintptr_t addr)
8889c3024a3SHans Rosenfeld {
8899c3024a3SHans Rosenfeld 	ssize_t res = 0;
8909c3024a3SHans Rosenfeld 	uint64_t paddr;
8919c3024a3SHans Rosenfeld 	size_t plen;
8929c3024a3SHans Rosenfeld 	uint64_t boundary;
8939c3024a3SHans Rosenfeld 
8949c3024a3SHans Rosenfeld 	while (len != 0) {
895*32640292SAndy Fiddaman 		if (vmm_vtop(vmm, vcpuid, seg, addr, &paddr) != 0) {
8969c3024a3SHans Rosenfeld 			errno = EFAULT;
8979c3024a3SHans Rosenfeld 			return (0);
8989c3024a3SHans Rosenfeld 		}
8999c3024a3SHans Rosenfeld 
9009c3024a3SHans Rosenfeld 		boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
9019c3024a3SHans Rosenfeld 		if (addr + len > boundary)
9029c3024a3SHans Rosenfeld 			plen = boundary - addr;
9039c3024a3SHans Rosenfeld 		else
9049c3024a3SHans Rosenfeld 			plen = len;
9059c3024a3SHans Rosenfeld 
9069c3024a3SHans Rosenfeld 		if (vmm_pwrite(vmm, buf, plen, paddr) != plen)
9079c3024a3SHans Rosenfeld 			return (0);
9089c3024a3SHans Rosenfeld 		len -= plen;
9099c3024a3SHans Rosenfeld 		addr += plen;
9109c3024a3SHans Rosenfeld 		buf += plen;
9119c3024a3SHans Rosenfeld 		res += plen;
9129c3024a3SHans Rosenfeld 	}
9139c3024a3SHans Rosenfeld 
9149c3024a3SHans Rosenfeld 	return (res);
9159c3024a3SHans Rosenfeld }
916