/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
119c3024a3SHans Rosenfeld
/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 Oxide Computer Company
 * Copyright 2023 OmniOS Community Edition (OmniOSce) Association.
 */
179c3024a3SHans Rosenfeld
/*
 * Library for native code to access bhyve VMs, without the need to use
 * FreeBSD compat headers
 */
229c3024a3SHans Rosenfeld
239c3024a3SHans Rosenfeld #include <sys/param.h>
249c3024a3SHans Rosenfeld #include <sys/list.h>
259c3024a3SHans Rosenfeld #include <sys/stddef.h>
269c3024a3SHans Rosenfeld #include <sys/mman.h>
279c3024a3SHans Rosenfeld #include <sys/kdi_regs.h>
289c3024a3SHans Rosenfeld #include <sys/sysmacros.h>
299c3024a3SHans Rosenfeld #include <sys/controlregs.h>
309c3024a3SHans Rosenfeld #include <sys/note.h>
319c3024a3SHans Rosenfeld #include <sys/debug.h>
329c3024a3SHans Rosenfeld #include <errno.h>
339c3024a3SHans Rosenfeld #include <stdlib.h>
349c3024a3SHans Rosenfeld #include <strings.h>
359c3024a3SHans Rosenfeld #include <unistd.h>
369c3024a3SHans Rosenfeld #include <assert.h>
379c3024a3SHans Rosenfeld
389c3024a3SHans Rosenfeld #include <machine/vmm.h>
399c3024a3SHans Rosenfeld #include <vmmapi.h>
409c3024a3SHans Rosenfeld
419c3024a3SHans Rosenfeld #include <libvmm.h>
429c3024a3SHans Rosenfeld
439c3024a3SHans Rosenfeld typedef struct vmm_memseg vmm_memseg_t;
449c3024a3SHans Rosenfeld
459c3024a3SHans Rosenfeld #define VMM_MEMSEG_DEVMEM 0x1
469c3024a3SHans Rosenfeld
479c3024a3SHans Rosenfeld struct vmm_memseg {
489c3024a3SHans Rosenfeld list_node_t vms_list;
499c3024a3SHans Rosenfeld int vms_segid;
509c3024a3SHans Rosenfeld int vms_prot;
519c3024a3SHans Rosenfeld int vms_flags;
529c3024a3SHans Rosenfeld uintptr_t vms_gpa;
539c3024a3SHans Rosenfeld off_t vms_segoff;
549c3024a3SHans Rosenfeld size_t vms_seglen;
559c3024a3SHans Rosenfeld size_t vms_maplen;
569c3024a3SHans Rosenfeld char vms_name[64];
579c3024a3SHans Rosenfeld };
589c3024a3SHans Rosenfeld
599c3024a3SHans Rosenfeld struct vmm {
609c3024a3SHans Rosenfeld struct vmctx *vmm_ctx;
619c3024a3SHans Rosenfeld list_t vmm_memlist;
629c3024a3SHans Rosenfeld char *vmm_mem;
639c3024a3SHans Rosenfeld size_t vmm_memsize;
649c3024a3SHans Rosenfeld size_t vmm_ncpu;
65*32640292SAndy Fiddaman struct vcpu **vmm_vcpu;
669c3024a3SHans Rosenfeld };
679c3024a3SHans Rosenfeld
689c3024a3SHans Rosenfeld
699c3024a3SHans Rosenfeld /*
709c3024a3SHans Rosenfeld * This code relies on two assumptions:
719c3024a3SHans Rosenfeld * - CPUs are never removed from the "active set", not even when suspended.
729c3024a3SHans Rosenfeld * A CPU being active just means that it has been used by the guest OS.
739c3024a3SHans Rosenfeld * - The CPU numbering is consecutive.
749c3024a3SHans Rosenfeld */
759c3024a3SHans Rosenfeld static void
vmm_update_ncpu(vmm_t * vmm)769c3024a3SHans Rosenfeld vmm_update_ncpu(vmm_t *vmm)
779c3024a3SHans Rosenfeld {
789c3024a3SHans Rosenfeld cpuset_t cpuset;
799c3024a3SHans Rosenfeld
809c3024a3SHans Rosenfeld assert(vm_active_cpus(vmm->vmm_ctx, &cpuset) == 0);
819c3024a3SHans Rosenfeld
829c3024a3SHans Rosenfeld for (vmm->vmm_ncpu = 0;
839c3024a3SHans Rosenfeld CPU_ISSET(vmm->vmm_ncpu, &cpuset) == 1;
849c3024a3SHans Rosenfeld vmm->vmm_ncpu++)
859c3024a3SHans Rosenfeld ;
869c3024a3SHans Rosenfeld }
879c3024a3SHans Rosenfeld
889c3024a3SHans Rosenfeld vmm_t *
vmm_open_vm(const char * name)899c3024a3SHans Rosenfeld vmm_open_vm(const char *name)
909c3024a3SHans Rosenfeld {
919c3024a3SHans Rosenfeld vmm_t *vmm = NULL;
92*32640292SAndy Fiddaman int _errno;
93*32640292SAndy Fiddaman int i;
949c3024a3SHans Rosenfeld
959c3024a3SHans Rosenfeld vmm = malloc(sizeof (vmm_t));
969c3024a3SHans Rosenfeld if (vmm == NULL)
979c3024a3SHans Rosenfeld return (NULL);
989c3024a3SHans Rosenfeld
999c3024a3SHans Rosenfeld bzero(vmm, sizeof (vmm_t));
1009c3024a3SHans Rosenfeld vmm->vmm_mem = MAP_FAILED;
1019c3024a3SHans Rosenfeld
1029c3024a3SHans Rosenfeld list_create(&vmm->vmm_memlist, sizeof (vmm_memseg_t),
1039c3024a3SHans Rosenfeld offsetof(vmm_memseg_t, vms_list));
1049c3024a3SHans Rosenfeld
1059c3024a3SHans Rosenfeld vmm->vmm_ctx = vm_open(name);
1069c3024a3SHans Rosenfeld if (vmm->vmm_ctx == NULL) {
107*32640292SAndy Fiddaman list_destroy(&vmm->vmm_memlist);
1089c3024a3SHans Rosenfeld free(vmm);
1099c3024a3SHans Rosenfeld return (NULL);
1109c3024a3SHans Rosenfeld }
1119c3024a3SHans Rosenfeld
1129c3024a3SHans Rosenfeld vmm_update_ncpu(vmm);
1139c3024a3SHans Rosenfeld
1149c3024a3SHans Rosenfeld /*
1159c3024a3SHans Rosenfeld * If we open a VM that has just been created we may see a state
1169c3024a3SHans Rosenfeld * where it has no CPUs configured yet. We'll just wait for 10ms
1179c3024a3SHans Rosenfeld * and retry until we get a non-zero CPU count.
1189c3024a3SHans Rosenfeld */
1199c3024a3SHans Rosenfeld if (vmm->vmm_ncpu == 0) {
1209c3024a3SHans Rosenfeld do {
1219c3024a3SHans Rosenfeld (void) usleep(10000);
1229c3024a3SHans Rosenfeld vmm_update_ncpu(vmm);
1239c3024a3SHans Rosenfeld } while (vmm->vmm_ncpu == 0);
1249c3024a3SHans Rosenfeld }
1259c3024a3SHans Rosenfeld
126*32640292SAndy Fiddaman vmm->vmm_vcpu = calloc(vmm->vmm_ncpu, sizeof (struct vcpu *));
127*32640292SAndy Fiddaman if (vmm->vmm_vcpu == NULL)
128*32640292SAndy Fiddaman goto fail;
129*32640292SAndy Fiddaman for (i = 0; i < vmm->vmm_ncpu; i++) {
130*32640292SAndy Fiddaman vmm->vmm_vcpu[i] = vm_vcpu_open(vmm->vmm_ctx, i);
131*32640292SAndy Fiddaman if (vmm->vmm_vcpu[i] == NULL) {
132*32640292SAndy Fiddaman _errno = errno;
133*32640292SAndy Fiddaman while (i-- >= 0)
134*32640292SAndy Fiddaman vm_vcpu_close(vmm->vmm_vcpu[i]);
135*32640292SAndy Fiddaman free(vmm->vmm_vcpu);
136*32640292SAndy Fiddaman errno = _errno;
137*32640292SAndy Fiddaman goto fail;
138*32640292SAndy Fiddaman }
139*32640292SAndy Fiddaman }
140*32640292SAndy Fiddaman
1419c3024a3SHans Rosenfeld return (vmm);
142*32640292SAndy Fiddaman
143*32640292SAndy Fiddaman fail:
144*32640292SAndy Fiddaman _errno = errno;
145*32640292SAndy Fiddaman vmm_close_vm(vmm);
146*32640292SAndy Fiddaman errno = _errno;
147*32640292SAndy Fiddaman
148*32640292SAndy Fiddaman return (NULL);
1499c3024a3SHans Rosenfeld }
1509c3024a3SHans Rosenfeld
1519c3024a3SHans Rosenfeld void
vmm_close_vm(vmm_t * vmm)1529c3024a3SHans Rosenfeld vmm_close_vm(vmm_t *vmm)
1539c3024a3SHans Rosenfeld {
154*32640292SAndy Fiddaman uint_t i;
155*32640292SAndy Fiddaman
1569c3024a3SHans Rosenfeld vmm_unmap(vmm);
1579c3024a3SHans Rosenfeld
158*32640292SAndy Fiddaman for (i = 0; i < vmm->vmm_ncpu; i++)
159*32640292SAndy Fiddaman vm_vcpu_close(vmm->vmm_vcpu[i]);
160*32640292SAndy Fiddaman free(vmm->vmm_vcpu);
161*32640292SAndy Fiddaman
1629c3024a3SHans Rosenfeld list_destroy(&vmm->vmm_memlist);
1639c3024a3SHans Rosenfeld
1649c3024a3SHans Rosenfeld if (vmm->vmm_ctx != NULL)
1659c3024a3SHans Rosenfeld vm_close(vmm->vmm_ctx);
1669c3024a3SHans Rosenfeld
1679c3024a3SHans Rosenfeld free(vmm);
1689c3024a3SHans Rosenfeld }
1699c3024a3SHans Rosenfeld
1709c3024a3SHans Rosenfeld static vmm_memseg_t *
vmm_get_memseg(vmm_t * vmm,uintptr_t gpa)1719c3024a3SHans Rosenfeld vmm_get_memseg(vmm_t *vmm, uintptr_t gpa)
1729c3024a3SHans Rosenfeld {
1739c3024a3SHans Rosenfeld vmm_memseg_t ms, *ret;
1749c3024a3SHans Rosenfeld int error, flags;
1759c3024a3SHans Rosenfeld
1769c3024a3SHans Rosenfeld bzero(&ms, sizeof (vmm_memseg_t));
1779c3024a3SHans Rosenfeld ms.vms_gpa = gpa;
1789c3024a3SHans Rosenfeld error = vm_mmap_getnext(vmm->vmm_ctx, &ms.vms_gpa, &ms.vms_segid,
1799c3024a3SHans Rosenfeld &ms.vms_segoff, &ms.vms_maplen, &ms.vms_prot, &flags);
1809c3024a3SHans Rosenfeld if (error)
1819c3024a3SHans Rosenfeld return (NULL);
1829c3024a3SHans Rosenfeld
1839c3024a3SHans Rosenfeld error = vm_get_memseg(vmm->vmm_ctx, ms.vms_segid, &ms.vms_seglen,
1849c3024a3SHans Rosenfeld ms.vms_name, sizeof (ms.vms_name));
1859c3024a3SHans Rosenfeld if (error)
1869c3024a3SHans Rosenfeld return (NULL);
1879c3024a3SHans Rosenfeld
1889c3024a3SHans Rosenfeld /*
1899c3024a3SHans Rosenfeld * Regular memory segments don't have a name, but devmem segments do.
1909c3024a3SHans Rosenfeld * We can use that information to set the DEVMEM flag if necessary.
1919c3024a3SHans Rosenfeld */
1929c3024a3SHans Rosenfeld ms.vms_flags = ms.vms_name[0] != '\0' ? VMM_MEMSEG_DEVMEM : 0;
1939c3024a3SHans Rosenfeld
1949c3024a3SHans Rosenfeld ret = malloc(sizeof (vmm_memseg_t));
1959c3024a3SHans Rosenfeld if (ret == NULL)
1969c3024a3SHans Rosenfeld return (NULL);
1979c3024a3SHans Rosenfeld
1989c3024a3SHans Rosenfeld *ret = ms;
1999c3024a3SHans Rosenfeld
2009c3024a3SHans Rosenfeld return (ret);
2019c3024a3SHans Rosenfeld }
2029c3024a3SHans Rosenfeld
2039c3024a3SHans Rosenfeld int
vmm_map(vmm_t * vmm,boolean_t writable)2049c3024a3SHans Rosenfeld vmm_map(vmm_t *vmm, boolean_t writable)
2059c3024a3SHans Rosenfeld {
2069c3024a3SHans Rosenfeld uintptr_t last_gpa = 0;
2079c3024a3SHans Rosenfeld vmm_memseg_t *ms;
2089c3024a3SHans Rosenfeld int prot_write = writable ? PROT_WRITE : 0;
2099c3024a3SHans Rosenfeld
2109c3024a3SHans Rosenfeld if (vmm->vmm_mem != MAP_FAILED) {
2119c3024a3SHans Rosenfeld errno = EINVAL;
2129c3024a3SHans Rosenfeld return (-1);
2139c3024a3SHans Rosenfeld }
2149c3024a3SHans Rosenfeld
2159c3024a3SHans Rosenfeld assert(list_is_empty(&vmm->vmm_memlist));
2169c3024a3SHans Rosenfeld
2179c3024a3SHans Rosenfeld for (;;) {
2189c3024a3SHans Rosenfeld ms = vmm_get_memseg(vmm, last_gpa);
2199c3024a3SHans Rosenfeld
2209c3024a3SHans Rosenfeld if (ms == NULL)
2219c3024a3SHans Rosenfeld break;
2229c3024a3SHans Rosenfeld
2239c3024a3SHans Rosenfeld last_gpa = ms->vms_gpa + ms->vms_maplen;
2249c3024a3SHans Rosenfeld list_insert_tail(&vmm->vmm_memlist, ms);
2259c3024a3SHans Rosenfeld }
2269c3024a3SHans Rosenfeld
2279c3024a3SHans Rosenfeld vmm->vmm_mem = mmap(NULL, last_gpa, PROT_NONE,
2289c3024a3SHans Rosenfeld MAP_PRIVATE | MAP_ANON | MAP_NORESERVE, -1, 0);
2299c3024a3SHans Rosenfeld
2309c3024a3SHans Rosenfeld if (vmm->vmm_mem == MAP_FAILED)
2319c3024a3SHans Rosenfeld goto fail;
2329c3024a3SHans Rosenfeld
2339c3024a3SHans Rosenfeld for (ms = list_head(&vmm->vmm_memlist);
2349c3024a3SHans Rosenfeld ms != NULL;
2359c3024a3SHans Rosenfeld ms = list_next(&vmm->vmm_memlist, ms)) {
236c3d209caSPatrick Mooney off_t mapoff;
237c3d209caSPatrick Mooney
238c3d209caSPatrick Mooney if ((ms->vms_flags & VMM_MEMSEG_DEVMEM) == 0) {
239c3d209caSPatrick Mooney /*
240c3d209caSPatrick Mooney * sysmem segments will be located at an offset
241c3d209caSPatrick Mooney * equivalent to their GPA.
242c3d209caSPatrick Mooney */
243c3d209caSPatrick Mooney mapoff = ms->vms_gpa;
244c3d209caSPatrick Mooney } else {
245c3d209caSPatrick Mooney /*
246c3d209caSPatrick Mooney * devmem segments are located in a special region away
247c3d209caSPatrick Mooney * from the normal GPA space.
248c3d209caSPatrick Mooney */
249c3d209caSPatrick Mooney if (vm_get_devmem_offset(vmm->vmm_ctx, ms->vms_segid,
250c3d209caSPatrick Mooney &mapoff) != 0) {
251c3d209caSPatrick Mooney goto fail;
252c3d209caSPatrick Mooney }
253c3d209caSPatrick Mooney }
254c3d209caSPatrick Mooney
255c3d209caSPatrick Mooney /*
256c3d209caSPatrick Mooney * While 'mapoff' points to the front of the segment, the actual
257c3d209caSPatrick Mooney * mapping may be at some offset beyond that.
258c3d209caSPatrick Mooney */
259c3d209caSPatrick Mooney VERIFY(ms->vms_segoff >= 0);
260c3d209caSPatrick Mooney mapoff += ms->vms_segoff;
2619c3024a3SHans Rosenfeld
2629c3024a3SHans Rosenfeld vmm->vmm_memsize += ms->vms_maplen;
2639c3024a3SHans Rosenfeld
2649c3024a3SHans Rosenfeld if (mmap(vmm->vmm_mem + ms->vms_gpa, ms->vms_maplen,
2659c3024a3SHans Rosenfeld PROT_READ | prot_write, MAP_SHARED | MAP_FIXED,
2669c3024a3SHans Rosenfeld vm_get_device_fd(vmm->vmm_ctx), mapoff) == MAP_FAILED)
2679c3024a3SHans Rosenfeld goto fail;
2689c3024a3SHans Rosenfeld }
2699c3024a3SHans Rosenfeld
2709c3024a3SHans Rosenfeld return (0);
2719c3024a3SHans Rosenfeld
2729c3024a3SHans Rosenfeld fail:
2739c3024a3SHans Rosenfeld vmm_unmap(vmm);
2749c3024a3SHans Rosenfeld
2759c3024a3SHans Rosenfeld return (-1);
2769c3024a3SHans Rosenfeld }
2779c3024a3SHans Rosenfeld
2789c3024a3SHans Rosenfeld void
vmm_unmap(vmm_t * vmm)2799c3024a3SHans Rosenfeld vmm_unmap(vmm_t *vmm)
2809c3024a3SHans Rosenfeld {
2819c3024a3SHans Rosenfeld while (!list_is_empty(&vmm->vmm_memlist)) {
2829c3024a3SHans Rosenfeld vmm_memseg_t *ms = list_remove_head(&vmm->vmm_memlist);
2839c3024a3SHans Rosenfeld
2849c3024a3SHans Rosenfeld if (vmm->vmm_mem != MAP_FAILED) {
2859c3024a3SHans Rosenfeld (void) munmap(vmm->vmm_mem + ms->vms_gpa,
2869c3024a3SHans Rosenfeld ms->vms_maplen);
2879c3024a3SHans Rosenfeld }
2889c3024a3SHans Rosenfeld
2899c3024a3SHans Rosenfeld free(ms);
2909c3024a3SHans Rosenfeld }
2919c3024a3SHans Rosenfeld
2929c3024a3SHans Rosenfeld if (vmm->vmm_mem != MAP_FAILED)
2939c3024a3SHans Rosenfeld (void) munmap(vmm->vmm_mem, vmm->vmm_memsize);
2949c3024a3SHans Rosenfeld
2959c3024a3SHans Rosenfeld vmm->vmm_mem = MAP_FAILED;
2969c3024a3SHans Rosenfeld vmm->vmm_memsize = 0;
2979c3024a3SHans Rosenfeld }
2989c3024a3SHans Rosenfeld
2999c3024a3SHans Rosenfeld ssize_t
vmm_pread(vmm_t * vmm,void * buf,size_t len,uintptr_t addr)3009c3024a3SHans Rosenfeld vmm_pread(vmm_t *vmm, void *buf, size_t len, uintptr_t addr)
3019c3024a3SHans Rosenfeld {
3029c3024a3SHans Rosenfeld ssize_t count = 0;
3039c3024a3SHans Rosenfeld vmm_memseg_t *ms;
3049c3024a3SHans Rosenfeld ssize_t res = len;
3059c3024a3SHans Rosenfeld
3069c3024a3SHans Rosenfeld for (ms = list_head(&vmm->vmm_memlist);
3079c3024a3SHans Rosenfeld ms != NULL && len != 0;
3089c3024a3SHans Rosenfeld ms = list_next(&vmm->vmm_memlist, ms)) {
3099c3024a3SHans Rosenfeld
3109c3024a3SHans Rosenfeld if (addr >= ms->vms_gpa &&
3119c3024a3SHans Rosenfeld addr < ms->vms_gpa + ms->vms_maplen) {
3129c3024a3SHans Rosenfeld res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
3139c3024a3SHans Rosenfeld
3149c3024a3SHans Rosenfeld if (res < 0)
3159c3024a3SHans Rosenfeld res = 0;
3169c3024a3SHans Rosenfeld
3179c3024a3SHans Rosenfeld bcopy(vmm->vmm_mem + addr, buf, len - res);
3189c3024a3SHans Rosenfeld count += len - res;
3199c3024a3SHans Rosenfeld addr += len - res;
3209c3024a3SHans Rosenfeld len = res;
3219c3024a3SHans Rosenfeld }
3229c3024a3SHans Rosenfeld }
3239c3024a3SHans Rosenfeld
3249c3024a3SHans Rosenfeld if (res)
3259c3024a3SHans Rosenfeld errno = EFAULT;
3269c3024a3SHans Rosenfeld else
3279c3024a3SHans Rosenfeld errno = 0;
3289c3024a3SHans Rosenfeld
3299c3024a3SHans Rosenfeld return (count);
3309c3024a3SHans Rosenfeld }
3319c3024a3SHans Rosenfeld
3329c3024a3SHans Rosenfeld ssize_t
vmm_pwrite(vmm_t * vmm,const void * buf,size_t len,uintptr_t addr)3339c3024a3SHans Rosenfeld vmm_pwrite(vmm_t *vmm, const void *buf, size_t len, uintptr_t addr)
3349c3024a3SHans Rosenfeld {
3359c3024a3SHans Rosenfeld ssize_t count = 0;
3369c3024a3SHans Rosenfeld vmm_memseg_t *ms;
3379c3024a3SHans Rosenfeld ssize_t res = len;
3389c3024a3SHans Rosenfeld
3399c3024a3SHans Rosenfeld for (ms = list_head(&vmm->vmm_memlist);
3409c3024a3SHans Rosenfeld ms != NULL;
3419c3024a3SHans Rosenfeld ms = list_next(&vmm->vmm_memlist, ms)) {
3429c3024a3SHans Rosenfeld if (addr >= ms->vms_gpa &&
3439c3024a3SHans Rosenfeld addr < ms->vms_gpa + ms->vms_maplen) {
3449c3024a3SHans Rosenfeld res = (addr + len) - (ms->vms_gpa + ms->vms_maplen);
3459c3024a3SHans Rosenfeld
3469c3024a3SHans Rosenfeld if (res < 0)
3479c3024a3SHans Rosenfeld res = 0;
3489c3024a3SHans Rosenfeld
3499c3024a3SHans Rosenfeld bcopy(buf, vmm->vmm_mem + addr, len - res);
3509c3024a3SHans Rosenfeld count += len - res;
3519c3024a3SHans Rosenfeld addr += len - res;
3529c3024a3SHans Rosenfeld len = res;
3539c3024a3SHans Rosenfeld }
3549c3024a3SHans Rosenfeld }
3559c3024a3SHans Rosenfeld
3569c3024a3SHans Rosenfeld if (res)
3579c3024a3SHans Rosenfeld errno = EFAULT;
3589c3024a3SHans Rosenfeld else
3599c3024a3SHans Rosenfeld errno = 0;
3609c3024a3SHans Rosenfeld
3619c3024a3SHans Rosenfeld return (count);
3629c3024a3SHans Rosenfeld }
3639c3024a3SHans Rosenfeld
3649c3024a3SHans Rosenfeld size_t
vmm_ncpu(vmm_t * vmm)3659c3024a3SHans Rosenfeld vmm_ncpu(vmm_t *vmm)
3669c3024a3SHans Rosenfeld {
3679c3024a3SHans Rosenfeld return (vmm->vmm_ncpu);
3689c3024a3SHans Rosenfeld }
3699c3024a3SHans Rosenfeld
3709c3024a3SHans Rosenfeld size_t
vmm_memsize(vmm_t * vmm)3719c3024a3SHans Rosenfeld vmm_memsize(vmm_t *vmm)
3729c3024a3SHans Rosenfeld {
3739c3024a3SHans Rosenfeld return (vmm->vmm_memsize);
3749c3024a3SHans Rosenfeld }
3759c3024a3SHans Rosenfeld
3769c3024a3SHans Rosenfeld int
vmm_cont(vmm_t * vmm)3779c3024a3SHans Rosenfeld vmm_cont(vmm_t *vmm)
3789c3024a3SHans Rosenfeld {
379*32640292SAndy Fiddaman return (vm_resume_all_cpus(vmm->vmm_ctx));
3809c3024a3SHans Rosenfeld }
3819c3024a3SHans Rosenfeld
3829c3024a3SHans Rosenfeld int
vmm_step(vmm_t * vmm,int vcpuid)383*32640292SAndy Fiddaman vmm_step(vmm_t *vmm, int vcpuid)
3849c3024a3SHans Rosenfeld {
3859c3024a3SHans Rosenfeld cpuset_t cpuset;
3869c3024a3SHans Rosenfeld int ret;
3879c3024a3SHans Rosenfeld
388*32640292SAndy Fiddaman if (vcpuid >= vmm->vmm_ncpu) {
3899c3024a3SHans Rosenfeld errno = EINVAL;
3909c3024a3SHans Rosenfeld return (-1);
3919c3024a3SHans Rosenfeld }
3929c3024a3SHans Rosenfeld
393*32640292SAndy Fiddaman ret = vm_set_capability(vmm->vmm_vcpu[vcpuid], VM_CAP_MTRAP_EXIT, 1);
3949c3024a3SHans Rosenfeld if (ret != 0)
3959c3024a3SHans Rosenfeld return (-1);
3969c3024a3SHans Rosenfeld
397*32640292SAndy Fiddaman assert(vm_resume_cpu(vmm->vmm_vcpu[vcpuid]) == 0);
3989c3024a3SHans Rosenfeld
3999c3024a3SHans Rosenfeld do {
4009c3024a3SHans Rosenfeld (void) vm_debug_cpus(vmm->vmm_ctx, &cpuset);
401*32640292SAndy Fiddaman } while (!CPU_ISSET(vcpuid, &cpuset));
4029c3024a3SHans Rosenfeld
403*32640292SAndy Fiddaman (void) vm_set_capability(vmm->vmm_vcpu[vcpuid], VM_CAP_MTRAP_EXIT, 0);
4049c3024a3SHans Rosenfeld
4059c3024a3SHans Rosenfeld return (ret);
4069c3024a3SHans Rosenfeld }
4079c3024a3SHans Rosenfeld
4089c3024a3SHans Rosenfeld int
vmm_stop(vmm_t * vmm)4099c3024a3SHans Rosenfeld vmm_stop(vmm_t *vmm)
4109c3024a3SHans Rosenfeld {
411*32640292SAndy Fiddaman int ret = vm_suspend_all_cpus(vmm->vmm_ctx);
4129c3024a3SHans Rosenfeld
4139c3024a3SHans Rosenfeld if (ret == 0)
4149c3024a3SHans Rosenfeld vmm_update_ncpu(vmm);
4159c3024a3SHans Rosenfeld
4169c3024a3SHans Rosenfeld return (ret);
4179c3024a3SHans Rosenfeld }
4189c3024a3SHans Rosenfeld
4199c3024a3SHans Rosenfeld /*
4209c3024a3SHans Rosenfeld * Mapping of KDI-defined registers to vmmapi-defined registers.
4219c3024a3SHans Rosenfeld * Registers not known to vmmapi use VM_REG_LAST, which is invalid and
4229c3024a3SHans Rosenfeld * causes an error in vm_{get,set}_register_set().
4239c3024a3SHans Rosenfeld *
4249c3024a3SHans Rosenfeld * This array must be kept in sync with the definitions in kdi_regs.h.
4259c3024a3SHans Rosenfeld */
4269c3024a3SHans Rosenfeld static int vmm_kdi_regmap[] = {
4279c3024a3SHans Rosenfeld VM_REG_LAST, /* KDIREG_SAVFP */
4289c3024a3SHans Rosenfeld VM_REG_LAST, /* KDIREG_SAVPC */
4299c3024a3SHans Rosenfeld VM_REG_GUEST_RDI, /* KDIREG_RDI */
4309c3024a3SHans Rosenfeld VM_REG_GUEST_RSI, /* KDIREG_RSI */
4319c3024a3SHans Rosenfeld VM_REG_GUEST_RDX, /* KDIREG_RDX */
4329c3024a3SHans Rosenfeld VM_REG_GUEST_RCX, /* KDIREG_RCX */
4339c3024a3SHans Rosenfeld VM_REG_GUEST_R8, /* KDIREG_R8 */
4349c3024a3SHans Rosenfeld VM_REG_GUEST_R9, /* KDIREG_R9 */
4359c3024a3SHans Rosenfeld VM_REG_GUEST_RAX, /* KDIREG_RAX */
4369c3024a3SHans Rosenfeld VM_REG_GUEST_RBX, /* KDIREG_RBX */
4379c3024a3SHans Rosenfeld VM_REG_GUEST_RBP, /* KDIREG_RBP */
4389c3024a3SHans Rosenfeld VM_REG_GUEST_R10, /* KDIREG_R10 */
4399c3024a3SHans Rosenfeld VM_REG_GUEST_R11, /* KDIREG_R11 */
4409c3024a3SHans Rosenfeld VM_REG_GUEST_R12, /* KDIREG_R12 */
4419c3024a3SHans Rosenfeld VM_REG_GUEST_R13, /* KDIREG_R13 */
4429c3024a3SHans Rosenfeld VM_REG_GUEST_R14, /* KDIREG_R14 */
4439c3024a3SHans Rosenfeld VM_REG_GUEST_R15, /* KDIREG_R15 */
4449c3024a3SHans Rosenfeld VM_REG_LAST, /* KDIREG_FSBASE */
4459c3024a3SHans Rosenfeld VM_REG_LAST, /* KDIREG_GSBASE */
4469c3024a3SHans Rosenfeld VM_REG_LAST, /* KDIREG_KGSBASE */
4479c3024a3SHans Rosenfeld VM_REG_GUEST_CR2, /* KDIREG_CR2 */
4489c3024a3SHans Rosenfeld VM_REG_GUEST_CR3, /* KDIREG_CR3 */
4499c3024a3SHans Rosenfeld VM_REG_GUEST_DS, /* KDIREG_DS */
4509c3024a3SHans Rosenfeld VM_REG_GUEST_ES, /* KDIREG_ES */
4519c3024a3SHans Rosenfeld VM_REG_GUEST_FS, /* KDIREG_FS */
4529c3024a3SHans Rosenfeld VM_REG_GUEST_GS, /* KDIREG_GS */
4539c3024a3SHans Rosenfeld VM_REG_LAST, /* KDIREG_TRAPNO */
4549c3024a3SHans Rosenfeld VM_REG_LAST, /* KDIREG_ERR */
4559c3024a3SHans Rosenfeld VM_REG_GUEST_RIP, /* KDIREG_RIP */
4569c3024a3SHans Rosenfeld VM_REG_GUEST_CS, /* KDIREG_CS */
4579c3024a3SHans Rosenfeld VM_REG_GUEST_RFLAGS, /* KDIREG_RFLAGS */
4589c3024a3SHans Rosenfeld VM_REG_GUEST_RSP, /* KDIREG_RSP */
4599c3024a3SHans Rosenfeld VM_REG_GUEST_SS /* KDIREG_SS */
4609c3024a3SHans Rosenfeld };
4619c3024a3SHans Rosenfeld CTASSERT(ARRAY_SIZE(vmm_kdi_regmap) == KDIREG_NGREG);
4629c3024a3SHans Rosenfeld
4639c3024a3SHans Rosenfeld /*
4649c3024a3SHans Rosenfeld * Mapping of libvmm-defined registers to vmmapi-defined registers.
4659c3024a3SHans Rosenfeld *
4669c3024a3SHans Rosenfeld * This array must be kept in sync with the definitions in libvmm.h
4679c3024a3SHans Rosenfeld */
4689c3024a3SHans Rosenfeld static int vmm_sys_regmap[] = {
4699c3024a3SHans Rosenfeld VM_REG_GUEST_CR0, /* VMM_REG_CR0 */
4709c3024a3SHans Rosenfeld VM_REG_GUEST_CR2, /* VMM_REG_CR2 */
4719c3024a3SHans Rosenfeld VM_REG_GUEST_CR3, /* VMM_REG_CR3 */
4729c3024a3SHans Rosenfeld VM_REG_GUEST_CR4, /* VMM_REG_CR4 */
4739c3024a3SHans Rosenfeld VM_REG_GUEST_DR0, /* VMM_REG_DR0 */
4749c3024a3SHans Rosenfeld VM_REG_GUEST_DR1, /* VMM_REG_DR1 */
4759c3024a3SHans Rosenfeld VM_REG_GUEST_DR2, /* VMM_REG_DR2 */
4769c3024a3SHans Rosenfeld VM_REG_GUEST_DR3, /* VMM_REG_DR3 */
4779c3024a3SHans Rosenfeld VM_REG_GUEST_DR6, /* VMM_REG_DR6 */
4789c3024a3SHans Rosenfeld VM_REG_GUEST_DR7, /* VMM_REG_DR7 */
4799c3024a3SHans Rosenfeld VM_REG_GUEST_EFER, /* VMM_REG_EFER */
4809c3024a3SHans Rosenfeld VM_REG_GUEST_PDPTE0, /* VMM_REG_PDPTE0 */
4819c3024a3SHans Rosenfeld VM_REG_GUEST_PDPTE1, /* VMM_REG_PDPTE1 */
4829c3024a3SHans Rosenfeld VM_REG_GUEST_PDPTE2, /* VMM_REG_PDPTE2 */
4839c3024a3SHans Rosenfeld VM_REG_GUEST_PDPTE3, /* VMM_REG_PDPTE3 */
4849c3024a3SHans Rosenfeld VM_REG_GUEST_INTR_SHADOW, /* VMM_REG_INTR_SHADOW */
4859c3024a3SHans Rosenfeld };
4869c3024a3SHans Rosenfeld
4879c3024a3SHans Rosenfeld /*
4889c3024a3SHans Rosenfeld * Mapping of libvmm-defined descriptors to vmmapi-defined descriptors.
4899c3024a3SHans Rosenfeld *
4909c3024a3SHans Rosenfeld * This array must be kept in sync with the definitions in libvmm.h
4919c3024a3SHans Rosenfeld */
4929c3024a3SHans Rosenfeld static int vmm_descmap[] = {
4939c3024a3SHans Rosenfeld VM_REG_GUEST_GDTR,
4949c3024a3SHans Rosenfeld VM_REG_GUEST_LDTR,
4959c3024a3SHans Rosenfeld VM_REG_GUEST_IDTR,
4969c3024a3SHans Rosenfeld VM_REG_GUEST_TR,
4979c3024a3SHans Rosenfeld VM_REG_GUEST_CS,
4989c3024a3SHans Rosenfeld VM_REG_GUEST_DS,
4999c3024a3SHans Rosenfeld VM_REG_GUEST_ES,
5009c3024a3SHans Rosenfeld VM_REG_GUEST_FS,
5019c3024a3SHans Rosenfeld VM_REG_GUEST_GS,
5029c3024a3SHans Rosenfeld VM_REG_GUEST_SS
5039c3024a3SHans Rosenfeld };
5049c3024a3SHans Rosenfeld
5059c3024a3SHans Rosenfeld static int
vmm_mapreg(int reg)5069c3024a3SHans Rosenfeld vmm_mapreg(int reg)
5079c3024a3SHans Rosenfeld {
5089c3024a3SHans Rosenfeld errno = 0;
5099c3024a3SHans Rosenfeld
5109c3024a3SHans Rosenfeld if (reg < 0)
5119c3024a3SHans Rosenfeld goto fail;
5129c3024a3SHans Rosenfeld
5139c3024a3SHans Rosenfeld if (reg < KDIREG_NGREG)
5149c3024a3SHans Rosenfeld return (vmm_kdi_regmap[reg]);
5159c3024a3SHans Rosenfeld
5169c3024a3SHans Rosenfeld if (reg >= VMM_REG_OFFSET &&
5179c3024a3SHans Rosenfeld reg < VMM_REG_OFFSET + ARRAY_SIZE(vmm_sys_regmap))
5189c3024a3SHans Rosenfeld return (vmm_sys_regmap[reg - VMM_REG_OFFSET]);
5199c3024a3SHans Rosenfeld
5209c3024a3SHans Rosenfeld fail:
5219c3024a3SHans Rosenfeld errno = EINVAL;
5229c3024a3SHans Rosenfeld return (VM_REG_LAST);
5239c3024a3SHans Rosenfeld }
5249c3024a3SHans Rosenfeld
5259c3024a3SHans Rosenfeld static int
vmm_mapdesc(int desc)5269c3024a3SHans Rosenfeld vmm_mapdesc(int desc)
5279c3024a3SHans Rosenfeld {
5289c3024a3SHans Rosenfeld errno = 0;
5299c3024a3SHans Rosenfeld
5309c3024a3SHans Rosenfeld if (desc >= VMM_DESC_OFFSET &&
5319c3024a3SHans Rosenfeld desc < VMM_DESC_OFFSET + ARRAY_SIZE(vmm_descmap))
5329c3024a3SHans Rosenfeld return (vmm_descmap[desc - VMM_DESC_OFFSET]);
5339c3024a3SHans Rosenfeld
5349c3024a3SHans Rosenfeld errno = EINVAL;
5359c3024a3SHans Rosenfeld return (VM_REG_LAST);
5369c3024a3SHans Rosenfeld }
5379c3024a3SHans Rosenfeld
5389c3024a3SHans Rosenfeld int
vmm_getreg(vmm_t * vmm,int vcpuid,int reg,uint64_t * val)539*32640292SAndy Fiddaman vmm_getreg(vmm_t *vmm, int vcpuid, int reg, uint64_t *val)
5409c3024a3SHans Rosenfeld {
5419c3024a3SHans Rosenfeld reg = vmm_mapreg(reg);
5429c3024a3SHans Rosenfeld
5439c3024a3SHans Rosenfeld if (reg == VM_REG_LAST)
5449c3024a3SHans Rosenfeld return (-1);
5459c3024a3SHans Rosenfeld
546*32640292SAndy Fiddaman return (vm_get_register(vmm->vmm_vcpu[vcpuid], reg, val));
5479c3024a3SHans Rosenfeld }
5489c3024a3SHans Rosenfeld
5499c3024a3SHans Rosenfeld int
vmm_setreg(vmm_t * vmm,int vcpuid,int reg,uint64_t val)550*32640292SAndy Fiddaman vmm_setreg(vmm_t *vmm, int vcpuid, int reg, uint64_t val)
5519c3024a3SHans Rosenfeld {
5529c3024a3SHans Rosenfeld reg = vmm_mapreg(reg);
5539c3024a3SHans Rosenfeld
5549c3024a3SHans Rosenfeld if (reg == VM_REG_LAST)
5559c3024a3SHans Rosenfeld return (-1);
5569c3024a3SHans Rosenfeld
557*32640292SAndy Fiddaman return (vm_set_register(vmm->vmm_vcpu[vcpuid], reg, val));
5589c3024a3SHans Rosenfeld }
5599c3024a3SHans Rosenfeld
5609c3024a3SHans Rosenfeld int
vmm_get_regset(vmm_t * vmm,int vcpuid,size_t nregs,const int * regnums,uint64_t * regvals)561*32640292SAndy Fiddaman vmm_get_regset(vmm_t *vmm, int vcpuid, size_t nregs, const int *regnums,
5629c3024a3SHans Rosenfeld uint64_t *regvals)
5639c3024a3SHans Rosenfeld {
5649c3024a3SHans Rosenfeld int *vm_regnums;
5659c3024a3SHans Rosenfeld int i;
5669c3024a3SHans Rosenfeld int ret = -1;
5679c3024a3SHans Rosenfeld
5689c3024a3SHans Rosenfeld vm_regnums = malloc(sizeof (int) * nregs);
5699c3024a3SHans Rosenfeld if (vm_regnums == NULL)
5709c3024a3SHans Rosenfeld return (ret);
5719c3024a3SHans Rosenfeld
5729c3024a3SHans Rosenfeld for (i = 0; i != nregs; i++) {
5739c3024a3SHans Rosenfeld vm_regnums[i] = vmm_mapreg(regnums[i]);
5749c3024a3SHans Rosenfeld if (vm_regnums[i] == VM_REG_LAST)
5759c3024a3SHans Rosenfeld goto fail;
5769c3024a3SHans Rosenfeld }
5779c3024a3SHans Rosenfeld
578*32640292SAndy Fiddaman ret = vm_get_register_set(vmm->vmm_vcpu[vcpuid], nregs, vm_regnums,
5799c3024a3SHans Rosenfeld regvals);
5809c3024a3SHans Rosenfeld
5819c3024a3SHans Rosenfeld fail:
5829c3024a3SHans Rosenfeld free(vm_regnums);
5839c3024a3SHans Rosenfeld return (ret);
5849c3024a3SHans Rosenfeld }
5859c3024a3SHans Rosenfeld
5869c3024a3SHans Rosenfeld int
vmm_set_regset(vmm_t * vmm,int vcpuid,size_t nregs,const int * regnums,uint64_t * regvals)587*32640292SAndy Fiddaman vmm_set_regset(vmm_t *vmm, int vcpuid, size_t nregs, const int *regnums,
5889c3024a3SHans Rosenfeld uint64_t *regvals)
5899c3024a3SHans Rosenfeld {
5909c3024a3SHans Rosenfeld int *vm_regnums;
5919c3024a3SHans Rosenfeld int i;
5929c3024a3SHans Rosenfeld int ret = -1;
5939c3024a3SHans Rosenfeld
5949c3024a3SHans Rosenfeld vm_regnums = malloc(sizeof (int) * nregs);
5959c3024a3SHans Rosenfeld if (vm_regnums == NULL)
5969c3024a3SHans Rosenfeld return (ret);
5979c3024a3SHans Rosenfeld
5989c3024a3SHans Rosenfeld for (i = 0; i != nregs; i++) {
5999c3024a3SHans Rosenfeld vm_regnums[i] = vmm_mapreg(regnums[i]);
6009c3024a3SHans Rosenfeld if (vm_regnums[i] == VM_REG_LAST)
6019c3024a3SHans Rosenfeld goto fail;
6029c3024a3SHans Rosenfeld }
6039c3024a3SHans Rosenfeld
604*32640292SAndy Fiddaman ret = vm_set_register_set(vmm->vmm_vcpu[vcpuid], nregs, vm_regnums,
6059c3024a3SHans Rosenfeld regvals);
6069c3024a3SHans Rosenfeld
6079c3024a3SHans Rosenfeld fail:
6089c3024a3SHans Rosenfeld free(vm_regnums);
6099c3024a3SHans Rosenfeld return (ret);
6109c3024a3SHans Rosenfeld }
6119c3024a3SHans Rosenfeld
6129c3024a3SHans Rosenfeld int
vmm_get_desc(vmm_t * vmm,int vcpuid,int desc,vmm_desc_t * vd)613*32640292SAndy Fiddaman vmm_get_desc(vmm_t *vmm, int vcpuid, int desc, vmm_desc_t *vd)
6149c3024a3SHans Rosenfeld {
6159c3024a3SHans Rosenfeld desc = vmm_mapdesc(desc);
6169c3024a3SHans Rosenfeld if (desc == VM_REG_LAST)
6179c3024a3SHans Rosenfeld return (-1);
6189c3024a3SHans Rosenfeld
619*32640292SAndy Fiddaman return (vm_get_desc(vmm->vmm_vcpu[vcpuid], desc, &vd->vd_base,
620*32640292SAndy Fiddaman &vd->vd_lim,
6219c3024a3SHans Rosenfeld &vd->vd_acc));
6229c3024a3SHans Rosenfeld }
6239c3024a3SHans Rosenfeld
6249c3024a3SHans Rosenfeld int
vmm_set_desc(vmm_t * vmm,int vcpuid,int desc,vmm_desc_t * vd)625*32640292SAndy Fiddaman vmm_set_desc(vmm_t *vmm, int vcpuid, int desc, vmm_desc_t *vd)
6269c3024a3SHans Rosenfeld {
6279c3024a3SHans Rosenfeld desc = vmm_mapdesc(desc);
6289c3024a3SHans Rosenfeld if (desc == VM_REG_LAST)
6299c3024a3SHans Rosenfeld return (-1);
6309c3024a3SHans Rosenfeld
631*32640292SAndy Fiddaman return (vm_set_desc(vmm->vmm_vcpu[vcpuid], desc, vd->vd_base,
632*32640292SAndy Fiddaman vd->vd_lim, vd->vd_acc));
6339c3024a3SHans Rosenfeld }
6349c3024a3SHans Rosenfeld
6359c3024a3SHans Rosenfeld /*
6369c3024a3SHans Rosenfeld * Structure to hold MMU state during address translation.
6379c3024a3SHans Rosenfeld * The contents of vmm_mmu_regnum[] must be kept in sync with this.
6389c3024a3SHans Rosenfeld */
6399c3024a3SHans Rosenfeld typedef struct vmm_mmu {
6409c3024a3SHans Rosenfeld uint64_t vm_cr0;
6419c3024a3SHans Rosenfeld uint64_t vm_cr3;
6429c3024a3SHans Rosenfeld uint64_t vm_cr4;
6439c3024a3SHans Rosenfeld uint64_t vm_efer;
6449c3024a3SHans Rosenfeld } vmm_mmu_t;
6459c3024a3SHans Rosenfeld
6469c3024a3SHans Rosenfeld static const int vmm_mmu_regnum[] = {
6479c3024a3SHans Rosenfeld VMM_REG_CR0,
6489c3024a3SHans Rosenfeld VMM_REG_CR3,
6499c3024a3SHans Rosenfeld VMM_REG_CR4,
6509c3024a3SHans Rosenfeld VMM_REG_EFER
6519c3024a3SHans Rosenfeld };
6529c3024a3SHans Rosenfeld
/* Page table entry bits used by this file: bit 0 and bit 7. */
#define	X86_PTE_P		(1ULL << 0)
#define	X86_PTE_PS		(1ULL << 7)

/* Page geometry and the physical address field of a PTE (bits 12 to 51). */
#define	X86_PAGE_SHIFT		12
#define	X86_PAGE_SIZE		(1ULL << X86_PAGE_SHIFT)
#define	X86_PTE_PHYSMASK	(((1ULL << 52) - 1) & ~(X86_PAGE_SIZE - 1))

/* Bits of the segment descriptor access word. */
#define	X86_SEG_CODE_DATA	0x00010ULL
#define	X86_SEG_PRESENT		0x00080ULL
#define	X86_SEG_LONG		0x02000ULL
#define	X86_SEG_BIG		0x04000ULL
#define	X86_SEG_GRANULARITY	0x08000ULL
#define	X86_SEG_UNUSABLE	0x10000ULL

/* Access bits compared when testing whether a segment is usable. */
#define	X86_SEG_USABLE		(X86_SEG_PRESENT | X86_SEG_CODE_DATA)
#define	X86_SEG_USABLE_MASK	(X86_SEG_UNUSABLE | X86_SEG_USABLE)
6699c3024a3SHans Rosenfeld
6709c3024a3SHans Rosenfeld /*
6719c3024a3SHans Rosenfeld * vmm_pte2paddr:
6729c3024a3SHans Rosenfeld *
6739c3024a3SHans Rosenfeld * Recursively calculate the physical address from a virtual address,
6749c3024a3SHans Rosenfeld * starting at the given PTE level using the given PTE.
6759c3024a3SHans Rosenfeld */
6769c3024a3SHans Rosenfeld static int
vmm_pte2paddr(vmm_t * vmm,uint64_t pte,boolean_t ia32,int level,uint64_t vaddr,uint64_t * paddr)6779c3024a3SHans Rosenfeld vmm_pte2paddr(vmm_t *vmm, uint64_t pte, boolean_t ia32, int level,
6789c3024a3SHans Rosenfeld uint64_t vaddr, uint64_t *paddr)
6799c3024a3SHans Rosenfeld {
6809c3024a3SHans Rosenfeld int pte_size = ia32 ? sizeof (uint32_t) : sizeof (uint64_t);
6819c3024a3SHans Rosenfeld int off_bits = ia32 ? 10 : 9;
6829c3024a3SHans Rosenfeld boolean_t hugepage = B_FALSE;
6839c3024a3SHans Rosenfeld uint64_t offset;
6849c3024a3SHans Rosenfeld uint64_t off_mask, off_shift;
6859c3024a3SHans Rosenfeld
6869c3024a3SHans Rosenfeld if (level < 4 && (pte & X86_PTE_P) == 0) {
6879c3024a3SHans Rosenfeld errno = EFAULT;
6889c3024a3SHans Rosenfeld return (-1);
6899c3024a3SHans Rosenfeld }
6909c3024a3SHans Rosenfeld
6919c3024a3SHans Rosenfeld off_shift = X86_PAGE_SHIFT + off_bits * level;
6929c3024a3SHans Rosenfeld off_mask = (1ULL << off_shift) - 1;
6939c3024a3SHans Rosenfeld
6949c3024a3SHans Rosenfeld offset = vaddr & off_mask;
6959c3024a3SHans Rosenfeld
6969c3024a3SHans Rosenfeld if ((level == 1 || level == 2) && (pte & X86_PTE_PS) != 0) {
6979c3024a3SHans Rosenfeld hugepage = B_TRUE;
6989c3024a3SHans Rosenfeld } else {
6999c3024a3SHans Rosenfeld if (level > 0) {
7009c3024a3SHans Rosenfeld offset >>= off_shift - off_bits;
7019c3024a3SHans Rosenfeld offset <<= X86_PAGE_SHIFT - off_bits;
7029c3024a3SHans Rosenfeld }
7039c3024a3SHans Rosenfeld off_mask = 0xfff;
7049c3024a3SHans Rosenfeld }
7059c3024a3SHans Rosenfeld
7069c3024a3SHans Rosenfeld *paddr = (pte & X86_PTE_PHYSMASK & ~off_mask) + offset;
7079c3024a3SHans Rosenfeld
7089c3024a3SHans Rosenfeld if (level == 0 || hugepage)
7099c3024a3SHans Rosenfeld return (0);
7109c3024a3SHans Rosenfeld
7119c3024a3SHans Rosenfeld pte = 0;
7129c3024a3SHans Rosenfeld if (vmm_pread(vmm, &pte, pte_size, *paddr) != pte_size)
7139c3024a3SHans Rosenfeld return (-1);
7149c3024a3SHans Rosenfeld return (vmm_pte2paddr(vmm, pte, ia32, level - 1, vaddr, paddr));
7159c3024a3SHans Rosenfeld }
7169c3024a3SHans Rosenfeld
7179c3024a3SHans Rosenfeld static vmm_mode_t
vmm_vcpu_mmu_mode(vmm_t * vmm,int vcpuid __unused,vmm_mmu_t * mmu)718*32640292SAndy Fiddaman vmm_vcpu_mmu_mode(vmm_t *vmm, int vcpuid __unused, vmm_mmu_t *mmu)
7199c3024a3SHans Rosenfeld {
7209c3024a3SHans Rosenfeld if ((mmu->vm_cr0 & CR0_PE) == 0)
7219c3024a3SHans Rosenfeld return (VMM_MODE_REAL);
7229c3024a3SHans Rosenfeld else if ((mmu->vm_cr4 & CR4_PAE) == 0)
7239c3024a3SHans Rosenfeld return (VMM_MODE_PROT);
7249c3024a3SHans Rosenfeld else if ((mmu->vm_efer & AMD_EFER_LME) == 0)
7259c3024a3SHans Rosenfeld return (VMM_MODE_PAE);
7269c3024a3SHans Rosenfeld else
7279c3024a3SHans Rosenfeld return (VMM_MODE_LONG);
7289c3024a3SHans Rosenfeld }
7299c3024a3SHans Rosenfeld
7309c3024a3SHans Rosenfeld vmm_mode_t
vmm_vcpu_mode(vmm_t * vmm,int vcpuid)731*32640292SAndy Fiddaman vmm_vcpu_mode(vmm_t *vmm, int vcpuid)
7329c3024a3SHans Rosenfeld {
7339c3024a3SHans Rosenfeld vmm_mmu_t mmu = { 0 };
7349c3024a3SHans Rosenfeld
735*32640292SAndy Fiddaman if (vmm_get_regset(vmm, vcpuid, ARRAY_SIZE(vmm_mmu_regnum),
7369c3024a3SHans Rosenfeld vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
7379c3024a3SHans Rosenfeld return (VMM_MODE_UNKNOWN);
7389c3024a3SHans Rosenfeld
739*32640292SAndy Fiddaman return (vmm_vcpu_mmu_mode(vmm, vcpuid, &mmu));
7409c3024a3SHans Rosenfeld }
7419c3024a3SHans Rosenfeld
7429c3024a3SHans Rosenfeld vmm_isa_t
vmm_vcpu_isa(vmm_t * vmm,int vcpuid)743*32640292SAndy Fiddaman vmm_vcpu_isa(vmm_t *vmm, int vcpuid)
7449c3024a3SHans Rosenfeld {
7459c3024a3SHans Rosenfeld vmm_desc_t cs;
7469c3024a3SHans Rosenfeld
747*32640292SAndy Fiddaman if (vmm_get_desc(vmm, vcpuid, VMM_DESC_CS, &cs) != 0)
7489c3024a3SHans Rosenfeld return (VMM_ISA_UNKNOWN);
7499c3024a3SHans Rosenfeld
7509c3024a3SHans Rosenfeld switch (cs.vd_acc & (X86_SEG_BIG | X86_SEG_LONG)) {
7519c3024a3SHans Rosenfeld case 0x0: /* 16b code segment */
7529c3024a3SHans Rosenfeld return (VMM_ISA_16);
7539c3024a3SHans Rosenfeld case X86_SEG_LONG: /* 64b code segment */
7549c3024a3SHans Rosenfeld return (VMM_ISA_64);
7559c3024a3SHans Rosenfeld case X86_SEG_BIG: /* 32b code segment */
7569c3024a3SHans Rosenfeld return (VMM_ISA_32);
7579c3024a3SHans Rosenfeld }
7589c3024a3SHans Rosenfeld
7599c3024a3SHans Rosenfeld return (VMM_ISA_UNKNOWN);
7609c3024a3SHans Rosenfeld }
7619c3024a3SHans Rosenfeld
7629c3024a3SHans Rosenfeld /*
7639c3024a3SHans Rosenfeld * vmm_vtol:
7649c3024a3SHans Rosenfeld *
7659c3024a3SHans Rosenfeld * Translate a virtual address to a physical address on a certain vCPU,
7669c3024a3SHans Rosenfeld * using the specified segment register or descriptor according to the mode.
7679c3024a3SHans Rosenfeld *
7689c3024a3SHans Rosenfeld */
7699c3024a3SHans Rosenfeld int
vmm_vtol(vmm_t * vmm,int vcpuid,int seg,uint64_t vaddr,uint64_t * laddr)770*32640292SAndy Fiddaman vmm_vtol(vmm_t *vmm, int vcpuid, int seg, uint64_t vaddr, uint64_t *laddr)
7719c3024a3SHans Rosenfeld {
7729c3024a3SHans Rosenfeld vmm_desc_t desc;
7739c3024a3SHans Rosenfeld uint64_t limit;
7749c3024a3SHans Rosenfeld
775*32640292SAndy Fiddaman if (vmm_get_desc(vmm, vcpuid, seg, &desc) != 0)
7769c3024a3SHans Rosenfeld return (-1);
7779c3024a3SHans Rosenfeld
778*32640292SAndy Fiddaman switch (vmm_vcpu_mode(vmm, vcpuid)) {
7799c3024a3SHans Rosenfeld case VMM_MODE_REAL:
7809c3024a3SHans Rosenfeld if (seg == VMM_DESC_FS || seg == VMM_DESC_GS)
7819c3024a3SHans Rosenfeld goto fault;
7829c3024a3SHans Rosenfeld /* FALLTHRU */
7839c3024a3SHans Rosenfeld case VMM_MODE_PROT:
7849c3024a3SHans Rosenfeld case VMM_MODE_PAE:
7859c3024a3SHans Rosenfeld if ((desc.vd_acc & X86_SEG_USABLE_MASK) != X86_SEG_USABLE)
7869c3024a3SHans Rosenfeld /* unusable, system segment, or not present */
7879c3024a3SHans Rosenfeld goto fault;
7889c3024a3SHans Rosenfeld
7899c3024a3SHans Rosenfeld limit = desc.vd_lim;
7909c3024a3SHans Rosenfeld if (desc.vd_acc & X86_SEG_GRANULARITY)
7919c3024a3SHans Rosenfeld limit *= 4096;
7929c3024a3SHans Rosenfeld
7939c3024a3SHans Rosenfeld if (vaddr > limit)
7949c3024a3SHans Rosenfeld goto fault;
7959c3024a3SHans Rosenfeld /* FALLTHRU */
7969c3024a3SHans Rosenfeld case VMM_MODE_LONG:
7979c3024a3SHans Rosenfeld *laddr = desc.vd_base + vaddr;
7989c3024a3SHans Rosenfeld return (0);
7999c3024a3SHans Rosenfeld
8009c3024a3SHans Rosenfeld default:
8019c3024a3SHans Rosenfeld fault:
8029c3024a3SHans Rosenfeld errno = EFAULT;
8039c3024a3SHans Rosenfeld return (-1);
8049c3024a3SHans Rosenfeld }
8059c3024a3SHans Rosenfeld
8069c3024a3SHans Rosenfeld }
8079c3024a3SHans Rosenfeld
8089c3024a3SHans Rosenfeld /*
8099c3024a3SHans Rosenfeld * vmm_vtop:
8109c3024a3SHans Rosenfeld *
8119c3024a3SHans Rosenfeld * Translate a virtual address to a guest physical address on a certain vCPU,
8129c3024a3SHans Rosenfeld * according to the mode the vCPU is in.
8139c3024a3SHans Rosenfeld */
8149c3024a3SHans Rosenfeld int
vmm_vtop(vmm_t * vmm,int vcpuid,int seg,uint64_t vaddr,uint64_t * paddr)815*32640292SAndy Fiddaman vmm_vtop(vmm_t *vmm, int vcpuid, int seg, uint64_t vaddr, uint64_t *paddr)
8169c3024a3SHans Rosenfeld {
8179c3024a3SHans Rosenfeld vmm_mmu_t mmu = { 0 };
8189c3024a3SHans Rosenfeld int ret = 0;
8199c3024a3SHans Rosenfeld
820*32640292SAndy Fiddaman if (vmm_vtol(vmm, vcpuid, seg, vaddr, &vaddr) != 0)
8219c3024a3SHans Rosenfeld return (-1);
8229c3024a3SHans Rosenfeld
823*32640292SAndy Fiddaman if (vmm_get_regset(vmm, vcpuid, ARRAY_SIZE(vmm_mmu_regnum),
8249c3024a3SHans Rosenfeld vmm_mmu_regnum, (uint64_t *)&mmu) != 0)
8259c3024a3SHans Rosenfeld return (-1);
8269c3024a3SHans Rosenfeld
8279c3024a3SHans Rosenfeld if ((mmu.vm_cr0 & CR0_PG) == 0) {
8289c3024a3SHans Rosenfeld /* no paging, physical equals virtual */
8299c3024a3SHans Rosenfeld *paddr = vaddr;
8309c3024a3SHans Rosenfeld return (0);
8319c3024a3SHans Rosenfeld }
8329c3024a3SHans Rosenfeld
833*32640292SAndy Fiddaman switch (vmm_vcpu_mmu_mode(vmm, vcpuid, &mmu)) {
8349c3024a3SHans Rosenfeld case VMM_MODE_PROT:
8359c3024a3SHans Rosenfeld /* protected mode, no PAE: 2-level paging, 32bit PTEs */
8369c3024a3SHans Rosenfeld ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_TRUE, 2, vaddr, paddr);
8379c3024a3SHans Rosenfeld break;
8389c3024a3SHans Rosenfeld case VMM_MODE_PAE:
8399c3024a3SHans Rosenfeld /* protected mode with PAE: 3-level paging, 64bit PTEs */
8409c3024a3SHans Rosenfeld ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 3, vaddr, paddr);
8419c3024a3SHans Rosenfeld break;
8429c3024a3SHans Rosenfeld case VMM_MODE_LONG:
8439c3024a3SHans Rosenfeld /* long mode: 4-level paging, 64bit PTEs */
8449c3024a3SHans Rosenfeld ret = vmm_pte2paddr(vmm, mmu.vm_cr3, B_FALSE, 4, vaddr, paddr);
8459c3024a3SHans Rosenfeld break;
8469c3024a3SHans Rosenfeld default:
8479c3024a3SHans Rosenfeld ret = -1;
8489c3024a3SHans Rosenfeld }
8499c3024a3SHans Rosenfeld
8509c3024a3SHans Rosenfeld return (ret);
8519c3024a3SHans Rosenfeld }
8529c3024a3SHans Rosenfeld
8539c3024a3SHans Rosenfeld ssize_t
vmm_vread(vmm_t * vmm,int vcpuid,int seg,void * buf,size_t len,uintptr_t addr)854*32640292SAndy Fiddaman vmm_vread(vmm_t *vmm, int vcpuid, int seg, void *buf, size_t len, uintptr_t
855*32640292SAndy Fiddaman addr)
8569c3024a3SHans Rosenfeld {
8579c3024a3SHans Rosenfeld ssize_t res = 0;
8589c3024a3SHans Rosenfeld uint64_t paddr;
8599c3024a3SHans Rosenfeld size_t plen;
8609c3024a3SHans Rosenfeld uint64_t boundary;
8619c3024a3SHans Rosenfeld
8629c3024a3SHans Rosenfeld while (len != 0) {
863*32640292SAndy Fiddaman if (vmm_vtop(vmm, vcpuid, seg, addr, &paddr) != 0) {
8649c3024a3SHans Rosenfeld errno = EFAULT;
8659c3024a3SHans Rosenfeld return (0);
8669c3024a3SHans Rosenfeld }
8679c3024a3SHans Rosenfeld
8689c3024a3SHans Rosenfeld boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
8699c3024a3SHans Rosenfeld if (addr + len > boundary)
8709c3024a3SHans Rosenfeld plen = boundary - addr;
8719c3024a3SHans Rosenfeld else
8729c3024a3SHans Rosenfeld plen = len;
8739c3024a3SHans Rosenfeld
8749c3024a3SHans Rosenfeld if (vmm_pread(vmm, buf, plen, paddr) != plen)
8759c3024a3SHans Rosenfeld return (0);
8769c3024a3SHans Rosenfeld len -= plen;
8779c3024a3SHans Rosenfeld addr += plen;
8789c3024a3SHans Rosenfeld buf += plen;
8799c3024a3SHans Rosenfeld res += plen;
8809c3024a3SHans Rosenfeld }
8819c3024a3SHans Rosenfeld
8829c3024a3SHans Rosenfeld return (res);
8839c3024a3SHans Rosenfeld }
8849c3024a3SHans Rosenfeld
8859c3024a3SHans Rosenfeld ssize_t
vmm_vwrite(vmm_t * vmm,int vcpuid,int seg,const void * buf,size_t len,uintptr_t addr)886*32640292SAndy Fiddaman vmm_vwrite(vmm_t *vmm, int vcpuid, int seg, const void *buf, size_t len,
8879c3024a3SHans Rosenfeld uintptr_t addr)
8889c3024a3SHans Rosenfeld {
8899c3024a3SHans Rosenfeld ssize_t res = 0;
8909c3024a3SHans Rosenfeld uint64_t paddr;
8919c3024a3SHans Rosenfeld size_t plen;
8929c3024a3SHans Rosenfeld uint64_t boundary;
8939c3024a3SHans Rosenfeld
8949c3024a3SHans Rosenfeld while (len != 0) {
895*32640292SAndy Fiddaman if (vmm_vtop(vmm, vcpuid, seg, addr, &paddr) != 0) {
8969c3024a3SHans Rosenfeld errno = EFAULT;
8979c3024a3SHans Rosenfeld return (0);
8989c3024a3SHans Rosenfeld }
8999c3024a3SHans Rosenfeld
9009c3024a3SHans Rosenfeld boundary = (addr + X86_PAGE_SIZE) & ~(X86_PAGE_SIZE - 1);
9019c3024a3SHans Rosenfeld if (addr + len > boundary)
9029c3024a3SHans Rosenfeld plen = boundary - addr;
9039c3024a3SHans Rosenfeld else
9049c3024a3SHans Rosenfeld plen = len;
9059c3024a3SHans Rosenfeld
9069c3024a3SHans Rosenfeld if (vmm_pwrite(vmm, buf, plen, paddr) != plen)
9079c3024a3SHans Rosenfeld return (0);
9089c3024a3SHans Rosenfeld len -= plen;
9099c3024a3SHans Rosenfeld addr += plen;
9109c3024a3SHans Rosenfeld buf += plen;
9119c3024a3SHans Rosenfeld res += plen;
9129c3024a3SHans Rosenfeld }
9139c3024a3SHans Rosenfeld
9149c3024a3SHans Rosenfeld return (res);
9159c3024a3SHans Rosenfeld }
916