10153d828SPatrick Mooney /*
20153d828SPatrick Mooney * This file and its contents are supplied under the terms of the
30153d828SPatrick Mooney * Common Development and Distribution License ("CDDL"), version 1.0.
40153d828SPatrick Mooney * You may only use this file in accordance with the terms of version
50153d828SPatrick Mooney * 1.0 of the CDDL.
60153d828SPatrick Mooney *
70153d828SPatrick Mooney * A full copy of the text of the CDDL should have accompanied this
80153d828SPatrick Mooney * source. A copy of the CDDL is also available via the Internet at
90153d828SPatrick Mooney * http://www.illumos.org/license/CDDL.
100153d828SPatrick Mooney */
110153d828SPatrick Mooney /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
120153d828SPatrick Mooney
130153d828SPatrick Mooney /*
140153d828SPatrick Mooney * Copyright 2019 Joyent, Inc.
153a0fa64cSPatrick Mooney * Copyright 2023 Oxide Computer Company
160153d828SPatrick Mooney * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
170153d828SPatrick Mooney */
180153d828SPatrick Mooney
190153d828SPatrick Mooney #include <sys/param.h>
200153d828SPatrick Mooney #include <sys/kmem.h>
210153d828SPatrick Mooney #include <sys/thread.h>
220153d828SPatrick Mooney #include <sys/list.h>
230153d828SPatrick Mooney #include <sys/mman.h>
240153d828SPatrick Mooney #include <sys/types.h>
250153d828SPatrick Mooney #include <sys/ddi.h>
260153d828SPatrick Mooney #include <sys/sysmacros.h>
270153d828SPatrick Mooney #include <sys/machsystm.h>
280153d828SPatrick Mooney #include <sys/vmsystm.h>
290153d828SPatrick Mooney #include <sys/x86_archext.h>
300153d828SPatrick Mooney #include <vm/as.h>
310153d828SPatrick Mooney #include <vm/hat_i86.h>
320153d828SPatrick Mooney #include <vm/seg_vn.h>
330153d828SPatrick Mooney #include <vm/seg_kmem.h>
340153d828SPatrick Mooney
350153d828SPatrick Mooney #include <sys/vmm_vm.h>
360153d828SPatrick Mooney #include <sys/seg_vmm.h>
370153d828SPatrick Mooney #include <sys/vmm_kernel.h>
380153d828SPatrick Mooney #include <sys/vmm_reservoir.h>
390153d828SPatrick Mooney #include <sys/vmm_gpt.h>
400153d828SPatrick Mooney
410153d828SPatrick Mooney
420153d828SPatrick Mooney /*
430153d828SPatrick Mooney * VMM Virtual Memory
440153d828SPatrick Mooney *
450153d828SPatrick Mooney * History
460153d828SPatrick Mooney *
470153d828SPatrick Mooney * When bhyve was ported to illumos, one significant hole was handling guest
480153d828SPatrick Mooney * memory and memory accesses. In the original Pluribus port, bhyve itself
490153d828SPatrick Mooney * manually handled the EPT structures for guest memory. The updated sources
500153d828SPatrick Mooney * (from FreeBSD 11) took a different approach, using the native FreeBSD VM
510153d828SPatrick Mooney * system for memory allocations and management of the EPT structures. Keeping
520153d828SPatrick Mooney * source differences to a minimum was a priority, so illumos-bhyve implemented
530153d828SPatrick Mooney * a makeshift "VM shim" which exposed the bare minimum of those interfaces to
540153d828SPatrick Mooney * boot and run guests.
550153d828SPatrick Mooney *
560153d828SPatrick Mooney * While the VM shim was successful in getting illumos-bhyve to a functional
570153d828SPatrick Mooney * state on Intel (and later AMD) gear, the FreeBSD-specific nature of the
580153d828SPatrick Mooney * compatibility interfaces made it awkward to use. As source differences with
590153d828SPatrick Mooney * the upstream kernel code became less of a concern, and upcoming features
600153d828SPatrick Mooney * (such as live migration) would demand more of those VM interfaces, it became
610153d828SPatrick Mooney * clear that an overhaul was prudent.
620153d828SPatrick Mooney *
630153d828SPatrick Mooney * Design
640153d828SPatrick Mooney *
650153d828SPatrick Mooney * The new VM system for bhyve retains a number of the same concepts as what it
660153d828SPatrick Mooney * replaces:
670153d828SPatrick Mooney *
680153d828SPatrick Mooney * - `vmspace_t` is the top-level entity for a guest memory space
690153d828SPatrick Mooney * - `vm_object_t` represents a memory object which can be mapped into a vmspace
700153d828SPatrick Mooney * - `vm_page_t` represents a page hold within a given vmspace, providing access
710153d828SPatrick Mooney * to the underlying memory page
720153d828SPatrick Mooney *
730153d828SPatrick Mooney * Unlike the old code, where most of the involved structures were exposed via
740153d828SPatrick Mooney * public definitions, this replacement VM interface keeps all involved
750153d828SPatrick Mooney * structures opaque to consumers. Furthermore, there is a clear delineation
760153d828SPatrick Mooney * between infrequent administrative operations (such as mapping/unmapping
770153d828SPatrick Mooney * regions) and common data-path operations (attempting a page hold at a given
780153d828SPatrick Mooney * guest-physical address). Those administrative operations are performed
790153d828SPatrick Mooney * directly against the vmspace, whereas the data-path operations are performed
800153d828SPatrick Mooney * through a `vm_client_t` handle. That VM client abstraction is meant to
810153d828SPatrick Mooney * reduce contention and overhead for frequent access operations and provide
820153d828SPatrick Mooney * debugging insight into how different subcomponents are accessing the vmspace.
830153d828SPatrick Mooney * A VM client is allocated for each vCPU, each viona ring (via the vmm_drv
840153d828SPatrick Mooney * interface) and each VMM userspace segment mapping.
850153d828SPatrick Mooney *
860153d828SPatrick Mooney * Exclusion
870153d828SPatrick Mooney *
880153d828SPatrick Mooney * Making changes to the vmspace (such as mapping or unmapping regions) requires
890153d828SPatrick Mooney * other accessors be excluded while the change is underway to prevent them from
900153d828SPatrick Mooney * observing invalid intermediate states. A simple approach could use a mutex
910153d828SPatrick Mooney * or rwlock to achieve this, but that risks contention when the rate of access
920153d828SPatrick Mooney * to the vmspace is high.
930153d828SPatrick Mooney *
940153d828SPatrick Mooney * Since vmspace changes (map/unmap) are rare, we can instead do the exclusion
950153d828SPatrick Mooney * at a per-vm_client_t basis. While this raises the cost for vmspace changes,
960153d828SPatrick Mooney * it means that the much more common page accesses through the vm_client can
970153d828SPatrick Mooney * normally proceed unimpeded and independently.
980153d828SPatrick Mooney *
990153d828SPatrick Mooney * When a change to the vmspace is required, the caller will put the vmspace in
1000153d828SPatrick Mooney * a 'hold' state, iterating over all associated vm_client instances, waiting
1010153d828SPatrick Mooney * for them to complete any in-flight lookup (indicated by VCS_ACTIVE) before
1020153d828SPatrick Mooney * setting VCS_HOLD in their state flag fields. With VCS_HOLD set, any call on
1030153d828SPatrick Mooney * the vm_client which would access the vmspace state (vmc_hold or vmc_fault)
1040153d828SPatrick Mooney * will block until the hold condition is cleared. Once the hold is asserted
1050153d828SPatrick Mooney * for all clients, the vmspace change can proceed with confidence. Upon
1060153d828SPatrick Mooney * completion of that operation, VCS_HOLD is cleared from the clients, and they
1070153d828SPatrick Mooney * are released to resume vmspace accesses.
1080153d828SPatrick Mooney *
1090153d828SPatrick Mooney * vCPU Consumers
1100153d828SPatrick Mooney *
1110153d828SPatrick Mooney * Access to the vmspace for vCPUs running in guest context is different from
1120153d828SPatrick Mooney * emulation-related vm_client activity: they solely rely on the contents of the
1130153d828SPatrick Mooney * page tables. Furthermore, the existing VCS_HOLD mechanism used to exclude
1140153d828SPatrick Mooney * client access is not feasible when entering guest context, since interrupts
1150153d828SPatrick Mooney * are disabled, making it impossible to block entry. This is not a concern as
1160153d828SPatrick Mooney * long as vmspace modifications never place the page tables in invalid states
1170153d828SPatrick Mooney * (either intermediate, or final). The vm_client hold mechanism does provide
1180153d828SPatrick Mooney * the means to IPI vCPU consumers which will trigger a notification once they
1190153d828SPatrick Mooney * report their exit from guest context. This can be used to ensure that page
1200153d828SPatrick Mooney * table modifications are made visible to those vCPUs within a certain
1210153d828SPatrick Mooney * time frame.
1220153d828SPatrick Mooney */
1230153d828SPatrick Mooney
/*
 * A vmspace_mapping_t records one contiguous mapping of a vm_object into a
 * vmspace: where it starts, how long it runs, the byte offset into the
 * backing object, and the protections it was established with.
 */
typedef struct vmspace_mapping {
	list_node_t	vmsm_node;	/* linkage in vmspace vms_maplist */
	vm_object_t	*vmsm_object;	/* object backing this mapping */
	uintptr_t	vmsm_addr;	/* start addr in vmspace for mapping */
	size_t		vmsm_len;	/* length (in bytes) of mapping */
	off_t		vmsm_offset;	/* byte offset into object */
	uint_t		vmsm_prot;	/* protections (PROT_*) of mapping */
} vmspace_mapping_t;
1320153d828SPatrick Mooney
/*
 * Translate an address falling within a mapping into the corresponding byte
 * offset in the backing vm_object.
 */
#define	VMSM_OFFSET(vmsm, addr)	( \
	    (vmsm)->vmsm_offset + \
	    ((addr) - (uintptr_t)(vmsm)->vmsm_addr))
1360153d828SPatrick Mooney
/*
 * State flags for a vm_client_t.  Aside from VCS_IDLE (no flags set), these
 * are bit flags which may be combined in the vmc_state field (presumably
 * serialized by vmc_lock in struct vm_client -- confirm against the state
 * transition sites).
 */
typedef enum vm_client_state {
	VCS_IDLE = 0,
	/* currently accessing vmspace for client operation (hold or fault) */
	VCS_ACTIVE = (1 << 0),
	/* client hold requested/asserted */
	VCS_HOLD = (1 << 1),
	/* vCPU is accessing page tables in guest context */
	VCS_ON_CPU = (1 << 2),
	/* client has been orphaned (no more access to vmspace) */
	VCS_ORPHANED = (1 << 3),
	/* client undergoing destroy operation */
	VCS_DESTROY = (1 << 4),
} vm_client_state_t;
1500153d828SPatrick Mooney
/*
 * Top-level representation of a guest memory space.  See the "Design" and
 * "Exclusion" sections of the block comment at the top of this file.
 */
struct vmspace {
	kmutex_t	vms_lock;	/* protects lists and hold state */
	kcondvar_t	vms_cv;		/* signaled on hold/client changes */
	bool		vms_held;	/* vmspace-wide hold in effect */
	uintptr_t	vms_size;	/* immutable after creation */

	/* (nested) page table state */
	vmm_gpt_t	*vms_gpt;	/* guest page table root */
	uint64_t	vms_pt_gen;	/* generation, bumped on PT changes */
	uint64_t	vms_pages_mapped; /* count of resident pages */
	bool		vms_track_dirty; /* dirty-page tracking enabled? */

	list_t		vms_maplist;	/* vmspace_mapping_t entries */
	list_t		vms_clients;	/* vm_client_t entries */
};
1660153d828SPatrick Mooney
/*
 * Per-consumer handle for data-path access to a vmspace.  One is allocated
 * per vCPU, per viona ring, and per userspace segment mapping (see the block
 * comment at the top of this file).
 */
struct vm_client {
	vmspace_t	*vmc_space;	/* vmspace this client accesses */
	list_node_t	vmc_node;	/* linkage in vmspace vms_clients */

	kmutex_t	vmc_lock;
	kcondvar_t	vmc_cv;
	vm_client_state_t vmc_state;	/* VCS_* flags */
	int		vmc_cpu_active;	/* on-CPU state (see VCS_ON_CPU) */
	uint64_t	vmc_cpu_gen;	/* PT generation observed by client */
	bool		vmc_track_dirty; /* mirrors vmspace vms_track_dirty */
	vmc_inval_cb_t	vmc_inval_func;	/* invalidation notification cb */
	void		*vmc_inval_data; /* argument for vmc_inval_func */

	list_t		vmc_held_pages;	/* vm_page_t holds by this client */
};
1820153d828SPatrick Mooney
/* Classification of the memory backing a vm_object */
typedef enum vm_object_type {
	VMOT_NONE,	/* no/invalid backing */
	VMOT_MEM,	/* backed by VMM reservoir memory */
	VMOT_MMIO,	/* backed by an existing physical (MMIO) range */
} vm_object_type_t;
1880153d828SPatrick Mooney
/*
 * A memory object which can be mapped into a vmspace.  Reference-counted;
 * freed when the last reference is released (see vm_object_release()).
 */
struct vm_object {
	uint_t		vmo_refcnt;	/* manipulated with atomic ops */

	/* Fields below are fixed at creation time */
	vm_object_type_t vmo_type;	/* VMOT_* backing classification */
	size_t		vmo_size;	/* size (bytes) of object */
	void		*vmo_data;	/* vmmr_region_t (MEM) or hpa (MMIO) */
	uint8_t		vmo_attr;	/* memory attribute (MTRR_TYPE_*) */
};
1980153d828SPatrick Mooney
/* Convenience consolidation of all flag(s) for validity checking */
#define	VPF_ALL	(VPF_DEFER_DIRTY)

/*
 * A vm_page_t represents a page hold made through a vm_client, granting
 * access to the underlying memory page until the hold is released.
 */
struct vm_page {
	vm_client_t	*vmp_client;	/* client which made this hold */
	list_node_t	vmp_node;	/* linkage in client vmc_held_pages */
	vm_page_t	*vmp_chain;	/* linkage for page-hold chains */
	uintptr_t	vmp_gpa;	/* guest-physical address of page */
	pfn_t		vmp_pfn;	/* host PFN backing the page */
	uint64_t	*vmp_ptep;	/* pointer to PTE for this page */
	vm_object_t	*vmp_obj_ref;	/* object reference held, if any */
	uint8_t		vmp_prot;	/* protections (PROT_*) of the hold */
	uint8_t		vmp_flags;	/* VPF_* flags */
};
2130153d828SPatrick Mooney
/* Forward declarations for internal helpers defined later in this file */
static vmspace_mapping_t *vm_mapping_find(vmspace_t *, uintptr_t, size_t);
static void vmspace_hold_enter(vmspace_t *);
static void vmspace_hold_exit(vmspace_t *, bool);
static void vmspace_clients_invalidate(vmspace_t *, uintptr_t, size_t);
static int vmspace_ensure_mapped(vmspace_t *, uintptr_t, int, pfn_t *,
    uint64_t *);
static void vmc_space_hold(vm_client_t *);
static void vmc_space_release(vm_client_t *, bool);
static void vmc_space_invalidate(vm_client_t *, uintptr_t, size_t, uint64_t);
static void vmc_space_unmap(vm_client_t *, uintptr_t, size_t, vm_object_t *);
static vm_client_t *vmc_space_orphan(vm_client_t *, vmspace_t *);
2250153d828SPatrick Mooney
2260153d828SPatrick Mooney
2270153d828SPatrick Mooney /*
2280153d828SPatrick Mooney * Create a new vmspace with a maximum address of `end`.
2290153d828SPatrick Mooney */
2300153d828SPatrick Mooney vmspace_t *
vmspace_alloc(size_t end,vmm_pte_ops_t * pte_ops,bool track_dirty)2310153d828SPatrick Mooney vmspace_alloc(size_t end, vmm_pte_ops_t *pte_ops, bool track_dirty)
2320153d828SPatrick Mooney {
2330153d828SPatrick Mooney vmspace_t *vms;
2340153d828SPatrick Mooney const uintptr_t size = end + 1;
2350153d828SPatrick Mooney
2360153d828SPatrick Mooney /*
2370153d828SPatrick Mooney * This whole mess is built on the assumption that a 64-bit address
2380153d828SPatrick Mooney * space is available to work with for the various pagetable tricks.
2390153d828SPatrick Mooney */
2400153d828SPatrick Mooney VERIFY(size > 0 && (size & PAGEOFFSET) == 0 &&
2410153d828SPatrick Mooney size <= (uintptr_t)USERLIMIT);
2420153d828SPatrick Mooney
2430153d828SPatrick Mooney vms = kmem_zalloc(sizeof (*vms), KM_SLEEP);
2440153d828SPatrick Mooney vms->vms_size = size;
2450153d828SPatrick Mooney list_create(&vms->vms_maplist, sizeof (vmspace_mapping_t),
2460153d828SPatrick Mooney offsetof(vmspace_mapping_t, vmsm_node));
2470153d828SPatrick Mooney list_create(&vms->vms_clients, sizeof (vm_client_t),
2480153d828SPatrick Mooney offsetof(vm_client_t, vmc_node));
2490153d828SPatrick Mooney
2500153d828SPatrick Mooney vms->vms_gpt = vmm_gpt_alloc(pte_ops);
2510153d828SPatrick Mooney vms->vms_pt_gen = 1;
2520153d828SPatrick Mooney vms->vms_track_dirty = track_dirty;
2530153d828SPatrick Mooney
2540153d828SPatrick Mooney return (vms);
2550153d828SPatrick Mooney }
2560153d828SPatrick Mooney
/*
 * Destroy a vmspace.  All regions in the space must be unmapped.  Any remaining
 * clients will be orphaned.
 */
void
vmspace_destroy(vmspace_t *vms)
{
	mutex_enter(&vms->vms_lock);
	/* Callers must have unmapped every region before destruction */
	VERIFY(list_is_empty(&vms->vms_maplist));

	if (!list_is_empty(&vms->vms_clients)) {
		/*
		 * Orphan each remaining client, cutting off its access to this
		 * vmspace.  The return value of vmc_space_orphan() drives the
		 * walk; the loop ends when it yields NULL.
		 */
		vm_client_t *vmc = list_head(&vms->vms_clients);
		while (vmc != NULL) {
			vmc = vmc_space_orphan(vmc, vms);
		}
		/*
		 * Wait for any clients which were in the process of destroying
		 * themselves to disappear.
		 */
		while (!list_is_empty(&vms->vms_clients)) {
			cv_wait(&vms->vms_cv, &vms->vms_lock);
		}
	}
	VERIFY(list_is_empty(&vms->vms_clients));

	/* With no mappings or clients left, tear down the page tables */
	vmm_gpt_free(vms->vms_gpt);
	mutex_exit(&vms->vms_lock);

	mutex_destroy(&vms->vms_lock);
	cv_destroy(&vms->vms_cv);
	list_destroy(&vms->vms_maplist);
	list_destroy(&vms->vms_clients);

	kmem_free(vms, sizeof (*vms));
}
2920153d828SPatrick Mooney
2930153d828SPatrick Mooney /*
2940153d828SPatrick Mooney * Retrieve the count of resident (mapped into the page tables) pages.
2950153d828SPatrick Mooney */
2960153d828SPatrick Mooney uint64_t
vmspace_resident_count(vmspace_t * vms)2970153d828SPatrick Mooney vmspace_resident_count(vmspace_t *vms)
2980153d828SPatrick Mooney {
2990153d828SPatrick Mooney return (vms->vms_pages_mapped);
3000153d828SPatrick Mooney }
3010153d828SPatrick Mooney
302*b9b43e84SPatrick Mooney /*
303*b9b43e84SPatrick Mooney * Perform an operation on the status (accessed/dirty) bits held in the page
304*b9b43e84SPatrick Mooney * tables of this vmspace.
305*b9b43e84SPatrick Mooney *
306*b9b43e84SPatrick Mooney * Such manipulations race against both hardware writes (from running vCPUs) and
307*b9b43e84SPatrick Mooney * emulated accesses reflected from userspace. Safe functionality depends on
308*b9b43e84SPatrick Mooney * the VM instance being read-locked to prevent vmspace_map/vmspace_unmap
309*b9b43e84SPatrick Mooney * operations from changing the page tables during the walk.
310*b9b43e84SPatrick Mooney */
311*b9b43e84SPatrick Mooney void
vmspace_bits_operate(vmspace_t * vms,uint64_t gpa,size_t len,vmspace_bit_oper_t oper,uint8_t * bitmap)312*b9b43e84SPatrick Mooney vmspace_bits_operate(vmspace_t *vms, uint64_t gpa, size_t len,
313*b9b43e84SPatrick Mooney vmspace_bit_oper_t oper, uint8_t *bitmap)
3148779b448SDan Cross {
315*b9b43e84SPatrick Mooney const bool bit_input = (oper & VBO_FLAG_BITMAP_IN) != 0;
316*b9b43e84SPatrick Mooney const bool bit_output = (oper & VBO_FLAG_BITMAP_OUT) != 0;
317*b9b43e84SPatrick Mooney const vmspace_bit_oper_t oper_only =
318*b9b43e84SPatrick Mooney oper & ~(VBO_FLAG_BITMAP_IN | VBO_FLAG_BITMAP_OUT);
319*b9b43e84SPatrick Mooney vmm_gpt_t *gpt = vms->vms_gpt;
3204ac713daSLuqman Aden
3218779b448SDan Cross /*
322*b9b43e84SPatrick Mooney * The bitmap cannot be NULL if the requested operation involves reading
323*b9b43e84SPatrick Mooney * or writing from it.
3248779b448SDan Cross */
325*b9b43e84SPatrick Mooney ASSERT(bitmap != NULL || (!bit_input && !bit_output));
326*b9b43e84SPatrick Mooney
3278779b448SDan Cross for (size_t offset = 0; offset < len; offset += PAGESIZE) {
328*b9b43e84SPatrick Mooney const uint64_t pfn_offset = offset >> PAGESHIFT;
329*b9b43e84SPatrick Mooney const size_t bit_offset = pfn_offset / 8;
330*b9b43e84SPatrick Mooney const uint8_t bit_mask = 1 << (pfn_offset % 8);
331*b9b43e84SPatrick Mooney
332*b9b43e84SPatrick Mooney if (bit_input && (bitmap[bit_offset] & bit_mask) == 0) {
333*b9b43e84SPatrick Mooney continue;
334*b9b43e84SPatrick Mooney }
335*b9b43e84SPatrick Mooney
336*b9b43e84SPatrick Mooney bool value = false;
337*b9b43e84SPatrick Mooney uint64_t *entry = vmm_gpt_lookup(gpt, gpa + offset);
338*b9b43e84SPatrick Mooney if (entry == NULL) {
339*b9b43e84SPatrick Mooney if (bit_output) {
340*b9b43e84SPatrick Mooney bitmap[bit_offset] &= ~bit_mask;
341*b9b43e84SPatrick Mooney }
342*b9b43e84SPatrick Mooney continue;
343*b9b43e84SPatrick Mooney }
344*b9b43e84SPatrick Mooney
345*b9b43e84SPatrick Mooney switch (oper_only) {
346*b9b43e84SPatrick Mooney case VBO_GET_DIRTY:
347*b9b43e84SPatrick Mooney value = vmm_gpt_query(gpt, entry, VGQ_DIRTY);
348*b9b43e84SPatrick Mooney break;
349*b9b43e84SPatrick Mooney case VBO_SET_DIRTY: {
350*b9b43e84SPatrick Mooney uint_t prot = 0;
351*b9b43e84SPatrick Mooney bool present_writable = false;
352*b9b43e84SPatrick Mooney pfn_t pfn;
353*b9b43e84SPatrick Mooney
354*b9b43e84SPatrick Mooney /*
355*b9b43e84SPatrick Mooney * To avoid blindly setting the dirty bit on otherwise
356*b9b43e84SPatrick Mooney * empty PTEs, we must first check if the entry for the
357*b9b43e84SPatrick Mooney * address in question has been populated.
358*b9b43e84SPatrick Mooney *
359*b9b43e84SPatrick Mooney * Only if the page is marked both Present and Writable
360*b9b43e84SPatrick Mooney * will we permit the dirty bit to be set.
361*b9b43e84SPatrick Mooney */
362*b9b43e84SPatrick Mooney if (!vmm_gpt_is_mapped(gpt, entry, &pfn, &prot)) {
363*b9b43e84SPatrick Mooney int err = vmspace_ensure_mapped(vms, gpa,
364*b9b43e84SPatrick Mooney PROT_WRITE, &pfn, entry);
365*b9b43e84SPatrick Mooney if (err == 0) {
366*b9b43e84SPatrick Mooney present_writable = true;
367*b9b43e84SPatrick Mooney }
368*b9b43e84SPatrick Mooney } else if ((prot & PROT_WRITE) != 0) {
369*b9b43e84SPatrick Mooney present_writable = true;
370*b9b43e84SPatrick Mooney }
371*b9b43e84SPatrick Mooney
372*b9b43e84SPatrick Mooney if (present_writable) {
373*b9b43e84SPatrick Mooney value = !vmm_gpt_reset_dirty(gpt, entry, true);
374*b9b43e84SPatrick Mooney }
375*b9b43e84SPatrick Mooney break;
376*b9b43e84SPatrick Mooney }
377*b9b43e84SPatrick Mooney case VBO_RESET_DIRTY:
378*b9b43e84SPatrick Mooney /*
379*b9b43e84SPatrick Mooney * Although at first glance, it may seem like the act of
380*b9b43e84SPatrick Mooney * resetting the dirty bit may require the same care as
381*b9b43e84SPatrick Mooney * setting it, the constraints make for a simpler task.
382*b9b43e84SPatrick Mooney *
383*b9b43e84SPatrick Mooney * Any PTEs with the dirty bit set will have already
384*b9b43e84SPatrick Mooney * been properly populated.
385*b9b43e84SPatrick Mooney */
386*b9b43e84SPatrick Mooney value = vmm_gpt_reset_dirty(gpt, entry, false);
387*b9b43e84SPatrick Mooney break;
388*b9b43e84SPatrick Mooney default:
389*b9b43e84SPatrick Mooney panic("unrecognized operator: %d", oper_only);
390*b9b43e84SPatrick Mooney break;
391*b9b43e84SPatrick Mooney }
392*b9b43e84SPatrick Mooney if (bit_output) {
393*b9b43e84SPatrick Mooney if (value) {
394*b9b43e84SPatrick Mooney bitmap[bit_offset] |= bit_mask;
395*b9b43e84SPatrick Mooney } else {
396*b9b43e84SPatrick Mooney bitmap[bit_offset] &= ~bit_mask;
397*b9b43e84SPatrick Mooney }
398*b9b43e84SPatrick Mooney }
3998779b448SDan Cross }
4008779b448SDan Cross
4018779b448SDan Cross /*
402*b9b43e84SPatrick Mooney * Invalidate the address range potentially effected by the changes to
403*b9b43e84SPatrick Mooney * page table bits, issuing shoot-downs for those who might have it in
404*b9b43e84SPatrick Mooney * cache.
4058779b448SDan Cross */
4068779b448SDan Cross vmspace_hold_enter(vms);
4078779b448SDan Cross vms->vms_pt_gen++;
408*b9b43e84SPatrick Mooney vmspace_clients_invalidate(vms, gpa, len);
409*b9b43e84SPatrick Mooney vmspace_hold_exit(vms, true);
410*b9b43e84SPatrick Mooney }
411*b9b43e84SPatrick Mooney
412*b9b43e84SPatrick Mooney /*
413*b9b43e84SPatrick Mooney * Is dirty-page-tracking enabled for the vmspace?
414*b9b43e84SPatrick Mooney */
415*b9b43e84SPatrick Mooney bool
vmspace_get_tracking(vmspace_t * vms)416*b9b43e84SPatrick Mooney vmspace_get_tracking(vmspace_t *vms)
417*b9b43e84SPatrick Mooney {
418*b9b43e84SPatrick Mooney mutex_enter(&vms->vms_lock);
419*b9b43e84SPatrick Mooney const bool val = vms->vms_track_dirty;
420*b9b43e84SPatrick Mooney mutex_exit(&vms->vms_lock);
421*b9b43e84SPatrick Mooney return (val);
422*b9b43e84SPatrick Mooney }
423*b9b43e84SPatrick Mooney
424*b9b43e84SPatrick Mooney /*
425*b9b43e84SPatrick Mooney * Set the state (enabled/disabled) of dirty-page-tracking for the vmspace.
426*b9b43e84SPatrick Mooney */
427*b9b43e84SPatrick Mooney int
vmspace_set_tracking(vmspace_t * vms,bool enable_dirty_tracking)428*b9b43e84SPatrick Mooney vmspace_set_tracking(vmspace_t *vms, bool enable_dirty_tracking)
429*b9b43e84SPatrick Mooney {
430*b9b43e84SPatrick Mooney if (enable_dirty_tracking && !vmm_gpt_can_track_dirty(vms->vms_gpt)) {
431*b9b43e84SPatrick Mooney /* Do not allow this to be set if it is not supported */
432*b9b43e84SPatrick Mooney return (ENOTSUP);
433*b9b43e84SPatrick Mooney }
434*b9b43e84SPatrick Mooney
435*b9b43e84SPatrick Mooney vmspace_hold_enter(vms);
436*b9b43e84SPatrick Mooney if (vms->vms_track_dirty == enable_dirty_tracking) {
437*b9b43e84SPatrick Mooney /* No further effort required if state already matches */
438*b9b43e84SPatrick Mooney vmspace_hold_exit(vms, false);
439*b9b43e84SPatrick Mooney return (0);
440*b9b43e84SPatrick Mooney }
441*b9b43e84SPatrick Mooney
442*b9b43e84SPatrick Mooney vms->vms_track_dirty = enable_dirty_tracking;
443*b9b43e84SPatrick Mooney
444*b9b43e84SPatrick Mooney /* Configure all existing clients for new tracking behavior */
4458779b448SDan Cross for (vm_client_t *vmc = list_head(&vms->vms_clients);
4468779b448SDan Cross vmc != NULL;
4478779b448SDan Cross vmc = list_next(&vms->vms_clients, vmc)) {
448*b9b43e84SPatrick Mooney mutex_enter(&vmc->vmc_lock);
449*b9b43e84SPatrick Mooney vmc->vmc_track_dirty = enable_dirty_tracking;
450*b9b43e84SPatrick Mooney mutex_exit(&vmc->vmc_lock);
4518779b448SDan Cross }
4524ac713daSLuqman Aden
453*b9b43e84SPatrick Mooney /*
454*b9b43e84SPatrick Mooney * Notify all clients of what is considered an invalidation of the
455*b9b43e84SPatrick Mooney * entire vmspace.
456*b9b43e84SPatrick Mooney */
457*b9b43e84SPatrick Mooney vms->vms_pt_gen++;
458*b9b43e84SPatrick Mooney vmspace_clients_invalidate(vms, 0, vms->vms_size);
459*b9b43e84SPatrick Mooney
460*b9b43e84SPatrick Mooney vmspace_hold_exit(vms, true);
4614ac713daSLuqman Aden return (0);
4628779b448SDan Cross }
4638779b448SDan Cross
4640153d828SPatrick Mooney static pfn_t
vm_object_pager_reservoir(vm_object_t * vmo,uintptr_t off)4650153d828SPatrick Mooney vm_object_pager_reservoir(vm_object_t *vmo, uintptr_t off)
4660153d828SPatrick Mooney {
4670153d828SPatrick Mooney vmmr_region_t *region;
4680153d828SPatrick Mooney pfn_t pfn;
4690153d828SPatrick Mooney
4700153d828SPatrick Mooney ASSERT3U(vmo->vmo_type, ==, VMOT_MEM);
4710153d828SPatrick Mooney
4720153d828SPatrick Mooney region = vmo->vmo_data;
4730153d828SPatrick Mooney pfn = vmmr_region_pfn_at(region, off);
4740153d828SPatrick Mooney
4750153d828SPatrick Mooney return (pfn);
4760153d828SPatrick Mooney }
4770153d828SPatrick Mooney
4780153d828SPatrick Mooney static pfn_t
vm_object_pager_mmio(vm_object_t * vmo,uintptr_t off)4790153d828SPatrick Mooney vm_object_pager_mmio(vm_object_t *vmo, uintptr_t off)
4800153d828SPatrick Mooney {
4810153d828SPatrick Mooney pfn_t pfn;
4820153d828SPatrick Mooney
4830153d828SPatrick Mooney ASSERT3U(vmo->vmo_type, ==, VMOT_MMIO);
4840153d828SPatrick Mooney ASSERT3P(vmo->vmo_data, !=, NULL);
4850153d828SPatrick Mooney ASSERT3U(off, <, vmo->vmo_size);
4860153d828SPatrick Mooney
4870153d828SPatrick Mooney pfn = ((uintptr_t)vmo->vmo_data + off) >> PAGESHIFT;
4880153d828SPatrick Mooney
4890153d828SPatrick Mooney return (pfn);
4900153d828SPatrick Mooney }
4910153d828SPatrick Mooney
4920153d828SPatrick Mooney /*
4930153d828SPatrick Mooney * Allocate a VM object backed by VMM reservoir memory.
4940153d828SPatrick Mooney */
4950153d828SPatrick Mooney vm_object_t *
vm_object_mem_allocate(size_t size,bool transient)4960153d828SPatrick Mooney vm_object_mem_allocate(size_t size, bool transient)
4970153d828SPatrick Mooney {
4980153d828SPatrick Mooney int err;
4990153d828SPatrick Mooney vmmr_region_t *region = NULL;
5000153d828SPatrick Mooney vm_object_t *vmo;
5010153d828SPatrick Mooney
5020153d828SPatrick Mooney ASSERT3U(size, !=, 0);
5030153d828SPatrick Mooney ASSERT3U(size & PAGEOFFSET, ==, 0);
5040153d828SPatrick Mooney
5050153d828SPatrick Mooney err = vmmr_alloc(size, transient, ®ion);
5060153d828SPatrick Mooney if (err != 0) {
5070153d828SPatrick Mooney return (NULL);
5080153d828SPatrick Mooney }
5090153d828SPatrick Mooney
5100153d828SPatrick Mooney vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);
5110153d828SPatrick Mooney
5120153d828SPatrick Mooney /* For now, these are to stay fixed after allocation */
5130153d828SPatrick Mooney vmo->vmo_type = VMOT_MEM;
5140153d828SPatrick Mooney vmo->vmo_size = size;
5150153d828SPatrick Mooney vmo->vmo_attr = MTRR_TYPE_WB;
5160153d828SPatrick Mooney vmo->vmo_data = region;
5170153d828SPatrick Mooney vmo->vmo_refcnt = 1;
5180153d828SPatrick Mooney
5190153d828SPatrick Mooney return (vmo);
5200153d828SPatrick Mooney }
5210153d828SPatrick Mooney
5220153d828SPatrick Mooney static vm_object_t *
vm_object_mmio_allocate(size_t size,uintptr_t hpa)5230153d828SPatrick Mooney vm_object_mmio_allocate(size_t size, uintptr_t hpa)
5240153d828SPatrick Mooney {
5250153d828SPatrick Mooney vm_object_t *vmo;
5260153d828SPatrick Mooney
5270153d828SPatrick Mooney ASSERT3U(size, !=, 0);
5280153d828SPatrick Mooney ASSERT3U(size & PAGEOFFSET, ==, 0);
5290153d828SPatrick Mooney ASSERT3U(hpa & PAGEOFFSET, ==, 0);
5300153d828SPatrick Mooney
5310153d828SPatrick Mooney vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);
5320153d828SPatrick Mooney
5330153d828SPatrick Mooney /* For now, these are to stay fixed after allocation */
5340153d828SPatrick Mooney vmo->vmo_type = VMOT_MMIO;
5350153d828SPatrick Mooney vmo->vmo_size = size;
5360153d828SPatrick Mooney vmo->vmo_attr = MTRR_TYPE_UC;
5370153d828SPatrick Mooney vmo->vmo_data = (void *)hpa;
5380153d828SPatrick Mooney vmo->vmo_refcnt = 1;
5390153d828SPatrick Mooney
5400153d828SPatrick Mooney return (vmo);
5410153d828SPatrick Mooney }
5420153d828SPatrick Mooney
5430153d828SPatrick Mooney /*
5440153d828SPatrick Mooney * Allocate a VM object backed by an existing range of physical memory.
5450153d828SPatrick Mooney */
5460153d828SPatrick Mooney vm_object_t *
vmm_mmio_alloc(vmspace_t * vmspace,uintptr_t gpa,size_t len,uintptr_t hpa)5470153d828SPatrick Mooney vmm_mmio_alloc(vmspace_t *vmspace, uintptr_t gpa, size_t len, uintptr_t hpa)
5480153d828SPatrick Mooney {
5490153d828SPatrick Mooney int error;
5500153d828SPatrick Mooney vm_object_t *obj;
5510153d828SPatrick Mooney
5520153d828SPatrick Mooney obj = vm_object_mmio_allocate(len, hpa);
5530153d828SPatrick Mooney if (obj != NULL) {
5540153d828SPatrick Mooney error = vmspace_map(vmspace, obj, 0, gpa, len,
5550153d828SPatrick Mooney PROT_READ | PROT_WRITE);
5560153d828SPatrick Mooney if (error != 0) {
5570153d828SPatrick Mooney vm_object_release(obj);
5580153d828SPatrick Mooney obj = NULL;
5590153d828SPatrick Mooney }
5600153d828SPatrick Mooney }
5610153d828SPatrick Mooney
5620153d828SPatrick Mooney return (obj);
5630153d828SPatrick Mooney }
5640153d828SPatrick Mooney
5650153d828SPatrick Mooney /*
5660153d828SPatrick Mooney * Release a vm_object reference
5670153d828SPatrick Mooney */
5680153d828SPatrick Mooney void
vm_object_release(vm_object_t * vmo)5690153d828SPatrick Mooney vm_object_release(vm_object_t *vmo)
5700153d828SPatrick Mooney {
5710153d828SPatrick Mooney ASSERT(vmo != NULL);
5720153d828SPatrick Mooney
5730153d828SPatrick Mooney uint_t ref = atomic_dec_uint_nv(&vmo->vmo_refcnt);
5740153d828SPatrick Mooney /* underflow would be a deadly serious mistake */
5750153d828SPatrick Mooney VERIFY3U(ref, !=, UINT_MAX);
5760153d828SPatrick Mooney if (ref != 0) {
5770153d828SPatrick Mooney return;
5780153d828SPatrick Mooney }
5790153d828SPatrick Mooney
5800153d828SPatrick Mooney switch (vmo->vmo_type) {
5810153d828SPatrick Mooney case VMOT_MEM:
5820153d828SPatrick Mooney vmmr_free((vmmr_region_t *)vmo->vmo_data);
5830153d828SPatrick Mooney break;
5840153d828SPatrick Mooney case VMOT_MMIO:
5850153d828SPatrick Mooney break;
5860153d828SPatrick Mooney default:
5870153d828SPatrick Mooney panic("unexpected object type %u", vmo->vmo_type);
5880153d828SPatrick Mooney break;
5890153d828SPatrick Mooney }
5900153d828SPatrick Mooney
5910153d828SPatrick Mooney vmo->vmo_data = NULL;
5920153d828SPatrick Mooney vmo->vmo_size = 0;
5930153d828SPatrick Mooney kmem_free(vmo, sizeof (*vmo));
5940153d828SPatrick Mooney }
5950153d828SPatrick Mooney
5960153d828SPatrick Mooney /*
5970153d828SPatrick Mooney * Increase refcount for vm_object reference
5980153d828SPatrick Mooney */
5990153d828SPatrick Mooney void
vm_object_reference(vm_object_t * vmo)6000153d828SPatrick Mooney vm_object_reference(vm_object_t *vmo)
6010153d828SPatrick Mooney {
6020153d828SPatrick Mooney ASSERT(vmo != NULL);
6030153d828SPatrick Mooney
6040153d828SPatrick Mooney uint_t ref = atomic_inc_uint_nv(&vmo->vmo_refcnt);
6050153d828SPatrick Mooney /* overflow would be a deadly serious mistake */
6060153d828SPatrick Mooney VERIFY3U(ref, !=, 0);
6070153d828SPatrick Mooney }
6080153d828SPatrick Mooney
6090153d828SPatrick Mooney /*
6100153d828SPatrick Mooney * Get the host-physical PFN for a given offset into a vm_object.
6110153d828SPatrick Mooney *
6120153d828SPatrick Mooney * The provided `off` must be within the allocated size of the vm_object.
6130153d828SPatrick Mooney */
6140153d828SPatrick Mooney pfn_t
vm_object_pfn(vm_object_t * vmo,uintptr_t off)6150153d828SPatrick Mooney vm_object_pfn(vm_object_t *vmo, uintptr_t off)
6160153d828SPatrick Mooney {
6170153d828SPatrick Mooney const uintptr_t aligned_off = off & PAGEMASK;
6180153d828SPatrick Mooney
6190153d828SPatrick Mooney switch (vmo->vmo_type) {
6200153d828SPatrick Mooney case VMOT_MEM:
6210153d828SPatrick Mooney return (vm_object_pager_reservoir(vmo, aligned_off));
6220153d828SPatrick Mooney case VMOT_MMIO:
6230153d828SPatrick Mooney return (vm_object_pager_mmio(vmo, aligned_off));
6240153d828SPatrick Mooney case VMOT_NONE:
6250153d828SPatrick Mooney break;
6260153d828SPatrick Mooney }
6270153d828SPatrick Mooney panic("unexpected object type %u", vmo->vmo_type);
6280153d828SPatrick Mooney }
6290153d828SPatrick Mooney
6300153d828SPatrick Mooney static vmspace_mapping_t *
vm_mapping_find(vmspace_t * vms,uintptr_t addr,size_t size)6310153d828SPatrick Mooney vm_mapping_find(vmspace_t *vms, uintptr_t addr, size_t size)
6320153d828SPatrick Mooney {
6330153d828SPatrick Mooney vmspace_mapping_t *vmsm;
6340153d828SPatrick Mooney list_t *ml = &vms->vms_maplist;
6350153d828SPatrick Mooney const uintptr_t range_end = addr + size;
6360153d828SPatrick Mooney
6370153d828SPatrick Mooney ASSERT3U(addr, <=, range_end);
6380153d828SPatrick Mooney
6390153d828SPatrick Mooney if (addr >= vms->vms_size) {
6400153d828SPatrick Mooney return (NULL);
6410153d828SPatrick Mooney }
6420153d828SPatrick Mooney for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
6430153d828SPatrick Mooney const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len;
6440153d828SPatrick Mooney
6450153d828SPatrick Mooney if (addr >= vmsm->vmsm_addr && addr < seg_end) {
6460153d828SPatrick Mooney if (range_end <= seg_end) {
6470153d828SPatrick Mooney return (vmsm);
6480153d828SPatrick Mooney } else {
6490153d828SPatrick Mooney return (NULL);
6500153d828SPatrick Mooney }
6510153d828SPatrick Mooney }
6520153d828SPatrick Mooney }
6530153d828SPatrick Mooney return (NULL);
6540153d828SPatrick Mooney }
6550153d828SPatrick Mooney
6560153d828SPatrick Mooney /*
6570153d828SPatrick Mooney * Check to see if any mappings reside within [addr, addr + size) span in the
6580153d828SPatrick Mooney * vmspace, returning true if that span is indeed empty.
6590153d828SPatrick Mooney */
6600153d828SPatrick Mooney static bool
vm_mapping_gap(vmspace_t * vms,uintptr_t addr,size_t size)6610153d828SPatrick Mooney vm_mapping_gap(vmspace_t *vms, uintptr_t addr, size_t size)
6620153d828SPatrick Mooney {
6630153d828SPatrick Mooney vmspace_mapping_t *vmsm;
6640153d828SPatrick Mooney list_t *ml = &vms->vms_maplist;
6650153d828SPatrick Mooney const uintptr_t range_end = addr + size - 1;
6660153d828SPatrick Mooney
6670153d828SPatrick Mooney ASSERT(MUTEX_HELD(&vms->vms_lock));
6680153d828SPatrick Mooney ASSERT(size > 0);
6690153d828SPatrick Mooney
6700153d828SPatrick Mooney for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
6710153d828SPatrick Mooney const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len - 1;
6720153d828SPatrick Mooney
6730153d828SPatrick Mooney /*
6740153d828SPatrick Mooney * The two ranges do not overlap if the start of either of
6750153d828SPatrick Mooney * them is after the end of the other.
6760153d828SPatrick Mooney */
6770153d828SPatrick Mooney if (vmsm->vmsm_addr > range_end || addr > seg_end)
6780153d828SPatrick Mooney continue;
6790153d828SPatrick Mooney return (false);
6800153d828SPatrick Mooney }
6810153d828SPatrick Mooney return (true);
6820153d828SPatrick Mooney }
6830153d828SPatrick Mooney
6840153d828SPatrick Mooney static void
vm_mapping_remove(vmspace_t * vms,vmspace_mapping_t * vmsm)6850153d828SPatrick Mooney vm_mapping_remove(vmspace_t *vms, vmspace_mapping_t *vmsm)
6860153d828SPatrick Mooney {
6870153d828SPatrick Mooney list_t *ml = &vms->vms_maplist;
6880153d828SPatrick Mooney
6890153d828SPatrick Mooney ASSERT(MUTEX_HELD(&vms->vms_lock));
6900153d828SPatrick Mooney ASSERT(vms->vms_held);
6910153d828SPatrick Mooney
6920153d828SPatrick Mooney list_remove(ml, vmsm);
6930153d828SPatrick Mooney vm_object_release(vmsm->vmsm_object);
6940153d828SPatrick Mooney kmem_free(vmsm, sizeof (*vmsm));
6950153d828SPatrick Mooney }
6960153d828SPatrick Mooney
6970153d828SPatrick Mooney /*
6980153d828SPatrick Mooney * Enter a hold state on the vmspace. This ensures that all VM clients
6990153d828SPatrick Mooney * associated with the vmspace are excluded from establishing new page holds,
7000153d828SPatrick Mooney * or any other actions which would require accessing vmspace state subject to
7010153d828SPatrick Mooney * potential change.
7020153d828SPatrick Mooney *
7030153d828SPatrick Mooney * Returns with vmspace_t`vms_lock held.
7040153d828SPatrick Mooney */
7050153d828SPatrick Mooney static void
vmspace_hold_enter(vmspace_t * vms)7060153d828SPatrick Mooney vmspace_hold_enter(vmspace_t *vms)
7070153d828SPatrick Mooney {
7080153d828SPatrick Mooney mutex_enter(&vms->vms_lock);
7090153d828SPatrick Mooney VERIFY(!vms->vms_held);
7100153d828SPatrick Mooney
7110153d828SPatrick Mooney vm_client_t *vmc = list_head(&vms->vms_clients);
7120153d828SPatrick Mooney for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
7130153d828SPatrick Mooney vmc_space_hold(vmc);
7140153d828SPatrick Mooney }
7150153d828SPatrick Mooney vms->vms_held = true;
7160153d828SPatrick Mooney }
7170153d828SPatrick Mooney
7180153d828SPatrick Mooney /*
7190153d828SPatrick Mooney * Exit a hold state on the vmspace. This releases all VM clients associated
7200153d828SPatrick Mooney * with the vmspace to be able to establish new page holds, and partake in other
7210153d828SPatrick Mooney * actions which require accessing changed vmspace state. If `kick_on_cpu` is
7220153d828SPatrick Mooney * true, then any CPUs actively using the page tables will be IPIed, and the
7230153d828SPatrick Mooney * call will block until they have acknowledged being ready to use the latest
7240153d828SPatrick Mooney * state of the tables.
7250153d828SPatrick Mooney *
7260153d828SPatrick Mooney * Requires vmspace_t`vms_lock be held, which is released as part of the call.
7270153d828SPatrick Mooney */
7280153d828SPatrick Mooney static void
vmspace_hold_exit(vmspace_t * vms,bool kick_on_cpu)7290153d828SPatrick Mooney vmspace_hold_exit(vmspace_t *vms, bool kick_on_cpu)
7300153d828SPatrick Mooney {
7310153d828SPatrick Mooney ASSERT(MUTEX_HELD(&vms->vms_lock));
7320153d828SPatrick Mooney VERIFY(vms->vms_held);
7330153d828SPatrick Mooney
7340153d828SPatrick Mooney vm_client_t *vmc = list_head(&vms->vms_clients);
7350153d828SPatrick Mooney for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
7360153d828SPatrick Mooney vmc_space_release(vmc, kick_on_cpu);
7370153d828SPatrick Mooney }
7380153d828SPatrick Mooney vms->vms_held = false;
7390153d828SPatrick Mooney mutex_exit(&vms->vms_lock);
7400153d828SPatrick Mooney }
7410153d828SPatrick Mooney
742*b9b43e84SPatrick Mooney static void
vmspace_clients_invalidate(vmspace_t * vms,uintptr_t gpa,size_t len)743*b9b43e84SPatrick Mooney vmspace_clients_invalidate(vmspace_t *vms, uintptr_t gpa, size_t len)
744*b9b43e84SPatrick Mooney {
745*b9b43e84SPatrick Mooney ASSERT(MUTEX_HELD(&vms->vms_lock));
746*b9b43e84SPatrick Mooney VERIFY(vms->vms_held);
747*b9b43e84SPatrick Mooney
748*b9b43e84SPatrick Mooney for (vm_client_t *vmc = list_head(&vms->vms_clients);
749*b9b43e84SPatrick Mooney vmc != NULL;
750*b9b43e84SPatrick Mooney vmc = list_next(&vms->vms_clients, vmc)) {
751*b9b43e84SPatrick Mooney vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen);
752*b9b43e84SPatrick Mooney }
753*b9b43e84SPatrick Mooney }
754*b9b43e84SPatrick Mooney
7550153d828SPatrick Mooney /*
7560153d828SPatrick Mooney * Attempt to map a vm_object span into the vmspace.
7570153d828SPatrick Mooney *
7580153d828SPatrick Mooney * Requirements:
7590153d828SPatrick Mooney * - `obj_off`, `addr`, and `len` must be page-aligned
7600153d828SPatrick Mooney * - `obj_off` cannot be greater than the allocated size of the object
7610153d828SPatrick Mooney * - [`obj_off`, `obj_off` + `len`) span cannot extend beyond the allocated
7620153d828SPatrick Mooney * size of the object
7630153d828SPatrick Mooney * - [`addr`, `addr` + `len`) span cannot reside beyond the maximum address
7640153d828SPatrick Mooney * of the vmspace
7650153d828SPatrick Mooney */
7660153d828SPatrick Mooney int
vmspace_map(vmspace_t * vms,vm_object_t * vmo,uintptr_t obj_off,uintptr_t addr,size_t len,uint8_t prot)7670153d828SPatrick Mooney vmspace_map(vmspace_t *vms, vm_object_t *vmo, uintptr_t obj_off, uintptr_t addr,
7680153d828SPatrick Mooney size_t len, uint8_t prot)
7690153d828SPatrick Mooney {
7700153d828SPatrick Mooney vmspace_mapping_t *vmsm;
7710153d828SPatrick Mooney int res = 0;
7720153d828SPatrick Mooney
7730153d828SPatrick Mooney if (len == 0 || (addr + len) < addr ||
7740153d828SPatrick Mooney obj_off >= (obj_off + len) || vmo->vmo_size < (obj_off + len)) {
7750153d828SPatrick Mooney return (EINVAL);
7760153d828SPatrick Mooney }
7770153d828SPatrick Mooney if ((addr + len) >= vms->vms_size) {
7780153d828SPatrick Mooney return (ENOMEM);
7790153d828SPatrick Mooney }
7800153d828SPatrick Mooney
7810153d828SPatrick Mooney vmsm = kmem_alloc(sizeof (*vmsm), KM_SLEEP);
7820153d828SPatrick Mooney
7830153d828SPatrick Mooney vmspace_hold_enter(vms);
7840153d828SPatrick Mooney if (!vm_mapping_gap(vms, addr, len)) {
7850153d828SPatrick Mooney kmem_free(vmsm, sizeof (*vmsm));
7860153d828SPatrick Mooney res = ENOMEM;
7870153d828SPatrick Mooney } else {
7880153d828SPatrick Mooney vmsm->vmsm_object = vmo;
7890153d828SPatrick Mooney vmsm->vmsm_addr = addr;
7900153d828SPatrick Mooney vmsm->vmsm_len = len;
7910153d828SPatrick Mooney vmsm->vmsm_offset = (off_t)obj_off;
7920153d828SPatrick Mooney vmsm->vmsm_prot = prot;
7930153d828SPatrick Mooney list_insert_tail(&vms->vms_maplist, vmsm);
7940153d828SPatrick Mooney
7950153d828SPatrick Mooney /*
7960153d828SPatrick Mooney * Make sure the GPT has tables ready for leaf entries across
7970153d828SPatrick Mooney * the entire new mapping.
7980153d828SPatrick Mooney */
7993a0fa64cSPatrick Mooney vmm_gpt_populate_region(vms->vms_gpt, addr, len);
8000153d828SPatrick Mooney }
8010153d828SPatrick Mooney vmspace_hold_exit(vms, false);
8020153d828SPatrick Mooney return (res);
8030153d828SPatrick Mooney }
8040153d828SPatrick Mooney
/*
 * Unmap a region of the vmspace.
 *
 * Presently the [start, end) span must equal a region previously mapped by a
 * call to vmspace_map().
 *
 * Returns 0 on success, or ENOENT if the span does not exactly match an
 * existing mapping.
 */
int
vmspace_unmap(vmspace_t *vms, uintptr_t addr, uintptr_t len)
{
	const uintptr_t end = addr + len;
	vmspace_mapping_t *vmsm;
	vm_client_t *vmc;
	/* Non-zero `gen` later indicates PTEs were actually cleared */
	uint64_t gen = 0;

	ASSERT3U(addr, <, end);

	vmspace_hold_enter(vms);
	/* expect to match existing mapping exactly */
	if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL ||
	    vmsm->vmsm_addr != addr || vmsm->vmsm_len != len) {
		vmspace_hold_exit(vms, false);
		return (ENOENT);
	}

	/* Prepare clients (and their held pages) for the unmap. */
	for (vmc = list_head(&vms->vms_clients); vmc != NULL;
	    vmc = list_next(&vms->vms_clients, vmc)) {
		vmc_space_unmap(vmc, addr, len, vmsm->vmsm_object);
	}

	/* Clear all PTEs for region */
	if (vmm_gpt_unmap_region(vms->vms_gpt, addr, len) != 0) {
		/* Bump the table generation so stale TLB state gets flushed */
		vms->vms_pt_gen++;
		gen = vms->vms_pt_gen;
	}
	/* ... and the intermediate (directory) PTEs as well */
	vmm_gpt_vacate_region(vms->vms_gpt, addr, len);

	/*
	 * If pages were actually unmapped from the GPT, provide clients with
	 * an invalidation notice.
	 */
	if (gen != 0) {
		vmspace_clients_invalidate(vms, addr, len);
	}

	vm_mapping_remove(vms, vmsm);
	/* kick_on_cpu: ensure on-CPU clients see the updated tables */
	vmspace_hold_exit(vms, true);
	return (0);
}
8550153d828SPatrick Mooney
856*b9b43e84SPatrick Mooney /*
857*b9b43e84SPatrick Mooney * For a given GPA in the vmspace, ensure that the backing page (if any) is
858*b9b43e84SPatrick Mooney * properly mapped as present in the provided PTE.
859*b9b43e84SPatrick Mooney */
860*b9b43e84SPatrick Mooney static int
vmspace_ensure_mapped(vmspace_t * vms,uintptr_t gpa,int req_prot,pfn_t * pfnp,uint64_t * leaf_pte)861*b9b43e84SPatrick Mooney vmspace_ensure_mapped(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
862*b9b43e84SPatrick Mooney uint64_t *leaf_pte)
863*b9b43e84SPatrick Mooney {
864*b9b43e84SPatrick Mooney vmspace_mapping_t *vmsm;
865*b9b43e84SPatrick Mooney vm_object_t *vmo;
866*b9b43e84SPatrick Mooney pfn_t pfn;
867*b9b43e84SPatrick Mooney
868*b9b43e84SPatrick Mooney ASSERT(pfnp != NULL);
869*b9b43e84SPatrick Mooney ASSERT(leaf_pte != NULL);
870*b9b43e84SPatrick Mooney
871*b9b43e84SPatrick Mooney vmsm = vm_mapping_find(vms, gpa, PAGESIZE);
872*b9b43e84SPatrick Mooney if (vmsm == NULL) {
873*b9b43e84SPatrick Mooney return (FC_NOMAP);
874*b9b43e84SPatrick Mooney }
875*b9b43e84SPatrick Mooney if ((req_prot & vmsm->vmsm_prot) != req_prot) {
876*b9b43e84SPatrick Mooney return (FC_PROT);
877*b9b43e84SPatrick Mooney }
878*b9b43e84SPatrick Mooney
879*b9b43e84SPatrick Mooney vmo = vmsm->vmsm_object;
880*b9b43e84SPatrick Mooney pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa));
881*b9b43e84SPatrick Mooney VERIFY(pfn != PFN_INVALID);
882*b9b43e84SPatrick Mooney
883*b9b43e84SPatrick Mooney if (vmm_gpt_map_at(vms->vms_gpt, leaf_pte, pfn, vmsm->vmsm_prot,
884*b9b43e84SPatrick Mooney vmo->vmo_attr)) {
885*b9b43e84SPatrick Mooney atomic_inc_64(&vms->vms_pages_mapped);
886*b9b43e84SPatrick Mooney }
887*b9b43e84SPatrick Mooney
888*b9b43e84SPatrick Mooney *pfnp = pfn;
889*b9b43e84SPatrick Mooney return (0);
890*b9b43e84SPatrick Mooney }
891*b9b43e84SPatrick Mooney
/*
 * Look up the PTE for a given GPA in the vmspace, populating it with
 * appropriate contents (pfn, protection, etc) if it is empty, but backed by a
 * valid mapping.
 *
 * Returns 0 on success, storing results through `pfnp` and `ptepp` (each
 * optional), or an FC_* fault code (FC_NOMAP, FC_PROT, or whatever
 * vmspace_ensure_mapped() reports) on failure.
 */
static int
vmspace_lookup_map(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
    uint64_t **ptepp)
{
	vmm_gpt_t *gpt = vms->vms_gpt;
	uint64_t *entries[MAX_GPT_LEVEL], *leaf;
	pfn_t pfn = PFN_INVALID;
	uint_t prot;

	/* Caller must provide a page-aligned GPA and at least one prot bit */
	ASSERT0(gpa & PAGEOFFSET);
	ASSERT((req_prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) != PROT_NONE);

	vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL);
	leaf = entries[LEVEL1];
	if (leaf == NULL) {
		/*
		 * Since we populated the intermediate tables for any regions
		 * mapped in the GPT, an empty leaf entry indicates there is no
		 * mapping, populated or not, at this GPT.
		 */
		return (FC_NOMAP);
	}

	if (vmm_gpt_is_mapped(gpt, leaf, &pfn, &prot)) {
		/* Already present; just check the requested access */
		if ((req_prot & prot) != req_prot) {
			return (FC_PROT);
		}
	} else {
		/* Fault the backing page in and install it in the leaf PTE */
		int err = vmspace_ensure_mapped(vms, gpa, req_prot, &pfn, leaf);
		if (err != 0) {
			return (err);
		}
	}

	ASSERT(pfn != PFN_INVALID && leaf != NULL);
	if (pfnp != NULL) {
		*pfnp = pfn;
	}
	if (ptepp != NULL) {
		*ptepp = leaf;
	}
	return (0);
}
9400153d828SPatrick Mooney
9410153d828SPatrick Mooney /*
9420153d828SPatrick Mooney * Populate (make resident in the page tables) a region of the vmspace.
9430153d828SPatrick Mooney *
9440153d828SPatrick Mooney * Presently the [start, end) span must equal a region previously mapped by a
9450153d828SPatrick Mooney * call to vmspace_map().
9460153d828SPatrick Mooney */
9470153d828SPatrick Mooney int
vmspace_populate(vmspace_t * vms,uintptr_t addr,uintptr_t len)9483a0fa64cSPatrick Mooney vmspace_populate(vmspace_t *vms, uintptr_t addr, uintptr_t len)
9490153d828SPatrick Mooney {
9500153d828SPatrick Mooney vmspace_mapping_t *vmsm;
9510153d828SPatrick Mooney mutex_enter(&vms->vms_lock);
9520153d828SPatrick Mooney
9530153d828SPatrick Mooney /* For the time being, only exact-match mappings are expected */
9543a0fa64cSPatrick Mooney if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL) {
9550153d828SPatrick Mooney mutex_exit(&vms->vms_lock);
9560153d828SPatrick Mooney return (FC_NOMAP);
9570153d828SPatrick Mooney }
9580153d828SPatrick Mooney
9590153d828SPatrick Mooney vm_object_t *vmo = vmsm->vmsm_object;
9600153d828SPatrick Mooney const int prot = vmsm->vmsm_prot;
9610153d828SPatrick Mooney const uint8_t attr = vmo->vmo_attr;
9620153d828SPatrick Mooney size_t populated = 0;
9633a0fa64cSPatrick Mooney const size_t end = addr + len;
9643a0fa64cSPatrick Mooney for (uintptr_t gpa = addr & PAGEMASK; gpa < end; gpa += PAGESIZE) {
9650153d828SPatrick Mooney const pfn_t pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa));
9660153d828SPatrick Mooney VERIFY(pfn != PFN_INVALID);
9670153d828SPatrick Mooney
9680153d828SPatrick Mooney if (vmm_gpt_map(vms->vms_gpt, gpa, pfn, prot, attr)) {
9690153d828SPatrick Mooney populated++;
9700153d828SPatrick Mooney }
9710153d828SPatrick Mooney }
9720153d828SPatrick Mooney atomic_add_64(&vms->vms_pages_mapped, populated);
9730153d828SPatrick Mooney
9740153d828SPatrick Mooney mutex_exit(&vms->vms_lock);
9750153d828SPatrick Mooney return (0);
9760153d828SPatrick Mooney }
9770153d828SPatrick Mooney
9780153d828SPatrick Mooney /*
9790153d828SPatrick Mooney * Allocate a client from a given vmspace.
9800153d828SPatrick Mooney */
9810153d828SPatrick Mooney vm_client_t *
vmspace_client_alloc(vmspace_t * vms)9820153d828SPatrick Mooney vmspace_client_alloc(vmspace_t *vms)
9830153d828SPatrick Mooney {
9840153d828SPatrick Mooney vm_client_t *vmc;
9850153d828SPatrick Mooney
9860153d828SPatrick Mooney vmc = kmem_zalloc(sizeof (vm_client_t), KM_SLEEP);
9870153d828SPatrick Mooney vmc->vmc_space = vms;
9880153d828SPatrick Mooney mutex_init(&vmc->vmc_lock, NULL, MUTEX_DRIVER, NULL);
9890153d828SPatrick Mooney cv_init(&vmc->vmc_cv, NULL, CV_DRIVER, NULL);
9900153d828SPatrick Mooney vmc->vmc_state = VCS_IDLE;
9910153d828SPatrick Mooney vmc->vmc_cpu_active = -1;
9920153d828SPatrick Mooney list_create(&vmc->vmc_held_pages, sizeof (vm_page_t),
9930153d828SPatrick Mooney offsetof(vm_page_t, vmp_node));
9940153d828SPatrick Mooney vmc->vmc_track_dirty = vms->vms_track_dirty;
9950153d828SPatrick Mooney
9960153d828SPatrick Mooney mutex_enter(&vms->vms_lock);
9970153d828SPatrick Mooney list_insert_tail(&vms->vms_clients, vmc);
9980153d828SPatrick Mooney mutex_exit(&vms->vms_lock);
9990153d828SPatrick Mooney
10000153d828SPatrick Mooney return (vmc);
10010153d828SPatrick Mooney }
10020153d828SPatrick Mooney
/*
 * Get the nested page table root pointer (EPTP/NCR3) value.
 */
uint64_t
vmspace_table_root(vmspace_t *vms)
{
	/* The GPT layer composes the value; dirty-tracking is passed along */
	return (vmm_gpt_get_pmtp(vms->vms_gpt, vms->vms_track_dirty));
}
10110153d828SPatrick Mooney
/*
 * Get the current generation number of the nested page table.
 *
 * The generation is bumped when PTEs are cleared (see vmspace_unmap()) and is
 * used by clients to detect stale TLB state.
 */
uint64_t
vmspace_table_gen(vmspace_t *vms)
{
	return (vms->vms_pt_gen);
}
10200153d828SPatrick Mooney
/*
 * Mark a vm_client as active. This will block if/while the client is held by
 * the vmspace. On success, it returns with vm_client_t`vmc_lock held. It will
 * fail if the vm_client has been orphaned.
 *
 * Returns 0 (with vmc_lock HELD) on success, or ENXIO (with vmc_lock
 * released) if the client is orphaned — note the asymmetric lock state.
 */
static int
vmc_activate(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY0(vmc->vmc_state & VCS_ACTIVE);
	if ((vmc->vmc_state & VCS_ORPHANED) != 0) {
		mutex_exit(&vmc->vmc_lock);
		return (ENXIO);
	}
	/* Wait out any hold placed on this client by the vmspace */
	while ((vmc->vmc_state & VCS_HOLD) != 0) {
		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
	}
	vmc->vmc_state |= VCS_ACTIVE;
	return (0);
}
10410153d828SPatrick Mooney
10420153d828SPatrick Mooney /*
10430153d828SPatrick Mooney * Mark a vm_client as no longer active. It must be called with
10440153d828SPatrick Mooney * vm_client_t`vmc_lock already held, and will return with it released.
10450153d828SPatrick Mooney */
10460153d828SPatrick Mooney static void
vmc_deactivate(vm_client_t * vmc)10470153d828SPatrick Mooney vmc_deactivate(vm_client_t *vmc)
10480153d828SPatrick Mooney {
10490153d828SPatrick Mooney ASSERT(MUTEX_HELD(&vmc->vmc_lock));
10500153d828SPatrick Mooney VERIFY(vmc->vmc_state & VCS_ACTIVE);
10510153d828SPatrick Mooney
10520153d828SPatrick Mooney vmc->vmc_state ^= VCS_ACTIVE;
10530153d828SPatrick Mooney if ((vmc->vmc_state & VCS_HOLD) != 0) {
10540153d828SPatrick Mooney cv_broadcast(&vmc->vmc_cv);
10550153d828SPatrick Mooney }
10560153d828SPatrick Mooney mutex_exit(&vmc->vmc_lock);
10570153d828SPatrick Mooney }
10580153d828SPatrick Mooney
/*
 * Indicate that a CPU will be utilizing the nested page tables through this VM
 * client. Interrupts (and/or the GIF) are expected to be disabled when calling
 * this function. Returns the generation number of the nested page table (to be
 * used for TLB invalidations).
 */
uint64_t
vmc_table_enter(vm_client_t *vmc)
{
	vmspace_t *vms = vmc->vmc_space;
	uint64_t gen;

	/* The client must be neither active nor already on a CPU */
	ASSERT0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
	ASSERT3S(vmc->vmc_cpu_active, ==, -1);

	/*
	 * Since the NPT activation occurs with interrupts disabled, this must
	 * be done without taking vmc_lock like normal.
	 */
	gen = vms->vms_pt_gen;
	/* Record the CPU and table generation this client is entering with */
	vmc->vmc_cpu_active = CPU->cpu_id;
	vmc->vmc_cpu_gen = gen;
	/* Atomic: vmc_space_hold/release manipulate vmc_state concurrently */
	atomic_or_uint(&vmc->vmc_state, VCS_ON_CPU);

	return (gen);
}
10850153d828SPatrick Mooney
10860153d828SPatrick Mooney /*
10870153d828SPatrick Mooney * Indicate that this VM client is not longer (directly) using the underlying
10880153d828SPatrick Mooney * page tables. Interrupts (and/or the GIF) must be enabled prior to calling
10890153d828SPatrick Mooney * this function.
10900153d828SPatrick Mooney */
10910153d828SPatrick Mooney void
vmc_table_exit(vm_client_t * vmc)10920153d828SPatrick Mooney vmc_table_exit(vm_client_t *vmc)
10930153d828SPatrick Mooney {
10940153d828SPatrick Mooney mutex_enter(&vmc->vmc_lock);
10950153d828SPatrick Mooney
10960153d828SPatrick Mooney ASSERT(vmc->vmc_state & VCS_ON_CPU);
10970153d828SPatrick Mooney vmc->vmc_state ^= VCS_ON_CPU;
10980153d828SPatrick Mooney vmc->vmc_cpu_active = -1;
10990153d828SPatrick Mooney if ((vmc->vmc_state & VCS_HOLD) != 0) {
11000153d828SPatrick Mooney cv_broadcast(&vmc->vmc_cv);
11010153d828SPatrick Mooney }
11020153d828SPatrick Mooney
11030153d828SPatrick Mooney mutex_exit(&vmc->vmc_lock);
11040153d828SPatrick Mooney }
11050153d828SPatrick Mooney
/*
 * Place a hold (VCS_HOLD) on a client on behalf of the vmspace, blocking
 * until the client has gone inactive.  Paired with vmc_space_release().
 */
static void
vmc_space_hold(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY0(vmc->vmc_state & VCS_HOLD);

	/*
	 * Because vmc_table_enter() alters vmc_state from a context where
	 * interrupts are disabled, it cannot pay heed to vmc_lock, so setting
	 * VMC_HOLD must be done atomically here.
	 */
	atomic_or_uint(&vmc->vmc_state, VCS_HOLD);

	/* Wait for client to go inactive */
	while ((vmc->vmc_state & VCS_ACTIVE) != 0) {
		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
	}
	mutex_exit(&vmc->vmc_lock);
}
11250153d828SPatrick Mooney
/*
 * Release the hold (VCS_HOLD) placed on a client by vmc_space_hold().  If
 * `kick_on_cpu` is set and the client is currently on-CPU using the page
 * tables, IPI that CPU and wait until it has exited the tables before
 * dropping the hold.
 */
static void
vmc_space_release(vm_client_t *vmc, bool kick_on_cpu)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY(vmc->vmc_state & VCS_HOLD);

	if (kick_on_cpu && (vmc->vmc_state & VCS_ON_CPU) != 0) {
		/* Force the CPU out of guest context */
		poke_cpu(vmc->vmc_cpu_active);

		while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
			cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
		}
	}

	/*
	 * Because vmc_table_enter() alters vmc_state from a context where
	 * interrupts are disabled, it cannot pay heed to vmc_lock, so clearing
	 * VMC_HOLD must be done atomically here.
	 */
	atomic_and_uint(&vmc->vmc_state, ~VCS_HOLD);
	/* Wake threads blocked in vmc_activate() waiting on the hold */
	cv_broadcast(&vmc->vmc_cv);
	mutex_exit(&vmc->vmc_lock);
}
11490153d828SPatrick Mooney
/*
 * Deliver an invalidation notice for [addr, addr + size) to a held client.
 * If the client is on-CPU with a page-table generation older than `gen`,
 * kick it off-CPU (forcing a TLB flush on re-entry) and wait for it to exit.
 * Any registered invalidation callback is then invoked.
 */
static void
vmc_space_invalidate(vm_client_t *vmc, uintptr_t addr, size_t size,
    uint64_t gen)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY(vmc->vmc_state & VCS_HOLD);
	if ((vmc->vmc_state & VCS_ON_CPU) != 0) {
		/*
		 * Wait for clients using an old generation of the page tables
		 * to exit guest context, where they subsequently flush the TLB
		 * for the new generation.
		 */
		if (vmc->vmc_cpu_gen < gen) {
			poke_cpu(vmc->vmc_cpu_active);

			while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
				cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
			}
		}
	}
	if (vmc->vmc_inval_func != NULL) {
		vmc_inval_cb_t func = vmc->vmc_inval_func;
		void *data = vmc->vmc_inval_data;

		/*
		 * Perform the actual invalidation call outside vmc_lock to
		 * avoid lock ordering issues in the consumer. Since the client
		 * is under VCS_HOLD, this is safe.
		 */
		mutex_exit(&vmc->vmc_lock);
		func(data, addr, size);
		mutex_enter(&vmc->vmc_lock);
	}
	mutex_exit(&vmc->vmc_lock);
}
11850153d828SPatrick Mooney
11860153d828SPatrick Mooney static void
vmc_space_unmap(vm_client_t * vmc,uintptr_t addr,size_t size,vm_object_t * vmo)11870153d828SPatrick Mooney vmc_space_unmap(vm_client_t *vmc, uintptr_t addr, size_t size,
11880153d828SPatrick Mooney vm_object_t *vmo)
11890153d828SPatrick Mooney {
11900153d828SPatrick Mooney mutex_enter(&vmc->vmc_lock);
11910153d828SPatrick Mooney VERIFY(vmc->vmc_state & VCS_HOLD);
11920153d828SPatrick Mooney
11930153d828SPatrick Mooney /*
11940153d828SPatrick Mooney * With the current vCPU exclusion invariants in place, we do not expect
11950153d828SPatrick Mooney * a vCPU to be in guest context during an unmap.
11960153d828SPatrick Mooney */
11970153d828SPatrick Mooney VERIFY0(vmc->vmc_state & VCS_ON_CPU);
11980153d828SPatrick Mooney
11990153d828SPatrick Mooney /*
12000153d828SPatrick Mooney * Any holds against the unmapped region need to establish their own
12010153d828SPatrick Mooney * reference to the underlying object to avoid a potential
12020153d828SPatrick Mooney * use-after-free.
12030153d828SPatrick Mooney */
12040153d828SPatrick Mooney for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
12050153d828SPatrick Mooney vmp != NULL;
12060153d828SPatrick Mooney vmp = list_next(&vmc->vmc_held_pages, vmc)) {
12070153d828SPatrick Mooney if (vmp->vmp_gpa < addr ||
12080153d828SPatrick Mooney vmp->vmp_gpa >= (addr + size)) {
12090153d828SPatrick Mooney /* Hold outside region in question */
12100153d828SPatrick Mooney continue;
12110153d828SPatrick Mooney }
12120153d828SPatrick Mooney if (vmp->vmp_obj_ref == NULL) {
12130153d828SPatrick Mooney vm_object_reference(vmo);
12140153d828SPatrick Mooney vmp->vmp_obj_ref = vmo;
12150153d828SPatrick Mooney /* For an unmapped region, PTE is now meaningless */
12160153d828SPatrick Mooney vmp->vmp_ptep = NULL;
12170153d828SPatrick Mooney } else {
12180153d828SPatrick Mooney /*
12190153d828SPatrick Mooney * Object could have gone through cycle of
12200153d828SPatrick Mooney * unmap-map-unmap before the hold was released.
12210153d828SPatrick Mooney */
12220153d828SPatrick Mooney VERIFY3P(vmp->vmp_ptep, ==, NULL);
12230153d828SPatrick Mooney }
12240153d828SPatrick Mooney }
12250153d828SPatrick Mooney mutex_exit(&vmc->vmc_lock);
12260153d828SPatrick Mooney }
12270153d828SPatrick Mooney
/*
 * Orphan a client from its parent vmspace during vmspace teardown.
 *
 * Returns the next client in the vms_clients list (fetched before any
 * removal) so the caller can continue iterating.  A client already under
 * VCS_DESTROY is left alone, since its own destruction path handles the
 * deassociation.  Caller must hold vms_lock.
 */
static vm_client_t *
vmc_space_orphan(vm_client_t *vmc, vmspace_t *vms)
{
	vm_client_t *next;

	ASSERT(MUTEX_HELD(&vms->vms_lock));

	mutex_enter(&vmc->vmc_lock);
	VERIFY3P(vmc->vmc_space, ==, vms);
	VERIFY0(vmc->vmc_state & VCS_ORPHANED);
	if (vmc->vmc_state & VCS_DESTROY) {
		/*
		 * This vm_client is currently undergoing destruction, so it
		 * does not need to be orphaned.  Let it proceed with its own
		 * clean-up task.
		 */
		next = list_next(&vms->vms_clients, vmc);
	} else {
		/*
		 * Clients are only orphaned when the containing vmspace is
		 * being torn down.  All mappings from the vmspace should
		 * already be gone, meaning any remaining held pages should have
		 * direct references to the object.
		 */
		for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
		    vmp != NULL;
		    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
			ASSERT3P(vmp->vmp_ptep, ==, NULL);
			ASSERT3P(vmp->vmp_obj_ref, !=, NULL);
		}

		/*
		 * After this point, the client will be orphaned, unable to
		 * establish new page holds (or access any vmspace-related
		 * resources) and is in charge of cleaning up after itself.
		 */
		vmc->vmc_state |= VCS_ORPHANED;
		next = list_next(&vms->vms_clients, vmc);
		list_remove(&vms->vms_clients, vmc);
		vmc->vmc_space = NULL;
	}
	mutex_exit(&vmc->vmc_lock);
	return (next);
}
12720153d828SPatrick Mooney
12730153d828SPatrick Mooney /*
12740153d828SPatrick Mooney * Attempt to hold a page at `gpa` inside the referenced vmspace.
12750153d828SPatrick Mooney */
12760153d828SPatrick Mooney vm_page_t *
vmc_hold_ext(vm_client_t * vmc,uintptr_t gpa,int prot,int flags)1277f2357d97SPatrick Mooney vmc_hold_ext(vm_client_t *vmc, uintptr_t gpa, int prot, int flags)
12780153d828SPatrick Mooney {
12790153d828SPatrick Mooney vmspace_t *vms = vmc->vmc_space;
12800153d828SPatrick Mooney vm_page_t *vmp;
12810153d828SPatrick Mooney pfn_t pfn = PFN_INVALID;
12820153d828SPatrick Mooney uint64_t *ptep = NULL;
12830153d828SPatrick Mooney
12840153d828SPatrick Mooney ASSERT0(gpa & PAGEOFFSET);
12850153d828SPatrick Mooney ASSERT((prot & (PROT_READ | PROT_WRITE)) != PROT_NONE);
1286f2357d97SPatrick Mooney ASSERT0(prot & ~PROT_ALL);
1287f2357d97SPatrick Mooney ASSERT0(flags & ~VPF_ALL);
12880153d828SPatrick Mooney
12890153d828SPatrick Mooney vmp = kmem_alloc(sizeof (*vmp), KM_SLEEP);
12900153d828SPatrick Mooney if (vmc_activate(vmc) != 0) {
12910153d828SPatrick Mooney kmem_free(vmp, sizeof (*vmp));
12920153d828SPatrick Mooney return (NULL);
12930153d828SPatrick Mooney }
12940153d828SPatrick Mooney
12950153d828SPatrick Mooney if (vmspace_lookup_map(vms, gpa, prot, &pfn, &ptep) != 0) {
12960153d828SPatrick Mooney vmc_deactivate(vmc);
12970153d828SPatrick Mooney kmem_free(vmp, sizeof (*vmp));
12980153d828SPatrick Mooney return (NULL);
12990153d828SPatrick Mooney }
13000153d828SPatrick Mooney ASSERT(pfn != PFN_INVALID && ptep != NULL);
13010153d828SPatrick Mooney
13020153d828SPatrick Mooney vmp->vmp_client = vmc;
13030153d828SPatrick Mooney vmp->vmp_chain = NULL;
13040153d828SPatrick Mooney vmp->vmp_gpa = gpa;
13050153d828SPatrick Mooney vmp->vmp_pfn = pfn;
13060153d828SPatrick Mooney vmp->vmp_ptep = ptep;
13070153d828SPatrick Mooney vmp->vmp_obj_ref = NULL;
1308f2357d97SPatrick Mooney vmp->vmp_prot = (uint8_t)prot;
1309f2357d97SPatrick Mooney vmp->vmp_flags = (uint8_t)flags;
13100153d828SPatrick Mooney list_insert_tail(&vmc->vmc_held_pages, vmp);
13110153d828SPatrick Mooney vmc_deactivate(vmc);
13120153d828SPatrick Mooney
13130153d828SPatrick Mooney return (vmp);
13140153d828SPatrick Mooney }
13150153d828SPatrick Mooney
1316f2357d97SPatrick Mooney /*
1317f2357d97SPatrick Mooney * Attempt to hold a page at `gpa` inside the referenced vmspace.
1318f2357d97SPatrick Mooney */
1319f2357d97SPatrick Mooney vm_page_t *
vmc_hold(vm_client_t * vmc,uintptr_t gpa,int prot)1320f2357d97SPatrick Mooney vmc_hold(vm_client_t *vmc, uintptr_t gpa, int prot)
1321f2357d97SPatrick Mooney {
1322f2357d97SPatrick Mooney return (vmc_hold_ext(vmc, gpa, prot, VPF_DEFAULT));
1323f2357d97SPatrick Mooney }
1324f2357d97SPatrick Mooney
13250153d828SPatrick Mooney int
vmc_fault(vm_client_t * vmc,uintptr_t gpa,int prot)13260153d828SPatrick Mooney vmc_fault(vm_client_t *vmc, uintptr_t gpa, int prot)
13270153d828SPatrick Mooney {
13280153d828SPatrick Mooney vmspace_t *vms = vmc->vmc_space;
13290153d828SPatrick Mooney int err;
13300153d828SPatrick Mooney
13310153d828SPatrick Mooney err = vmc_activate(vmc);
13320153d828SPatrick Mooney if (err == 0) {
13330153d828SPatrick Mooney err = vmspace_lookup_map(vms, gpa & PAGEMASK, prot, NULL, NULL);
13340153d828SPatrick Mooney vmc_deactivate(vmc);
13350153d828SPatrick Mooney }
13360153d828SPatrick Mooney
13370153d828SPatrick Mooney return (err);
13380153d828SPatrick Mooney }
13390153d828SPatrick Mooney
13400153d828SPatrick Mooney /*
13410153d828SPatrick Mooney * Allocate an additional vm_client_t, based on an existing one. Only the
13420153d828SPatrick Mooney * associatation with the vmspace is cloned, not existing holds or any
13430153d828SPatrick Mooney * configured invalidation function.
13440153d828SPatrick Mooney */
13450153d828SPatrick Mooney vm_client_t *
vmc_clone(vm_client_t * vmc)13460153d828SPatrick Mooney vmc_clone(vm_client_t *vmc)
13470153d828SPatrick Mooney {
13480153d828SPatrick Mooney vmspace_t *vms = vmc->vmc_space;
13490153d828SPatrick Mooney
13500153d828SPatrick Mooney return (vmspace_client_alloc(vms));
13510153d828SPatrick Mooney }
13520153d828SPatrick Mooney
13530153d828SPatrick Mooney /*
13540153d828SPatrick Mooney * Register a function (and associated data pointer) to be called when an
13550153d828SPatrick Mooney * address range in the vmspace is invalidated.
13560153d828SPatrick Mooney */
13570153d828SPatrick Mooney int
vmc_set_inval_cb(vm_client_t * vmc,vmc_inval_cb_t func,void * data)13580153d828SPatrick Mooney vmc_set_inval_cb(vm_client_t *vmc, vmc_inval_cb_t func, void *data)
13590153d828SPatrick Mooney {
13600153d828SPatrick Mooney int err;
13610153d828SPatrick Mooney
13620153d828SPatrick Mooney err = vmc_activate(vmc);
13630153d828SPatrick Mooney if (err == 0) {
13640153d828SPatrick Mooney vmc->vmc_inval_func = func;
13650153d828SPatrick Mooney vmc->vmc_inval_data = data;
13660153d828SPatrick Mooney vmc_deactivate(vmc);
13670153d828SPatrick Mooney }
13680153d828SPatrick Mooney
13690153d828SPatrick Mooney return (err);
13700153d828SPatrick Mooney }
13710153d828SPatrick Mooney
/*
 * Destroy a vm_client_t instance.
 *
 * No pages held through this vm_client_t may be outstanding when performing a
 * vmc_destroy().  For vCPU clients, the client cannot be on-CPU (a call to
 * vmc_table_exit() has been made).
 */
void
vmc_destroy(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);

	VERIFY(list_is_empty(&vmc->vmc_held_pages));
	VERIFY0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));

	if ((vmc->vmc_state & VCS_ORPHANED) == 0) {
		vmspace_t *vms;

		/*
		 * Deassociation with the parent vmspace must be done carefully:
		 * The vmspace could attempt to orphan this vm_client while we
		 * release vmc_lock in order to take vms_lock (the required
		 * order).  The client is marked to indicate that destruction is
		 * under way.  Doing so prevents any racing orphan operation
		 * from applying to this client, allowing us to deassociate from
		 * the vmspace safely.
		 */
		vmc->vmc_state |= VCS_DESTROY;
		vms = vmc->vmc_space;
		mutex_exit(&vmc->vmc_lock);

		/* Re-acquire in the required vms_lock -> vmc_lock order */
		mutex_enter(&vms->vms_lock);
		mutex_enter(&vmc->vmc_lock);
		list_remove(&vms->vms_clients, vmc);
		/*
		 * If the vmspace began its own destruction operation while we
		 * were navigating the locks, be sure to notify it about this
		 * vm_client being deassociated.
		 */
		cv_signal(&vms->vms_cv);
		mutex_exit(&vmc->vmc_lock);
		mutex_exit(&vms->vms_lock);
	} else {
		/* Orphaned clients were already detached from the vmspace */
		VERIFY3P(vmc->vmc_space, ==, NULL);
		mutex_exit(&vmc->vmc_lock);
	}

	mutex_destroy(&vmc->vmc_lock);
	cv_destroy(&vmc->vmc_cv);
	list_destroy(&vmc->vmc_held_pages);

	kmem_free(vmc, sizeof (*vmc));
}
14250153d828SPatrick Mooney
14260153d828SPatrick Mooney static __inline void *
vmp_ptr(const vm_page_t * vmp)14270153d828SPatrick Mooney vmp_ptr(const vm_page_t *vmp)
14280153d828SPatrick Mooney {
14290153d828SPatrick Mooney ASSERT3U(vmp->vmp_pfn, !=, PFN_INVALID);
14300153d828SPatrick Mooney
14310153d828SPatrick Mooney const uintptr_t paddr = (vmp->vmp_pfn << PAGESHIFT);
14320153d828SPatrick Mooney return ((void *)((uintptr_t)kpm_vbase + paddr));
14330153d828SPatrick Mooney }
14340153d828SPatrick Mooney
14350153d828SPatrick Mooney /*
14360153d828SPatrick Mooney * Get a readable kernel-virtual pointer for a held page.
14370153d828SPatrick Mooney *
14380153d828SPatrick Mooney * Only legal to call if PROT_READ was specified in `prot` for the vmc_hold()
14390153d828SPatrick Mooney * call to acquire this page reference.
14400153d828SPatrick Mooney */
14410153d828SPatrick Mooney const void *
vmp_get_readable(const vm_page_t * vmp)14420153d828SPatrick Mooney vmp_get_readable(const vm_page_t *vmp)
14430153d828SPatrick Mooney {
14440153d828SPatrick Mooney ASSERT(vmp->vmp_prot & PROT_READ);
14450153d828SPatrick Mooney
14460153d828SPatrick Mooney return (vmp_ptr(vmp));
14470153d828SPatrick Mooney }
14480153d828SPatrick Mooney
14490153d828SPatrick Mooney /*
14500153d828SPatrick Mooney * Get a writable kernel-virtual pointer for a held page.
14510153d828SPatrick Mooney *
14520153d828SPatrick Mooney * Only legal to call if PROT_WRITE was specified in `prot` for the vmc_hold()
14530153d828SPatrick Mooney * call to acquire this page reference.
14540153d828SPatrick Mooney */
14550153d828SPatrick Mooney void *
vmp_get_writable(const vm_page_t * vmp)14560153d828SPatrick Mooney vmp_get_writable(const vm_page_t *vmp)
14570153d828SPatrick Mooney {
14580153d828SPatrick Mooney ASSERT(vmp->vmp_prot & PROT_WRITE);
14590153d828SPatrick Mooney
14600153d828SPatrick Mooney return (vmp_ptr(vmp));
14610153d828SPatrick Mooney }
14620153d828SPatrick Mooney
14630153d828SPatrick Mooney /*
14640153d828SPatrick Mooney * Get the host-physical PFN for a held page.
14650153d828SPatrick Mooney */
14660153d828SPatrick Mooney pfn_t
vmp_get_pfn(const vm_page_t * vmp)14670153d828SPatrick Mooney vmp_get_pfn(const vm_page_t *vmp)
14680153d828SPatrick Mooney {
14690153d828SPatrick Mooney return (vmp->vmp_pfn);
14700153d828SPatrick Mooney }
14710153d828SPatrick Mooney
1472f2357d97SPatrick Mooney /*
1473f2357d97SPatrick Mooney * If this page was deferring dirty-marking in the corresponding vmspace page
1474f2357d97SPatrick Mooney * tables, clear such a state so it is considered dirty from now on.
1475f2357d97SPatrick Mooney */
1476f2357d97SPatrick Mooney void
vmp_mark_dirty(vm_page_t * vmp)1477f2357d97SPatrick Mooney vmp_mark_dirty(vm_page_t *vmp)
1478f2357d97SPatrick Mooney {
1479f2357d97SPatrick Mooney ASSERT((vmp->vmp_prot & PROT_WRITE) != 0);
1480f2357d97SPatrick Mooney
1481f2357d97SPatrick Mooney atomic_and_8(&vmp->vmp_flags, ~VPF_DEFER_DIRTY);
1482f2357d97SPatrick Mooney }
1483f2357d97SPatrick Mooney
14840153d828SPatrick Mooney /*
14850153d828SPatrick Mooney * Store a pointer to `to_chain` in the page-chaining slot of `vmp`.
14860153d828SPatrick Mooney */
14870153d828SPatrick Mooney void
vmp_chain(vm_page_t * vmp,vm_page_t * to_chain)14880153d828SPatrick Mooney vmp_chain(vm_page_t *vmp, vm_page_t *to_chain)
14890153d828SPatrick Mooney {
14900153d828SPatrick Mooney ASSERT3P(vmp->vmp_chain, ==, NULL);
14910153d828SPatrick Mooney
14920153d828SPatrick Mooney vmp->vmp_chain = to_chain;
14930153d828SPatrick Mooney }
14940153d828SPatrick Mooney
14950153d828SPatrick Mooney /*
14960153d828SPatrick Mooney * Retrieve the pointer from the page-chaining in `vmp`.
14970153d828SPatrick Mooney */
14980153d828SPatrick Mooney vm_page_t *
vmp_next(const vm_page_t * vmp)14990153d828SPatrick Mooney vmp_next(const vm_page_t *vmp)
15000153d828SPatrick Mooney {
15010153d828SPatrick Mooney return (vmp->vmp_chain);
15020153d828SPatrick Mooney }
15030153d828SPatrick Mooney
/*
 * Release a single held page and free its vm_page_t.
 *
 * Returns true if the page belonged to a region which was unmapped while the
 * hold was outstanding (indicated by a direct object reference rather than a
 * live PTE pointer).  Caller must hold vmc_lock.
 */
static __inline bool
vmp_release_inner(vm_page_t *vmp, vm_client_t *vmc)
{
	ASSERT(MUTEX_HELD(&vmc->vmc_lock));

	bool was_unmapped = false;

	list_remove(&vmc->vmc_held_pages, vmp);
	if (vmp->vmp_obj_ref != NULL) {
		/* Region was unmapped: drop the direct object reference */
		ASSERT3P(vmp->vmp_ptep, ==, NULL);

		vm_object_release(vmp->vmp_obj_ref);
		was_unmapped = true;
	} else {
		ASSERT3P(vmp->vmp_ptep, !=, NULL);

		/*
		 * Track appropriate (accessed/dirty) bits for the guest-virtual
		 * address corresponding to this page, if it is from the vmspace
		 * rather than a direct reference to an underlying object.
		 *
		 * The protection and/or configured flags may obviate the need
		 * for such an update.
		 */
		if ((vmp->vmp_prot & PROT_WRITE) != 0 &&
		    (vmp->vmp_flags & VPF_DEFER_DIRTY) == 0 &&
		    vmc->vmc_track_dirty) {
			vmm_gpt_t *gpt = vmc->vmc_space->vms_gpt;
			(void) vmm_gpt_reset_dirty(gpt, vmp->vmp_ptep, true);
		}
	}
	kmem_free(vmp, sizeof (*vmp));
	return (was_unmapped);
}
15380153d828SPatrick Mooney
15390153d828SPatrick Mooney /*
15400153d828SPatrick Mooney * Release held page. Returns true if page resided on region which was
15410153d828SPatrick Mooney * subsequently unmapped.
15420153d828SPatrick Mooney */
15430153d828SPatrick Mooney bool
vmp_release(vm_page_t * vmp)15440153d828SPatrick Mooney vmp_release(vm_page_t *vmp)
15450153d828SPatrick Mooney {
15460153d828SPatrick Mooney vm_client_t *vmc = vmp->vmp_client;
15470153d828SPatrick Mooney
15480153d828SPatrick Mooney VERIFY(vmc != NULL);
15490153d828SPatrick Mooney
15500153d828SPatrick Mooney mutex_enter(&vmc->vmc_lock);
15510153d828SPatrick Mooney const bool was_unmapped = vmp_release_inner(vmp, vmc);
15520153d828SPatrick Mooney mutex_exit(&vmc->vmc_lock);
15530153d828SPatrick Mooney return (was_unmapped);
15540153d828SPatrick Mooney }
15550153d828SPatrick Mooney
15560153d828SPatrick Mooney /*
15570153d828SPatrick Mooney * Release a chain of pages which were associated via vmp_chain() (setting
15580153d828SPatrick Mooney * page-chaining pointer). Returns true if any pages resided upon a region
15590153d828SPatrick Mooney * which was subsequently unmapped.
15600153d828SPatrick Mooney *
15610153d828SPatrick Mooney * All of those pages must have been held through the same vm_client_t.
15620153d828SPatrick Mooney */
15630153d828SPatrick Mooney bool
vmp_release_chain(vm_page_t * vmp)15640153d828SPatrick Mooney vmp_release_chain(vm_page_t *vmp)
15650153d828SPatrick Mooney {
15660153d828SPatrick Mooney vm_client_t *vmc = vmp->vmp_client;
15670153d828SPatrick Mooney bool any_unmapped = false;
15680153d828SPatrick Mooney
15690153d828SPatrick Mooney ASSERT(vmp != NULL);
15700153d828SPatrick Mooney
15710153d828SPatrick Mooney mutex_enter(&vmc->vmc_lock);
15720153d828SPatrick Mooney while (vmp != NULL) {
15730153d828SPatrick Mooney vm_page_t *next = vmp->vmp_chain;
15740153d828SPatrick Mooney
15750153d828SPatrick Mooney /* We expect all pages in chain to be from same client */
15760153d828SPatrick Mooney ASSERT3P(vmp->vmp_client, ==, vmc);
15770153d828SPatrick Mooney
15780153d828SPatrick Mooney if (vmp_release_inner(vmp, vmc)) {
15790153d828SPatrick Mooney any_unmapped = true;
15800153d828SPatrick Mooney }
15810153d828SPatrick Mooney vmp = next;
15820153d828SPatrick Mooney }
15830153d828SPatrick Mooney mutex_exit(&vmc->vmc_lock);
15840153d828SPatrick Mooney return (any_unmapped);
15850153d828SPatrick Mooney }
15860153d828SPatrick Mooney
15870153d828SPatrick Mooney
15880153d828SPatrick Mooney int
vm_segmap_obj(struct vm * vm,int segid,off_t segoff,off_t len,struct as * as,caddr_t * addrp,uint_t prot,uint_t maxprot,uint_t flags)15890153d828SPatrick Mooney vm_segmap_obj(struct vm *vm, int segid, off_t segoff, off_t len,
15900153d828SPatrick Mooney struct as *as, caddr_t *addrp, uint_t prot, uint_t maxprot, uint_t flags)
15910153d828SPatrick Mooney {
15920153d828SPatrick Mooney vm_object_t *vmo;
15930153d828SPatrick Mooney int err;
15940153d828SPatrick Mooney
15950153d828SPatrick Mooney if (segoff < 0 || len <= 0 ||
15960153d828SPatrick Mooney (segoff & PAGEOFFSET) != 0 || (len & PAGEOFFSET) != 0) {
15970153d828SPatrick Mooney return (EINVAL);
15980153d828SPatrick Mooney }
15990153d828SPatrick Mooney if ((prot & PROT_USER) == 0) {
16000153d828SPatrick Mooney return (ENOTSUP);
16010153d828SPatrick Mooney }
16020153d828SPatrick Mooney err = vm_get_memseg(vm, segid, NULL, NULL, &vmo);
16030153d828SPatrick Mooney if (err != 0) {
16040153d828SPatrick Mooney return (err);
16050153d828SPatrick Mooney }
16060153d828SPatrick Mooney
16070153d828SPatrick Mooney VERIFY(segoff >= 0);
16080153d828SPatrick Mooney VERIFY(len <= vmo->vmo_size);
16090153d828SPatrick Mooney VERIFY((len + segoff) <= vmo->vmo_size);
16100153d828SPatrick Mooney
16110153d828SPatrick Mooney if (vmo->vmo_type != VMOT_MEM) {
16120153d828SPatrick Mooney /* Only support memory objects for now */
16130153d828SPatrick Mooney return (ENOTSUP);
16140153d828SPatrick Mooney }
16150153d828SPatrick Mooney
16160153d828SPatrick Mooney as_rangelock(as);
16170153d828SPatrick Mooney
16180153d828SPatrick Mooney err = choose_addr(as, addrp, (size_t)len, 0, ADDR_VACALIGN, flags);
16190153d828SPatrick Mooney if (err == 0) {
16200153d828SPatrick Mooney segvmm_crargs_t svma;
16210153d828SPatrick Mooney
16220153d828SPatrick Mooney svma.prot = prot;
16230153d828SPatrick Mooney svma.offset = segoff;
16240153d828SPatrick Mooney svma.vmo = vmo;
16250153d828SPatrick Mooney svma.vmc = NULL;
16260153d828SPatrick Mooney
16270153d828SPatrick Mooney err = as_map(as, *addrp, (size_t)len, segvmm_create, &svma);
16280153d828SPatrick Mooney }
16290153d828SPatrick Mooney
16300153d828SPatrick Mooney as_rangeunlock(as);
16310153d828SPatrick Mooney return (err);
16320153d828SPatrick Mooney }
16330153d828SPatrick Mooney
16340153d828SPatrick Mooney int
vm_segmap_space(struct vm * vm,off_t off,struct as * as,caddr_t * addrp,off_t len,uint_t prot,uint_t maxprot,uint_t flags)16350153d828SPatrick Mooney vm_segmap_space(struct vm *vm, off_t off, struct as *as, caddr_t *addrp,
16360153d828SPatrick Mooney off_t len, uint_t prot, uint_t maxprot, uint_t flags)
16370153d828SPatrick Mooney {
16380153d828SPatrick Mooney
16390153d828SPatrick Mooney const uintptr_t gpa = (uintptr_t)off;
16400153d828SPatrick Mooney const size_t size = (uintptr_t)len;
16410153d828SPatrick Mooney int err;
16420153d828SPatrick Mooney
16430153d828SPatrick Mooney if (off < 0 || len <= 0 ||
16440153d828SPatrick Mooney (gpa & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
16450153d828SPatrick Mooney return (EINVAL);
16460153d828SPatrick Mooney }
16470153d828SPatrick Mooney if ((prot & PROT_USER) == 0) {
16480153d828SPatrick Mooney return (ENOTSUP);
16490153d828SPatrick Mooney }
16500153d828SPatrick Mooney
16510153d828SPatrick Mooney as_rangelock(as);
16520153d828SPatrick Mooney
16530153d828SPatrick Mooney err = choose_addr(as, addrp, size, off, ADDR_VACALIGN, flags);
16540153d828SPatrick Mooney if (err == 0) {
16550153d828SPatrick Mooney segvmm_crargs_t svma;
16560153d828SPatrick Mooney
16570153d828SPatrick Mooney svma.prot = prot;
16580153d828SPatrick Mooney svma.offset = gpa;
16590153d828SPatrick Mooney svma.vmo = NULL;
16600153d828SPatrick Mooney svma.vmc = vmspace_client_alloc(vm_get_vmspace(vm));
16610153d828SPatrick Mooney
16620153d828SPatrick Mooney err = as_map(as, *addrp, len, segvmm_create, &svma);
16630153d828SPatrick Mooney }
16640153d828SPatrick Mooney
16650153d828SPatrick Mooney as_rangeunlock(as);
16660153d828SPatrick Mooney return (err);
16670153d828SPatrick Mooney }
1668