xref: /illumos-gate/usr/src/uts/intel/io/vmm/vmm_vm.c (revision b9b43e84)
10153d828SPatrick Mooney /*
20153d828SPatrick Mooney  * This file and its contents are supplied under the terms of the
30153d828SPatrick Mooney  * Common Development and Distribution License ("CDDL"), version 1.0.
40153d828SPatrick Mooney  * You may only use this file in accordance with the terms of version
50153d828SPatrick Mooney  * 1.0 of the CDDL.
60153d828SPatrick Mooney  *
70153d828SPatrick Mooney  * A full copy of the text of the CDDL should have accompanied this
80153d828SPatrick Mooney  * source.  A copy of the CDDL is also available via the Internet at
90153d828SPatrick Mooney  * http://www.illumos.org/license/CDDL.
100153d828SPatrick Mooney  */
110153d828SPatrick Mooney /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
120153d828SPatrick Mooney 
130153d828SPatrick Mooney /*
140153d828SPatrick Mooney  * Copyright 2019 Joyent, Inc.
153a0fa64cSPatrick Mooney  * Copyright 2023 Oxide Computer Company
160153d828SPatrick Mooney  * Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
170153d828SPatrick Mooney  */
180153d828SPatrick Mooney 
190153d828SPatrick Mooney #include <sys/param.h>
200153d828SPatrick Mooney #include <sys/kmem.h>
210153d828SPatrick Mooney #include <sys/thread.h>
220153d828SPatrick Mooney #include <sys/list.h>
230153d828SPatrick Mooney #include <sys/mman.h>
240153d828SPatrick Mooney #include <sys/types.h>
250153d828SPatrick Mooney #include <sys/ddi.h>
260153d828SPatrick Mooney #include <sys/sysmacros.h>
270153d828SPatrick Mooney #include <sys/machsystm.h>
280153d828SPatrick Mooney #include <sys/vmsystm.h>
290153d828SPatrick Mooney #include <sys/x86_archext.h>
300153d828SPatrick Mooney #include <vm/as.h>
310153d828SPatrick Mooney #include <vm/hat_i86.h>
320153d828SPatrick Mooney #include <vm/seg_vn.h>
330153d828SPatrick Mooney #include <vm/seg_kmem.h>
340153d828SPatrick Mooney 
350153d828SPatrick Mooney #include <sys/vmm_vm.h>
360153d828SPatrick Mooney #include <sys/seg_vmm.h>
370153d828SPatrick Mooney #include <sys/vmm_kernel.h>
380153d828SPatrick Mooney #include <sys/vmm_reservoir.h>
390153d828SPatrick Mooney #include <sys/vmm_gpt.h>
400153d828SPatrick Mooney 
410153d828SPatrick Mooney 
420153d828SPatrick Mooney /*
430153d828SPatrick Mooney  * VMM Virtual Memory
440153d828SPatrick Mooney  *
450153d828SPatrick Mooney  * History
460153d828SPatrick Mooney  *
470153d828SPatrick Mooney  * When bhyve was ported to illumos, one significant hole was handling guest
480153d828SPatrick Mooney  * memory and memory accesses.  In the original Pluribus port, bhyve itself
490153d828SPatrick Mooney  * manually handled the EPT structures for guest memory.  The updated sources
500153d828SPatrick Mooney  * (from FreeBSD 11) took a different approach, using the native FreeBSD VM
510153d828SPatrick Mooney  * system for memory allocations and management of the EPT structures.  Keeping
520153d828SPatrick Mooney  * source differences to a minimum was a priority, so illumos-bhyve implemented
530153d828SPatrick Mooney  * a makeshift "VM shim" which exposed the bare minimum of those interfaces to
540153d828SPatrick Mooney  * boot and run guests.
550153d828SPatrick Mooney  *
560153d828SPatrick Mooney  * While the VM shim was successful in getting illumos-bhyve to a functional
570153d828SPatrick Mooney  * state on Intel (and later AMD) gear, the FreeBSD-specific nature of the
580153d828SPatrick Mooney  * compatibility interfaces made it awkward to use.  As source differences with
590153d828SPatrick Mooney  * the upstream kernel code became less of a concern, and upcoming features
600153d828SPatrick Mooney  * (such as live migration) would demand more of those VM interfaces, it became
610153d828SPatrick Mooney  * clear that an overhaul was prudent.
620153d828SPatrick Mooney  *
630153d828SPatrick Mooney  * Design
640153d828SPatrick Mooney  *
650153d828SPatrick Mooney  * The new VM system for bhyve retains a number of the same concepts as what it
660153d828SPatrick Mooney  * replaces:
670153d828SPatrick Mooney  *
680153d828SPatrick Mooney  * - `vmspace_t` is the top-level entity for a guest memory space
690153d828SPatrick Mooney  * - `vm_object_t` represents a memory object which can be mapped into a vmspace
700153d828SPatrick Mooney  * - `vm_page_t` represents a page hold within a given vmspace, providing access
710153d828SPatrick Mooney  *   to the underlying memory page
720153d828SPatrick Mooney  *
730153d828SPatrick Mooney  * Unlike the old code, where most of the involved structures were exposed via
740153d828SPatrick Mooney  * public definitions, this replacement VM interface keeps all involved
750153d828SPatrick Mooney  * structures opaque to consumers.  Furthermore, there is a clear delineation
760153d828SPatrick Mooney  * between infrequent administrative operations (such as mapping/unmapping
770153d828SPatrick Mooney  * regions) and common data-path operations (attempting a page hold at a given
780153d828SPatrick Mooney  * guest-physical address).  Those administrative operations are performed
790153d828SPatrick Mooney  * directly against the vmspace, whereas the data-path operations are performed
800153d828SPatrick Mooney  * through a `vm_client_t` handle.  That VM client abstraction is meant to
810153d828SPatrick Mooney  * reduce contention and overhead for frequent access operations and provide
820153d828SPatrick Mooney  * debugging insight into how different subcomponents are accessing the vmspace.
830153d828SPatrick Mooney  * A VM client is allocated for each vCPU, each viona ring (via the vmm_drv
840153d828SPatrick Mooney  * interface) and each VMM userspace segment mapping.
850153d828SPatrick Mooney  *
860153d828SPatrick Mooney  * Exclusion
870153d828SPatrick Mooney  *
880153d828SPatrick Mooney  * Making changes to the vmspace (such as mapping or unmapping regions) requires
890153d828SPatrick Mooney  * other accessors be excluded while the change is underway to prevent them from
900153d828SPatrick Mooney  * observing invalid intermediate states.  A simple approach could use a mutex
910153d828SPatrick Mooney  * or rwlock to achieve this, but that risks contention when the rate of access
920153d828SPatrick Mooney  * to the vmspace is high.
930153d828SPatrick Mooney  *
940153d828SPatrick Mooney  * Since vmspace changes (map/unmap) are rare, we can instead do the exclusion
950153d828SPatrick Mooney  * at a per-vm_client_t basis.  While this raises the cost for vmspace changes,
960153d828SPatrick Mooney  * it means that the much more common page accesses through the vm_client can
970153d828SPatrick Mooney  * normally proceed unimpeded and independently.
980153d828SPatrick Mooney  *
990153d828SPatrick Mooney  * When a change to the vmspace is required, the caller will put the vmspace in
1000153d828SPatrick Mooney  * a 'hold' state, iterating over all associated vm_client instances, waiting
1010153d828SPatrick Mooney  * for them to complete any in-flight lookup (indicated by VCS_ACTIVE) before
1020153d828SPatrick Mooney  * setting VCS_HOLD in their state flag fields.  With VCS_HOLD set, any call on
1030153d828SPatrick Mooney  * the vm_client which would access the vmspace state (vmc_hold or vmc_fault)
1040153d828SPatrick Mooney  * will block until the hold condition is cleared.  Once the hold is asserted
1050153d828SPatrick Mooney  * for all clients, the vmspace change can proceed with confidence.  Upon
1060153d828SPatrick Mooney  * completion of that operation, VCS_HOLD is cleared from the clients, and they
1070153d828SPatrick Mooney  * are released to resume vmspace accesses.
1080153d828SPatrick Mooney  *
1090153d828SPatrick Mooney  * vCPU Consumers
1100153d828SPatrick Mooney  *
1110153d828SPatrick Mooney  * Access to the vmspace for vCPUs running in guest context is different from
1120153d828SPatrick Mooney  * emulation-related vm_client activity: they solely rely on the contents of the
1130153d828SPatrick Mooney  * page tables.  Furthermore, the existing VCS_HOLD mechanism used to exclude
1140153d828SPatrick Mooney  * client access is not feasible when entering guest context, since interrupts
1150153d828SPatrick Mooney  * are disabled, making it impossible to block entry.  This is not a concern as
1160153d828SPatrick Mooney  * long as vmspace modifications never place the page tables in invalid states
1170153d828SPatrick Mooney  * (either intermediate, or final).  The vm_client hold mechanism does provide
1180153d828SPatrick Mooney  * the means to IPI vCPU consumers which will trigger a notification once they
1190153d828SPatrick Mooney  * report their exit from guest context.  This can be used to ensure that page
1200153d828SPatrick Mooney  * table modifications are made visible to those vCPUs within a certain
1210153d828SPatrick Mooney  * time frame.
1220153d828SPatrick Mooney  */
1230153d828SPatrick Mooney 
/*
 * Describes one mapped region within a vmspace: the guest-physical range
 * [vmsm_addr, vmsm_addr + vmsm_len) backed by a vm_object starting at
 * vmsm_offset within that object.  Mappings live on the vmspace vms_maplist.
 */
typedef struct vmspace_mapping {
	list_node_t	vmsm_node;	/* linkage on vmspace vms_maplist */
	vm_object_t	*vmsm_object;	/* object backing this mapping */
	uintptr_t	vmsm_addr;	/* start addr in vmspace for mapping */
	size_t		vmsm_len;	/* length (in bytes) of mapping */
	off_t		vmsm_offset;	/* byte offset into object */
	uint_t		vmsm_prot;	/* protection (PROT_*) of mapping */
} vmspace_mapping_t;
1320153d828SPatrick Mooney 
/*
 * Translate an address inside a mapping into the corresponding byte offset
 * within the backing vm_object.  Callers are expected to pass an `addr`
 * which falls inside the mapping's range; no bounds check is performed here.
 */
#define	VMSM_OFFSET(vmsm, addr)	(			\
	    (vmsm)->vmsm_offset +			\
	    ((addr) - (uintptr_t)(vmsm)->vmsm_addr))
1360153d828SPatrick Mooney 
/*
 * State flags for a vm_client, stored in vmc_state (protected by vmc_lock).
 * VCS_IDLE (0) means no flags are asserted; the remaining values may be
 * combined as a bit-field.
 */
typedef enum vm_client_state {
	VCS_IDLE	= 0,
	/* currently accessing vmspace for client operation (hold or fault) */
	VCS_ACTIVE	= (1 << 0),
	/* client hold requested/asserted */
	VCS_HOLD	= (1 << 1),
	/* vCPU is accessing page tables in guest context */
	VCS_ON_CPU	= (1 << 2),
	/* client has been orphaned (no more access to vmspace) */
	VCS_ORPHANED	= (1 << 3),
	/* client undergoing destroy operation */
	VCS_DESTROY	= (1 << 4),
} vm_client_state_t;
1500153d828SPatrick Mooney 
/*
 * Top-level entity representing a guest memory space.  The vms_lock/vms_cv
 * pair protects the client list and the vms_held flag used to implement the
 * vmspace hold mechanism described in the "Exclusion" section above.
 */
struct vmspace {
	kmutex_t	vms_lock;
	kcondvar_t	vms_cv;
	bool		vms_held;	/* vmspace hold asserted */
	uintptr_t	vms_size;	/* immutable after creation */

	/* (nested) page table state */
	vmm_gpt_t	*vms_gpt;	/* guest page table */
	uint64_t	vms_pt_gen;	/* bumped on every invalidation */
	uint64_t	vms_pages_mapped; /* count of resident pages */
	bool		vms_track_dirty; /* dirty-page tracking enabled */

	list_t		vms_maplist;	/* vmspace_mapping_t entries */
	list_t		vms_clients;	/* vm_client_t entries */
};
1660153d828SPatrick Mooney 
/*
 * Per-consumer handle for data-path access (page holds, faults) to a vmspace.
 * One is allocated per vCPU, per viona ring, and per userspace segment
 * mapping (see the "Design" section above).  vmc_lock protects the mutable
 * state; vmc_cv is used for hold/release signalling.
 */
struct vm_client {
	vmspace_t	*vmc_space;	/* associated vmspace (NULL-able once orphaned) */
	list_node_t	vmc_node;	/* linkage on vmspace vms_clients */

	kmutex_t	vmc_lock;
	kcondvar_t	vmc_cv;
	vm_client_state_t vmc_state;	/* VCS_* flags */
	int		vmc_cpu_active;	/* CPU while in guest context */
	uint64_t	vmc_cpu_gen;	/* page table generation last seen */
	bool		vmc_track_dirty; /* mirror of vmspace tracking state */
	vmc_inval_cb_t	vmc_inval_func;	/* invalidation notification callback */
	void		*vmc_inval_data; /* opaque arg for vmc_inval_func */

	list_t		vmc_held_pages;	/* vm_page_t holds owned by client */
};
1820153d828SPatrick Mooney 
/*
 * Backing type for a vm_object: VMM reservoir memory (VMOT_MEM) or a range
 * of host-physical MMIO space (VMOT_MMIO).
 */
typedef enum vm_object_type {
	VMOT_NONE,
	VMOT_MEM,	/* vmo_data is a vmmr_region_t * */
	VMOT_MMIO,	/* vmo_data is the base host-physical address */
} vm_object_type_t;
1880153d828SPatrick Mooney 
/*
 * A reference-counted memory object which can be mapped into a vmspace.
 * Aside from vmo_refcnt (manipulated with atomic ops), all fields are fixed
 * at creation time and safe to read without synchronization.
 */
struct vm_object {
	uint_t		vmo_refcnt;	/* manipulated with atomic ops */

	/* Fields below are fixed at creation time */
	vm_object_type_t vmo_type;
	size_t		vmo_size;	/* object length in bytes */
	void		*vmo_data;	/* interpretation depends on vmo_type */
	uint8_t		vmo_attr;	/* memory attribute (MTRR_TYPE_*) */
};
1980153d828SPatrick Mooney 
/* Convenience consolidation of all flag(s) for validity checking */
#define	VPF_ALL		(VPF_DEFER_DIRTY)

/*
 * Represents a page hold within a vmspace, granting access to the underlying
 * memory page.  Holds are tracked on the owning client's vmc_held_pages list.
 */
struct vm_page {
	vm_client_t	*vmp_client;	/* client which took the hold */
	list_node_t	vmp_node;	/* linkage on vmc_held_pages */
	vm_page_t	*vmp_chain;	/* next page in a chained hold */
	uintptr_t	vmp_gpa;	/* guest-physical address of page */
	pfn_t		vmp_pfn;	/* host page frame number */
	uint64_t	*vmp_ptep;	/* pointer to PTE for the page */
	vm_object_t	*vmp_obj_ref;	/* object reference held for page */
	uint8_t		vmp_prot;	/* PROT_* for which hold was taken */
	uint8_t		vmp_flags;	/* VPF_* flags */
};
2130153d828SPatrick Mooney 
2140153d828SPatrick Mooney static vmspace_mapping_t *vm_mapping_find(vmspace_t *, uintptr_t, size_t);
2158779b448SDan Cross static void vmspace_hold_enter(vmspace_t *);
2168779b448SDan Cross static void vmspace_hold_exit(vmspace_t *, bool);
217*b9b43e84SPatrick Mooney static void vmspace_clients_invalidate(vmspace_t *, uintptr_t, size_t);
218*b9b43e84SPatrick Mooney static int vmspace_ensure_mapped(vmspace_t *, uintptr_t, int, pfn_t *,
219*b9b43e84SPatrick Mooney     uint64_t *);
2200153d828SPatrick Mooney static void vmc_space_hold(vm_client_t *);
2210153d828SPatrick Mooney static void vmc_space_release(vm_client_t *, bool);
2220153d828SPatrick Mooney static void vmc_space_invalidate(vm_client_t *, uintptr_t, size_t, uint64_t);
2230153d828SPatrick Mooney static void vmc_space_unmap(vm_client_t *, uintptr_t, size_t, vm_object_t *);
2240153d828SPatrick Mooney static vm_client_t *vmc_space_orphan(vm_client_t *, vmspace_t *);
2250153d828SPatrick Mooney 
2260153d828SPatrick Mooney 
2270153d828SPatrick Mooney /*
2280153d828SPatrick Mooney  * Create a new vmspace with a maximum address of `end`.
2290153d828SPatrick Mooney  */
2300153d828SPatrick Mooney vmspace_t *
vmspace_alloc(size_t end,vmm_pte_ops_t * pte_ops,bool track_dirty)2310153d828SPatrick Mooney vmspace_alloc(size_t end, vmm_pte_ops_t *pte_ops, bool track_dirty)
2320153d828SPatrick Mooney {
2330153d828SPatrick Mooney 	vmspace_t *vms;
2340153d828SPatrick Mooney 	const uintptr_t size = end + 1;
2350153d828SPatrick Mooney 
2360153d828SPatrick Mooney 	/*
2370153d828SPatrick Mooney 	 * This whole mess is built on the assumption that a 64-bit address
2380153d828SPatrick Mooney 	 * space is available to work with for the various pagetable tricks.
2390153d828SPatrick Mooney 	 */
2400153d828SPatrick Mooney 	VERIFY(size > 0 && (size & PAGEOFFSET) == 0 &&
2410153d828SPatrick Mooney 	    size <= (uintptr_t)USERLIMIT);
2420153d828SPatrick Mooney 
2430153d828SPatrick Mooney 	vms = kmem_zalloc(sizeof (*vms), KM_SLEEP);
2440153d828SPatrick Mooney 	vms->vms_size = size;
2450153d828SPatrick Mooney 	list_create(&vms->vms_maplist, sizeof (vmspace_mapping_t),
2460153d828SPatrick Mooney 	    offsetof(vmspace_mapping_t, vmsm_node));
2470153d828SPatrick Mooney 	list_create(&vms->vms_clients, sizeof (vm_client_t),
2480153d828SPatrick Mooney 	    offsetof(vm_client_t, vmc_node));
2490153d828SPatrick Mooney 
2500153d828SPatrick Mooney 	vms->vms_gpt = vmm_gpt_alloc(pte_ops);
2510153d828SPatrick Mooney 	vms->vms_pt_gen = 1;
2520153d828SPatrick Mooney 	vms->vms_track_dirty = track_dirty;
2530153d828SPatrick Mooney 
2540153d828SPatrick Mooney 	return (vms);
2550153d828SPatrick Mooney }
2560153d828SPatrick Mooney 
/*
 * Destroy a vmspace.  All regions in the space must be unmapped.  Any remaining
 * clients will be orphaned.
 */
void
vmspace_destroy(vmspace_t *vms)
{
	mutex_enter(&vms->vms_lock);
	/* The caller must have unmapped all regions before destruction */
	VERIFY(list_is_empty(&vms->vms_maplist));

	if (!list_is_empty(&vms->vms_clients)) {
		vm_client_t *vmc = list_head(&vms->vms_clients);
		/*
		 * Orphan each remaining client; the loop relies on
		 * vmc_space_orphan() yielding the next client in the list
		 * (NULL once the list has been exhausted).
		 */
		while (vmc != NULL) {
			vmc = vmc_space_orphan(vmc, vms);
		}
		/*
		 * Wait for any clients which were in the process of destroying
		 * themselves to disappear.
		 */
		while (!list_is_empty(&vms->vms_clients)) {
			cv_wait(&vms->vms_cv, &vms->vms_lock);
		}
	}
	VERIFY(list_is_empty(&vms->vms_clients));

	/* With no mappings or clients left, tear down the page tables */
	vmm_gpt_free(vms->vms_gpt);
	mutex_exit(&vms->vms_lock);

	mutex_destroy(&vms->vms_lock);
	cv_destroy(&vms->vms_cv);
	list_destroy(&vms->vms_maplist);
	list_destroy(&vms->vms_clients);

	kmem_free(vms, sizeof (*vms));
}
2920153d828SPatrick Mooney 
2930153d828SPatrick Mooney /*
2940153d828SPatrick Mooney  * Retrieve the count of resident (mapped into the page tables) pages.
2950153d828SPatrick Mooney  */
2960153d828SPatrick Mooney uint64_t
vmspace_resident_count(vmspace_t * vms)2970153d828SPatrick Mooney vmspace_resident_count(vmspace_t *vms)
2980153d828SPatrick Mooney {
2990153d828SPatrick Mooney 	return (vms->vms_pages_mapped);
3000153d828SPatrick Mooney }
3010153d828SPatrick Mooney 
/*
 * Perform an operation on the status (accessed/dirty) bits held in the page
 * tables of this vmspace.
 *
 * `oper` selects the base operation (get/set/reset dirty) and may carry
 * VBO_FLAG_BITMAP_IN (only touch pages whose bit is set in `bitmap`) and/or
 * VBO_FLAG_BITMAP_OUT (record each page's resulting dirty state in `bitmap`,
 * one bit per page, LSB-first).
 *
 * Such manipulations race against both hardware writes (from running vCPUs) and
 * emulated accesses reflected from userspace.  Safe functionality depends on
 * the VM instance being read-locked to prevent vmspace_map/vmspace_unmap
 * operations from changing the page tables during the walk.
 */
void
vmspace_bits_operate(vmspace_t *vms, uint64_t gpa, size_t len,
    vmspace_bit_oper_t oper, uint8_t *bitmap)
{
	const bool bit_input = (oper & VBO_FLAG_BITMAP_IN) != 0;
	const bool bit_output = (oper & VBO_FLAG_BITMAP_OUT) != 0;
	const vmspace_bit_oper_t oper_only =
	    oper & ~(VBO_FLAG_BITMAP_IN | VBO_FLAG_BITMAP_OUT);
	vmm_gpt_t *gpt = vms->vms_gpt;

	/*
	 * The bitmap cannot be NULL if the requested operation involves reading
	 * or writing from it.
	 */
	ASSERT(bitmap != NULL || (!bit_input && !bit_output));

	for (size_t offset = 0; offset < len; offset += PAGESIZE) {
		/* Bitmap position for the page at (gpa + offset) */
		const uint64_t pfn_offset = offset >> PAGESHIFT;
		const size_t bit_offset = pfn_offset / 8;
		const uint8_t bit_mask = 1 << (pfn_offset % 8);

		/* With BITMAP_IN, skip pages the caller did not select */
		if (bit_input && (bitmap[bit_offset] & bit_mask) == 0) {
			continue;
		}

		/* `value` records the dirty state to report via BITMAP_OUT */
		bool value = false;
		uint64_t *entry = vmm_gpt_lookup(gpt, gpa + offset);
		if (entry == NULL) {
			/* No PTE at all: report clean (if asked) and move on */
			if (bit_output) {
				bitmap[bit_offset] &= ~bit_mask;
			}
			continue;
		}

		switch (oper_only) {
		case VBO_GET_DIRTY:
			value = vmm_gpt_query(gpt, entry, VGQ_DIRTY);
			break;
		case VBO_SET_DIRTY: {
			uint_t prot = 0;
			bool present_writable = false;
			pfn_t pfn;

			/*
			 * To avoid blindly setting the dirty bit on otherwise
			 * empty PTEs, we must first check if the entry for the
			 * address in question has been populated.
			 *
			 * Only if the page is marked both Present and Writable
			 * will we permit the dirty bit to be set.
			 */
			if (!vmm_gpt_is_mapped(gpt, entry, &pfn, &prot)) {
				/*
				 * NOTE(review): the base `gpa` is passed here
				 * rather than `gpa + offset`, even though
				 * `entry` corresponds to the latter — confirm
				 * this is intended for multi-page ranges.
				 */
				int err = vmspace_ensure_mapped(vms, gpa,
				    PROT_WRITE, &pfn, entry);
				if (err == 0) {
					present_writable = true;
				}
			} else if ((prot & PROT_WRITE) != 0) {
				present_writable = true;
			}

			if (present_writable) {
				/*
				 * Set dirty; `value` reflects whether the page
				 * was previously clean (reset_dirty presumably
				 * returns the prior state — note the negation).
				 */
				value = !vmm_gpt_reset_dirty(gpt, entry, true);
			}
			break;
		}
		case VBO_RESET_DIRTY:
			/*
			 * Although at first glance, it may seem like the act of
			 * resetting the dirty bit may require the same care as
			 * setting it, the constraints make for a simpler task.
			 *
			 * Any PTEs with the dirty bit set will have already
			 * been properly populated.
			 */
			value = vmm_gpt_reset_dirty(gpt, entry, false);
			break;
		default:
			panic("unrecognized operator: %d", oper_only);
			break;
		}
		if (bit_output) {
			if (value) {
				bitmap[bit_offset] |= bit_mask;
			} else {
				bitmap[bit_offset] &= ~bit_mask;
			}
		}
	}

	/*
	 * Invalidate the address range potentially affected by the changes to
	 * page table bits, issuing shoot-downs for those who might have it in
	 * cache.
	 */
	vmspace_hold_enter(vms);
	vms->vms_pt_gen++;
	vmspace_clients_invalidate(vms, gpa, len);
	vmspace_hold_exit(vms, true);
}
411*b9b43e84SPatrick Mooney 
412*b9b43e84SPatrick Mooney /*
413*b9b43e84SPatrick Mooney  * Is dirty-page-tracking enabled for the vmspace?
414*b9b43e84SPatrick Mooney  */
415*b9b43e84SPatrick Mooney bool
vmspace_get_tracking(vmspace_t * vms)416*b9b43e84SPatrick Mooney vmspace_get_tracking(vmspace_t *vms)
417*b9b43e84SPatrick Mooney {
418*b9b43e84SPatrick Mooney 	mutex_enter(&vms->vms_lock);
419*b9b43e84SPatrick Mooney 	const bool val = vms->vms_track_dirty;
420*b9b43e84SPatrick Mooney 	mutex_exit(&vms->vms_lock);
421*b9b43e84SPatrick Mooney 	return (val);
422*b9b43e84SPatrick Mooney }
423*b9b43e84SPatrick Mooney 
/*
 * Set the state (enabled/disabled) of dirty-page-tracking for the vmspace.
 *
 * Returns 0 on success, or ENOTSUP if tracking was requested but the page
 * table implementation cannot support it.
 */
int
vmspace_set_tracking(vmspace_t *vms, bool enable_dirty_tracking)
{
	if (enable_dirty_tracking && !vmm_gpt_can_track_dirty(vms->vms_gpt)) {
		/* Do not allow this to be set if it is not supported */
		return (ENOTSUP);
	}

	/* Exclude all clients while the tracking state is changed */
	vmspace_hold_enter(vms);
	if (vms->vms_track_dirty == enable_dirty_tracking) {
		/* No further effort required if state already matches */
		vmspace_hold_exit(vms, false);
		return (0);
	}

	vms->vms_track_dirty = enable_dirty_tracking;

	/* Configure all existing clients for new tracking behavior */
	for (vm_client_t *vmc = list_head(&vms->vms_clients);
	    vmc != NULL;
	    vmc = list_next(&vms->vms_clients, vmc)) {
		mutex_enter(&vmc->vmc_lock);
		vmc->vmc_track_dirty = enable_dirty_tracking;
		mutex_exit(&vmc->vmc_lock);
	}

	/*
	 * Notify all clients of what is considered an invalidation of the
	 * entire vmspace.
	 */
	vms->vms_pt_gen++;
	vmspace_clients_invalidate(vms, 0, vms->vms_size);

	/* `true` requests client notification upon hold release */
	vmspace_hold_exit(vms, true);
	return (0);
}
4638779b448SDan Cross 
4640153d828SPatrick Mooney static pfn_t
vm_object_pager_reservoir(vm_object_t * vmo,uintptr_t off)4650153d828SPatrick Mooney vm_object_pager_reservoir(vm_object_t *vmo, uintptr_t off)
4660153d828SPatrick Mooney {
4670153d828SPatrick Mooney 	vmmr_region_t *region;
4680153d828SPatrick Mooney 	pfn_t pfn;
4690153d828SPatrick Mooney 
4700153d828SPatrick Mooney 	ASSERT3U(vmo->vmo_type, ==, VMOT_MEM);
4710153d828SPatrick Mooney 
4720153d828SPatrick Mooney 	region = vmo->vmo_data;
4730153d828SPatrick Mooney 	pfn = vmmr_region_pfn_at(region, off);
4740153d828SPatrick Mooney 
4750153d828SPatrick Mooney 	return (pfn);
4760153d828SPatrick Mooney }
4770153d828SPatrick Mooney 
4780153d828SPatrick Mooney static pfn_t
vm_object_pager_mmio(vm_object_t * vmo,uintptr_t off)4790153d828SPatrick Mooney vm_object_pager_mmio(vm_object_t *vmo, uintptr_t off)
4800153d828SPatrick Mooney {
4810153d828SPatrick Mooney 	pfn_t pfn;
4820153d828SPatrick Mooney 
4830153d828SPatrick Mooney 	ASSERT3U(vmo->vmo_type, ==, VMOT_MMIO);
4840153d828SPatrick Mooney 	ASSERT3P(vmo->vmo_data, !=, NULL);
4850153d828SPatrick Mooney 	ASSERT3U(off, <, vmo->vmo_size);
4860153d828SPatrick Mooney 
4870153d828SPatrick Mooney 	pfn = ((uintptr_t)vmo->vmo_data + off) >> PAGESHIFT;
4880153d828SPatrick Mooney 
4890153d828SPatrick Mooney 	return (pfn);
4900153d828SPatrick Mooney }
4910153d828SPatrick Mooney 
4920153d828SPatrick Mooney /*
4930153d828SPatrick Mooney  * Allocate a VM object backed by VMM reservoir memory.
4940153d828SPatrick Mooney  */
4950153d828SPatrick Mooney vm_object_t *
vm_object_mem_allocate(size_t size,bool transient)4960153d828SPatrick Mooney vm_object_mem_allocate(size_t size, bool transient)
4970153d828SPatrick Mooney {
4980153d828SPatrick Mooney 	int err;
4990153d828SPatrick Mooney 	vmmr_region_t *region = NULL;
5000153d828SPatrick Mooney 	vm_object_t *vmo;
5010153d828SPatrick Mooney 
5020153d828SPatrick Mooney 	ASSERT3U(size, !=, 0);
5030153d828SPatrick Mooney 	ASSERT3U(size & PAGEOFFSET, ==, 0);
5040153d828SPatrick Mooney 
5050153d828SPatrick Mooney 	err = vmmr_alloc(size, transient, &region);
5060153d828SPatrick Mooney 	if (err != 0) {
5070153d828SPatrick Mooney 		return (NULL);
5080153d828SPatrick Mooney 	}
5090153d828SPatrick Mooney 
5100153d828SPatrick Mooney 	vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);
5110153d828SPatrick Mooney 
5120153d828SPatrick Mooney 	/* For now, these are to stay fixed after allocation */
5130153d828SPatrick Mooney 	vmo->vmo_type = VMOT_MEM;
5140153d828SPatrick Mooney 	vmo->vmo_size = size;
5150153d828SPatrick Mooney 	vmo->vmo_attr = MTRR_TYPE_WB;
5160153d828SPatrick Mooney 	vmo->vmo_data = region;
5170153d828SPatrick Mooney 	vmo->vmo_refcnt = 1;
5180153d828SPatrick Mooney 
5190153d828SPatrick Mooney 	return (vmo);
5200153d828SPatrick Mooney }
5210153d828SPatrick Mooney 
5220153d828SPatrick Mooney static vm_object_t *
vm_object_mmio_allocate(size_t size,uintptr_t hpa)5230153d828SPatrick Mooney vm_object_mmio_allocate(size_t size, uintptr_t hpa)
5240153d828SPatrick Mooney {
5250153d828SPatrick Mooney 	vm_object_t *vmo;
5260153d828SPatrick Mooney 
5270153d828SPatrick Mooney 	ASSERT3U(size, !=, 0);
5280153d828SPatrick Mooney 	ASSERT3U(size & PAGEOFFSET, ==, 0);
5290153d828SPatrick Mooney 	ASSERT3U(hpa & PAGEOFFSET, ==, 0);
5300153d828SPatrick Mooney 
5310153d828SPatrick Mooney 	vmo = kmem_alloc(sizeof (*vmo), KM_SLEEP);
5320153d828SPatrick Mooney 
5330153d828SPatrick Mooney 	/* For now, these are to stay fixed after allocation */
5340153d828SPatrick Mooney 	vmo->vmo_type = VMOT_MMIO;
5350153d828SPatrick Mooney 	vmo->vmo_size = size;
5360153d828SPatrick Mooney 	vmo->vmo_attr = MTRR_TYPE_UC;
5370153d828SPatrick Mooney 	vmo->vmo_data = (void *)hpa;
5380153d828SPatrick Mooney 	vmo->vmo_refcnt = 1;
5390153d828SPatrick Mooney 
5400153d828SPatrick Mooney 	return (vmo);
5410153d828SPatrick Mooney }
5420153d828SPatrick Mooney 
5430153d828SPatrick Mooney /*
5440153d828SPatrick Mooney  * Allocate a VM object backed by an existing range of physical memory.
5450153d828SPatrick Mooney  */
5460153d828SPatrick Mooney vm_object_t *
vmm_mmio_alloc(vmspace_t * vmspace,uintptr_t gpa,size_t len,uintptr_t hpa)5470153d828SPatrick Mooney vmm_mmio_alloc(vmspace_t *vmspace, uintptr_t gpa, size_t len, uintptr_t hpa)
5480153d828SPatrick Mooney {
5490153d828SPatrick Mooney 	int error;
5500153d828SPatrick Mooney 	vm_object_t *obj;
5510153d828SPatrick Mooney 
5520153d828SPatrick Mooney 	obj = vm_object_mmio_allocate(len, hpa);
5530153d828SPatrick Mooney 	if (obj != NULL) {
5540153d828SPatrick Mooney 		error = vmspace_map(vmspace, obj, 0, gpa, len,
5550153d828SPatrick Mooney 		    PROT_READ | PROT_WRITE);
5560153d828SPatrick Mooney 		if (error != 0) {
5570153d828SPatrick Mooney 			vm_object_release(obj);
5580153d828SPatrick Mooney 			obj = NULL;
5590153d828SPatrick Mooney 		}
5600153d828SPatrick Mooney 	}
5610153d828SPatrick Mooney 
5620153d828SPatrick Mooney 	return (obj);
5630153d828SPatrick Mooney }
5640153d828SPatrick Mooney 
5650153d828SPatrick Mooney /*
5660153d828SPatrick Mooney  * Release a vm_object reference
5670153d828SPatrick Mooney  */
5680153d828SPatrick Mooney void
vm_object_release(vm_object_t * vmo)5690153d828SPatrick Mooney vm_object_release(vm_object_t *vmo)
5700153d828SPatrick Mooney {
5710153d828SPatrick Mooney 	ASSERT(vmo != NULL);
5720153d828SPatrick Mooney 
5730153d828SPatrick Mooney 	uint_t ref = atomic_dec_uint_nv(&vmo->vmo_refcnt);
5740153d828SPatrick Mooney 	/* underflow would be a deadly serious mistake */
5750153d828SPatrick Mooney 	VERIFY3U(ref, !=, UINT_MAX);
5760153d828SPatrick Mooney 	if (ref != 0) {
5770153d828SPatrick Mooney 		return;
5780153d828SPatrick Mooney 	}
5790153d828SPatrick Mooney 
5800153d828SPatrick Mooney 	switch (vmo->vmo_type) {
5810153d828SPatrick Mooney 	case VMOT_MEM:
5820153d828SPatrick Mooney 		vmmr_free((vmmr_region_t *)vmo->vmo_data);
5830153d828SPatrick Mooney 		break;
5840153d828SPatrick Mooney 	case VMOT_MMIO:
5850153d828SPatrick Mooney 		break;
5860153d828SPatrick Mooney 	default:
5870153d828SPatrick Mooney 		panic("unexpected object type %u", vmo->vmo_type);
5880153d828SPatrick Mooney 		break;
5890153d828SPatrick Mooney 	}
5900153d828SPatrick Mooney 
5910153d828SPatrick Mooney 	vmo->vmo_data = NULL;
5920153d828SPatrick Mooney 	vmo->vmo_size = 0;
5930153d828SPatrick Mooney 	kmem_free(vmo, sizeof (*vmo));
5940153d828SPatrick Mooney }
5950153d828SPatrick Mooney 
5960153d828SPatrick Mooney /*
5970153d828SPatrick Mooney  * Increase refcount for vm_object reference
5980153d828SPatrick Mooney  */
5990153d828SPatrick Mooney void
vm_object_reference(vm_object_t * vmo)6000153d828SPatrick Mooney vm_object_reference(vm_object_t *vmo)
6010153d828SPatrick Mooney {
6020153d828SPatrick Mooney 	ASSERT(vmo != NULL);
6030153d828SPatrick Mooney 
6040153d828SPatrick Mooney 	uint_t ref = atomic_inc_uint_nv(&vmo->vmo_refcnt);
6050153d828SPatrick Mooney 	/* overflow would be a deadly serious mistake */
6060153d828SPatrick Mooney 	VERIFY3U(ref, !=, 0);
6070153d828SPatrick Mooney }
6080153d828SPatrick Mooney 
6090153d828SPatrick Mooney /*
6100153d828SPatrick Mooney  * Get the host-physical PFN for a given offset into a vm_object.
6110153d828SPatrick Mooney  *
6120153d828SPatrick Mooney  * The provided `off` must be within the allocated size of the vm_object.
6130153d828SPatrick Mooney  */
6140153d828SPatrick Mooney pfn_t
vm_object_pfn(vm_object_t * vmo,uintptr_t off)6150153d828SPatrick Mooney vm_object_pfn(vm_object_t *vmo, uintptr_t off)
6160153d828SPatrick Mooney {
6170153d828SPatrick Mooney 	const uintptr_t aligned_off = off & PAGEMASK;
6180153d828SPatrick Mooney 
6190153d828SPatrick Mooney 	switch (vmo->vmo_type) {
6200153d828SPatrick Mooney 	case VMOT_MEM:
6210153d828SPatrick Mooney 		return (vm_object_pager_reservoir(vmo, aligned_off));
6220153d828SPatrick Mooney 	case VMOT_MMIO:
6230153d828SPatrick Mooney 		return (vm_object_pager_mmio(vmo, aligned_off));
6240153d828SPatrick Mooney 	case VMOT_NONE:
6250153d828SPatrick Mooney 		break;
6260153d828SPatrick Mooney 	}
6270153d828SPatrick Mooney 	panic("unexpected object type %u", vmo->vmo_type);
6280153d828SPatrick Mooney }
6290153d828SPatrick Mooney 
6300153d828SPatrick Mooney static vmspace_mapping_t *
vm_mapping_find(vmspace_t * vms,uintptr_t addr,size_t size)6310153d828SPatrick Mooney vm_mapping_find(vmspace_t *vms, uintptr_t addr, size_t size)
6320153d828SPatrick Mooney {
6330153d828SPatrick Mooney 	vmspace_mapping_t *vmsm;
6340153d828SPatrick Mooney 	list_t *ml = &vms->vms_maplist;
6350153d828SPatrick Mooney 	const uintptr_t range_end = addr + size;
6360153d828SPatrick Mooney 
6370153d828SPatrick Mooney 	ASSERT3U(addr, <=, range_end);
6380153d828SPatrick Mooney 
6390153d828SPatrick Mooney 	if (addr >= vms->vms_size) {
6400153d828SPatrick Mooney 		return (NULL);
6410153d828SPatrick Mooney 	}
6420153d828SPatrick Mooney 	for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
6430153d828SPatrick Mooney 		const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len;
6440153d828SPatrick Mooney 
6450153d828SPatrick Mooney 		if (addr >= vmsm->vmsm_addr && addr < seg_end) {
6460153d828SPatrick Mooney 			if (range_end <= seg_end) {
6470153d828SPatrick Mooney 				return (vmsm);
6480153d828SPatrick Mooney 			} else {
6490153d828SPatrick Mooney 				return (NULL);
6500153d828SPatrick Mooney 			}
6510153d828SPatrick Mooney 		}
6520153d828SPatrick Mooney 	}
6530153d828SPatrick Mooney 	return (NULL);
6540153d828SPatrick Mooney }
6550153d828SPatrick Mooney 
6560153d828SPatrick Mooney /*
6570153d828SPatrick Mooney  * Check to see if any mappings reside within [addr, addr + size) span in the
6580153d828SPatrick Mooney  * vmspace, returning true if that span is indeed empty.
6590153d828SPatrick Mooney  */
6600153d828SPatrick Mooney static bool
vm_mapping_gap(vmspace_t * vms,uintptr_t addr,size_t size)6610153d828SPatrick Mooney vm_mapping_gap(vmspace_t *vms, uintptr_t addr, size_t size)
6620153d828SPatrick Mooney {
6630153d828SPatrick Mooney 	vmspace_mapping_t *vmsm;
6640153d828SPatrick Mooney 	list_t *ml = &vms->vms_maplist;
6650153d828SPatrick Mooney 	const uintptr_t range_end = addr + size - 1;
6660153d828SPatrick Mooney 
6670153d828SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
6680153d828SPatrick Mooney 	ASSERT(size > 0);
6690153d828SPatrick Mooney 
6700153d828SPatrick Mooney 	for (vmsm = list_head(ml); vmsm != NULL; vmsm = list_next(ml, vmsm)) {
6710153d828SPatrick Mooney 		const uintptr_t seg_end = vmsm->vmsm_addr + vmsm->vmsm_len - 1;
6720153d828SPatrick Mooney 
6730153d828SPatrick Mooney 		/*
6740153d828SPatrick Mooney 		 * The two ranges do not overlap if the start of either of
6750153d828SPatrick Mooney 		 * them is after the end of the other.
6760153d828SPatrick Mooney 		 */
6770153d828SPatrick Mooney 		if (vmsm->vmsm_addr > range_end || addr > seg_end)
6780153d828SPatrick Mooney 			continue;
6790153d828SPatrick Mooney 		return (false);
6800153d828SPatrick Mooney 	}
6810153d828SPatrick Mooney 	return (true);
6820153d828SPatrick Mooney }
6830153d828SPatrick Mooney 
6840153d828SPatrick Mooney static void
vm_mapping_remove(vmspace_t * vms,vmspace_mapping_t * vmsm)6850153d828SPatrick Mooney vm_mapping_remove(vmspace_t *vms, vmspace_mapping_t *vmsm)
6860153d828SPatrick Mooney {
6870153d828SPatrick Mooney 	list_t *ml = &vms->vms_maplist;
6880153d828SPatrick Mooney 
6890153d828SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
6900153d828SPatrick Mooney 	ASSERT(vms->vms_held);
6910153d828SPatrick Mooney 
6920153d828SPatrick Mooney 	list_remove(ml, vmsm);
6930153d828SPatrick Mooney 	vm_object_release(vmsm->vmsm_object);
6940153d828SPatrick Mooney 	kmem_free(vmsm, sizeof (*vmsm));
6950153d828SPatrick Mooney }
6960153d828SPatrick Mooney 
6970153d828SPatrick Mooney /*
6980153d828SPatrick Mooney  * Enter a hold state on the vmspace.  This ensures that all VM clients
6990153d828SPatrick Mooney  * associated with the vmspace are excluded from establishing new page holds,
7000153d828SPatrick Mooney  * or any other actions which would require accessing vmspace state subject to
7010153d828SPatrick Mooney  * potential change.
7020153d828SPatrick Mooney  *
7030153d828SPatrick Mooney  * Returns with vmspace_t`vms_lock held.
7040153d828SPatrick Mooney  */
7050153d828SPatrick Mooney static void
vmspace_hold_enter(vmspace_t * vms)7060153d828SPatrick Mooney vmspace_hold_enter(vmspace_t *vms)
7070153d828SPatrick Mooney {
7080153d828SPatrick Mooney 	mutex_enter(&vms->vms_lock);
7090153d828SPatrick Mooney 	VERIFY(!vms->vms_held);
7100153d828SPatrick Mooney 
7110153d828SPatrick Mooney 	vm_client_t *vmc = list_head(&vms->vms_clients);
7120153d828SPatrick Mooney 	for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
7130153d828SPatrick Mooney 		vmc_space_hold(vmc);
7140153d828SPatrick Mooney 	}
7150153d828SPatrick Mooney 	vms->vms_held = true;
7160153d828SPatrick Mooney }
7170153d828SPatrick Mooney 
7180153d828SPatrick Mooney /*
7190153d828SPatrick Mooney  * Exit a hold state on the vmspace.  This releases all VM clients associated
7200153d828SPatrick Mooney  * with the vmspace to be able to establish new page holds, and partake in other
7210153d828SPatrick Mooney  * actions which require accessing changed vmspace state.  If `kick_on_cpu` is
7220153d828SPatrick Mooney  * true, then any CPUs actively using the page tables will be IPIed, and the
7230153d828SPatrick Mooney  * call will block until they have acknowledged being ready to use the latest
7240153d828SPatrick Mooney  * state of the tables.
7250153d828SPatrick Mooney  *
7260153d828SPatrick Mooney  * Requires vmspace_t`vms_lock be held, which is released as part of the call.
7270153d828SPatrick Mooney  */
7280153d828SPatrick Mooney static void
vmspace_hold_exit(vmspace_t * vms,bool kick_on_cpu)7290153d828SPatrick Mooney vmspace_hold_exit(vmspace_t *vms, bool kick_on_cpu)
7300153d828SPatrick Mooney {
7310153d828SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
7320153d828SPatrick Mooney 	VERIFY(vms->vms_held);
7330153d828SPatrick Mooney 
7340153d828SPatrick Mooney 	vm_client_t *vmc = list_head(&vms->vms_clients);
7350153d828SPatrick Mooney 	for (; vmc != NULL; vmc = list_next(&vms->vms_clients, vmc)) {
7360153d828SPatrick Mooney 		vmc_space_release(vmc, kick_on_cpu);
7370153d828SPatrick Mooney 	}
7380153d828SPatrick Mooney 	vms->vms_held = false;
7390153d828SPatrick Mooney 	mutex_exit(&vms->vms_lock);
7400153d828SPatrick Mooney }
7410153d828SPatrick Mooney 
742*b9b43e84SPatrick Mooney static void
vmspace_clients_invalidate(vmspace_t * vms,uintptr_t gpa,size_t len)743*b9b43e84SPatrick Mooney vmspace_clients_invalidate(vmspace_t *vms, uintptr_t gpa, size_t len)
744*b9b43e84SPatrick Mooney {
745*b9b43e84SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
746*b9b43e84SPatrick Mooney 	VERIFY(vms->vms_held);
747*b9b43e84SPatrick Mooney 
748*b9b43e84SPatrick Mooney 	for (vm_client_t *vmc = list_head(&vms->vms_clients);
749*b9b43e84SPatrick Mooney 	    vmc != NULL;
750*b9b43e84SPatrick Mooney 	    vmc = list_next(&vms->vms_clients, vmc)) {
751*b9b43e84SPatrick Mooney 		vmc_space_invalidate(vmc, gpa, len, vms->vms_pt_gen);
752*b9b43e84SPatrick Mooney 	}
753*b9b43e84SPatrick Mooney }
754*b9b43e84SPatrick Mooney 
7550153d828SPatrick Mooney /*
7560153d828SPatrick Mooney  * Attempt to map a vm_object span into the vmspace.
7570153d828SPatrick Mooney  *
7580153d828SPatrick Mooney  * Requirements:
7590153d828SPatrick Mooney  * - `obj_off`, `addr`, and `len` must be page-aligned
7600153d828SPatrick Mooney  * - `obj_off` cannot be greater than the allocated size of the object
7610153d828SPatrick Mooney  * - [`obj_off`, `obj_off` + `len`) span cannot extend beyond the allocated
7620153d828SPatrick Mooney  *   size of the object
7630153d828SPatrick Mooney  * - [`addr`, `addr` + `len`) span cannot reside beyond the maximum address
7640153d828SPatrick Mooney  *   of the vmspace
7650153d828SPatrick Mooney  */
7660153d828SPatrick Mooney int
vmspace_map(vmspace_t * vms,vm_object_t * vmo,uintptr_t obj_off,uintptr_t addr,size_t len,uint8_t prot)7670153d828SPatrick Mooney vmspace_map(vmspace_t *vms, vm_object_t *vmo, uintptr_t obj_off, uintptr_t addr,
7680153d828SPatrick Mooney     size_t len, uint8_t prot)
7690153d828SPatrick Mooney {
7700153d828SPatrick Mooney 	vmspace_mapping_t *vmsm;
7710153d828SPatrick Mooney 	int res = 0;
7720153d828SPatrick Mooney 
7730153d828SPatrick Mooney 	if (len == 0 || (addr + len) < addr ||
7740153d828SPatrick Mooney 	    obj_off >= (obj_off + len) || vmo->vmo_size < (obj_off + len)) {
7750153d828SPatrick Mooney 		return (EINVAL);
7760153d828SPatrick Mooney 	}
7770153d828SPatrick Mooney 	if ((addr + len) >= vms->vms_size) {
7780153d828SPatrick Mooney 		return (ENOMEM);
7790153d828SPatrick Mooney 	}
7800153d828SPatrick Mooney 
7810153d828SPatrick Mooney 	vmsm = kmem_alloc(sizeof (*vmsm), KM_SLEEP);
7820153d828SPatrick Mooney 
7830153d828SPatrick Mooney 	vmspace_hold_enter(vms);
7840153d828SPatrick Mooney 	if (!vm_mapping_gap(vms, addr, len)) {
7850153d828SPatrick Mooney 		kmem_free(vmsm, sizeof (*vmsm));
7860153d828SPatrick Mooney 		res = ENOMEM;
7870153d828SPatrick Mooney 	} else {
7880153d828SPatrick Mooney 		vmsm->vmsm_object = vmo;
7890153d828SPatrick Mooney 		vmsm->vmsm_addr = addr;
7900153d828SPatrick Mooney 		vmsm->vmsm_len = len;
7910153d828SPatrick Mooney 		vmsm->vmsm_offset = (off_t)obj_off;
7920153d828SPatrick Mooney 		vmsm->vmsm_prot = prot;
7930153d828SPatrick Mooney 		list_insert_tail(&vms->vms_maplist, vmsm);
7940153d828SPatrick Mooney 
7950153d828SPatrick Mooney 		/*
7960153d828SPatrick Mooney 		 * Make sure the GPT has tables ready for leaf entries across
7970153d828SPatrick Mooney 		 * the entire new mapping.
7980153d828SPatrick Mooney 		 */
7993a0fa64cSPatrick Mooney 		vmm_gpt_populate_region(vms->vms_gpt, addr, len);
8000153d828SPatrick Mooney 	}
8010153d828SPatrick Mooney 	vmspace_hold_exit(vms, false);
8020153d828SPatrick Mooney 	return (res);
8030153d828SPatrick Mooney }
8040153d828SPatrick Mooney 
/*
 * Unmap a region of the vmspace.
 *
 * Presently the [start, end) span must equal a region previously mapped by a
 * call to vmspace_map().
 *
 * Returns 0 on success, or ENOENT if the span does not exactly match an
 * existing mapping.
 */
int
vmspace_unmap(vmspace_t *vms, uintptr_t addr, uintptr_t len)
{
	const uintptr_t end = addr + len;
	vmspace_mapping_t *vmsm;
	vm_client_t *vmc;
	uint64_t gen = 0;

	ASSERT3U(addr, <, end);

	vmspace_hold_enter(vms);
	/* expect to match existing mapping exactly */
	if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL ||
	    vmsm->vmsm_addr != addr || vmsm->vmsm_len != len) {
		vmspace_hold_exit(vms, false);
		return (ENOENT);
	}

	/* Prepare clients (and their held pages) for the unmap. */
	for (vmc = list_head(&vms->vms_clients); vmc != NULL;
	    vmc = list_next(&vms->vms_clients, vmc)) {
		vmc_space_unmap(vmc, addr, len, vmsm->vmsm_object);
	}

	/* Clear all PTEs for region */
	if (vmm_gpt_unmap_region(vms->vms_gpt, addr, len) != 0) {
		/*
		 * Leaf entries were actually cleared: bump the table
		 * generation so clients can detect stale TLB state.
		 */
		vms->vms_pt_gen++;
		gen = vms->vms_pt_gen;
	}
	/* ... and the intermediate (directory) PTEs as well */
	vmm_gpt_vacate_region(vms->vms_gpt, addr, len);

	/*
	 * If pages were actually unmapped from the GPT, provide clients with
	 * an invalidation notice.
	 */
	if (gen != 0) {
		vmspace_clients_invalidate(vms, addr, len);
	}

	vm_mapping_remove(vms, vmsm);
	/* Kick any on-CPU clients so they pick up the updated tables */
	vmspace_hold_exit(vms, true);
	return (0);
}
8550153d828SPatrick Mooney 
856*b9b43e84SPatrick Mooney /*
857*b9b43e84SPatrick Mooney  * For a given GPA in the vmspace, ensure that the backing page (if any) is
858*b9b43e84SPatrick Mooney  * properly mapped as present in the provided PTE.
859*b9b43e84SPatrick Mooney  */
860*b9b43e84SPatrick Mooney static int
vmspace_ensure_mapped(vmspace_t * vms,uintptr_t gpa,int req_prot,pfn_t * pfnp,uint64_t * leaf_pte)861*b9b43e84SPatrick Mooney vmspace_ensure_mapped(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
862*b9b43e84SPatrick Mooney     uint64_t *leaf_pte)
863*b9b43e84SPatrick Mooney {
864*b9b43e84SPatrick Mooney 	vmspace_mapping_t *vmsm;
865*b9b43e84SPatrick Mooney 	vm_object_t *vmo;
866*b9b43e84SPatrick Mooney 	pfn_t pfn;
867*b9b43e84SPatrick Mooney 
868*b9b43e84SPatrick Mooney 	ASSERT(pfnp != NULL);
869*b9b43e84SPatrick Mooney 	ASSERT(leaf_pte != NULL);
870*b9b43e84SPatrick Mooney 
871*b9b43e84SPatrick Mooney 	vmsm = vm_mapping_find(vms, gpa, PAGESIZE);
872*b9b43e84SPatrick Mooney 	if (vmsm == NULL) {
873*b9b43e84SPatrick Mooney 		return (FC_NOMAP);
874*b9b43e84SPatrick Mooney 	}
875*b9b43e84SPatrick Mooney 	if ((req_prot & vmsm->vmsm_prot) != req_prot) {
876*b9b43e84SPatrick Mooney 		return (FC_PROT);
877*b9b43e84SPatrick Mooney 	}
878*b9b43e84SPatrick Mooney 
879*b9b43e84SPatrick Mooney 	vmo = vmsm->vmsm_object;
880*b9b43e84SPatrick Mooney 	pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa));
881*b9b43e84SPatrick Mooney 	VERIFY(pfn != PFN_INVALID);
882*b9b43e84SPatrick Mooney 
883*b9b43e84SPatrick Mooney 	if (vmm_gpt_map_at(vms->vms_gpt, leaf_pte, pfn, vmsm->vmsm_prot,
884*b9b43e84SPatrick Mooney 	    vmo->vmo_attr)) {
885*b9b43e84SPatrick Mooney 		atomic_inc_64(&vms->vms_pages_mapped);
886*b9b43e84SPatrick Mooney 	}
887*b9b43e84SPatrick Mooney 
888*b9b43e84SPatrick Mooney 	*pfnp = pfn;
889*b9b43e84SPatrick Mooney 	return (0);
890*b9b43e84SPatrick Mooney }
891*b9b43e84SPatrick Mooney 
/*
 * Look up the PTE for a given GPA in the vmspace, populating it with
 * appropriate contents (pfn, protection, etc) if it is empty, but backed by a
 * valid mapping.
 *
 * `gpa` must be page-aligned and `req_prot` must request at least one of
 * read/write/execute.  On success, the backing PFN and leaf PTE pointer are
 * returned through the optional `pfnp`/`ptepp` out-parameters.
 */
static int
vmspace_lookup_map(vmspace_t *vms, uintptr_t gpa, int req_prot, pfn_t *pfnp,
    uint64_t **ptepp)
{
	vmm_gpt_t *gpt = vms->vms_gpt;
	uint64_t *entries[MAX_GPT_LEVEL], *leaf;
	pfn_t pfn = PFN_INVALID;
	uint_t prot;

	ASSERT0(gpa & PAGEOFFSET);
	ASSERT((req_prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) != PROT_NONE);

	/* Walk the GPT down to the leaf-level entry for this GPA */
	vmm_gpt_walk(gpt, gpa, entries, MAX_GPT_LEVEL);
	leaf = entries[LEVEL1];
	if (leaf == NULL) {
		/*
		 * Since we populated the intermediate tables for any regions
		 * mapped in the GPT, an empty leaf entry indicates there is no
		 * mapping, populated or not, at this GPT.
		 */
		return (FC_NOMAP);
	}

	if (vmm_gpt_is_mapped(gpt, leaf, &pfn, &prot)) {
		/* Already present: just check the requested access */
		if ((req_prot & prot) != req_prot) {
			return (FC_PROT);
		}
	} else {
		/* Empty leaf: fault the backing page into the PTE on demand */
		int err = vmspace_ensure_mapped(vms, gpa, req_prot, &pfn, leaf);
		if (err != 0) {
			return (err);
		}
	}

	ASSERT(pfn != PFN_INVALID && leaf != NULL);
	if (pfnp != NULL) {
		*pfnp = pfn;
	}
	if (ptepp != NULL) {
		*ptepp = leaf;
	}
	return (0);
}
9400153d828SPatrick Mooney 
9410153d828SPatrick Mooney /*
9420153d828SPatrick Mooney  * Populate (make resident in the page tables) a region of the vmspace.
9430153d828SPatrick Mooney  *
9440153d828SPatrick Mooney  * Presently the [start, end) span must equal a region previously mapped by a
9450153d828SPatrick Mooney  * call to vmspace_map().
9460153d828SPatrick Mooney  */
9470153d828SPatrick Mooney int
vmspace_populate(vmspace_t * vms,uintptr_t addr,uintptr_t len)9483a0fa64cSPatrick Mooney vmspace_populate(vmspace_t *vms, uintptr_t addr, uintptr_t len)
9490153d828SPatrick Mooney {
9500153d828SPatrick Mooney 	vmspace_mapping_t *vmsm;
9510153d828SPatrick Mooney 	mutex_enter(&vms->vms_lock);
9520153d828SPatrick Mooney 
9530153d828SPatrick Mooney 	/* For the time being, only exact-match mappings are expected */
9543a0fa64cSPatrick Mooney 	if ((vmsm = vm_mapping_find(vms, addr, len)) == NULL) {
9550153d828SPatrick Mooney 		mutex_exit(&vms->vms_lock);
9560153d828SPatrick Mooney 		return (FC_NOMAP);
9570153d828SPatrick Mooney 	}
9580153d828SPatrick Mooney 
9590153d828SPatrick Mooney 	vm_object_t *vmo = vmsm->vmsm_object;
9600153d828SPatrick Mooney 	const int prot = vmsm->vmsm_prot;
9610153d828SPatrick Mooney 	const uint8_t attr = vmo->vmo_attr;
9620153d828SPatrick Mooney 	size_t populated = 0;
9633a0fa64cSPatrick Mooney 	const size_t end = addr + len;
9643a0fa64cSPatrick Mooney 	for (uintptr_t gpa = addr & PAGEMASK; gpa < end; gpa += PAGESIZE) {
9650153d828SPatrick Mooney 		const pfn_t pfn = vm_object_pfn(vmo, VMSM_OFFSET(vmsm, gpa));
9660153d828SPatrick Mooney 		VERIFY(pfn != PFN_INVALID);
9670153d828SPatrick Mooney 
9680153d828SPatrick Mooney 		if (vmm_gpt_map(vms->vms_gpt, gpa, pfn, prot, attr)) {
9690153d828SPatrick Mooney 			populated++;
9700153d828SPatrick Mooney 		}
9710153d828SPatrick Mooney 	}
9720153d828SPatrick Mooney 	atomic_add_64(&vms->vms_pages_mapped, populated);
9730153d828SPatrick Mooney 
9740153d828SPatrick Mooney 	mutex_exit(&vms->vms_lock);
9750153d828SPatrick Mooney 	return (0);
9760153d828SPatrick Mooney }
9770153d828SPatrick Mooney 
9780153d828SPatrick Mooney /*
9790153d828SPatrick Mooney  * Allocate a client from a given vmspace.
9800153d828SPatrick Mooney  */
9810153d828SPatrick Mooney vm_client_t *
vmspace_client_alloc(vmspace_t * vms)9820153d828SPatrick Mooney vmspace_client_alloc(vmspace_t *vms)
9830153d828SPatrick Mooney {
9840153d828SPatrick Mooney 	vm_client_t *vmc;
9850153d828SPatrick Mooney 
9860153d828SPatrick Mooney 	vmc = kmem_zalloc(sizeof (vm_client_t), KM_SLEEP);
9870153d828SPatrick Mooney 	vmc->vmc_space = vms;
9880153d828SPatrick Mooney 	mutex_init(&vmc->vmc_lock, NULL, MUTEX_DRIVER, NULL);
9890153d828SPatrick Mooney 	cv_init(&vmc->vmc_cv, NULL, CV_DRIVER, NULL);
9900153d828SPatrick Mooney 	vmc->vmc_state = VCS_IDLE;
9910153d828SPatrick Mooney 	vmc->vmc_cpu_active = -1;
9920153d828SPatrick Mooney 	list_create(&vmc->vmc_held_pages, sizeof (vm_page_t),
9930153d828SPatrick Mooney 	    offsetof(vm_page_t, vmp_node));
9940153d828SPatrick Mooney 	vmc->vmc_track_dirty = vms->vms_track_dirty;
9950153d828SPatrick Mooney 
9960153d828SPatrick Mooney 	mutex_enter(&vms->vms_lock);
9970153d828SPatrick Mooney 	list_insert_tail(&vms->vms_clients, vmc);
9980153d828SPatrick Mooney 	mutex_exit(&vms->vms_lock);
9990153d828SPatrick Mooney 
10000153d828SPatrick Mooney 	return (vmc);
10010153d828SPatrick Mooney }
10020153d828SPatrick Mooney 
10030153d828SPatrick Mooney /*
10040153d828SPatrick Mooney  * Get the nested page table root pointer (EPTP/NCR3) value.
10050153d828SPatrick Mooney  */
10060153d828SPatrick Mooney uint64_t
vmspace_table_root(vmspace_t * vms)10070153d828SPatrick Mooney vmspace_table_root(vmspace_t *vms)
10080153d828SPatrick Mooney {
10094ac713daSLuqman Aden 	return (vmm_gpt_get_pmtp(vms->vms_gpt, vms->vms_track_dirty));
10100153d828SPatrick Mooney }
10110153d828SPatrick Mooney 
10120153d828SPatrick Mooney /*
10130153d828SPatrick Mooney  * Get the current generation number of the nested page table.
10140153d828SPatrick Mooney  */
10150153d828SPatrick Mooney uint64_t
vmspace_table_gen(vmspace_t * vms)10160153d828SPatrick Mooney vmspace_table_gen(vmspace_t *vms)
10170153d828SPatrick Mooney {
10180153d828SPatrick Mooney 	return (vms->vms_pt_gen);
10190153d828SPatrick Mooney }
10200153d828SPatrick Mooney 
10210153d828SPatrick Mooney /*
10220153d828SPatrick Mooney  * Mark a vm_client as active.  This will block if/while the client is held by
10230153d828SPatrick Mooney  * the vmspace.  On success, it returns with vm_client_t`vmc_lock held.  It will
10240153d828SPatrick Mooney  * fail if the vm_client has been orphaned.
10250153d828SPatrick Mooney  */
10260153d828SPatrick Mooney static int
vmc_activate(vm_client_t * vmc)10270153d828SPatrick Mooney vmc_activate(vm_client_t *vmc)
10280153d828SPatrick Mooney {
10290153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
10300153d828SPatrick Mooney 	VERIFY0(vmc->vmc_state & VCS_ACTIVE);
10310153d828SPatrick Mooney 	if ((vmc->vmc_state & VCS_ORPHANED) != 0) {
1032cc7a5a3bSPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
10330153d828SPatrick Mooney 		return (ENXIO);
10340153d828SPatrick Mooney 	}
10350153d828SPatrick Mooney 	while ((vmc->vmc_state & VCS_HOLD) != 0) {
10360153d828SPatrick Mooney 		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
10370153d828SPatrick Mooney 	}
10380153d828SPatrick Mooney 	vmc->vmc_state |= VCS_ACTIVE;
10390153d828SPatrick Mooney 	return (0);
10400153d828SPatrick Mooney }
10410153d828SPatrick Mooney 
10420153d828SPatrick Mooney /*
10430153d828SPatrick Mooney  * Mark a vm_client as no longer active.  It must be called with
10440153d828SPatrick Mooney  * vm_client_t`vmc_lock already held, and will return with it released.
10450153d828SPatrick Mooney  */
10460153d828SPatrick Mooney static void
vmc_deactivate(vm_client_t * vmc)10470153d828SPatrick Mooney vmc_deactivate(vm_client_t *vmc)
10480153d828SPatrick Mooney {
10490153d828SPatrick Mooney 	ASSERT(MUTEX_HELD(&vmc->vmc_lock));
10500153d828SPatrick Mooney 	VERIFY(vmc->vmc_state & VCS_ACTIVE);
10510153d828SPatrick Mooney 
10520153d828SPatrick Mooney 	vmc->vmc_state ^= VCS_ACTIVE;
10530153d828SPatrick Mooney 	if ((vmc->vmc_state & VCS_HOLD) != 0) {
10540153d828SPatrick Mooney 		cv_broadcast(&vmc->vmc_cv);
10550153d828SPatrick Mooney 	}
10560153d828SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
10570153d828SPatrick Mooney }
10580153d828SPatrick Mooney 
/*
 * Indicate that a CPU will be utilizing the nested page tables through this VM
 * client.  Interrupts (and/or the GIF) are expected to be disabled when calling
 * this function.  Returns the generation number of the nested page table (to be
 * used for TLB invalidations).
 */
uint64_t
vmc_table_enter(vm_client_t *vmc)
{
	vmspace_t *vms = vmc->vmc_space;
	uint64_t gen;

	ASSERT0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
	ASSERT3S(vmc->vmc_cpu_active, ==, -1);

	/*
	 * Since the NPT activation occurs with interrupts disabled, this must
	 * be done without taking vmc_lock like normal.
	 */
	gen = vms->vms_pt_gen;
	/* Record the occupying CPU and the table generation it observed */
	vmc->vmc_cpu_active = CPU->cpu_id;
	vmc->vmc_cpu_gen = gen;
	/* Atomic update, since vmc_lock cannot be taken in this context */
	atomic_or_uint(&vmc->vmc_state, VCS_ON_CPU);

	return (gen);
}
10850153d828SPatrick Mooney 
10860153d828SPatrick Mooney /*
10870153d828SPatrick Mooney  * Indicate that this VM client is not longer (directly) using the underlying
10880153d828SPatrick Mooney  * page tables.  Interrupts (and/or the GIF) must be enabled prior to calling
10890153d828SPatrick Mooney  * this function.
10900153d828SPatrick Mooney  */
10910153d828SPatrick Mooney void
vmc_table_exit(vm_client_t * vmc)10920153d828SPatrick Mooney vmc_table_exit(vm_client_t *vmc)
10930153d828SPatrick Mooney {
10940153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
10950153d828SPatrick Mooney 
10960153d828SPatrick Mooney 	ASSERT(vmc->vmc_state & VCS_ON_CPU);
10970153d828SPatrick Mooney 	vmc->vmc_state ^= VCS_ON_CPU;
10980153d828SPatrick Mooney 	vmc->vmc_cpu_active = -1;
10990153d828SPatrick Mooney 	if ((vmc->vmc_state & VCS_HOLD) != 0) {
11000153d828SPatrick Mooney 		cv_broadcast(&vmc->vmc_cv);
11010153d828SPatrick Mooney 	}
11020153d828SPatrick Mooney 
11030153d828SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
11040153d828SPatrick Mooney }
11050153d828SPatrick Mooney 
/*
 * Place a hold on a single client on behalf of the vmspace: set VCS_HOLD and
 * wait until the client is no longer active.  Paired with
 * vmc_space_release().
 */
static void
vmc_space_hold(vm_client_t *vmc)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY0(vmc->vmc_state & VCS_HOLD);

	/*
	 * Because vmc_table_enter() alters vmc_state from a context where
	 * interrupts are disabled, it cannot pay heed to vmc_lock, so setting
	 * VMC_HOLD must be done atomically here.
	 */
	atomic_or_uint(&vmc->vmc_state, VCS_HOLD);

	/* Wait for client to go inactive */
	while ((vmc->vmc_state & VCS_ACTIVE) != 0) {
		cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
	}
	mutex_exit(&vmc->vmc_lock);
}
11250153d828SPatrick Mooney 
/*
 * Release the hold previously placed on a client by vmc_space_hold().  If
 * `kick_on_cpu` is set and the client is currently on a CPU, that CPU is
 * poked and the call waits for the client to leave the CPU before the hold is
 * cleared.
 */
static void
vmc_space_release(vm_client_t *vmc, bool kick_on_cpu)
{
	mutex_enter(&vmc->vmc_lock);
	VERIFY(vmc->vmc_state & VCS_HOLD);

	if (kick_on_cpu && (vmc->vmc_state & VCS_ON_CPU) != 0) {
		/* IPI the CPU so it re-reads table state on its way back in */
		poke_cpu(vmc->vmc_cpu_active);

		while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
			cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
		}
	}

	/*
	 * Because vmc_table_enter() alters vmc_state from a context where
	 * interrupts are disabled, it cannot pay heed to vmc_lock, so clearing
	 * VMC_HOLD must be done atomically here.
	 */
	atomic_and_uint(&vmc->vmc_state, ~VCS_HOLD);
	/* Wake anyone blocked in vmc_activate() waiting on the hold */
	cv_broadcast(&vmc->vmc_cv);
	mutex_exit(&vmc->vmc_lock);
}
11490153d828SPatrick Mooney 
11500153d828SPatrick Mooney static void
vmc_space_invalidate(vm_client_t * vmc,uintptr_t addr,size_t size,uint64_t gen)11510153d828SPatrick Mooney vmc_space_invalidate(vm_client_t *vmc, uintptr_t addr, size_t size,
11520153d828SPatrick Mooney     uint64_t gen)
11530153d828SPatrick Mooney {
11540153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
11550153d828SPatrick Mooney 	VERIFY(vmc->vmc_state & VCS_HOLD);
11560153d828SPatrick Mooney 	if ((vmc->vmc_state & VCS_ON_CPU) != 0) {
11570153d828SPatrick Mooney 		/*
11580153d828SPatrick Mooney 		 * Wait for clients using an old generation of the page tables
11590153d828SPatrick Mooney 		 * to exit guest context, where they subsequently flush the TLB
11600153d828SPatrick Mooney 		 * for the new generation.
11610153d828SPatrick Mooney 		 */
11620153d828SPatrick Mooney 		if (vmc->vmc_cpu_gen < gen) {
11630153d828SPatrick Mooney 			poke_cpu(vmc->vmc_cpu_active);
11640153d828SPatrick Mooney 
11650153d828SPatrick Mooney 			while ((vmc->vmc_state & VCS_ON_CPU) != 0) {
11660153d828SPatrick Mooney 				cv_wait(&vmc->vmc_cv, &vmc->vmc_lock);
11670153d828SPatrick Mooney 			}
11680153d828SPatrick Mooney 		}
11690153d828SPatrick Mooney 	}
11700153d828SPatrick Mooney 	if (vmc->vmc_inval_func != NULL) {
11710153d828SPatrick Mooney 		vmc_inval_cb_t func = vmc->vmc_inval_func;
11720153d828SPatrick Mooney 		void *data = vmc->vmc_inval_data;
11730153d828SPatrick Mooney 
11740153d828SPatrick Mooney 		/*
11750153d828SPatrick Mooney 		 * Perform the actual invalidation call outside vmc_lock to
11760153d828SPatrick Mooney 		 * avoid lock ordering issues in the consumer.  Since the client
11770153d828SPatrick Mooney 		 * is under VCS_HOLD, this is safe.
11780153d828SPatrick Mooney 		 */
11790153d828SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
11800153d828SPatrick Mooney 		func(data, addr, size);
11810153d828SPatrick Mooney 		mutex_enter(&vmc->vmc_lock);
11820153d828SPatrick Mooney 	}
11830153d828SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
11840153d828SPatrick Mooney }
11850153d828SPatrick Mooney 
11860153d828SPatrick Mooney static void
vmc_space_unmap(vm_client_t * vmc,uintptr_t addr,size_t size,vm_object_t * vmo)11870153d828SPatrick Mooney vmc_space_unmap(vm_client_t *vmc, uintptr_t addr, size_t size,
11880153d828SPatrick Mooney     vm_object_t *vmo)
11890153d828SPatrick Mooney {
11900153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
11910153d828SPatrick Mooney 	VERIFY(vmc->vmc_state & VCS_HOLD);
11920153d828SPatrick Mooney 
11930153d828SPatrick Mooney 	/*
11940153d828SPatrick Mooney 	 * With the current vCPU exclusion invariants in place, we do not expect
11950153d828SPatrick Mooney 	 * a vCPU to be in guest context during an unmap.
11960153d828SPatrick Mooney 	 */
11970153d828SPatrick Mooney 	VERIFY0(vmc->vmc_state & VCS_ON_CPU);
11980153d828SPatrick Mooney 
11990153d828SPatrick Mooney 	/*
12000153d828SPatrick Mooney 	 * Any holds against the unmapped region need to establish their own
12010153d828SPatrick Mooney 	 * reference to the underlying object to avoid a potential
12020153d828SPatrick Mooney 	 * use-after-free.
12030153d828SPatrick Mooney 	 */
12040153d828SPatrick Mooney 	for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
12050153d828SPatrick Mooney 	    vmp != NULL;
12060153d828SPatrick Mooney 	    vmp = list_next(&vmc->vmc_held_pages, vmc)) {
12070153d828SPatrick Mooney 		if (vmp->vmp_gpa < addr ||
12080153d828SPatrick Mooney 		    vmp->vmp_gpa >= (addr + size)) {
12090153d828SPatrick Mooney 			/* Hold outside region in question */
12100153d828SPatrick Mooney 			continue;
12110153d828SPatrick Mooney 		}
12120153d828SPatrick Mooney 		if (vmp->vmp_obj_ref == NULL) {
12130153d828SPatrick Mooney 			vm_object_reference(vmo);
12140153d828SPatrick Mooney 			vmp->vmp_obj_ref = vmo;
12150153d828SPatrick Mooney 			/* For an unmapped region, PTE is now meaningless */
12160153d828SPatrick Mooney 			vmp->vmp_ptep = NULL;
12170153d828SPatrick Mooney 		} else {
12180153d828SPatrick Mooney 			/*
12190153d828SPatrick Mooney 			 * Object could have gone through cycle of
12200153d828SPatrick Mooney 			 * unmap-map-unmap before the hold was released.
12210153d828SPatrick Mooney 			 */
12220153d828SPatrick Mooney 			VERIFY3P(vmp->vmp_ptep, ==, NULL);
12230153d828SPatrick Mooney 		}
12240153d828SPatrick Mooney 	}
12250153d828SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
12260153d828SPatrick Mooney }
12270153d828SPatrick Mooney 
12280153d828SPatrick Mooney static vm_client_t *
vmc_space_orphan(vm_client_t * vmc,vmspace_t * vms)12290153d828SPatrick Mooney vmc_space_orphan(vm_client_t *vmc, vmspace_t *vms)
12300153d828SPatrick Mooney {
12310153d828SPatrick Mooney 	vm_client_t *next;
12320153d828SPatrick Mooney 
12330153d828SPatrick Mooney 	ASSERT(MUTEX_HELD(&vms->vms_lock));
12340153d828SPatrick Mooney 
12350153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
12360153d828SPatrick Mooney 	VERIFY3P(vmc->vmc_space, ==, vms);
12370153d828SPatrick Mooney 	VERIFY0(vmc->vmc_state & VCS_ORPHANED);
12380153d828SPatrick Mooney 	if (vmc->vmc_state & VCS_DESTROY) {
12390153d828SPatrick Mooney 		/*
12400153d828SPatrick Mooney 		 * This vm_client is currently undergoing destruction, so it
12410153d828SPatrick Mooney 		 * does not need to be orphaned.  Let it proceed with its own
12420153d828SPatrick Mooney 		 * clean-up task.
12430153d828SPatrick Mooney 		 */
12440153d828SPatrick Mooney 		next = list_next(&vms->vms_clients, vmc);
12450153d828SPatrick Mooney 	} else {
12460153d828SPatrick Mooney 		/*
12470153d828SPatrick Mooney 		 * Clients are only orphaned when the containing vmspace is
12480153d828SPatrick Mooney 		 * being torn down.  All mappings from the vmspace should
12490153d828SPatrick Mooney 		 * already be gone, meaning any remaining held pages should have
12500153d828SPatrick Mooney 		 * direct references to the object.
12510153d828SPatrick Mooney 		 */
12520153d828SPatrick Mooney 		for (vm_page_t *vmp = list_head(&vmc->vmc_held_pages);
12530153d828SPatrick Mooney 		    vmp != NULL;
12540153d828SPatrick Mooney 		    vmp = list_next(&vmc->vmc_held_pages, vmp)) {
12550153d828SPatrick Mooney 			ASSERT3P(vmp->vmp_ptep, ==, NULL);
12560153d828SPatrick Mooney 			ASSERT3P(vmp->vmp_obj_ref, !=, NULL);
12570153d828SPatrick Mooney 		}
12580153d828SPatrick Mooney 
12590153d828SPatrick Mooney 		/*
12600153d828SPatrick Mooney 		 * After this point, the client will be orphaned, unable to
12610153d828SPatrick Mooney 		 * establish new page holds (or access any vmspace-related
12620153d828SPatrick Mooney 		 * resources) and is in charge of cleaning up after itself.
12630153d828SPatrick Mooney 		 */
12640153d828SPatrick Mooney 		vmc->vmc_state |= VCS_ORPHANED;
12650153d828SPatrick Mooney 		next = list_next(&vms->vms_clients, vmc);
12660153d828SPatrick Mooney 		list_remove(&vms->vms_clients, vmc);
12670153d828SPatrick Mooney 		vmc->vmc_space = NULL;
12680153d828SPatrick Mooney 	}
12690153d828SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
12700153d828SPatrick Mooney 	return (next);
12710153d828SPatrick Mooney }
12720153d828SPatrick Mooney 
12730153d828SPatrick Mooney /*
12740153d828SPatrick Mooney  * Attempt to hold a page at `gpa` inside the referenced vmspace.
12750153d828SPatrick Mooney  */
12760153d828SPatrick Mooney vm_page_t *
vmc_hold_ext(vm_client_t * vmc,uintptr_t gpa,int prot,int flags)1277f2357d97SPatrick Mooney vmc_hold_ext(vm_client_t *vmc, uintptr_t gpa, int prot, int flags)
12780153d828SPatrick Mooney {
12790153d828SPatrick Mooney 	vmspace_t *vms = vmc->vmc_space;
12800153d828SPatrick Mooney 	vm_page_t *vmp;
12810153d828SPatrick Mooney 	pfn_t pfn = PFN_INVALID;
12820153d828SPatrick Mooney 	uint64_t *ptep = NULL;
12830153d828SPatrick Mooney 
12840153d828SPatrick Mooney 	ASSERT0(gpa & PAGEOFFSET);
12850153d828SPatrick Mooney 	ASSERT((prot & (PROT_READ | PROT_WRITE)) != PROT_NONE);
1286f2357d97SPatrick Mooney 	ASSERT0(prot & ~PROT_ALL);
1287f2357d97SPatrick Mooney 	ASSERT0(flags & ~VPF_ALL);
12880153d828SPatrick Mooney 
12890153d828SPatrick Mooney 	vmp = kmem_alloc(sizeof (*vmp), KM_SLEEP);
12900153d828SPatrick Mooney 	if (vmc_activate(vmc) != 0) {
12910153d828SPatrick Mooney 		kmem_free(vmp, sizeof (*vmp));
12920153d828SPatrick Mooney 		return (NULL);
12930153d828SPatrick Mooney 	}
12940153d828SPatrick Mooney 
12950153d828SPatrick Mooney 	if (vmspace_lookup_map(vms, gpa, prot, &pfn, &ptep) != 0) {
12960153d828SPatrick Mooney 		vmc_deactivate(vmc);
12970153d828SPatrick Mooney 		kmem_free(vmp, sizeof (*vmp));
12980153d828SPatrick Mooney 		return (NULL);
12990153d828SPatrick Mooney 	}
13000153d828SPatrick Mooney 	ASSERT(pfn != PFN_INVALID && ptep != NULL);
13010153d828SPatrick Mooney 
13020153d828SPatrick Mooney 	vmp->vmp_client = vmc;
13030153d828SPatrick Mooney 	vmp->vmp_chain = NULL;
13040153d828SPatrick Mooney 	vmp->vmp_gpa = gpa;
13050153d828SPatrick Mooney 	vmp->vmp_pfn = pfn;
13060153d828SPatrick Mooney 	vmp->vmp_ptep = ptep;
13070153d828SPatrick Mooney 	vmp->vmp_obj_ref = NULL;
1308f2357d97SPatrick Mooney 	vmp->vmp_prot = (uint8_t)prot;
1309f2357d97SPatrick Mooney 	vmp->vmp_flags = (uint8_t)flags;
13100153d828SPatrick Mooney 	list_insert_tail(&vmc->vmc_held_pages, vmp);
13110153d828SPatrick Mooney 	vmc_deactivate(vmc);
13120153d828SPatrick Mooney 
13130153d828SPatrick Mooney 	return (vmp);
13140153d828SPatrick Mooney }
13150153d828SPatrick Mooney 
1316f2357d97SPatrick Mooney /*
1317f2357d97SPatrick Mooney  * Attempt to hold a page at `gpa` inside the referenced vmspace.
1318f2357d97SPatrick Mooney  */
1319f2357d97SPatrick Mooney vm_page_t *
vmc_hold(vm_client_t * vmc,uintptr_t gpa,int prot)1320f2357d97SPatrick Mooney vmc_hold(vm_client_t *vmc, uintptr_t gpa, int prot)
1321f2357d97SPatrick Mooney {
1322f2357d97SPatrick Mooney 	return (vmc_hold_ext(vmc, gpa, prot, VPF_DEFAULT));
1323f2357d97SPatrick Mooney }
1324f2357d97SPatrick Mooney 
13250153d828SPatrick Mooney int
vmc_fault(vm_client_t * vmc,uintptr_t gpa,int prot)13260153d828SPatrick Mooney vmc_fault(vm_client_t *vmc, uintptr_t gpa, int prot)
13270153d828SPatrick Mooney {
13280153d828SPatrick Mooney 	vmspace_t *vms = vmc->vmc_space;
13290153d828SPatrick Mooney 	int err;
13300153d828SPatrick Mooney 
13310153d828SPatrick Mooney 	err = vmc_activate(vmc);
13320153d828SPatrick Mooney 	if (err == 0) {
13330153d828SPatrick Mooney 		err = vmspace_lookup_map(vms, gpa & PAGEMASK, prot, NULL, NULL);
13340153d828SPatrick Mooney 		vmc_deactivate(vmc);
13350153d828SPatrick Mooney 	}
13360153d828SPatrick Mooney 
13370153d828SPatrick Mooney 	return (err);
13380153d828SPatrick Mooney }
13390153d828SPatrick Mooney 
13400153d828SPatrick Mooney /*
13410153d828SPatrick Mooney  * Allocate an additional vm_client_t, based on an existing one.  Only the
13420153d828SPatrick Mooney  * associatation with the vmspace is cloned, not existing holds or any
13430153d828SPatrick Mooney  * configured invalidation function.
13440153d828SPatrick Mooney  */
13450153d828SPatrick Mooney vm_client_t *
vmc_clone(vm_client_t * vmc)13460153d828SPatrick Mooney vmc_clone(vm_client_t *vmc)
13470153d828SPatrick Mooney {
13480153d828SPatrick Mooney 	vmspace_t *vms = vmc->vmc_space;
13490153d828SPatrick Mooney 
13500153d828SPatrick Mooney 	return (vmspace_client_alloc(vms));
13510153d828SPatrick Mooney }
13520153d828SPatrick Mooney 
13530153d828SPatrick Mooney /*
13540153d828SPatrick Mooney  * Register a function (and associated data pointer) to be called when an
13550153d828SPatrick Mooney  * address range in the vmspace is invalidated.
13560153d828SPatrick Mooney  */
13570153d828SPatrick Mooney int
vmc_set_inval_cb(vm_client_t * vmc,vmc_inval_cb_t func,void * data)13580153d828SPatrick Mooney vmc_set_inval_cb(vm_client_t *vmc, vmc_inval_cb_t func, void *data)
13590153d828SPatrick Mooney {
13600153d828SPatrick Mooney 	int err;
13610153d828SPatrick Mooney 
13620153d828SPatrick Mooney 	err = vmc_activate(vmc);
13630153d828SPatrick Mooney 	if (err == 0) {
13640153d828SPatrick Mooney 		vmc->vmc_inval_func = func;
13650153d828SPatrick Mooney 		vmc->vmc_inval_data = data;
13660153d828SPatrick Mooney 		vmc_deactivate(vmc);
13670153d828SPatrick Mooney 	}
13680153d828SPatrick Mooney 
13690153d828SPatrick Mooney 	return (err);
13700153d828SPatrick Mooney }
13710153d828SPatrick Mooney 
13720153d828SPatrick Mooney /*
13730153d828SPatrick Mooney  * Destroy a vm_client_t instance.
13740153d828SPatrick Mooney  *
13750153d828SPatrick Mooney  * No pages held through this vm_client_t may be outstanding when performing a
13760153d828SPatrick Mooney  * vmc_destroy().  For vCPU clients, the client cannot be on-CPU (a call to
13770153d828SPatrick Mooney  * vmc_table_exit() has been made).
13780153d828SPatrick Mooney  */
13790153d828SPatrick Mooney void
vmc_destroy(vm_client_t * vmc)13800153d828SPatrick Mooney vmc_destroy(vm_client_t *vmc)
13810153d828SPatrick Mooney {
13820153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
13830153d828SPatrick Mooney 
13840153d828SPatrick Mooney 	VERIFY(list_is_empty(&vmc->vmc_held_pages));
13850153d828SPatrick Mooney 	VERIFY0(vmc->vmc_state & (VCS_ACTIVE | VCS_ON_CPU));
13860153d828SPatrick Mooney 
13870153d828SPatrick Mooney 	if ((vmc->vmc_state & VCS_ORPHANED) == 0) {
13880153d828SPatrick Mooney 		vmspace_t *vms;
13890153d828SPatrick Mooney 
13900153d828SPatrick Mooney 		/*
13910153d828SPatrick Mooney 		 * Deassociation with the parent vmspace must be done carefully:
13920153d828SPatrick Mooney 		 * The vmspace could attempt to orphan this vm_client while we
13930153d828SPatrick Mooney 		 * release vmc_lock in order to take vms_lock (the required
13940153d828SPatrick Mooney 		 * order).  The client is marked to indicate that destruction is
13950153d828SPatrick Mooney 		 * under way.  Doing so prevents any racing orphan operation
13960153d828SPatrick Mooney 		 * from applying to this client, allowing us to deassociate from
13970153d828SPatrick Mooney 		 * the vmspace safely.
13980153d828SPatrick Mooney 		 */
13990153d828SPatrick Mooney 		vmc->vmc_state |= VCS_DESTROY;
14000153d828SPatrick Mooney 		vms = vmc->vmc_space;
14010153d828SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
14020153d828SPatrick Mooney 
14030153d828SPatrick Mooney 		mutex_enter(&vms->vms_lock);
14040153d828SPatrick Mooney 		mutex_enter(&vmc->vmc_lock);
14050153d828SPatrick Mooney 		list_remove(&vms->vms_clients, vmc);
14060153d828SPatrick Mooney 		/*
14070153d828SPatrick Mooney 		 * If the vmspace began its own destruction operation while we
14080153d828SPatrick Mooney 		 * were navigating the locks, be sure to notify it about this
14090153d828SPatrick Mooney 		 * vm_client being deassociated.
14100153d828SPatrick Mooney 		 */
14110153d828SPatrick Mooney 		cv_signal(&vms->vms_cv);
14120153d828SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
14130153d828SPatrick Mooney 		mutex_exit(&vms->vms_lock);
14140153d828SPatrick Mooney 	} else {
14150153d828SPatrick Mooney 		VERIFY3P(vmc->vmc_space, ==, NULL);
14160153d828SPatrick Mooney 		mutex_exit(&vmc->vmc_lock);
14170153d828SPatrick Mooney 	}
14180153d828SPatrick Mooney 
14190153d828SPatrick Mooney 	mutex_destroy(&vmc->vmc_lock);
14200153d828SPatrick Mooney 	cv_destroy(&vmc->vmc_cv);
14210153d828SPatrick Mooney 	list_destroy(&vmc->vmc_held_pages);
14220153d828SPatrick Mooney 
14230153d828SPatrick Mooney 	kmem_free(vmc, sizeof (*vmc));
14240153d828SPatrick Mooney }
14250153d828SPatrick Mooney 
14260153d828SPatrick Mooney static __inline void *
vmp_ptr(const vm_page_t * vmp)14270153d828SPatrick Mooney vmp_ptr(const vm_page_t *vmp)
14280153d828SPatrick Mooney {
14290153d828SPatrick Mooney 	ASSERT3U(vmp->vmp_pfn, !=, PFN_INVALID);
14300153d828SPatrick Mooney 
14310153d828SPatrick Mooney 	const uintptr_t paddr = (vmp->vmp_pfn << PAGESHIFT);
14320153d828SPatrick Mooney 	return ((void *)((uintptr_t)kpm_vbase + paddr));
14330153d828SPatrick Mooney }
14340153d828SPatrick Mooney 
14350153d828SPatrick Mooney /*
14360153d828SPatrick Mooney  * Get a readable kernel-virtual pointer for a held page.
14370153d828SPatrick Mooney  *
14380153d828SPatrick Mooney  * Only legal to call if PROT_READ was specified in `prot` for the vmc_hold()
14390153d828SPatrick Mooney  * call to acquire this page reference.
14400153d828SPatrick Mooney  */
14410153d828SPatrick Mooney const void *
vmp_get_readable(const vm_page_t * vmp)14420153d828SPatrick Mooney vmp_get_readable(const vm_page_t *vmp)
14430153d828SPatrick Mooney {
14440153d828SPatrick Mooney 	ASSERT(vmp->vmp_prot & PROT_READ);
14450153d828SPatrick Mooney 
14460153d828SPatrick Mooney 	return (vmp_ptr(vmp));
14470153d828SPatrick Mooney }
14480153d828SPatrick Mooney 
14490153d828SPatrick Mooney /*
14500153d828SPatrick Mooney  * Get a writable kernel-virtual pointer for a held page.
14510153d828SPatrick Mooney  *
14520153d828SPatrick Mooney  * Only legal to call if PROT_WRITE was specified in `prot` for the vmc_hold()
14530153d828SPatrick Mooney  * call to acquire this page reference.
14540153d828SPatrick Mooney  */
14550153d828SPatrick Mooney void *
vmp_get_writable(const vm_page_t * vmp)14560153d828SPatrick Mooney vmp_get_writable(const vm_page_t *vmp)
14570153d828SPatrick Mooney {
14580153d828SPatrick Mooney 	ASSERT(vmp->vmp_prot & PROT_WRITE);
14590153d828SPatrick Mooney 
14600153d828SPatrick Mooney 	return (vmp_ptr(vmp));
14610153d828SPatrick Mooney }
14620153d828SPatrick Mooney 
14630153d828SPatrick Mooney /*
14640153d828SPatrick Mooney  * Get the host-physical PFN for a held page.
14650153d828SPatrick Mooney  */
14660153d828SPatrick Mooney pfn_t
vmp_get_pfn(const vm_page_t * vmp)14670153d828SPatrick Mooney vmp_get_pfn(const vm_page_t *vmp)
14680153d828SPatrick Mooney {
14690153d828SPatrick Mooney 	return (vmp->vmp_pfn);
14700153d828SPatrick Mooney }
14710153d828SPatrick Mooney 
1472f2357d97SPatrick Mooney /*
1473f2357d97SPatrick Mooney  * If this page was deferring dirty-marking in the corresponding vmspace page
1474f2357d97SPatrick Mooney  * tables, clear such a state so it is considered dirty from now on.
1475f2357d97SPatrick Mooney  */
1476f2357d97SPatrick Mooney void
vmp_mark_dirty(vm_page_t * vmp)1477f2357d97SPatrick Mooney vmp_mark_dirty(vm_page_t *vmp)
1478f2357d97SPatrick Mooney {
1479f2357d97SPatrick Mooney 	ASSERT((vmp->vmp_prot & PROT_WRITE) != 0);
1480f2357d97SPatrick Mooney 
1481f2357d97SPatrick Mooney 	atomic_and_8(&vmp->vmp_flags, ~VPF_DEFER_DIRTY);
1482f2357d97SPatrick Mooney }
1483f2357d97SPatrick Mooney 
14840153d828SPatrick Mooney /*
14850153d828SPatrick Mooney  * Store a pointer to `to_chain` in the page-chaining slot of `vmp`.
14860153d828SPatrick Mooney  */
14870153d828SPatrick Mooney void
vmp_chain(vm_page_t * vmp,vm_page_t * to_chain)14880153d828SPatrick Mooney vmp_chain(vm_page_t *vmp, vm_page_t *to_chain)
14890153d828SPatrick Mooney {
14900153d828SPatrick Mooney 	ASSERT3P(vmp->vmp_chain, ==, NULL);
14910153d828SPatrick Mooney 
14920153d828SPatrick Mooney 	vmp->vmp_chain = to_chain;
14930153d828SPatrick Mooney }
14940153d828SPatrick Mooney 
14950153d828SPatrick Mooney /*
14960153d828SPatrick Mooney  * Retrieve the pointer from the page-chaining in `vmp`.
14970153d828SPatrick Mooney  */
14980153d828SPatrick Mooney vm_page_t *
vmp_next(const vm_page_t * vmp)14990153d828SPatrick Mooney vmp_next(const vm_page_t *vmp)
15000153d828SPatrick Mooney {
15010153d828SPatrick Mooney 	return (vmp->vmp_chain);
15020153d828SPatrick Mooney }
15030153d828SPatrick Mooney 
15040153d828SPatrick Mooney static __inline bool
vmp_release_inner(vm_page_t * vmp,vm_client_t * vmc)15050153d828SPatrick Mooney vmp_release_inner(vm_page_t *vmp, vm_client_t *vmc)
15060153d828SPatrick Mooney {
15070153d828SPatrick Mooney 	ASSERT(MUTEX_HELD(&vmc->vmc_lock));
15080153d828SPatrick Mooney 
15090153d828SPatrick Mooney 	bool was_unmapped = false;
15100153d828SPatrick Mooney 
15110153d828SPatrick Mooney 	list_remove(&vmc->vmc_held_pages, vmp);
15120153d828SPatrick Mooney 	if (vmp->vmp_obj_ref != NULL) {
15130153d828SPatrick Mooney 		ASSERT3P(vmp->vmp_ptep, ==, NULL);
15140153d828SPatrick Mooney 
15150153d828SPatrick Mooney 		vm_object_release(vmp->vmp_obj_ref);
15160153d828SPatrick Mooney 		was_unmapped = true;
15170153d828SPatrick Mooney 	} else {
15180153d828SPatrick Mooney 		ASSERT3P(vmp->vmp_ptep, !=, NULL);
15190153d828SPatrick Mooney 
1520f2357d97SPatrick Mooney 		/*
1521f2357d97SPatrick Mooney 		 * Track appropriate (accessed/dirty) bits for the guest-virtual
1522f2357d97SPatrick Mooney 		 * address corresponding to this page, if it is from the vmspace
1523f2357d97SPatrick Mooney 		 * rather than a direct reference to an underlying object.
1524f2357d97SPatrick Mooney 		 *
1525f2357d97SPatrick Mooney 		 * The protection and/or configured flags may obviate the need
1526f2357d97SPatrick Mooney 		 * for such an update.
1527f2357d97SPatrick Mooney 		 */
1528f2357d97SPatrick Mooney 		if ((vmp->vmp_prot & PROT_WRITE) != 0 &&
1529f2357d97SPatrick Mooney 		    (vmp->vmp_flags & VPF_DEFER_DIRTY) == 0 &&
1530f2357d97SPatrick Mooney 		    vmc->vmc_track_dirty) {
15310153d828SPatrick Mooney 			vmm_gpt_t *gpt = vmc->vmc_space->vms_gpt;
1532e0994bd2SPatrick Mooney 			(void) vmm_gpt_reset_dirty(gpt, vmp->vmp_ptep, true);
15330153d828SPatrick Mooney 		}
15340153d828SPatrick Mooney 	}
15350153d828SPatrick Mooney 	kmem_free(vmp, sizeof (*vmp));
15360153d828SPatrick Mooney 	return (was_unmapped);
15370153d828SPatrick Mooney }
15380153d828SPatrick Mooney 
15390153d828SPatrick Mooney /*
15400153d828SPatrick Mooney  * Release held page.  Returns true if page resided on region which was
15410153d828SPatrick Mooney  * subsequently unmapped.
15420153d828SPatrick Mooney  */
15430153d828SPatrick Mooney bool
vmp_release(vm_page_t * vmp)15440153d828SPatrick Mooney vmp_release(vm_page_t *vmp)
15450153d828SPatrick Mooney {
15460153d828SPatrick Mooney 	vm_client_t *vmc = vmp->vmp_client;
15470153d828SPatrick Mooney 
15480153d828SPatrick Mooney 	VERIFY(vmc != NULL);
15490153d828SPatrick Mooney 
15500153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
15510153d828SPatrick Mooney 	const bool was_unmapped = vmp_release_inner(vmp, vmc);
15520153d828SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
15530153d828SPatrick Mooney 	return (was_unmapped);
15540153d828SPatrick Mooney }
15550153d828SPatrick Mooney 
15560153d828SPatrick Mooney /*
15570153d828SPatrick Mooney  * Release a chain of pages which were associated via vmp_chain() (setting
15580153d828SPatrick Mooney  * page-chaining pointer).  Returns true if any pages resided upon a region
15590153d828SPatrick Mooney  * which was subsequently unmapped.
15600153d828SPatrick Mooney  *
15610153d828SPatrick Mooney  * All of those pages must have been held through the same vm_client_t.
15620153d828SPatrick Mooney  */
15630153d828SPatrick Mooney bool
vmp_release_chain(vm_page_t * vmp)15640153d828SPatrick Mooney vmp_release_chain(vm_page_t *vmp)
15650153d828SPatrick Mooney {
15660153d828SPatrick Mooney 	vm_client_t *vmc = vmp->vmp_client;
15670153d828SPatrick Mooney 	bool any_unmapped = false;
15680153d828SPatrick Mooney 
15690153d828SPatrick Mooney 	ASSERT(vmp != NULL);
15700153d828SPatrick Mooney 
15710153d828SPatrick Mooney 	mutex_enter(&vmc->vmc_lock);
15720153d828SPatrick Mooney 	while (vmp != NULL) {
15730153d828SPatrick Mooney 		vm_page_t *next = vmp->vmp_chain;
15740153d828SPatrick Mooney 
15750153d828SPatrick Mooney 		/* We expect all pages in chain to be from same client */
15760153d828SPatrick Mooney 		ASSERT3P(vmp->vmp_client, ==, vmc);
15770153d828SPatrick Mooney 
15780153d828SPatrick Mooney 		if (vmp_release_inner(vmp, vmc)) {
15790153d828SPatrick Mooney 			any_unmapped = true;
15800153d828SPatrick Mooney 		}
15810153d828SPatrick Mooney 		vmp = next;
15820153d828SPatrick Mooney 	}
15830153d828SPatrick Mooney 	mutex_exit(&vmc->vmc_lock);
15840153d828SPatrick Mooney 	return (any_unmapped);
15850153d828SPatrick Mooney }
15860153d828SPatrick Mooney 
15870153d828SPatrick Mooney 
15880153d828SPatrick Mooney int
vm_segmap_obj(struct vm * vm,int segid,off_t segoff,off_t len,struct as * as,caddr_t * addrp,uint_t prot,uint_t maxprot,uint_t flags)15890153d828SPatrick Mooney vm_segmap_obj(struct vm *vm, int segid, off_t segoff, off_t len,
15900153d828SPatrick Mooney     struct as *as, caddr_t *addrp, uint_t prot, uint_t maxprot, uint_t flags)
15910153d828SPatrick Mooney {
15920153d828SPatrick Mooney 	vm_object_t *vmo;
15930153d828SPatrick Mooney 	int err;
15940153d828SPatrick Mooney 
15950153d828SPatrick Mooney 	if (segoff < 0 || len <= 0 ||
15960153d828SPatrick Mooney 	    (segoff & PAGEOFFSET) != 0 || (len & PAGEOFFSET) != 0) {
15970153d828SPatrick Mooney 		return (EINVAL);
15980153d828SPatrick Mooney 	}
15990153d828SPatrick Mooney 	if ((prot & PROT_USER) == 0) {
16000153d828SPatrick Mooney 		return (ENOTSUP);
16010153d828SPatrick Mooney 	}
16020153d828SPatrick Mooney 	err = vm_get_memseg(vm, segid, NULL, NULL, &vmo);
16030153d828SPatrick Mooney 	if (err != 0) {
16040153d828SPatrick Mooney 		return (err);
16050153d828SPatrick Mooney 	}
16060153d828SPatrick Mooney 
16070153d828SPatrick Mooney 	VERIFY(segoff >= 0);
16080153d828SPatrick Mooney 	VERIFY(len <= vmo->vmo_size);
16090153d828SPatrick Mooney 	VERIFY((len + segoff) <= vmo->vmo_size);
16100153d828SPatrick Mooney 
16110153d828SPatrick Mooney 	if (vmo->vmo_type != VMOT_MEM) {
16120153d828SPatrick Mooney 		/* Only support memory objects for now */
16130153d828SPatrick Mooney 		return (ENOTSUP);
16140153d828SPatrick Mooney 	}
16150153d828SPatrick Mooney 
16160153d828SPatrick Mooney 	as_rangelock(as);
16170153d828SPatrick Mooney 
16180153d828SPatrick Mooney 	err = choose_addr(as, addrp, (size_t)len, 0, ADDR_VACALIGN, flags);
16190153d828SPatrick Mooney 	if (err == 0) {
16200153d828SPatrick Mooney 		segvmm_crargs_t svma;
16210153d828SPatrick Mooney 
16220153d828SPatrick Mooney 		svma.prot = prot;
16230153d828SPatrick Mooney 		svma.offset = segoff;
16240153d828SPatrick Mooney 		svma.vmo = vmo;
16250153d828SPatrick Mooney 		svma.vmc = NULL;
16260153d828SPatrick Mooney 
16270153d828SPatrick Mooney 		err = as_map(as, *addrp, (size_t)len, segvmm_create, &svma);
16280153d828SPatrick Mooney 	}
16290153d828SPatrick Mooney 
16300153d828SPatrick Mooney 	as_rangeunlock(as);
16310153d828SPatrick Mooney 	return (err);
16320153d828SPatrick Mooney }
16330153d828SPatrick Mooney 
16340153d828SPatrick Mooney int
vm_segmap_space(struct vm * vm,off_t off,struct as * as,caddr_t * addrp,off_t len,uint_t prot,uint_t maxprot,uint_t flags)16350153d828SPatrick Mooney vm_segmap_space(struct vm *vm, off_t off, struct as *as, caddr_t *addrp,
16360153d828SPatrick Mooney     off_t len, uint_t prot, uint_t maxprot, uint_t flags)
16370153d828SPatrick Mooney {
16380153d828SPatrick Mooney 
16390153d828SPatrick Mooney 	const uintptr_t gpa = (uintptr_t)off;
16400153d828SPatrick Mooney 	const size_t size = (uintptr_t)len;
16410153d828SPatrick Mooney 	int err;
16420153d828SPatrick Mooney 
16430153d828SPatrick Mooney 	if (off < 0 || len <= 0 ||
16440153d828SPatrick Mooney 	    (gpa & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
16450153d828SPatrick Mooney 		return (EINVAL);
16460153d828SPatrick Mooney 	}
16470153d828SPatrick Mooney 	if ((prot & PROT_USER) == 0) {
16480153d828SPatrick Mooney 		return (ENOTSUP);
16490153d828SPatrick Mooney 	}
16500153d828SPatrick Mooney 
16510153d828SPatrick Mooney 	as_rangelock(as);
16520153d828SPatrick Mooney 
16530153d828SPatrick Mooney 	err = choose_addr(as, addrp, size, off, ADDR_VACALIGN, flags);
16540153d828SPatrick Mooney 	if (err == 0) {
16550153d828SPatrick Mooney 		segvmm_crargs_t svma;
16560153d828SPatrick Mooney 
16570153d828SPatrick Mooney 		svma.prot = prot;
16580153d828SPatrick Mooney 		svma.offset = gpa;
16590153d828SPatrick Mooney 		svma.vmo = NULL;
16600153d828SPatrick Mooney 		svma.vmc = vmspace_client_alloc(vm_get_vmspace(vm));
16610153d828SPatrick Mooney 
16620153d828SPatrick Mooney 		err = as_map(as, *addrp, len, segvmm_create, &svma);
16630153d828SPatrick Mooney 	}
16640153d828SPatrick Mooney 
16650153d828SPatrick Mooney 	as_rangeunlock(as);
16660153d828SPatrick Mooney 	return (err);
16670153d828SPatrick Mooney }
1668