1b22a70abSPatrick Mooney /*
2b22a70abSPatrick Mooney  * Copyright (c) 2013  Chris Torek <torek @ torek net>
3b22a70abSPatrick Mooney  * All rights reserved.
4b22a70abSPatrick Mooney  *
5b22a70abSPatrick Mooney  * Redistribution and use in source and binary forms, with or without
6b22a70abSPatrick Mooney  * modification, are permitted provided that the following conditions
7b22a70abSPatrick Mooney  * are met:
8b22a70abSPatrick Mooney  * 1. Redistributions of source code must retain the above copyright
9b22a70abSPatrick Mooney  *    notice, this list of conditions and the following disclaimer.
10b22a70abSPatrick Mooney  * 2. Redistributions in binary form must reproduce the above copyright
11b22a70abSPatrick Mooney  *    notice, this list of conditions and the following disclaimer in the
12b22a70abSPatrick Mooney  *    documentation and/or other materials provided with the distribution.
13b22a70abSPatrick Mooney  *
14b22a70abSPatrick Mooney  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15b22a70abSPatrick Mooney  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16b22a70abSPatrick Mooney  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17b22a70abSPatrick Mooney  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18b22a70abSPatrick Mooney  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19b22a70abSPatrick Mooney  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20b22a70abSPatrick Mooney  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21b22a70abSPatrick Mooney  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22b22a70abSPatrick Mooney  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23b22a70abSPatrick Mooney  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24b22a70abSPatrick Mooney  * SUCH DAMAGE.
25b22a70abSPatrick Mooney  */
26b22a70abSPatrick Mooney /*
27b22a70abSPatrick Mooney  * This file and its contents are supplied under the terms of the
28b22a70abSPatrick Mooney  * Common Development and Distribution License ("CDDL"), version 1.0.
29b22a70abSPatrick Mooney  * You may only use this file in accordance with the terms of version
30b22a70abSPatrick Mooney  * 1.0 of the CDDL.
31b22a70abSPatrick Mooney  *
32b22a70abSPatrick Mooney  * A full copy of the text of the CDDL should have accompanied this
33b22a70abSPatrick Mooney  * source.  A copy of the CDDL is also available via the Internet at
34b22a70abSPatrick Mooney  * http://www.illumos.org/license/CDDL.
35b22a70abSPatrick Mooney  *
36b22a70abSPatrick Mooney  * Copyright 2015 Pluribus Networks Inc.
37b22a70abSPatrick Mooney  * Copyright 2019 Joyent, Inc.
38a26f9c14SPatrick Mooney  * Copyright 2022 Oxide Computer Company
39b22a70abSPatrick Mooney  */
40b22a70abSPatrick Mooney 
41b22a70abSPatrick Mooney 
42b22a70abSPatrick Mooney #include <sys/disp.h>
43b22a70abSPatrick Mooney 
44b22a70abSPatrick Mooney #include "viona_impl.h"
45b22a70abSPatrick Mooney 
/* Largest queue size (in entries) accepted when initializing a ring */
#define	VRING_MAX_LEN		32768

/* Layout and sizing as defined in the spec for a legacy-style virtqueue */

#define	LEGACY_VQ_ALIGN		PAGESIZE

/* The descriptor table sits at offset 0 and holds qsz descriptors. */
#define	LEGACY_DESC_SZ(qsz)	((qsz) * sizeof (struct virtio_desc))
/*
 * Available ring consists of, in order: flags (uint16_t), avail_idx
 * (uint16_t), qsz avail descriptors (uint16_t each), and (optional) used_event
 * (uint16_t).  It immediately follows the descriptor table.
 */
#define	LEGACY_AVAIL_SZ(qsz)	(((qsz) + 3) * sizeof (uint16_t))
/*
 * Used ring consists of, in order: flags (uint16_t), used_idx (uint16_t), qsz
 * used descriptors (two uint32_t each), and (optional) avail_event (uint16_t).
 * It begins at the first LEGACY_VQ_ALIGN boundary after the available ring.
 */
#define	LEGACY_USED_SZ(qsz)	\
	((qsz) * sizeof (struct virtio_used) + 3 * sizeof (uint16_t))

/* Byte offsets, from the start of the virtqueue, of available ring fields */
#define	LEGACY_AVAIL_FLAGS_OFF(qsz)	(LEGACY_DESC_SZ(qsz))
#define	LEGACY_AVAIL_IDX_OFF(qsz)	\
	(LEGACY_DESC_SZ(qsz) + sizeof (uint16_t))
#define	LEGACY_AVAIL_ENT_OFF(qsz, idx)	\
	(LEGACY_DESC_SZ(qsz) + (2 + (idx)) * sizeof (uint16_t))

/* Byte offsets, from the start of the virtqueue, of used ring fields */
#define	LEGACY_USED_FLAGS_OFF(qsz)	\
	(P2ROUNDUP(LEGACY_DESC_SZ(qsz) + LEGACY_AVAIL_SZ(qsz), LEGACY_VQ_ALIGN))
#define	LEGACY_USED_IDX_OFF(qsz)	\
	(LEGACY_USED_FLAGS_OFF(qsz) + sizeof (uint16_t))
#define	LEGACY_USED_ENT_OFF(qsz, idx)	\
	(LEGACY_USED_FLAGS_OFF(qsz) + 2 * sizeof (uint16_t) + \
	(idx) * sizeof (struct virtio_used))

/* Total footprint of the virtqueue, in bytes and in pages */
#define	LEGACY_VQ_SIZE(qsz)	\
	(LEGACY_USED_FLAGS_OFF(qsz) + \
	P2ROUNDUP(LEGACY_USED_SZ(qsz), LEGACY_VQ_ALIGN))
#define	LEGACY_VQ_PAGES(qsz)	(LEGACY_VQ_SIZE(qsz) / PAGESIZE)
83427f9b9aSPatrick Mooney 
84db9aa506SPatrick Mooney struct vq_held_region {
85db9aa506SPatrick Mooney 	struct iovec	*vhr_iov;
86db9aa506SPatrick Mooney 	vmm_page_t	*vhr_head;
87db9aa506SPatrick Mooney 	vmm_page_t	*vhr_tail;
88db9aa506SPatrick Mooney 	/* Length of iovec array supplied in `vhr_iov` */
89db9aa506SPatrick Mooney 	uint_t		vhr_niov;
90db9aa506SPatrick Mooney 	/*
91db9aa506SPatrick Mooney 	 * Index into vhr_iov, indicating the next "free" entry (following the
92db9aa506SPatrick Mooney 	 * last entry which has valid contents).
93db9aa506SPatrick Mooney 	 */
94db9aa506SPatrick Mooney 	uint_t		vhr_idx;
95db9aa506SPatrick Mooney };
96db9aa506SPatrick Mooney typedef struct vq_held_region vq_held_region_t;
97db9aa506SPatrick Mooney 
98f2357d97SPatrick Mooney static bool viona_ring_map(viona_vring_t *, bool);
99b22a70abSPatrick Mooney static void viona_ring_unmap(viona_vring_t *);
100b22a70abSPatrick Mooney static kthread_t *viona_create_worker(viona_vring_t *);
101b22a70abSPatrick Mooney 
102db9aa506SPatrick Mooney static vmm_page_t *
vq_page_hold(viona_vring_t * ring,uint64_t gpa,bool writable)103db9aa506SPatrick Mooney vq_page_hold(viona_vring_t *ring, uint64_t gpa, bool writable)
104b22a70abSPatrick Mooney {
105b22a70abSPatrick Mooney 	ASSERT3P(ring->vr_lease, !=, NULL);
106b22a70abSPatrick Mooney 
107db9aa506SPatrick Mooney 	int prot = PROT_READ;
108db9aa506SPatrick Mooney 	if (writable) {
109db9aa506SPatrick Mooney 		prot |= PROT_WRITE;
110db9aa506SPatrick Mooney 	}
111db9aa506SPatrick Mooney 
112db9aa506SPatrick Mooney 	return (vmm_drv_page_hold(ring->vr_lease, gpa, prot));
113db9aa506SPatrick Mooney }
114db9aa506SPatrick Mooney 
115db9aa506SPatrick Mooney /*
116db9aa506SPatrick Mooney  * Establish a hold on the page(s) which back the region of guest memory covered
117db9aa506SPatrick Mooney  * by [gpa, gpa + len).  The host-kernel-virtual pointers to those pages are
118db9aa506SPatrick Mooney  * stored in the iovec array supplied in `region`, along with the chain of
119db9aa506SPatrick Mooney  * vmm_page_t entries representing the held pages.  Since guest memory
120db9aa506SPatrick Mooney  * carries no guarantees of being physically contiguous (on the host), it is
121db9aa506SPatrick Mooney  * assumed that an iovec entry will be required for each PAGESIZE section
122db9aa506SPatrick Mooney  * covered by the specified `gpa` and `len` range.  For each iovec entry
123db9aa506SPatrick Mooney  * successfully populated by holding a page, `vhr_idx` will be incremented so it
124db9aa506SPatrick Mooney  * references the next available iovec entry (or `vhr_niov`, if the iovec array
125db9aa506SPatrick Mooney  * is full).  The responsibility for releasing the `vmm_page_t` chain (stored in
126db9aa506SPatrick Mooney  * `vhr_head` and `vhr_tail`) resides with the caller, regardless of the result.
127db9aa506SPatrick Mooney  */
128db9aa506SPatrick Mooney static int
vq_region_hold(viona_vring_t * ring,uint64_t gpa,uint32_t len,bool writable,vq_held_region_t * region)129db9aa506SPatrick Mooney vq_region_hold(viona_vring_t *ring, uint64_t gpa, uint32_t len,
130db9aa506SPatrick Mooney     bool writable, vq_held_region_t *region)
131db9aa506SPatrick Mooney {
132db9aa506SPatrick Mooney 	const uint32_t front_offset = gpa & PAGEOFFSET;
133db9aa506SPatrick Mooney 	const uint32_t front_len = MIN(len, PAGESIZE - front_offset);
134db9aa506SPatrick Mooney 	uint_t pages = 1;
135db9aa506SPatrick Mooney 	vmm_page_t *vmp;
136db9aa506SPatrick Mooney 	caddr_t buf;
137db9aa506SPatrick Mooney 
138db9aa506SPatrick Mooney 	ASSERT3U(region->vhr_idx, <, region->vhr_niov);
139db9aa506SPatrick Mooney 
140db9aa506SPatrick Mooney 	if (front_len < len) {
141db9aa506SPatrick Mooney 		pages += P2ROUNDUP((uint64_t)(len - front_len),
142db9aa506SPatrick Mooney 		    PAGESIZE) / PAGESIZE;
143db9aa506SPatrick Mooney 	}
144db9aa506SPatrick Mooney 	if (pages > (region->vhr_niov - region->vhr_idx)) {
145db9aa506SPatrick Mooney 		return (E2BIG);
146db9aa506SPatrick Mooney 	}
147db9aa506SPatrick Mooney 
148db9aa506SPatrick Mooney 	vmp = vq_page_hold(ring, gpa & PAGEMASK, writable);
149db9aa506SPatrick Mooney 	if (vmp == NULL) {
150db9aa506SPatrick Mooney 		return (EFAULT);
151db9aa506SPatrick Mooney 	}
152db9aa506SPatrick Mooney 	buf = (caddr_t)vmm_drv_page_readable(vmp);
153db9aa506SPatrick Mooney 
154db9aa506SPatrick Mooney 	region->vhr_iov[region->vhr_idx].iov_base = buf + front_offset;
155db9aa506SPatrick Mooney 	region->vhr_iov[region->vhr_idx].iov_len = front_len;
156db9aa506SPatrick Mooney 	region->vhr_idx++;
157db9aa506SPatrick Mooney 	gpa += front_len;
158db9aa506SPatrick Mooney 	len -= front_len;
159db9aa506SPatrick Mooney 	if (region->vhr_head == NULL) {
160db9aa506SPatrick Mooney 		region->vhr_head = vmp;
161db9aa506SPatrick Mooney 		region->vhr_tail = vmp;
162db9aa506SPatrick Mooney 	} else {
163db9aa506SPatrick Mooney 		vmm_drv_page_chain(region->vhr_tail, vmp);
164db9aa506SPatrick Mooney 		region->vhr_tail = vmp;
165db9aa506SPatrick Mooney 	}
166db9aa506SPatrick Mooney 
167db9aa506SPatrick Mooney 	for (uint_t i = 1; i < pages; i++) {
168db9aa506SPatrick Mooney 		ASSERT3U(gpa & PAGEOFFSET, ==, 0);
169db9aa506SPatrick Mooney 
170db9aa506SPatrick Mooney 		vmp = vq_page_hold(ring, gpa, writable);
171db9aa506SPatrick Mooney 		if (vmp == NULL) {
172db9aa506SPatrick Mooney 			return (EFAULT);
173db9aa506SPatrick Mooney 		}
174db9aa506SPatrick Mooney 		buf = (caddr_t)vmm_drv_page_readable(vmp);
175db9aa506SPatrick Mooney 
176db9aa506SPatrick Mooney 		const uint32_t chunk_len = MIN(len, PAGESIZE);
177db9aa506SPatrick Mooney 		region->vhr_iov[region->vhr_idx].iov_base = buf;
178db9aa506SPatrick Mooney 		region->vhr_iov[region->vhr_idx].iov_len = chunk_len;
179db9aa506SPatrick Mooney 		region->vhr_idx++;
180db9aa506SPatrick Mooney 		gpa += chunk_len;
181db9aa506SPatrick Mooney 		len -= chunk_len;
182db9aa506SPatrick Mooney 		vmm_drv_page_chain(region->vhr_tail, vmp);
183db9aa506SPatrick Mooney 		region->vhr_tail = vmp;
184db9aa506SPatrick Mooney 	}
185db9aa506SPatrick Mooney 
186db9aa506SPatrick Mooney 	return (0);
187b22a70abSPatrick Mooney }
188b22a70abSPatrick Mooney 
189b22a70abSPatrick Mooney static boolean_t
viona_ring_lease_expire_cb(void * arg)190b22a70abSPatrick Mooney viona_ring_lease_expire_cb(void *arg)
191b22a70abSPatrick Mooney {
192b22a70abSPatrick Mooney 	viona_vring_t *ring = arg;
193b22a70abSPatrick Mooney 
1946703a0e8SPatrick Mooney 	mutex_enter(&ring->vr_lock);
195b22a70abSPatrick Mooney 	cv_broadcast(&ring->vr_cv);
1966703a0e8SPatrick Mooney 	mutex_exit(&ring->vr_lock);
197b22a70abSPatrick Mooney 
198b22a70abSPatrick Mooney 	/* The lease will be broken asynchronously. */
199b22a70abSPatrick Mooney 	return (B_FALSE);
200b22a70abSPatrick Mooney }
201b22a70abSPatrick Mooney 
202b22a70abSPatrick Mooney static void
viona_ring_lease_drop(viona_vring_t * ring)203b22a70abSPatrick Mooney viona_ring_lease_drop(viona_vring_t *ring)
204b22a70abSPatrick Mooney {
205b22a70abSPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_lock));
206b22a70abSPatrick Mooney 
207b22a70abSPatrick Mooney 	if (ring->vr_lease != NULL) {
208b22a70abSPatrick Mooney 		vmm_hold_t *hold = ring->vr_link->l_vm_hold;
209b22a70abSPatrick Mooney 
210b22a70abSPatrick Mooney 		ASSERT(hold != NULL);
211b22a70abSPatrick Mooney 
212b22a70abSPatrick Mooney 		/*
213b22a70abSPatrick Mooney 		 * Without an active lease, the ring mappings cannot be
214b22a70abSPatrick Mooney 		 * considered valid.
215b22a70abSPatrick Mooney 		 */
216b22a70abSPatrick Mooney 		viona_ring_unmap(ring);
217b22a70abSPatrick Mooney 
218b22a70abSPatrick Mooney 		vmm_drv_lease_break(hold, ring->vr_lease);
219b22a70abSPatrick Mooney 		ring->vr_lease = NULL;
220b22a70abSPatrick Mooney 	}
221b22a70abSPatrick Mooney }
222b22a70abSPatrick Mooney 
223b22a70abSPatrick Mooney boolean_t
viona_ring_lease_renew(viona_vring_t * ring)224b22a70abSPatrick Mooney viona_ring_lease_renew(viona_vring_t *ring)
225b22a70abSPatrick Mooney {
226b22a70abSPatrick Mooney 	vmm_hold_t *hold = ring->vr_link->l_vm_hold;
227b22a70abSPatrick Mooney 
228b22a70abSPatrick Mooney 	ASSERT(hold != NULL);
229b22a70abSPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_lock));
230b22a70abSPatrick Mooney 
231b22a70abSPatrick Mooney 	viona_ring_lease_drop(ring);
232b22a70abSPatrick Mooney 
233b22a70abSPatrick Mooney 	/*
234b22a70abSPatrick Mooney 	 * Lease renewal will fail if the VM has requested that all holds be
235b22a70abSPatrick Mooney 	 * cleaned up.
236b22a70abSPatrick Mooney 	 */
237b22a70abSPatrick Mooney 	ring->vr_lease = vmm_drv_lease_sign(hold, viona_ring_lease_expire_cb,
238b22a70abSPatrick Mooney 	    ring);
239b22a70abSPatrick Mooney 	if (ring->vr_lease != NULL) {
240b22a70abSPatrick Mooney 		/* A ring undergoing renewal will need valid guest mappings */
241b22a70abSPatrick Mooney 		if (ring->vr_pa != 0 && ring->vr_size != 0) {
242b22a70abSPatrick Mooney 			/*
243b22a70abSPatrick Mooney 			 * If new mappings cannot be established, consider the
244b22a70abSPatrick Mooney 			 * lease renewal a failure.
245b22a70abSPatrick Mooney 			 */
246f2357d97SPatrick Mooney 			if (!viona_ring_map(ring, ring->vr_state == VRS_INIT)) {
247b22a70abSPatrick Mooney 				viona_ring_lease_drop(ring);
248b22a70abSPatrick Mooney 				return (B_FALSE);
249b22a70abSPatrick Mooney 			}
250b22a70abSPatrick Mooney 		}
251b22a70abSPatrick Mooney 	}
252b22a70abSPatrick Mooney 	return (ring->vr_lease != NULL);
253b22a70abSPatrick Mooney }
254b22a70abSPatrick Mooney 
255b22a70abSPatrick Mooney void
viona_ring_alloc(viona_link_t * link,viona_vring_t * ring)256b22a70abSPatrick Mooney viona_ring_alloc(viona_link_t *link, viona_vring_t *ring)
257b22a70abSPatrick Mooney {
258b22a70abSPatrick Mooney 	ring->vr_link = link;
259b22a70abSPatrick Mooney 	mutex_init(&ring->vr_lock, NULL, MUTEX_DRIVER, NULL);
260b22a70abSPatrick Mooney 	cv_init(&ring->vr_cv, NULL, CV_DRIVER, NULL);
261b22a70abSPatrick Mooney 	mutex_init(&ring->vr_a_mutex, NULL, MUTEX_DRIVER, NULL);
262b22a70abSPatrick Mooney 	mutex_init(&ring->vr_u_mutex, NULL, MUTEX_DRIVER, NULL);
263b22a70abSPatrick Mooney }
264b22a70abSPatrick Mooney 
265b22a70abSPatrick Mooney static void
viona_ring_misc_free(viona_vring_t * ring)266b22a70abSPatrick Mooney viona_ring_misc_free(viona_vring_t *ring)
267b22a70abSPatrick Mooney {
268b22a70abSPatrick Mooney 	const uint_t qsz = ring->vr_size;
269b22a70abSPatrick Mooney 
270b22a70abSPatrick Mooney 	viona_tx_ring_free(ring, qsz);
271b22a70abSPatrick Mooney }
272b22a70abSPatrick Mooney 
273b22a70abSPatrick Mooney void
viona_ring_free(viona_vring_t * ring)274b22a70abSPatrick Mooney viona_ring_free(viona_vring_t *ring)
275b22a70abSPatrick Mooney {
276b22a70abSPatrick Mooney 	mutex_destroy(&ring->vr_lock);
277b22a70abSPatrick Mooney 	cv_destroy(&ring->vr_cv);
278b22a70abSPatrick Mooney 	mutex_destroy(&ring->vr_a_mutex);
279b22a70abSPatrick Mooney 	mutex_destroy(&ring->vr_u_mutex);
280b22a70abSPatrick Mooney 	ring->vr_link = NULL;
281b22a70abSPatrick Mooney }
282b22a70abSPatrick Mooney 
283b22a70abSPatrick Mooney int
viona_ring_init(viona_link_t * link,uint16_t idx,const struct viona_ring_params * params)284a26f9c14SPatrick Mooney viona_ring_init(viona_link_t *link, uint16_t idx,
285a26f9c14SPatrick Mooney     const struct viona_ring_params *params)
286b22a70abSPatrick Mooney {
287b22a70abSPatrick Mooney 	viona_vring_t *ring;
288b22a70abSPatrick Mooney 	kthread_t *t;
289b22a70abSPatrick Mooney 	int err = 0;
290a26f9c14SPatrick Mooney 	const uint16_t qsz = params->vrp_size;
291a26f9c14SPatrick Mooney 	const uint64_t pa = params->vrp_pa;
292b22a70abSPatrick Mooney 
293b22a70abSPatrick Mooney 	if (idx >= VIONA_VQ_MAX) {
294b22a70abSPatrick Mooney 		return (EINVAL);
295b22a70abSPatrick Mooney 	}
296a26f9c14SPatrick Mooney 
297b22a70abSPatrick Mooney 	if (qsz == 0 || qsz > VRING_MAX_LEN || (1 << (ffs(qsz) - 1)) != qsz) {
298b22a70abSPatrick Mooney 		return (EINVAL);
299b22a70abSPatrick Mooney 	}
300427f9b9aSPatrick Mooney 	if ((pa & (LEGACY_VQ_ALIGN - 1)) != 0) {
301427f9b9aSPatrick Mooney 		return (EINVAL);
302427f9b9aSPatrick Mooney 	}
303b22a70abSPatrick Mooney 
304b22a70abSPatrick Mooney 	ring = &link->l_vrings[idx];
305b22a70abSPatrick Mooney 	mutex_enter(&ring->vr_lock);
306b22a70abSPatrick Mooney 	if (ring->vr_state != VRS_RESET) {
307b22a70abSPatrick Mooney 		mutex_exit(&ring->vr_lock);
308b22a70abSPatrick Mooney 		return (EBUSY);
309b22a70abSPatrick Mooney 	}
310b22a70abSPatrick Mooney 	VERIFY(ring->vr_state_flags == 0);
311b22a70abSPatrick Mooney 
312b22a70abSPatrick Mooney 	ring->vr_lease = NULL;
313b22a70abSPatrick Mooney 	if (!viona_ring_lease_renew(ring)) {
314b22a70abSPatrick Mooney 		err = EBUSY;
315b22a70abSPatrick Mooney 		goto fail;
316b22a70abSPatrick Mooney 	}
317b22a70abSPatrick Mooney 
318b22a70abSPatrick Mooney 	ring->vr_size = qsz;
319b22a70abSPatrick Mooney 	ring->vr_mask = (ring->vr_size - 1);
320b22a70abSPatrick Mooney 	ring->vr_pa = pa;
321f2357d97SPatrick Mooney 	if (!viona_ring_map(ring, true)) {
322b22a70abSPatrick Mooney 		err = EINVAL;
323b22a70abSPatrick Mooney 		goto fail;
324b22a70abSPatrick Mooney 	}
325b22a70abSPatrick Mooney 
326b22a70abSPatrick Mooney 	/* Initialize queue indexes */
327a26f9c14SPatrick Mooney 	ring->vr_cur_aidx = params->vrp_avail_idx;
328a26f9c14SPatrick Mooney 	ring->vr_cur_uidx = params->vrp_used_idx;
329b22a70abSPatrick Mooney 
330b22a70abSPatrick Mooney 	if (idx == VIONA_VQ_TX) {
331b22a70abSPatrick Mooney 		viona_tx_ring_alloc(ring, qsz);
332b22a70abSPatrick Mooney 	}
333b22a70abSPatrick Mooney 
334b22a70abSPatrick Mooney 	/* Zero out MSI-X configuration */
335b22a70abSPatrick Mooney 	ring->vr_msi_addr = 0;
336b22a70abSPatrick Mooney 	ring->vr_msi_msg = 0;
337b22a70abSPatrick Mooney 
338b22a70abSPatrick Mooney 	/* Clear the stats */
339b22a70abSPatrick Mooney 	bzero(&ring->vr_stats, sizeof (ring->vr_stats));
340b22a70abSPatrick Mooney 
341b22a70abSPatrick Mooney 	t = viona_create_worker(ring);
342b22a70abSPatrick Mooney 	if (t == NULL) {
343b22a70abSPatrick Mooney 		err = ENOMEM;
344b22a70abSPatrick Mooney 		goto fail;
345b22a70abSPatrick Mooney 	}
346b22a70abSPatrick Mooney 	ring->vr_worker_thread = t;
347b22a70abSPatrick Mooney 	ring->vr_state = VRS_SETUP;
348b22a70abSPatrick Mooney 	cv_broadcast(&ring->vr_cv);
349b22a70abSPatrick Mooney 	mutex_exit(&ring->vr_lock);
350b22a70abSPatrick Mooney 	return (0);
351b22a70abSPatrick Mooney 
352b22a70abSPatrick Mooney fail:
353b22a70abSPatrick Mooney 	viona_ring_lease_drop(ring);
354b22a70abSPatrick Mooney 	viona_ring_misc_free(ring);
355b22a70abSPatrick Mooney 	ring->vr_size = 0;
356b22a70abSPatrick Mooney 	ring->vr_mask = 0;
357427f9b9aSPatrick Mooney 	ring->vr_pa = 0;
358a26f9c14SPatrick Mooney 	ring->vr_cur_aidx = 0;
359a26f9c14SPatrick Mooney 	ring->vr_cur_uidx = 0;
360b22a70abSPatrick Mooney 	mutex_exit(&ring->vr_lock);
361b22a70abSPatrick Mooney 	return (err);
362b22a70abSPatrick Mooney }
363b22a70abSPatrick Mooney 
364a26f9c14SPatrick Mooney int
viona_ring_get_state(viona_link_t * link,uint16_t idx,struct viona_ring_params * params)365a26f9c14SPatrick Mooney viona_ring_get_state(viona_link_t *link, uint16_t idx,
366a26f9c14SPatrick Mooney     struct viona_ring_params *params)
367a26f9c14SPatrick Mooney {
368a26f9c14SPatrick Mooney 	viona_vring_t *ring;
369a26f9c14SPatrick Mooney 
370a26f9c14SPatrick Mooney 	if (idx >= VIONA_VQ_MAX) {
371a26f9c14SPatrick Mooney 		return (EINVAL);
372a26f9c14SPatrick Mooney 	}
373a26f9c14SPatrick Mooney 
374a26f9c14SPatrick Mooney 	ring = &link->l_vrings[idx];
375a26f9c14SPatrick Mooney 	mutex_enter(&ring->vr_lock);
376a26f9c14SPatrick Mooney 
377a26f9c14SPatrick Mooney 	params->vrp_size = ring->vr_size;
378a26f9c14SPatrick Mooney 	params->vrp_pa = ring->vr_pa;
379a26f9c14SPatrick Mooney 
380a26f9c14SPatrick Mooney 	if (ring->vr_state == VRS_RUN) {
381a26f9c14SPatrick Mooney 		/* On a running ring, we must heed the avail/used locks */
382a26f9c14SPatrick Mooney 		mutex_enter(&ring->vr_a_mutex);
383a26f9c14SPatrick Mooney 		params->vrp_avail_idx = ring->vr_cur_aidx;
384a26f9c14SPatrick Mooney 		mutex_exit(&ring->vr_a_mutex);
385a26f9c14SPatrick Mooney 		mutex_enter(&ring->vr_u_mutex);
386a26f9c14SPatrick Mooney 		params->vrp_used_idx = ring->vr_cur_uidx;
387a26f9c14SPatrick Mooney 		mutex_exit(&ring->vr_u_mutex);
388a26f9c14SPatrick Mooney 	} else {
389a26f9c14SPatrick Mooney 		/* Otherwise vr_lock is adequate protection */
390a26f9c14SPatrick Mooney 		params->vrp_avail_idx = ring->vr_cur_aidx;
391a26f9c14SPatrick Mooney 		params->vrp_used_idx = ring->vr_cur_uidx;
392a26f9c14SPatrick Mooney 	}
393a26f9c14SPatrick Mooney 
394a26f9c14SPatrick Mooney 	mutex_exit(&ring->vr_lock);
395a26f9c14SPatrick Mooney 
396a26f9c14SPatrick Mooney 	return (0);
397a26f9c14SPatrick Mooney }
398a26f9c14SPatrick Mooney 
399b22a70abSPatrick Mooney int
viona_ring_reset(viona_vring_t * ring,boolean_t heed_signals)400b22a70abSPatrick Mooney viona_ring_reset(viona_vring_t *ring, boolean_t heed_signals)
401b22a70abSPatrick Mooney {
402b22a70abSPatrick Mooney 	mutex_enter(&ring->vr_lock);
403b22a70abSPatrick Mooney 	if (ring->vr_state == VRS_RESET) {
404b22a70abSPatrick Mooney 		mutex_exit(&ring->vr_lock);
405b22a70abSPatrick Mooney 		return (0);
406b22a70abSPatrick Mooney 	}
407b22a70abSPatrick Mooney 
408b22a70abSPatrick Mooney 	if ((ring->vr_state_flags & VRSF_REQ_STOP) == 0) {
409b22a70abSPatrick Mooney 		ring->vr_state_flags |= VRSF_REQ_STOP;
410b22a70abSPatrick Mooney 		cv_broadcast(&ring->vr_cv);
411b22a70abSPatrick Mooney 	}
412b22a70abSPatrick Mooney 	while (ring->vr_state != VRS_RESET) {
413b22a70abSPatrick Mooney 		if (!heed_signals) {
414b22a70abSPatrick Mooney 			cv_wait(&ring->vr_cv, &ring->vr_lock);
415b22a70abSPatrick Mooney 		} else {
416b22a70abSPatrick Mooney 			int rs;
417b22a70abSPatrick Mooney 
418b22a70abSPatrick Mooney 			rs = cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
419b22a70abSPatrick Mooney 			if (rs <= 0 && ring->vr_state != VRS_RESET) {
420b22a70abSPatrick Mooney 				mutex_exit(&ring->vr_lock);
421b22a70abSPatrick Mooney 				return (EINTR);
422b22a70abSPatrick Mooney 			}
423b22a70abSPatrick Mooney 		}
424b22a70abSPatrick Mooney 	}
425b22a70abSPatrick Mooney 	mutex_exit(&ring->vr_lock);
426b22a70abSPatrick Mooney 	return (0);
427b22a70abSPatrick Mooney }
428b22a70abSPatrick Mooney 
429f2357d97SPatrick Mooney static bool
viona_ring_map(viona_vring_t * ring,bool defer_dirty)430f2357d97SPatrick Mooney viona_ring_map(viona_vring_t *ring, bool defer_dirty)
431b22a70abSPatrick Mooney {
432b22a70abSPatrick Mooney 	const uint16_t qsz = ring->vr_size;
433427f9b9aSPatrick Mooney 	uintptr_t pa = ring->vr_pa;
434b22a70abSPatrick Mooney 
435b22a70abSPatrick Mooney 	ASSERT3U(qsz, !=, 0);
436427f9b9aSPatrick Mooney 	ASSERT3U(qsz, <=, VRING_MAX_LEN);
437427f9b9aSPatrick Mooney 	ASSERT3U(pa, !=, 0);
438427f9b9aSPatrick Mooney 	ASSERT3U(pa & (LEGACY_VQ_ALIGN - 1), ==, 0);
439427f9b9aSPatrick Mooney 	ASSERT3U(LEGACY_VQ_ALIGN, ==, PAGESIZE);
440b22a70abSPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_lock));
441427f9b9aSPatrick Mooney 	ASSERT3P(ring->vr_map_pages, ==, NULL);
442b22a70abSPatrick Mooney 
443427f9b9aSPatrick Mooney 	const uint_t npages = LEGACY_VQ_PAGES(qsz);
444427f9b9aSPatrick Mooney 	ring->vr_map_pages = kmem_zalloc(npages * sizeof (void *), KM_SLEEP);
445b22a70abSPatrick Mooney 
446f2357d97SPatrick Mooney 	int page_flags = 0;
447f2357d97SPatrick Mooney 	if (defer_dirty) {
448f2357d97SPatrick Mooney 		/*
449f2357d97SPatrick Mooney 		 * During initialization, and when entering the paused state,
450f2357d97SPatrick Mooney 		 * the page holds for a virtqueue are established with the
451f2357d97SPatrick Mooney 		 * DEFER_DIRTY flag set.
452f2357d97SPatrick Mooney 		 *
453f2357d97SPatrick Mooney 		 * This prevents those page holds from immediately marking the
454f2357d97SPatrick Mooney 		 * underlying pages as dirty, since the viona emulation is not
455f2357d97SPatrick Mooney 		 * yet performing any accesses.  Once the ring transitions to
456f2357d97SPatrick Mooney 		 * the VRS_RUN state, the held pages will be marked as dirty.
457f2357d97SPatrick Mooney 		 *
458f2357d97SPatrick Mooney 		 * Any ring mappings performed outside those state conditions,
459f2357d97SPatrick Mooney 		 * such as those part of vmm_lease renewal during steady-state
460f2357d97SPatrick Mooney 		 * operation, will map the ring pages normally (as considered
461f2357d97SPatrick Mooney 		 * immediately dirty).
462f2357d97SPatrick Mooney 		 */
463f2357d97SPatrick Mooney 		page_flags |= VMPF_DEFER_DIRTY;
464f2357d97SPatrick Mooney 	}
465db9aa506SPatrick Mooney 
466f2357d97SPatrick Mooney 	vmm_page_t *prev = NULL;
467427f9b9aSPatrick Mooney 	for (uint_t i = 0; i < npages; i++, pa += PAGESIZE) {
468db9aa506SPatrick Mooney 		vmm_page_t *vmp;
469b22a70abSPatrick Mooney 
470f2357d97SPatrick Mooney 		vmp = vmm_drv_page_hold_ext(ring->vr_lease, pa,
471f2357d97SPatrick Mooney 		    PROT_READ | PROT_WRITE, page_flags);
472db9aa506SPatrick Mooney 		if (vmp == NULL) {
473427f9b9aSPatrick Mooney 			viona_ring_unmap(ring);
474f2357d97SPatrick Mooney 			return (false);
475427f9b9aSPatrick Mooney 		}
476db9aa506SPatrick Mooney 
477db9aa506SPatrick Mooney 		/*
478db9aa506SPatrick Mooney 		 * Keep the first page has the head of the chain, appending all
479db9aa506SPatrick Mooney 		 * subsequent pages to the tail.
480db9aa506SPatrick Mooney 		 */
481db9aa506SPatrick Mooney 		if (prev == NULL) {
482db9aa506SPatrick Mooney 			ring->vr_map_hold = vmp;
483db9aa506SPatrick Mooney 		} else {
484db9aa506SPatrick Mooney 			vmm_drv_page_chain(prev, vmp);
485db9aa506SPatrick Mooney 		}
486db9aa506SPatrick Mooney 		prev = vmp;
487db9aa506SPatrick Mooney 		ring->vr_map_pages[i] = vmm_drv_page_writable(vmp);
488b22a70abSPatrick Mooney 	}
489b22a70abSPatrick Mooney 
490f2357d97SPatrick Mooney 	return (true);
491f2357d97SPatrick Mooney }
492f2357d97SPatrick Mooney 
493f2357d97SPatrick Mooney static void
viona_ring_mark_dirty(viona_vring_t * ring)494f2357d97SPatrick Mooney viona_ring_mark_dirty(viona_vring_t *ring)
495f2357d97SPatrick Mooney {
496f2357d97SPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_lock));
497f2357d97SPatrick Mooney 	ASSERT(ring->vr_map_hold != NULL);
498f2357d97SPatrick Mooney 
499f2357d97SPatrick Mooney 	for (vmm_page_t *vp = ring->vr_map_hold; vp != NULL;
500f2357d97SPatrick Mooney 	    vp = vmm_drv_page_next(vp)) {
501f2357d97SPatrick Mooney 		vmm_drv_page_mark_dirty(vp);
502f2357d97SPatrick Mooney 	}
503b22a70abSPatrick Mooney }
504b22a70abSPatrick Mooney 
505b22a70abSPatrick Mooney static void
viona_ring_unmap(viona_vring_t * ring)506b22a70abSPatrick Mooney viona_ring_unmap(viona_vring_t *ring)
507b22a70abSPatrick Mooney {
508b22a70abSPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_lock));
509b22a70abSPatrick Mooney 
510427f9b9aSPatrick Mooney 	void **map = ring->vr_map_pages;
511427f9b9aSPatrick Mooney 	if (map != NULL) {
512427f9b9aSPatrick Mooney 		const uint_t npages = LEGACY_VQ_PAGES(ring->vr_size);
513427f9b9aSPatrick Mooney 		kmem_free(map, npages * sizeof (void *));
514427f9b9aSPatrick Mooney 		ring->vr_map_pages = NULL;
515db9aa506SPatrick Mooney 
516db9aa506SPatrick Mooney 		vmm_drv_page_release_chain(ring->vr_map_hold);
517db9aa506SPatrick Mooney 		ring->vr_map_hold = NULL;
518db9aa506SPatrick Mooney 	} else {
519db9aa506SPatrick Mooney 		ASSERT3P(ring->vr_map_hold, ==, NULL);
520427f9b9aSPatrick Mooney 	}
521b22a70abSPatrick Mooney }
522b22a70abSPatrick Mooney 
523427f9b9aSPatrick Mooney static inline void *
viona_ring_addr(viona_vring_t * ring,uint_t off)524427f9b9aSPatrick Mooney viona_ring_addr(viona_vring_t *ring, uint_t off)
525b22a70abSPatrick Mooney {
526427f9b9aSPatrick Mooney 	ASSERT3P(ring->vr_map_pages, !=, NULL);
527427f9b9aSPatrick Mooney 	ASSERT3U(LEGACY_VQ_SIZE(ring->vr_size), >, off);
528b22a70abSPatrick Mooney 
529427f9b9aSPatrick Mooney 	const uint_t page_num = off / PAGESIZE;
530427f9b9aSPatrick Mooney 	const uint_t page_off = off % PAGESIZE;
531427f9b9aSPatrick Mooney 	return ((caddr_t)ring->vr_map_pages[page_num] + page_off);
532427f9b9aSPatrick Mooney }
533b22a70abSPatrick Mooney 
534427f9b9aSPatrick Mooney void
viona_intr_ring(viona_vring_t * ring,boolean_t skip_flags_check)535427f9b9aSPatrick Mooney viona_intr_ring(viona_vring_t *ring, boolean_t skip_flags_check)
536427f9b9aSPatrick Mooney {
537427f9b9aSPatrick Mooney 	if (!skip_flags_check) {
538427f9b9aSPatrick Mooney 		volatile uint16_t *avail_flags = viona_ring_addr(ring,
539427f9b9aSPatrick Mooney 		    LEGACY_AVAIL_FLAGS_OFF(ring->vr_size));
540427f9b9aSPatrick Mooney 
541427f9b9aSPatrick Mooney 		if ((*avail_flags & VRING_AVAIL_F_NO_INTERRUPT) != 0) {
542427f9b9aSPatrick Mooney 			return;
543427f9b9aSPatrick Mooney 		}
544b22a70abSPatrick Mooney 	}
545b22a70abSPatrick Mooney 
546427f9b9aSPatrick Mooney 	mutex_enter(&ring->vr_lock);
547427f9b9aSPatrick Mooney 	uint64_t addr = ring->vr_msi_addr;
548427f9b9aSPatrick Mooney 	uint64_t msg = ring->vr_msi_msg;
549427f9b9aSPatrick Mooney 	mutex_exit(&ring->vr_lock);
550427f9b9aSPatrick Mooney 	if (addr != 0) {
551427f9b9aSPatrick Mooney 		/* Deliver the interrupt directly, if so configured... */
552427f9b9aSPatrick Mooney 		(void) vmm_drv_msi(ring->vr_lease, addr, msg);
553427f9b9aSPatrick Mooney 	} else {
554427f9b9aSPatrick Mooney 		/* ... otherwise, leave it to userspace */
555427f9b9aSPatrick Mooney 		if (atomic_cas_uint(&ring->vr_intr_enabled, 0, 1) == 0) {
556427f9b9aSPatrick Mooney 			pollwakeup(&ring->vr_link->l_pollhead, POLLRDBAND);
557427f9b9aSPatrick Mooney 		}
558b22a70abSPatrick Mooney 	}
559b22a70abSPatrick Mooney }
560b22a70abSPatrick Mooney 
561a26f9c14SPatrick Mooney static inline bool
vring_stop_req(const viona_vring_t * ring)562a26f9c14SPatrick Mooney vring_stop_req(const viona_vring_t *ring)
563a26f9c14SPatrick Mooney {
564a26f9c14SPatrick Mooney 	return ((ring->vr_state_flags & VRSF_REQ_STOP) != 0);
565a26f9c14SPatrick Mooney }
566a26f9c14SPatrick Mooney 
567a26f9c14SPatrick Mooney static inline bool
vring_pause_req(const viona_vring_t * ring)568a26f9c14SPatrick Mooney vring_pause_req(const viona_vring_t *ring)
569a26f9c14SPatrick Mooney {
570a26f9c14SPatrick Mooney 	return ((ring->vr_state_flags & VRSF_REQ_PAUSE) != 0);
571a26f9c14SPatrick Mooney }
572a26f9c14SPatrick Mooney 
573a26f9c14SPatrick Mooney static inline bool
vring_start_req(const viona_vring_t * ring)574a26f9c14SPatrick Mooney vring_start_req(const viona_vring_t *ring)
575a26f9c14SPatrick Mooney {
576a26f9c14SPatrick Mooney 	return ((ring->vr_state_flags & VRSF_REQ_START) != 0);
577a26f9c14SPatrick Mooney }
578a26f9c14SPatrick Mooney 
579a26f9c14SPatrick Mooney /*
580a26f9c14SPatrick Mooney  * Check if vring worker thread should bail out.  This will heed indications
581a26f9c14SPatrick Mooney  * that the containing process is exiting, as well as requests to stop or pause
582a26f9c14SPatrick Mooney  * the ring.  The `stop_only` parameter controls if pause requests are ignored
583a26f9c14SPatrick Mooney  * (true) or checked (false).
584a26f9c14SPatrick Mooney  *
585a26f9c14SPatrick Mooney  * Caller should hold vr_lock.
586a26f9c14SPatrick Mooney  */
587a26f9c14SPatrick Mooney static bool
vring_need_bail_ext(const viona_vring_t * ring,bool stop_only)588a26f9c14SPatrick Mooney vring_need_bail_ext(const viona_vring_t *ring, bool stop_only)
589a26f9c14SPatrick Mooney {
590a26f9c14SPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_lock));
591a26f9c14SPatrick Mooney 
592a26f9c14SPatrick Mooney 	if (vring_stop_req(ring) ||
593a26f9c14SPatrick Mooney 	    (!stop_only && vring_pause_req(ring))) {
594a26f9c14SPatrick Mooney 		return (true);
595a26f9c14SPatrick Mooney 	}
596a26f9c14SPatrick Mooney 
597a26f9c14SPatrick Mooney 	kthread_t *t = ring->vr_worker_thread;
598a26f9c14SPatrick Mooney 	if (t != NULL) {
599a26f9c14SPatrick Mooney 		proc_t *p = ttoproc(t);
600a26f9c14SPatrick Mooney 
601a26f9c14SPatrick Mooney 		ASSERT(p != NULL);
602a26f9c14SPatrick Mooney 		if ((p->p_flag & SEXITING) != 0) {
603a26f9c14SPatrick Mooney 			return (true);
604a26f9c14SPatrick Mooney 		}
605a26f9c14SPatrick Mooney 	}
606a26f9c14SPatrick Mooney 	return (false);
607a26f9c14SPatrick Mooney }
608a26f9c14SPatrick Mooney 
609a26f9c14SPatrick Mooney bool
vring_need_bail(const viona_vring_t * ring)610a26f9c14SPatrick Mooney vring_need_bail(const viona_vring_t *ring)
611a26f9c14SPatrick Mooney {
612a26f9c14SPatrick Mooney 	return (vring_need_bail_ext(ring, false));
613a26f9c14SPatrick Mooney }
614a26f9c14SPatrick Mooney 
615a26f9c14SPatrick Mooney int
viona_ring_pause(viona_vring_t * ring)616a26f9c14SPatrick Mooney viona_ring_pause(viona_vring_t *ring)
617a26f9c14SPatrick Mooney {
618a26f9c14SPatrick Mooney 	mutex_enter(&ring->vr_lock);
619a26f9c14SPatrick Mooney 	switch (ring->vr_state) {
620a26f9c14SPatrick Mooney 	case VRS_RESET:
621a26f9c14SPatrick Mooney 	case VRS_SETUP:
622a26f9c14SPatrick Mooney 	case VRS_INIT:
623a26f9c14SPatrick Mooney 		/*
624a26f9c14SPatrick Mooney 		 * For rings which have not yet started (even those in the
625a26f9c14SPatrick Mooney 		 * VRS_SETUP and VRS_INIT phases, where there a running worker
626a26f9c14SPatrick Mooney 		 * thread (waiting to be released to do its intended task), it
627a26f9c14SPatrick Mooney 		 * is adequate to simply clear any start request, to keep them
628a26f9c14SPatrick Mooney 		 * from proceeding into the actual work processing function.
629a26f9c14SPatrick Mooney 		 */
630a26f9c14SPatrick Mooney 		ring->vr_state_flags &= ~VRSF_REQ_START;
631a26f9c14SPatrick Mooney 		mutex_exit(&ring->vr_lock);
632a26f9c14SPatrick Mooney 		return (0);
633a26f9c14SPatrick Mooney 
634a26f9c14SPatrick Mooney 	case VRS_STOP:
635a26f9c14SPatrick Mooney 		if ((ring->vr_state_flags & VRSF_REQ_STOP) != 0) {
636a26f9c14SPatrick Mooney 			/* A ring on its way to RESET cannot be paused. */
637a26f9c14SPatrick Mooney 			mutex_exit(&ring->vr_lock);
638a26f9c14SPatrick Mooney 			return (EBUSY);
639a26f9c14SPatrick Mooney 		}
640a26f9c14SPatrick Mooney 		/* FALLTHROUGH */
641a26f9c14SPatrick Mooney 	case VRS_RUN:
642a26f9c14SPatrick Mooney 		ring->vr_state_flags |= VRSF_REQ_PAUSE;
643a26f9c14SPatrick Mooney 		cv_broadcast(&ring->vr_cv);
644a26f9c14SPatrick Mooney 		break;
645a26f9c14SPatrick Mooney 
646a26f9c14SPatrick Mooney 	default:
647a26f9c14SPatrick Mooney 		panic("invalid ring state %d", ring->vr_state);
648a26f9c14SPatrick Mooney 		break;
649a26f9c14SPatrick Mooney 	}
650a26f9c14SPatrick Mooney 
651a26f9c14SPatrick Mooney 	for (;;) {
652a26f9c14SPatrick Mooney 		int res = cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
653a26f9c14SPatrick Mooney 
654a26f9c14SPatrick Mooney 		if (ring->vr_state == VRS_INIT ||
655a26f9c14SPatrick Mooney 		    (ring->vr_state_flags & VRSF_REQ_PAUSE) == 0) {
656a26f9c14SPatrick Mooney 			/* Ring made it to (or through) paused state */
657a26f9c14SPatrick Mooney 			mutex_exit(&ring->vr_lock);
658a26f9c14SPatrick Mooney 			return (0);
659a26f9c14SPatrick Mooney 		}
660a26f9c14SPatrick Mooney 		if (res == 0) {
661a26f9c14SPatrick Mooney 			/* interrupted by signal */
662a26f9c14SPatrick Mooney 			mutex_exit(&ring->vr_lock);
663a26f9c14SPatrick Mooney 			return (EINTR);
664a26f9c14SPatrick Mooney 		}
665a26f9c14SPatrick Mooney 	}
666a26f9c14SPatrick Mooney 	/* NOTREACHED */
667a26f9c14SPatrick Mooney }
668a26f9c14SPatrick Mooney 
/*
 * Entry point for a ring's worker thread (handed to lwp_create() by
 * viona_create_worker()).  `arg` is the viona_vring_t to be serviced.
 *
 * The ring must be in VRS_SETUP on entry.  Unless a stop or process exit is
 * already pending, the worker:
 *  1. moves the ring to VRS_INIT, broadcasts on vr_cv, and waits for
 *     VRSF_REQ_START, renewing an expired lease before each wait and clearing
 *     any pause request seen while waiting;
 *  2. moves the ring to VRS_RUN, marks it dirty, and runs viona_worker_rx()
 *     or viona_worker_tx(), depending on which of the link's vrings this is;
 *  3. once that returns (ring in VRS_STOP, no transfers outstanding), honors
 *     a pending pause request by unmapping and re-mapping the ring and going
 *     back to step 1, provided no stop/exit is pending and the map succeeds.
 *
 * Every other path ends in ring_reset, which frees per-ring resources, drops
 * the lease, returns the ring to VRS_RESET, and exits the LWP.
 */
669b22a70abSPatrick Mooney static void
viona_worker(void * arg)670b22a70abSPatrick Mooney viona_worker(void *arg)
671b22a70abSPatrick Mooney {
672b22a70abSPatrick Mooney 	viona_vring_t *ring = (viona_vring_t *)arg;
673b22a70abSPatrick Mooney 	viona_link_t *link = ring->vr_link;
674b22a70abSPatrick Mooney 
675b22a70abSPatrick Mooney 	mutex_enter(&ring->vr_lock);
676b22a70abSPatrick Mooney 	VERIFY3U(ring->vr_state, ==, VRS_SETUP);
677b22a70abSPatrick Mooney 
678b22a70abSPatrick Mooney 	/* Bail immediately if ring shutdown or process exit was requested */
679a26f9c14SPatrick Mooney 	if (vring_need_bail_ext(ring, true)) {
680a26f9c14SPatrick Mooney 		goto ring_reset;
681b22a70abSPatrick Mooney 	}
682b22a70abSPatrick Mooney 
683b22a70abSPatrick Mooney 	/* Report worker thread as alive and notify creator */
	/* This label is also the re-entry point after a completed pause. */
684a26f9c14SPatrick Mooney ring_init:
685b22a70abSPatrick Mooney 	ring->vr_state = VRS_INIT;
686b22a70abSPatrick Mooney 	cv_broadcast(&ring->vr_cv);
687b22a70abSPatrick Mooney 
688a26f9c14SPatrick Mooney 	while (!vring_start_req(ring)) {
689b22a70abSPatrick Mooney 		/*
690b22a70abSPatrick Mooney 		 * Keeping lease renewals timely while waiting for the ring to
691b22a70abSPatrick Mooney 		 * be started is important for avoiding deadlocks.
692b22a70abSPatrick Mooney 		 */
693b22a70abSPatrick Mooney 		if (vmm_drv_lease_expired(ring->vr_lease)) {
694b22a70abSPatrick Mooney 			if (!viona_ring_lease_renew(ring)) {
695a26f9c14SPatrick Mooney 				goto ring_reset;
696b22a70abSPatrick Mooney 			}
697b22a70abSPatrick Mooney 		}
698b22a70abSPatrick Mooney 
		/*
		 * The wait result is ignored; the flags are re-checked below
		 * and in the loop condition after every wakeup.
		 */
699b22a70abSPatrick Mooney 		(void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
700b22a70abSPatrick Mooney 
701a26f9c14SPatrick Mooney 		if (vring_pause_req(ring)) {
702a26f9c14SPatrick Mooney 			/* We are already paused in the INIT state. */
703a26f9c14SPatrick Mooney 			ring->vr_state_flags &= ~VRSF_REQ_PAUSE;
704a26f9c14SPatrick Mooney 		}
705a26f9c14SPatrick Mooney 		if (vring_need_bail_ext(ring, true)) {
706a26f9c14SPatrick Mooney 			goto ring_reset;
707b22a70abSPatrick Mooney 		}
708b22a70abSPatrick Mooney 	}
709b22a70abSPatrick Mooney 
	/* Consume the start request and enter the running state */
710b22a70abSPatrick Mooney 	ASSERT((ring->vr_state_flags & VRSF_REQ_START) != 0);
711b22a70abSPatrick Mooney 	ring->vr_state = VRS_RUN;
712b22a70abSPatrick Mooney 	ring->vr_state_flags &= ~VRSF_REQ_START;
713f2357d97SPatrick Mooney 	viona_ring_mark_dirty(ring);
714b22a70abSPatrick Mooney 
715b22a70abSPatrick Mooney 	/* Ensure ring lease is valid first */
716b22a70abSPatrick Mooney 	if (vmm_drv_lease_expired(ring->vr_lease)) {
717b22a70abSPatrick Mooney 		if (!viona_ring_lease_renew(ring)) {
718a26f9c14SPatrick Mooney 			goto ring_reset;
719b22a70abSPatrick Mooney 		}
720b22a70abSPatrick Mooney 	}
721b22a70abSPatrick Mooney 
722b22a70abSPatrick Mooney 	/* Process actual work */
723b22a70abSPatrick Mooney 	if (ring == &link->l_vrings[VIONA_VQ_RX]) {
724b22a70abSPatrick Mooney 		viona_worker_rx(ring, link);
725b22a70abSPatrick Mooney 	} else if (ring == &link->l_vrings[VIONA_VQ_TX]) {
726b22a70abSPatrick Mooney 		viona_worker_tx(ring, link);
727b22a70abSPatrick Mooney 	} else {
728b22a70abSPatrick Mooney 		panic("unexpected ring: %p", (void *)ring);
729b22a70abSPatrick Mooney 	}
730b22a70abSPatrick Mooney 
	/* The rx/tx worker must hand back a stopped, quiesced ring */
731b22a70abSPatrick Mooney 	VERIFY3U(ring->vr_state, ==, VRS_STOP);
732a26f9c14SPatrick Mooney 	VERIFY3U(ring->vr_xfer_outstanding, ==, 0);
733b22a70abSPatrick Mooney 
734a26f9c14SPatrick Mooney 	/* Respond to a pause request if the ring is not required to stop */
735a26f9c14SPatrick Mooney 	if (vring_pause_req(ring)) {
736a26f9c14SPatrick Mooney 		ring->vr_state_flags &= ~VRSF_REQ_PAUSE;
737a26f9c14SPatrick Mooney 
738f2357d97SPatrick Mooney 		if (vring_need_bail_ext(ring, true)) {
739f2357d97SPatrick Mooney 			goto ring_reset;
740f2357d97SPatrick Mooney 		}
741f2357d97SPatrick Mooney 
742f2357d97SPatrick Mooney 		/*
743f2357d97SPatrick Mooney 		 * To complete pausing of the ring, unmap and re-map the pages
744f2357d97SPatrick Mooney 		 * underpinning the virtqueue.  This is to synchronize their
745f2357d97SPatrick Mooney 		 * dirty state in the backing page tables and restore the
746f2357d97SPatrick Mooney 		 * defer-dirty state on the held pages.
747f2357d97SPatrick Mooney 		 */
748f2357d97SPatrick Mooney 		viona_ring_unmap(ring);
749f2357d97SPatrick Mooney 		if (viona_ring_map(ring, true)) {
750a26f9c14SPatrick Mooney 			goto ring_init;
751a26f9c14SPatrick Mooney 		}
752f2357d97SPatrick Mooney 
753f2357d97SPatrick Mooney 		/*
754f2357d97SPatrick Mooney 		 * If the ring pages failed to be mapped, fallthrough to
755f2357d97SPatrick Mooney 		 * ring-reset like any other failure.
756f2357d97SPatrick Mooney 		 */
757b22a70abSPatrick Mooney 	}
758a26f9c14SPatrick Mooney 
759a26f9c14SPatrick Mooney ring_reset:
760b22a70abSPatrick Mooney 	viona_ring_misc_free(ring);
761b22a70abSPatrick Mooney 
	/* Return every piece of ring state to its reset value */
762b22a70abSPatrick Mooney 	viona_ring_lease_drop(ring);
763b22a70abSPatrick Mooney 	ring->vr_cur_aidx = 0;
764427f9b9aSPatrick Mooney 	ring->vr_size = 0;
765427f9b9aSPatrick Mooney 	ring->vr_mask = 0;
766427f9b9aSPatrick Mooney 	ring->vr_pa = 0;
767b22a70abSPatrick Mooney 	ring->vr_state = VRS_RESET;
768b22a70abSPatrick Mooney 	ring->vr_state_flags = 0;
769b22a70abSPatrick Mooney 	ring->vr_worker_thread = NULL;
	/* Wake anyone waiting on vr_cv so they observe the reset ring */
770b22a70abSPatrick Mooney 	cv_broadcast(&ring->vr_cv);
771b22a70abSPatrick Mooney 	mutex_exit(&ring->vr_lock);
772b22a70abSPatrick Mooney 
	/*
	 * NOTE(review): p_lock is taken here and never released in this
	 * function; presumably lwp_exit() expects it held on entry and does
	 * not return -- confirm against lwp_exit().
	 */
773b22a70abSPatrick Mooney 	mutex_enter(&ttoproc(curthread)->p_lock);
774b22a70abSPatrick Mooney 	lwp_exit();
775b22a70abSPatrick Mooney }
776b22a70abSPatrick Mooney 
777b22a70abSPatrick Mooney static kthread_t *
viona_create_worker(viona_vring_t * ring)778b22a70abSPatrick Mooney viona_create_worker(viona_vring_t *ring)
779b22a70abSPatrick Mooney {
780b22a70abSPatrick Mooney 	k_sigset_t hold_set;
781b22a70abSPatrick Mooney 	proc_t *p = curproc;
782b22a70abSPatrick Mooney 	kthread_t *t;
783b22a70abSPatrick Mooney 	klwp_t *lwp;
784b22a70abSPatrick Mooney 
785b22a70abSPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_lock));
786b22a70abSPatrick Mooney 	ASSERT(ring->vr_state == VRS_RESET);
787b22a70abSPatrick Mooney 
788b22a70abSPatrick Mooney 	sigfillset(&hold_set);
789b22a70abSPatrick Mooney 	lwp = lwp_create(viona_worker, (void *)ring, 0, p, TS_STOPPED,
790b22a70abSPatrick Mooney 	    minclsyspri - 1, &hold_set, curthread->t_cid, 0);
791b22a70abSPatrick Mooney 	if (lwp == NULL) {
792b22a70abSPatrick Mooney 		return (NULL);
793b22a70abSPatrick Mooney 	}
794b22a70abSPatrick Mooney 
795b22a70abSPatrick Mooney 	t = lwptot(lwp);
796b22a70abSPatrick Mooney 	mutex_enter(&p->p_lock);
797b22a70abSPatrick Mooney 	t->t_proc_flag = (t->t_proc_flag & ~TP_HOLDLWP) | TP_KTHREAD;
798b22a70abSPatrick Mooney 	lwp_create_done(t);
799b22a70abSPatrick Mooney 	mutex_exit(&p->p_lock);
800b22a70abSPatrick Mooney 
801b22a70abSPatrick Mooney 	return (t);
802b22a70abSPatrick Mooney }
803b22a70abSPatrick Mooney 
804427f9b9aSPatrick Mooney void
vq_read_desc(viona_vring_t * ring,uint16_t idx,struct virtio_desc * descp)805427f9b9aSPatrick Mooney vq_read_desc(viona_vring_t *ring, uint16_t idx, struct virtio_desc *descp)
806427f9b9aSPatrick Mooney {
807427f9b9aSPatrick Mooney 	const uint_t entry_off = idx * sizeof (struct virtio_desc);
808427f9b9aSPatrick Mooney 
809427f9b9aSPatrick Mooney 	ASSERT3U(idx, <, ring->vr_size);
810427f9b9aSPatrick Mooney 
811427f9b9aSPatrick Mooney 	bcopy(viona_ring_addr(ring, entry_off), descp, sizeof (*descp));
812427f9b9aSPatrick Mooney }
813427f9b9aSPatrick Mooney 
814427f9b9aSPatrick Mooney static uint16_t
vq_read_avail(viona_vring_t * ring,uint16_t idx)815427f9b9aSPatrick Mooney vq_read_avail(viona_vring_t *ring, uint16_t idx)
816427f9b9aSPatrick Mooney {
817427f9b9aSPatrick Mooney 	ASSERT3U(idx, <, ring->vr_size);
818427f9b9aSPatrick Mooney 
819427f9b9aSPatrick Mooney 	volatile uint16_t *avail_ent =
820427f9b9aSPatrick Mooney 	    viona_ring_addr(ring, LEGACY_AVAIL_ENT_OFF(ring->vr_size, idx));
821427f9b9aSPatrick Mooney 	return (*avail_ent);
822427f9b9aSPatrick Mooney }
823427f9b9aSPatrick Mooney 
824427f9b9aSPatrick Mooney /*
825427f9b9aSPatrick Mooney  * Given a buffer descriptor `desc`, attempt to map the pages backing that
826427f9b9aSPatrick Mooney  * region of guest physical memory, taking into account that there are no
827427f9b9aSPatrick Mooney  * guarantees about guest-contiguous pages being host-contiguous.
828427f9b9aSPatrick Mooney  */
829427f9b9aSPatrick Mooney static int
vq_map_desc_bufs(viona_vring_t * ring,const struct virtio_desc * desc,vq_held_region_t * region)830427f9b9aSPatrick Mooney vq_map_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
831db9aa506SPatrick Mooney     vq_held_region_t *region)
832427f9b9aSPatrick Mooney {
833db9aa506SPatrick Mooney 	int err;
834427f9b9aSPatrick Mooney 
835427f9b9aSPatrick Mooney 	if (desc->vd_len == 0) {
836427f9b9aSPatrick Mooney 		VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
837427f9b9aSPatrick Mooney 		    uint32_t, desc->vd_len);
838427f9b9aSPatrick Mooney 		VIONA_RING_STAT_INCR(ring, desc_bad_len);
839427f9b9aSPatrick Mooney 		return (EINVAL);
840427f9b9aSPatrick Mooney 	}
841427f9b9aSPatrick Mooney 
842db9aa506SPatrick Mooney 	err = vq_region_hold(ring, desc->vd_addr, desc->vd_len,
843db9aa506SPatrick Mooney 	    (desc->vd_flags & VRING_DESC_F_WRITE) != 0, region);
844db9aa506SPatrick Mooney 	switch (err) {
845db9aa506SPatrick Mooney 	case E2BIG:
846427f9b9aSPatrick Mooney 		VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
847427f9b9aSPatrick Mooney 		VIONA_RING_STAT_INCR(ring, too_many_desc);
848db9aa506SPatrick Mooney 		break;
849db9aa506SPatrick Mooney 	case EFAULT:
850427f9b9aSPatrick Mooney 		VIONA_PROBE_BAD_RING_ADDR(ring, desc->vd_addr);
851427f9b9aSPatrick Mooney 		VIONA_RING_STAT_INCR(ring, bad_ring_addr);
852db9aa506SPatrick Mooney 		break;
853db9aa506SPatrick Mooney 	default:
854db9aa506SPatrick Mooney 		break;
855427f9b9aSPatrick Mooney 	}
856427f9b9aSPatrick Mooney 
857db9aa506SPatrick Mooney 	return (err);
858427f9b9aSPatrick Mooney }
859427f9b9aSPatrick Mooney 
860427f9b9aSPatrick Mooney /*
861427f9b9aSPatrick Mooney  * Walk an indirect buffer descriptor `desc`, attempting to map the pages
862*d4221574SAndy Fiddaman  * backing the regions of guest memory covered by its constituent descriptors.
863427f9b9aSPatrick Mooney  */
864427f9b9aSPatrick Mooney static int
vq_map_indir_desc_bufs(viona_vring_t * ring,const struct virtio_desc * desc,vq_held_region_t * region)865427f9b9aSPatrick Mooney vq_map_indir_desc_bufs(viona_vring_t *ring, const struct virtio_desc *desc,
866db9aa506SPatrick Mooney     vq_held_region_t *region)
867427f9b9aSPatrick Mooney {
868427f9b9aSPatrick Mooney 	const uint16_t indir_count = desc->vd_len / sizeof (struct virtio_desc);
869427f9b9aSPatrick Mooney 
870427f9b9aSPatrick Mooney 	if ((desc->vd_len & 0xf) != 0 || indir_count == 0 ||
871427f9b9aSPatrick Mooney 	    indir_count > ring->vr_size ||
872427f9b9aSPatrick Mooney 	    desc->vd_addr > (desc->vd_addr + desc->vd_len)) {
873427f9b9aSPatrick Mooney 		VIONA_PROBE2(indir_bad_len, viona_vring_t *, ring,
874427f9b9aSPatrick Mooney 		    uint32_t, desc->vd_len);
875427f9b9aSPatrick Mooney 		VIONA_RING_STAT_INCR(ring, indir_bad_len);
876427f9b9aSPatrick Mooney 		return (EINVAL);
877427f9b9aSPatrick Mooney 	}
878427f9b9aSPatrick Mooney 
879427f9b9aSPatrick Mooney 	uint16_t indir_next = 0;
880db9aa506SPatrick Mooney 	const uint8_t *buf = NULL;
881427f9b9aSPatrick Mooney 	uint64_t buf_gpa = UINT64_MAX;
882db9aa506SPatrick Mooney 	vmm_page_t *vmp = NULL;
883db9aa506SPatrick Mooney 	int err = 0;
884427f9b9aSPatrick Mooney 
885427f9b9aSPatrick Mooney 	for (;;) {
886427f9b9aSPatrick Mooney 		uint64_t indir_gpa =
887427f9b9aSPatrick Mooney 		    desc->vd_addr + (indir_next * sizeof (struct virtio_desc));
888427f9b9aSPatrick Mooney 		uint64_t indir_page = indir_gpa & PAGEMASK;
889427f9b9aSPatrick Mooney 		struct virtio_desc vp;
890427f9b9aSPatrick Mooney 
891427f9b9aSPatrick Mooney 		/*
892427f9b9aSPatrick Mooney 		 * Get a mapping for the page that the next indirect descriptor
893427f9b9aSPatrick Mooney 		 * resides in, if has not already been done.
894427f9b9aSPatrick Mooney 		 */
895427f9b9aSPatrick Mooney 		if (indir_page != buf_gpa) {
896db9aa506SPatrick Mooney 			if (vmp != NULL) {
897db9aa506SPatrick Mooney 				vmm_drv_page_release(vmp);
898db9aa506SPatrick Mooney 			}
899db9aa506SPatrick Mooney 			vmp = vq_page_hold(ring, indir_page, false);
900db9aa506SPatrick Mooney 			if (vmp == NULL) {
901db9aa506SPatrick Mooney 				VIONA_PROBE_BAD_RING_ADDR(ring, indir_page);
902427f9b9aSPatrick Mooney 				VIONA_RING_STAT_INCR(ring, bad_ring_addr);
903db9aa506SPatrick Mooney 				err = EFAULT;
904db9aa506SPatrick Mooney 				break;
905427f9b9aSPatrick Mooney 			}
906427f9b9aSPatrick Mooney 			buf_gpa = indir_page;
907db9aa506SPatrick Mooney 			buf = vmm_drv_page_readable(vmp);
908427f9b9aSPatrick Mooney 		}
909427f9b9aSPatrick Mooney 
910427f9b9aSPatrick Mooney 		/*
911427f9b9aSPatrick Mooney 		 * A copy of the indirect descriptor is made here, rather than
912427f9b9aSPatrick Mooney 		 * simply using a reference pointer.  This prevents malicious or
913427f9b9aSPatrick Mooney 		 * erroneous guest writes to the descriptor from fooling the
914427f9b9aSPatrick Mooney 		 * flags/bounds verification through a race.
915427f9b9aSPatrick Mooney 		 */
916427f9b9aSPatrick Mooney 		bcopy(buf + (indir_gpa - indir_page), &vp, sizeof (vp));
917427f9b9aSPatrick Mooney 
918427f9b9aSPatrick Mooney 		if (vp.vd_flags & VRING_DESC_F_INDIRECT) {
919427f9b9aSPatrick Mooney 			VIONA_PROBE1(indir_bad_nest, viona_vring_t *, ring);
920427f9b9aSPatrick Mooney 			VIONA_RING_STAT_INCR(ring, indir_bad_nest);
921db9aa506SPatrick Mooney 			err = EINVAL;
922db9aa506SPatrick Mooney 			break;
923427f9b9aSPatrick Mooney 		} else if (vp.vd_len == 0) {
924427f9b9aSPatrick Mooney 			VIONA_PROBE2(desc_bad_len, viona_vring_t *, ring,
925427f9b9aSPatrick Mooney 			    uint32_t, vp.vd_len);
926427f9b9aSPatrick Mooney 			VIONA_RING_STAT_INCR(ring, desc_bad_len);
927db9aa506SPatrick Mooney 			err = EINVAL;
928db9aa506SPatrick Mooney 			break;
929427f9b9aSPatrick Mooney 		}
930427f9b9aSPatrick Mooney 
931db9aa506SPatrick Mooney 		err = vq_map_desc_bufs(ring, &vp, region);
932427f9b9aSPatrick Mooney 		if (err != 0) {
933db9aa506SPatrick Mooney 			break;
934427f9b9aSPatrick Mooney 		}
935427f9b9aSPatrick Mooney 
936427f9b9aSPatrick Mooney 		/* Successfully reach the end of the indir chain */
937427f9b9aSPatrick Mooney 		if ((vp.vd_flags & VRING_DESC_F_NEXT) == 0) {
938db9aa506SPatrick Mooney 			break;
939427f9b9aSPatrick Mooney 		}
940db9aa506SPatrick Mooney 		if (region->vhr_idx >= region->vhr_niov) {
941427f9b9aSPatrick Mooney 			VIONA_PROBE1(too_many_desc, viona_vring_t *, ring);
942427f9b9aSPatrick Mooney 			VIONA_RING_STAT_INCR(ring, too_many_desc);
943db9aa506SPatrick Mooney 			err = E2BIG;
944db9aa506SPatrick Mooney 			break;
945427f9b9aSPatrick Mooney 		}
946427f9b9aSPatrick Mooney 
947427f9b9aSPatrick Mooney 		indir_next = vp.vd_next;
948427f9b9aSPatrick Mooney 		if (indir_next >= indir_count) {
949427f9b9aSPatrick Mooney 			VIONA_PROBE3(indir_bad_next, viona_vring_t *, ring,
950427f9b9aSPatrick Mooney 			    uint16_t, indir_next, uint16_t, indir_count);
951427f9b9aSPatrick Mooney 			VIONA_RING_STAT_INCR(ring, indir_bad_next);
952db9aa506SPatrick Mooney 			err = EINVAL;
953db9aa506SPatrick Mooney 			break;
954427f9b9aSPatrick Mooney 		}
955427f9b9aSPatrick Mooney 	}
956427f9b9aSPatrick Mooney 
957db9aa506SPatrick Mooney 	if (vmp != NULL) {
958db9aa506SPatrick Mooney 		vmm_drv_page_release(vmp);
959db9aa506SPatrick Mooney 	}
960db9aa506SPatrick Mooney 	return (err);
961427f9b9aSPatrick Mooney }
962427f9b9aSPatrick Mooney 
963b22a70abSPatrick Mooney int
vq_popchain(viona_vring_t * ring,struct iovec * iov,uint_t niov,uint16_t * cookie,vmm_page_t ** chain)964b22a70abSPatrick Mooney vq_popchain(viona_vring_t *ring, struct iovec *iov, uint_t niov,
965db9aa506SPatrick Mooney     uint16_t *cookie, vmm_page_t **chain)
966b22a70abSPatrick Mooney {
967db9aa506SPatrick Mooney 	uint16_t ndesc, idx, head, next;
968b22a70abSPatrick Mooney 	struct virtio_desc vdir;
969db9aa506SPatrick Mooney 	vq_held_region_t region = {
970db9aa506SPatrick Mooney 		.vhr_niov = niov,
971db9aa506SPatrick Mooney 		.vhr_iov = iov,
972db9aa506SPatrick Mooney 	};
973b22a70abSPatrick Mooney 
974b22a70abSPatrick Mooney 	ASSERT(iov != NULL);
975b22a70abSPatrick Mooney 	ASSERT(niov > 0 && niov < INT_MAX);
976db9aa506SPatrick Mooney 	ASSERT(*chain == NULL);
977b22a70abSPatrick Mooney 
978b22a70abSPatrick Mooney 	mutex_enter(&ring->vr_a_mutex);
979b22a70abSPatrick Mooney 	idx = ring->vr_cur_aidx;
980427f9b9aSPatrick Mooney 	ndesc = viona_ring_num_avail(ring);
981b22a70abSPatrick Mooney 
982b22a70abSPatrick Mooney 	if (ndesc == 0) {
983b22a70abSPatrick Mooney 		mutex_exit(&ring->vr_a_mutex);
984b22a70abSPatrick Mooney 		return (0);
985b22a70abSPatrick Mooney 	}
986b22a70abSPatrick Mooney 	if (ndesc > ring->vr_size) {
987b22a70abSPatrick Mooney 		/*
988b22a70abSPatrick Mooney 		 * Despite the fact that the guest has provided an 'avail_idx'
989b22a70abSPatrick Mooney 		 * which indicates that an impossible number of descriptors are
990b22a70abSPatrick Mooney 		 * available, continue on and attempt to process the next one.
991b22a70abSPatrick Mooney 		 *
992b22a70abSPatrick Mooney 		 * The transgression will not escape the probe or stats though.
993b22a70abSPatrick Mooney 		 */
994b22a70abSPatrick Mooney 		VIONA_PROBE2(ndesc_too_high, viona_vring_t *, ring,
995b22a70abSPatrick Mooney 		    uint16_t, ndesc);
996b22a70abSPatrick Mooney 		VIONA_RING_STAT_INCR(ring, ndesc_too_high);
997b22a70abSPatrick Mooney 	}
998b22a70abSPatrick Mooney 
999427f9b9aSPatrick Mooney 	head = vq_read_avail(ring, idx & ring->vr_mask);
1000b22a70abSPatrick Mooney 	next = head;
1001b22a70abSPatrick Mooney 
1002db9aa506SPatrick Mooney 	for (region.vhr_idx = 0; region.vhr_idx < niov; next = vdir.vd_next) {
1003b22a70abSPatrick Mooney 		if (next >= ring->vr_size) {
1004b22a70abSPatrick Mooney 			VIONA_PROBE2(bad_idx, viona_vring_t *, ring,
1005b22a70abSPatrick Mooney 			    uint16_t, next);
1006b22a70abSPatrick Mooney 			VIONA_RING_STAT_INCR(ring, bad_idx);
1007427f9b9aSPatrick Mooney 			break;
1008b22a70abSPatrick Mooney 		}
1009b22a70abSPatrick Mooney 
1010427f9b9aSPatrick Mooney 		vq_read_desc(ring, next, &vdir);
1011b22a70abSPatrick Mooney 		if ((vdir.vd_flags & VRING_DESC_F_INDIRECT) == 0) {
1012db9aa506SPatrick Mooney 			if (vq_map_desc_bufs(ring, &vdir, &region) != 0) {
1013427f9b9aSPatrick Mooney 				break;
1014b22a70abSPatrick Mooney 			}
1015b22a70abSPatrick Mooney 		} else {
1016427f9b9aSPatrick Mooney 			/*
1017427f9b9aSPatrick Mooney 			 * Per the specification (Virtio 1.1 S2.6.5.3.1):
1018427f9b9aSPatrick Mooney 			 *   A driver MUST NOT set both VIRTQ_DESC_F_INDIRECT
1019427f9b9aSPatrick Mooney 			 *   and VIRTQ_DESC_F_NEXT in `flags`.
1020427f9b9aSPatrick Mooney 			 */
1021427f9b9aSPatrick Mooney 			if ((vdir.vd_flags & VRING_DESC_F_NEXT) != 0) {
1022427f9b9aSPatrick Mooney 				VIONA_PROBE3(indir_bad_next,
1023b22a70abSPatrick Mooney 				    viona_vring_t *, ring,
1024427f9b9aSPatrick Mooney 				    uint16_t, next, uint16_t, 0);
1025427f9b9aSPatrick Mooney 				VIONA_RING_STAT_INCR(ring, indir_bad_next);
1026427f9b9aSPatrick Mooney 				break;
1027b22a70abSPatrick Mooney 			}
1028427f9b9aSPatrick Mooney 
1029db9aa506SPatrick Mooney 			if (vq_map_indir_desc_bufs(ring, &vdir, &region) != 0) {
1030427f9b9aSPatrick Mooney 				break;
1031b22a70abSPatrick Mooney 			}
1032b22a70abSPatrick Mooney 		}
1033427f9b9aSPatrick Mooney 
1034b22a70abSPatrick Mooney 		if ((vdir.vd_flags & VRING_DESC_F_NEXT) == 0) {
1035b22a70abSPatrick Mooney 			ring->vr_cur_aidx++;
1036b22a70abSPatrick Mooney 			mutex_exit(&ring->vr_a_mutex);
1037db9aa506SPatrick Mooney 
1038db9aa506SPatrick Mooney 			*cookie = head;
1039db9aa506SPatrick Mooney 			*chain = region.vhr_head;
1040db9aa506SPatrick Mooney 			return (region.vhr_idx);
1041b22a70abSPatrick Mooney 		}
1042b22a70abSPatrick Mooney 	}
1043b22a70abSPatrick Mooney 
1044b22a70abSPatrick Mooney 	mutex_exit(&ring->vr_a_mutex);
1045db9aa506SPatrick Mooney 	if (region.vhr_head != NULL) {
1046db9aa506SPatrick Mooney 		/*
1047db9aa506SPatrick Mooney 		 * If any pages were held prior to encountering an error, we
1048db9aa506SPatrick Mooney 		 * must release them now.
1049db9aa506SPatrick Mooney 		 */
1050db9aa506SPatrick Mooney 		vmm_drv_page_release_chain(region.vhr_head);
1051db9aa506SPatrick Mooney 	}
1052b22a70abSPatrick Mooney 	return (-1);
1053b22a70abSPatrick Mooney }
1054b22a70abSPatrick Mooney 
1055427f9b9aSPatrick Mooney 
1056427f9b9aSPatrick Mooney static void
vq_write_used_ent(viona_vring_t * ring,uint16_t idx,uint16_t cookie,uint32_t len)1057427f9b9aSPatrick Mooney vq_write_used_ent(viona_vring_t *ring, uint16_t idx, uint16_t cookie,
1058427f9b9aSPatrick Mooney     uint32_t len)
1059427f9b9aSPatrick Mooney {
1060427f9b9aSPatrick Mooney 	/*
1061427f9b9aSPatrick Mooney 	 * In a larger ring, entry could be split across pages, so be sure to
1062427f9b9aSPatrick Mooney 	 * account for that when configuring the transfer by looking up the ID
1063427f9b9aSPatrick Mooney 	 * and length addresses separately, rather than an address for a
1064427f9b9aSPatrick Mooney 	 * combined `struct virtio_used`.
1065427f9b9aSPatrick Mooney 	 */
1066427f9b9aSPatrick Mooney 	const uint_t used_id_off = LEGACY_USED_ENT_OFF(ring->vr_size, idx);
1067427f9b9aSPatrick Mooney 	const uint_t used_len_off = used_id_off + sizeof (uint32_t);
1068427f9b9aSPatrick Mooney 	volatile uint32_t *idp = viona_ring_addr(ring, used_id_off);
1069427f9b9aSPatrick Mooney 	volatile uint32_t *lenp = viona_ring_addr(ring, used_len_off);
1070427f9b9aSPatrick Mooney 
1071427f9b9aSPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_u_mutex));
1072427f9b9aSPatrick Mooney 
1073427f9b9aSPatrick Mooney 	*idp = cookie;
1074427f9b9aSPatrick Mooney 	*lenp = len;
1075427f9b9aSPatrick Mooney }
1076427f9b9aSPatrick Mooney 
1077427f9b9aSPatrick Mooney static void
vq_write_used_idx(viona_vring_t * ring,uint16_t idx)1078427f9b9aSPatrick Mooney vq_write_used_idx(viona_vring_t *ring, uint16_t idx)
1079427f9b9aSPatrick Mooney {
1080427f9b9aSPatrick Mooney 	ASSERT(MUTEX_HELD(&ring->vr_u_mutex));
1081427f9b9aSPatrick Mooney 
1082427f9b9aSPatrick Mooney 	volatile uint16_t *used_idx =
1083427f9b9aSPatrick Mooney 	    viona_ring_addr(ring, LEGACY_USED_IDX_OFF(ring->vr_size));
1084427f9b9aSPatrick Mooney 	*used_idx = idx;
1085427f9b9aSPatrick Mooney }
1086427f9b9aSPatrick Mooney 
1087b22a70abSPatrick Mooney void
vq_pushchain(viona_vring_t * ring,uint32_t len,uint16_t cookie)1088b22a70abSPatrick Mooney vq_pushchain(viona_vring_t *ring, uint32_t len, uint16_t cookie)
1089b22a70abSPatrick Mooney {
1090427f9b9aSPatrick Mooney 	uint16_t uidx;
1091b22a70abSPatrick Mooney 
1092b22a70abSPatrick Mooney 	mutex_enter(&ring->vr_u_mutex);
1093b22a70abSPatrick Mooney 
1094427f9b9aSPatrick Mooney 	uidx = ring->vr_cur_uidx;
1095427f9b9aSPatrick Mooney 	vq_write_used_ent(ring, uidx & ring->vr_mask, cookie, len);
1096427f9b9aSPatrick Mooney 	uidx++;
1097b22a70abSPatrick Mooney 	membar_producer();
1098427f9b9aSPatrick Mooney 
1099427f9b9aSPatrick Mooney 	vq_write_used_idx(ring, uidx);
1100427f9b9aSPatrick Mooney 	ring->vr_cur_uidx = uidx;
1101b22a70abSPatrick Mooney 
1102b22a70abSPatrick Mooney 	mutex_exit(&ring->vr_u_mutex);
1103b22a70abSPatrick Mooney }
1104b22a70abSPatrick Mooney 
1105b22a70abSPatrick Mooney void
vq_pushchain_many(viona_vring_t * ring,uint_t num_bufs,used_elem_t * elem)1106b22a70abSPatrick Mooney vq_pushchain_many(viona_vring_t *ring, uint_t num_bufs, used_elem_t *elem)
1107b22a70abSPatrick Mooney {
1108427f9b9aSPatrick Mooney 	uint16_t uidx;
1109b22a70abSPatrick Mooney 
1110b22a70abSPatrick Mooney 	mutex_enter(&ring->vr_u_mutex);
1111b22a70abSPatrick Mooney 
1112427f9b9aSPatrick Mooney 	uidx = ring->vr_cur_uidx;
1113427f9b9aSPatrick Mooney 
1114be899113SPatrick Mooney 	for (uint_t i = 0; i < num_bufs; i++, uidx++) {
1115427f9b9aSPatrick Mooney 		vq_write_used_ent(ring, uidx & ring->vr_mask, elem[i].id,
1116427f9b9aSPatrick Mooney 		    elem[i].len);
1117b22a70abSPatrick Mooney 	}
1118427f9b9aSPatrick Mooney 
1119b22a70abSPatrick Mooney 	membar_producer();
1120427f9b9aSPatrick Mooney 	vq_write_used_idx(ring, uidx);
1121427f9b9aSPatrick Mooney 	ring->vr_cur_uidx = uidx;
1122b22a70abSPatrick Mooney 
1123b22a70abSPatrick Mooney 	mutex_exit(&ring->vr_u_mutex);
1124b22a70abSPatrick Mooney }
1125427f9b9aSPatrick Mooney 
1126427f9b9aSPatrick Mooney /*
1127427f9b9aSPatrick Mooney  * Set USED_NO_NOTIFY on VQ so guest elides doorbell calls for new entries.
1128427f9b9aSPatrick Mooney  */
1129427f9b9aSPatrick Mooney void
viona_ring_disable_notify(viona_vring_t * ring)1130427f9b9aSPatrick Mooney viona_ring_disable_notify(viona_vring_t *ring)
1131427f9b9aSPatrick Mooney {
1132427f9b9aSPatrick Mooney 	volatile uint16_t *used_flags =
1133427f9b9aSPatrick Mooney 	    viona_ring_addr(ring, LEGACY_USED_FLAGS_OFF(ring->vr_size));
1134427f9b9aSPatrick Mooney 
1135427f9b9aSPatrick Mooney 	*used_flags |= VRING_USED_F_NO_NOTIFY;
1136427f9b9aSPatrick Mooney }
1137427f9b9aSPatrick Mooney 
1138427f9b9aSPatrick Mooney /*
1139427f9b9aSPatrick Mooney  * Clear USED_NO_NOTIFY on VQ so guest resumes doorbell calls for new entries.
1140427f9b9aSPatrick Mooney  */
1141427f9b9aSPatrick Mooney void
viona_ring_enable_notify(viona_vring_t * ring)1142427f9b9aSPatrick Mooney viona_ring_enable_notify(viona_vring_t *ring)
1143427f9b9aSPatrick Mooney {
1144427f9b9aSPatrick Mooney 	volatile uint16_t *used_flags =
1145427f9b9aSPatrick Mooney 	    viona_ring_addr(ring, LEGACY_USED_FLAGS_OFF(ring->vr_size));
1146427f9b9aSPatrick Mooney 
1147427f9b9aSPatrick Mooney 	*used_flags &= ~VRING_USED_F_NO_NOTIFY;
1148427f9b9aSPatrick Mooney }
1149427f9b9aSPatrick Mooney 
1150427f9b9aSPatrick Mooney /*
1151427f9b9aSPatrick Mooney  * Return the number of available descriptors in the vring taking care of the
1152427f9b9aSPatrick Mooney  * 16-bit index wraparound.
1153427f9b9aSPatrick Mooney  *
1154427f9b9aSPatrick Mooney  * Note: If the number of apparently available descriptors is larger than the
1155427f9b9aSPatrick Mooney  * ring size (due to guest misbehavior), this check will still report the
1156427f9b9aSPatrick Mooney  * positive count of descriptors.
1157427f9b9aSPatrick Mooney  */
1158427f9b9aSPatrick Mooney uint16_t
viona_ring_num_avail(viona_vring_t * ring)1159427f9b9aSPatrick Mooney viona_ring_num_avail(viona_vring_t *ring)
1160427f9b9aSPatrick Mooney {
1161427f9b9aSPatrick Mooney 	volatile uint16_t *avail_idx =
1162427f9b9aSPatrick Mooney 	    viona_ring_addr(ring, LEGACY_AVAIL_IDX_OFF(ring->vr_size));
1163427f9b9aSPatrick Mooney 
1164427f9b9aSPatrick Mooney 	return (*avail_idx - ring->vr_cur_aidx);
1165427f9b9aSPatrick Mooney }
1166