/*
 * Copyright (c) 2013 Chris Torek <torek @ torek net>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */
40b22a70abSPatrick Mooney
41b22a70abSPatrick Mooney
42b22a70abSPatrick Mooney #include <sys/types.h>
43b22a70abSPatrick Mooney #include <sys/smt.h>
44b22a70abSPatrick Mooney #include <sys/strsubr.h>
45b22a70abSPatrick Mooney
46b22a70abSPatrick Mooney #include <sys/pattr.h>
47b22a70abSPatrick Mooney #include <sys/dlpi.h>
48b22a70abSPatrick Mooney #include <inet/ip.h>
49b22a70abSPatrick Mooney #include <inet/ip_impl.h>
50b22a70abSPatrick Mooney
51b22a70abSPatrick Mooney #include "viona_impl.h"
52b22a70abSPatrick Mooney
/* Driver name looked up when deciding whether TX data must be copied. */
#define	BNXE_NIC_DRIVER		"bnxe"
54b22a70abSPatrick Mooney
55*d1d478f3SJorge Schrauwen /*
56*d1d478f3SJorge Schrauwen * Tunable controls tx copy by default on or off
57*d1d478f3SJorge Schrauwen */
58*d1d478f3SJorge Schrauwen boolean_t viona_default_tx_copy = B_TRUE;
59*d1d478f3SJorge Schrauwen
60b22a70abSPatrick Mooney /*
61b22a70abSPatrick Mooney * copy tx mbufs from virtio ring to avoid necessitating a wait for packet
62b22a70abSPatrick Mooney * transmission to free resources.
63b22a70abSPatrick Mooney */
64b22a70abSPatrick Mooney kmutex_t viona_force_copy_lock;
65b22a70abSPatrick Mooney static enum viona_force_copy {
66b22a70abSPatrick Mooney VFC_UNINITALIZED = 0,
67b22a70abSPatrick Mooney VFC_COPY_UNEEDED = 1,
68b22a70abSPatrick Mooney VFC_COPY_REQUIRED = 2,
69b22a70abSPatrick Mooney } viona_force_copy_state = VFC_UNINITALIZED;
70b22a70abSPatrick Mooney
71b22a70abSPatrick Mooney struct viona_desb {
72b22a70abSPatrick Mooney frtn_t d_frtn;
73b22a70abSPatrick Mooney viona_vring_t *d_ring;
74b22a70abSPatrick Mooney uint_t d_ref;
75b22a70abSPatrick Mooney uint32_t d_len;
76b22a70abSPatrick Mooney uint16_t d_cookie;
77b22a70abSPatrick Mooney uchar_t *d_headers;
78db9aa506SPatrick Mooney vmm_page_t *d_pages;
79b22a70abSPatrick Mooney };
80b22a70abSPatrick Mooney
81b22a70abSPatrick Mooney static void viona_tx(viona_link_t *, viona_vring_t *);
82b22a70abSPatrick Mooney static void viona_desb_release(viona_desb_t *);
83b22a70abSPatrick Mooney
84b22a70abSPatrick Mooney
85b22a70abSPatrick Mooney static void
viona_tx_wait_outstanding(viona_vring_t * ring)86b22a70abSPatrick Mooney viona_tx_wait_outstanding(viona_vring_t *ring)
87b22a70abSPatrick Mooney {
88b22a70abSPatrick Mooney ASSERT(MUTEX_HELD(&ring->vr_lock));
89b22a70abSPatrick Mooney
90b22a70abSPatrick Mooney while (ring->vr_xfer_outstanding != 0) {
91b22a70abSPatrick Mooney /*
92b22a70abSPatrick Mooney * Paying heed to signals is counterproductive here. This is a
93b22a70abSPatrick Mooney * very tight loop if pending transfers take an extended amount
94b22a70abSPatrick Mooney * of time to be reclaimed while the host process is exiting.
95b22a70abSPatrick Mooney */
96b22a70abSPatrick Mooney cv_wait(&ring->vr_cv, &ring->vr_lock);
97b22a70abSPatrick Mooney }
98b22a70abSPatrick Mooney }
99b22a70abSPatrick Mooney
100b22a70abSPatrick Mooney /*
101b22a70abSPatrick Mooney * Check if full TX packet copying is needed. This should not be called from
102b22a70abSPatrick Mooney * viona attach()/detach() context.
103b22a70abSPatrick Mooney */
104b22a70abSPatrick Mooney static boolean_t
viona_tx_copy_needed(void)105b22a70abSPatrick Mooney viona_tx_copy_needed(void)
106b22a70abSPatrick Mooney {
107b22a70abSPatrick Mooney boolean_t result;
108b22a70abSPatrick Mooney
109*d1d478f3SJorge Schrauwen if (viona_default_tx_copy) {
110*d1d478f3SJorge Schrauwen return (B_TRUE);
111*d1d478f3SJorge Schrauwen }
112*d1d478f3SJorge Schrauwen
113b22a70abSPatrick Mooney mutex_enter(&viona_force_copy_lock);
114b22a70abSPatrick Mooney if (viona_force_copy_state == VFC_UNINITALIZED) {
115b22a70abSPatrick Mooney major_t bnxe_major;
116b22a70abSPatrick Mooney
117b22a70abSPatrick Mooney /*
118b22a70abSPatrick Mooney * The original code for viona featured an explicit check for
119b22a70abSPatrick Mooney * the bnxe driver which, when found present, necessitated that
120b22a70abSPatrick Mooney * all transmissions be copied into their own mblks instead of
121b22a70abSPatrick Mooney * passing guest memory to the underlying device.
122b22a70abSPatrick Mooney *
123b22a70abSPatrick Mooney * The motivations for this are unclear, but until it can be
124b22a70abSPatrick Mooney * proven unnecessary, the check lives on.
125b22a70abSPatrick Mooney */
126b22a70abSPatrick Mooney viona_force_copy_state = VFC_COPY_UNEEDED;
127b22a70abSPatrick Mooney if ((bnxe_major = ddi_name_to_major(BNXE_NIC_DRIVER))
128b22a70abSPatrick Mooney != DDI_MAJOR_T_NONE) {
129b22a70abSPatrick Mooney if (ddi_hold_installed_driver(bnxe_major) != NULL) {
130b22a70abSPatrick Mooney viona_force_copy_state = VFC_COPY_REQUIRED;
131b22a70abSPatrick Mooney ddi_rele_driver(bnxe_major);
132b22a70abSPatrick Mooney }
133b22a70abSPatrick Mooney }
134b22a70abSPatrick Mooney }
135b22a70abSPatrick Mooney result = (viona_force_copy_state == VFC_COPY_REQUIRED);
136b22a70abSPatrick Mooney mutex_exit(&viona_force_copy_lock);
137b22a70abSPatrick Mooney
138b22a70abSPatrick Mooney return (result);
139b22a70abSPatrick Mooney }
140b22a70abSPatrick Mooney
141b22a70abSPatrick Mooney void
viona_tx_ring_alloc(viona_vring_t * ring,const uint16_t qsz)142b22a70abSPatrick Mooney viona_tx_ring_alloc(viona_vring_t *ring, const uint16_t qsz)
143b22a70abSPatrick Mooney {
144d4221574SAndy Fiddaman /* Allocate desb handles for TX ring if packet copying is disabled */
145b22a70abSPatrick Mooney if (!viona_tx_copy_needed()) {
146b22a70abSPatrick Mooney viona_desb_t *dp;
147b22a70abSPatrick Mooney
148b22a70abSPatrick Mooney dp = kmem_zalloc(sizeof (viona_desb_t) * qsz, KM_SLEEP);
149b22a70abSPatrick Mooney ring->vr_txdesb = dp;
150b22a70abSPatrick Mooney for (uint_t i = 0; i < qsz; i++, dp++) {
151b22a70abSPatrick Mooney dp->d_frtn.free_func = viona_desb_release;
152b22a70abSPatrick Mooney dp->d_frtn.free_arg = (void *)dp;
153b22a70abSPatrick Mooney dp->d_ring = ring;
154b22a70abSPatrick Mooney dp->d_headers = kmem_zalloc(VIONA_MAX_HDRS_LEN,
155b22a70abSPatrick Mooney KM_SLEEP);
156b22a70abSPatrick Mooney }
157b22a70abSPatrick Mooney }
158b22a70abSPatrick Mooney
159b22a70abSPatrick Mooney /* Allocate ring-sized iovec buffers for TX */
160b22a70abSPatrick Mooney ring->vr_txiov = kmem_alloc(sizeof (struct iovec) * qsz, KM_SLEEP);
161b22a70abSPatrick Mooney }
162b22a70abSPatrick Mooney
163b22a70abSPatrick Mooney void
viona_tx_ring_free(viona_vring_t * ring,const uint16_t qsz)164b22a70abSPatrick Mooney viona_tx_ring_free(viona_vring_t *ring, const uint16_t qsz)
165b22a70abSPatrick Mooney {
166b22a70abSPatrick Mooney if (ring->vr_txdesb != NULL) {
167b22a70abSPatrick Mooney viona_desb_t *dp = ring->vr_txdesb;
168b22a70abSPatrick Mooney
169b22a70abSPatrick Mooney for (uint_t i = 0; i < qsz; i++, dp++) {
170b22a70abSPatrick Mooney kmem_free(dp->d_headers, VIONA_MAX_HDRS_LEN);
171b22a70abSPatrick Mooney }
172b22a70abSPatrick Mooney kmem_free(ring->vr_txdesb, sizeof (viona_desb_t) * qsz);
173b22a70abSPatrick Mooney ring->vr_txdesb = NULL;
174b22a70abSPatrick Mooney }
175b22a70abSPatrick Mooney
176b22a70abSPatrick Mooney if (ring->vr_txiov != NULL) {
177b22a70abSPatrick Mooney kmem_free(ring->vr_txiov, sizeof (struct iovec) * qsz);
178b22a70abSPatrick Mooney ring->vr_txiov = NULL;
179b22a70abSPatrick Mooney }
180b22a70abSPatrick Mooney }
181b22a70abSPatrick Mooney
182b22a70abSPatrick Mooney static void
viona_tx_done(viona_vring_t * ring,uint32_t len,uint16_t cookie)183b22a70abSPatrick Mooney viona_tx_done(viona_vring_t *ring, uint32_t len, uint16_t cookie)
184b22a70abSPatrick Mooney {
185b22a70abSPatrick Mooney vq_pushchain(ring, len, cookie);
186b22a70abSPatrick Mooney
187b22a70abSPatrick Mooney membar_enter();
188427f9b9aSPatrick Mooney viona_intr_ring(ring, B_FALSE);
189b22a70abSPatrick Mooney }
190b22a70abSPatrick Mooney
/*
 * Number of consecutive viona_tx() calls the TX worker makes before pausing
 * to check whether the ring needs to bail.
 */
#define	TX_BURST_THRESH	32
192a26f9c14SPatrick Mooney
193b22a70abSPatrick Mooney void
viona_worker_tx(viona_vring_t * ring,viona_link_t * link)194b22a70abSPatrick Mooney viona_worker_tx(viona_vring_t *ring, viona_link_t *link)
195b22a70abSPatrick Mooney {
196b22a70abSPatrick Mooney (void) thread_vsetname(curthread, "viona_tx_%p", ring);
197b22a70abSPatrick Mooney
198b22a70abSPatrick Mooney ASSERT(MUTEX_HELD(&ring->vr_lock));
199b22a70abSPatrick Mooney ASSERT3U(ring->vr_state, ==, VRS_RUN);
200b22a70abSPatrick Mooney
201b22a70abSPatrick Mooney mutex_exit(&ring->vr_lock);
202b22a70abSPatrick Mooney
203b22a70abSPatrick Mooney for (;;) {
204a26f9c14SPatrick Mooney uint_t ntx = 0, burst = 0;
205b22a70abSPatrick Mooney
206427f9b9aSPatrick Mooney viona_ring_disable_notify(ring);
207a26f9c14SPatrick Mooney while (viona_ring_num_avail(ring) != 0) {
208b22a70abSPatrick Mooney viona_tx(link, ring);
209a26f9c14SPatrick Mooney ntx++;
210a26f9c14SPatrick Mooney burst++;
211b22a70abSPatrick Mooney
212b22a70abSPatrick Mooney /*
213b22a70abSPatrick Mooney * It is advantageous for throughput to keep this
214b22a70abSPatrick Mooney * transmission loop tight, but periodic breaks to
215b22a70abSPatrick Mooney * check for other events are of value too.
216b22a70abSPatrick Mooney */
217a26f9c14SPatrick Mooney if (burst >= TX_BURST_THRESH) {
218a26f9c14SPatrick Mooney mutex_enter(&ring->vr_lock);
219a26f9c14SPatrick Mooney const bool need_bail = vring_need_bail(ring);
220a26f9c14SPatrick Mooney mutex_exit(&ring->vr_lock);
221a26f9c14SPatrick Mooney
222a26f9c14SPatrick Mooney if (need_bail) {
223a26f9c14SPatrick Mooney break;
224a26f9c14SPatrick Mooney }
225a26f9c14SPatrick Mooney burst = 0;
226a26f9c14SPatrick Mooney }
227b22a70abSPatrick Mooney }
228b22a70abSPatrick Mooney
229b22a70abSPatrick Mooney VIONA_PROBE2(tx, viona_link_t *, link, uint_t, ntx);
230b22a70abSPatrick Mooney
231b22a70abSPatrick Mooney /*
232b22a70abSPatrick Mooney * Check for available descriptors on the ring once more in
233b22a70abSPatrick Mooney * case a late addition raced with the NO_NOTIFY flag toggle.
234b22a70abSPatrick Mooney *
235427f9b9aSPatrick Mooney * The barrier ensures that visibility of the no-notify
236427f9b9aSPatrick Mooney * store does not cross the viona_ring_num_avail() check below.
237b22a70abSPatrick Mooney */
238a26f9c14SPatrick Mooney viona_ring_enable_notify(ring);
239b22a70abSPatrick Mooney membar_enter();
240b22a70abSPatrick Mooney
241a26f9c14SPatrick Mooney if (viona_ring_num_avail(ring) == 0 &&
242a26f9c14SPatrick Mooney (link->l_features & VIRTIO_F_RING_NOTIFY_ON_EMPTY) != 0) {
243427f9b9aSPatrick Mooney /*
244427f9b9aSPatrick Mooney * The NOTIFY_ON_EMPTY interrupt should not pay heed to
245427f9b9aSPatrick Mooney * the presence of AVAIL_NO_INTERRUPT.
246427f9b9aSPatrick Mooney */
247427f9b9aSPatrick Mooney viona_intr_ring(ring, B_TRUE);
248b22a70abSPatrick Mooney }
249b22a70abSPatrick Mooney
250b22a70abSPatrick Mooney mutex_enter(&ring->vr_lock);
251a26f9c14SPatrick Mooney for (;;) {
252a26f9c14SPatrick Mooney if (vring_need_bail(ring)) {
253a26f9c14SPatrick Mooney ring->vr_state = VRS_STOP;
254a26f9c14SPatrick Mooney viona_tx_wait_outstanding(ring);
255a26f9c14SPatrick Mooney return;
256a26f9c14SPatrick Mooney }
257b22a70abSPatrick Mooney
258a26f9c14SPatrick Mooney if (vmm_drv_lease_expired(ring->vr_lease)) {
259a26f9c14SPatrick Mooney ring->vr_state_flags |= VRSF_RENEW;
260a26f9c14SPatrick Mooney /*
261a26f9c14SPatrick Mooney * When renewing the lease for the ring, no TX
262a26f9c14SPatrick Mooney * frames may be outstanding, as they contain
263a26f9c14SPatrick Mooney * references to guest memory.
264a26f9c14SPatrick Mooney */
265a26f9c14SPatrick Mooney viona_tx_wait_outstanding(ring);
266a26f9c14SPatrick Mooney
267a26f9c14SPatrick Mooney const boolean_t renewed =
268a26f9c14SPatrick Mooney viona_ring_lease_renew(ring);
269a26f9c14SPatrick Mooney ring->vr_state_flags &= ~VRSF_RENEW;
270a26f9c14SPatrick Mooney
271a26f9c14SPatrick Mooney if (!renewed) {
272a26f9c14SPatrick Mooney /* stop ring on failed renewal */
273a26f9c14SPatrick Mooney ring->vr_state = VRS_STOP;
274a26f9c14SPatrick Mooney return;
275a26f9c14SPatrick Mooney }
276a26f9c14SPatrick Mooney }
277b22a70abSPatrick Mooney
278a26f9c14SPatrick Mooney if (viona_ring_num_avail(ring) != 0) {
279b22a70abSPatrick Mooney break;
280b22a70abSPatrick Mooney }
281a26f9c14SPatrick Mooney
282a26f9c14SPatrick Mooney /* Wait for further activity on the ring */
283a26f9c14SPatrick Mooney (void) cv_wait_sig(&ring->vr_cv, &ring->vr_lock);
284b22a70abSPatrick Mooney }
285b22a70abSPatrick Mooney mutex_exit(&ring->vr_lock);
286b22a70abSPatrick Mooney }
287a26f9c14SPatrick Mooney /* UNREACHABLE */
288b22a70abSPatrick Mooney }
289b22a70abSPatrick Mooney
290b22a70abSPatrick Mooney static void
viona_desb_release(viona_desb_t * dp)291b22a70abSPatrick Mooney viona_desb_release(viona_desb_t *dp)
292b22a70abSPatrick Mooney {
293b22a70abSPatrick Mooney viona_vring_t *ring = dp->d_ring;
294b22a70abSPatrick Mooney uint_t ref;
295b22a70abSPatrick Mooney uint32_t len;
296b22a70abSPatrick Mooney uint16_t cookie;
297b22a70abSPatrick Mooney
298b22a70abSPatrick Mooney ref = atomic_dec_uint_nv(&dp->d_ref);
299b22a70abSPatrick Mooney if (ref > 1) {
300b22a70abSPatrick Mooney return;
301b22a70abSPatrick Mooney }
302b22a70abSPatrick Mooney
303b22a70abSPatrick Mooney /*
304b22a70abSPatrick Mooney * The desb corresponding to this index must be ready for reuse before
305b22a70abSPatrick Mooney * the descriptor is returned to the guest via the 'used' ring.
306b22a70abSPatrick Mooney */
307b22a70abSPatrick Mooney len = dp->d_len;
308b22a70abSPatrick Mooney cookie = dp->d_cookie;
309b22a70abSPatrick Mooney dp->d_len = 0;
310b22a70abSPatrick Mooney dp->d_cookie = 0;
311db9aa506SPatrick Mooney vmm_drv_page_release_chain(dp->d_pages);
312db9aa506SPatrick Mooney dp->d_pages = NULL;
313db9aa506SPatrick Mooney
314db9aa506SPatrick Mooney /*
315db9aa506SPatrick Mooney * Ensure all other changes to the desb are visible prior to zeroing its
316db9aa506SPatrick Mooney * refcount, signifying its readiness for reuse.
317db9aa506SPatrick Mooney */
318db9aa506SPatrick Mooney membar_exit();
319b22a70abSPatrick Mooney dp->d_ref = 0;
320b22a70abSPatrick Mooney
321b22a70abSPatrick Mooney viona_tx_done(ring, len, cookie);
322b22a70abSPatrick Mooney
323b22a70abSPatrick Mooney mutex_enter(&ring->vr_lock);
324b22a70abSPatrick Mooney if ((--ring->vr_xfer_outstanding) == 0) {
325b22a70abSPatrick Mooney cv_broadcast(&ring->vr_cv);
326b22a70abSPatrick Mooney }
327b22a70abSPatrick Mooney mutex_exit(&ring->vr_lock);
328b22a70abSPatrick Mooney }
329b22a70abSPatrick Mooney
330b22a70abSPatrick Mooney static boolean_t
viona_tx_csum(viona_vring_t * ring,const struct virtio_net_hdr * hdr,mblk_t * mp,uint32_t len)331b22a70abSPatrick Mooney viona_tx_csum(viona_vring_t *ring, const struct virtio_net_hdr *hdr,
332b22a70abSPatrick Mooney mblk_t *mp, uint32_t len)
333b22a70abSPatrick Mooney {
334b22a70abSPatrick Mooney viona_link_t *link = ring->vr_link;
335b22a70abSPatrick Mooney const struct ether_header *eth;
336b22a70abSPatrick Mooney uint_t eth_len = sizeof (struct ether_header);
337b22a70abSPatrick Mooney ushort_t ftype;
338b22a70abSPatrick Mooney ipha_t *ipha = NULL;
339b22a70abSPatrick Mooney uint8_t ipproto = IPPROTO_NONE; /* NONE is not exactly right, but ok */
340b22a70abSPatrick Mooney uint16_t flags = 0;
341b22a70abSPatrick Mooney const uint_t csum_start = hdr->vrh_csum_start;
342b22a70abSPatrick Mooney const uint_t csum_stuff = hdr->vrh_csum_offset + csum_start;
343b22a70abSPatrick Mooney
344b22a70abSPatrick Mooney /*
345b22a70abSPatrick Mooney * Validate that the checksum offsets provided by the guest are within
346b22a70abSPatrick Mooney * the bounds of the packet. Additionally, ensure that the checksum
347b22a70abSPatrick Mooney * contents field is within the headers mblk copied by viona_tx().
348b22a70abSPatrick Mooney */
349b22a70abSPatrick Mooney if (csum_start >= len || csum_start < eth_len || csum_stuff >= len ||
350b22a70abSPatrick Mooney (csum_stuff + sizeof (uint16_t)) > MBLKL(mp)) {
351b22a70abSPatrick Mooney VIONA_PROBE2(fail_hcksum, viona_link_t *, link, mblk_t *, mp);
352b22a70abSPatrick Mooney VIONA_RING_STAT_INCR(ring, fail_hcksum);
353b22a70abSPatrick Mooney return (B_FALSE);
354b22a70abSPatrick Mooney }
355b22a70abSPatrick Mooney
356b22a70abSPatrick Mooney /*
357b22a70abSPatrick Mooney * This is guaranteed to be safe thanks to the header copying
358b22a70abSPatrick Mooney * done in viona_tx().
359b22a70abSPatrick Mooney */
360b22a70abSPatrick Mooney eth = (const struct ether_header *)mp->b_rptr;
361b22a70abSPatrick Mooney ftype = ntohs(eth->ether_type);
362b22a70abSPatrick Mooney
363b22a70abSPatrick Mooney if (ftype == ETHERTYPE_VLAN) {
364b22a70abSPatrick Mooney const struct ether_vlan_header *veth;
365b22a70abSPatrick Mooney
366b22a70abSPatrick Mooney /* punt on QinQ for now */
367b22a70abSPatrick Mooney eth_len = sizeof (struct ether_vlan_header);
368b22a70abSPatrick Mooney veth = (const struct ether_vlan_header *)eth;
369b22a70abSPatrick Mooney ftype = ntohs(veth->ether_type);
370b22a70abSPatrick Mooney }
371b22a70abSPatrick Mooney
372b22a70abSPatrick Mooney if (ftype == ETHERTYPE_IP) {
373b22a70abSPatrick Mooney ipha = (ipha_t *)(mp->b_rptr + eth_len);
374b22a70abSPatrick Mooney
375b22a70abSPatrick Mooney ipproto = ipha->ipha_protocol;
376b22a70abSPatrick Mooney } else if (ftype == ETHERTYPE_IPV6) {
377b22a70abSPatrick Mooney ip6_t *ip6h = (ip6_t *)(mp->b_rptr + eth_len);
378b22a70abSPatrick Mooney
379b22a70abSPatrick Mooney ipproto = ip6h->ip6_nxt;
380b22a70abSPatrick Mooney }
381b22a70abSPatrick Mooney
382b22a70abSPatrick Mooney /*
383b22a70abSPatrick Mooney * We ignore hdr_len because the spec says it can't be
384b22a70abSPatrick Mooney * trusted. Besides, our own stack will determine the header
385b22a70abSPatrick Mooney * boundary.
386b22a70abSPatrick Mooney */
387b22a70abSPatrick Mooney if ((link->l_cap_csum & HCKSUM_INET_PARTIAL) != 0 &&
388b22a70abSPatrick Mooney (hdr->vrh_gso_type & VIRTIO_NET_HDR_GSO_TCPV4) != 0 &&
389b22a70abSPatrick Mooney ftype == ETHERTYPE_IP) {
390b22a70abSPatrick Mooney uint16_t *cksump;
391b22a70abSPatrick Mooney uint32_t cksum;
392b22a70abSPatrick Mooney ipaddr_t src = ipha->ipha_src;
393b22a70abSPatrick Mooney ipaddr_t dst = ipha->ipha_dst;
394b22a70abSPatrick Mooney
395b22a70abSPatrick Mooney /*
396b22a70abSPatrick Mooney * Our native IP stack doesn't set the L4 length field
397b22a70abSPatrick Mooney * of the pseudo header when LSO is in play. Other IP
398b22a70abSPatrick Mooney * stacks, e.g. Linux, do include the length field.
399b22a70abSPatrick Mooney * This is a problem because the hardware expects that
400b22a70abSPatrick Mooney * the length field is not set. When it is set it will
401b22a70abSPatrick Mooney * cause an incorrect TCP checksum to be generated.
402b22a70abSPatrick Mooney * The reason this works in Linux is because Linux
403b22a70abSPatrick Mooney * corrects the pseudo-header checksum in the driver
404b22a70abSPatrick Mooney * code. In order to get the correct HW checksum we
405b22a70abSPatrick Mooney * need to assume the guest's IP stack gave us a bogus
406b22a70abSPatrick Mooney * TCP partial checksum and calculate it ourselves.
407b22a70abSPatrick Mooney */
408b22a70abSPatrick Mooney cksump = IPH_TCPH_CHECKSUMP(ipha, IPH_HDR_LENGTH(ipha));
409b22a70abSPatrick Mooney cksum = IP_TCP_CSUM_COMP;
410b22a70abSPatrick Mooney cksum += (dst >> 16) + (dst & 0xFFFF) +
411b22a70abSPatrick Mooney (src >> 16) + (src & 0xFFFF);
412b22a70abSPatrick Mooney cksum = (cksum & 0xFFFF) + (cksum >> 16);
413b22a70abSPatrick Mooney *(cksump) = (cksum & 0xFFFF) + (cksum >> 16);
414b22a70abSPatrick Mooney
415b22a70abSPatrick Mooney /*
416b22a70abSPatrick Mooney * Since viona is a "legacy device", the data stored
417b22a70abSPatrick Mooney * by the driver will be in the guest's native endian
418b22a70abSPatrick Mooney * format (see sections 2.4.3 and 5.1.6.1 of the
419b22a70abSPatrick Mooney * VIRTIO 1.0 spec for more info). At this time the
420b22a70abSPatrick Mooney * only guests using viona are x86 and we can assume
421b22a70abSPatrick Mooney * little-endian.
422b22a70abSPatrick Mooney */
423b22a70abSPatrick Mooney lso_info_set(mp, LE_16(hdr->vrh_gso_size), HW_LSO);
424b22a70abSPatrick Mooney
425b22a70abSPatrick Mooney /*
426b22a70abSPatrick Mooney * Hardware, like ixgbe, expects the client to request
427b22a70abSPatrick Mooney * IP header checksum offload if it's sending LSO (see
428b22a70abSPatrick Mooney * ixgbe_get_context()). Unfortunately, virtio makes
429b22a70abSPatrick Mooney * no allowances for negotiating IP header checksum
430b22a70abSPatrick Mooney * and HW offload, only TCP checksum. We add the flag
431b22a70abSPatrick Mooney * and zero-out the checksum field. This mirrors the
432b22a70abSPatrick Mooney * behavior of our native IP stack (which does this in
433b22a70abSPatrick Mooney * the interest of HW that expects the field to be
434b22a70abSPatrick Mooney * zero).
435b22a70abSPatrick Mooney */
436b22a70abSPatrick Mooney flags |= HCK_IPV4_HDRCKSUM;
437b22a70abSPatrick Mooney ipha->ipha_hdr_checksum = 0;
438b22a70abSPatrick Mooney }
439b22a70abSPatrick Mooney
440b22a70abSPatrick Mooney /*
441b22a70abSPatrick Mooney * Use DB_CKSUMFLAGS instead of mac_hcksum_get() to make sure
442b22a70abSPatrick Mooney * HW_LSO, if present, is not lost.
443b22a70abSPatrick Mooney */
444b22a70abSPatrick Mooney flags |= DB_CKSUMFLAGS(mp);
445b22a70abSPatrick Mooney
446b22a70abSPatrick Mooney /*
447b22a70abSPatrick Mooney * Partial checksum support from the NIC is ideal, since it most
448b22a70abSPatrick Mooney * closely maps to the interface defined by virtio.
449b22a70abSPatrick Mooney */
450b22a70abSPatrick Mooney if ((link->l_cap_csum & HCKSUM_INET_PARTIAL) != 0 &&
451b22a70abSPatrick Mooney (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP)) {
452b22a70abSPatrick Mooney /*
453b22a70abSPatrick Mooney * MAC expects these offsets to be relative to the
454b22a70abSPatrick Mooney * start of the L3 header rather than the L2 frame.
455b22a70abSPatrick Mooney */
456b22a70abSPatrick Mooney flags |= HCK_PARTIALCKSUM;
457b22a70abSPatrick Mooney mac_hcksum_set(mp, csum_start - eth_len, csum_stuff - eth_len,
458b22a70abSPatrick Mooney len - eth_len, 0, flags);
459b22a70abSPatrick Mooney return (B_TRUE);
460b22a70abSPatrick Mooney }
461b22a70abSPatrick Mooney
462b22a70abSPatrick Mooney /*
463b22a70abSPatrick Mooney * Without partial checksum support, look to the L3/L4 protocol
464b22a70abSPatrick Mooney * information to see if the NIC can handle it. If not, the
465b22a70abSPatrick Mooney * checksum will need to calculated inline.
466b22a70abSPatrick Mooney */
467b22a70abSPatrick Mooney if (ftype == ETHERTYPE_IP) {
468b22a70abSPatrick Mooney if ((link->l_cap_csum & HCKSUM_INET_FULL_V4) != 0 &&
469b22a70abSPatrick Mooney (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP)) {
470b22a70abSPatrick Mooney uint16_t *csump = (uint16_t *)(mp->b_rptr + csum_stuff);
471b22a70abSPatrick Mooney *csump = 0;
472b22a70abSPatrick Mooney flags |= HCK_FULLCKSUM;
473b22a70abSPatrick Mooney mac_hcksum_set(mp, 0, 0, 0, 0, flags);
474b22a70abSPatrick Mooney return (B_TRUE);
475b22a70abSPatrick Mooney }
476b22a70abSPatrick Mooney
477b22a70abSPatrick Mooney /* XXX: Implement manual fallback checksumming? */
478b22a70abSPatrick Mooney VIONA_PROBE2(fail_hcksum, viona_link_t *, link, mblk_t *, mp);
479b22a70abSPatrick Mooney VIONA_RING_STAT_INCR(ring, fail_hcksum);
480b22a70abSPatrick Mooney return (B_FALSE);
481b22a70abSPatrick Mooney } else if (ftype == ETHERTYPE_IPV6) {
482b22a70abSPatrick Mooney if ((link->l_cap_csum & HCKSUM_INET_FULL_V6) != 0 &&
483b22a70abSPatrick Mooney (ipproto == IPPROTO_TCP || ipproto == IPPROTO_UDP)) {
484b22a70abSPatrick Mooney uint16_t *csump = (uint16_t *)(mp->b_rptr + csum_stuff);
485b22a70abSPatrick Mooney *csump = 0;
486b22a70abSPatrick Mooney flags |= HCK_FULLCKSUM;
487b22a70abSPatrick Mooney mac_hcksum_set(mp, 0, 0, 0, 0, flags);
488b22a70abSPatrick Mooney return (B_TRUE);
489b22a70abSPatrick Mooney }
490b22a70abSPatrick Mooney
491b22a70abSPatrick Mooney /* XXX: Implement manual fallback checksumming? */
492b22a70abSPatrick Mooney VIONA_PROBE2(fail_hcksum6, viona_link_t *, link, mblk_t *, mp);
493b22a70abSPatrick Mooney VIONA_RING_STAT_INCR(ring, fail_hcksum6);
494b22a70abSPatrick Mooney return (B_FALSE);
495b22a70abSPatrick Mooney }
496b22a70abSPatrick Mooney
497b22a70abSPatrick Mooney /* Cannot even emulate hcksum for unrecognized protocols */
498b22a70abSPatrick Mooney VIONA_PROBE2(fail_hcksum_proto, viona_link_t *, link, mblk_t *, mp);
499b22a70abSPatrick Mooney VIONA_RING_STAT_INCR(ring, fail_hcksum_proto);
500b22a70abSPatrick Mooney return (B_FALSE);
501b22a70abSPatrick Mooney }
502b22a70abSPatrick Mooney
503b22a70abSPatrick Mooney static void
viona_tx(viona_link_t * link,viona_vring_t * ring)504b22a70abSPatrick Mooney viona_tx(viona_link_t *link, viona_vring_t *ring)
505b22a70abSPatrick Mooney {
506b22a70abSPatrick Mooney struct iovec *iov = ring->vr_txiov;
507b22a70abSPatrick Mooney const uint_t max_segs = ring->vr_size;
508b22a70abSPatrick Mooney uint16_t cookie;
509b22a70abSPatrick Mooney int i, n;
510b22a70abSPatrick Mooney uint32_t len, base_off = 0;
511b22a70abSPatrick Mooney uint32_t min_copy = VIONA_MAX_HDRS_LEN;
512b22a70abSPatrick Mooney mblk_t *mp_head, *mp_tail, *mp;
513b22a70abSPatrick Mooney viona_desb_t *dp = NULL;
514b22a70abSPatrick Mooney mac_client_handle_t link_mch = link->l_mch;
515b22a70abSPatrick Mooney const struct virtio_net_hdr *hdr;
516db9aa506SPatrick Mooney vmm_page_t *pages = NULL;
517b22a70abSPatrick Mooney
518b22a70abSPatrick Mooney mp_head = mp_tail = NULL;
519b22a70abSPatrick Mooney
520b22a70abSPatrick Mooney ASSERT(iov != NULL);
521b22a70abSPatrick Mooney
522db9aa506SPatrick Mooney n = vq_popchain(ring, iov, max_segs, &cookie, &pages);
523b22a70abSPatrick Mooney if (n == 0) {
524b22a70abSPatrick Mooney VIONA_PROBE1(tx_absent, viona_vring_t *, ring);
525b22a70abSPatrick Mooney VIONA_RING_STAT_INCR(ring, tx_absent);
526b22a70abSPatrick Mooney return;
527b22a70abSPatrick Mooney } else if (n < 0) {
528b22a70abSPatrick Mooney /*
529b22a70abSPatrick Mooney * Any error encountered in vq_popchain has already resulted in
530b22a70abSPatrick Mooney * specific probe and statistic handling. Further action here
531b22a70abSPatrick Mooney * is unnecessary.
532b22a70abSPatrick Mooney */
533b22a70abSPatrick Mooney return;
534b22a70abSPatrick Mooney }
535b22a70abSPatrick Mooney
536b22a70abSPatrick Mooney /* Grab the header and ensure it is of adequate length */
537b22a70abSPatrick Mooney hdr = (const struct virtio_net_hdr *)iov[0].iov_base;
538b22a70abSPatrick Mooney len = iov[0].iov_len;
539b22a70abSPatrick Mooney if (len < sizeof (struct virtio_net_hdr)) {
540b22a70abSPatrick Mooney goto drop_fail;
541b22a70abSPatrick Mooney }
542b22a70abSPatrick Mooney
543b22a70abSPatrick Mooney /* Make sure the packet headers are always in the first mblk. */
544b22a70abSPatrick Mooney if (ring->vr_txdesb != NULL) {
545b22a70abSPatrick Mooney dp = &ring->vr_txdesb[cookie];
546b22a70abSPatrick Mooney
547b22a70abSPatrick Mooney /*
548b22a70abSPatrick Mooney * If the guest driver is operating properly, each desb slot
549b22a70abSPatrick Mooney * should be available for use when processing a TX descriptor
550b22a70abSPatrick Mooney * from the 'avail' ring. In the case of drivers that reuse a
551b22a70abSPatrick Mooney * descriptor before it has been posted to the 'used' ring, the
552b22a70abSPatrick Mooney * data is simply dropped.
553b22a70abSPatrick Mooney */
554b22a70abSPatrick Mooney if (atomic_cas_uint(&dp->d_ref, 0, 1) != 0) {
555b22a70abSPatrick Mooney dp = NULL;
556b22a70abSPatrick Mooney goto drop_fail;
557b22a70abSPatrick Mooney }
558b22a70abSPatrick Mooney
559b22a70abSPatrick Mooney dp->d_cookie = cookie;
560b22a70abSPatrick Mooney mp_head = desballoc(dp->d_headers, VIONA_MAX_HDRS_LEN, 0,
561b22a70abSPatrick Mooney &dp->d_frtn);
562b22a70abSPatrick Mooney
563b22a70abSPatrick Mooney /* Account for the successful desballoc. */
564b22a70abSPatrick Mooney if (mp_head != NULL)
565b22a70abSPatrick Mooney dp->d_ref++;
566b22a70abSPatrick Mooney } else {
567b22a70abSPatrick Mooney mp_head = allocb(VIONA_MAX_HDRS_LEN, 0);
568b22a70abSPatrick Mooney }
569b22a70abSPatrick Mooney
570b22a70abSPatrick Mooney if (mp_head == NULL)
571b22a70abSPatrick Mooney goto drop_fail;
572b22a70abSPatrick Mooney
573b22a70abSPatrick Mooney mp_tail = mp_head;
574b22a70abSPatrick Mooney
575b22a70abSPatrick Mooney /*
576b22a70abSPatrick Mooney * We always copy enough of the guest data to cover the
577b22a70abSPatrick Mooney * headers. This protects us from TOCTOU attacks and allows
578b22a70abSPatrick Mooney * message block length assumptions to be made in subsequent
579b22a70abSPatrick Mooney * code. In many cases, this means copying more data than
580b22a70abSPatrick Mooney * strictly necessary. That's okay, as it is the larger packets
581b22a70abSPatrick Mooney * (such as LSO) that really benefit from desballoc().
582b22a70abSPatrick Mooney */
583b22a70abSPatrick Mooney for (i = 1; i < n; i++) {
584b22a70abSPatrick Mooney const uint32_t to_copy = MIN(min_copy, iov[i].iov_len);
585b22a70abSPatrick Mooney
586b22a70abSPatrick Mooney bcopy(iov[i].iov_base, mp_head->b_wptr, to_copy);
587b22a70abSPatrick Mooney mp_head->b_wptr += to_copy;
588b22a70abSPatrick Mooney len += to_copy;
589b22a70abSPatrick Mooney min_copy -= to_copy;
590b22a70abSPatrick Mooney
591b22a70abSPatrick Mooney /*
592b22a70abSPatrick Mooney * We've met the minimum copy requirement. The rest of
593b22a70abSPatrick Mooney * the guest data can be referenced.
594b22a70abSPatrick Mooney */
595b22a70abSPatrick Mooney if (min_copy == 0) {
596b22a70abSPatrick Mooney /*
597b22a70abSPatrick Mooney * If we copied all contents of this
598b22a70abSPatrick Mooney * descriptor then move onto the next one.
599b22a70abSPatrick Mooney * Otherwise, record how far we are into the
600b22a70abSPatrick Mooney * current descriptor.
601b22a70abSPatrick Mooney */
602b22a70abSPatrick Mooney if (iov[i].iov_len == to_copy)
603b22a70abSPatrick Mooney i++;
604b22a70abSPatrick Mooney else
605b22a70abSPatrick Mooney base_off = to_copy;
606b22a70abSPatrick Mooney
607b22a70abSPatrick Mooney break;
608b22a70abSPatrick Mooney }
609b22a70abSPatrick Mooney }
610b22a70abSPatrick Mooney
611b22a70abSPatrick Mooney ASSERT3P(mp_head, !=, NULL);
612b22a70abSPatrick Mooney ASSERT3P(mp_tail, !=, NULL);
613b22a70abSPatrick Mooney
614b22a70abSPatrick Mooney for (; i < n; i++) {
615b22a70abSPatrick Mooney uintptr_t base = (uintptr_t)iov[i].iov_base + base_off;
616b22a70abSPatrick Mooney uint32_t chunk = iov[i].iov_len - base_off;
617b22a70abSPatrick Mooney
618b22a70abSPatrick Mooney ASSERT3U(base_off, <, iov[i].iov_len);
619b22a70abSPatrick Mooney ASSERT3U(chunk, >, 0);
620b22a70abSPatrick Mooney
621b22a70abSPatrick Mooney if (dp != NULL) {
622b22a70abSPatrick Mooney mp = desballoc((uchar_t *)base, chunk, 0, &dp->d_frtn);
623b22a70abSPatrick Mooney if (mp == NULL) {
624b22a70abSPatrick Mooney goto drop_fail;
625b22a70abSPatrick Mooney }
626b22a70abSPatrick Mooney dp->d_ref++;
627b22a70abSPatrick Mooney } else {
628b22a70abSPatrick Mooney mp = allocb(chunk, BPRI_MED);
629b22a70abSPatrick Mooney if (mp == NULL) {
630b22a70abSPatrick Mooney goto drop_fail;
631b22a70abSPatrick Mooney }
632b22a70abSPatrick Mooney bcopy((uchar_t *)base, mp->b_wptr, chunk);
633b22a70abSPatrick Mooney }
634b22a70abSPatrick Mooney
635b22a70abSPatrick Mooney base_off = 0;
636b22a70abSPatrick Mooney len += chunk;
637b22a70abSPatrick Mooney mp->b_wptr += chunk;
638b22a70abSPatrick Mooney mp_tail->b_cont = mp;
639b22a70abSPatrick Mooney mp_tail = mp;
640b22a70abSPatrick Mooney }
641b22a70abSPatrick Mooney
642b22a70abSPatrick Mooney if (VNETHOOK_INTERESTED_OUT(link->l_neti)) {
643b22a70abSPatrick Mooney /*
644b22a70abSPatrick Mooney * The hook consumer may elect to free the mblk_t and set
645b22a70abSPatrick Mooney * our mblk_t ** to NULL. When using a viona_desb_t
646b22a70abSPatrick Mooney * (dp != NULL), we do not want the corresponding cleanup to
647b22a70abSPatrick Mooney * occur during the viona_hook() call. We instead want to
648b22a70abSPatrick Mooney * reset and recycle dp for future use. To prevent cleanup
649b22a70abSPatrick Mooney * during the viona_hook() call, we take a ref on dp (if being
650b22a70abSPatrick Mooney * used), and release it on success. On failure, the
651b22a70abSPatrick Mooney * freemsgchain() call will release all the refs taken earlier
652b22a70abSPatrick Mooney * in viona_tx() (aside from the initial ref and the one we
653b22a70abSPatrick Mooney * take), and drop_hook will reset dp for reuse.
654b22a70abSPatrick Mooney */
655b22a70abSPatrick Mooney if (dp != NULL)
656b22a70abSPatrick Mooney dp->d_ref++;
657b22a70abSPatrick Mooney
658b22a70abSPatrick Mooney /*
659b22a70abSPatrick Mooney * Pass &mp instead of &mp_head so we don't lose track of
660b22a70abSPatrick Mooney * mp_head if the hook consumer (i.e. ipf) elects to free mp
661b22a70abSPatrick Mooney * and set mp to NULL.
662b22a70abSPatrick Mooney */
663b22a70abSPatrick Mooney mp = mp_head;
664b22a70abSPatrick Mooney if (viona_hook(link, ring, &mp, B_TRUE) != 0) {
665b22a70abSPatrick Mooney if (mp != NULL)
666b22a70abSPatrick Mooney freemsgchain(mp);
667b22a70abSPatrick Mooney goto drop_hook;
668b22a70abSPatrick Mooney }
669b22a70abSPatrick Mooney
670b22a70abSPatrick Mooney if (dp != NULL) {
671b22a70abSPatrick Mooney dp->d_ref--;
672b22a70abSPatrick Mooney
673b22a70abSPatrick Mooney /*
674b22a70abSPatrick Mooney * It is possible that the hook(s) accepted the packet,
675b22a70abSPatrick Mooney * but as part of its processing, it issued a pull-up
676b22a70abSPatrick Mooney * which released all references to the desb. In that
677b22a70abSPatrick Mooney * case, go back to acting like the packet is entirely
678b22a70abSPatrick Mooney * copied (which it is).
679b22a70abSPatrick Mooney */
680b22a70abSPatrick Mooney if (dp->d_ref == 1) {
681b22a70abSPatrick Mooney dp->d_cookie = 0;
682b22a70abSPatrick Mooney dp->d_ref = 0;
683b22a70abSPatrick Mooney dp = NULL;
684b22a70abSPatrick Mooney }
685b22a70abSPatrick Mooney }
686b22a70abSPatrick Mooney }
687b22a70abSPatrick Mooney
688b22a70abSPatrick Mooney /*
689b22a70abSPatrick Mooney * Request hardware checksumming, if necessary. If the guest
690b22a70abSPatrick Mooney * sent an LSO packet then it must have also negotiated and
691b22a70abSPatrick Mooney * requested partial checksum; therefore the LSO logic is
692b22a70abSPatrick Mooney * contained within viona_tx_csum().
693b22a70abSPatrick Mooney */
694b22a70abSPatrick Mooney if ((link->l_features & VIRTIO_NET_F_CSUM) != 0 &&
695b22a70abSPatrick Mooney (hdr->vrh_flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) != 0) {
696b22a70abSPatrick Mooney if (!viona_tx_csum(ring, hdr, mp_head, len - iov[0].iov_len)) {
697b22a70abSPatrick Mooney goto drop_fail;
698b22a70abSPatrick Mooney }
699b22a70abSPatrick Mooney }
700b22a70abSPatrick Mooney
701b22a70abSPatrick Mooney if (dp != NULL) {
702b22a70abSPatrick Mooney dp->d_len = len;
703db9aa506SPatrick Mooney dp->d_pages = pages;
704b22a70abSPatrick Mooney mutex_enter(&ring->vr_lock);
705b22a70abSPatrick Mooney ring->vr_xfer_outstanding++;
706b22a70abSPatrick Mooney mutex_exit(&ring->vr_lock);
707b22a70abSPatrick Mooney } else {
708b22a70abSPatrick Mooney /*
709b22a70abSPatrick Mooney * If the data was cloned out of the ring, the descriptors can
710b22a70abSPatrick Mooney * be marked as 'used' now, rather than deferring that action
711b22a70abSPatrick Mooney * until after successful packet transmission.
712b22a70abSPatrick Mooney */
713db9aa506SPatrick Mooney vmm_drv_page_release_chain(pages);
714b22a70abSPatrick Mooney viona_tx_done(ring, len, cookie);
715b22a70abSPatrick Mooney }
716b22a70abSPatrick Mooney
717b22a70abSPatrick Mooney /*
718b22a70abSPatrick Mooney * We're potentially going deep into the networking layer; make sure the
719b22a70abSPatrick Mooney * guest can't run concurrently.
720b22a70abSPatrick Mooney */
721b22a70abSPatrick Mooney smt_begin_unsafe();
722ce9221f7SPatrick Mooney /*
723ce9221f7SPatrick Mooney * Ignore, for now, any signal from MAC about whether the outgoing
724ce9221f7SPatrick Mooney * packet was dropped or not.
725ce9221f7SPatrick Mooney */
726ce9221f7SPatrick Mooney (void) mac_tx(link_mch, mp_head, 0, MAC_DROP_ON_NO_DESC, NULL);
727b22a70abSPatrick Mooney smt_end_unsafe();
728b22a70abSPatrick Mooney return;
729b22a70abSPatrick Mooney
730b22a70abSPatrick Mooney drop_fail:
731b22a70abSPatrick Mooney /*
732b22a70abSPatrick Mooney * On the off chance that memory is not available via the desballoc or
733b22a70abSPatrick Mooney * allocb calls, there are few options left besides to fail and drop
734b22a70abSPatrick Mooney * the frame on the floor.
735b22a70abSPatrick Mooney */
736b22a70abSPatrick Mooney
737b22a70abSPatrick Mooney if (dp != NULL) {
738b22a70abSPatrick Mooney /*
739b22a70abSPatrick Mooney * Take an additional reference on the desb handle (if present)
740b22a70abSPatrick Mooney * so any desballoc-sourced mblks can release their hold on it
741b22a70abSPatrick Mooney * without the handle reaching its final state and executing
742b22a70abSPatrick Mooney * its clean-up logic.
743b22a70abSPatrick Mooney */
744b22a70abSPatrick Mooney dp->d_ref++;
745b22a70abSPatrick Mooney }
746b22a70abSPatrick Mooney
747b22a70abSPatrick Mooney /*
748b22a70abSPatrick Mooney * Free any already-allocated blocks and sum up the total length of the
749b22a70abSPatrick Mooney * dropped data to be released to the used ring.
750b22a70abSPatrick Mooney */
751b22a70abSPatrick Mooney freemsgchain(mp_head);
752b22a70abSPatrick Mooney
753b22a70abSPatrick Mooney drop_hook:
754b22a70abSPatrick Mooney len = 0;
755b22a70abSPatrick Mooney for (uint_t i = 0; i < n; i++) {
756b22a70abSPatrick Mooney len += iov[i].iov_len;
757b22a70abSPatrick Mooney }
758b22a70abSPatrick Mooney
759b22a70abSPatrick Mooney if (dp != NULL) {
760b22a70abSPatrick Mooney VERIFY(dp->d_ref == 2);
761b22a70abSPatrick Mooney
762b22a70abSPatrick Mooney /* Clean up the desb handle, releasing the extra hold. */
763b22a70abSPatrick Mooney dp->d_len = 0;
764b22a70abSPatrick Mooney dp->d_cookie = 0;
765b22a70abSPatrick Mooney dp->d_ref = 0;
766b22a70abSPatrick Mooney }
767b22a70abSPatrick Mooney
768b22a70abSPatrick Mooney VIONA_PROBE3(tx_drop, viona_vring_t *, ring, uint32_t, len,
769b22a70abSPatrick Mooney uint16_t, cookie);
770db9aa506SPatrick Mooney vmm_drv_page_release_chain(pages);
771b22a70abSPatrick Mooney viona_tx_done(ring, len, cookie);
772b22a70abSPatrick Mooney }
773