1 /*
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 /*
27  * This file and its contents are supplied under the terms of the
28  * Common Development and Distribution License ("CDDL"), version 1.0.
29  * You may only use this file in accordance with the terms of version
30  * 1.0 of the CDDL.
31  *
32  * A full copy of the text of the CDDL should have accompanied this
33  * source.  A copy of the CDDL is also available via the Internet at
34  * http://www.illumos.org/license/CDDL.
35  *
36  * Copyright 2015 Pluribus Networks Inc.
37  * Copyright 2019 Joyent, Inc.
38  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
39  */
40 
41 #include <sys/cdefs.h>
42 
43 #include <sys/param.h>
44 #include <sys/linker_set.h>
45 #include <sys/ioctl.h>
46 #include <sys/uio.h>
47 #include <sys/viona_io.h>
48 
49 #include <errno.h>
50 #include <fcntl.h>
51 #include <stdio.h>
52 #include <stdlib.h>
53 #include <stdint.h>
54 #include <string.h>
55 #include <strings.h>
56 #include <unistd.h>
57 #include <assert.h>
58 #include <pthread.h>
59 #include <signal.h>
60 #include <stdbool.h>
61 #include <poll.h>
62 #include <libdladm.h>
63 #include <libdllink.h>
64 #include <libdlvnic.h>
65 
66 #include <machine/vmm.h>
67 #include <vmmapi.h>
68 
69 #include "bhyverun.h"
70 #include "config.h"
71 #include "debug.h"
72 #include "pci_emul.h"
73 #include "virtio.h"
74 #include "iov.h"
75 #include "virtio_net.h"
76 
#define	VIONA_RINGSZ		1024	/* default RX/TX queue depth */
#define	VIONA_CTLQ_SIZE		64	/* control queue depth */
#define	VIONA_CTLQ_MAXSEGS	32	/* max descriptors per ctrlq chain */

/*
 * PCI config-space register offsets
 */
#define	VIONA_R_CFG0	24
#define	VIONA_R_CFG1	25
#define	VIONA_R_CFG2	26
#define	VIONA_R_CFG3	27
#define	VIONA_R_CFG4	28
#define	VIONA_R_CFG5	29
#define	VIONA_R_CFG6	30
#define	VIONA_R_CFG7	31
#define	VIONA_R_MAX	31

#define	VIONA_REGSZ	(VIONA_R_MAX + 1)

/*
 * Queue definitions: RX and TX are serviced by the in-kernel viona
 * driver; the control queue is emulated here in userspace.
 */
#define	VIONA_RXQ	0
#define	VIONA_TXQ	1
#define	VIONA_CTLQ	2

#define	VIONA_MAXQ	3

/*
 * Supplementary host capabilities provided in the userspace component.
 * These are OR-ed into the feature bits reported by the kernel driver
 * (see the VIRTIO_PCI_HOST_FEATURES read handler).
 */
#define	VIONA_S_HOSTCAPS_USERSPACE	(	\
	VIRTIO_NET_F_CTRL_VQ |			\
	VIRTIO_NET_F_CTRL_RX)

/*
 * Debug printf; enabled at runtime via the "viona.debug" config bool.
 */
static volatile int pci_viona_debug;
#define	DPRINTF(fmt, arg...) \
	do { \
		if (pci_viona_debug) { \
			FPRINTLN(stdout, fmt, ##arg); \
			fflush(stdout); \
		} \
	} while (0)
#define	WPRINTF(fmt, arg...) FPRINTLN(stderr, fmt, ##arg)
124 
125 /*
126  * Per-device softc
127  */
128 struct pci_viona_softc {
129 	struct virtio_softc	vsc_vs;
130 	struct virtio_consts	vsc_consts;
131 	struct vqueue_info	vsc_queues[VIONA_MAXQ];
132 	pthread_mutex_t		vsc_mtx;
133 
134 	datalink_id_t	vsc_linkid;
135 	int		vsc_vnafd;
136 
137 	/* Configurable parameters */
138 	char		vsc_linkname[MAXLINKNAMELEN];
139 	uint32_t	vsc_feature_mask;
140 	uint16_t	vsc_vq_size;
141 
142 	uint8_t		vsc_macaddr[6];
143 
144 	bool		vsc_resetting;
145 	bool		vsc_msix_active;
146 
147 	viona_promisc_t	vsc_promisc;		/* Current promisc mode */
148 	bool		vsc_promisc_promisc;	/* PROMISC enabled */
149 	bool		vsc_promisc_allmulti;	/* ALLMULTI enabled */
150 	bool		vsc_promisc_umac;	/* unicast MACs sent */
151 	bool		vsc_promisc_mmac;	/* multicast MACs sent */
152 };
153 
static struct virtio_consts viona_vi_consts = {
	.vc_name		= "viona",
	.vc_nvq			= VIONA_MAXQ,	/* RX, TX and control queues */
	/*
	 * We use the common bhyve virtio framework so that we can call
	 * the utility functions to work with the queues handled in userspace.
	 * The framework PCI read/write functions are not used so these
	 * callbacks will not be invoked.
	 */
	.vc_cfgsize		= 0,
	.vc_reset		= NULL,
	.vc_qnotify		= NULL,
	.vc_cfgread		= NULL,
	.vc_cfgwrite		= NULL,
	.vc_apply_features	= NULL,
	/*
	 * The following field is populated using the response from the
	 * viona driver during initialisation, augmented with the additional
	 * capabilities emulated in userspace (VIONA_S_HOSTCAPS_USERSPACE).
	 */
	.vc_hv_caps		= 0,
};
176 
177 /*
178  * Return the size of IO BAR that maps virtio header and device specific
179  * region. The size would vary depending on whether MSI-X is enabled or
180  * not.
181  */
182 static uint64_t
pci_viona_iosize(struct pci_devinst * pi)183 pci_viona_iosize(struct pci_devinst *pi)
184 {
185 	if (pci_msix_enabled(pi)) {
186 		return (VIONA_REGSZ);
187 	} else {
188 		return (VIONA_REGSZ -
189 		    (VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0)));
190 	}
191 }
192 
193 static uint16_t
pci_viona_qsize(struct pci_viona_softc * sc,int qnum)194 pci_viona_qsize(struct pci_viona_softc *sc, int qnum)
195 {
196 	if (qnum == VIONA_CTLQ)
197 		return (VIONA_CTLQ_SIZE);
198 
199 	return (sc->vsc_vq_size);
200 }
201 
202 static void
pci_viona_ring_reset(struct pci_viona_softc * sc,int ring)203 pci_viona_ring_reset(struct pci_viona_softc *sc, int ring)
204 {
205 	assert(ring < VIONA_MAXQ);
206 
207 	switch (ring) {
208 	case VIONA_RXQ:
209 	case VIONA_TXQ:
210 		break;
211 	case VIONA_CTLQ:
212 	default:
213 		return;
214 	}
215 
216 	for (;;) {
217 		int res;
218 
219 		res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_RESET, ring);
220 		if (res == 0) {
221 			break;
222 		} else if (errno != EINTR) {
223 			WPRINTF("ioctl viona ring %d reset failed %d",
224 			    ring, errno);
225 			return;
226 		}
227 	}
228 }
229 
/*
 * Handle a guest write to the virtio status register.  Writing zero
 * requests a full device reset: the userspace virtio state is reset
 * and the in-kernel RX/TX rings are torn down.
 */
static void
pci_viona_update_status(struct pci_viona_softc *sc, uint32_t value)
{

	if (value == 0) {
		DPRINTF("viona: device reset requested !");

		vi_reset_dev(&sc->vsc_vs);
		pci_viona_ring_reset(sc, VIONA_RXQ);
		pci_viona_ring_reset(sc, VIONA_TXQ);
	}

	sc->vsc_vs.vs_status = value;
}
244 
245 static const char *
pci_viona_promisc_descr(viona_promisc_t mode)246 pci_viona_promisc_descr(viona_promisc_t mode)
247 {
248 	switch (mode) {
249 	case VIONA_PROMISC_NONE:
250 		return ("none");
251 	case VIONA_PROMISC_MULTI:
252 		return ("multicast");
253 	case VIONA_PROMISC_ALL:
254 		return ("all");
255 	default:
256 		abort();
257 	}
258 }
259 
/*
 * Derive the required viona promiscuity level from the flags that the
 * guest has set via the control queue and, if the level has changed,
 * push it down to the in-kernel viona instance.
 * Returns 0 on success, or the non-zero ioctl result on failure.
 */
static int
pci_viona_eval_promisc(struct pci_viona_softc *sc)
{
	viona_promisc_t mode = VIONA_PROMISC_NONE;
	int err = 0;

	/*
	 * If the guest has explicitly requested promiscuous mode or has sent a
	 * non-empty unicast MAC address table, then set viona to promiscuous
	 * mode. Otherwise, if the guest has explicitly requested multicast
	 * promiscuity or has sent a non-empty multicast MAC address table,
	 * then set viona to multicast promiscuous mode.
	 */
	if (sc->vsc_promisc_promisc || sc->vsc_promisc_umac)
		mode = VIONA_PROMISC_ALL;
	else if (sc->vsc_promisc_allmulti || sc->vsc_promisc_mmac)
		mode = VIONA_PROMISC_MULTI;

	if (mode != sc->vsc_promisc) {
		DPRINTF("viona: setting promiscuous mode to %d (%s)",
		    mode, pci_viona_promisc_descr(mode));
		DPRINTF("       promisc=%u, umac=%u, allmulti=%u, mmac=%u",
		    sc->vsc_promisc_promisc, sc->vsc_promisc_umac,
		    sc->vsc_promisc_allmulti, sc->vsc_promisc_mmac);

		err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_PROMISC, mode);
		/* Only record the new mode once the kernel has accepted it. */
		if (err == 0)
			sc->vsc_promisc = mode;
		else
			WPRINTF("ioctl viona set promisc failed %d", errno);
	}

	return (err);
}
294 
/*
 * Handle a VIRTIO_NET_CTRL_RX class control request.  The single data
 * descriptor carries a one-byte on/off value for the PROMISC or
 * ALLMULTI toggle.  Returns the status byte to place in the guest's
 * ack descriptor: VIRTIO_NET_CQ_OK or VIRTIO_NET_CQ_ERR.
 */
static uint8_t
pci_viona_control_rx(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
    struct iovec *iov, size_t niov)
{
	/* vsc_vs is the first member of the softc, so this cast is valid. */
	struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;
	uint8_t v;

	if (iov[0].iov_len != sizeof (uint8_t) || niov != 1) {
		EPRINTLN("viona: bad control RX data");
		return (VIRTIO_NET_CQ_ERR);
	}

	v = *(uint8_t *)iov[0].iov_base;

	switch (hdr->vnch_command) {
	case VIRTIO_NET_CTRL_RX_PROMISC:
		DPRINTF("viona: ctrl RX promisc %d", v);
		sc->vsc_promisc_promisc = (v != 0);
		break;
	case VIRTIO_NET_CTRL_RX_ALLMULTI:
		DPRINTF("viona: ctrl RX allmulti %d", v);
		sc->vsc_promisc_allmulti = (v != 0);
		break;
	default:
		/*
		 * VIRTIO_NET_F_CTRL_RX_EXTRA was not offered so no other
		 * commands are expected.
		 */
		EPRINTLN("viona: unrecognised RX control cmd %u",
		    hdr->vnch_command);
		return (VIRTIO_NET_CQ_ERR);
	}

	/* Apply the new flags to the in-kernel device. */
	if (pci_viona_eval_promisc(sc) == 0)
		return (VIRTIO_NET_CQ_OK);
	return (VIRTIO_NET_CQ_ERR);
}
332 
333 static void
pci_viona_control_mac_dump(const char * tag,const struct iovec * iov)334 pci_viona_control_mac_dump(const char *tag, const struct iovec *iov)
335 {
336 	virtio_net_ctrl_mac_t *table = (virtio_net_ctrl_mac_t *)iov->iov_base;
337 	ether_addr_t *mac = &table->vncm_mac;
338 
339 	DPRINTF("-- %s MAC TABLE (entries: %u)", tag, table->vncm_entries);
340 
341 	if (table->vncm_entries * ETHERADDRL !=
342 	    iov->iov_len - sizeof (table->vncm_entries)) {
343 		DPRINTF("   Bad table size %u", iov->iov_len);
344 		return;
345 	}
346 
347 	for (uint32_t i = 0; i < table->vncm_entries; i++) {
348 		DPRINTF("   [%2d] %s", i, ether_ntoa((struct ether_addr *)mac));
349 		mac++;
350 	}
351 }
352 
353 static uint8_t
pci_viona_control_mac(struct vqueue_info * vq,const virtio_net_ctrl_hdr_t * hdr,struct iovec * iov,size_t niov)354 pci_viona_control_mac(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
355     struct iovec *iov, size_t niov)
356 {
357 	struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;
358 
359 	switch (hdr->vnch_command) {
360 	case VIRTIO_NET_CTRL_MAC_TABLE_SET: {
361 		virtio_net_ctrl_mac_t *table;
362 
363 		DPRINTF("viona: ctrl MAC table set");
364 
365 		if (niov != 2) {
366 			EPRINTLN("viona: bad control MAC data");
367 			return (VIRTIO_NET_CQ_ERR);
368 		}
369 
370 		/*
371 		 * We advertise VIRTIO_NET_F_CTRL_RX and therefore need to
372 		 * accept VIRTIO_NET_CTRL_MAC, but we don't support passing
373 		 * changes in the MAC address lists down to viona.
374 		 * Instead, we set flags to indicate if the guest has sent
375 		 * any MAC addresses for each table, and use these to determine
376 		 * the resulting promiscuous mode, see pci_viona_eval_promisc()
377 		 * above.
378 		 */
379 
380 		/* Unicast MAC table */
381 		table = (virtio_net_ctrl_mac_t *)iov[0].iov_base;
382 		sc->vsc_promisc_umac = (table->vncm_entries != 0);
383 		if (pci_viona_debug)
384 			pci_viona_control_mac_dump("UNICAST", &iov[0]);
385 
386 		/* Multicast MAC table */
387 		table = (virtio_net_ctrl_mac_t *)iov[1].iov_base;
388 		sc->vsc_promisc_mmac = (table->vncm_entries != 0);
389 		if (pci_viona_debug)
390 			pci_viona_control_mac_dump("MULTICAST", &iov[1]);
391 
392 		break;
393 	}
394 	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
395 		/* disallow setting the primary filter MAC address */
396 		DPRINTF("viona: ctrl MAC addr set %d", niov);
397 		return (VIRTIO_NET_CQ_ERR);
398 	default:
399 		EPRINTLN("viona: unrecognised MAC control cmd %u",
400 		    hdr->vnch_command);
401 		return (VIRTIO_NET_CQ_ERR);
402 	}
403 
404 	if (pci_viona_eval_promisc(sc) == 0)
405 		return (VIRTIO_NET_CQ_OK);
406 	return (VIRTIO_NET_CQ_ERR);
407 }
408 
409 static void
pci_viona_control(struct vqueue_info * vq)410 pci_viona_control(struct vqueue_info *vq)
411 {
412 	struct iovec iov[VIONA_CTLQ_MAXSEGS + 1];
413 	const virtio_net_ctrl_hdr_t *hdr;
414 	struct iovec *siov = iov;
415 	struct vi_req req = { 0 };
416 	uint8_t *ackp;
417 	size_t nsiov;
418 	uint32_t len;
419 	int n;
420 
421 	n = vq_getchain(vq, iov, VIONA_CTLQ_MAXSEGS, &req);
422 
423 	assert(n >= 1 && n <= VIONA_CTLQ_MAXSEGS);
424 
425 	/*
426 	 * Since we have not negotiated VIRTIO_F_ANY_LAYOUT, we expect the
427 	 * control message to be laid out in at least three descriptors as
428 	 * follows:
429 	 *	header		- sizeof (virtio_net_ctrl_hdr_t)
430 	 *	data[]		- at least one descriptor, varying size
431 	 *	ack		- uint8_t, flagged as writable
432 	 * Check the incoming message to make sure it matches this layout and
433 	 * drop the entire chain if not.
434 	 */
435 	if (n < 3 || req.writable != 1 || req.readable + 1 != n ||
436 	    iov[req.readable].iov_len != sizeof (uint8_t)) {
437 		EPRINTLN("viona: bad control chain, len=%d, w=%d, r=%d",
438 		    n, req.writable, req.readable);
439 		goto drop;
440 	}
441 
442 	hdr = (const virtio_net_ctrl_hdr_t *)iov[0].iov_base;
443 	if (iov[0].iov_len < sizeof (virtio_net_ctrl_hdr_t)) {
444 		EPRINTLN("viona: control header too short: %u", iov[0].iov_len);
445 		goto drop;
446 	}
447 
448 	/*
449 	 * Writable iovecs start at iov[req.readable], and we've already
450 	 * checked that there is only one writable, it's at the end, and the
451 	 * right size; it's the acknowledgement byte.
452 	 */
453 	ackp = (uint8_t *)iov[req.readable].iov_base;
454 
455 	siov = &iov[1];
456 	nsiov = n - 2;
457 
458 	switch (hdr->vnch_class) {
459 	case VIRTIO_NET_CTRL_RX:
460 		*ackp = pci_viona_control_rx(vq, hdr, siov, nsiov);
461 		break;
462 	case VIRTIO_NET_CTRL_MAC:
463 		*ackp = pci_viona_control_mac(vq, hdr, siov, nsiov);
464 		break;
465 	default:
466 		EPRINTLN("viona: unrecognised control class %u, cmd %u",
467 		    hdr->vnch_class, hdr->vnch_command);
468 		*ackp = VIRTIO_NET_CQ_ERR;
469 		break;
470 	}
471 
472 drop:
473 	len = 0;
474 	for (uint_t i = 0; i < n; i++)
475 		len += iov[i].iov_len;
476 
477 	vq_relchain(vq, req.idx, len);
478 }
479 
/*
 * Drain the control queue, coordinating with guest kicks so that no
 * late-arriving descriptor is missed, then signal the guest that all
 * available chains have been consumed.
 */
static void
pci_viona_process_ctrlq(struct vqueue_info *vq)
{
	bool more = true;

	while (more) {
		vq_kick_disable(vq);

		while (vq_has_descs(vq))
			pci_viona_control(vq);

		vq_kick_enable(vq);

		/*
		 * One more check in case a late addition raced with
		 * re-enabling kicks. Note that vq_kick_enable() includes a
		 * memory barrier.
		 */
		more = vq_has_descs(vq);
	}

	vq_endchains(vq, /* used_all_avail= */1);
}
503 
/*
 * Interrupt delivery thread.  Polls the viona device fd for POLLRDBAND
 * (the driver's "ring interrupt pending" signal), then queries which
 * rings need service and forwards each to the guest either as an MSI-X
 * message or, when MSI-X is off, by asserting the legacy INTx line.
 */
static void *
pci_viona_poll_thread(void *param)
{
	struct pci_viona_softc *sc = param;
	pollfd_t pollset;
	const int fd = sc->vsc_vnafd;

	pollset.fd = fd;
	pollset.events = POLLRDBAND;

	for (;;) {
		if (poll(&pollset, 1, -1) < 0) {
			if (errno == EINTR || errno == EAGAIN) {
				continue;
			} else {
				WPRINTF("pci_viona_poll_thread poll() error %d",
				    errno);
				break;
			}
		}
		if (pollset.revents & POLLRDBAND) {
			vioc_intr_poll_t vip;
			uint_t i;
			int res;
			bool assert_lintr = false;
			const bool do_msix = pci_msix_enabled(sc->vsc_vs.vs_pi);

			/* Fetch per-ring interrupt status from the driver. */
			res = ioctl(fd, VNA_IOC_INTR_POLL, &vip);
			for (i = 0; res > 0 && i < VIONA_VQ_MAX; i++) {
				if (vip.vip_status[i] == 0) {
					continue;
				}
				if (do_msix) {
					pci_generate_msix(sc->vsc_vs.vs_pi,
					    sc->vsc_queues[i].vq_msix_idx);
				} else {
					/* Coalesce into one INTx assertion. */
					assert_lintr = true;
				}
				/* Ack the ring so the driver can re-signal. */
				res = ioctl(fd, VNA_IOC_RING_INTR_CLR, i);
				if (res != 0) {
					WPRINTF("ioctl viona vq %d intr "
					    "clear failed %d", i, errno);
				}
			}
			if (assert_lintr) {
				pthread_mutex_lock(&sc->vsc_mtx);
				sc->vsc_vs.vs_isr |= VIRTIO_PCI_ISR_INTR;
				pci_lintr_assert(sc->vsc_vs.vs_pi);
				pthread_mutex_unlock(&sc->vsc_mtx);
			}
		}
	}

	pthread_exit(NULL);
}
559 
/*
 * Handle a guest write to VIRTIO_PCI_QUEUE_PFN for the currently
 * selected queue.  The control queue is initialised by the userspace
 * virtio framework; RX/TX ring addresses are handed to the in-kernel
 * viona driver via VNA_IOC_RING_INIT.
 */
static void
pci_viona_ring_init(struct pci_viona_softc *sc, uint64_t pfn)
{
	int			qnum = sc->vsc_vs.vs_curq;
	vioc_ring_init_t	vna_ri;
	int			error;

	assert(qnum < VIONA_MAXQ);

	if (qnum == VIONA_CTLQ) {
		vi_vq_init(&sc->vsc_vs, pfn);
		return;
	}

	/* Record the guest-physical ring address for later PFN reads. */
	sc->vsc_queues[qnum].vq_pfn = (pfn << VRING_PFN);
	vna_ri.ri_index = qnum;
	vna_ri.ri_qsize = pci_viona_qsize(sc, qnum);
	vna_ri.ri_qaddr = (pfn << VRING_PFN);
	error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_INIT, &vna_ri);

	if (error != 0) {
		WPRINTF("ioctl viona ring %u init failed %d", qnum, errno);
	}
}
584 
585 static int
pci_viona_viona_init(struct vmctx * ctx,struct pci_viona_softc * sc)586 pci_viona_viona_init(struct vmctx *ctx, struct pci_viona_softc *sc)
587 {
588 	vioc_create_t		vna_create;
589 	int			error;
590 
591 	sc->vsc_vnafd = open("/dev/viona", O_RDWR | O_EXCL);
592 	if (sc->vsc_vnafd == -1) {
593 		WPRINTF("open viona ctl failed: %d", errno);
594 		return (-1);
595 	}
596 
597 	vna_create.c_linkid = sc->vsc_linkid;
598 	vna_create.c_vmfd = vm_get_device_fd(ctx);
599 	error = ioctl(sc->vsc_vnafd, VNA_IOC_CREATE, &vna_create);
600 	if (error != 0) {
601 		(void) close(sc->vsc_vnafd);
602 		WPRINTF("ioctl viona create failed %d", errno);
603 		return (-1);
604 	}
605 
606 	return (0);
607 }
608 
609 static int
pci_viona_legacy_config(nvlist_t * nvl,const char * opt)610 pci_viona_legacy_config(nvlist_t *nvl, const char *opt)
611 {
612 	char *config, *name, *tofree, *value;
613 
614 	if (opt == NULL)
615 		return (0);
616 
617 	config = tofree = strdup(opt);
618 	while ((name = strsep(&config, ",")) != NULL) {
619 		value = strchr(name, '=');
620 		if (value != NULL) {
621 			*value++ = '\0';
622 			set_config_value_node(nvl, name, value);
623 		} else {
624 			set_config_value_node(nvl, "vnic", name);
625 		}
626 	}
627 	free(tofree);
628 	return (0);
629 }
630 
631 static int
pci_viona_parse_opts(struct pci_viona_softc * sc,nvlist_t * nvl)632 pci_viona_parse_opts(struct pci_viona_softc *sc, nvlist_t *nvl)
633 {
634 	const char *value;
635 	int err = 0;
636 
637 	sc->vsc_vq_size = VIONA_RINGSZ;
638 	sc->vsc_feature_mask = 0;
639 	sc->vsc_linkname[0] = '\0';
640 
641 	value = get_config_value_node(nvl, "feature_mask");
642 	if (value != NULL) {
643 		long num;
644 
645 		errno = 0;
646 		num = strtol(value, NULL, 0);
647 		if (errno != 0 || num < 0) {
648 			fprintf(stderr,
649 			    "viona: invalid mask '%s'", value);
650 		} else {
651 			sc->vsc_feature_mask = num;
652 		}
653 	}
654 
655 	value = get_config_value_node(nvl, "vqsize");
656 	if (value != NULL) {
657 		long num;
658 
659 		errno = 0;
660 		num = strtol(value, NULL, 0);
661 		if (errno != 0) {
662 			fprintf(stderr,
663 			    "viona: invalid vsqize '%s'", value);
664 			err = -1;
665 		} else if (num <= 2 || num > 32768) {
666 			fprintf(stderr,
667 			    "viona: vqsize out of range", num);
668 			err = -1;
669 		} else if ((1 << (ffs(num) - 1)) != num) {
670 			fprintf(stderr,
671 			    "viona: vqsize must be power of 2", num);
672 			err = -1;
673 		} else {
674 			sc->vsc_vq_size = num;
675 		}
676 	}
677 
678 	value = get_config_value_node(nvl, "vnic");
679 	if (value == NULL) {
680 		fprintf(stderr, "viona: vnic name required");
681 		err = -1;
682 	} else {
683 		(void) strlcpy(sc->vsc_linkname, value, MAXLINKNAMELEN);
684 	}
685 
686 	DPRINTF("viona=%p dev=%s vqsize=%x feature_mask=%x", sc,
687 	    sc->vsc_linkname, sc->vsc_vq_size, sc->vsc_feature_mask);
688 	return (err);
689 }
690 
691 static int
pci_viona_init(struct pci_devinst * pi,nvlist_t * nvl)692 pci_viona_init(struct pci_devinst *pi, nvlist_t *nvl)
693 {
694 	dladm_handle_t		handle;
695 	dladm_status_t		status;
696 	dladm_vnic_attr_t	attr;
697 	char			errmsg[DLADM_STRSIZE];
698 	char			tname[MAXCOMLEN + 1];
699 	int error, i;
700 	struct pci_viona_softc *sc;
701 	const char *vnic;
702 	pthread_t tid;
703 
704 	if (get_config_bool_default("viona.debug", false))
705 		pci_viona_debug = 1;
706 
707 	vnic = get_config_value_node(nvl, "vnic");
708 	if (vnic == NULL) {
709 		WPRINTF("virtio-viona: vnic required");
710 		return (1);
711 	}
712 
713 	sc = malloc(sizeof (struct pci_viona_softc));
714 	memset(sc, 0, sizeof (struct pci_viona_softc));
715 
716 	if (pci_viona_parse_opts(sc, nvl) != 0) {
717 		free(sc);
718 		return (1);
719 	}
720 
721 	if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) {
722 		WPRINTF("could not open /dev/dld");
723 		free(sc);
724 		return (1);
725 	}
726 
727 	if ((status = dladm_name2info(handle, sc->vsc_linkname, &sc->vsc_linkid,
728 	    NULL, NULL, NULL)) != DLADM_STATUS_OK) {
729 		WPRINTF("dladm_name2info() for %s failed: %s", vnic,
730 		    dladm_status2str(status, errmsg));
731 		dladm_close(handle);
732 		free(sc);
733 		return (1);
734 	}
735 
736 	if ((status = dladm_vnic_info(handle, sc->vsc_linkid, &attr,
737 	    DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK) {
738 		WPRINTF("dladm_vnic_info() for %s failed: %s", vnic,
739 		    dladm_status2str(status, errmsg));
740 		dladm_close(handle);
741 		free(sc);
742 		return (1);
743 	}
744 
745 	memcpy(sc->vsc_macaddr, attr.va_mac_addr, ETHERADDRL);
746 
747 	dladm_close(handle);
748 
749 	error = pci_viona_viona_init(pi->pi_vmctx, sc);
750 	if (error != 0) {
751 		free(sc);
752 		return (1);
753 	}
754 
755 	error = pthread_create(&tid, NULL, pci_viona_poll_thread, sc);
756 	assert(error == 0);
757 	snprintf(tname, sizeof (tname), "vionapoll:%s", vnic);
758 	pthread_set_name_np(tid, tname);
759 
760 	/* initialize config space */
761 	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
762 	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
763 	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
764 	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK);
765 	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
766 
767 	sc->vsc_consts = viona_vi_consts;
768 	pthread_mutex_init(&sc->vsc_mtx, NULL);
769 
770 	/*
771 	 * The RX and TX queues are handled in the kernel component of
772 	 * viona; however The control queue is emulated in userspace.
773 	 */
774 	sc->vsc_queues[VIONA_CTLQ].vq_qsize = pci_viona_qsize(sc, VIONA_CTLQ);
775 
776 	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
777 	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
778 
779 	/*
780 	 * Guests that do not support CTRL_RX_MAC still generally need to
781 	 * receive multicast packets. Guests that do support this feature will
782 	 * end up setting this flag indirectly via messages on the control
783 	 * queue but it does not hurt to default to multicast promiscuity here
784 	 * and it is what older version of viona did.
785 	 */
786 	sc->vsc_promisc_mmac = true;
787 	pci_viona_eval_promisc(sc);
788 
789 	/* MSI-X support */
790 	for (i = 0; i < VIONA_MAXQ; i++)
791 		sc->vsc_queues[i].vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
792 
793 	/* BAR 1 used to map MSI-X table and PBA */
794 	if (pci_emul_add_msixcap(pi, VIONA_MAXQ, 1)) {
795 		free(sc);
796 		return (1);
797 	}
798 
799 	/* BAR 0 for legacy-style virtio register access. */
800 	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VIONA_REGSZ);
801 	if (error != 0) {
802 		WPRINTF("could not allocate virtio BAR");
803 		free(sc);
804 		return (1);
805 	}
806 
807 	/*
808 	 * Need a legacy interrupt for virtio compliance, even though MSI-X
809 	 * operation is _strongly_ suggested for adequate performance.
810 	 */
811 	pci_lintr_request(pi);
812 
813 	return (0);
814 }
815 
/*
 * Translate a guest BAR offset into the internal (MSI-X enabled)
 * register layout.  When MSI-X is off, the device-specific config
 * region sits lower in the BAR, so offsets in that region must be
 * shifted up by the size of the MSI-X gap.
 */
static uint64_t
viona_adjust_offset(struct pci_devinst *pi, uint64_t offset)
{
	if (!pci_msix_enabled(pi) && offset >= VIRTIO_PCI_CONFIG_OFF(0)) {
		offset += VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0);
	}

	return (offset);
}
832 
/*
 * Push the current MSI-X vector (address/data pair) for an RX/TX ring
 * down to the in-kernel viona driver, or clear it (addr/msg of 0) when
 * the vector is unassigned, masked, or MSI-X is inactive.  The control
 * queue is serviced in userspace and is skipped.
 */
static void
pci_viona_ring_set_msix(struct pci_devinst *pi, uint_t ring)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	struct msix_table_entry mte;
	uint16_t tab_index;
	vioc_ring_msi_t vrm;
	int res;

	if (ring == VIONA_CTLQ)
		return;

	assert(ring <= VIONA_VQ_TX);

	/* Default to "no MSI": the driver treats addr/msg 0 as disabled. */
	vrm.rm_index = ring;
	vrm.rm_addr = 0;
	vrm.rm_msg = 0;
	tab_index = sc->vsc_queues[ring].vq_msix_idx;

	if (tab_index != VIRTIO_MSI_NO_VECTOR && sc->vsc_msix_active) {
		mte = pi->pi_msix.table[tab_index];
		/* Only program the vector if it is not masked. */
		if ((mte.vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
			vrm.rm_addr = mte.addr;
			vrm.rm_msg = mte.msg_data;
		}
	}

	res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_SET_MSI, &vrm);
	if (res != 0) {
		WPRINTF("ioctl viona set_msi %d failed %d", ring, errno);
	}
}
865 
866 static void
pci_viona_lintrupdate(struct pci_devinst * pi)867 pci_viona_lintrupdate(struct pci_devinst *pi)
868 {
869 	struct pci_viona_softc *sc = pi->pi_arg;
870 	bool msix_on = false;
871 
872 	pthread_mutex_lock(&sc->vsc_mtx);
873 	msix_on = pci_msix_enabled(pi) && (pi->pi_msix.function_mask == 0);
874 	if ((sc->vsc_msix_active && !msix_on) ||
875 	    (msix_on && !sc->vsc_msix_active)) {
876 		uint_t i;
877 
878 		sc->vsc_msix_active = msix_on;
879 		/* Update in-kernel ring configs */
880 		for (i = 0; i <= VIONA_VQ_TX; i++) {
881 			pci_viona_ring_set_msix(pi, i);
882 		}
883 	}
884 	pthread_mutex_unlock(&sc->vsc_mtx);
885 }
886 
/*
 * Called after a successful guest write to the MSI-X table.  Work out
 * which table entry was touched from the write offset and reprogram
 * any RX/TX ring currently bound to that vector.
 */
static void
pci_viona_msix_update(struct pci_devinst *pi, uint64_t offset)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	uint_t tab_index, i;

	pthread_mutex_lock(&sc->vsc_mtx);
	if (!sc->vsc_msix_active) {
		pthread_mutex_unlock(&sc->vsc_mtx);
		return;
	}

	/*
	 * Rather than update every possible MSI-X vector, cheat and use the
	 * offset to calculate the entry within the table.  Since this should
	 * only be called when a write to the table succeeds, the index should
	 * be valid.
	 */
	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

	for (i = 0; i <= VIONA_VQ_TX; i++) {
		if (sc->vsc_queues[i].vq_msix_idx != tab_index) {
			continue;
		}
		pci_viona_ring_set_msix(pi, i);
	}

	pthread_mutex_unlock(&sc->vsc_mtx);
}
916 
/*
 * Handle a guest queue-notify ("kick").  RX/TX kicks are forwarded to
 * the in-kernel driver via ioctl; control-queue kicks are serviced
 * directly in userspace.
 */
static void
pci_viona_qnotify(struct pci_viona_softc *sc, int ring)
{
	int error;

	switch (ring) {
	case VIONA_TXQ:
	case VIONA_RXQ:
		error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_KICK, ring);
		if (error != 0) {
			WPRINTF("ioctl viona ring %d kick failed %d",
			    ring, errno);
		}
		break;
	case VIONA_CTLQ: {
		struct vqueue_info *vq = &sc->vsc_queues[VIONA_CTLQ];

		if (vq_has_descs(vq))
			pci_viona_process_ctrlq(vq);
		break;
	}
	}
}
940 
941 static void
pci_viona_baraddr(struct pci_devinst * pi,int baridx,int enabled,uint64_t address)942 pci_viona_baraddr(struct pci_devinst *pi, int baridx, int enabled,
943     uint64_t address)
944 {
945 	struct pci_viona_softc *sc = pi->pi_arg;
946 	uint64_t ioport;
947 	int error;
948 
949 	if (baridx != 0)
950 		return;
951 
952 	if (enabled == 0) {
953 		error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, 0);
954 		if (error != 0)
955 			WPRINTF("uninstall ioport hook failed %d", errno);
956 		return;
957 	}
958 
959 	/*
960 	 * Install ioport hook for virtqueue notification.
961 	 * This is part of the virtio common configuration area so the
962 	 * address does not change with MSI-X status.
963 	 */
964 	ioport = address + VIRTIO_PCI_QUEUE_NOTIFY;
965 	error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, ioport);
966 	if (error != 0) {
967 		WPRINTF("install ioport hook at %x failed %d",
968 		    ioport, errno);
969 	}
970 }
971 
/*
 * Guest write handler for the device BARs.  MSI-X table/PBA writes are
 * forwarded to the MSI-X emulation (and trigger ring reprogramming);
 * BAR 0 writes are decoded as legacy virtio registers, with feature
 * negotiation and queue setup passed through to the viona driver.
 */
static void
pci_viona_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
    uint64_t value)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	void *ptr;
	int err = 0;

	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		if (pci_emul_msix_twrite(pi, offset, size, value) == 0) {
			pci_viona_msix_update(pi, offset);
		}
		return;
	}

	assert(baridx == 0);

	/* Reject accesses beyond the (MSI-X dependent) register window. */
	if (offset + size > pci_viona_iosize(pi)) {
		DPRINTF("viona_write: 2big, offset %ld size %d",
		    offset, size);
		return;
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	/* Normalise the offset to the MSI-X-enabled register layout. */
	offset = viona_adjust_offset(pi, offset);

	switch (offset) {
	case VIRTIO_PCI_GUEST_FEATURES:
		assert(size == 4);
		/* Strip any administratively masked feature bits. */
		value &= ~(sc->vsc_feature_mask);
		err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_FEATURES, &value);
		if (err != 0) {
			WPRINTF("ioctl feature negotiation returned err = %d",
			    errno);
		} else {
			sc->vsc_vs.vs_negotiated_caps = value;
		}
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		assert(size == 4);
		pci_viona_ring_init(sc, value);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
		assert(size == 2);
		assert(value < VIONA_MAXQ);
		sc->vsc_vs.vs_curq = value;
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY:
		assert(size == 2);
		assert(value < VIONA_MAXQ);
		pci_viona_qnotify(sc, value);
		break;
	case VIRTIO_PCI_STATUS:
		assert(size == 1);
		pci_viona_update_status(sc, value);
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		assert(size == 2);
		sc->vsc_vs.vs_msix_cfg_idx = value;
		break;
	case VIRTIO_MSI_QUEUE_VECTOR:
		assert(size == 2);
		assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
		sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx = value;
		/* Propagate the vector change to the in-kernel ring. */
		pci_viona_ring_set_msix(pi, sc->vsc_vs.vs_curq);
		break;
	case VIONA_R_CFG0:
	case VIONA_R_CFG1:
	case VIONA_R_CFG2:
	case VIONA_R_CFG3:
	case VIONA_R_CFG4:
	case VIONA_R_CFG5:
		assert((size + offset) <= (VIONA_R_CFG5 + 1));
		ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
		/*
		 * The driver is allowed to change the MAC address
		 */
		sc->vsc_macaddr[offset - VIONA_R_CFG0] = value;
		if (size == 1) {
			*(uint8_t *)ptr = value;
		} else if (size == 2) {
			*(uint16_t *)ptr = value;
		} else {
			*(uint32_t *)ptr = value;
		}
		break;
	case VIRTIO_PCI_HOST_FEATURES:
	case VIRTIO_PCI_QUEUE_NUM:
	case VIRTIO_PCI_ISR:
	case VIONA_R_CFG6:
	case VIONA_R_CFG7:
		DPRINTF("viona: write to readonly reg %ld", offset);
		break;
	default:
		DPRINTF("viona: unknown i/o write offset %ld", offset);
		value = 0;
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);
}
1075 
1076 static uint64_t
pci_viona_read(struct pci_devinst * pi,int baridx,uint64_t offset,int size)1077 pci_viona_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
1078 {
1079 	struct pci_viona_softc *sc = pi->pi_arg;
1080 	void *ptr;
1081 	uint64_t value;
1082 	int err = 0;
1083 
1084 	if (baridx == pci_msix_table_bar(pi) ||
1085 	    baridx == pci_msix_pba_bar(pi)) {
1086 		return (pci_emul_msix_tread(pi, offset, size));
1087 	}
1088 
1089 	assert(baridx == 0);
1090 
1091 	if (offset + size > pci_viona_iosize(pi)) {
1092 		DPRINTF("viona_read: 2big, offset %ld size %d",
1093 		    offset, size);
1094 		return (0);
1095 	}
1096 
1097 	pthread_mutex_lock(&sc->vsc_mtx);
1098 
1099 	offset = viona_adjust_offset(pi, offset);
1100 
1101 	switch (offset) {
1102 	case VIRTIO_PCI_HOST_FEATURES:
1103 		assert(size == 4);
1104 		err = ioctl(sc->vsc_vnafd, VNA_IOC_GET_FEATURES, &value);
1105 		if (err != 0) {
1106 			WPRINTF("ioctl get host features returned err = %d",
1107 			    errno);
1108 		}
1109 		value |= VIONA_S_HOSTCAPS_USERSPACE;
1110 		value &= ~sc->vsc_feature_mask;
1111 		sc->vsc_consts.vc_hv_caps = value;
1112 		break;
1113 	case VIRTIO_PCI_GUEST_FEATURES:
1114 		assert(size == 4);
1115 		value = sc->vsc_vs.vs_negotiated_caps; /* XXX never read ? */
1116 		break;
1117 	case VIRTIO_PCI_QUEUE_PFN:
1118 		assert(size == 4);
1119 		value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_pfn >> VRING_PFN;
1120 		break;
1121 	case VIRTIO_PCI_QUEUE_NUM:
1122 		assert(size == 2);
1123 		value = pci_viona_qsize(sc, sc->vsc_vs.vs_curq);
1124 		break;
1125 	case VIRTIO_PCI_QUEUE_SEL:
1126 		assert(size == 2);
1127 		value = sc->vsc_vs.vs_curq;  /* XXX never read ? */
1128 		break;
1129 	case VIRTIO_PCI_QUEUE_NOTIFY:
1130 		assert(size == 2);
1131 		value = sc->vsc_vs.vs_curq;  /* XXX never read ? */
1132 		break;
1133 	case VIRTIO_PCI_STATUS:
1134 		assert(size == 1);
1135 		value = sc->vsc_vs.vs_status;
1136 		break;
1137 	case VIRTIO_PCI_ISR:
1138 		assert(size == 1);
1139 		value = sc->vsc_vs.vs_isr;
1140 		sc->vsc_vs.vs_isr = 0;	/* a read clears this flag */
1141 		if (value != 0) {
1142 			pci_lintr_deassert(pi);
1143 		}
1144 		break;
1145 	case VIRTIO_MSI_CONFIG_VECTOR:
1146 		assert(size == 2);
1147 		value = sc->vsc_vs.vs_msix_cfg_idx;
1148 		break;
1149 	case VIRTIO_MSI_QUEUE_VECTOR:
1150 		assert(size == 2);
1151 		assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
1152 		value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx;
1153 		break;
1154 	case VIONA_R_CFG0:
1155 	case VIONA_R_CFG1:
1156 	case VIONA_R_CFG2:
1157 	case VIONA_R_CFG3:
1158 	case VIONA_R_CFG4:
1159 	case VIONA_R_CFG5:
1160 		assert((size + offset) <= (VIONA_R_CFG5 + 1));
1161 		ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
1162 		if (size == 1) {
1163 			value = *(uint8_t *)ptr;
1164 		} else if (size == 2) {
1165 			value = *(uint16_t *)ptr;
1166 		} else {
1167 			value = *(uint32_t *)ptr;
1168 		}
1169 		break;
1170 	case VIONA_R_CFG6:
1171 		assert(size != 4);
1172 		value = 0x01;	/* XXX link always up */
1173 		break;
1174 	case VIONA_R_CFG7:
1175 		assert(size == 1);
1176 		value = 0;	/* XXX link status in LSB */
1177 		break;
1178 	default:
1179 		DPRINTF("viona: unknown i/o read offset %ld", offset);
1180 		value = 0;
1181 		break;
1182 	}
1183 
1184 	pthread_mutex_unlock(&sc->vsc_mtx);
1185 
1186 	return (value);
1187 }
1188 
1189 struct pci_devemu pci_de_viona = {
1190 	.pe_emu =	"virtio-net-viona",
1191 	.pe_init =	pci_viona_init,
1192 	.pe_legacy_config = pci_viona_legacy_config,
1193 	.pe_barwrite =	pci_viona_write,
1194 	.pe_barread =	pci_viona_read,
1195 	.pe_baraddr =	pci_viona_baraddr,
1196 	.pe_lintrupdate = pci_viona_lintrupdate
1197 };
1198 PCI_EMUL_SET(pci_de_viona);
1199