/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
 * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * This file implements multiple network backends (tap, netmap, ...),
 * to be used by network frontends such as virtio-net and e1000.
 * The API to access the backend (e.g. send/receive packets, negotiate
 * features) is exported by net_backends.h.
 */

#include <sys/cdefs.h>

#include <sys/types.h>		/* u_short etc */
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/uio.h>

#include <net/if.h>
#ifdef __FreeBSD__
#if defined(INET6) || defined(INET)
#include <net/if_tap.h>
#endif
#include <net/netmap.h>
#include <net/netmap_virt.h>
#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#endif /* __FreeBSD__ */

#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sysexits.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>
#include <poll.h>

#ifdef NETGRAPH
#include <sys/param.h>
#include <sys/sysctl.h>
#include <netgraph.h>
#endif

#ifndef __FreeBSD__
#include <libdlpi.h>
#include <net/ethernet.h>
#endif

#include "config.h"
#include "debug.h"
#include "iov.h"
#include "mevent.h"
#include "net_backends.h"
#include "pci_emul.h"

#include <sys/linker_set.h>

/*
 * Each network backend registers a set of function pointers that are
 * used to implement the net backends API.
 * This might need to be exposed if we implement backends in separate files.
 */
struct net_backend {
	const char *prefix;	/* prefix matching this backend */

	/*
	 * Routines used to initialize and cleanup the resources needed
	 * by a backend. The cleanup function is used internally,
	 * and should not be called by the frontend.
	 */
	int (*init)(struct net_backend *be, const char *devname,
	    nvlist_t *nvl, net_be_rxeof_t cb, void *param);
	void (*cleanup)(struct net_backend *be);

	/*
	 * Called to serve a guest transmit request. The scatter-gather
	 * vector provided by the caller has 'iovcnt' elements and contains
	 * the packet to send.
	 */
	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
	    int iovcnt);

	/*
	 * Get the length of the next packet that can be received from
	 * the backend. If no packets are currently available, this
	 * function returns 0.
	 */
	ssize_t (*peek_recvlen)(struct net_backend *be);
	/*
	 * Called to receive a packet from the backend. When the function
	 * returns a positive value 'len', the scatter-gather vector
	 * provided by the caller contains a packet of that length.
	 * The function returns 0 if the backend doesn't have a new packet to
	 * receive.
	 */
	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
	    int iovcnt);

	/*
	 * Ask the backend to enable or disable receive operation in the
	 * backend. On return from a disable operation, it is guaranteed
	 * that the receive callback won't be called until receive is
	 * enabled again. Note however that it is up to the caller to make
	 * sure that netbe_recv() is not currently being executed by another
	 * thread.
	 */
	void (*recv_enable)(struct net_backend *be);
	void (*recv_disable)(struct net_backend *be);

	/*
	 * Ask the backend for the virtio-net features it is able to
	 * support. Possible features are TSO, UFO and checksum offloading
	 * in both rx and tx direction and for both IPv4 and IPv6.
	 */
	uint64_t (*get_cap)(struct net_backend *be);

	/*
	 * Tell the backend to enable/disable the specified virtio-net
	 * features (capabilities).
	 */
	int (*set_cap)(struct net_backend *be, uint64_t features,
	    unsigned int vnet_hdr_len);

#ifndef __FreeBSD__
	int (*get_mac)(struct net_backend *be, void *, size_t *);
#endif

	struct pci_vtnet_softc *sc;
	int fd;

	/*
	 * Length of the virtio-net header used by the backend and the
	 * frontend, respectively. A zero value means that the header
	 * is not used.
	 */
	unsigned int be_vnet_hdr_len;
	unsigned int fe_vnet_hdr_len;

	/* Size of backend-specific private data. */
	size_t priv_size;

	/* Backend-specific private data follows. */
};

#define	NET_BE_PRIV(be)		((void *)((be) + 1))
#define	NET_BE_SIZE(be)		(sizeof(*be) + (be)->priv_size)
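
/*
 * Illustrative sketch (not part of the build): a backend instance is a
 * single allocation holding the generic state immediately followed by the
 * backend's private area, so NET_BE_PRIV() is simply the address one past
 * the generic header:
 *
 *	struct net_backend *nbe = calloc(1, NET_BE_SIZE(tbe));
 *	*nbe = *tbe;				// copy the template
 *	struct tap_priv *priv = NET_BE_PRIV(nbe);
 *						// priv == (void *)(nbe + 1)
 *
 * netbe_init() below follows exactly this pattern.
 */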

SET_DECLARE(net_backend_set, struct net_backend);

#define VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)

#define WPRINTF(params) PRINTLN params

#ifdef __FreeBSD__

/*
 * The tap backend
 */

#if defined(INET6) || defined(INET)
static const int pf_list[] = {
#if defined(INET6)
	PF_INET6,
#endif
#if defined(INET)
	PF_INET,
#endif
};
#endif

struct tap_priv {
	struct mevent *mevp;
	/*
	 * A bounce buffer that allows us to implement the peek_recvlen
	 * callback. In the future we may get the same information from
	 * the kevent data.
	 */
	char bbuf[1 << 16];
	ssize_t bbuflen;
};
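
/*
 * Sketch of how a frontend is expected to drive the peek/recv pair
 * (hedged pseudo-usage, not part of this file): peek_recvlen() may pull
 * the next packet into the bounce buffer to learn its length, and the
 * recv() that follows then drains the buffer instead of reading from the
 * device again:
 *
 *	ssize_t len = netbe_peek_recvlen(be);
 *	if (len > 0) {
 *		// size the guest buffers for 'len' bytes, then:
 *		netbe_recv(be, iov, iovcnt);
 *	}
 */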

static void
tap_cleanup(struct net_backend *be)
{
	struct tap_priv *priv = NET_BE_PRIV(be);

	if (priv->mevp) {
		mevent_delete(priv->mevp);
	}
	if (be->fd != -1) {
		close(be->fd);
		be->fd = -1;
	}
}

static int
tap_init(struct net_backend *be, const char *devname,
    nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
{
	struct tap_priv *priv = NET_BE_PRIV(be);
	char tbuf[80];
	int opt = 1;
#if defined(INET6) || defined(INET)
	struct ifreq ifrq;
	int s;
#endif
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	if (cb == NULL) {
		WPRINTF(("TAP backend requires non-NULL callback"));
		return (-1);
	}

	strcpy(tbuf, "/dev/");
	strlcat(tbuf, devname, sizeof(tbuf));

	be->fd = open(tbuf, O_RDWR);
	if (be->fd == -1) {
		WPRINTF(("open of tap device %s failed", tbuf));
		goto error;
	}

	/*
	 * Set non-blocking and register for read
	 * notifications with the event loop
	 */
	if (ioctl(be->fd, FIONBIO, &opt) < 0) {
		WPRINTF(("tap device O_NONBLOCK failed"));
		goto error;
	}

#if defined(INET6) || defined(INET)
	/*
	 * Try to UP the interface rather than relying on
	 * net.link.tap.up_on_open.
	 */
	bzero(&ifrq, sizeof(ifrq));
	if (ioctl(be->fd, TAPGIFNAME, &ifrq) < 0) {
		WPRINTF(("Could not get interface name"));
		goto error;
	}

	s = -1;
	for (size_t i = 0; s == -1 && i < nitems(pf_list); i++)
		s = socket(pf_list[i], SOCK_DGRAM, 0);
	if (s == -1) {
		WPRINTF(("Could not open socket"));
		goto error;
	}

	if (ioctl(s, SIOCGIFFLAGS, &ifrq) < 0) {
		(void)close(s);
		WPRINTF(("Could not get interface flags"));
		goto error;
	}
	ifrq.ifr_flags |= IFF_UP;
	if (ioctl(s, SIOCSIFFLAGS, &ifrq) < 0) {
		(void)close(s);
		WPRINTF(("Could not set interface flags"));
		goto error;
	}
	(void)close(s);
#endif

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (caph_rights_limit(be->fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	memset(priv->bbuf, 0, sizeof(priv->bbuf));
	priv->bbuflen = 0;

	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	tap_cleanup(be);
	return (-1);
}

/*
 * Called to send a buffer chain out to the tap device
 */
static ssize_t
tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
	return (writev(be->fd, iov, iovcnt));
}

static ssize_t
tap_peek_recvlen(struct net_backend *be)
{
	struct tap_priv *priv = NET_BE_PRIV(be);
	ssize_t ret;

	if (priv->bbuflen > 0) {
		/*
		 * We already have a packet in the bounce buffer.
		 * Just return its length.
		 */
		return priv->bbuflen;
	}

	/*
	 * Read the next packet (if any) into the bounce buffer, so
	 * that we get to know its length and we can return that
	 * to the caller.
	 */
	ret = read(be->fd, priv->bbuf, sizeof(priv->bbuf));
	if (ret < 0 && errno == EWOULDBLOCK) {
		return (0);
	}

	if (ret > 0)
		priv->bbuflen = ret;

	return (ret);
}

static ssize_t
tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
	struct tap_priv *priv = NET_BE_PRIV(be);
	ssize_t ret;

	if (priv->bbuflen > 0) {
		/*
		 * A packet is available in the bounce buffer, so
		 * we read it from there.
		 */
		ret = buf_to_iov(priv->bbuf, priv->bbuflen,
		    iov, iovcnt, 0);

		/* Mark the bounce buffer as empty. */
		priv->bbuflen = 0;

		return (ret);
	}

	ret = readv(be->fd, iov, iovcnt);
	if (ret < 0 && errno == EWOULDBLOCK) {
		return (0);
	}

	return (ret);
}

static void
tap_recv_enable(struct net_backend *be)
{
	struct tap_priv *priv = NET_BE_PRIV(be);

	mevent_enable(priv->mevp);
}

static void
tap_recv_disable(struct net_backend *be)
{
	struct tap_priv *priv = NET_BE_PRIV(be);

	mevent_disable(priv->mevp);
}

static uint64_t
tap_get_cap(struct net_backend *be __unused)
{

	return (0); /* no capabilities for now */
}

static int
tap_set_cap(struct net_backend *be __unused, uint64_t features,
    unsigned vnet_hdr_len)
{

	return ((features || vnet_hdr_len) ? -1 : 0);
}

static struct net_backend tap_backend = {
	.prefix = "tap",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.peek_recvlen = tap_peek_recvlen,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};

/* A clone of the tap backend, with a different prefix. */
static struct net_backend vmnet_backend = {
	.prefix = "vmnet",
	.priv_size = sizeof(struct tap_priv),
	.init = tap_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.peek_recvlen = tap_peek_recvlen,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};

DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);

#ifdef NETGRAPH

/*
 * Netgraph backend
 */

#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
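
/*
 * Example invocation (a hedged sketch; the node and hook names are made
 * up):
 *
 *	bhyve ... -s 2:0,virtio-net,netgraph,path=vmbr:,peerhook=vm0 ...
 *
 * 'path' and 'peerhook' are mandatory (see ng_init() below); 'hook'
 * defaults to "vmlink" and 'socket' optionally names the ng_socket node.
 */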

static int
ng_init(struct net_backend *be, const char *devname __unused,
	 nvlist_t *nvl, net_be_rxeof_t cb, void *param)
{
	struct tap_priv *p = NET_BE_PRIV(be);
	struct ngm_connect ngc;
	const char *value, *nodename;
	int sbsz;
	int ctrl_sock;
	int flags;
	unsigned long maxsbsz;
	size_t msbsz;
#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
#endif

	if (cb == NULL) {
		WPRINTF(("Netgraph backend requires non-NULL callback"));
		return (-1);
	}

	be->fd = -1;

	memset(&ngc, 0, sizeof(ngc));

	value = get_config_value_node(nvl, "path");
	if (value == NULL) {
		WPRINTF(("path must be provided"));
		return (-1);
	}
	strncpy(ngc.path, value, NG_PATHSIZ - 1);

	value = get_config_value_node(nvl, "hook");
	if (value == NULL)
		value = "vmlink";
	strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);

	value = get_config_value_node(nvl, "peerhook");
	if (value == NULL) {
		WPRINTF(("peer hook must be provided"));
		return (-1);
	}
	strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);

	nodename = get_config_value_node(nvl, "socket");
	if (NgMkSockNode(nodename,
		&ctrl_sock, &be->fd) < 0) {
		WPRINTF(("can't get Netgraph sockets"));
		return (-1);
	}

	if (NgSendMsg(ctrl_sock, ".",
		NGM_GENERIC_COOKIE,
		NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
		WPRINTF(("can't connect to node"));
		close(ctrl_sock);
		goto error;
	}

	close(ctrl_sock);

	flags = fcntl(be->fd, F_GETFL);

	if (flags < 0) {
		WPRINTF(("can't get socket flags"));
		goto error;
	}

	if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
		WPRINTF(("can't set O_NONBLOCK flag"));
		goto error;
	}

	/*
	 * The default ng_socket(4) buffer size is too low.
	 * Calculate the minimum of NG_SBUF_MAX_SIZE and
	 * kern.ipc.maxsockbuf.
	 */
	msbsz = sizeof(maxsbsz);
	if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
		NULL, 0) < 0) {
		WPRINTF(("can't get 'kern.ipc.maxsockbuf' value"));
		goto error;
	}

	/*
	 * We can't set the socket buffer size to kern.ipc.maxsockbuf value,
	 * as it takes into account the mbuf(9) overhead.
	 */
	maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES);

	sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);

	if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
		sizeof(sbsz)) < 0) {
		WPRINTF(("can't set TX buffer size"));
		goto error;
	}

	if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
		sizeof(sbsz)) < 0) {
		WPRINTF(("can't set RX buffer size"));
		goto error;
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
	if (caph_rights_limit(be->fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	memset(p->bbuf, 0, sizeof(p->bbuf));
	p->bbuflen = 0;

	p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (p->mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	tap_cleanup(be);
	return (-1);
}

static struct net_backend ng_backend = {
	.prefix = "netgraph",
	.priv_size = sizeof(struct tap_priv),
	.init = ng_init,
	.cleanup = tap_cleanup,
	.send = tap_send,
	.peek_recvlen = tap_peek_recvlen,
	.recv = tap_recv,
	.recv_enable = tap_recv_enable,
	.recv_disable = tap_recv_disable,
	.get_cap = tap_get_cap,
	.set_cap = tap_set_cap,
};

DATA_SET(net_backend_set, ng_backend);

#endif /* NETGRAPH */

/*
 * The netmap backend
 */

/* The virtio-net features supported by netmap. */
#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
		VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
		VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
		VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)

struct netmap_priv {
	char ifname[IFNAMSIZ];
	struct nm_desc *nmd;
	uint16_t memid;
	struct netmap_ring *rx;
	struct netmap_ring *tx;
	struct mevent *mevp;
	net_be_rxeof_t cb;
	void *cb_param;
};

static void
nmreq_init(struct nmreq *req, char *ifname)
{

	memset(req, 0, sizeof(*req));
	strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
	req->nr_version = NETMAP_API;
}

static int
netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
{
	int err;
	struct nmreq req;
	struct netmap_priv *priv = NET_BE_PRIV(be);

	nmreq_init(&req, priv->ifname);
	req.nr_cmd = NETMAP_BDG_VNET_HDR;
	req.nr_arg1 = vnet_hdr_len;
	err = ioctl(be->fd, NIOCREGIF, &req);
	if (err) {
		WPRINTF(("Unable to set vnet header length %d",
				vnet_hdr_len));
		return (err);
	}

	be->be_vnet_hdr_len = vnet_hdr_len;

	return (0);
}

static int
netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
{
	unsigned prev_hdr_len = be->be_vnet_hdr_len;
	int ret;

	if (vnet_hdr_len == prev_hdr_len) {
		return (1);
	}

	ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
	if (ret) {
		return (0);
	}

	netmap_set_vnet_hdr_len(be, prev_hdr_len);

	return (1);
}

static uint64_t
netmap_get_cap(struct net_backend *be)
{

	return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
	    NETMAP_FEATURES : 0);
}

static int
netmap_set_cap(struct net_backend *be, uint64_t features __unused,
    unsigned vnet_hdr_len)
{

	return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
}

static int
netmap_init(struct net_backend *be, const char *devname,
    nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);

	strlcpy(priv->ifname, devname, sizeof(priv->ifname));
	priv->ifname[sizeof(priv->ifname) - 1] = '\0';

	priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
	if (priv->nmd == NULL) {
		WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
			devname, strerror(errno)));
		return (-1);
	}

	priv->memid = priv->nmd->req.nr_arg2;
	priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
	priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
	priv->cb = cb;
	priv->cb_param = param;
	be->fd = priv->nmd->fd;

	priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->mevp == NULL) {
		WPRINTF(("Could not register event"));
		return (-1);
	}

	return (0);
}

static void
netmap_cleanup(struct net_backend *be)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);

	if (priv->mevp) {
		mevent_delete(priv->mevp);
	}
	if (priv->nmd) {
		nm_close(priv->nmd);
	}
	be->fd = -1;
}

static ssize_t
netmap_send(struct net_backend *be, const struct iovec *iov,
	    int iovcnt)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);
	struct netmap_ring *ring;
	ssize_t totlen = 0;
	int nm_buf_size;
	int nm_buf_len;
	uint32_t head;
	uint8_t *nm_buf;
	int j;

	ring = priv->tx;
	head = ring->head;
	if (head == ring->tail) {
		WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
		goto txsync;
	}
	nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
	nm_buf_size = ring->nr_buf_size;
	nm_buf_len = 0;

	for (j = 0; j < iovcnt; j++) {
		uint8_t *iov_frag_buf = iov[j].iov_base;
		int iov_frag_size = iov[j].iov_len;

		totlen += iov_frag_size;

		/*
		 * Split each iovec fragment over multiple netmap slots,
		 * if necessary.
		 */
		for (;;) {
			int copylen;

			copylen = iov_frag_size < nm_buf_size ?
			    iov_frag_size : nm_buf_size;
			memcpy(nm_buf, iov_frag_buf, copylen);

			iov_frag_buf += copylen;
			iov_frag_size -= copylen;
			nm_buf += copylen;
			nm_buf_size -= copylen;
			nm_buf_len += copylen;

			if (iov_frag_size == 0) {
				break;
			}

			ring->slot[head].len = nm_buf_len;
			ring->slot[head].flags = NS_MOREFRAG;
			head = nm_ring_next(ring, head);
			if (head == ring->tail) {
				/*
				 * We ran out of netmap slots while
				 * splitting the iovec fragments.
				 */
				WPRINTF(("No space, drop %zu bytes",
				   count_iov(iov, iovcnt)));
				goto txsync;
			}
			nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
			nm_buf_size = ring->nr_buf_size;
			nm_buf_len = 0;
		}
	}

	/* Complete the last slot, which must not have NS_MOREFRAG set. */
	ring->slot[head].len = nm_buf_len;
	ring->slot[head].flags = 0;
	head = nm_ring_next(ring, head);

	/* Now update ring->head and ring->cur. */
	ring->head = ring->cur = head;
txsync:
	ioctl(be->fd, NIOCTXSYNC, NULL);

	return (totlen);
}
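
/*
 * Worked example (sketch): with nr_buf_size == 2048, a 5000-byte guest
 * frame spans three slots: 2048 bytes with NS_MOREFRAG, 2048 bytes with
 * NS_MOREFRAG, and a final 904-byte slot with flags == 0. The receive
 * path below reassembles such chains by walking slots until it finds one
 * without NS_MOREFRAG.
 */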

static ssize_t
netmap_peek_recvlen(struct net_backend *be)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);
	struct netmap_ring *ring = priv->rx;
	uint32_t head = ring->head;
	ssize_t totlen = 0;

	while (head != ring->tail) {
		struct netmap_slot *slot = ring->slot + head;

		totlen += slot->len;
		if ((slot->flags & NS_MOREFRAG) == 0)
			break;
		head = nm_ring_next(ring, head);
	}

	return (totlen);
}

static ssize_t
netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);
	struct netmap_slot *slot = NULL;
	struct netmap_ring *ring;
	uint8_t *iov_frag_buf;
	int iov_frag_size;
	ssize_t totlen = 0;
	uint32_t head;

	assert(iovcnt);

	ring = priv->rx;
	head = ring->head;
	iov_frag_buf = iov->iov_base;
	iov_frag_size = iov->iov_len;

	do {
		uint8_t *nm_buf;
		int nm_buf_len;

		if (head == ring->tail) {
			return (0);
		}

		slot = ring->slot + head;
		nm_buf = NETMAP_BUF(ring, slot->buf_idx);
		nm_buf_len = slot->len;

		for (;;) {
			int copylen = nm_buf_len < iov_frag_size ?
			    nm_buf_len : iov_frag_size;

			memcpy(iov_frag_buf, nm_buf, copylen);
			nm_buf += copylen;
			nm_buf_len -= copylen;
			iov_frag_buf += copylen;
			iov_frag_size -= copylen;
			totlen += copylen;

			if (nm_buf_len == 0) {
				break;
			}

			iov++;
			iovcnt--;
			if (iovcnt == 0) {
				/* No space to receive. */
				WPRINTF(("Short iov, drop %zd bytes",
				    totlen));
				return (-ENOSPC);
			}
			iov_frag_buf = iov->iov_base;
			iov_frag_size = iov->iov_len;
		}

		head = nm_ring_next(ring, head);

	} while (slot->flags & NS_MOREFRAG);

	/* Release slots to netmap. */
	ring->head = ring->cur = head;

	return (totlen);
}

static void
netmap_recv_enable(struct net_backend *be)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);

	mevent_enable(priv->mevp);
}

static void
netmap_recv_disable(struct net_backend *be)
{
	struct netmap_priv *priv = NET_BE_PRIV(be);

	mevent_disable(priv->mevp);
}

static struct net_backend netmap_backend = {
	.prefix = "netmap",
	.priv_size = sizeof(struct netmap_priv),
	.init = netmap_init,
	.cleanup = netmap_cleanup,
	.send = netmap_send,
	.peek_recvlen = netmap_peek_recvlen,
	.recv = netmap_recv,
	.recv_enable = netmap_recv_enable,
	.recv_disable = netmap_recv_disable,
	.get_cap = netmap_get_cap,
	.set_cap = netmap_set_cap,
};

/* A clone of the netmap backend, with a different prefix. */
static struct net_backend vale_backend = {
	.prefix = "vale",
	.priv_size = sizeof(struct netmap_priv),
	.init = netmap_init,
	.cleanup = netmap_cleanup,
	.send = netmap_send,
	.peek_recvlen = netmap_peek_recvlen,
	.recv = netmap_recv,
	.recv_enable = netmap_recv_enable,
	.recv_disable = netmap_recv_disable,
	.get_cap = netmap_get_cap,
	.set_cap = netmap_set_cap,
};

DATA_SET(net_backend_set, netmap_backend);
DATA_SET(net_backend_set, vale_backend);

#else /* __FreeBSD__ */

/*
 * The illumos dlpi backend
 */

/*
 * The size of the bounce buffer used to implement the peek callback.
 * This value should be big enough to accommodate the largest of all possible
 * frontend packet lengths. The value here matches the definition of
 * VTNET_MAX_PKT_LEN in pci_virtio_net.c
 */
#define	DLPI_BBUF_SIZE (65536 + 64)

typedef struct be_dlpi_priv {
	dlpi_handle_t bdp_dhp;
	struct mevent *bdp_mevp;
	/*
	 * A bounce buffer that allows us to implement the peek_recvlen
	 * callback. Each structure is only used by a single thread so
	 * one is enough.
	 */
	uint8_t bdp_bbuf[DLPI_BBUF_SIZE];
	ssize_t bdp_bbuflen;
} be_dlpi_priv_t;

static void
be_dlpi_cleanup(net_backend_t *be)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);

	if (priv->bdp_dhp != NULL)
		dlpi_close(priv->bdp_dhp);
	priv->bdp_dhp = NULL;

	if (priv->bdp_mevp != NULL)
		mevent_delete(priv->bdp_mevp);
	priv->bdp_mevp = NULL;

	priv->bdp_bbuflen = 0;
	be->fd = -1;
}

static void
be_dlpi_err(int ret, const char *dev, char *msg)
{
	WPRINTF(("%s: %s (%s)", dev, msg, dlpi_strerror(ret)));
}

static int
be_dlpi_init(net_backend_t *be, const char *devname __unused,
     nvlist_t *nvl, net_be_rxeof_t cb, void *param)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);
	const char *vnic;
	int ret;

	if (cb == NULL) {
		WPRINTF(("dlpi backend requires non-NULL callback"));
		return (-1);
	}

	vnic = get_config_value_node(nvl, "vnic");
	if (vnic == NULL) {
		WPRINTF(("dlpi backend requires a VNIC"));
		return (-1);
	}

	priv->bdp_bbuflen = 0;

	ret = dlpi_open(vnic, &priv->bdp_dhp, DLPI_RAW);

	if (ret != DLPI_SUCCESS) {
		be_dlpi_err(ret, vnic, "open failed");
		goto error;
	}

	if ((ret = dlpi_bind(priv->bdp_dhp, DLPI_ANY_SAP, NULL)) !=
	    DLPI_SUCCESS) {
		be_dlpi_err(ret, vnic, "bind failed");
		goto error;
	}

	if (get_config_bool_node_default(nvl, "promiscrxonly", true)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_RX_ONLY)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(rxonly) failed");
			goto error;
		}
	}
	if (get_config_bool_node_default(nvl, "promiscphys", false)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_PHYS)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(physical) failed");
			goto error;
		}
	}
	if (get_config_bool_node_default(nvl, "promiscsap", true)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_SAP)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(SAP) failed");
			goto error;
		}
	}
	if (get_config_bool_node_default(nvl, "promiscmulti", true)) {
		if ((ret = dlpi_promiscon(priv->bdp_dhp, DL_PROMISC_MULTI)) !=
		    DLPI_SUCCESS) {
			be_dlpi_err(ret, vnic,
			    "enable promiscuous mode(multicast) failed");
			goto error;
		}
	}

	be->fd = dlpi_fd(priv->bdp_dhp);

	if (fcntl(be->fd, F_SETFL, O_NONBLOCK) < 0) {
		WPRINTF(("%s: enable O_NONBLOCK failed", vnic));
		goto error;
	}

	priv->bdp_mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
	if (priv->bdp_mevp == NULL) {
		WPRINTF(("Could not register event"));
		goto error;
	}

	return (0);

error:
	be_dlpi_cleanup(be);
	return (-1);
}
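
/*
 * Configuration keys consumed above (a summary sketch; values are
 * illustrative): "vnic" names the link to open and is mandatory, while
 * the optional booleans "promiscrxonly" (default true), "promiscphys"
 * (default false), "promiscsap" (default true) and "promiscmulti"
 * (default true) select which DL_PROMISC_* modes are enabled on it.
 */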

/*
 * Called to send a buffer chain out to the dlpi device
 */
static ssize_t
be_dlpi_send(net_backend_t *be, const struct iovec *iov, int iovcnt)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);
	ssize_t len = 0;
	int ret;

	if (iovcnt == 1) {
		len = iov[0].iov_len;
		ret = dlpi_send(priv->bdp_dhp, NULL, 0, iov[0].iov_base, len,
		    NULL);
	} else {
		void *buf = NULL;

		len = iov_to_buf(iov, iovcnt, &buf);

		if (len <= 0 || buf == NULL)
			return (-1);

		ret = dlpi_send(priv->bdp_dhp, NULL, 0, buf, len, NULL);
		free(buf);
	}

	if (ret != DLPI_SUCCESS)
		return (-1);

	return (len);
}

static ssize_t
be_dlpi_peek_recvlen(net_backend_t *be)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);
	dlpi_recvinfo_t recv;
	size_t len;
	int ret;

	/*
	 * We already have a packet in the bounce buffer.
	 * Just return its length.
	 */
	if (priv->bdp_bbuflen > 0)
		return (priv->bdp_bbuflen);

	/*
	 * Read the next packet (if any) into the bounce buffer, so
	 * that we get to know its length and we can return that
	 * to the caller.
	 */
	len = sizeof (priv->bdp_bbuf);
	ret = dlpi_recv(priv->bdp_dhp, NULL, NULL, priv->bdp_bbuf, &len,
	    0, &recv);
	if (ret == DL_SYSERR) {
		if (errno == EWOULDBLOCK)
			return (0);
		return (-1);
	} else if (ret == DLPI_ETIMEDOUT) {
		return (0);
	} else if (ret != DLPI_SUCCESS) {
		return (-1);
	}
	if (recv.dri_totmsglen > sizeof (priv->bdp_bbuf)) {
		EPRINTLN("DLPI bounce buffer was too small! - needed %zu bytes",
		    recv.dri_totmsglen);
	}

	priv->bdp_bbuflen = len;

	return (len);
}

static ssize_t
be_dlpi_recv(net_backend_t *be, const struct iovec *iov, int iovcnt)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);
	size_t len;
	int ret;

	if (priv->bdp_bbuflen > 0) {
		/*
		 * A packet is available in the bounce buffer, so
		 * we read it from there.
		 */
		len = buf_to_iov(priv->bdp_bbuf, priv->bdp_bbuflen,
		    iov, iovcnt, 0);

		/* Mark the bounce buffer as empty. */
		priv->bdp_bbuflen = 0;

		return (len);
	}

	len = iov[0].iov_len;
	ret = dlpi_recv(priv->bdp_dhp, NULL, NULL,
	    (uint8_t *)iov[0].iov_base, &len, 0, NULL);
	if (ret == DL_SYSERR) {
		if (errno == EWOULDBLOCK)
			return (0);
		return (-1);
	} else if (ret == DLPI_ETIMEDOUT) {
		return (0);
	} else if (ret != DLPI_SUCCESS) {
		return (-1);
	}

	return (len);
}

static void
be_dlpi_recv_enable(net_backend_t *be)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);

	mevent_enable(priv->bdp_mevp);
}

static void
be_dlpi_recv_disable(net_backend_t *be)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);

	mevent_disable(priv->bdp_mevp);
}

static uint64_t
be_dlpi_get_cap(net_backend_t *be)
{
	return (0); /* no capabilities for now */
}

static int
be_dlpi_set_cap(net_backend_t *be, uint64_t features,
    unsigned vnet_hdr_len)
{
	return ((features || vnet_hdr_len) ? -1 : 0);
}

static int
be_dlpi_get_mac(net_backend_t *be, void *buf, size_t *buflen)
{
	be_dlpi_priv_t *priv = NET_BE_PRIV(be);
	uchar_t physaddr[DLPI_PHYSADDR_MAX];
	size_t physaddrlen = DLPI_PHYSADDR_MAX;
	int ret;

	if ((ret = dlpi_get_physaddr(priv->bdp_dhp, DL_CURR_PHYS_ADDR,
	    physaddr, &physaddrlen)) != DLPI_SUCCESS) {
		be_dlpi_err(ret, dlpi_linkname(priv->bdp_dhp),
		    "read MAC address failed");
		return (EINVAL);
	}

	if (physaddrlen != ETHERADDRL) {
		WPRINTF(("%s: bad MAC address len %zu",
		    dlpi_linkname(priv->bdp_dhp), physaddrlen));
		return (EINVAL);
	}

	if (physaddrlen > *buflen) {
		WPRINTF(("%s: MAC address too long (%zu bytes required)",
		    dlpi_linkname(priv->bdp_dhp), physaddrlen));
		return (ENOMEM);
	}

	*buflen = physaddrlen;
	memcpy(buf, physaddr, *buflen);

	return (0);
}

static struct net_backend dlpi_backend = {
	.prefix = "dlpi",
	.priv_size = sizeof(struct be_dlpi_priv),
	.init = be_dlpi_init,
	.cleanup = be_dlpi_cleanup,
	.send = be_dlpi_send,
	.peek_recvlen = be_dlpi_peek_recvlen,
	.recv = be_dlpi_recv,
	.recv_enable = be_dlpi_recv_enable,
	.recv_disable = be_dlpi_recv_disable,
	.get_cap = be_dlpi_get_cap,
	.set_cap = be_dlpi_set_cap,
	.get_mac = be_dlpi_get_mac,
};

DATA_SET(net_backend_set, dlpi_backend);

#endif /* __FreeBSD__ */

#ifdef __FreeBSD__
int
netbe_legacy_config(nvlist_t *nvl, const char *opts)
{
	char *backend, *cp;

	if (opts == NULL)
		return (0);

	cp = strchr(opts, ',');
	if (cp == NULL) {
		set_config_value_node(nvl, "backend", opts);
		return (0);
	}
	backend = strndup(opts, cp - opts);
	set_config_value_node(nvl, "backend", backend);
	free(backend);
	return (pci_parse_legacy_config(nvl, cp + 1));
}
#else
int
netbe_legacy_config(nvlist_t *nvl, const char *opts)
{
	char *config, *name, *tofree, *value;

	if (opts == NULL)
		return (0);

	/* Default to the 'dlpi' backend - can still be overridden by opts */
	set_config_value_node(nvl, "backend", "dlpi");
	set_config_value_node(nvl, "type", "dlpi");

	config = tofree = strdup(opts);
	if (config == NULL)
		err(4, "netbe_legacy_config strdup()");
	while ((name = strsep(&config, ",")) != NULL) {
		value = strchr(name, '=');
		if (value != NULL) {
			*value++ = '\0';
			set_config_value_node(nvl, name, value);
		} else {
			set_config_value_node(nvl, "vnic", name);
		}
	}
	free(tofree);

	return (0);
}
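
/*
 * For example (a sketch; the VNIC name is hypothetical), the legacy
 * option string "net0,promiscphys=true" ends up as the nvlist
 * { backend=dlpi, type=dlpi, vnic=net0, promiscphys=true }, which
 * netbe_init() and be_dlpi_init() then consume.
 */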
#endif

/*
 * Initialize a backend and attach to the frontend.
 * This is called during frontend initialization.
 *  @ret is a pointer to the backend to be initialized
 *  @devname is the backend-name as supplied on the command line,
 * 	e.g. -s 2:0,frontend-name,backend-name[,other-args]
 *  @cb is the receive callback supplied by the frontend,
 *	and it is invoked in the event loop when a receive
 *	event is generated in the hypervisor,
 *  @param is a pointer to the frontend, and normally used as
 *	the argument for the callback.
 */
int
netbe_init(struct net_backend **ret, nvlist_t *nvl, net_be_rxeof_t cb,
    void *param)
{
	struct net_backend **pbe, *nbe, *tbe = NULL;
	const char *value, *type;
	char *devname;
	int err;

	value = get_config_value_node(nvl, "backend");
	if (value == NULL) {
		return (-1);
	}
	devname = strdup(value);

	/*
	 * Use the type given by the configuration, if it exists; otherwise
	 * use the prefix of the backend as the type.
	 */
	type = get_config_value_node(nvl, "type");
	if (type == NULL)
		type = devname;

	/*
	 * Find the network backend that matches the user-provided
	 * device name. net_backend_set is built using a linker set.
	 */
	SET_FOREACH(pbe, net_backend_set) {
		if (strncmp(type, (*pbe)->prefix,
		    strlen((*pbe)->prefix)) == 0) {
			tbe = *pbe;
			assert(tbe->init != NULL);
			assert(tbe->cleanup != NULL);
			assert(tbe->send != NULL);
			assert(tbe->recv != NULL);
			assert(tbe->get_cap != NULL);
			assert(tbe->set_cap != NULL);
			break;
		}
	}

	*ret = NULL;
	if (tbe == NULL) {
		free(devname);
		return (EINVAL);
	}

	nbe = calloc(1, NET_BE_SIZE(tbe));
	*nbe = *tbe;	/* copy the template */
	nbe->fd = -1;
	nbe->sc = param;
	nbe->be_vnet_hdr_len = 0;
	nbe->fe_vnet_hdr_len = 0;

	/* Initialize the backend. */
	err = nbe->init(nbe, devname, nvl, cb, param);
	if (err) {
		free(devname);
		free(nbe);
		return (err);
	}

	*ret = nbe;
	free(devname);

	return (0);
}
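
/*
 * Sketch of the overall lifecycle as seen from a frontend (hedged
 * pseudo-usage; error handling elided):
 *
 *	struct net_backend *be;
 *	if (netbe_init(&be, nvl, rx_callback, sc) != 0)
 *		return (-1);
 *	features = netbe_get_cap(be);
 *	netbe_set_cap(be, negotiated, vnet_hdr_len);
 *	netbe_rx_enable(be);
 *	...
 *	netbe_send(be, iov, iovcnt);	// tx path
 *	netbe_recv(be, iov, iovcnt);	// rx path, from rx_callback
 *	...
 *	netbe_cleanup(be);
 */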

void
netbe_cleanup(struct net_backend *be)
{

	if (be != NULL) {
		be->cleanup(be);
		free(be);
	}
}

uint64_t
netbe_get_cap(struct net_backend *be)
{

	assert(be != NULL);
	return (be->get_cap(be));
}

int
netbe_set_cap(struct net_backend *be, uint64_t features,
	      unsigned vnet_hdr_len)
{
	int ret;

	assert(be != NULL);

	/* There are only three valid lengths, i.e., 0, 10 and 12. */
	if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
		&& vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
		return (-1);

	be->fe_vnet_hdr_len = vnet_hdr_len;

	ret = be->set_cap(be, features, vnet_hdr_len);
	assert(be->be_vnet_hdr_len == 0 ||
	       be->be_vnet_hdr_len == be->fe_vnet_hdr_len);

	return (ret);
}
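
/*
 * Note (an assumption based on the virtio-net spec, not stated in this
 * file): VNET_HDR_LEN is 12 because struct virtio_net_rxhdr carries the
 * 2-byte num_buffers field used with mergeable rx buffers; without that
 * field the header is 10 bytes, hence the VNET_HDR_LEN - sizeof(uint16_t)
 * case above.
 */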

ssize_t
netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{

	return (be->send(be, iov, iovcnt));
}

ssize_t
netbe_peek_recvlen(struct net_backend *be)
{

	return (be->peek_recvlen(be));
}

/*
 * Try to read a packet from the backend, without blocking.
 * If no packets are available, return 0. In case of success, return
 * the length of the packet just read. Return -1 in case of errors.
 */
ssize_t
netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{

	return (be->recv(be, iov, iovcnt));
}

/*
 * Read a packet from the backend and discard it.
 * Returns the size of the discarded packet or zero if no packet was available.
 * A negative error code is returned in case of read error.
 */
ssize_t
netbe_rx_discard(struct net_backend *be)
{
	/*
	 * MP note: the dummybuf is only used to discard frames,
	 * so there is no need for it to be per-vtnet or locked.
	 * We only make it large enough for a TSO-sized segment.
	 */
	static uint8_t dummybuf[65536 + 64];
	struct iovec iov;

#ifdef __FreeBSD__
	iov.iov_base = dummybuf;
#else
	iov.iov_base = (caddr_t)dummybuf;
#endif
	iov.iov_len = sizeof(dummybuf);

	return netbe_recv(be, &iov, 1);
}

void
netbe_rx_disable(struct net_backend *be)
{

	return be->recv_disable(be);
}

void
netbe_rx_enable(struct net_backend *be)
{

	return be->recv_enable(be);
}

size_t
netbe_get_vnet_hdr_len(struct net_backend *be)
{

	return (be->be_vnet_hdr_len);
}

#ifndef __FreeBSD__
int
netbe_get_mac(net_backend_t *be, void *buf, size_t *buflen)
{
	if (be->get_mac == NULL)
		return (ENOTSUP);
	return (be->get_mac(be, buf, buflen));
}
#endif