1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25/*
26 * Copyright (c) 2016 by Delphix. All rights reserved.
27 */
28
29/*
30 * Datalink management routines.
31 */
32
33#include <sys/types.h>
34#include <sys/door.h>
35#include <sys/zone.h>
36#include <sys/modctl.h>
37#include <sys/file.h>
38#include <sys/modhash.h>
39#include <sys/kstat.h>
40#include <sys/vnode.h>
41#include <sys/cmn_err.h>
42#include <sys/softmac.h>
43#include <sys/dls.h>
44#include <sys/dls_impl.h>
45#include <sys/stropts.h>
46#include <sys/netstack.h>
47#include <inet/iptun/iptun_impl.h>
48
49/*
50 * This vanity name management module is treated as part of the GLD framework
51 * and we don't hold any GLD framework lock across a call to any mac
52 * function that needs to acquire the mac perimeter. The hierarchy is
53 * mac perimeter -> framework locks
54 */
55
/*
 * Per-netstack DLS state.  It is allocated in dls_stack_init() and freed in
 * dls_stack_fini(); dls_stack_shutdown() uses the recorded zone ID to walk
 * that zone's datalinks.
 */
typedef struct dls_stack {
	zoneid_t	dlss_zoneid;	/* from netstackid_to_zoneid() */
} dls_stack_t;
59
/* Allocation cache for dls_devnet_t structures. */
static kmem_cache_t	*i_dls_devnet_cachep;
/* Held while reading or replacing the upcall door handle. */
static kmutex_t		i_dls_mgmt_lock;
/* Held around lookups and updates of the two devnet hashes below. */
static krwlock_t	i_dls_devnet_lock;
/* dls_devnet_t keyed by dd_linkid. */
static mod_hash_t	*i_dls_devnet_id_hash;
/* dls_devnet_t keyed by dd_mac. */
static mod_hash_t	*i_dls_devnet_hash;

/*
 * Set to B_TRUE in this file whenever a linkid mapping is inserted into or
 * removed from i_dls_devnet_id_hash.
 */
boolean_t		devnet_need_rebuild;

/* Bucket count passed when creating both devnet hashes. */
#define	VLAN_HASHSZ	67	/* prime */
69
/*
 * Legacy IP tunnel name matchers.  The argument is a link name whose
 * trailing PPA has already been stripped.  For backward compatibility with
 * Solaris 10 and earlier, opening a /dev/net node that has one of these
 * names makes dls_devnet_hold_by_name() create the tunnel link implicitly.
 */
#define	IS_IPV4_TUN(name)	(strcmp("ip.tun", (name)) == 0)
#define	IS_IPV6_TUN(name)	(strcmp("ip6.tun", (name)) == 0)
#define	IS_6TO4_TUN(name)	(strcmp("ip.6to4tun", (name)) == 0)
#define	IS_IPTUN_LINK(name)						\
	(IS_IPV4_TUN(name) || IS_IPV6_TUN(name) || IS_6TO4_TUN(name))
81
/*
 * Upcall door handle.  It is read and replaced under i_dls_mgmt_lock; NULL
 * means no door is currently open.
 */
static door_handle_t	dls_mgmt_dh = NULL;

/*
 * dls_devnet_t dd_flags
 *
 * DD_CONDEMNED is set by dls_devnet_unset(); the hold routines in this file
 * return ENOENT for a devnet that carries it.
 */
#define	DD_CONDEMNED		0x1
#define	DD_IMPLICIT_IPTUN	0x2 /* Implicitly-created ip*.*tun* tunnel */
88
/*
 * This structure is used to keep the <linkid, macname> mapping.
 * This structure itself is not protected by the mac perimeter, but is
 * protected by the dd_mutex and i_dls_devnet_lock. Thus most of the
 * functions manipulating this structure such as dls_devnet_set/unset etc.
 * may be called while not holding the mac perimeter.
 */
typedef struct dls_devnet_s {
	/* key in i_dls_devnet_id_hash; DATALINK_INVALID_LINKID if unset */
	datalink_id_t	dd_linkid;
	/* link name reported by dlmgmtd for dd_linkid */
	char		dd_linkname[MAXLINKNAMELEN];
	/* MAC name; key in i_dls_devnet_hash */
	char		dd_mac[MAXNAMELEN];
	kstat_t		*dd_ksp;	/* kstat in owner_zid */
	kstat_t		*dd_zone_ksp;	/* in dd_zid if != owner_zid */
	/* long-lived holds; the creator's hold counts as one */
	uint32_t	dd_ref;
	kmutex_t	dd_mutex;
	/* signalled when dd_tref reaches zero or the property task ends */
	kcondvar_t	dd_cv;
	/* temporary holds; dls_devnet_unset() can wait for these to drain */
	uint32_t	dd_tref;
	uint_t		dd_flags;	/* DD_* bits */
	zoneid_t	dd_owner_zid;	/* zone where node was created */
	zoneid_t	dd_zid;		/* current zone */
	/* set once dls_devnet_prop_task() has run for this link */
	boolean_t	dd_prop_loaded;
	/* non-zero while a property-loading task is outstanding */
	taskqid_t	dd_prop_taskid;
} dls_devnet_t;
112
/*
 * Forward declarations for static routines that are referenced before
 * their definitions.
 */
static int i_dls_devnet_create_iptun(const char *, const char *,
    datalink_id_t *);
static int i_dls_devnet_destroy_iptun(datalink_id_t);
static int i_dls_devnet_setzid(dls_devnet_t *, zoneid_t, boolean_t);
static int dls_devnet_unset(const char *, datalink_id_t *, boolean_t);
118
119/*ARGSUSED*/
120static int
121i_dls_devnet_constructor(void *buf, void *arg, int kmflag)
122{
123	dls_devnet_t	*ddp = buf;
124
125	bzero(buf, sizeof (dls_devnet_t));
126	mutex_init(&ddp->dd_mutex, NULL, MUTEX_DEFAULT, NULL);
127	cv_init(&ddp->dd_cv, NULL, CV_DEFAULT, NULL);
128	return (0);
129}
130
131/*ARGSUSED*/
132static void
133i_dls_devnet_destructor(void *buf, void *arg)
134{
135	dls_devnet_t	*ddp = buf;
136
137	ASSERT(ddp->dd_ksp == NULL);
138	ASSERT(ddp->dd_ref == 0);
139	ASSERT(ddp->dd_tref == 0);
140	mutex_destroy(&ddp->dd_mutex);
141	cv_destroy(&ddp->dd_cv);
142}
143
144/* ARGSUSED */
145static int
146dls_zone_remove(datalink_id_t linkid, void *arg)
147{
148	dls_devnet_t *ddp;
149
150	if (dls_devnet_hold_tmp(linkid, &ddp) == 0) {
151		(void) dls_devnet_setzid(ddp, GLOBAL_ZONEID);
152		dls_devnet_rele_tmp(ddp);
153	}
154	return (0);
155}
156
157/* ARGSUSED */
158static void *
159dls_stack_init(netstackid_t stackid, netstack_t *ns)
160{
161	dls_stack_t *dlss;
162
163	dlss = kmem_zalloc(sizeof (*dlss), KM_SLEEP);
164	dlss->dlss_zoneid = netstackid_to_zoneid(stackid);
165	return (dlss);
166}
167
168/* ARGSUSED */
169static void
170dls_stack_shutdown(netstackid_t stackid, void *arg)
171{
172	dls_stack_t	*dlss = (dls_stack_t *)arg;
173
174	/* Move remaining datalinks in this zone back to the global zone. */
175	(void) zone_datalink_walk(dlss->dlss_zoneid, dls_zone_remove, NULL);
176}
177
178/* ARGSUSED */
179static void
180dls_stack_fini(netstackid_t stackid, void *arg)
181{
182	dls_stack_t	*dlss = (dls_stack_t *)arg;
183
184	kmem_free(dlss, sizeof (*dlss));
185}
186
187/*
188 * Module initialization and finalization functions.
189 */
190void
191dls_mgmt_init(void)
192{
193	mutex_init(&i_dls_mgmt_lock, NULL, MUTEX_DEFAULT, NULL);
194	rw_init(&i_dls_devnet_lock, NULL, RW_DEFAULT, NULL);
195
196	/*
197	 * Create a kmem_cache of dls_devnet_t structures.
198	 */
199	i_dls_devnet_cachep = kmem_cache_create("dls_devnet_cache",
200	    sizeof (dls_devnet_t), 0, i_dls_devnet_constructor,
201	    i_dls_devnet_destructor, NULL, NULL, NULL, 0);
202	ASSERT(i_dls_devnet_cachep != NULL);
203
204	/*
205	 * Create a hash table, keyed by dd_linkid, of dls_devnet_t.
206	 */
207	i_dls_devnet_id_hash = mod_hash_create_idhash("dls_devnet_id_hash",
208	    VLAN_HASHSZ, mod_hash_null_valdtor);
209
210	/*
211	 * Create a hash table, keyed by dd_mac
212	 */
213	i_dls_devnet_hash = mod_hash_create_extended("dls_devnet_hash",
214	    VLAN_HASHSZ, mod_hash_null_keydtor, mod_hash_null_valdtor,
215	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
216
217	devnet_need_rebuild = B_FALSE;
218
219	netstack_register(NS_DLS, dls_stack_init, dls_stack_shutdown,
220	    dls_stack_fini);
221}
222
223void
224dls_mgmt_fini(void)
225{
226	netstack_unregister(NS_DLS);
227	mod_hash_destroy_hash(i_dls_devnet_hash);
228	mod_hash_destroy_hash(i_dls_devnet_id_hash);
229	kmem_cache_destroy(i_dls_devnet_cachep);
230	rw_destroy(&i_dls_devnet_lock);
231	mutex_destroy(&i_dls_mgmt_lock);
232}
233
234int
235dls_mgmt_door_set(boolean_t start)
236{
237	int	err;
238
239	/* handle daemon restart */
240	mutex_enter(&i_dls_mgmt_lock);
241	if (dls_mgmt_dh != NULL) {
242		door_ki_rele(dls_mgmt_dh);
243		dls_mgmt_dh = NULL;
244	}
245
246	if (start && ((err = door_ki_open(DLMGMT_DOOR, &dls_mgmt_dh)) != 0)) {
247		mutex_exit(&i_dls_mgmt_lock);
248		return (err);
249	}
250
251	mutex_exit(&i_dls_mgmt_lock);
252
253	/*
254	 * Create and associate <link name, linkid> mapping for network devices
255	 * which are already attached before the daemon is started.
256	 */
257	if (start)
258		softmac_recreate();
259	return (0);
260}
261
262static boolean_t
263i_dls_mgmt_door_revoked(door_handle_t dh)
264{
265	struct door_info info;
266	extern int sys_shutdown;
267
268	ASSERT(dh != NULL);
269
270	if (sys_shutdown) {
271		cmn_err(CE_NOTE, "dls_mgmt_door: shutdown observed\n");
272		return (B_TRUE);
273	}
274
275	if (door_ki_info(dh, &info) != 0)
276		return (B_TRUE);
277
278	return ((info.di_attributes & DOOR_REVOKED) != 0);
279}
280
/*
 * Upcall to the datalink management daemon (dlmgmtd).
 *
 *     - arg, asize	request buffer and its size
 *     - rbuf, rsize	caller-supplied reply buffer and its size
 *
 * Returns EBADF if there is no door handle or the door is revoked, the
 * error from door_ki_upcall_limited() if the call itself fails, ENOSPC if
 * the reply did not land in rbuf, EINVAL if the reply size is not exactly
 * rsize, and otherwise the lr_err value found in the reply.
 */
static int
i_dls_mgmt_upcall(void *arg, size_t asize, void *rbuf, size_t rsize)
{
	door_arg_t			darg, save_arg;
	door_handle_t			dh;
	int				err;
	/* Counts upcall attempts; shared by the EAGAIN and revoked paths. */
	int				retry = 0;

#define	MAXRETRYNUM	3

	ASSERT(arg);
	darg.data_ptr = arg;
	darg.data_size = asize;
	darg.desc_ptr = NULL;
	darg.desc_num = 0;
	darg.rbuf = rbuf;
	darg.rsize = rsize;
	/* Pristine copy used to reset darg before every retry. */
	save_arg = darg;

retry:
	/*
	 * Take our own hold on the current handle under the lock so that it
	 * stays valid even if dls_mgmt_door_set() replaces dls_mgmt_dh.
	 */
	mutex_enter(&i_dls_mgmt_lock);
	dh = dls_mgmt_dh;
	if ((dh == NULL) || i_dls_mgmt_door_revoked(dh)) {
		mutex_exit(&i_dls_mgmt_lock);
		return (EBADF);
	}
	door_ki_hold(dh);
	mutex_exit(&i_dls_mgmt_lock);

	for (;;) {
		retry++;
		if ((err = door_ki_upcall_limited(dh, &darg, zone_kcred(),
		    SIZE_MAX, 0)) == 0)
			break;

		/*
		 * handle door call errors
		 */
		darg = save_arg;
		switch (err) {
		case EINTR:
			/*
			 * If the operation which caused this door upcall gets
			 * interrupted, return directly.
			 */
			goto done;
		case EAGAIN:
			/*
			 * Repeat upcall if the maximum attempt limit has not
			 * been reached.
			 */
			if (retry < MAXRETRYNUM) {
				delay(2 * hz);
				/* Leaves the switch only; the loop repeats. */
				break;
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		default:
			/* A fatal door error */
			if (i_dls_mgmt_door_revoked(dh)) {
				cmn_err(CE_NOTE,
				    "dls: dlmgmtd door service revoked\n");

				/*
				 * Drop our hold and re-read dls_mgmt_dh in
				 * case a new door has been installed.
				 */
				if (retry < MAXRETRYNUM) {
					door_ki_rele(dh);
					goto retry;
				}
			}
			cmn_err(CE_WARN, "dls: dlmgmtd fatal error %d\n", err);
			goto done;
		}
	}

	if (darg.rbuf != rbuf) {
		/*
		 * The size of the input rbuf was not big enough, so the
		 * upcall allocated the rbuf itself.  If this happens, assume
		 * that this was an invalid door call request.
		 */
		kmem_free(darg.rbuf, darg.rsize);
		err = ENOSPC;
		goto done;
	}

	/* A reply of any size other than the expected one is rejected. */
	if (darg.rsize != rsize) {
		err = EINVAL;
		goto done;
	}

	/* The reply is read as a dlmgmt_retval_t to obtain its lr_err. */
	err = ((dlmgmt_retval_t *)rbuf)->lr_err;

done:
	door_ki_rele(dh);
	return (err);
}
379
380/*
381 * Request the datalink management daemon to create a link with the attributes
382 * below.  Upon success, zero is returned and linkidp contains the linkid for
383 * the new link; otherwise, an errno is returned.
384 *
385 *     - dev		physical dev_t.  required for all physical links,
386 *		        including GLDv3 links.  It will be used to force the
387 *		        attachment of a physical device, hence the
388 *		        registration of its mac
389 *     - class		datalink class
390 *     - media type	media type; DL_OTHER means unknown
391 *     - persist	whether to persist the datalink
392 */
393int
394dls_mgmt_create(const char *devname, dev_t dev, datalink_class_t class,
395    uint32_t media, boolean_t persist, datalink_id_t *linkidp)
396{
397	dlmgmt_upcall_arg_create_t	create;
398	dlmgmt_create_retval_t		retval;
399	int				err;
400
401	create.ld_cmd = DLMGMT_CMD_DLS_CREATE;
402	create.ld_class = class;
403	create.ld_media = media;
404	create.ld_phymaj = getmajor(dev);
405	create.ld_phyinst = getminor(dev);
406	create.ld_persist = persist;
407	if (strlcpy(create.ld_devname, devname, sizeof (create.ld_devname)) >=
408	    sizeof (create.ld_devname))
409		return (EINVAL);
410
411	if ((err = i_dls_mgmt_upcall(&create, sizeof (create), &retval,
412	    sizeof (retval))) == 0) {
413		*linkidp = retval.lr_linkid;
414	}
415	return (err);
416}
417
418/*
419 * Request the datalink management daemon to destroy the specified link.
420 * Returns zero upon success, or an errno upon failure.
421 */
422int
423dls_mgmt_destroy(datalink_id_t linkid, boolean_t persist)
424{
425	dlmgmt_upcall_arg_destroy_t	destroy;
426	dlmgmt_destroy_retval_t		retval;
427
428	destroy.ld_cmd = DLMGMT_CMD_DLS_DESTROY;
429	destroy.ld_linkid = linkid;
430	destroy.ld_persist = persist;
431
432	return (i_dls_mgmt_upcall(&destroy, sizeof (destroy),
433	    &retval, sizeof (retval)));
434}
435
436/*
437 * Request the datalink management daemon to verify/update the information
438 * for a physical link.  Upon success, get its linkid.
439 *
440 *     - media type	media type
441 *     - novanity	whether this physical datalink supports vanity naming.
442 *			physical links that do not use the GLDv3 MAC plugin
443 *			cannot suport vanity naming
444 *
445 * This function could fail with ENOENT or EEXIST.  Two cases return EEXIST:
446 *
447 * 1. A link with devname already exists, but the media type does not match.
448 *    In this case, mediap will bee set to the media type of the existing link.
449 * 2. A link with devname already exists, but its link name does not match
450 *    the device name, although this link does not support vanity naming.
451 */
452int
453dls_mgmt_update(const char *devname, uint32_t media, boolean_t novanity,
454    uint32_t *mediap, datalink_id_t *linkidp)
455{
456	dlmgmt_upcall_arg_update_t	update;
457	dlmgmt_update_retval_t		retval;
458	int				err;
459
460	update.ld_cmd = DLMGMT_CMD_DLS_UPDATE;
461
462	if (strlcpy(update.ld_devname, devname, sizeof (update.ld_devname)) >=
463	    sizeof (update.ld_devname))
464		return (EINVAL);
465
466	update.ld_media = media;
467	update.ld_novanity = novanity;
468
469	if ((err = i_dls_mgmt_upcall(&update, sizeof (update), &retval,
470	    sizeof (retval))) == EEXIST) {
471		*linkidp = retval.lr_linkid;
472		*mediap = retval.lr_media;
473	} else if (err == 0) {
474		*linkidp = retval.lr_linkid;
475	}
476
477	return (err);
478}
479
480/*
481 * Request the datalink management daemon to get the information for a link.
482 * Returns zero upon success, or an errno upon failure.
483 *
484 * Only fills in information for argument pointers that are non-NULL.
485 * Note that the link argument is expected to be MAXLINKNAMELEN bytes.
486 */
487int
488dls_mgmt_get_linkinfo(datalink_id_t linkid, char *link,
489    datalink_class_t *classp, uint32_t *mediap, uint32_t *flagsp)
490{
491	dlmgmt_door_getname_t	getname;
492	dlmgmt_getname_retval_t	retval;
493	int			err, len;
494
495	getname.ld_cmd = DLMGMT_CMD_GETNAME;
496	getname.ld_linkid = linkid;
497
498	if ((err = i_dls_mgmt_upcall(&getname, sizeof (getname), &retval,
499	    sizeof (retval))) != 0) {
500		return (err);
501	}
502
503	len = strlen(retval.lr_link);
504	if (len <= 1 || len >= MAXLINKNAMELEN)
505		return (EINVAL);
506
507	if (link != NULL)
508		(void) strlcpy(link, retval.lr_link, MAXLINKNAMELEN);
509	if (classp != NULL)
510		*classp = retval.lr_class;
511	if (mediap != NULL)
512		*mediap = retval.lr_media;
513	if (flagsp != NULL)
514		*flagsp = retval.lr_flags;
515	return (0);
516}
517
518/*
519 * Request the datalink management daemon to get the linkid for a link.
520 * Returns a non-zero error code on failure.  The linkid argument is only
521 * set on success (when zero is returned.)
522 */
523int
524dls_mgmt_get_linkid(const char *link, datalink_id_t *linkid)
525{
526	dlmgmt_door_getlinkid_t		getlinkid;
527	dlmgmt_getlinkid_retval_t	retval;
528	int				err;
529
530	getlinkid.ld_cmd = DLMGMT_CMD_GETLINKID;
531	(void) strlcpy(getlinkid.ld_link, link, MAXLINKNAMELEN);
532
533	if ((err = i_dls_mgmt_upcall(&getlinkid, sizeof (getlinkid), &retval,
534	    sizeof (retval))) == 0) {
535		*linkid = retval.lr_linkid;
536	}
537	return (err);
538}
539
540datalink_id_t
541dls_mgmt_get_next(datalink_id_t linkid, datalink_class_t class,
542    datalink_media_t dmedia, uint32_t flags)
543{
544	dlmgmt_door_getnext_t	getnext;
545	dlmgmt_getnext_retval_t	retval;
546
547	getnext.ld_cmd = DLMGMT_CMD_GETNEXT;
548	getnext.ld_class = class;
549	getnext.ld_dmedia = dmedia;
550	getnext.ld_flags = flags;
551	getnext.ld_linkid = linkid;
552
553	if (i_dls_mgmt_upcall(&getnext, sizeof (getnext), &retval,
554	    sizeof (retval)) != 0) {
555		return (DATALINK_INVALID_LINKID);
556	}
557
558	return (retval.lr_linkid);
559}
560
561static int
562i_dls_mgmt_get_linkattr(const datalink_id_t linkid, const char *attr,
563    void *attrval, size_t *attrszp)
564{
565	dlmgmt_upcall_arg_getattr_t	getattr;
566	dlmgmt_getattr_retval_t		retval;
567	int				err;
568
569	getattr.ld_cmd = DLMGMT_CMD_DLS_GETATTR;
570	getattr.ld_linkid = linkid;
571	(void) strlcpy(getattr.ld_attr, attr, MAXLINKATTRLEN);
572
573	if ((err = i_dls_mgmt_upcall(&getattr, sizeof (getattr), &retval,
574	    sizeof (retval))) == 0) {
575		if (*attrszp < retval.lr_attrsz)
576			return (EINVAL);
577		*attrszp = retval.lr_attrsz;
578		bcopy(retval.lr_attrval, attrval, retval.lr_attrsz);
579	}
580
581	return (err);
582}
583
584/*
585 * Note that this function can only get devp successfully for non-VLAN link.
586 */
587int
588dls_mgmt_get_phydev(datalink_id_t linkid, dev_t *devp)
589{
590	uint64_t	maj, inst;
591	size_t		attrsz = sizeof (uint64_t);
592
593	if (i_dls_mgmt_get_linkattr(linkid, FPHYMAJ, &maj, &attrsz) != 0 ||
594	    attrsz != sizeof (uint64_t) ||
595	    i_dls_mgmt_get_linkattr(linkid, FPHYINST, &inst, &attrsz) != 0 ||
596	    attrsz != sizeof (uint64_t)) {
597		return (EINVAL);
598	}
599
600	*devp = makedevice((major_t)maj, (minor_t)inst);
601	return (0);
602}
603
604/*
605 * Request the datalink management daemon to push in
606 * all properties associated with the link.
607 * Returns a non-zero error code on failure.
608 */
609int
610dls_mgmt_linkprop_init(datalink_id_t linkid)
611{
612	dlmgmt_door_linkprop_init_t	li;
613	dlmgmt_linkprop_init_retval_t	retval;
614	int				err;
615
616	li.ld_cmd = DLMGMT_CMD_LINKPROP_INIT;
617	li.ld_linkid = linkid;
618
619	err = i_dls_mgmt_upcall(&li, sizeof (li), &retval, sizeof (retval));
620	return (err);
621}
622
623static void
624dls_devnet_prop_task(void *arg)
625{
626	dls_devnet_t		*ddp = arg;
627
628	(void) dls_mgmt_linkprop_init(ddp->dd_linkid);
629
630	mutex_enter(&ddp->dd_mutex);
631	ddp->dd_prop_loaded = B_TRUE;
632	ddp->dd_prop_taskid = 0;
633	cv_broadcast(&ddp->dd_cv);
634	mutex_exit(&ddp->dd_mutex);
635}
636
637/*
638 * Ensure property loading task is completed.
639 */
640void
641dls_devnet_prop_task_wait(dls_dl_handle_t ddp)
642{
643	mutex_enter(&ddp->dd_mutex);
644	while (ddp->dd_prop_taskid != 0)
645		cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
646	mutex_exit(&ddp->dd_mutex);
647}
648
649void
650dls_devnet_rele_tmp(dls_dl_handle_t dlh)
651{
652	dls_devnet_t		*ddp = dlh;
653
654	mutex_enter(&ddp->dd_mutex);
655	ASSERT(ddp->dd_tref != 0);
656	if (--ddp->dd_tref == 0)
657		cv_signal(&ddp->dd_cv);
658	mutex_exit(&ddp->dd_mutex);
659}
660
661int
662dls_devnet_hold_link(datalink_id_t linkid, dls_dl_handle_t *ddhp,
663    dls_link_t **dlpp)
664{
665	dls_dl_handle_t	dlh;
666	dls_link_t	*dlp;
667	int		err;
668
669	if ((err = dls_devnet_hold_tmp(linkid, &dlh)) != 0)
670		return (err);
671
672	if ((err = dls_link_hold(dls_devnet_mac(dlh), &dlp)) != 0) {
673		dls_devnet_rele_tmp(dlh);
674		return (err);
675	}
676
677	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
678
679	*ddhp = dlh;
680	*dlpp = dlp;
681	return (0);
682}
683
684void
685dls_devnet_rele_link(dls_dl_handle_t dlh, dls_link_t *dlp)
686{
687	ASSERT(MAC_PERIM_HELD(dlp->dl_mh));
688
689	dls_link_rele(dlp);
690	dls_devnet_rele_tmp(dlh);
691}
692
693/*
694 * "link" kstats related functions.
695 */
696
697/*
698 * Query the "link" kstats.
699 *
700 * We may be called from the kstat subsystem in an arbitrary context.
701 * If the caller is the stack, the context could be an upcall data
702 * thread. Hence we can't acquire the mac perimeter in this function
703 * for fear of deadlock.
704 */
705static int
706dls_devnet_stat_update(kstat_t *ksp, int rw)
707{
708	datalink_id_t	linkid = (datalink_id_t)(uintptr_t)ksp->ks_private;
709	dls_devnet_t	*ddp;
710	dls_link_t	*dlp;
711	int		err;
712
713	if ((err = dls_devnet_hold_tmp(linkid, &ddp)) != 0) {
714		return (err);
715	}
716
717	/*
718	 * If a device detach happens at this time, it will block in
719	 * dls_devnet_unset since the dd_tref has been bumped in
720	 * dls_devnet_hold_tmp(). So the access to 'dlp' is safe even though
721	 * we don't hold the mac perimeter.
722	 */
723	if (mod_hash_find(i_dls_link_hash, (mod_hash_key_t)ddp->dd_mac,
724	    (mod_hash_val_t *)&dlp) != 0) {
725		dls_devnet_rele_tmp(ddp);
726		return (ENOENT);
727	}
728
729	err = dls_stat_update(ksp, dlp, rw);
730
731	dls_devnet_rele_tmp(ddp);
732	return (err);
733}
734
735/*
736 * Create the "link" kstats.
737 */
738static void
739dls_devnet_stat_create(dls_devnet_t *ddp, zoneid_t zoneid)
740{
741	kstat_t	*ksp;
742
743	if (dls_stat_create("link", 0, ddp->dd_linkname, zoneid,
744	    dls_devnet_stat_update, (void *)(uintptr_t)ddp->dd_linkid,
745	    &ksp) == 0) {
746		ASSERT(ksp != NULL);
747		if (zoneid == ddp->dd_owner_zid) {
748			ASSERT(ddp->dd_ksp == NULL);
749			ddp->dd_ksp = ksp;
750		} else {
751			ASSERT(ddp->dd_zone_ksp == NULL);
752			ddp->dd_zone_ksp = ksp;
753		}
754	}
755}
756
757/*
758 * Destroy the "link" kstats.
759 */
760static void
761dls_devnet_stat_destroy(dls_devnet_t *ddp, zoneid_t zoneid)
762{
763	if (zoneid == ddp->dd_owner_zid) {
764		if (ddp->dd_ksp != NULL) {
765			kstat_delete(ddp->dd_ksp);
766			ddp->dd_ksp = NULL;
767		}
768	} else {
769		if (ddp->dd_zone_ksp != NULL) {
770			kstat_delete(ddp->dd_zone_ksp);
771			ddp->dd_zone_ksp = NULL;
772		}
773	}
774}
775
776/*
777 * The link has been renamed. Destroy the old non-legacy kstats ("link kstats")
778 * and create the new set using the new name.
779 */
780static void
781dls_devnet_stat_rename(dls_devnet_t *ddp)
782{
783	if (ddp->dd_ksp != NULL) {
784		kstat_delete(ddp->dd_ksp);
785		ddp->dd_ksp = NULL;
786	}
787	/* We can't rename a link while it's assigned to a non-global zone. */
788	ASSERT(ddp->dd_zone_ksp == NULL);
789	dls_devnet_stat_create(ddp, ddp->dd_owner_zid);
790}
791
792/*
793 * Associate a linkid with a given link (identified by macname)
794 */
795static int
796dls_devnet_set(const char *macname, datalink_id_t linkid, zoneid_t zoneid,
797    dls_devnet_t **ddpp)
798{
799	dls_devnet_t		*ddp = NULL;
800	datalink_class_t	class;
801	int			err;
802	boolean_t		stat_create = B_FALSE;
803	char			linkname[MAXLINKNAMELEN];
804
805	rw_enter(&i_dls_devnet_lock, RW_WRITER);
806
807	/*
808	 * Don't allow callers to set a link name with a linkid that already
809	 * has a name association (that's what rename is for).
810	 */
811	if (linkid != DATALINK_INVALID_LINKID) {
812		if (mod_hash_find(i_dls_devnet_id_hash,
813		    (mod_hash_key_t)(uintptr_t)linkid,
814		    (mod_hash_val_t *)&ddp) == 0) {
815			err = EEXIST;
816			goto done;
817		}
818		if ((err = dls_mgmt_get_linkinfo(linkid, linkname, &class,
819		    NULL, NULL)) != 0)
820			goto done;
821	}
822
823	if ((err = mod_hash_find(i_dls_devnet_hash,
824	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) == 0) {
825		if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
826			err = EEXIST;
827			goto done;
828		}
829
830		/*
831		 * This might be a physical link that has already
832		 * been created, but which does not have a linkid
833		 * because dlmgmtd was not running when it was created.
834		 */
835		if (linkid == DATALINK_INVALID_LINKID ||
836		    class != DATALINK_CLASS_PHYS) {
837			err = EINVAL;
838			goto done;
839		}
840	} else {
841		ddp = kmem_cache_alloc(i_dls_devnet_cachep, KM_SLEEP);
842		ddp->dd_tref = 0;
843		ddp->dd_ref++;
844		ddp->dd_owner_zid = zoneid;
845		(void) strlcpy(ddp->dd_mac, macname, sizeof (ddp->dd_mac));
846		VERIFY(mod_hash_insert(i_dls_devnet_hash,
847		    (mod_hash_key_t)ddp->dd_mac, (mod_hash_val_t)ddp) == 0);
848	}
849
850	if (linkid != DATALINK_INVALID_LINKID) {
851		ddp->dd_linkid = linkid;
852		(void) strlcpy(ddp->dd_linkname, linkname,
853		    sizeof (ddp->dd_linkname));
854		VERIFY(mod_hash_insert(i_dls_devnet_id_hash,
855		    (mod_hash_key_t)(uintptr_t)linkid,
856		    (mod_hash_val_t)ddp) == 0);
857		devnet_need_rebuild = B_TRUE;
858		stat_create = B_TRUE;
859		mutex_enter(&ddp->dd_mutex);
860		if (!ddp->dd_prop_loaded && (ddp->dd_prop_taskid == 0)) {
861			ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
862			    dls_devnet_prop_task, ddp, TQ_SLEEP);
863		}
864		mutex_exit(&ddp->dd_mutex);
865	}
866	err = 0;
867done:
868	/*
869	 * It is safe to drop the i_dls_devnet_lock at this point. In the case
870	 * of physical devices, the softmac framework will fail the device
871	 * detach based on the smac_state or smac_hold_cnt. Other cases like
872	 * vnic and aggr use their own scheme to serialize creates and deletes
873	 * and ensure that *ddp is valid.
874	 */
875	rw_exit(&i_dls_devnet_lock);
876	if (err == 0) {
877		if (zoneid != GLOBAL_ZONEID &&
878		    (err = i_dls_devnet_setzid(ddp, zoneid, B_FALSE)) != 0)
879			(void) dls_devnet_unset(macname, &linkid, B_TRUE);
880		/*
881		 * The kstat subsystem holds its own locks (rather perimeter)
882		 * before calling the ks_update (dls_devnet_stat_update) entry
883		 * point which in turn grabs the i_dls_devnet_lock. So the
884		 * lock hierarchy is kstat locks -> i_dls_devnet_lock.
885		 */
886		if (stat_create)
887			dls_devnet_stat_create(ddp, zoneid);
888		if (ddpp != NULL)
889			*ddpp = ddp;
890	}
891	return (err);
892}
893
/*
 * Disassociate a linkid with a given link (identified by macname)
 * This waits until temporary references to the dls_devnet_t are gone.
 *
 *     - macname	MAC name of the devnet to remove
 *     - id		receives the linkid the devnet had
 *     - wait		if true, block until temporary references and any
 *			property-loading task are gone; if false, fail with
 *			EBUSY when either is outstanding
 *
 * Returns 0 on success, ENOENT if no devnet exists for macname, or EBUSY
 * if the devnet has holds beyond the creator's (or, with !wait, has
 * temporary references or a pending property task).  On success the
 * devnet has been removed from both hashes and returned to the cache.
 */
static int
dls_devnet_unset(const char *macname, datalink_id_t *id, boolean_t wait)
{
	dls_devnet_t	*ddp;
	int		err;
	mod_hash_val_t	val;

	rw_enter(&i_dls_devnet_lock, RW_WRITER);
	if ((err = mod_hash_find(i_dls_devnet_hash,
	    (mod_hash_key_t)macname, (mod_hash_val_t *)&ddp)) != 0) {
		ASSERT(err == MH_ERR_NOTFOUND);
		rw_exit(&i_dls_devnet_lock);
		return (ENOENT);
	}

	/* On the success path dd_mutex stays held until just before free. */
	mutex_enter(&ddp->dd_mutex);

	/*
	 * Make sure downcalls into softmac_create or softmac_destroy from
	 * devfs don't cv_wait on any devfs related condition for fear of
	 * deadlock. Return EBUSY if the asynchronous thread started for
	 * property loading as part of the post attach hasn't yet completed.
	 */
	ASSERT(ddp->dd_ref != 0);
	if ((ddp->dd_ref != 1) || (!wait &&
	    (ddp->dd_tref != 0 || ddp->dd_prop_taskid != 0))) {
		mutex_exit(&ddp->dd_mutex);
		rw_exit(&i_dls_devnet_lock);
		return (EBUSY);
	}

	/* From here on the hold routines refuse this devnet with ENOENT. */
	ddp->dd_flags |= DD_CONDEMNED;
	ddp->dd_ref--;
	*id = ddp->dd_linkid;

	/* Best effort: a failure to move the link back is ignored. */
	if (ddp->dd_zid != GLOBAL_ZONEID)
		(void) i_dls_devnet_setzid(ddp, GLOBAL_ZONEID, B_FALSE);

	/*
	 * Remove this dls_devnet_t from the hash table.
	 */
	VERIFY(mod_hash_remove(i_dls_devnet_hash,
	    (mod_hash_key_t)ddp->dd_mac, &val) == 0);

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID) {
		VERIFY(mod_hash_remove(i_dls_devnet_id_hash,
		    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, &val) == 0);

		devnet_need_rebuild = B_TRUE;
	}
	/* The devnet can no longer be looked up; only dd_mutex is needed. */
	rw_exit(&i_dls_devnet_lock);

	if (wait) {
		/*
		 * Wait until all temporary references are released.
		 */
		while ((ddp->dd_tref != 0) || (ddp->dd_prop_taskid != 0))
			cv_wait(&ddp->dd_cv, &ddp->dd_mutex);
	} else {
		ASSERT(ddp->dd_tref == 0 &&
		    ddp->dd_prop_taskid == (taskqid_t)NULL);
	}

	if (ddp->dd_linkid != DATALINK_INVALID_LINKID)
		dls_devnet_stat_destroy(ddp, ddp->dd_owner_zid);

	/* Reset the fields that must be clean for the next cache user. */
	ddp->dd_prop_loaded = B_FALSE;
	ddp->dd_linkid = DATALINK_INVALID_LINKID;
	ddp->dd_flags = 0;
	mutex_exit(&ddp->dd_mutex);
	kmem_cache_free(i_dls_devnet_cachep, ddp);

	return (0);
}
972
973/*
974 * This is a private hold routine used when we already have the dls_link_t, thus
975 * we know that it cannot go away.
976 */
977int
978dls_devnet_hold_tmp_by_link(dls_link_t *dlp, dls_dl_handle_t *ddhp)
979{
980	int err;
981	dls_devnet_t *ddp = NULL;
982
983	rw_enter(&i_dls_devnet_lock, RW_WRITER);
984	if ((err = mod_hash_find(i_dls_devnet_hash,
985	    (mod_hash_key_t)dlp->dl_name, (mod_hash_val_t *)&ddp)) != 0) {
986		ASSERT(err == MH_ERR_NOTFOUND);
987		rw_exit(&i_dls_devnet_lock);
988		return (ENOENT);
989	}
990
991	mutex_enter(&ddp->dd_mutex);
992	ASSERT(ddp->dd_ref > 0);
993	if (ddp->dd_flags & DD_CONDEMNED) {
994		mutex_exit(&ddp->dd_mutex);
995		rw_exit(&i_dls_devnet_lock);
996		return (ENOENT);
997	}
998	ddp->dd_tref++;
999	mutex_exit(&ddp->dd_mutex);
1000	rw_exit(&i_dls_devnet_lock);
1001
1002	*ddhp = ddp;
1003	return (0);
1004}
1005
1006static int
1007dls_devnet_hold_common(datalink_id_t linkid, dls_devnet_t **ddpp,
1008    boolean_t tmp_hold)
1009{
1010	dls_devnet_t		*ddp;
1011	int			err;
1012
1013	rw_enter(&i_dls_devnet_lock, RW_READER);
1014	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1015	    (mod_hash_key_t)(uintptr_t)linkid, (mod_hash_val_t *)&ddp)) != 0) {
1016		ASSERT(err == MH_ERR_NOTFOUND);
1017		rw_exit(&i_dls_devnet_lock);
1018		return (ENOENT);
1019	}
1020
1021	mutex_enter(&ddp->dd_mutex);
1022	ASSERT(ddp->dd_ref > 0);
1023	if (ddp->dd_flags & DD_CONDEMNED) {
1024		mutex_exit(&ddp->dd_mutex);
1025		rw_exit(&i_dls_devnet_lock);
1026		return (ENOENT);
1027	}
1028	if (tmp_hold)
1029		ddp->dd_tref++;
1030	else
1031		ddp->dd_ref++;
1032	mutex_exit(&ddp->dd_mutex);
1033	rw_exit(&i_dls_devnet_lock);
1034
1035	*ddpp = ddp;
1036	return (0);
1037}
1038
1039int
1040dls_devnet_hold(datalink_id_t linkid, dls_devnet_t **ddpp)
1041{
1042	return (dls_devnet_hold_common(linkid, ddpp, B_FALSE));
1043}
1044
1045/*
1046 * Hold the vanity naming structure (dls_devnet_t) temporarily.  The request to
1047 * delete the dls_devnet_t will wait until the temporary reference is released.
1048 */
1049int
1050dls_devnet_hold_tmp(datalink_id_t linkid, dls_devnet_t **ddpp)
1051{
1052	return (dls_devnet_hold_common(linkid, ddpp, B_TRUE));
1053}
1054
1055/*
1056 * This funtion is called when a DLS client tries to open a device node.
1057 * This dev_t could be a result of a /dev/net node access (returned by
1058 * devnet_create_rvp->dls_devnet_open()) or a direct /dev node access.
1059 * In both cases, this function bumps up the reference count of the
1060 * dls_devnet_t structure. The reference is held as long as the device node
1061 * is open. In the case of /dev/net while it is true that the initial reference
1062 * is held when the devnet_create_rvp->dls_devnet_open call happens, this
1063 * initial reference is released immediately in devnet_inactive_callback ->
1064 * dls_devnet_close(). (Note that devnet_inactive_callback() is called right
1065 * after dld_open completes, not when the /dev/net node is being closed).
1066 * To undo this function, call dls_devnet_rele()
1067 */
1068int
1069dls_devnet_hold_by_dev(dev_t dev, dls_dl_handle_t *ddhp)
1070{
1071	char			name[MAXNAMELEN];
1072	char			*drv;
1073	dls_devnet_t		*ddp;
1074	int			err;
1075
1076	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1077		return (EINVAL);
1078
1079	(void) snprintf(name, sizeof (name), "%s%d", drv,
1080	    DLS_MINOR2INST(getminor(dev)));
1081
1082	rw_enter(&i_dls_devnet_lock, RW_READER);
1083	if ((err = mod_hash_find(i_dls_devnet_hash,
1084	    (mod_hash_key_t)name, (mod_hash_val_t *)&ddp)) != 0) {
1085		ASSERT(err == MH_ERR_NOTFOUND);
1086		rw_exit(&i_dls_devnet_lock);
1087		return (ENOENT);
1088	}
1089	mutex_enter(&ddp->dd_mutex);
1090	ASSERT(ddp->dd_ref > 0);
1091	if (ddp->dd_flags & DD_CONDEMNED) {
1092		mutex_exit(&ddp->dd_mutex);
1093		rw_exit(&i_dls_devnet_lock);
1094		return (ENOENT);
1095	}
1096	ddp->dd_ref++;
1097	mutex_exit(&ddp->dd_mutex);
1098	rw_exit(&i_dls_devnet_lock);
1099
1100	*ddhp = ddp;
1101	return (0);
1102}
1103
1104void
1105dls_devnet_rele(dls_devnet_t *ddp)
1106{
1107	mutex_enter(&ddp->dd_mutex);
1108	ASSERT(ddp->dd_ref > 1);
1109	ddp->dd_ref--;
1110	if ((ddp->dd_flags & DD_IMPLICIT_IPTUN) && ddp->dd_ref == 1) {
1111		mutex_exit(&ddp->dd_mutex);
1112		if (i_dls_devnet_destroy_iptun(ddp->dd_linkid) != 0)
1113			ddp->dd_flags |= DD_IMPLICIT_IPTUN;
1114		return;
1115	}
1116	mutex_exit(&ddp->dd_mutex);
1117}
1118
1119static int
1120dls_devnet_hold_by_name(const char *link, dls_devnet_t **ddpp)
1121{
1122	char			drv[MAXLINKNAMELEN];
1123	uint_t			ppa;
1124	major_t			major;
1125	dev_t			phy_dev, tmp_dev;
1126	datalink_id_t		linkid;
1127	dls_dev_handle_t	ddh;
1128	int			err;
1129
1130	if ((err = dls_mgmt_get_linkid(link, &linkid)) == 0)
1131		return (dls_devnet_hold(linkid, ddpp));
1132
1133	/*
1134	 * If we failed to get the link's linkid because the dlmgmtd daemon
1135	 * has not been started, return ENOENT so that the application can
1136	 * fallback to open the /dev node.
1137	 */
1138	if (err == EBADF)
1139		return (ENOENT);
1140
1141	if (err != ENOENT)
1142		return (err);
1143
1144	/*
1145	 * If we reach this point it means dlmgmtd is up but has no
1146	 * mapping for the link name.
1147	 */
1148	if (ddi_parse(link, drv, &ppa) != DDI_SUCCESS)
1149		return (ENOENT);
1150
1151	if (IS_IPTUN_LINK(drv)) {
1152		if ((err = i_dls_devnet_create_iptun(link, drv, &linkid)) != 0)
1153			return (err);
1154		/*
1155		 * At this point, an IP tunnel MAC has registered, which
1156		 * resulted in a link being created.
1157		 */
1158		err = dls_devnet_hold(linkid, ddpp);
1159		if (err != 0) {
1160			VERIFY(i_dls_devnet_destroy_iptun(linkid) == 0);
1161			return (err);
1162		}
1163		/*
1164		 * dls_devnet_rele() will know to destroy the implicit IP
1165		 * tunnel on last reference release if DD_IMPLICIT_IPTUN is
1166		 * set.
1167		 */
1168		(*ddpp)->dd_flags |= DD_IMPLICIT_IPTUN;
1169		return (0);
1170	}
1171
1172	/*
1173	 * If this link:
1174	 * (a) is a physical device, (b) this is the first boot, (c) the MAC
1175	 * is not registered yet, and (d) we cannot find its linkid, then the
1176	 * linkname is the same as the devname.
1177	 *
1178	 * First filter out invalid names.
1179	 */
1180	if ((major = ddi_name_to_major(drv)) == (major_t)-1)
1181		return (ENOENT);
1182
1183	phy_dev = makedevice(major, DLS_PPA2MINOR(ppa));
1184	if (softmac_hold_device(phy_dev, &ddh) != 0)
1185		return (ENOENT);
1186
1187	/*
1188	 * At this time, the MAC should be registered, check its phy_dev using
1189	 * the given name.
1190	 */
1191	if ((err = dls_mgmt_get_linkid(link, &linkid)) != 0 ||
1192	    (err = dls_mgmt_get_phydev(linkid, &tmp_dev)) != 0) {
1193		softmac_rele_device(ddh);
1194		return (err);
1195	}
1196	if (tmp_dev != phy_dev) {
1197		softmac_rele_device(ddh);
1198		return (ENOENT);
1199	}
1200
1201	err = dls_devnet_hold(linkid, ddpp);
1202	softmac_rele_device(ddh);
1203	return (err);
1204}
1205
1206int
1207dls_devnet_macname2linkid(const char *macname, datalink_id_t *linkidp)
1208{
1209	dls_devnet_t	*ddp;
1210
1211	rw_enter(&i_dls_devnet_lock, RW_READER);
1212	if (mod_hash_find(i_dls_devnet_hash, (mod_hash_key_t)macname,
1213	    (mod_hash_val_t *)&ddp) != 0) {
1214		rw_exit(&i_dls_devnet_lock);
1215		return (ENOENT);
1216	}
1217
1218	*linkidp = ddp->dd_linkid;
1219	rw_exit(&i_dls_devnet_lock);
1220	return (0);
1221}
1222
1223/*
1224 * Get linkid for the given dev.
1225 */
1226int
1227dls_devnet_dev2linkid(dev_t dev, datalink_id_t *linkidp)
1228{
1229	char	macname[MAXNAMELEN];
1230	char	*drv;
1231
1232	if ((drv = ddi_major_to_name(getmajor(dev))) == NULL)
1233		return (EINVAL);
1234
1235	(void) snprintf(macname, sizeof (macname), "%s%d", drv,
1236	    DLS_MINOR2INST(getminor(dev)));
1237	return (dls_devnet_macname2linkid(macname, linkidp));
1238}
1239
1240/*
1241 * Get the link's physical dev_t. It this is a VLAN, get the dev_t of the
1242 * link this VLAN is created on.
1243 */
1244int
1245dls_devnet_phydev(datalink_id_t vlanid, dev_t *devp)
1246{
1247	dls_devnet_t	*ddp;
1248	int		err;
1249
1250	if ((err = dls_devnet_hold_tmp(vlanid, &ddp)) != 0)
1251		return (err);
1252
1253	err = dls_mgmt_get_phydev(ddp->dd_linkid, devp);
1254	dls_devnet_rele_tmp(ddp);
1255	return (err);
1256}
1257
1258/*
1259 * Handle the renaming requests.  There are two rename cases:
1260 *
1261 * 1. Request to rename a valid link (id1) to an non-existent link name
1262 *    (id2). In this case id2 is DATALINK_INVALID_LINKID.  Just check whether
1263 *    id1 is held by any applications.
1264 *
1265 *    In this case, the link's kstats need to be updated using the given name.
1266 *
1267 * 2. Request to rename a valid link (id1) to the name of a REMOVED
1268 *    physical link (id2). In this case, check that id1 and its associated
1269 *    mac is not held by any application, and update the link's linkid to id2.
1270 *
1271 *    This case does not change the <link name, linkid> mapping, so the link's
1272 *    kstats need to be updated with using name associated the given id2.
1273 */
1274int
1275dls_devnet_rename(datalink_id_t id1, datalink_id_t id2, const char *link)
1276{
1277	dls_dev_handle_t	ddh = NULL;
1278	int			err = 0;
1279	dev_t			phydev = 0;
1280	dls_devnet_t		*ddp;
1281	mac_perim_handle_t	mph = NULL;
1282	mac_handle_t		mh;
1283	mod_hash_val_t		val;
1284
1285	/*
1286	 * In the second case, id2 must be a REMOVED physical link.
1287	 */
1288	if ((id2 != DATALINK_INVALID_LINKID) &&
1289	    (dls_mgmt_get_phydev(id2, &phydev) == 0) &&
1290	    softmac_hold_device(phydev, &ddh) == 0) {
1291		softmac_rele_device(ddh);
1292		return (EEXIST);
1293	}
1294
1295	/*
1296	 * Hold id1 to prevent it from being detached (if a physical link).
1297	 */
1298	if (dls_mgmt_get_phydev(id1, &phydev) == 0)
1299		(void) softmac_hold_device(phydev, &ddh);
1300
1301	/*
1302	 * The framework does not hold hold locks across calls to the
1303	 * mac perimeter, hence enter the perimeter first. This also waits
1304	 * for the property loading to finish.
1305	 */
1306	if ((err = mac_perim_enter_by_linkid(id1, &mph)) != 0) {
1307		softmac_rele_device(ddh);
1308		return (err);
1309	}
1310
1311	rw_enter(&i_dls_devnet_lock, RW_WRITER);
1312	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1313	    (mod_hash_key_t)(uintptr_t)id1, (mod_hash_val_t *)&ddp)) != 0) {
1314		ASSERT(err == MH_ERR_NOTFOUND);
1315		err = ENOENT;
1316		goto done;
1317	}
1318
1319	mutex_enter(&ddp->dd_mutex);
1320	if (ddp->dd_ref > 1) {
1321		mutex_exit(&ddp->dd_mutex);
1322		err = EBUSY;
1323		goto done;
1324	}
1325	mutex_exit(&ddp->dd_mutex);
1326
1327	if (id2 == DATALINK_INVALID_LINKID) {
1328		(void) strlcpy(ddp->dd_linkname, link,
1329		    sizeof (ddp->dd_linkname));
1330
1331		/* rename mac client name and its flow if exists */
1332		if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1333			goto done;
1334		(void) mac_rename_primary(mh, link);
1335		mac_close(mh);
1336		goto done;
1337	}
1338
1339	/*
1340	 * The second case, check whether the MAC is used by any MAC
1341	 * user.  This must be a physical link so ddh must not be NULL.
1342	 */
1343	if (ddh == NULL) {
1344		err = EINVAL;
1345		goto done;
1346	}
1347
1348	if ((err = mac_open(ddp->dd_mac, &mh)) != 0)
1349		goto done;
1350
1351	/*
1352	 * We release the reference of the MAC which mac_open() is
1353	 * holding. Note that this mac will not be unregistered
1354	 * because the physical device is held.
1355	 */
1356	mac_close(mh);
1357
1358	/*
1359	 * Check if there is any other MAC clients, if not, hold this mac
1360	 * exclusively until we are done.
1361	 */
1362	if ((err = mac_mark_exclusive(mh)) != 0)
1363		goto done;
1364
1365	/*
1366	 * Update the link's linkid.
1367	 */
1368	if ((err = mod_hash_find(i_dls_devnet_id_hash,
1369	    (mod_hash_key_t)(uintptr_t)id2, &val)) != MH_ERR_NOTFOUND) {
1370		mac_unmark_exclusive(mh);
1371		err = EEXIST;
1372		goto done;
1373	}
1374
1375	err = dls_mgmt_get_linkinfo(id2, ddp->dd_linkname, NULL, NULL, NULL);
1376	if (err != 0) {
1377		mac_unmark_exclusive(mh);
1378		goto done;
1379	}
1380
1381	(void) mod_hash_remove(i_dls_devnet_id_hash,
1382	    (mod_hash_key_t)(uintptr_t)id1, &val);
1383
1384	ddp->dd_linkid = id2;
1385	(void) mod_hash_insert(i_dls_devnet_id_hash,
1386	    (mod_hash_key_t)(uintptr_t)ddp->dd_linkid, (mod_hash_val_t)ddp);
1387
1388	mac_unmark_exclusive(mh);
1389
1390	/* load properties for new id */
1391	mutex_enter(&ddp->dd_mutex);
1392	ddp->dd_prop_loaded = B_FALSE;
1393	ddp->dd_prop_taskid = taskq_dispatch(system_taskq,
1394	    dls_devnet_prop_task, ddp, TQ_SLEEP);
1395	mutex_exit(&ddp->dd_mutex);
1396
1397done:
1398	rw_exit(&i_dls_devnet_lock);
1399
1400	if (err == 0)
1401		dls_devnet_stat_rename(ddp);
1402
1403	if (mph != NULL)
1404		mac_perim_exit(mph);
1405	softmac_rele_device(ddh);
1406	return (err);
1407}
1408
1409static int
1410i_dls_devnet_setzid(dls_devnet_t *ddp, zoneid_t new_zoneid, boolean_t setprop)
1411{
1412	int			err;
1413	mac_perim_handle_t	mph;
1414	boolean_t		upcall_done = B_FALSE;
1415	datalink_id_t		linkid = ddp->dd_linkid;
1416	zoneid_t		old_zoneid = ddp->dd_zid;
1417	dlmgmt_door_setzoneid_t	setzid;
1418	dlmgmt_setzoneid_retval_t retval;
1419
1420	if (old_zoneid == new_zoneid)
1421		return (0);
1422
1423	if ((err = mac_perim_enter_by_macname(ddp->dd_mac, &mph)) != 0)
1424		return (err);
1425
1426	/*
1427	 * When changing the zoneid of an existing link, we need to tell
1428	 * dlmgmtd about it.  dlmgmtd already knows the zoneid associated with
1429	 * newly created links.
1430	 */
1431	if (setprop) {
1432		setzid.ld_cmd = DLMGMT_CMD_SETZONEID;
1433		setzid.ld_linkid = linkid;
1434		setzid.ld_zoneid = new_zoneid;
1435		err = i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1436		    sizeof (retval));
1437		if (err != 0)
1438			goto done;
1439		upcall_done = B_TRUE;
1440	}
1441	if ((err = dls_link_setzid(ddp->dd_mac, new_zoneid)) == 0) {
1442		ddp->dd_zid = new_zoneid;
1443		devnet_need_rebuild = B_TRUE;
1444	}
1445
1446done:
1447	if (err != 0 && upcall_done) {
1448		setzid.ld_zoneid = old_zoneid;
1449		(void) i_dls_mgmt_upcall(&setzid, sizeof (setzid), &retval,
1450		    sizeof (retval));
1451	}
1452	mac_perim_exit(mph);
1453	return (err);
1454}
1455
1456int
1457dls_devnet_setzid(dls_dl_handle_t ddh, zoneid_t new_zid)
1458{
1459	dls_devnet_t	*ddp;
1460	int		err;
1461	zoneid_t	old_zid;
1462	boolean_t	refheld = B_FALSE;
1463
1464	old_zid = ddh->dd_zid;
1465
1466	if (old_zid == new_zid)
1467		return (0);
1468
1469	/*
1470	 * Acquire an additional reference to the link if it is being assigned
1471	 * to a non-global zone from the global zone.
1472	 */
1473	if (old_zid == GLOBAL_ZONEID && new_zid != GLOBAL_ZONEID) {
1474		if ((err = dls_devnet_hold(ddh->dd_linkid, &ddp)) != 0)
1475			return (err);
1476		refheld = B_TRUE;
1477	}
1478
1479	if ((err = i_dls_devnet_setzid(ddh, new_zid, B_TRUE)) != 0) {
1480		if (refheld)
1481			dls_devnet_rele(ddp);
1482		return (err);
1483	}
1484
1485	/*
1486	 * Release the additional reference if the link is returning to the
1487	 * global zone from a non-global zone.
1488	 */
1489	if (old_zid != GLOBAL_ZONEID && new_zid == GLOBAL_ZONEID)
1490		dls_devnet_rele(ddh);
1491
1492	/* Re-create kstats in the appropriate zones. */
1493	if (old_zid != GLOBAL_ZONEID)
1494		dls_devnet_stat_destroy(ddh, old_zid);
1495	if (new_zid != GLOBAL_ZONEID)
1496		dls_devnet_stat_create(ddh, new_zid);
1497
1498	return (0);
1499}
1500
1501zoneid_t
1502dls_devnet_getzid(dls_dl_handle_t ddh)
1503{
1504	return (((dls_devnet_t *)ddh)->dd_zid);
1505}
1506
1507zoneid_t
1508dls_devnet_getownerzid(dls_dl_handle_t ddh)
1509{
1510	return (((dls_devnet_t *)ddh)->dd_owner_zid);
1511}
1512
1513/*
1514 * Is linkid visible from zoneid?  A link is visible if it was created in the
1515 * zone, or if it is currently assigned to the zone.
1516 */
1517boolean_t
1518dls_devnet_islinkvisible(datalink_id_t linkid, zoneid_t zoneid)
1519{
1520	dls_devnet_t	*ddp;
1521	boolean_t	result;
1522
1523	if (dls_devnet_hold_tmp(linkid, &ddp) != 0)
1524		return (B_FALSE);
1525	result = (ddp->dd_owner_zid == zoneid || ddp->dd_zid == zoneid);
1526	dls_devnet_rele_tmp(ddp);
1527	return (result);
1528}
1529
1530/*
1531 * Access a vanity naming node.
1532 */
1533int
1534dls_devnet_open(const char *link, dls_dl_handle_t *dhp, dev_t *devp)
1535{
1536	dls_devnet_t	*ddp;
1537	dls_link_t	*dlp;
1538	zoneid_t	zid = getzoneid();
1539	int		err;
1540	mac_perim_handle_t	mph;
1541
1542	if ((err = dls_devnet_hold_by_name(link, &ddp)) != 0)
1543		return (err);
1544
1545	dls_devnet_prop_task_wait(ddp);
1546
1547	/*
1548	 * Opening a link that does not belong to the current non-global zone
1549	 * is not allowed.
1550	 */
1551	if (zid != GLOBAL_ZONEID && ddp->dd_zid != zid) {
1552		dls_devnet_rele(ddp);
1553		return (ENOENT);
1554	}
1555
1556	err = mac_perim_enter_by_macname(ddp->dd_mac, &mph);
1557	if (err != 0) {
1558		dls_devnet_rele(ddp);
1559		return (err);
1560	}
1561
1562	err = dls_link_hold_create(ddp->dd_mac, &dlp);
1563	mac_perim_exit(mph);
1564
1565	if (err != 0) {
1566		dls_devnet_rele(ddp);
1567		return (err);
1568	}
1569
1570	*dhp = ddp;
1571	*devp = dls_link_dev(dlp);
1572	return (0);
1573}
1574
1575/*
1576 * Close access to a vanity naming node.
1577 */
1578void
1579dls_devnet_close(dls_dl_handle_t dlh)
1580{
1581	dls_devnet_t	*ddp = dlh;
1582	dls_link_t	*dlp;
1583	mac_perim_handle_t	mph;
1584
1585	VERIFY(mac_perim_enter_by_macname(ddp->dd_mac, &mph) == 0);
1586	VERIFY(dls_link_hold(ddp->dd_mac, &dlp) == 0);
1587
1588	/*
1589	 * One rele for the hold placed in dls_devnet_open, another for
1590	 * the hold done just above
1591	 */
1592	dls_link_rele(dlp);
1593	dls_link_rele(dlp);
1594	mac_perim_exit(mph);
1595
1596	dls_devnet_rele(ddp);
1597}
1598
1599/*
1600 * This is used by /dev/net to rebuild the nodes for readdir().  It is not
1601 * critical and no protection is needed.
1602 */
1603boolean_t
1604dls_devnet_rebuild()
1605{
1606	boolean_t updated = devnet_need_rebuild;
1607
1608	devnet_need_rebuild = B_FALSE;
1609	return (updated);
1610}
1611
1612int
1613dls_devnet_create(mac_handle_t mh, datalink_id_t linkid, zoneid_t zoneid)
1614{
1615	dls_link_t	*dlp;
1616	dls_devnet_t	*ddp;
1617	int		err;
1618	mac_perim_handle_t mph;
1619
1620	/*
1621	 * Holding the mac perimeter ensures that the downcall from the
1622	 * dlmgmt daemon which does the property loading does not proceed
1623	 * until we relinquish the perimeter.
1624	 */
1625	mac_perim_enter_by_mh(mh, &mph);
1626	/*
1627	 * Make this association before we call dls_link_hold_create as
1628	 * we need to use the linkid to get the user name for the link
1629	 * when we create the MAC client.
1630	 */
1631	if ((err = dls_devnet_set(mac_name(mh), linkid, zoneid, &ddp)) == 0) {
1632		if ((err = dls_link_hold_create(mac_name(mh), &dlp)) != 0) {
1633			mac_perim_exit(mph);
1634			(void) dls_devnet_unset(mac_name(mh), &linkid, B_TRUE);
1635			return (err);
1636		}
1637	}
1638	mac_perim_exit(mph);
1639	return (err);
1640}
1641
1642/*
1643 * Set the linkid of the dls_devnet_t and add it into the i_dls_devnet_id_hash.
1644 * This is called in the case that the dlmgmtd daemon is started later than
1645 * the physical devices get attached, and the linkid is only known after the
1646 * daemon starts.
1647 */
1648int
1649dls_devnet_recreate(mac_handle_t mh, datalink_id_t linkid)
1650{
1651	ASSERT(linkid != DATALINK_INVALID_LINKID);
1652	return (dls_devnet_set(mac_name(mh), linkid, GLOBAL_ZONEID, NULL));
1653}
1654
1655int
1656dls_devnet_destroy(mac_handle_t mh, datalink_id_t *idp, boolean_t wait)
1657{
1658	int			err;
1659	mac_perim_handle_t	mph;
1660
1661	*idp = DATALINK_INVALID_LINKID;
1662	err = dls_devnet_unset(mac_name(mh), idp, wait);
1663	if (err != 0 && err != ENOENT)
1664		return (err);
1665
1666	mac_perim_enter_by_mh(mh, &mph);
1667	err = dls_link_rele_by_name(mac_name(mh));
1668	mac_perim_exit(mph);
1669
1670	if (err != 0) {
1671		/*
1672		 * XXX It is a general GLDv3 bug that dls_devnet_set() has to
1673		 * be called to re-set the link when destroy fails.  The
1674		 * zoneid below will be incorrect if this function is ever
1675		 * called from kernel context or from a zone other than that
1676		 * which initially created the link.
1677		 */
1678		(void) dls_devnet_set(mac_name(mh), *idp, crgetzoneid(CRED()),
1679		    NULL);
1680	}
1681	return (err);
1682}
1683
1684/*
1685 * Implicitly create an IP tunnel link.
1686 */
1687static int
1688i_dls_devnet_create_iptun(const char *linkname, const char *drvname,
1689    datalink_id_t *linkid)
1690{
1691	int		err;
1692	iptun_kparams_t	ik;
1693	uint32_t	media;
1694	netstack_t	*ns;
1695	major_t		iptun_major;
1696	dev_info_t	*iptun_dip;
1697
1698	/* First ensure that the iptun device is attached. */
1699	if ((iptun_major = ddi_name_to_major(IPTUN_DRIVER_NAME)) == (major_t)-1)
1700		return (EINVAL);
1701	if ((iptun_dip = ddi_hold_devi_by_instance(iptun_major, 0, 0)) == NULL)
1702		return (EINVAL);
1703
1704	if (IS_IPV4_TUN(drvname)) {
1705		ik.iptun_kparam_type = IPTUN_TYPE_IPV4;
1706		media = DL_IPV4;
1707	} else if (IS_6TO4_TUN(drvname)) {
1708		ik.iptun_kparam_type = IPTUN_TYPE_6TO4;
1709		media = DL_6TO4;
1710	} else if (IS_IPV6_TUN(drvname)) {
1711		ik.iptun_kparam_type = IPTUN_TYPE_IPV6;
1712		media = DL_IPV6;
1713	}
1714	ik.iptun_kparam_flags = (IPTUN_KPARAM_TYPE | IPTUN_KPARAM_IMPLICIT);
1715
1716	/* Obtain a datalink id for this tunnel. */
1717	err = dls_mgmt_create((char *)linkname, 0, DATALINK_CLASS_IPTUN, media,
1718	    B_FALSE, &ik.iptun_kparam_linkid);
1719	if (err != 0) {
1720		ddi_release_devi(iptun_dip);
1721		return (err);
1722	}
1723
1724	ns = netstack_get_current();
1725	err = iptun_create(&ik, CRED());
1726	netstack_rele(ns);
1727
1728	if (err != 0)
1729		VERIFY(dls_mgmt_destroy(ik.iptun_kparam_linkid, B_FALSE) == 0);
1730	else
1731		*linkid = ik.iptun_kparam_linkid;
1732
1733	ddi_release_devi(iptun_dip);
1734	return (err);
1735}
1736
1737static int
1738i_dls_devnet_destroy_iptun(datalink_id_t linkid)
1739{
1740	int err;
1741
1742	/*
1743	 * Note the use of zone_kcred() here as opposed to CRED().  This is
1744	 * because the process that does the last close of this /dev/net node
1745	 * may not have necessary privileges to delete this IP tunnel, but the
1746	 * tunnel must always be implicitly deleted on last close.
1747	 */
1748	if ((err = iptun_delete(linkid, zone_kcred())) == 0)
1749		(void) dls_mgmt_destroy(linkid, B_FALSE);
1750	return (err);
1751}
1752
1753const char *
1754dls_devnet_link(dls_dl_handle_t ddh)
1755{
1756	return (ddh->dd_linkname);
1757}
1758
1759const char *
1760dls_devnet_mac(dls_dl_handle_t ddh)
1761{
1762	return (ddh->dd_mac);
1763}
1764
1765datalink_id_t
1766dls_devnet_linkid(dls_dl_handle_t ddh)
1767{
1768	return (ddh->dd_linkid);
1769}
1770