xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_multi.c (revision 6a634c9d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 1990 Mentat Inc.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/dlpi.h>
29 #include <sys/stropts.h>
30 #include <sys/strsun.h>
31 #include <sys/ddi.h>
32 #include <sys/cmn_err.h>
33 #include <sys/sdt.h>
34 #include <sys/zone.h>
35 
36 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/sockio.h>
39 #include <net/if.h>
40 #include <sys/systm.h>
41 #include <sys/strsubr.h>
42 #include <net/route.h>
43 #include <netinet/in.h>
44 #include <net/if_dl.h>
45 #include <netinet/ip6.h>
46 #include <netinet/icmp6.h>
47 
48 #include <inet/common.h>
49 #include <inet/mi.h>
50 #include <inet/nd.h>
51 #include <inet/arp.h>
52 #include <inet/ip.h>
53 #include <inet/ip6.h>
54 #include <inet/ip_if.h>
55 #include <inet/ip_ndp.h>
56 #include <inet/ip_multi.h>
57 #include <inet/ipclassifier.h>
58 #include <inet/ipsec_impl.h>
59 #include <inet/sctp_ip.h>
60 #include <inet/ip_listutils.h>
61 #include <inet/udp_impl.h>
62 
63 /* igmpv3/mldv2 source filter manipulation */
64 static void	ilm_bld_flists(conn_t *conn, void *arg);
65 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
66     slist_t *flist);
67 
68 static ilm_t	*ilm_add(ill_t *ill, const in6_addr_t *group,
69     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
70     zoneid_t zoneid);
71 static void	ilm_delete(ilm_t *ilm);
72 static int	ilm_numentries(ill_t *, const in6_addr_t *);
73 
74 static ilm_t	*ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
75     ilg_stat_t, mcast_record_t, slist_t *, int *);
76 static ilm_t	*ip_addmulti_impl(const in6_addr_t *, ill_t *,
77     zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
78 static int	ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
79 static int	ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);
80 
81 static int	ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
82     t_uscalar_t);
83 static ilg_t	*ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
84     uint_t ifindex);
85 
86 static int	ilg_add(conn_t *connp, const in6_addr_t *group,
87     ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
88     const in6_addr_t *v6src);
89 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
90 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
91     uint32_t *addr_lenp, uint32_t *addr_offp);
92 static int	ip_opt_delete_group_excl(conn_t *connp,
93     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
94     mcast_record_t fmode, const in6_addr_t *v6src);
95 
96 static	ilm_t	*ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);
97 
98 static int	ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
99     ill_t **);
100 
101 static void	ilg_check_detach(conn_t *, ill_t *);
102 static void	ilg_check_reattach(conn_t *, ill_t *);
103 
104 /*
105  * MT notes:
106  *
107  * Multicast joins operate on both the ilg and ilm structures. Multiple
108  * threads operating on a conn (socket) trying to do multicast joins
109  * need to synchronize when operating on the ilg. Multiple threads
110  * potentially operating on different conn (socket endpoints) trying to
111  * do multicast joins could eventually end up trying to manipulate the
112  * ilm simultaneously and need to synchronize on the access to the ilm.
113  * The access and lookup of the ilm, as well as other ill multicast state,
114  * is under ill_mcast_lock.
115  * The modifications and lookup of ilg entries is serialized using conn_ilg_lock
116  * rwlock. An ilg will not be freed until ilg_refcnt drops to zero.
117  *
118  * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
119  * never the other way around.
120  *
121  * An ilm is an IP data structure used to track multicast join/leave.
122  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
123  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
124  * referencing the ilm.
125  * The modifications and lookup of ilm entries is serialized using the
126  * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
127  * of the ilm state.
128  * ilms are created / destroyed only as writer. ilms
129  * are not passed around. The datapath (anything outside of this file
130  * and igmp.c) use functions that do not return ilms - just the number
131  * of members. So we don't need a dynamic refcount of the number
132  * of threads holding reference to an ilm.
133  *
134  * In the cases where we serially access the ilg and ilm, which happens when
135  * we handle the applications requests to join or leave groups and sources,
136  * we use the ill_mcast_serializer mutex to ensure that a multithreaded
137  * application which does concurrent joins and/or leaves on the same group on
138  * the same socket always results in a consistent order for the ilg and ilm
139  * modifications.
140  *
141  * When a multicast operation results in needing to send a message to
142  * the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue()
143  * which serializes the DLPI requests. The IGMP/MLD code uses ill_mcast_queue()
144  * to send IGMP/MLD IP packet to avoid dropping the lock just to send a packet.
145  */
146 
/*
 * Obtain storage for `count' objects of type `type' from mi_zalloc() and
 * cast the result to a pointer to that type.  The size multiplication is
 * not checked for overflow, and the result is whatever mi_zalloc() returns.
 */
#define	GETSTRUCT(type, count)	\
	((type *)mi_zalloc(sizeof (type) * (count)))
149 
150 /*
151  * Caller must ensure that the ilg has not been condemned
152  * The condemned flag is only set in ilg_delete under conn_ilg_lock.
153  *
154  * The caller must hold conn_ilg_lock as writer.
155  */
156 static void
ilg_refhold(ilg_t * ilg)157 ilg_refhold(ilg_t *ilg)
158 {
159 	ASSERT(ilg->ilg_refcnt != 0);
160 	ASSERT(!ilg->ilg_condemned);
161 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
162 
163 	ilg->ilg_refcnt++;
164 }
165 
166 static void
ilg_inactive(ilg_t * ilg)167 ilg_inactive(ilg_t *ilg)
168 {
169 	ASSERT(ilg->ilg_ill == NULL);
170 	ASSERT(ilg->ilg_ilm == NULL);
171 	ASSERT(ilg->ilg_filter == NULL);
172 	ASSERT(ilg->ilg_condemned);
173 
174 	/* Unlink from list */
175 	*ilg->ilg_ptpn = ilg->ilg_next;
176 	if (ilg->ilg_next != NULL)
177 		ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
178 	ilg->ilg_next = NULL;
179 	ilg->ilg_ptpn = NULL;
180 
181 	ilg->ilg_connp = NULL;
182 	kmem_free(ilg, sizeof (*ilg));
183 }
184 
185 /*
186  * The caller must hold conn_ilg_lock as writer.
187  */
188 static void
ilg_refrele(ilg_t * ilg)189 ilg_refrele(ilg_t *ilg)
190 {
191 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
192 	ASSERT(ilg->ilg_refcnt != 0);
193 	if (--ilg->ilg_refcnt == 0)
194 		ilg_inactive(ilg);
195 }
196 
197 /*
198  * Acquire reference on ilg and drop reference on held_ilg.
199  * In the case when held_ilg is the same as ilg we already have
200  * a reference, but the held_ilg might be condemned. In that case
201  * we avoid the ilg_refhold/rele so that we can assert in ire_refhold
202  * that the ilg isn't condemned.
203  */
204 static void
ilg_transfer_hold(ilg_t * held_ilg,ilg_t * ilg)205 ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
206 {
207 	if (held_ilg == ilg)
208 		return;
209 
210 	ilg_refhold(ilg);
211 	if (held_ilg != NULL)
212 		ilg_refrele(held_ilg);
213 }
214 
215 /*
216  * Allocate a new ilg_t and links it into conn_ilg.
217  * Returns NULL on failure, in which case `*errp' will be
218  * filled in with the reason.
219  *
220  * Assumes connp->conn_ilg_lock is held.
221  */
222 static ilg_t *
conn_ilg_alloc(conn_t * connp,int * errp)223 conn_ilg_alloc(conn_t *connp, int *errp)
224 {
225 	ilg_t *ilg;
226 
227 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
228 
229 	/*
230 	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
231 	 * create any ilgs.
232 	 */
233 	if (connp->conn_state_flags & CONN_CLOSING) {
234 		*errp = EINVAL;
235 		return (NULL);
236 	}
237 
238 	ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
239 	if (ilg == NULL) {
240 		*errp = ENOMEM;
241 		return (NULL);
242 	}
243 
244 	ilg->ilg_refcnt = 1;
245 
246 	/* Insert at head */
247 	if (connp->conn_ilg != NULL)
248 		connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
249 	ilg->ilg_next = connp->conn_ilg;
250 	ilg->ilg_ptpn = &connp->conn_ilg;
251 	connp->conn_ilg = ilg;
252 
253 	ilg->ilg_connp = connp;
254 	return (ilg);
255 }
256 
257 typedef struct ilm_fbld_s {
258 	ilm_t		*fbld_ilm;
259 	int		fbld_in_cnt;
260 	int		fbld_ex_cnt;
261 	slist_t		fbld_in;
262 	slist_t		fbld_ex;
263 	boolean_t	fbld_in_overflow;
264 } ilm_fbld_t;
265 
266 /*
267  * Caller must hold ill_mcast_lock
268  */
269 static void
ilm_bld_flists(conn_t * connp,void * arg)270 ilm_bld_flists(conn_t *connp, void *arg)
271 {
272 	ilg_t *ilg;
273 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
274 	ilm_t *ilm = fbld->fbld_ilm;
275 	in6_addr_t *v6group = &ilm->ilm_v6addr;
276 
277 	if (connp->conn_ilg == NULL)
278 		return;
279 
280 	/*
281 	 * Since we can't break out of the ipcl_walk once started, we still
282 	 * have to look at every conn.  But if we've already found one
283 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
284 	 * ilgs--that will be our state.
285 	 */
286 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
287 		return;
288 
289 	/*
290 	 * Check this conn's ilgs to see if any are interested in our
291 	 * ilm (group, interface match).  If so, update the master
292 	 * include and exclude lists we're building in the fbld struct
293 	 * with this ilg's filter info.
294 	 *
295 	 * Note that the caller has already serialized on the ill we care
296 	 * about.
297 	 */
298 	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));
299 
300 	rw_enter(&connp->conn_ilg_lock, RW_READER);
301 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
302 		if (ilg->ilg_condemned)
303 			continue;
304 
305 		/*
306 		 * Since we are under the ill_mcast_serializer we know
307 		 * that any ilg+ilm operations on this ilm have either
308 		 * not started or completed, except for the last ilg
309 		 * (the one that caused us to be called) which doesn't
310 		 * have ilg_ilm set yet. Hence we compare using ilg_ill
311 		 * and the address.
312 		 */
313 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
314 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
315 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
316 				fbld->fbld_in_cnt++;
317 				if (!fbld->fbld_in_overflow)
318 					l_union_in_a(&fbld->fbld_in,
319 					    ilg->ilg_filter,
320 					    &fbld->fbld_in_overflow);
321 			} else {
322 				fbld->fbld_ex_cnt++;
323 				/*
324 				 * On the first exclude list, don't try to do
325 				 * an intersection, as the master exclude list
326 				 * is intentionally empty.  If the master list
327 				 * is still empty on later iterations, that
328 				 * means we have at least one ilg with an empty
329 				 * exclude list, so that should be reflected
330 				 * when we take the intersection.
331 				 */
332 				if (fbld->fbld_ex_cnt == 1) {
333 					if (ilg->ilg_filter != NULL)
334 						l_copy(ilg->ilg_filter,
335 						    &fbld->fbld_ex);
336 				} else {
337 					l_intersection_in_a(&fbld->fbld_ex,
338 					    ilg->ilg_filter);
339 				}
340 			}
341 			/* there will only be one match, so break now. */
342 			break;
343 		}
344 	}
345 	rw_exit(&connp->conn_ilg_lock);
346 }
347 
348 /*
349  * Caller must hold ill_mcast_lock
350  */
351 static void
ilm_gen_filter(ilm_t * ilm,mcast_record_t * fmode,slist_t * flist)352 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
353 {
354 	ilm_fbld_t fbld;
355 	ip_stack_t *ipst = ilm->ilm_ipst;
356 
357 	fbld.fbld_ilm = ilm;
358 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
359 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
360 	fbld.fbld_in_overflow = B_FALSE;
361 
362 	/* first, construct our master include and exclude lists */
363 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
364 
365 	/* now use those master lists to generate the interface filter */
366 
367 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
368 	if (fbld.fbld_in_overflow) {
369 		*fmode = MODE_IS_EXCLUDE;
370 		flist->sl_numsrc = 0;
371 		return;
372 	}
373 
374 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
375 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
376 		*fmode = MODE_IS_INCLUDE;
377 		flist->sl_numsrc = 0;
378 		return;
379 	}
380 
381 	/*
382 	 * If there are no exclude lists, then the interface filter
383 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
384 	 * exclude list makes the interface filter EXCLUDE, with its
385 	 * filter list equal to (fbld_ex - fbld_in).
386 	 */
387 	if (fbld.fbld_ex_cnt == 0) {
388 		*fmode = MODE_IS_INCLUDE;
389 		l_copy(&fbld.fbld_in, flist);
390 	} else {
391 		*fmode = MODE_IS_EXCLUDE;
392 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
393 	}
394 }
395 
396 /*
397  * Caller must hold ill_mcast_lock
398  */
399 static int
ilm_update_add(ilm_t * ilm,ilg_stat_t ilgstat,slist_t * ilg_flist)400 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
401 {
402 	mcast_record_t fmode;
403 	slist_t *flist;
404 	boolean_t fdefault;
405 	char buf[INET6_ADDRSTRLEN];
406 	ill_t *ill = ilm->ilm_ill;
407 
408 	/*
409 	 * There are several cases where the ilm's filter state
410 	 * defaults to (EXCLUDE, NULL):
411 	 *	- we've had previous joins without associated ilgs
412 	 *	- this join has no associated ilg
413 	 *	- the ilg's filter state is (EXCLUDE, NULL)
414 	 */
415 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
416 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
417 
418 	/* attempt mallocs (if needed) before doing anything else */
419 	if ((flist = l_alloc()) == NULL)
420 		return (ENOMEM);
421 	if (!fdefault && ilm->ilm_filter == NULL) {
422 		ilm->ilm_filter = l_alloc();
423 		if (ilm->ilm_filter == NULL) {
424 			l_free(flist);
425 			return (ENOMEM);
426 		}
427 	}
428 
429 	if (ilgstat != ILGSTAT_CHANGE)
430 		ilm->ilm_refcnt++;
431 
432 	if (ilgstat == ILGSTAT_NONE)
433 		ilm->ilm_no_ilg_cnt++;
434 
435 	/*
436 	 * Determine new filter state.  If it's not the default
437 	 * (EXCLUDE, NULL), we must walk the conn list to find
438 	 * any ilgs interested in this group, and re-build the
439 	 * ilm filter.
440 	 */
441 	if (fdefault) {
442 		fmode = MODE_IS_EXCLUDE;
443 		flist->sl_numsrc = 0;
444 	} else {
445 		ilm_gen_filter(ilm, &fmode, flist);
446 	}
447 
448 	/* make sure state actually changed; nothing to do if not. */
449 	if ((ilm->ilm_fmode == fmode) &&
450 	    !lists_are_different(ilm->ilm_filter, flist)) {
451 		l_free(flist);
452 		return (0);
453 	}
454 
455 	/* send the state change report */
456 	if (!IS_LOOPBACK(ill)) {
457 		if (ill->ill_isv6)
458 			mld_statechange(ilm, fmode, flist);
459 		else
460 			igmp_statechange(ilm, fmode, flist);
461 	}
462 
463 	/* update the ilm state */
464 	ilm->ilm_fmode = fmode;
465 	if (flist->sl_numsrc > 0)
466 		l_copy(flist, ilm->ilm_filter);
467 	else
468 		CLEAR_SLIST(ilm->ilm_filter);
469 
470 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
471 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
472 
473 	l_free(flist);
474 	return (0);
475 }
476 
477 /*
478  * Caller must hold ill_mcast_lock
479  */
480 static int
ilm_update_del(ilm_t * ilm)481 ilm_update_del(ilm_t *ilm)
482 {
483 	mcast_record_t fmode;
484 	slist_t *flist;
485 	ill_t *ill = ilm->ilm_ill;
486 
487 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
488 	    ilm->ilm_refcnt));
489 
490 	if ((flist = l_alloc()) == NULL)
491 		return (ENOMEM);
492 
493 	/*
494 	 * If present, the ilg in question has already either been
495 	 * updated or removed from our list; so all we need to do
496 	 * now is walk the list to update the ilm filter state.
497 	 *
498 	 * Skip the list walk if we have any no-ilg joins, which
499 	 * cause the filter state to revert to (EXCLUDE, NULL).
500 	 */
501 	if (ilm->ilm_no_ilg_cnt != 0) {
502 		fmode = MODE_IS_EXCLUDE;
503 		flist->sl_numsrc = 0;
504 	} else {
505 		ilm_gen_filter(ilm, &fmode, flist);
506 	}
507 
508 	/* check to see if state needs to be updated */
509 	if ((ilm->ilm_fmode == fmode) &&
510 	    (!lists_are_different(ilm->ilm_filter, flist))) {
511 		l_free(flist);
512 		return (0);
513 	}
514 
515 	if (!IS_LOOPBACK(ill)) {
516 		if (ill->ill_isv6)
517 			mld_statechange(ilm, fmode, flist);
518 		else
519 			igmp_statechange(ilm, fmode, flist);
520 	}
521 
522 	ilm->ilm_fmode = fmode;
523 	if (flist->sl_numsrc > 0) {
524 		if (ilm->ilm_filter == NULL) {
525 			ilm->ilm_filter = l_alloc();
526 			if (ilm->ilm_filter == NULL) {
527 				char buf[INET6_ADDRSTRLEN];
528 				ip1dbg(("ilm_update_del: failed to alloc ilm "
529 				    "filter; no source filtering for %s on %s",
530 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
531 				    buf, sizeof (buf)), ill->ill_name));
532 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
533 				l_free(flist);
534 				return (0);
535 			}
536 		}
537 		l_copy(flist, ilm->ilm_filter);
538 	} else {
539 		CLEAR_SLIST(ilm->ilm_filter);
540 	}
541 
542 	l_free(flist);
543 	return (0);
544 }
545 
546 /*
547  * Create/update the ilm for the group/ill. Used by other parts of IP to
548  * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join.
549  * Returns with a refhold on the ilm.
550  *
551  * The unspecified address means all multicast addresses for in both the
552  * case of IPv4 and IPv6.
553  *
554  * The caller should have already mapped an IPMP under ill to the upper.
555  */
556 ilm_t *
ip_addmulti(const in6_addr_t * v6group,ill_t * ill,zoneid_t zoneid,int * errorp)557 ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
558     int *errorp)
559 {
560 	ilm_t *ilm;
561 
562 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
563 	mutex_enter(&ill->ill_mcast_serializer);
564 	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
565 	    MODE_IS_EXCLUDE, NULL, errorp);
566 	mutex_exit(&ill->ill_mcast_serializer);
567 	/*
568 	 * Now that all locks have been dropped, we can send any
569 	 * deferred/queued DLPI or IP packets
570 	 */
571 	ill_mcast_send_queued(ill);
572 	ill_dlpi_send_queued(ill);
573 	return (ilm);
574 }
575 
576 /*
577  * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
578  * then this returns with a refhold on the ilm.
579  *
580  * Internal routine which assumes the caller has already acquired
581  * ill_mcast_serializer. It is the caller's responsibility to send out
582  * queued DLPI/multicast packets after all locks are dropped.
583  *
584  * The unspecified address means all multicast addresses for in both the
585  * case of IPv4 and IPv6.
586  *
587  * ilgstat tells us if there's an ilg associated with this join,
588  * and if so, if it's a new ilg or a change to an existing one.
589  * ilg_fmode and ilg_flist give us the current filter state of
590  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
591  *
592  * The caller should have already mapped an IPMP under ill to the upper.
593  */
594 static ilm_t *
ip_addmulti_serial(const in6_addr_t * v6group,ill_t * ill,zoneid_t zoneid,ilg_stat_t ilgstat,mcast_record_t ilg_fmode,slist_t * ilg_flist,int * errorp)595 ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
596     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
597     int *errorp)
598 {
599 	ilm_t *ilm;
600 
601 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
602 
603 	if (ill->ill_isv6) {
604 		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
605 		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
606 			*errorp = EINVAL;
607 			return (NULL);
608 		}
609 	} else {
610 		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
611 			ipaddr_t v4group;
612 
613 			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
614 			ASSERT(!IS_UNDER_IPMP(ill));
615 			if (!CLASSD(v4group)) {
616 				*errorp = EINVAL;
617 				return (NULL);
618 			}
619 		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
620 			*errorp = EINVAL;
621 			return (NULL);
622 		}
623 	}
624 
625 	if (IS_UNDER_IPMP(ill)) {
626 		*errorp = EINVAL;
627 		return (NULL);
628 	}
629 
630 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
631 	/*
632 	 * We do the equivalent of a lookup by checking after we get the lock
633 	 * This is needed since the ill could have been condemned after
634 	 * we looked it up, and we need to check condemned after we hold
635 	 * ill_mcast_lock to synchronize with the unplumb code.
636 	 */
637 	if (ill->ill_state_flags & ILL_CONDEMNED) {
638 		rw_exit(&ill->ill_mcast_lock);
639 		*errorp = ENXIO;
640 		return (NULL);
641 	}
642 	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
643 	    ilg_flist, errorp);
644 	rw_exit(&ill->ill_mcast_lock);
645 
646 	ill_mcast_timer_start(ill->ill_ipst);
647 	return (ilm);
648 }
649 
650 static ilm_t *
ip_addmulti_impl(const in6_addr_t * v6group,ill_t * ill,zoneid_t zoneid,ilg_stat_t ilgstat,mcast_record_t ilg_fmode,slist_t * ilg_flist,int * errorp)651 ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
652     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
653     int *errorp)
654 {
655 	ilm_t	*ilm;
656 	int	ret = 0;
657 
658 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
659 	*errorp = 0;
660 
661 	/*
662 	 * An ilm is uniquely identified by the tuple of (group, ill) where
663 	 * `group' is the multicast group address, and `ill' is the interface
664 	 * on which it is currently joined.
665 	 */
666 
667 	ilm = ilm_lookup(ill, v6group, zoneid);
668 	if (ilm != NULL) {
669 		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
670 		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
671 		if (ret == 0)
672 			return (ilm);
673 
674 		*errorp = ret;
675 		return (NULL);
676 	}
677 
678 	/*
679 	 * The callers checks on the ilg and the ilg+ilm consistency under
680 	 * ill_mcast_serializer ensures that we can not have ILGSTAT_CHANGE
681 	 * and no ilm.
682 	 */
683 	ASSERT(ilgstat != ILGSTAT_CHANGE);
684 	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
685 	if (ilm == NULL) {
686 		*errorp = ENOMEM;
687 		return (NULL);
688 	}
689 
690 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
691 		/*
692 		 * If we have more then one we should not tell the driver
693 		 * to join this time.
694 		 */
695 		if (ilm_numentries(ill, v6group) == 1) {
696 			ret = ill_join_allmulti(ill);
697 		}
698 	} else {
699 		if (!IS_LOOPBACK(ill)) {
700 			if (ill->ill_isv6)
701 				mld_joingroup(ilm);
702 			else
703 				igmp_joingroup(ilm);
704 		}
705 
706 		/*
707 		 * If we have more then one we should not tell the driver
708 		 * to join this time.
709 		 */
710 		if (ilm_numentries(ill, v6group) == 1) {
711 			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
712 		}
713 	}
714 	if (ret != 0) {
715 		if (ret == ENETDOWN) {
716 			char buf[INET6_ADDRSTRLEN];
717 
718 			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
719 			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
720 			    buf, sizeof (buf)), ill->ill_name));
721 		}
722 		ilm_delete(ilm);
723 		*errorp = ret;
724 		return (NULL);
725 	} else {
726 		return (ilm);
727 	}
728 }
729 
730 /*
731  * Looks up the list of multicast physical addresses this interface
732  * listens to. Add to the list if not present already.
733  */
734 boolean_t
ip_mphysaddr_add(ill_t * ill,uchar_t * hw_addr)735 ip_mphysaddr_add(ill_t *ill, uchar_t *hw_addr)
736 {
737 	multiphysaddr_t *mpa = NULL;
738 	int	hw_addr_length = ill->ill_phys_addr_length;
739 
740 	mutex_enter(&ill->ill_lock);
741 	for (mpa = ill->ill_mphysaddr_list; mpa != NULL; mpa = mpa->mpa_next) {
742 		if (bcmp(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length) == 0) {
743 			mpa->mpa_refcnt++;
744 			mutex_exit(&ill->ill_lock);
745 			return (B_FALSE);
746 		}
747 	}
748 
749 	mpa = kmem_zalloc(sizeof (multiphysaddr_t), KM_NOSLEEP);
750 	if (mpa == NULL) {
751 		/*
752 		 * We risk not having the multiphysadd structure. At this
753 		 * point we can't fail. We can't afford to not send a
754 		 * DL_ENABMULTI_REQ also. It is better than pre-allocating
755 		 * the structure and having the code to track it also.
756 		 */
757 		ip0dbg(("ip_mphysaddr_add: ENOMEM. Some multicast apps"
758 		    " may have issues. hw_addr: %p ill_name: %s\n",
759 		    (void *)hw_addr, ill->ill_name));
760 		mutex_exit(&ill->ill_lock);
761 		return (B_TRUE);
762 	}
763 	bcopy(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length);
764 	mpa->mpa_refcnt = 1;
765 	mpa->mpa_next = ill->ill_mphysaddr_list;
766 	ill->ill_mphysaddr_list = mpa;
767 	mutex_exit(&ill->ill_lock);
768 	return (B_TRUE);
769 }
770 
771 /*
772  * Look up hw_addr from the list of physical multicast addresses this interface
773  * listens to.
774  * Remove the entry if the refcnt is 0
775  */
776 boolean_t
ip_mphysaddr_del(ill_t * ill,uchar_t * hw_addr)777 ip_mphysaddr_del(ill_t *ill, uchar_t *hw_addr)
778 {
779 	multiphysaddr_t *mpap = NULL, **mpapp = NULL;
780 	int hw_addr_length = ill->ill_phys_addr_length;
781 	boolean_t ret = B_FALSE;
782 
783 	mutex_enter(&ill->ill_lock);
784 	for (mpapp = &ill->ill_mphysaddr_list; (mpap = *mpapp) != NULL;
785 	    mpapp = &(mpap->mpa_next)) {
786 		if (bcmp(hw_addr, &(mpap->mpa_addr[0]), hw_addr_length) == 0)
787 			break;
788 	}
789 	if (mpap == NULL) {
790 		/*
791 		 * Should be coming here only when there was a memory
792 		 * exhaustion and we were not able to allocate
793 		 * a multiphysaddr_t. We still send a DL_DISABMULTI_REQ down.
794 		 */
795 
796 		ip0dbg(("ip_mphysaddr_del: No entry for this addr. Some "
797 		    "multicast apps might have had issues. hw_addr: %p "
798 		    " ill_name: %s\n", (void *)hw_addr, ill->ill_name));
799 		ret = B_TRUE;
800 	} else if (--mpap->mpa_refcnt == 0) {
801 		*mpapp = mpap->mpa_next;
802 		kmem_free(mpap, sizeof (multiphysaddr_t));
803 		ret = B_TRUE;
804 	}
805 	mutex_exit(&ill->ill_lock);
806 	return (ret);
807 }
808 
809 /*
810  * Send a multicast request to the driver for enabling or disabling
811  * multicast reception for v6groupp address. The caller has already
812  * checked whether it is appropriate to send one or not.
813  *
814  * For IPMP we switch to the cast_ill since it has the right hardware
815  * information.
816  */
817 static int
ip_ll_send_multireq(ill_t * ill,const in6_addr_t * v6groupp,t_uscalar_t prim)818 ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
819 {
820 	mblk_t	*mp;
821 	uint32_t addrlen, addroff;
822 	ill_t *release_ill = NULL;
823 	uchar_t *cp;
824 	int err = 0;
825 
826 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
827 
828 	if (IS_IPMP(ill)) {
829 		/* On the upper IPMP ill. */
830 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
831 		if (release_ill == NULL) {
832 			/*
833 			 * Avoid sending it down to the ipmpstub.
834 			 * We will be called again once the members of the
835 			 * group are in place
836 			 */
837 			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
838 			    ill->ill_name, ill->ill_isv6));
839 			return (0);
840 		}
841 		ill = release_ill;
842 	}
843 	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
844 	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
845 	if (mp == NULL) {
846 		err = ENOMEM;
847 		goto done;
848 	}
849 
850 	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
851 	if (mp == NULL) {
852 		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
853 		err = ENOMEM;
854 		goto done;
855 	}
856 	cp = mp->b_rptr;
857 
858 	switch (((union DL_primitives *)cp)->dl_primitive) {
859 	case DL_ENABMULTI_REQ:
860 		cp += ((dl_enabmulti_req_t *)cp)->dl_addr_offset;
861 		if (!ip_mphysaddr_add(ill, cp)) {
862 			freemsg(mp);
863 			err = 0;
864 			goto done;
865 		}
866 		mutex_enter(&ill->ill_lock);
867 		/* Track the state if this is the first enabmulti */
868 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
869 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
870 		mutex_exit(&ill->ill_lock);
871 		break;
872 	case DL_DISABMULTI_REQ:
873 		cp += ((dl_disabmulti_req_t *)cp)->dl_addr_offset;
874 		if (!ip_mphysaddr_del(ill, cp)) {
875 			freemsg(mp);
876 			err = 0;
877 			goto done;
878 		}
879 	}
880 	ill_dlpi_queue(ill, mp);
881 done:
882 	if (release_ill != NULL)
883 		ill_refrele(release_ill);
884 	return (err);
885 }
886 
887 /*
888  * Send a multicast request to the driver for enabling multicast
889  * membership for v6group if appropriate.
890  */
891 static int
ip_ll_multireq(ill_t * ill,const in6_addr_t * v6groupp,t_uscalar_t prim)892 ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
893 {
894 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
895 	    ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
896 		ip1dbg(("ip_ll_multireq: not resolver\n"));
897 		return (0);	/* Must be IRE_IF_NORESOLVER */
898 	}
899 
900 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
901 		ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
902 		return (0);
903 	}
904 	return (ip_ll_send_multireq(ill, v6groupp, prim));
905 }
906 
907 /*
908  * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
909  * being true.
910  */
911 int
ip_delmulti(ilm_t * ilm)912 ip_delmulti(ilm_t *ilm)
913 {
914 	ill_t *ill = ilm->ilm_ill;
915 	int error;
916 
917 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
918 	mutex_enter(&ill->ill_mcast_serializer);
919 	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
920 	mutex_exit(&ill->ill_mcast_serializer);
921 	/*
922 	 * Now that all locks have been dropped, we can send any
923 	 * deferred/queued DLPI or IP packets
924 	 */
925 	ill_mcast_send_queued(ill);
926 	ill_dlpi_send_queued(ill);
927 	return (error);
928 }
929 
930 
931 /*
932  * Delete the ilm.
933  * Assumes ill_mcast_serializer is held by the caller.
934  * Caller must send out queued dlpi/multicast packets after dropping
935  * all locks.
936  */
937 static int
ip_delmulti_serial(ilm_t * ilm,boolean_t no_ilg,boolean_t leaving)938 ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
939 {
940 	ill_t *ill = ilm->ilm_ill;
941 	int ret;
942 
943 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
944 	ASSERT(!(IS_UNDER_IPMP(ill)));
945 
946 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
947 	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
948 	rw_exit(&ill->ill_mcast_lock);
949 	ill_mcast_timer_start(ill->ill_ipst);
950 	return (ret);
951 }
952 
953 static int
ip_delmulti_impl(ilm_t * ilm,boolean_t no_ilg,boolean_t leaving)954 ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
955 {
956 	ill_t *ill = ilm->ilm_ill;
957 	int error;
958 	in6_addr_t v6group;
959 
960 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
961 
962 	/* Update counters */
963 	if (no_ilg)
964 		ilm->ilm_no_ilg_cnt--;
965 
966 	if (leaving)
967 		ilm->ilm_refcnt--;
968 
969 	if (ilm->ilm_refcnt > 0)
970 		return (ilm_update_del(ilm));
971 
972 	v6group = ilm->ilm_v6addr;
973 
974 	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
975 		ilm_delete(ilm);
976 		/*
977 		 * If we have some left then one we should not tell the driver
978 		 * to leave.
979 		 */
980 		if (ilm_numentries(ill, &v6group) != 0)
981 			return (0);
982 
983 		ill_leave_allmulti(ill);
984 
985 		return (0);
986 	}
987 
988 	if (!IS_LOOPBACK(ill)) {
989 		if (ill->ill_isv6)
990 			mld_leavegroup(ilm);
991 		else
992 			igmp_leavegroup(ilm);
993 	}
994 
995 	ilm_delete(ilm);
996 	/*
997 	 * If we have some left then one we should not tell the driver
998 	 * to leave.
999 	 */
1000 	if (ilm_numentries(ill, &v6group) != 0)
1001 		return (0);
1002 
1003 	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
1004 	/* We ignore the case when ill_dl_up is not set */
1005 	if (error == ENETDOWN) {
1006 		char buf[INET6_ADDRSTRLEN];
1007 
1008 		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
1009 		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
1010 		    ill->ill_name));
1011 	}
1012 	return (error);
1013 }
1014 
1015 /*
1016  * Make the driver pass up all multicast packets.
1017  */
1018 int
ill_join_allmulti(ill_t * ill)1019 ill_join_allmulti(ill_t *ill)
1020 {
1021 	mblk_t		*promiscon_mp, *promiscoff_mp = NULL;
1022 	uint32_t	addrlen, addroff;
1023 	ill_t		*release_ill = NULL;
1024 
1025 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1026 
1027 	if (IS_LOOPBACK(ill))
1028 		return (0);
1029 
1030 	if (!ill->ill_dl_up) {
1031 		/*
1032 		 * Nobody there. All multicast addresses will be re-joined
1033 		 * when we get the DL_BIND_ACK bringing the interface up.
1034 		 */
1035 		return (ENETDOWN);
1036 	}
1037 
1038 	if (IS_IPMP(ill)) {
1039 		/* On the upper IPMP ill. */
1040 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
1041 		if (release_ill == NULL) {
1042 			/*
1043 			 * Avoid sending it down to the ipmpstub.
1044 			 * We will be called again once the members of the
1045 			 * group are in place
1046 			 */
1047 			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
1048 			    ill->ill_name, ill->ill_isv6));
1049 			return (0);
1050 		}
1051 		ill = release_ill;
1052 		if (!ill->ill_dl_up) {
1053 			ill_refrele(ill);
1054 			return (ENETDOWN);
1055 		}
1056 	}
1057 
1058 	/*
1059 	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
1060 	 * provider.  We don't need to do this for certain media types for
1061 	 * which we never need to turn promiscuous mode on.  While we're here,
1062 	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
1063 	 * ill_leave_allmulti() will not fail due to low memory conditions.
1064 	 */
1065 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1066 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1067 		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1068 		    &addrlen, &addroff);
1069 		if (ill->ill_promiscoff_mp == NULL)
1070 			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1071 			    &addrlen, &addroff);
1072 		if (promiscon_mp == NULL ||
1073 		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
1074 			freemsg(promiscon_mp);
1075 			freemsg(promiscoff_mp);
1076 			if (release_ill != NULL)
1077 				ill_refrele(release_ill);
1078 			return (ENOMEM);
1079 		}
1080 		if (ill->ill_promiscoff_mp == NULL)
1081 			ill->ill_promiscoff_mp = promiscoff_mp;
1082 		ill_dlpi_queue(ill, promiscon_mp);
1083 	}
1084 	if (release_ill != NULL)
1085 		ill_refrele(release_ill);
1086 	return (0);
1087 }
1088 
1089 /*
1090  * Make the driver stop passing up all multicast packets
1091  */
1092 void
ill_leave_allmulti(ill_t * ill)1093 ill_leave_allmulti(ill_t *ill)
1094 {
1095 	mblk_t	*promiscoff_mp;
1096 	ill_t	*release_ill = NULL;
1097 
1098 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1099 
1100 	if (IS_LOOPBACK(ill))
1101 		return;
1102 
1103 	if (!ill->ill_dl_up) {
1104 		/*
1105 		 * Nobody there. All multicast addresses will be re-joined
1106 		 * when we get the DL_BIND_ACK bringing the interface up.
1107 		 */
1108 		return;
1109 	}
1110 
1111 	if (IS_IPMP(ill)) {
1112 		/* On the upper IPMP ill. */
1113 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
1114 		if (release_ill == NULL) {
1115 			/*
1116 			 * Avoid sending it down to the ipmpstub.
1117 			 * We will be called again once the members of the
1118 			 * group are in place
1119 			 */
1120 			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
1121 			    ill->ill_name, ill->ill_isv6));
1122 			return;
1123 		}
1124 		ill = release_ill;
1125 		if (!ill->ill_dl_up)
1126 			goto done;
1127 	}
1128 
1129 	/*
1130 	 * In the case of IPMP and ill_dl_up not being set when we joined
1131 	 * we didn't allocate a promiscoff_mp. In that case we have
1132 	 * nothing to do when we leave.
1133 	 * Ditto for PHYI_MULTI_BCAST
1134 	 */
1135 	promiscoff_mp = ill->ill_promiscoff_mp;
1136 	if (promiscoff_mp != NULL) {
1137 		ill->ill_promiscoff_mp = NULL;
1138 		ill_dlpi_queue(ill, promiscoff_mp);
1139 	}
1140 done:
1141 	if (release_ill != NULL)
1142 		ill_refrele(release_ill);
1143 }
1144 
1145 int
ip_join_allmulti(uint_t ifindex,boolean_t isv6,ip_stack_t * ipst)1146 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1147 {
1148 	ill_t		*ill;
1149 	int		ret;
1150 	ilm_t		*ilm;
1151 
1152 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1153 	if (ill == NULL)
1154 		return (ENODEV);
1155 
1156 	/*
1157 	 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
1158 	 * to join allmulti since only the nominated underlying interface in
1159 	 * the group should receive multicast.  We silently succeed to avoid
1160 	 * having to teach IPobs (currently the only caller of this routine)
1161 	 * to ignore failures in this case.
1162 	 */
1163 	if (IS_UNDER_IPMP(ill)) {
1164 		ill_refrele(ill);
1165 		return (0);
1166 	}
1167 	mutex_enter(&ill->ill_lock);
1168 	if (ill->ill_ipallmulti_cnt > 0) {
1169 		/* Already joined */
1170 		ASSERT(ill->ill_ipallmulti_ilm != NULL);
1171 		ill->ill_ipallmulti_cnt++;
1172 		mutex_exit(&ill->ill_lock);
1173 		goto done;
1174 	}
1175 	mutex_exit(&ill->ill_lock);
1176 
1177 	ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
1178 	if (ilm == NULL) {
1179 		ASSERT(ret != 0);
1180 		ill_refrele(ill);
1181 		return (ret);
1182 	}
1183 
1184 	mutex_enter(&ill->ill_lock);
1185 	if (ill->ill_ipallmulti_cnt > 0) {
1186 		/* Another thread added it concurrently */
1187 		(void) ip_delmulti(ilm);
1188 		mutex_exit(&ill->ill_lock);
1189 		goto done;
1190 	}
1191 	ASSERT(ill->ill_ipallmulti_ilm == NULL);
1192 	ill->ill_ipallmulti_ilm = ilm;
1193 	ill->ill_ipallmulti_cnt++;
1194 	mutex_exit(&ill->ill_lock);
1195 done:
1196 	ill_refrele(ill);
1197 	return (0);
1198 }
1199 
1200 int
ip_leave_allmulti(uint_t ifindex,boolean_t isv6,ip_stack_t * ipst)1201 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1202 {
1203 	ill_t		*ill;
1204 	ilm_t		*ilm;
1205 
1206 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1207 	if (ill == NULL)
1208 		return (ENODEV);
1209 
1210 	if (IS_UNDER_IPMP(ill)) {
1211 		ill_refrele(ill);
1212 		return (0);
1213 	}
1214 
1215 	mutex_enter(&ill->ill_lock);
1216 	if (ill->ill_ipallmulti_cnt == 0) {
1217 		/* ip_purge_allmulti could have removed them all */
1218 		mutex_exit(&ill->ill_lock);
1219 		goto done;
1220 	}
1221 	ill->ill_ipallmulti_cnt--;
1222 	if (ill->ill_ipallmulti_cnt == 0) {
1223 		/* Last one */
1224 		ilm = ill->ill_ipallmulti_ilm;
1225 		ill->ill_ipallmulti_ilm = NULL;
1226 	} else {
1227 		ilm = NULL;
1228 	}
1229 	mutex_exit(&ill->ill_lock);
1230 	if (ilm != NULL)
1231 		(void) ip_delmulti(ilm);
1232 
1233 done:
1234 	ill_refrele(ill);
1235 	return (0);
1236 }
1237 
1238 /*
1239  * Delete the allmulti memberships that were added as part of
1240  * ip_join_allmulti().
1241  */
1242 void
ip_purge_allmulti(ill_t * ill)1243 ip_purge_allmulti(ill_t *ill)
1244 {
1245 	ilm_t	*ilm;
1246 
1247 	ASSERT(IAM_WRITER_ILL(ill));
1248 
1249 	mutex_enter(&ill->ill_lock);
1250 	ilm = ill->ill_ipallmulti_ilm;
1251 	ill->ill_ipallmulti_ilm = NULL;
1252 	ill->ill_ipallmulti_cnt = 0;
1253 	mutex_exit(&ill->ill_lock);
1254 
1255 	if (ilm != NULL)
1256 		(void) ip_delmulti(ilm);
1257 }
1258 
1259 /*
1260  * Create a dlpi message with room for phys+sap. Later
1261  * we will strip the sap for those primitives which
1262  * only need a physical address.
1263  */
1264 static mblk_t *
ill_create_dl(ill_t * ill,uint32_t dl_primitive,uint32_t * addr_lenp,uint32_t * addr_offp)1265 ill_create_dl(ill_t *ill, uint32_t dl_primitive,
1266     uint32_t *addr_lenp, uint32_t *addr_offp)
1267 {
1268 	mblk_t	*mp;
1269 	uint32_t	hw_addr_length;
1270 	char		*cp;
1271 	uint32_t	offset;
1272 	uint32_t	length;
1273 	uint32_t 	size;
1274 
1275 	*addr_lenp = *addr_offp = 0;
1276 
1277 	hw_addr_length = ill->ill_phys_addr_length;
1278 	if (!hw_addr_length) {
1279 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1280 		return (NULL);
1281 	}
1282 
1283 	switch (dl_primitive) {
1284 	case DL_ENABMULTI_REQ:
1285 		length = sizeof (dl_enabmulti_req_t);
1286 		size = length + hw_addr_length;
1287 		break;
1288 	case DL_DISABMULTI_REQ:
1289 		length = sizeof (dl_disabmulti_req_t);
1290 		size = length + hw_addr_length;
1291 		break;
1292 	case DL_PROMISCON_REQ:
1293 	case DL_PROMISCOFF_REQ:
1294 		size = length = sizeof (dl_promiscon_req_t);
1295 		break;
1296 	default:
1297 		return (NULL);
1298 	}
1299 	mp = allocb(size, BPRI_HI);
1300 	if (!mp)
1301 		return (NULL);
1302 	mp->b_wptr += size;
1303 	mp->b_datap->db_type = M_PROTO;
1304 
1305 	cp = (char *)mp->b_rptr;
1306 	offset = length;
1307 
1308 	switch (dl_primitive) {
1309 	case DL_ENABMULTI_REQ: {
1310 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1311 
1312 		dl->dl_primitive = dl_primitive;
1313 		dl->dl_addr_offset = offset;
1314 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1315 		*addr_offp = offset;
1316 		break;
1317 	}
1318 	case DL_DISABMULTI_REQ: {
1319 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1320 
1321 		dl->dl_primitive = dl_primitive;
1322 		dl->dl_addr_offset = offset;
1323 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1324 		*addr_offp = offset;
1325 		break;
1326 	}
1327 	case DL_PROMISCON_REQ:
1328 	case DL_PROMISCOFF_REQ: {
1329 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1330 
1331 		dl->dl_primitive = dl_primitive;
1332 		dl->dl_level = DL_PROMISC_MULTI;
1333 		break;
1334 	}
1335 	}
1336 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1337 	    *addr_lenp, *addr_offp));
1338 	return (mp);
1339 }
1340 
1341 /*
1342  * Rejoin any groups for which we have ilms.
1343  *
1344  * This is only needed for IPMP when the cast_ill changes since that
1345  * change is invisible to the ilm. Other interface changes are handled
1346  * by conn_update_ill.
1347  */
1348 void
ill_recover_multicast(ill_t * ill)1349 ill_recover_multicast(ill_t *ill)
1350 {
1351 	ilm_t	*ilm;
1352 	char    addrbuf[INET6_ADDRSTRLEN];
1353 
1354 	ill->ill_need_recover_multicast = 0;
1355 
1356 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1357 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1358 		/*
1359 		 * If we have more then one ilm for the group (e.g., with
1360 		 * different zoneid) then we should not tell the driver
1361 		 * to join unless this is the first ilm for the group.
1362 		 */
1363 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1364 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1365 			continue;
1366 		}
1367 
1368 		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1369 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1370 
1371 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1372 			(void) ill_join_allmulti(ill);
1373 		} else {
1374 			if (ill->ill_isv6)
1375 				mld_joingroup(ilm);
1376 			else
1377 				igmp_joingroup(ilm);
1378 
1379 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1380 			    DL_ENABMULTI_REQ);
1381 		}
1382 	}
1383 	rw_exit(&ill->ill_mcast_lock);
1384 	/* Send any deferred/queued DLPI or IP packets */
1385 	ill_mcast_send_queued(ill);
1386 	ill_dlpi_send_queued(ill);
1387 	ill_mcast_timer_start(ill->ill_ipst);
1388 }
1389 
1390 /*
1391  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1392  * that were explicitly joined.
1393  *
1394  * This is only needed for IPMP when the cast_ill changes since that
1395  * change is invisible to the ilm. Other interface changes are handled
1396  * by conn_update_ill.
1397  */
1398 void
ill_leave_multicast(ill_t * ill)1399 ill_leave_multicast(ill_t *ill)
1400 {
1401 	ilm_t	*ilm;
1402 	char    addrbuf[INET6_ADDRSTRLEN];
1403 
1404 	ill->ill_need_recover_multicast = 1;
1405 
1406 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1407 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1408 		/*
1409 		 * If we have more then one ilm for the group (e.g., with
1410 		 * different zoneid) then we should not tell the driver
1411 		 * to leave unless this is the first ilm for the group.
1412 		 */
1413 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1414 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1415 			continue;
1416 		}
1417 
1418 		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1419 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1420 
1421 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1422 			ill_leave_allmulti(ill);
1423 		} else {
1424 			if (ill->ill_isv6)
1425 				mld_leavegroup(ilm);
1426 			else
1427 				igmp_leavegroup(ilm);
1428 
1429 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1430 			    DL_DISABMULTI_REQ);
1431 		}
1432 	}
1433 	rw_exit(&ill->ill_mcast_lock);
1434 	/* Send any deferred/queued DLPI or IP packets */
1435 	ill_mcast_send_queued(ill);
1436 	ill_dlpi_send_queued(ill);
1437 	ill_mcast_timer_start(ill->ill_ipst);
1438 }
1439 
1440 /*
1441  * Interface used by IP input/output.
1442  * Returns true if there is a member on the ill for any zoneid.
1443  */
1444 boolean_t
ill_hasmembers_v6(ill_t * ill,const in6_addr_t * v6group)1445 ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
1446 {
1447 	ilm_t		*ilm;
1448 
1449 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1450 	ilm = ilm_lookup(ill, v6group, ALL_ZONES);
1451 	rw_exit(&ill->ill_mcast_lock);
1452 	return (ilm != NULL);
1453 }
1454 
1455 /*
1456  * Interface used by IP input/output.
1457  * Returns true if there is a member on the ill for any zoneid.
1458  *
1459  * The group and source can't be INADDR_ANY here so no need to translate to
1460  * the unspecified IPv6 address.
1461  */
1462 boolean_t
ill_hasmembers_v4(ill_t * ill,ipaddr_t group)1463 ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
1464 {
1465 	in6_addr_t	v6group;
1466 
1467 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1468 	return (ill_hasmembers_v6(ill, &v6group));
1469 }
1470 
1471 /*
1472  * Interface used by IP input/output.
1473  * Returns true if there is a member on the ill for any zoneid except skipzone.
1474  */
1475 boolean_t
ill_hasmembers_otherzones_v6(ill_t * ill,const in6_addr_t * v6group,zoneid_t skipzone)1476 ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
1477     zoneid_t skipzone)
1478 {
1479 	ilm_t		*ilm;
1480 
1481 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1482 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1483 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1484 		    ilm->ilm_zoneid != skipzone) {
1485 			rw_exit(&ill->ill_mcast_lock);
1486 			return (B_TRUE);
1487 		}
1488 	}
1489 	rw_exit(&ill->ill_mcast_lock);
1490 	return (B_FALSE);
1491 }
1492 
1493 /*
1494  * Interface used by IP input/output.
1495  * Returns true if there is a member on the ill for any zoneid except skipzone.
1496  *
1497  * The group and source can't be INADDR_ANY here so no need to translate to
1498  * the unspecified IPv6 address.
1499  */
1500 boolean_t
ill_hasmembers_otherzones_v4(ill_t * ill,ipaddr_t group,zoneid_t skipzone)1501 ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
1502 {
1503 	in6_addr_t	v6group;
1504 
1505 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1506 	return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
1507 }
1508 
1509 /*
1510  * Interface used by IP input.
1511  * Returns the next numerically larger zoneid that has a member. If none exist
1512  * then returns -1 (ALL_ZONES).
1513  * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
1514  * to find the first zoneid which has a member, and then pass that in for
1515  * subsequent calls until ALL_ZONES is returned.
1516  *
1517  * The implementation of ill_hasmembers_nextzone() assumes the ilms
1518  * are sorted by zoneid for efficiency.
1519  */
1520 zoneid_t
ill_hasmembers_nextzone_v6(ill_t * ill,const in6_addr_t * v6group,zoneid_t zoneid)1521 ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
1522     zoneid_t zoneid)
1523 {
1524 	ilm_t		*ilm;
1525 
1526 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1527 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1528 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1529 		    ilm->ilm_zoneid > zoneid) {
1530 			zoneid = ilm->ilm_zoneid;
1531 			rw_exit(&ill->ill_mcast_lock);
1532 			return (zoneid);
1533 		}
1534 	}
1535 	rw_exit(&ill->ill_mcast_lock);
1536 	return (ALL_ZONES);
1537 }
1538 
1539 /*
1540  * Interface used by IP input.
1541  * Returns the next numerically larger zoneid that has a member. If none exist
1542  * then returns -1 (ALL_ZONES).
1543  *
1544  * The group and source can't be INADDR_ANY here so no need to translate to
1545  * the unspecified IPv6 address.
1546  */
1547 zoneid_t
ill_hasmembers_nextzone_v4(ill_t * ill,ipaddr_t group,zoneid_t zoneid)1548 ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1549 {
1550 	in6_addr_t	v6group;
1551 
1552 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1553 
1554 	return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
1555 }
1556 
1557 /*
1558  * Find an ilm matching the ill, group, and zoneid.
1559  */
1560 static ilm_t *
ilm_lookup(ill_t * ill,const in6_addr_t * v6group,zoneid_t zoneid)1561 ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1562 {
1563 	ilm_t	*ilm;
1564 
1565 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1566 
1567 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1568 		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1569 			continue;
1570 		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1571 			continue;
1572 
1573 		ASSERT(ilm->ilm_ill == ill);
1574 		return (ilm);
1575 	}
1576 	return (NULL);
1577 }
1578 
1579 /*
1580  * How many members on this ill?
1581  * Since each shared-IP zone has a separate ilm for the same group/ill
1582  * we can have several.
1583  */
1584 static int
ilm_numentries(ill_t * ill,const in6_addr_t * v6group)1585 ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
1586 {
1587 	ilm_t	*ilm;
1588 	int i = 0;
1589 
1590 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1591 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1592 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1593 			i++;
1594 		}
1595 	}
1596 	return (i);
1597 }
1598 
1599 /* Caller guarantees that the group is not already on the list */
1600 static ilm_t *
ilm_add(ill_t * ill,const in6_addr_t * v6group,ilg_stat_t ilgstat,mcast_record_t ilg_fmode,slist_t * ilg_flist,zoneid_t zoneid)1601 ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1602     mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
1603 {
1604 	ilm_t	*ilm;
1605 	ilm_t	*ilm_cur;
1606 	ilm_t	**ilm_ptpn;
1607 
1608 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1609 	ilm = GETSTRUCT(ilm_t, 1);
1610 	if (ilm == NULL)
1611 		return (NULL);
1612 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1613 		ilm->ilm_filter = l_alloc();
1614 		if (ilm->ilm_filter == NULL) {
1615 			mi_free(ilm);
1616 			return (NULL);
1617 		}
1618 	}
1619 	ilm->ilm_v6addr = *v6group;
1620 	ilm->ilm_refcnt = 1;
1621 	ilm->ilm_zoneid = zoneid;
1622 	ilm->ilm_timer = INFINITY;
1623 	ilm->ilm_rtx.rtx_timer = INFINITY;
1624 
1625 	ilm->ilm_ill = ill;
1626 	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1627 	    (char *), "ilm", (void *), ilm);
1628 	ill->ill_ilm_cnt++;
1629 
1630 	ASSERT(ill->ill_ipst);
1631 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1632 
1633 	/* The ill/ipif could have just been marked as condemned */
1634 
1635 	/*
1636 	 * To make ill_hasmembers_nextzone_v6 work we keep the list
1637 	 * sorted by zoneid.
1638 	 */
1639 	ilm_cur = ill->ill_ilm;
1640 	ilm_ptpn = &ill->ill_ilm;
1641 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
1642 		ilm_ptpn = &ilm_cur->ilm_next;
1643 		ilm_cur = ilm_cur->ilm_next;
1644 	}
1645 	ilm->ilm_next = ilm_cur;
1646 	*ilm_ptpn = ilm;
1647 
1648 	/*
1649 	 * If we have an associated ilg, use its filter state; if not,
1650 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1651 	 */
1652 	if (ilgstat != ILGSTAT_NONE) {
1653 		if (!SLIST_IS_EMPTY(ilg_flist))
1654 			l_copy(ilg_flist, ilm->ilm_filter);
1655 		ilm->ilm_fmode = ilg_fmode;
1656 	} else {
1657 		ilm->ilm_no_ilg_cnt = 1;
1658 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1659 	}
1660 
1661 	return (ilm);
1662 }
1663 
1664 void
ilm_inactive(ilm_t * ilm)1665 ilm_inactive(ilm_t *ilm)
1666 {
1667 	FREE_SLIST(ilm->ilm_filter);
1668 	FREE_SLIST(ilm->ilm_pendsrcs);
1669 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1670 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1671 	ilm->ilm_ipst = NULL;
1672 	mi_free((char *)ilm);
1673 }
1674 
1675 /*
1676  * Unlink ilm and free it.
1677  */
1678 static void
ilm_delete(ilm_t * ilm)1679 ilm_delete(ilm_t *ilm)
1680 {
1681 	ill_t		*ill = ilm->ilm_ill;
1682 	ilm_t		**ilmp;
1683 	boolean_t	need_wakeup;
1684 
1685 	/*
1686 	 * Delete under lock protection so that readers don't stumble
1687 	 * on bad ilm_next
1688 	 */
1689 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1690 
1691 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1692 		;
1693 
1694 	*ilmp = ilm->ilm_next;
1695 
1696 	mutex_enter(&ill->ill_lock);
1697 	/*
1698 	 * if we are the last reference to the ill, we may need to wakeup any
1699 	 * pending FREE or unplumb operations. This is because conn_update_ill
1700 	 * bails if there is a ilg_delete_all in progress.
1701 	 */
1702 	need_wakeup = B_FALSE;
1703 	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1704 	    (char *), "ilm", (void *), ilm);
1705 	ASSERT(ill->ill_ilm_cnt > 0);
1706 	ill->ill_ilm_cnt--;
1707 	if (ILL_FREE_OK(ill))
1708 		need_wakeup = B_TRUE;
1709 
1710 	ilm_inactive(ilm); /* frees this ilm */
1711 
1712 	if (need_wakeup) {
1713 		/* drops ill lock */
1714 		ipif_ill_refrele_tail(ill);
1715 	} else {
1716 		mutex_exit(&ill->ill_lock);
1717 	}
1718 }
1719 
1720 /*
1721  * Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
1722  * Applies to both IPv4 and IPv6, although ifaddr is only used with
1723  * IPv4.
1724  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1725  * On error it sets *errorp.
1726  */
1727 static ill_t *
ill_mcast_lookup(const in6_addr_t * group,ipaddr_t ifaddr,uint_t ifindex,zoneid_t zoneid,ip_stack_t * ipst,int * errorp)1728 ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
1729     zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
1730 {
1731 	ill_t *ill;
1732 	ipaddr_t v4group;
1733 
1734 	if (IN6_IS_ADDR_V4MAPPED(group)) {
1735 		IN6_V4MAPPED_TO_IPADDR(group, v4group);
1736 
1737 		if (ifindex != 0) {
1738 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1739 			    B_FALSE, ipst);
1740 		} else if (ifaddr != INADDR_ANY) {
1741 			ipif_t *ipif;
1742 
1743 			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
1744 			if (ipif == NULL) {
1745 				ill = NULL;
1746 			} else {
1747 				ill = ipif->ipif_ill;
1748 				ill_refhold(ill);
1749 				ipif_refrele(ipif);
1750 			}
1751 		} else {
1752 			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
1753 			    NULL);
1754 		}
1755 	} else {
1756 		if (ifindex != 0) {
1757 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1758 			    B_TRUE, ipst);
1759 		} else {
1760 			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
1761 			    NULL);
1762 		}
1763 	}
1764 	if (ill == NULL) {
1765 		if (ifindex != 0)
1766 			*errorp = ENXIO;
1767 		else
1768 			*errorp = EADDRNOTAVAIL;
1769 		return (NULL);
1770 	}
1771 	/* operation not supported on the virtual network interface */
1772 	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
1773 		ill_refrele(ill);
1774 		*errorp = EINVAL;
1775 		return (NULL);
1776 	}
1777 	return (ill);
1778 }
1779 
1780 /*
1781  * Looks up the appropriate ill given an interface index (or interface address)
1782  * and multicast group.  On success, returns 0, with *illpp pointing to the
1783  * found struct.  On failure, returns an errno and *illpp is set to NULL.
1784  *
1785  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1786  *
1787  * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
1788  * case of IPv4.
1789  */
1790 int
ip_opt_check(conn_t * connp,const in6_addr_t * v6group,const in6_addr_t * v6src,ipaddr_t ifaddr,uint_t ifindex,ill_t ** illpp)1791 ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
1792     const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
1793 {
1794 	boolean_t src_unspec;
1795 	ill_t *ill = NULL;
1796 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1797 	int error = 0;
1798 
1799 	*illpp = NULL;
1800 
1801 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1802 
1803 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1804 		ipaddr_t v4group;
1805 		ipaddr_t v4src;
1806 
1807 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1808 			return (EINVAL);
1809 		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1810 		if (src_unspec) {
1811 			v4src = INADDR_ANY;
1812 		} else {
1813 			IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1814 		}
1815 		if (!CLASSD(v4group) || CLASSD(v4src))
1816 			return (EINVAL);
1817 	} else {
1818 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1819 			return (EINVAL);
1820 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1821 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1822 			return (EINVAL);
1823 		}
1824 	}
1825 
1826 	ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
1827 	    ipst, &error);
1828 	*illpp = ill;
1829 	return (error);
1830 }
1831 
1832 static int
ip_get_srcfilter(conn_t * connp,struct group_filter * gf,struct ip_msfilter * imsf,const struct in6_addr * group,boolean_t issin6)1833 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1834     struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
1835 {
1836 	ilg_t *ilg;
1837 	int i, numsrc, fmode, outsrcs;
1838 	struct sockaddr_in *sin;
1839 	struct sockaddr_in6 *sin6;
1840 	struct in_addr *addrp;
1841 	slist_t *fp;
1842 	boolean_t is_v4only_api;
1843 	ipaddr_t ifaddr;
1844 	uint_t ifindex;
1845 
1846 	if (gf == NULL) {
1847 		ASSERT(imsf != NULL);
1848 		ASSERT(!issin6);
1849 		is_v4only_api = B_TRUE;
1850 		outsrcs = imsf->imsf_numsrc;
1851 		ifaddr = imsf->imsf_interface.s_addr;
1852 		ifindex = 0;
1853 	} else {
1854 		ASSERT(imsf == NULL);
1855 		is_v4only_api = B_FALSE;
1856 		outsrcs = gf->gf_numsrc;
1857 		ifaddr = INADDR_ANY;
1858 		ifindex = gf->gf_interface;
1859 	}
1860 
1861 	/* No need to use ill_mcast_serializer for the reader */
1862 	rw_enter(&connp->conn_ilg_lock, RW_READER);
1863 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1864 	if (ilg == NULL) {
1865 		rw_exit(&connp->conn_ilg_lock);
1866 		return (EADDRNOTAVAIL);
1867 	}
1868 
1869 	/*
1870 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1871 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1872 	 * So we need to translate here.
1873 	 */
1874 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
1875 	    MCAST_INCLUDE : MCAST_EXCLUDE;
1876 	if ((fp = ilg->ilg_filter) == NULL) {
1877 		numsrc = 0;
1878 	} else {
1879 		for (i = 0; i < outsrcs; i++) {
1880 			if (i == fp->sl_numsrc)
1881 				break;
1882 			if (issin6) {
1883 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1884 				sin6->sin6_family = AF_INET6;
1885 				sin6->sin6_addr = fp->sl_addr[i];
1886 			} else {
1887 				if (is_v4only_api) {
1888 					addrp = &imsf->imsf_slist[i];
1889 				} else {
1890 					sin = (struct sockaddr_in *)
1891 					    &gf->gf_slist[i];
1892 					sin->sin_family = AF_INET;
1893 					addrp = &sin->sin_addr;
1894 				}
1895 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
1896 			}
1897 		}
1898 		numsrc = fp->sl_numsrc;
1899 	}
1900 
1901 	if (is_v4only_api) {
1902 		imsf->imsf_numsrc = numsrc;
1903 		imsf->imsf_fmode = fmode;
1904 	} else {
1905 		gf->gf_numsrc = numsrc;
1906 		gf->gf_fmode = fmode;
1907 	}
1908 
1909 	rw_exit(&connp->conn_ilg_lock);
1910 
1911 	return (0);
1912 }
1913 
1914 /*
1915  * Common for IPv4 and IPv6.
1916  */
1917 static int
ip_set_srcfilter(conn_t * connp,struct group_filter * gf,struct ip_msfilter * imsf,const struct in6_addr * group,ill_t * ill,boolean_t issin6)1918 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
1919     struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
1920     boolean_t issin6)
1921 {
1922 	ilg_t *ilg;
1923 	int i, err, infmode, new_fmode;
1924 	uint_t insrcs;
1925 	struct sockaddr_in *sin;
1926 	struct sockaddr_in6 *sin6;
1927 	struct in_addr *addrp;
1928 	slist_t *orig_filter = NULL;
1929 	slist_t *new_filter = NULL;
1930 	mcast_record_t orig_fmode;
1931 	boolean_t leave_group, is_v4only_api;
1932 	ilg_stat_t ilgstat;
1933 	ilm_t *ilm;
1934 	ipaddr_t ifaddr;
1935 	uint_t ifindex;
1936 
1937 	if (gf == NULL) {
1938 		ASSERT(imsf != NULL);
1939 		ASSERT(!issin6);
1940 		is_v4only_api = B_TRUE;
1941 		insrcs = imsf->imsf_numsrc;
1942 		infmode = imsf->imsf_fmode;
1943 		ifaddr = imsf->imsf_interface.s_addr;
1944 		ifindex = 0;
1945 	} else {
1946 		ASSERT(imsf == NULL);
1947 		is_v4only_api = B_FALSE;
1948 		insrcs = gf->gf_numsrc;
1949 		infmode = gf->gf_fmode;
1950 		ifaddr = INADDR_ANY;
1951 		ifindex = gf->gf_interface;
1952 	}
1953 
1954 	/* Make sure we can handle the source list */
1955 	if (insrcs > MAX_FILTER_SIZE)
1956 		return (ENOBUFS);
1957 
1958 	/*
1959 	 * setting the filter to (INCLUDE, NULL) is treated
1960 	 * as a request to leave the group.
1961 	 */
1962 	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);
1963 
1964 	mutex_enter(&ill->ill_mcast_serializer);
1965 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1966 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1967 	if (ilg == NULL) {
1968 		/*
1969 		 * if the request was actually to leave, and we
1970 		 * didn't find an ilg, there's nothing to do.
1971 		 */
1972 		if (leave_group) {
1973 			rw_exit(&connp->conn_ilg_lock);
1974 			mutex_exit(&ill->ill_mcast_serializer);
1975 			return (0);
1976 		}
1977 		ilg = conn_ilg_alloc(connp, &err);
1978 		if (ilg == NULL) {
1979 			rw_exit(&connp->conn_ilg_lock);
1980 			mutex_exit(&ill->ill_mcast_serializer);
1981 			return (err);
1982 		}
1983 		ilgstat = ILGSTAT_NEW;
1984 		ilg->ilg_v6group = *group;
1985 		ilg->ilg_ill = ill;
1986 		ilg->ilg_ifaddr = ifaddr;
1987 		ilg->ilg_ifindex = ifindex;
1988 	} else if (leave_group) {
1989 		/*
1990 		 * Make sure we have the correct serializer. The ill argument
1991 		 * might not match ilg_ill.
1992 		 */
1993 		ilg_refhold(ilg);
1994 		mutex_exit(&ill->ill_mcast_serializer);
1995 		ill = ilg->ilg_ill;
1996 		rw_exit(&connp->conn_ilg_lock);
1997 
1998 		mutex_enter(&ill->ill_mcast_serializer);
1999 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2000 		ilm = ilg->ilg_ilm;
2001 		ilg->ilg_ilm = NULL;
2002 		ilg_delete(connp, ilg, NULL);
2003 		ilg_refrele(ilg);
2004 		rw_exit(&connp->conn_ilg_lock);
2005 		if (ilm != NULL)
2006 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2007 		mutex_exit(&ill->ill_mcast_serializer);
2008 		/*
2009 		 * Now that all locks have been dropped, we can send any
2010 		 * deferred/queued DLPI or IP packets
2011 		 */
2012 		ill_mcast_send_queued(ill);
2013 		ill_dlpi_send_queued(ill);
2014 		return (0);
2015 	} else {
2016 		ilgstat = ILGSTAT_CHANGE;
2017 		/* Preserve existing state in case ip_addmulti() fails */
2018 		orig_fmode = ilg->ilg_fmode;
2019 		if (ilg->ilg_filter == NULL) {
2020 			orig_filter = NULL;
2021 		} else {
2022 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2023 			if (orig_filter == NULL) {
2024 				rw_exit(&connp->conn_ilg_lock);
2025 				mutex_exit(&ill->ill_mcast_serializer);
2026 				return (ENOMEM);
2027 			}
2028 		}
2029 	}
2030 
2031 	/*
2032 	 * Alloc buffer to copy new state into (see below) before
2033 	 * we make any changes, so we can bail if it fails.
2034 	 */
2035 	if ((new_filter = l_alloc()) == NULL) {
2036 		rw_exit(&connp->conn_ilg_lock);
2037 		err = ENOMEM;
2038 		goto free_and_exit;
2039 	}
2040 
2041 	if (insrcs == 0) {
2042 		CLEAR_SLIST(ilg->ilg_filter);
2043 	} else {
2044 		slist_t *fp;
2045 		if (ilg->ilg_filter == NULL) {
2046 			fp = l_alloc();
2047 			if (fp == NULL) {
2048 				if (ilgstat == ILGSTAT_NEW)
2049 					ilg_delete(connp, ilg, NULL);
2050 				rw_exit(&connp->conn_ilg_lock);
2051 				err = ENOMEM;
2052 				goto free_and_exit;
2053 			}
2054 		} else {
2055 			fp = ilg->ilg_filter;
2056 		}
2057 		for (i = 0; i < insrcs; i++) {
2058 			if (issin6) {
2059 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2060 				fp->sl_addr[i] = sin6->sin6_addr;
2061 			} else {
2062 				if (is_v4only_api) {
2063 					addrp = &imsf->imsf_slist[i];
2064 				} else {
2065 					sin = (struct sockaddr_in *)
2066 					    &gf->gf_slist[i];
2067 					addrp = &sin->sin_addr;
2068 				}
2069 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2070 			}
2071 		}
2072 		fp->sl_numsrc = insrcs;
2073 		ilg->ilg_filter = fp;
2074 	}
2075 	/*
2076 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2077 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2078 	 * So we need to translate here.
2079 	 */
2080 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2081 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2082 
2083 	/*
2084 	 * Save copy of ilg's filter state to pass to other functions,
2085 	 * so we can release conn_ilg_lock now.
2086 	 */
2087 	new_fmode = ilg->ilg_fmode;
2088 	l_copy(ilg->ilg_filter, new_filter);
2089 
2090 	rw_exit(&connp->conn_ilg_lock);
2091 
2092 	/*
2093 	 * Now update the ill. We wait to do this until after the ilg
2094 	 * has been updated because we need to update the src filter
2095 	 * info for the ill, which involves looking at the status of
2096 	 * all the ilgs associated with this group/interface pair.
2097 	 */
2098 	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
2099 	    new_fmode, new_filter, &err);
2100 
2101 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2102 	/*
2103 	 * Must look up the ilg again since we've not been holding
2104 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2105 	 * having called conn_update_ill, which can run once we dropped the
2106 	 * conn_ilg_lock above.
2107 	 */
2108 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
2109 	if (ilg == NULL) {
2110 		rw_exit(&connp->conn_ilg_lock);
2111 		if (ilm != NULL) {
2112 			(void) ip_delmulti_serial(ilm, B_FALSE,
2113 			    (ilgstat == ILGSTAT_NEW));
2114 		}
2115 		err = ENXIO;
2116 		goto free_and_exit;
2117 	}
2118 
2119 	if (ilm != NULL) {
2120 		if (ilg->ilg_ill == NULL) {
2121 			/* some other thread is re-attaching this.  */
2122 			rw_exit(&connp->conn_ilg_lock);
2123 			(void) ip_delmulti_serial(ilm, B_FALSE,
2124 			    (ilgstat == ILGSTAT_NEW));
2125 			err = 0;
2126 			goto free_and_exit;
2127 		}
2128 		/* Succeeded. Update the ilg to point at the ilm */
2129 		if (ilgstat == ILGSTAT_NEW) {
2130 			if (ilg->ilg_ilm == NULL) {
2131 				ilg->ilg_ilm = ilm;
2132 				ilm->ilm_ifaddr = ifaddr; /* For netstat */
2133 			} else {
2134 				/* some other thread is re-attaching this. */
2135 				rw_exit(&connp->conn_ilg_lock);
2136 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2137 				err = 0;
2138 				goto free_and_exit;
2139 			}
2140 		} else {
2141 			/*
2142 			 * ip_addmulti didn't get a held ilm for
2143 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2144 			 */
2145 			ASSERT(ilg->ilg_ilm == ilm);
2146 		}
2147 	} else {
2148 		ASSERT(err != 0);
2149 		/*
2150 		 * Failed to allocate the ilm.
2151 		 * Restore the original filter state, or delete the
2152 		 * newly-created ilg.
2153 		 * If ENETDOWN just clear ill_ilg since so that we
2154 		 * will rejoin when the ill comes back; don't report ENETDOWN
2155 		 * to application.
2156 		 */
2157 		if (ilgstat == ILGSTAT_NEW) {
2158 			if (err == ENETDOWN) {
2159 				ilg->ilg_ill = NULL;
2160 				err = 0;
2161 			} else {
2162 				ilg_delete(connp, ilg, NULL);
2163 			}
2164 		} else {
2165 			ilg->ilg_fmode = orig_fmode;
2166 			if (SLIST_IS_EMPTY(orig_filter)) {
2167 				CLEAR_SLIST(ilg->ilg_filter);
2168 			} else {
2169 				/*
2170 				 * We didn't free the filter, even if we
2171 				 * were trying to make the source list empty;
2172 				 * so if orig_filter isn't empty, the ilg
2173 				 * must still have a filter alloc'd.
2174 				 */
2175 				l_copy(orig_filter, ilg->ilg_filter);
2176 			}
2177 		}
2178 	}
2179 	rw_exit(&connp->conn_ilg_lock);
2180 
2181 free_and_exit:
2182 	mutex_exit(&ill->ill_mcast_serializer);
2183 	ill_mcast_send_queued(ill);
2184 	ill_dlpi_send_queued(ill);
2185 	l_free(orig_filter);
2186 	l_free(new_filter);
2187 
2188 	return (err);
2189 }
2190 
2191 /*
2192  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2193  */
2194 /* ARGSUSED */
2195 int
ip_sioctl_msfilter(ipif_t * ipif,sin_t * dummy_sin,queue_t * q,mblk_t * mp,ip_ioctl_cmd_t * ipip,void * ifreq)2196 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2197     ip_ioctl_cmd_t *ipip, void *ifreq)
2198 {
2199 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2200 	/* existence verified in ip_wput_nondata() */
2201 	mblk_t *data_mp = mp->b_cont->b_cont;
2202 	int datalen, err, cmd, minsize;
2203 	uint_t expsize = 0;
2204 	conn_t *connp;
2205 	boolean_t isv6, is_v4only_api, getcmd;
2206 	struct sockaddr_in *gsin;
2207 	struct sockaddr_in6 *gsin6;
2208 	ipaddr_t v4group;
2209 	in6_addr_t v6group;
2210 	struct group_filter *gf = NULL;
2211 	struct ip_msfilter *imsf = NULL;
2212 	mblk_t *ndp;
2213 	ill_t *ill;
2214 
2215 	connp = Q_TO_CONN(q);
2216 	err = ip_msfilter_ill(connp, mp, ipip, &ill);
2217 	if (err != 0)
2218 		return (err);
2219 
2220 	if (data_mp->b_cont != NULL) {
2221 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2222 			return (ENOMEM);
2223 		freemsg(data_mp);
2224 		data_mp = ndp;
2225 		mp->b_cont->b_cont = data_mp;
2226 	}
2227 
2228 	cmd = iocp->ioc_cmd;
2229 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2230 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2231 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2232 	datalen = MBLKL(data_mp);
2233 
2234 	if (datalen < minsize)
2235 		return (EINVAL);
2236 
2237 	/*
2238 	 * now we know we have at least have the initial structure,
2239 	 * but need to check for the source list array.
2240 	 */
2241 	if (is_v4only_api) {
2242 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2243 		isv6 = B_FALSE;
2244 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2245 	} else {
2246 		gf = (struct group_filter *)data_mp->b_rptr;
2247 		if (gf->gf_group.ss_family == AF_INET6) {
2248 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2249 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2250 		} else {
2251 			isv6 = B_FALSE;
2252 		}
2253 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2254 	}
2255 	if (datalen < expsize)
2256 		return (EINVAL);
2257 
2258 	if (isv6) {
2259 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2260 		v6group = gsin6->sin6_addr;
2261 		if (getcmd) {
2262 			err = ip_get_srcfilter(connp, gf, NULL, &v6group,
2263 			    B_TRUE);
2264 		} else {
2265 			err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
2266 			    B_TRUE);
2267 		}
2268 	} else {
2269 		boolean_t issin6 = B_FALSE;
2270 		if (is_v4only_api) {
2271 			v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2272 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2273 		} else {
2274 			if (gf->gf_group.ss_family == AF_INET) {
2275 				gsin = (struct sockaddr_in *)&gf->gf_group;
2276 				v4group = (ipaddr_t)gsin->sin_addr.s_addr;
2277 				IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2278 			} else {
2279 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2280 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2281 				    v4group);
2282 				issin6 = B_TRUE;
2283 			}
2284 		}
2285 		/*
2286 		 * INADDR_ANY is represented as the IPv6 unspecifed addr.
2287 		 */
2288 		if (v4group == INADDR_ANY)
2289 			v6group = ipv6_all_zeros;
2290 		else
2291 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2292 
2293 		if (getcmd) {
2294 			err = ip_get_srcfilter(connp, gf, imsf, &v6group,
2295 			    issin6);
2296 		} else {
2297 			err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
2298 			    issin6);
2299 		}
2300 	}
2301 	ill_refrele(ill);
2302 
2303 	return (err);
2304 }
2305 
2306 /*
2307  * Determine the ill for the SIOC*MSFILTER ioctls
2308  *
2309  * Returns an error for IS_UNDER_IPMP interfaces.
2310  *
2311  * Finds the ill based on information in the ioctl headers.
2312  */
2313 static int
ip_msfilter_ill(conn_t * connp,mblk_t * mp,const ip_ioctl_cmd_t * ipip,ill_t ** illp)2314 ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2315     ill_t **illp)
2316 {
2317 	int cmd = ipip->ipi_cmd;
2318 	int err = 0;
2319 	ill_t *ill;
2320 	/* caller has verified this mblk exists */
2321 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2322 	struct ip_msfilter *imsf;
2323 	struct group_filter *gf;
2324 	ipaddr_t v4addr, v4group;
2325 	in6_addr_t v6group;
2326 	uint32_t index;
2327 	ip_stack_t *ipst;
2328 
2329 	ipst = connp->conn_netstack->netstack_ip;
2330 
2331 	*illp = NULL;
2332 
2333 	/* don't allow multicast operations on a tcp conn */
2334 	if (IPCL_IS_TCP(connp))
2335 		return (ENOPROTOOPT);
2336 
2337 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2338 		/* don't allow v4-specific ioctls on v6 socket */
2339 		if (connp->conn_family == AF_INET6)
2340 			return (EAFNOSUPPORT);
2341 
2342 		imsf = (struct ip_msfilter *)dbuf;
2343 		v4addr = imsf->imsf_interface.s_addr;
2344 		v4group = imsf->imsf_multiaddr.s_addr;
2345 		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2346 		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
2347 		    ipst, &err);
2348 		if (ill == NULL && v4addr != INADDR_ANY)
2349 			err = ENXIO;
2350 	} else {
2351 		gf = (struct group_filter *)dbuf;
2352 		index = gf->gf_interface;
2353 		if (gf->gf_group.ss_family == AF_INET6) {
2354 			struct sockaddr_in6 *sin6;
2355 
2356 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2357 			v6group = sin6->sin6_addr;
2358 		} else if (gf->gf_group.ss_family == AF_INET) {
2359 			struct sockaddr_in *sin;
2360 
2361 			sin = (struct sockaddr_in *)&gf->gf_group;
2362 			v4group = sin->sin_addr.s_addr;
2363 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2364 		} else {
2365 			return (EAFNOSUPPORT);
2366 		}
2367 		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
2368 		    IPCL_ZONEID(connp), ipst, &err);
2369 	}
2370 	*illp = ill;
2371 	return (err);
2372 }
2373 
2374 /*
2375  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2376  * in in two stages, as the first copyin tells us the size of the attached
2377  * source buffer.  This function is called by ip_wput_nondata() after the
2378  * first copyin has completed; it figures out how big the second stage
2379  * needs to be, and kicks it off.
2380  *
2381  * In some cases (numsrc < 2), the second copyin is not needed as the
2382  * first one gets a complete structure containing 1 source addr.
2383  *
2384  * The function returns 0 if a second copyin has been started (i.e. there's
2385  * no more work to be done right now), or 1 if the second copyin is not
2386  * needed and ip_wput_nondata() can continue its processing.
2387  */
2388 int
ip_copyin_msfilter(queue_t * q,mblk_t * mp)2389 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2390 {
2391 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2392 	int cmd = iocp->ioc_cmd;
2393 	/* validity of this checked in ip_wput_nondata() */
2394 	mblk_t *mp1 = mp->b_cont->b_cont;
2395 	int copysize = 0;
2396 	int offset;
2397 
2398 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2399 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2400 		if (gf->gf_numsrc >= 2) {
2401 			offset = sizeof (struct group_filter);
2402 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2403 		}
2404 	} else {
2405 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2406 		if (imsf->imsf_numsrc >= 2) {
2407 			offset = sizeof (struct ip_msfilter);
2408 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2409 		}
2410 	}
2411 	if (copysize > 0) {
2412 		mi_copyin_n(q, mp, offset, copysize);
2413 		return (0);
2414 	}
2415 	return (1);
2416 }
2417 
2418 /*
2419  * Handle the following optmgmt:
2420  *	IP_ADD_MEMBERSHIP		must not have joined already
2421  *	IPV6_JOIN_GROUP			must not have joined already
2422  *	MCAST_JOIN_GROUP		must not have joined already
2423  *	IP_BLOCK_SOURCE			must have joined already
2424  *	MCAST_BLOCK_SOURCE		must have joined already
2425  *	IP_JOIN_SOURCE_GROUP		may have joined already
2426  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2427  *
2428  * fmode and src parameters may be used to determine which option is
2429  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2430  * are functionally equivalent):
2431  *	opt			fmode			v6src
2432  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		unspecified
2433  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2434  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2435  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		IPv4-mapped addr
2436  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2437  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		IPv4-mapped addr
2438  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2439  *
2440  * Changing the filter mode is not allowed; if a matching ilg already
2441  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2442  *
2443  * Verifies that there is a source address of appropriate scope for
2444  * the group; if not, EADDRNOTAVAIL is returned.
2445  *
2446  * The interface to be used may be identified by an IPv4 address or by an
2447  * interface index.
2448  *
2449  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2450  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2451  * v6src is also v4-mapped.
2452  */
2453 int
ip_opt_add_group(conn_t * connp,boolean_t checkonly,const in6_addr_t * v6group,ipaddr_t ifaddr,uint_t ifindex,mcast_record_t fmode,const in6_addr_t * v6src)2454 ip_opt_add_group(conn_t *connp, boolean_t checkonly,
2455     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2456     mcast_record_t fmode, const in6_addr_t *v6src)
2457 {
2458 	ill_t *ill;
2459 	char buf[INET6_ADDRSTRLEN];
2460 	int	err;
2461 
2462 	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
2463 	if (err != 0) {
2464 		ip1dbg(("ip_opt_add_group: no ill for group %s/"
2465 		    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2466 		    sizeof (buf)), ifindex));
2467 		return (err);
2468 	}
2469 
2470 	if (checkonly) {
2471 		/*
2472 		 * do not do operation, just pretend to - new T_CHECK
2473 		 * semantics. The error return case above if encountered
2474 		 * considered a good enough "check" here.
2475 		 */
2476 		ill_refrele(ill);
2477 		return (0);
2478 	}
2479 	mutex_enter(&ill->ill_mcast_serializer);
2480 	/*
2481 	 * Multicast groups may not be joined on interfaces that are either
2482 	 * already underlying interfaces in an IPMP group, or in the process
2483 	 * of joining the IPMP group. The latter condition is enforced by
2484 	 * checking the value of ill->ill_grp_pending under the
2485 	 * ill_mcast_serializer lock.  We cannot serialize the
2486 	 * ill_grp_pending check on the ill_g_lock across ilg_add() because
2487 	 *  ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex
2488 	 * will take the ill_g_lock itself. Instead, we hold the
2489 	 * ill_mcast_serializer.
2490 	 */
2491 	if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) {
2492 		DTRACE_PROBE2(group__add__on__under, ill_t *, ill,
2493 		    in6_addr_t *, v6group);
2494 		mutex_exit(&ill->ill_mcast_serializer);
2495 		ill_refrele(ill);
2496 		return (EADDRNOTAVAIL);
2497 	}
2498 	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
2499 	mutex_exit(&ill->ill_mcast_serializer);
2500 	/*
2501 	 * We have done an addmulti_impl and/or delmulti_impl.
2502 	 * All locks have been dropped, we can send any
2503 	 * deferred/queued DLPI or IP packets
2504 	 */
2505 	ill_mcast_send_queued(ill);
2506 	ill_dlpi_send_queued(ill);
2507 	ill_refrele(ill);
2508 	return (err);
2509 }
2510 
2511 /*
2512  * Common for IPv6 and IPv4.
2513  * Here we handle ilgs that are still attached to their original ill
2514  * (the one ifaddr/ifindex points at), as well as detached ones.
2515  * The detached ones might have been attached to some other ill.
2516  */
2517 static int
ip_opt_delete_group_excl(conn_t * connp,const in6_addr_t * v6group,ipaddr_t ifaddr,uint_t ifindex,mcast_record_t fmode,const in6_addr_t * v6src)2518 ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
2519     ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
2520     const in6_addr_t *v6src)
2521 {
2522 	ilg_t	*ilg;
2523 	boolean_t leaving;
2524 	ilm_t *ilm;
2525 	ill_t *ill;
2526 	int err = 0;
2527 
2528 retry:
2529 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2530 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2531 	if (ilg == NULL) {
2532 		rw_exit(&connp->conn_ilg_lock);
2533 		/*
2534 		 * Since we didn't have any ilg we now do the error checks
2535 		 * to determine the best errno.
2536 		 */
2537 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2538 		    &ill);
2539 		if (ill != NULL) {
2540 			/* The only error was a missing ilg for the group */
2541 			ill_refrele(ill);
2542 			err = EADDRNOTAVAIL;
2543 		}
2544 		return (err);
2545 	}
2546 
2547 	/* If the ilg is attached then we serialize using that ill */
2548 	ill = ilg->ilg_ill;
2549 	if (ill != NULL) {
2550 		/* Prevent the ill and ilg from being freed */
2551 		ill_refhold(ill);
2552 		ilg_refhold(ilg);
2553 		rw_exit(&connp->conn_ilg_lock);
2554 		mutex_enter(&ill->ill_mcast_serializer);
2555 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2556 		if (ilg->ilg_condemned) {
2557 			/* Disappeared */
2558 			ilg_refrele(ilg);
2559 			rw_exit(&connp->conn_ilg_lock);
2560 			mutex_exit(&ill->ill_mcast_serializer);
2561 			ill_refrele(ill);
2562 			goto retry;
2563 		}
2564 	}
2565 
2566 	/*
2567 	 * Decide if we're actually deleting the ilg or just removing a
2568 	 * source filter address; if just removing an addr, make sure we
2569 	 * aren't trying to change the filter mode, and that the addr is
2570 	 * actually in our filter list already.  If we're removing the
2571 	 * last src in an include list, just delete the ilg.
2572 	 */
2573 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2574 		leaving = B_TRUE;
2575 	} else {
2576 		if (fmode != ilg->ilg_fmode)
2577 			err = EINVAL;
2578 		else if (ilg->ilg_filter == NULL ||
2579 		    !list_has_addr(ilg->ilg_filter, v6src))
2580 			err = EADDRNOTAVAIL;
2581 		if (err != 0) {
2582 			if (ill != NULL)
2583 				ilg_refrele(ilg);
2584 			rw_exit(&connp->conn_ilg_lock);
2585 			goto done;
2586 		}
2587 		if (fmode == MODE_IS_INCLUDE &&
2588 		    ilg->ilg_filter->sl_numsrc == 1) {
2589 			leaving = B_TRUE;
2590 			v6src = NULL;
2591 		} else {
2592 			leaving = B_FALSE;
2593 		}
2594 	}
2595 	ilm = ilg->ilg_ilm;
2596 	if (leaving)
2597 		ilg->ilg_ilm = NULL;
2598 
2599 	ilg_delete(connp, ilg, v6src);
2600 	if (ill != NULL)
2601 		ilg_refrele(ilg);
2602 	rw_exit(&connp->conn_ilg_lock);
2603 
2604 	if (ilm != NULL) {
2605 		ASSERT(ill != NULL);
2606 		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
2607 	}
2608 done:
2609 	if (ill != NULL) {
2610 		mutex_exit(&ill->ill_mcast_serializer);
2611 		/*
2612 		 * Now that all locks have been dropped, we can
2613 		 * send any deferred/queued DLPI or IP packets
2614 		 */
2615 		ill_mcast_send_queued(ill);
2616 		ill_dlpi_send_queued(ill);
2617 		ill_refrele(ill);
2618 	}
2619 	return (err);
2620 }
2621 
2622 /*
2623  * Handle the following optmgmt:
2624  *	IP_DROP_MEMBERSHIP		will leave
2625  *	IPV6_LEAVE_GROUP		will leave
2626  *	MCAST_LEAVE_GROUP		will leave
2627  *	IP_UNBLOCK_SOURCE		will not leave
2628  *	MCAST_UNBLOCK_SOURCE		will not leave
2629  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2630  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2631  *
2632  * fmode and src parameters may be used to determine which option is
2633  * being set, as follows:
2634  *	opt			 fmode			v6src
2635  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	unspecified
2636  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2637  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2638  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	IPv4-mapped addr
2639  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
2640  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	IPv4-mapped addr
2641  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
2642  *
2643  * Changing the filter mode is not allowed; if a matching ilg already
2644  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2645  *
2646  * The interface to be used may be identified by an IPv4 address or by an
2647  * interface index.
2648  *
2649  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2650  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2651  * v6src is also v4-mapped.
2652  */
2653 int
ip_opt_delete_group(conn_t * connp,boolean_t checkonly,const in6_addr_t * v6group,ipaddr_t ifaddr,uint_t ifindex,mcast_record_t fmode,const in6_addr_t * v6src)2654 ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
2655     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2656     mcast_record_t fmode, const in6_addr_t *v6src)
2657 {
2658 
2659 	/*
2660 	 * In the normal case below we don't check for the ill existing.
2661 	 * Instead we look for an existing ilg in _excl.
2662 	 * If checkonly we sanity check the arguments
2663 	 */
2664 	if (checkonly) {
2665 		ill_t	*ill;
2666 		int	err;
2667 
2668 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2669 		    &ill);
2670 		/*
2671 		 * do not do operation, just pretend to - new T_CHECK semantics.
2672 		 * ip_opt_check is considered a good enough "check" here.
2673 		 */
2674 		if (ill != NULL)
2675 			ill_refrele(ill);
2676 		return (err);
2677 	}
2678 	return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex,
2679 	    fmode, v6src));
2680 }
2681 
2682 /*
2683  * Group mgmt for upper conn that passes things down
2684  * to the interface multicast list (and DLPI)
2685  * These routines can handle new style options that specify an interface name
2686  * as opposed to an interface address (needed for general handling of
2687  * unnumbered interfaces.)
2688  */
2689 
2690 /*
2691  * Add a group to an upper conn group data structure and pass things down
2692  * to the interface multicast list (and DLPI)
2693  * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
2694  */
2695 static int
ilg_add(conn_t * connp,const in6_addr_t * v6group,ipaddr_t ifaddr,uint_t ifindex,ill_t * ill,mcast_record_t fmode,const in6_addr_t * v6src)2696 ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2697     uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2698 {
2699 	int	error = 0;
2700 	ilg_t	*ilg;
2701 	ilg_stat_t ilgstat;
2702 	slist_t	*new_filter = NULL;
2703 	int	new_fmode;
2704 	ilm_t *ilm;
2705 
2706 	if (!(ill->ill_flags & ILLF_MULTICAST))
2707 		return (EADDRNOTAVAIL);
2708 
2709 	/* conn_ilg_lock protects the ilg list. */
2710 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
2711 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2712 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2713 
2714 	/*
2715 	 * Depending on the option we're handling, may or may not be okay
2716 	 * if group has already been added.  Figure out our rules based
2717 	 * on fmode and src params.  Also make sure there's enough room
2718 	 * in the filter if we're adding a source to an existing filter.
2719 	 */
2720 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2721 		/* we're joining for all sources, must not have joined */
2722 		if (ilg != NULL)
2723 			error = EADDRINUSE;
2724 	} else {
2725 		if (fmode == MODE_IS_EXCLUDE) {
2726 			/* (excl {addr}) => block source, must have joined */
2727 			if (ilg == NULL)
2728 				error = EADDRNOTAVAIL;
2729 		}
2730 		/* (incl {addr}) => join source, may have joined */
2731 
2732 		if (ilg != NULL &&
2733 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
2734 			error = ENOBUFS;
2735 	}
2736 	if (error != 0) {
2737 		rw_exit(&connp->conn_ilg_lock);
2738 		return (error);
2739 	}
2740 
2741 	/*
2742 	 * Alloc buffer to copy new state into (see below) before
2743 	 * we make any changes, so we can bail if it fails.
2744 	 */
2745 	if ((new_filter = l_alloc()) == NULL) {
2746 		rw_exit(&connp->conn_ilg_lock);
2747 		return (ENOMEM);
2748 	}
2749 
2750 	if (ilg == NULL) {
2751 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
2752 			rw_exit(&connp->conn_ilg_lock);
2753 			l_free(new_filter);
2754 			return (error);
2755 		}
2756 		ilg->ilg_ifindex = ifindex;
2757 		ilg->ilg_ifaddr = ifaddr;
2758 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2759 			ilg->ilg_filter = l_alloc();
2760 			if (ilg->ilg_filter == NULL) {
2761 				ilg_delete(connp, ilg, NULL);
2762 				rw_exit(&connp->conn_ilg_lock);
2763 				l_free(new_filter);
2764 				return (ENOMEM);
2765 			}
2766 			ilg->ilg_filter->sl_numsrc = 1;
2767 			ilg->ilg_filter->sl_addr[0] = *v6src;
2768 		}
2769 		ilgstat = ILGSTAT_NEW;
2770 		ilg->ilg_v6group = *v6group;
2771 		ilg->ilg_fmode = fmode;
2772 		ilg->ilg_ill = ill;
2773 	} else {
2774 		int index;
2775 
2776 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2777 			rw_exit(&connp->conn_ilg_lock);
2778 			l_free(new_filter);
2779 			return (EINVAL);
2780 		}
2781 		if (ilg->ilg_filter == NULL) {
2782 			ilg->ilg_filter = l_alloc();
2783 			if (ilg->ilg_filter == NULL) {
2784 				rw_exit(&connp->conn_ilg_lock);
2785 				l_free(new_filter);
2786 				return (ENOMEM);
2787 			}
2788 		}
2789 		if (list_has_addr(ilg->ilg_filter, v6src)) {
2790 			rw_exit(&connp->conn_ilg_lock);
2791 			l_free(new_filter);
2792 			return (EADDRNOTAVAIL);
2793 		}
2794 		ilgstat = ILGSTAT_CHANGE;
2795 		index = ilg->ilg_filter->sl_numsrc++;
2796 		ilg->ilg_filter->sl_addr[index] = *v6src;
2797 	}
2798 
2799 	/*
2800 	 * Save copy of ilg's filter state to pass to other functions,
2801 	 * so we can release conn_ilg_lock now.
2802 	 */
2803 	new_fmode = ilg->ilg_fmode;
2804 	l_copy(ilg->ilg_filter, new_filter);
2805 
2806 	rw_exit(&connp->conn_ilg_lock);
2807 
2808 	/*
2809 	 * Now update the ill. We wait to do this until after the ilg
2810 	 * has been updated because we need to update the src filter
2811 	 * info for the ill, which involves looking at the status of
2812 	 * all the ilgs associated with this group/interface pair.
2813 	 */
2814 	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
2815 	    new_fmode, new_filter, &error);
2816 
2817 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2818 	/*
2819 	 * Must look up the ilg again since we've not been holding
2820 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2821 	 * having called conn_update_ill, which can run once we dropped the
2822 	 * conn_ilg_lock above.
2823 	 */
2824 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2825 	if (ilg == NULL) {
2826 		rw_exit(&connp->conn_ilg_lock);
2827 		if (ilm != NULL) {
2828 			(void) ip_delmulti_serial(ilm, B_FALSE,
2829 			    (ilgstat == ILGSTAT_NEW));
2830 		}
2831 		error = ENXIO;
2832 		goto free_and_exit;
2833 	}
2834 	if (ilm != NULL) {
2835 		if (ilg->ilg_ill == NULL) {
2836 			/* some other thread is re-attaching this.  */
2837 			rw_exit(&connp->conn_ilg_lock);
2838 			(void) ip_delmulti_serial(ilm, B_FALSE,
2839 			    (ilgstat == ILGSTAT_NEW));
2840 			error = 0;
2841 			goto free_and_exit;
2842 		}
2843 		/* Succeeded. Update the ilg to point at the ilm */
2844 		if (ilgstat == ILGSTAT_NEW) {
2845 			if (ilg->ilg_ilm == NULL) {
2846 				ilg->ilg_ilm = ilm;
2847 				ilm->ilm_ifaddr = ifaddr; /* For netstat */
2848 			} else {
2849 				/* some other thread is re-attaching this. */
2850 				rw_exit(&connp->conn_ilg_lock);
2851 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2852 				error = 0;
2853 				goto free_and_exit;
2854 			}
2855 		} else {
2856 			/*
2857 			 * ip_addmulti didn't get a held ilm for
2858 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2859 			 */
2860 			ASSERT(ilg->ilg_ilm == ilm);
2861 		}
2862 	} else {
2863 		ASSERT(error != 0);
2864 		/*
2865 		 * Failed to allocate the ilm.
2866 		 * Need to undo what we did before calling ip_addmulti()
2867 		 * If ENETDOWN just clear ill_ilg since so that we
2868 		 * will rejoin when the ill comes back; don't report ENETDOWN
2869 		 * to application.
2870 		 */
2871 		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
2872 			ilg->ilg_ill = NULL;
2873 			error = 0;
2874 		} else {
2875 			in6_addr_t delsrc =
2876 			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
2877 
2878 			ilg_delete(connp, ilg, &delsrc);
2879 		}
2880 	}
2881 	rw_exit(&connp->conn_ilg_lock);
2882 
2883 free_and_exit:
2884 	l_free(new_filter);
2885 	return (error);
2886 }
2887 
2888 /*
2889  * Find an IPv4 ilg matching group, ill and source.
2890  * The group and source can't be INADDR_ANY here so no need to translate to
2891  * the unspecified IPv6 address.
2892  */
2893 boolean_t
conn_hasmembers_ill_withsrc_v4(conn_t * connp,ipaddr_t group,ipaddr_t src,ill_t * ill)2894 conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
2895     ill_t *ill)
2896 {
2897 	in6_addr_t v6group, v6src;
2898 	int i;
2899 	boolean_t isinlist;
2900 	ilg_t *ilg;
2901 
2902 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2903 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
2904 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2905 		if (ilg->ilg_condemned)
2906 			continue;
2907 
2908 		/* ilg_ill could be NULL if an add is in progress */
2909 		if (ilg->ilg_ill != ill)
2910 			continue;
2911 
2912 		/* The callers use upper ill for IPMP */
2913 		ASSERT(!IS_UNDER_IPMP(ill));
2914 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
2915 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2916 				/* no source filter, so this is a match */
2917 				rw_exit(&connp->conn_ilg_lock);
2918 				return (B_TRUE);
2919 			}
2920 			break;
2921 		}
2922 	}
2923 	if (ilg == NULL) {
2924 		rw_exit(&connp->conn_ilg_lock);
2925 		return (B_FALSE);
2926 	}
2927 
2928 	/*
2929 	 * we have an ilg with matching ill and group; but
2930 	 * the ilg has a source list that we must check.
2931 	 */
2932 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2933 	isinlist = B_FALSE;
2934 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2935 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
2936 			isinlist = B_TRUE;
2937 			break;
2938 		}
2939 	}
2940 
2941 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2942 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2943 		rw_exit(&connp->conn_ilg_lock);
2944 		return (B_TRUE);
2945 	}
2946 	rw_exit(&connp->conn_ilg_lock);
2947 	return (B_FALSE);
2948 }
2949 
2950 /*
2951  * Find an IPv6 ilg matching group, ill, and source
2952  */
2953 boolean_t
conn_hasmembers_ill_withsrc_v6(conn_t * connp,const in6_addr_t * v6group,const in6_addr_t * v6src,ill_t * ill)2954 conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
2955     const in6_addr_t *v6src, ill_t *ill)
2956 {
2957 	int i;
2958 	boolean_t isinlist;
2959 	ilg_t *ilg;
2960 
2961 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2962 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2963 		if (ilg->ilg_condemned)
2964 			continue;
2965 
2966 		/* ilg_ill could be NULL if an add is in progress */
2967 		if (ilg->ilg_ill != ill)
2968 			continue;
2969 
2970 		/* The callers use upper ill for IPMP */
2971 		ASSERT(!IS_UNDER_IPMP(ill));
2972 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
2973 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2974 				/* no source filter, so this is a match */
2975 				rw_exit(&connp->conn_ilg_lock);
2976 				return (B_TRUE);
2977 			}
2978 			break;
2979 		}
2980 	}
2981 	if (ilg == NULL) {
2982 		rw_exit(&connp->conn_ilg_lock);
2983 		return (B_FALSE);
2984 	}
2985 
2986 	/*
2987 	 * we have an ilg with matching ill and group; but
2988 	 * the ilg has a source list that we must check.
2989 	 */
2990 	isinlist = B_FALSE;
2991 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2992 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
2993 			isinlist = B_TRUE;
2994 			break;
2995 		}
2996 	}
2997 
2998 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2999 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
3000 		rw_exit(&connp->conn_ilg_lock);
3001 		return (B_TRUE);
3002 	}
3003 	rw_exit(&connp->conn_ilg_lock);
3004 	return (B_FALSE);
3005 }
3006 
3007 /*
3008  * Find an ilg matching group and ifaddr/ifindex.
3009  * We check both ifaddr and ifindex even though at most one of them
3010  * will be non-zero; that way we always find the right one.
3011  */
3012 static ilg_t *
ilg_lookup(conn_t * connp,const in6_addr_t * v6group,ipaddr_t ifaddr,uint_t ifindex)3013 ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
3014     uint_t ifindex)
3015 {
3016 	ilg_t	*ilg;
3017 
3018 	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));
3019 
3020 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3021 		if (ilg->ilg_condemned)
3022 			continue;
3023 
3024 		if (ilg->ilg_ifaddr == ifaddr &&
3025 		    ilg->ilg_ifindex == ifindex &&
3026 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3027 			return (ilg);
3028 	}
3029 	return (NULL);
3030 }
3031 
3032 /*
3033  * If a source address is passed in (src != NULL and src is not
3034  * unspecified), remove the specified src addr from the given ilg's
3035  * filter list, else delete the ilg.
3036  */
3037 static void
ilg_delete(conn_t * connp,ilg_t * ilg,const in6_addr_t * src)3038 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3039 {
3040 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3041 	ASSERT(ilg->ilg_ptpn != NULL);
3042 	ASSERT(!ilg->ilg_condemned);
3043 
3044 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3045 		FREE_SLIST(ilg->ilg_filter);
3046 		ilg->ilg_filter = NULL;
3047 
3048 		ASSERT(ilg->ilg_ilm == NULL);
3049 		ilg->ilg_ill = NULL;
3050 		ilg->ilg_condemned = B_TRUE;
3051 
3052 		/* ilg_inactive will unlink from the list */
3053 		ilg_refrele(ilg);
3054 	} else {
3055 		l_remove(ilg->ilg_filter, src);
3056 	}
3057 }
3058 
3059 /*
3060  * Called from conn close. No new ilg can be added or removed
3061  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3062  * will return error if conn has started closing.
3063  *
3064  * We handle locking as follows.
3065  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3066  * proceed with the ilm part of the delete we hold a reference on both the ill
3067  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3068  * being deleted.
3069  *
3070  * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
3071  * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
3072  * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
3073  * that case we delete the ilg here, which makes ilg_add discover that the ilg
3074  * has disappeared when ip_addmulti returns, so it will discard the ilm it just
3075  * added.
3076  */
3077 void
ilg_delete_all(conn_t * connp)3078 ilg_delete_all(conn_t *connp)
3079 {
3080 	ilg_t	*ilg, *next_ilg, *held_ilg;
3081 	ilm_t	*ilm;
3082 	ill_t	*ill;
3083 	boolean_t need_refrele;
3084 
3085 	/*
3086 	 * Can not run if there is a conn_update_ill already running.
3087 	 * Wait for it to complete. Caller should have already set CONN_CLOSING
3088 	 * which prevents any new threads to run in conn_update_ill.
3089 	 */
3090 	mutex_enter(&connp->conn_lock);
3091 	ASSERT(connp->conn_state_flags & CONN_CLOSING);
3092 	while (connp->conn_state_flags & CONN_UPDATE_ILL)
3093 		cv_wait(&connp->conn_cv, &connp->conn_lock);
3094 	mutex_exit(&connp->conn_lock);
3095 
3096 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3097 	ilg = connp->conn_ilg;
3098 	held_ilg = NULL;
3099 	while (ilg != NULL) {
3100 		if (ilg->ilg_condemned) {
3101 			ilg = ilg->ilg_next;
3102 			continue;
3103 		}
3104 		/* If the ilg is detached then no need to serialize */
3105 		if (ilg->ilg_ilm == NULL) {
3106 			next_ilg = ilg->ilg_next;
3107 			ilg_delete(connp, ilg, NULL);
3108 			ilg = next_ilg;
3109 			continue;
3110 		}
3111 		ill = ilg->ilg_ilm->ilm_ill;
3112 
3113 		/*
3114 		 * In order to serialize on the ill we try to enter
3115 		 * and if that fails we unlock and relock and then
3116 		 * check that we still have an ilm.
3117 		 */
3118 		need_refrele = B_FALSE;
3119 		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3120 			ill_refhold(ill);
3121 			need_refrele = B_TRUE;
3122 			ilg_refhold(ilg);
3123 			if (held_ilg != NULL)
3124 				ilg_refrele(held_ilg);
3125 			held_ilg = ilg;
3126 			rw_exit(&connp->conn_ilg_lock);
3127 			mutex_enter(&ill->ill_mcast_serializer);
3128 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3129 			if (ilg->ilg_condemned) {
3130 				ilg = ilg->ilg_next;
3131 				goto next;
3132 			}
3133 		}
3134 		ilm = ilg->ilg_ilm;
3135 		ilg->ilg_ilm = NULL;
3136 		next_ilg = ilg->ilg_next;
3137 		ilg_delete(connp, ilg, NULL);
3138 		ilg = next_ilg;
3139 		rw_exit(&connp->conn_ilg_lock);
3140 
3141 		if (ilm != NULL)
3142 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3143 
3144 	next:
3145 		mutex_exit(&ill->ill_mcast_serializer);
3146 		/*
3147 		 * Now that all locks have been dropped, we can send any
3148 		 * deferred/queued DLPI or IP packets
3149 		 */
3150 		ill_mcast_send_queued(ill);
3151 		ill_dlpi_send_queued(ill);
3152 		if (need_refrele) {
3153 			/* Drop ill reference while we hold no locks */
3154 			ill_refrele(ill);
3155 		}
3156 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3157 	}
3158 	if (held_ilg != NULL)
3159 		ilg_refrele(held_ilg);
3160 	rw_exit(&connp->conn_ilg_lock);
3161 }
3162 
3163 /*
3164  * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
3165  * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock.
3166  */
3167 static void
ilg_attach(conn_t * connp,ilg_t * ilg,ill_t * ill)3168 ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
3169 {
3170 	ilg_stat_t	ilgstat;
3171 	slist_t		*new_filter;
3172 	int		new_fmode;
3173 	in6_addr_t	v6group;
3174 	ipaddr_t	ifaddr;
3175 	uint_t		ifindex;
3176 	ilm_t		*ilm;
3177 	int		error = 0;
3178 
3179 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3180 	/*
3181 	 * Alloc buffer to copy new state into (see below) before
3182 	 * we make any changes, so we can bail if it fails.
3183 	 */
3184 	if ((new_filter = l_alloc()) == NULL)
3185 		return;
3186 
3187 	/*
3188 	 * Save copy of ilg's filter state to pass to other functions, so
3189 	 * we can release conn_ilg_lock now.
3190 	 * Set ilg_ill so that an unplumb can find us.
3191 	 */
3192 	new_fmode = ilg->ilg_fmode;
3193 	l_copy(ilg->ilg_filter, new_filter);
3194 	v6group = ilg->ilg_v6group;
3195 	ifaddr = ilg->ilg_ifaddr;
3196 	ifindex = ilg->ilg_ifindex;
3197 	ilgstat = ILGSTAT_NEW;
3198 
3199 	ilg->ilg_ill = ill;
3200 	ASSERT(ilg->ilg_ilm == NULL);
3201 	rw_exit(&connp->conn_ilg_lock);
3202 
3203 	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
3204 	    new_fmode, new_filter, &error);
3205 	l_free(new_filter);
3206 
3207 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3208 	/*
3209 	 * Must look up the ilg again since we've not been holding
3210 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
3211 	 * having called conn_update_ill, which can run once we dropped the
3212 	 * conn_ilg_lock above. Alternatively, the ilg could have been attached
3213 	 * when the lock was dropped
3214 	 */
3215 	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
3216 	if (ilg == NULL || ilg->ilg_ilm != NULL) {
3217 		if (ilm != NULL) {
3218 			rw_exit(&connp->conn_ilg_lock);
3219 			(void) ip_delmulti_serial(ilm, B_FALSE,
3220 			    (ilgstat == ILGSTAT_NEW));
3221 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3222 		}
3223 		return;
3224 	}
3225 	if (ilm == NULL) {
3226 		ilg->ilg_ill = NULL;
3227 		return;
3228 	}
3229 	ilg->ilg_ilm = ilm;
3230 	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
3231 }
3232 
3233 /*
3234  * Called when an ill is unplumbed to make sure that there are no
3235  * dangling conn references to that ill. In that case ill is non-NULL and
3236  * we make sure we remove all references to it.
3237  * Also called when we should revisit the ilg_ill used for multicast
3238  * memberships, in which case ill is NULL.
3239  *
3240  * conn is held by caller.
3241  *
3242  * Note that ipcl_walk only walks conns that are not yet condemned.
3243  * condemned conns can't be refheld. For this reason, conn must become clean
3244  * first, i.e. it must not refer to any ill/ire and then only set
3245  * condemned flag.
3246  *
3247  * We leave ixa_multicast_ifindex in place. We prefer dropping
3248  * packets instead of sending them out the wrong interface.
3249  *
3250  * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
3251  * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
3252  * ilg_ifindex are zero, indicating that the system should pick the interface,
3253  * then we attempt to reselect the ill and join on it.
3254  *
3255  * Locking notes:
3256  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3257  * proceed with the ilm part of the delete we hold a reference on both the ill
3258  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3259  * being deleted.
3260  *
3261  * Note: if this function is called when new ill/ipif's arrive or change status
3262  * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
3263  * a NULL ilg_ill to an ill/ilm.
3264  */
3265 static void
conn_update_ill(conn_t * connp,caddr_t arg)3266 conn_update_ill(conn_t *connp, caddr_t arg)
3267 {
3268 	ill_t	*ill = (ill_t *)arg;
3269 
3270 	/*
3271 	 * We have to prevent ip_close/ilg_delete_all from running at
3272 	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
3273 	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
3274 	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
3275 	 * If ilg_delete_all got here first, then we have nothing to do.
3276 	 */
3277 	mutex_enter(&connp->conn_lock);
3278 	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
3279 		/* Caller has to wait for ill_ilm_cnt to drop to zero */
3280 		mutex_exit(&connp->conn_lock);
3281 		return;
3282 	}
3283 	connp->conn_state_flags |= CONN_UPDATE_ILL;
3284 	mutex_exit(&connp->conn_lock);
3285 
3286 	if (ill != NULL)
3287 		ilg_check_detach(connp, ill);
3288 
3289 	ilg_check_reattach(connp, ill);
3290 
3291 	/* Do we need to wake up a thread in ilg_delete_all? */
3292 	mutex_enter(&connp->conn_lock);
3293 	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
3294 	if (connp->conn_state_flags & CONN_CLOSING)
3295 		cv_broadcast(&connp->conn_cv);
3296 	mutex_exit(&connp->conn_lock);
3297 }
3298 
3299 /* Detach from an ill that is going away */
3300 static void
ilg_check_detach(conn_t * connp,ill_t * ill)3301 ilg_check_detach(conn_t *connp, ill_t *ill)
3302 {
3303 	char	group_buf[INET6_ADDRSTRLEN];
3304 	ilg_t	*ilg, *held_ilg;
3305 	ilm_t	*ilm;
3306 
3307 	mutex_enter(&ill->ill_mcast_serializer);
3308 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3309 	held_ilg = NULL;
3310 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3311 		if (ilg->ilg_condemned)
3312 			continue;
3313 
3314 		if (ilg->ilg_ill != ill)
3315 			continue;
3316 
3317 		/* Detach from current ill */
3318 		ip1dbg(("ilg_check_detach: detach %s on %s\n",
3319 		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3320 		    group_buf, sizeof (group_buf)),
3321 		    ilg->ilg_ill->ill_name));
3322 
3323 		/* Detach this ilg from the ill/ilm */
3324 		ilm = ilg->ilg_ilm;
3325 		ilg->ilg_ilm = NULL;
3326 		ilg->ilg_ill = NULL;
3327 		if (ilm == NULL)
3328 			continue;
3329 
3330 		/* Prevent ilg from disappearing */
3331 		ilg_transfer_hold(held_ilg, ilg);
3332 		held_ilg = ilg;
3333 		rw_exit(&connp->conn_ilg_lock);
3334 
3335 		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3336 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3337 	}
3338 	if (held_ilg != NULL)
3339 		ilg_refrele(held_ilg);
3340 	rw_exit(&connp->conn_ilg_lock);
3341 	mutex_exit(&ill->ill_mcast_serializer);
3342 	/*
3343 	 * Now that all locks have been dropped, we can send any
3344 	 * deferred/queued DLPI or IP packets
3345 	 */
3346 	ill_mcast_send_queued(ill);
3347 	ill_dlpi_send_queued(ill);
3348 }
3349 
3350 /*
3351  * Check if there is a place to attach the conn_ilgs. We do this for both
3352  * detached ilgs and attached ones, since for the latter there could be
3353  * a better ill to attach them to. oill is non-null if we just detached from
3354  * that ill.
 *
 * For every non-condemned ilg on connp this looks up the ill that
 * ill_mcast_lookup() selects for the ilg's group/ifaddr/ifindex and:
 *   - does nothing if that is the ill the ilg already uses, or if it is oill;
 *   - otherwise detaches the ilg from its current ill (deleting the ilm) and
 *     then, if a usable ill was found, attaches to it via ilg_attach().
 *
 * Locking: conn_ilg_lock is held as writer while the list is examined, but
 * is dropped around every ill_refrele(), ip_delmulti_serial(), blocking
 * mutex_enter() of an ill_mcast_serializer, and the send-queued calls. Each
 * time it is dropped, a hold is first moved onto the current ilg with
 * ilg_transfer_hold() (tracked in held_ilg); the final hold is released
 * after the walk. State read before a drop (ilg_ill, ilg_condemned) is
 * re-checked after the lock is reacquired.
 *
 * The value written to "error" by ill_mcast_lookup() is not examined here.
 *
 * NOTE(review): the ip1dbg messages below name "conn_check_reattach" rather
 * than this function.
3355  */
3356 static void
ilg_check_reattach(conn_t * connp,ill_t * oill)3357 ilg_check_reattach(conn_t *connp, ill_t *oill)
3358 {
3359 	ill_t	*ill;
3360 	char	group_buf[INET6_ADDRSTRLEN];
3361 	ilg_t	*ilg, *held_ilg;
3362 	ilm_t	*ilm;
3363 	zoneid_t zoneid = IPCL_ZONEID(connp);
3364 	int	error;
3365 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3366 
3367 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3368 	held_ilg = NULL;
3369 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3370 		if (ilg->ilg_condemned)
3371 			continue;
3372 
3373 		/* Check if the conn_ill matches what we would pick now */
		/* A non-NULL result is released with ill_refrele() below */
3374 		ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
3375 		    ilg->ilg_ifindex, zoneid, ipst, &error);
3376 
3377 		/*
3378 		 * Make sure the ill is usable for multicast and that
3379 		 * we can send the DL_ADDMULTI_REQ before we create an
3380 		 * ilm.
3381 		 */
3382 		if (ill != NULL &&
3383 		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
3384 			/* Drop locks across ill_refrele */
3385 			ilg_transfer_hold(held_ilg, ilg);
3386 			held_ilg = ilg;
3387 			rw_exit(&connp->conn_ilg_lock);
3388 			ill_refrele(ill);
			/* Treat an unusable ill the same as no ill found */
3389 			ill = NULL;
3390 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3391 			/* Note that ilg could have become condemned */
3392 		}
3393 
3394 		/*
3395 		 * Is the ill unchanged, even if both are NULL?
3396 		 * Did we just detach from that ill?
3397 		 */
3398 		if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) {
3399 			if (ill != NULL) {
3400 				/* Drop locks across ill_refrele */
3401 				ilg_transfer_hold(held_ilg, ilg);
3402 				held_ilg = ilg;
3403 				rw_exit(&connp->conn_ilg_lock);
3404 				ill_refrele(ill);
3405 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3406 			}
3407 			continue;
3408 		}
3409 
3410 		/* Something changed; detach from old first if needed */
3411 		if (ilg->ilg_ill != NULL) {
3412 			ill_t *ill2 = ilg->ilg_ill;
3413 			boolean_t need_refrele = B_FALSE;
3414 
3415 			/*
3416 			 * In order to serialize on the ill we try to enter
3417 			 * and if that fails we unlock and relock.
3418 			 */
3419 			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
				/* Hold ill2 across the lock drop below */
3420 				ill_refhold(ill2);
3421 				need_refrele = B_TRUE;
3422 				ilg_transfer_hold(held_ilg, ilg);
3423 				held_ilg = ilg;
3424 				rw_exit(&connp->conn_ilg_lock);
3425 				mutex_enter(&ill2->ill_mcast_serializer);
3426 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3427 				/* Note that ilg could have become condemned */
3428 			}
3429 			/*
3430 			 * Check that nobody else re-attached the ilg while we
3431 			 * dropped the lock.
3432 			 */
3433 			if (ilg->ilg_ill == ill2) {
3434 				ASSERT(!ilg->ilg_condemned);
3435 				/* Detach from current ill */
3436 				ip1dbg(("conn_check_reattach: detach %s/%s\n",
3437 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3438 				    group_buf, sizeof (group_buf)),
3439 				    ill2->ill_name));
3440 
3441 				ilm = ilg->ilg_ilm;
3442 				ilg->ilg_ilm = NULL;
3443 				ilg->ilg_ill = NULL;
3444 			} else {
				/* No longer on ill2; there is no ilm to delete */
3445 				ilm = NULL;
3446 			}
3447 			ilg_transfer_hold(held_ilg, ilg);
3448 			held_ilg = ilg;
3449 			rw_exit(&connp->conn_ilg_lock);
3450 			if (ilm != NULL)
3451 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3452 			mutex_exit(&ill2->ill_mcast_serializer);
3453 			/*
3454 			 * Now that all locks have been dropped, we can send any
3455 			 * deferred/queued DLPI or IP packets
3456 			 */
3457 			ill_mcast_send_queued(ill2);
3458 			ill_dlpi_send_queued(ill2);
3459 			if (need_refrele) {
3460 				/* Drop ill reference while we hold no locks */
3461 				ill_refrele(ill2);
3462 			}
3463 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3464 			/*
3465 			 * While we dropped conn_ilg_lock some other thread
3466 			 * could have attached this ilg, thus we check again.
3467 			 */
3468 			if (ilg->ilg_ill != NULL) {
3469 				if (ill != NULL) {
3470 					/* Drop locks across ill_refrele */
3471 					ilg_transfer_hold(held_ilg, ilg);
3472 					held_ilg = ilg;
3473 					rw_exit(&connp->conn_ilg_lock);
3474 					ill_refrele(ill);
3475 					rw_enter(&connp->conn_ilg_lock,
3476 					    RW_WRITER);
3477 				}
3478 				continue;
3479 			}
3480 		}
		/* The ilg is detached here; attach only if an ill was found */
3481 		if (ill != NULL) {
3482 			/*
3483 			 * In order to serialize on the ill we try to enter
3484 			 * and if that fails we unlock and relock.
3485 			 */
3486 			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3487 				/* Already have a refhold on ill */
3488 				ilg_transfer_hold(held_ilg, ilg);
3489 				held_ilg = ilg;
3490 				rw_exit(&connp->conn_ilg_lock);
3491 				mutex_enter(&ill->ill_mcast_serializer);
3492 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3493 				/* Note that ilg could have become condemned */
3494 			}
			/* Hold the ilg: the lock is dropped again below */
3495 			ilg_transfer_hold(held_ilg, ilg);
3496 			held_ilg = ilg;
3497 			/*
3498 			 * Check that nobody else attached the ilg and that
3499 			 * it wasn't condemned while we dropped the lock.
3500 			 */
3501 			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
3502 				/*
3503 				 * Attach to the new ill. Can fail in which
3504 				 * case ilg_ill will remain NULL. ilg_attach
3505 				 * drops and reacquires conn_ilg_lock.
3506 				 */
3507 				ip1dbg(("conn_check_reattach: attach %s/%s\n",
3508 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3509 				    group_buf, sizeof (group_buf)),
3510 				    ill->ill_name));
3511 				ilg_attach(connp, ilg, ill);
3512 				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3513 			}
3514 			/* Drop locks across ill_refrele */
3515 			rw_exit(&connp->conn_ilg_lock);
3516 			mutex_exit(&ill->ill_mcast_serializer);
3517 			/*
3518 			 * Now that all locks have been
3519 			 * dropped, we can send any
3520 			 * deferred/queued DLPI or IP packets
3521 			 */
3522 			ill_mcast_send_queued(ill);
3523 			ill_dlpi_send_queued(ill);
3524 			ill_refrele(ill);
3525 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3526 		}
3527 	}
	/* Release the last ilg hold moved by ilg_transfer_hold, if any */
3528 	if (held_ilg != NULL)
3529 		ilg_refrele(held_ilg);
3530 	rw_exit(&connp->conn_ilg_lock);
3531 }
3532 
3533 /*
3534  * Called when an ill is unplumbed to make sure that there are no
3535  * dangling conn references to that ill. In that case ill is non-NULL and
3536  * we make sure we remove all references to it.
3537  * Also called when we should revisit the ilg_ill used for multicast
3538  * memberships, in which case ill is NULL.
3539  */
3540 void
update_conn_ill(ill_t * ill,ip_stack_t * ipst)3541 update_conn_ill(ill_t *ill, ip_stack_t *ipst)
3542 {
3543 	ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
3544 }
3545