1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
24 */
25
26#include <sys/types.h>
27#include <sys/stream.h>
28#include <sys/dlpi.h>
29#include <sys/stropts.h>
30#include <sys/strsun.h>
31#include <sys/ddi.h>
32#include <sys/cmn_err.h>
33#include <sys/sdt.h>
34#include <sys/zone.h>
35
36#include <sys/param.h>
37#include <sys/socket.h>
38#include <sys/sockio.h>
39#include <net/if.h>
40#include <sys/systm.h>
41#include <sys/strsubr.h>
42#include <net/route.h>
43#include <netinet/in.h>
44#include <net/if_dl.h>
45#include <netinet/ip6.h>
46#include <netinet/icmp6.h>
47
48#include <inet/common.h>
49#include <inet/mi.h>
50#include <inet/nd.h>
51#include <inet/arp.h>
52#include <inet/ip.h>
53#include <inet/ip6.h>
54#include <inet/ip_if.h>
55#include <inet/ip_ndp.h>
56#include <inet/ip_multi.h>
57#include <inet/ipclassifier.h>
58#include <inet/ipsec_impl.h>
59#include <inet/sctp_ip.h>
60#include <inet/ip_listutils.h>
61#include <inet/udp_impl.h>
62
63/* igmpv3/mldv2 source filter manipulation */
64static void	ilm_bld_flists(conn_t *conn, void *arg);
65static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
66    slist_t *flist);
67
68static ilm_t	*ilm_add(ill_t *ill, const in6_addr_t *group,
69    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
70    zoneid_t zoneid);
71static void	ilm_delete(ilm_t *ilm);
72static int	ilm_numentries(ill_t *, const in6_addr_t *);
73
74static ilm_t	*ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
75    ilg_stat_t, mcast_record_t, slist_t *, int *);
76static ilm_t	*ip_addmulti_impl(const in6_addr_t *, ill_t *,
77    zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
78static int	ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
79static int	ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);
80
81static int	ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
82    t_uscalar_t);
83static ilg_t	*ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
84    uint_t ifindex);
85
86static int	ilg_add(conn_t *connp, const in6_addr_t *group,
87    ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
88    const in6_addr_t *v6src);
89static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
90static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
91    uint32_t *addr_lenp, uint32_t *addr_offp);
92static int	ip_opt_delete_group_excl(conn_t *connp,
93    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
94    mcast_record_t fmode, const in6_addr_t *v6src);
95
96static	ilm_t	*ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);
97
98static int	ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
99    ill_t **);
100
101static void	ilg_check_detach(conn_t *, ill_t *);
102static void	ilg_check_reattach(conn_t *, ill_t *);
103
104/*
105 * MT notes:
106 *
107 * Multicast joins operate on both the ilg and ilm structures. Multiple
108 * threads operating on an conn (socket) trying to do multicast joins
109 * need to synchronize when operating on the ilg. Multiple threads
110 * potentially operating on different conn (socket endpoints) trying to
111 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
113 * The access and lookup of the ilm, as well as other ill multicast state,
114 * is under ill_mcast_lock.
115 * The modifications and lookup of ilg entries is serialized using conn_ilg_lock
116 * rwlock. An ilg will not be freed until ilg_refcnt drops to zero.
117 *
118 * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
119 * never the other way around.
120 *
121 * An ilm is an IP data structure used to track multicast join/leave.
122 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
123 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
124 * referencing the ilm.
125 * The modifications and lookup of ilm entries is serialized using the
126 * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
127 * of the ilm state.
128 * ilms are created / destroyed only as writer. ilms
129 * are not passed around. The datapath (anything outside of this file
130 * and igmp.c) use functions that do not return ilms - just the number
131 * of members. So we don't need a dynamic refcount of the number
132 * of threads holding reference to an ilm.
133 *
134 * In the cases where we serially access the ilg and ilm, which happens when
135 * we handle the applications requests to join or leave groups and sources,
136 * we use the ill_mcast_serializer mutex to ensure that a multithreaded
137 * application which does concurrent joins and/or leaves on the same group on
138 * the same socket always results in a consistent order for the ilg and ilm
139 * modifications.
140 *
141 * When a multicast operation results in needing to send a message to
142 * the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue()
 * which serializes the DLPI requests. The IGMP/MLD code uses ill_mcast_queue()
144 * to send IGMP/MLD IP packet to avoid dropping the lock just to send a packet.
145 */
146
/* Allocate a zeroed array of `number' instances of `structure'. */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))
149
150/*
151 * Caller must ensure that the ilg has not been condemned
152 * The condemned flag is only set in ilg_delete under conn_ilg_lock.
153 *
154 * The caller must hold conn_ilg_lock as writer.
155 */
156static void
157ilg_refhold(ilg_t *ilg)
158{
159	ASSERT(ilg->ilg_refcnt != 0);
160	ASSERT(!ilg->ilg_condemned);
161	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
162
163	ilg->ilg_refcnt++;
164}
165
166static void
167ilg_inactive(ilg_t *ilg)
168{
169	ASSERT(ilg->ilg_ill == NULL);
170	ASSERT(ilg->ilg_ilm == NULL);
171	ASSERT(ilg->ilg_filter == NULL);
172	ASSERT(ilg->ilg_condemned);
173
174	/* Unlink from list */
175	*ilg->ilg_ptpn = ilg->ilg_next;
176	if (ilg->ilg_next != NULL)
177		ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
178	ilg->ilg_next = NULL;
179	ilg->ilg_ptpn = NULL;
180
181	ilg->ilg_connp = NULL;
182	kmem_free(ilg, sizeof (*ilg));
183}
184
185/*
186 * The caller must hold conn_ilg_lock as writer.
187 */
188static void
189ilg_refrele(ilg_t *ilg)
190{
191	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
192	ASSERT(ilg->ilg_refcnt != 0);
193	if (--ilg->ilg_refcnt == 0)
194		ilg_inactive(ilg);
195}
196
197/*
198 * Acquire reference on ilg and drop reference on held_ilg.
199 * In the case when held_ilg is the same as ilg we already have
200 * a reference, but the held_ilg might be condemned. In that case
201 * we avoid the ilg_refhold/rele so that we can assert in ire_refhold
202 * that the ilg isn't condemned.
203 */
204static void
205ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
206{
207	if (held_ilg == ilg)
208		return;
209
210	ilg_refhold(ilg);
211	if (held_ilg != NULL)
212		ilg_refrele(held_ilg);
213}
214
215/*
216 * Allocate a new ilg_t and links it into conn_ilg.
217 * Returns NULL on failure, in which case `*errp' will be
218 * filled in with the reason.
219 *
220 * Assumes connp->conn_ilg_lock is held.
221 */
222static ilg_t *
223conn_ilg_alloc(conn_t *connp, int *errp)
224{
225	ilg_t *ilg;
226
227	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
228
229	/*
230	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
231	 * create any ilgs.
232	 */
233	if (connp->conn_state_flags & CONN_CLOSING) {
234		*errp = EINVAL;
235		return (NULL);
236	}
237
238	ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
239	if (ilg == NULL) {
240		*errp = ENOMEM;
241		return (NULL);
242	}
243
244	ilg->ilg_refcnt = 1;
245
246	/* Insert at head */
247	if (connp->conn_ilg != NULL)
248		connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
249	ilg->ilg_next = connp->conn_ilg;
250	ilg->ilg_ptpn = &connp->conn_ilg;
251	connp->conn_ilg = ilg;
252
253	ilg->ilg_connp = connp;
254	return (ilg);
255}
256
/*
 * Scratch state used while walking all conns to (re)build the
 * interface-wide source filter for one ilm (see ilm_bld_flists and
 * ilm_gen_filter).
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being built */
	int		fbld_in_cnt;	/* # of INCLUDE-mode ilgs seen */
	int		fbld_ex_cnt;	/* # of EXCLUDE-mode ilgs seen */
	slist_t		fbld_in;	/* union of the include lists */
	slist_t		fbld_ex;	/* intersection of the exclude lists */
	boolean_t	fbld_in_overflow; /* include list overflowed */
} ilm_fbld_t;
265
266/*
267 * Caller must hold ill_mcast_lock
268 */
269static void
270ilm_bld_flists(conn_t *connp, void *arg)
271{
272	ilg_t *ilg;
273	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
274	ilm_t *ilm = fbld->fbld_ilm;
275	in6_addr_t *v6group = &ilm->ilm_v6addr;
276
277	if (connp->conn_ilg == NULL)
278		return;
279
280	/*
281	 * Since we can't break out of the ipcl_walk once started, we still
282	 * have to look at every conn.  But if we've already found one
283	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
284	 * ilgs--that will be our state.
285	 */
286	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
287		return;
288
289	/*
290	 * Check this conn's ilgs to see if any are interested in our
291	 * ilm (group, interface match).  If so, update the master
292	 * include and exclude lists we're building in the fbld struct
293	 * with this ilg's filter info.
294	 *
295	 * Note that the caller has already serialized on the ill we care
296	 * about.
297	 */
298	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));
299
300	rw_enter(&connp->conn_ilg_lock, RW_READER);
301	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
302		if (ilg->ilg_condemned)
303			continue;
304
305		/*
306		 * Since we are under the ill_mcast_serializer we know
307		 * that any ilg+ilm operations on this ilm have either
308		 * not started or completed, except for the last ilg
309		 * (the one that caused us to be called) which doesn't
310		 * have ilg_ilm set yet. Hence we compare using ilg_ill
311		 * and the address.
312		 */
313		if ((ilg->ilg_ill == ilm->ilm_ill) &&
314		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
315			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
316				fbld->fbld_in_cnt++;
317				if (!fbld->fbld_in_overflow)
318					l_union_in_a(&fbld->fbld_in,
319					    ilg->ilg_filter,
320					    &fbld->fbld_in_overflow);
321			} else {
322				fbld->fbld_ex_cnt++;
323				/*
324				 * On the first exclude list, don't try to do
325				 * an intersection, as the master exclude list
326				 * is intentionally empty.  If the master list
327				 * is still empty on later iterations, that
328				 * means we have at least one ilg with an empty
329				 * exclude list, so that should be reflected
330				 * when we take the intersection.
331				 */
332				if (fbld->fbld_ex_cnt == 1) {
333					if (ilg->ilg_filter != NULL)
334						l_copy(ilg->ilg_filter,
335						    &fbld->fbld_ex);
336				} else {
337					l_intersection_in_a(&fbld->fbld_ex,
338					    ilg->ilg_filter);
339				}
340			}
341			/* there will only be one match, so break now. */
342			break;
343		}
344	}
345	rw_exit(&connp->conn_ilg_lock);
346}
347
348/*
349 * Caller must hold ill_mcast_lock
350 */
351static void
352ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
353{
354	ilm_fbld_t fbld;
355	ip_stack_t *ipst = ilm->ilm_ipst;
356
357	fbld.fbld_ilm = ilm;
358	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
359	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
360	fbld.fbld_in_overflow = B_FALSE;
361
362	/* first, construct our master include and exclude lists */
363	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
364
365	/* now use those master lists to generate the interface filter */
366
367	/* if include list overflowed, filter is (EXCLUDE, NULL) */
368	if (fbld.fbld_in_overflow) {
369		*fmode = MODE_IS_EXCLUDE;
370		flist->sl_numsrc = 0;
371		return;
372	}
373
374	/* if nobody interested, interface filter is (INCLUDE, NULL) */
375	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
376		*fmode = MODE_IS_INCLUDE;
377		flist->sl_numsrc = 0;
378		return;
379	}
380
381	/*
382	 * If there are no exclude lists, then the interface filter
383	 * is INCLUDE, with its filter list equal to fbld_in.  A single
384	 * exclude list makes the interface filter EXCLUDE, with its
385	 * filter list equal to (fbld_ex - fbld_in).
386	 */
387	if (fbld.fbld_ex_cnt == 0) {
388		*fmode = MODE_IS_INCLUDE;
389		l_copy(&fbld.fbld_in, flist);
390	} else {
391		*fmode = MODE_IS_EXCLUDE;
392		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
393	}
394}
395
396/*
397 * Caller must hold ill_mcast_lock
398 */
399static int
400ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
401{
402	mcast_record_t fmode;
403	slist_t *flist;
404	boolean_t fdefault;
405	char buf[INET6_ADDRSTRLEN];
406	ill_t *ill = ilm->ilm_ill;
407
408	/*
409	 * There are several cases where the ilm's filter state
410	 * defaults to (EXCLUDE, NULL):
411	 *	- we've had previous joins without associated ilgs
412	 *	- this join has no associated ilg
413	 *	- the ilg's filter state is (EXCLUDE, NULL)
414	 */
415	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
416	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
417
418	/* attempt mallocs (if needed) before doing anything else */
419	if ((flist = l_alloc()) == NULL)
420		return (ENOMEM);
421	if (!fdefault && ilm->ilm_filter == NULL) {
422		ilm->ilm_filter = l_alloc();
423		if (ilm->ilm_filter == NULL) {
424			l_free(flist);
425			return (ENOMEM);
426		}
427	}
428
429	if (ilgstat != ILGSTAT_CHANGE)
430		ilm->ilm_refcnt++;
431
432	if (ilgstat == ILGSTAT_NONE)
433		ilm->ilm_no_ilg_cnt++;
434
435	/*
436	 * Determine new filter state.  If it's not the default
437	 * (EXCLUDE, NULL), we must walk the conn list to find
438	 * any ilgs interested in this group, and re-build the
439	 * ilm filter.
440	 */
441	if (fdefault) {
442		fmode = MODE_IS_EXCLUDE;
443		flist->sl_numsrc = 0;
444	} else {
445		ilm_gen_filter(ilm, &fmode, flist);
446	}
447
448	/* make sure state actually changed; nothing to do if not. */
449	if ((ilm->ilm_fmode == fmode) &&
450	    !lists_are_different(ilm->ilm_filter, flist)) {
451		l_free(flist);
452		return (0);
453	}
454
455	/* send the state change report */
456	if (!IS_LOOPBACK(ill)) {
457		if (ill->ill_isv6)
458			mld_statechange(ilm, fmode, flist);
459		else
460			igmp_statechange(ilm, fmode, flist);
461	}
462
463	/* update the ilm state */
464	ilm->ilm_fmode = fmode;
465	if (flist->sl_numsrc > 0)
466		l_copy(flist, ilm->ilm_filter);
467	else
468		CLEAR_SLIST(ilm->ilm_filter);
469
470	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
471	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
472
473	l_free(flist);
474	return (0);
475}
476
477/*
478 * Caller must hold ill_mcast_lock
479 */
480static int
481ilm_update_del(ilm_t *ilm)
482{
483	mcast_record_t fmode;
484	slist_t *flist;
485	ill_t *ill = ilm->ilm_ill;
486
487	ip1dbg(("ilm_update_del: still %d left; updating state\n",
488	    ilm->ilm_refcnt));
489
490	if ((flist = l_alloc()) == NULL)
491		return (ENOMEM);
492
493	/*
494	 * If present, the ilg in question has already either been
495	 * updated or removed from our list; so all we need to do
496	 * now is walk the list to update the ilm filter state.
497	 *
498	 * Skip the list walk if we have any no-ilg joins, which
499	 * cause the filter state to revert to (EXCLUDE, NULL).
500	 */
501	if (ilm->ilm_no_ilg_cnt != 0) {
502		fmode = MODE_IS_EXCLUDE;
503		flist->sl_numsrc = 0;
504	} else {
505		ilm_gen_filter(ilm, &fmode, flist);
506	}
507
508	/* check to see if state needs to be updated */
509	if ((ilm->ilm_fmode == fmode) &&
510	    (!lists_are_different(ilm->ilm_filter, flist))) {
511		l_free(flist);
512		return (0);
513	}
514
515	if (!IS_LOOPBACK(ill)) {
516		if (ill->ill_isv6)
517			mld_statechange(ilm, fmode, flist);
518		else
519			igmp_statechange(ilm, fmode, flist);
520	}
521
522	ilm->ilm_fmode = fmode;
523	if (flist->sl_numsrc > 0) {
524		if (ilm->ilm_filter == NULL) {
525			ilm->ilm_filter = l_alloc();
526			if (ilm->ilm_filter == NULL) {
527				char buf[INET6_ADDRSTRLEN];
528				ip1dbg(("ilm_update_del: failed to alloc ilm "
529				    "filter; no source filtering for %s on %s",
530				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
531				    buf, sizeof (buf)), ill->ill_name));
532				ilm->ilm_fmode = MODE_IS_EXCLUDE;
533				l_free(flist);
534				return (0);
535			}
536		}
537		l_copy(flist, ilm->ilm_filter);
538	} else {
539		CLEAR_SLIST(ilm->ilm_filter);
540	}
541
542	l_free(flist);
543	return (0);
544}
545
546/*
547 * Create/update the ilm for the group/ill. Used by other parts of IP to
548 * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join.
549 * Returns with a refhold on the ilm.
550 *
551 * The unspecified address means all multicast addresses for in both the
552 * case of IPv4 and IPv6.
553 *
554 * The caller should have already mapped an IPMP under ill to the upper.
555 */
556ilm_t *
557ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
558    int *errorp)
559{
560	ilm_t *ilm;
561
562	/* Acquire serializer to keep assert in ilm_bld_flists happy */
563	mutex_enter(&ill->ill_mcast_serializer);
564	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
565	    MODE_IS_EXCLUDE, NULL, errorp);
566	mutex_exit(&ill->ill_mcast_serializer);
567	/*
568	 * Now that all locks have been dropped, we can send any
569	 * deferred/queued DLPI or IP packets
570	 */
571	ill_mcast_send_queued(ill);
572	ill_dlpi_send_queued(ill);
573	return (ilm);
574}
575
576/*
577 * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
578 * then this returns with a refhold on the ilm.
579 *
580 * Internal routine which assumes the caller has already acquired
581 * ill_mcast_serializer. It is the caller's responsibility to send out
582 * queued DLPI/multicast packets after all locks are dropped.
583 *
584 * The unspecified address means all multicast addresses for in both the
585 * case of IPv4 and IPv6.
586 *
587 * ilgstat tells us if there's an ilg associated with this join,
588 * and if so, if it's a new ilg or a change to an existing one.
589 * ilg_fmode and ilg_flist give us the current filter state of
590 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
591 *
592 * The caller should have already mapped an IPMP under ill to the upper.
593 */
594static ilm_t *
595ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
596    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
597    int *errorp)
598{
599	ilm_t *ilm;
600
601	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
602
603	if (ill->ill_isv6) {
604		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
605		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
606			*errorp = EINVAL;
607			return (NULL);
608		}
609	} else {
610		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
611			ipaddr_t v4group;
612
613			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
614			ASSERT(!IS_UNDER_IPMP(ill));
615			if (!CLASSD(v4group)) {
616				*errorp = EINVAL;
617				return (NULL);
618			}
619		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
620			*errorp = EINVAL;
621			return (NULL);
622		}
623	}
624
625	if (IS_UNDER_IPMP(ill)) {
626		*errorp = EINVAL;
627		return (NULL);
628	}
629
630	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
631	/*
632	 * We do the equivalent of a lookup by checking after we get the lock
633	 * This is needed since the ill could have been condemned after
634	 * we looked it up, and we need to check condemned after we hold
635	 * ill_mcast_lock to synchronize with the unplumb code.
636	 */
637	if (ill->ill_state_flags & ILL_CONDEMNED) {
638		rw_exit(&ill->ill_mcast_lock);
639		*errorp = ENXIO;
640		return (NULL);
641	}
642	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
643	    ilg_flist, errorp);
644	rw_exit(&ill->ill_mcast_lock);
645
646	ill_mcast_timer_start(ill->ill_ipst);
647	return (ilm);
648}
649
/*
 * Look up or create the ilm for (v6group, ill, zone), apply the join,
 * and for a first join do the IGMP/MLD and driver work. Returns NULL
 * with *errorp set on failure.
 *
 * Caller must hold ill_mcast_lock as writer (asserted below).
 */
static ilm_t *
ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
	ilm_t	*ilm;
	int	ret = 0;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	*errorp = 0;

	/*
	 * An ilm is uniquely identified by the tuple of (group, ill) where
	 * `group' is the multicast group address, and `ill' is the interface
	 * on which it is currently joined.
	 */

	ilm = ilm_lookup(ill, v6group, zoneid);
	if (ilm != NULL) {
		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
		if (ret == 0)
			return (ilm);

		*errorp = ret;
		return (NULL);
	}

	/*
	 * The caller's checks on the ilg and the ilg+ilm consistency under
	 * ill_mcast_serializer ensure that we can not have ILGSTAT_CHANGE
	 * and no ilm.
	 */
	ASSERT(ilgstat != ILGSTAT_CHANGE);
	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
	if (ilm == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * If we have more than one we should not tell the driver
		 * to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ill_join_allmulti(ill);
		}
	} else {
		if (!IS_LOOPBACK(ill)) {
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);
		}

		/*
		 * If we have more than one we should not tell the driver
		 * to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
		}
	}
	if (ret != 0) {
		if (ret == ENETDOWN) {
			char buf[INET6_ADDRSTRLEN];

			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
			    buf, sizeof (buf)), ill->ill_name));
		}
		/* Undo the ilm_add; the join did not take effect */
		ilm_delete(ilm);
		*errorp = ret;
		return (NULL);
	} else {
		return (ilm);
	}
}
729
730/*
731 * Looks up the list of multicast physical addresses this interface
732 * listens to. Add to the list if not present already.
733 */
734boolean_t
735ip_mphysaddr_add(ill_t *ill, uchar_t *hw_addr)
736{
737	multiphysaddr_t *mpa = NULL;
738	int	hw_addr_length = ill->ill_phys_addr_length;
739
740	mutex_enter(&ill->ill_lock);
741	for (mpa = ill->ill_mphysaddr_list; mpa != NULL; mpa = mpa->mpa_next) {
742		if (bcmp(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length) == 0) {
743			mpa->mpa_refcnt++;
744			mutex_exit(&ill->ill_lock);
745			return (B_FALSE);
746		}
747	}
748
749	mpa = kmem_zalloc(sizeof (multiphysaddr_t), KM_NOSLEEP);
750	if (mpa == NULL) {
751		/*
752		 * We risk not having the multiphysadd structure. At this
753		 * point we can't fail. We can't afford to not send a
754		 * DL_ENABMULTI_REQ also. It is better than pre-allocating
755		 * the structure and having the code to track it also.
756		 */
757		ip0dbg(("ip_mphysaddr_add: ENOMEM. Some multicast apps"
758		    " may have issues. hw_addr: %p ill_name: %s\n",
759		    (void *)hw_addr, ill->ill_name));
760		mutex_exit(&ill->ill_lock);
761		return (B_TRUE);
762	}
763	bcopy(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length);
764	mpa->mpa_refcnt = 1;
765	mpa->mpa_next = ill->ill_mphysaddr_list;
766	ill->ill_mphysaddr_list = mpa;
767	mutex_exit(&ill->ill_lock);
768	return (B_TRUE);
769}
770
771/*
772 * Look up hw_addr from the list of physical multicast addresses this interface
773 * listens to.
774 * Remove the entry if the refcnt is 0
775 */
776boolean_t
777ip_mphysaddr_del(ill_t *ill, uchar_t *hw_addr)
778{
779	multiphysaddr_t *mpap = NULL, **mpapp = NULL;
780	int hw_addr_length = ill->ill_phys_addr_length;
781	boolean_t ret = B_FALSE;
782
783	mutex_enter(&ill->ill_lock);
784	for (mpapp = &ill->ill_mphysaddr_list; (mpap = *mpapp) != NULL;
785	    mpapp = &(mpap->mpa_next)) {
786		if (bcmp(hw_addr, &(mpap->mpa_addr[0]), hw_addr_length) == 0)
787			break;
788	}
789	if (mpap == NULL) {
790		/*
791		 * Should be coming here only when there was a memory
792		 * exhaustion and we were not able to allocate
793		 * a multiphysaddr_t. We still send a DL_DISABMULTI_REQ down.
794		 */
795
796		ip0dbg(("ip_mphysaddr_del: No entry for this addr. Some "
797		    "multicast apps might have had issues. hw_addr: %p "
798		    " ill_name: %s\n", (void *)hw_addr, ill->ill_name));
799		ret = B_TRUE;
800	} else if (--mpap->mpa_refcnt == 0) {
801		*mpapp = mpap->mpa_next;
802		kmem_free(mpap, sizeof (multiphysaddr_t));
803		ret = B_TRUE;
804	}
805	mutex_exit(&ill->ill_lock);
806	return (ret);
807}
808
809/*
810 * Send a multicast request to the driver for enabling or disabling
811 * multicast reception for v6groupp address. The caller has already
812 * checked whether it is appropriate to send one or not.
813 *
814 * For IPMP we switch to the cast_ill since it has the right hardware
815 * information.
816 */
817static int
818ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
819{
820	mblk_t	*mp;
821	uint32_t addrlen, addroff;
822	ill_t *release_ill = NULL;
823	uchar_t *cp;
824	int err = 0;
825
826	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
827
828	if (IS_IPMP(ill)) {
829		/* On the upper IPMP ill. */
830		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
831		if (release_ill == NULL) {
832			/*
833			 * Avoid sending it down to the ipmpstub.
834			 * We will be called again once the members of the
835			 * group are in place
836			 */
837			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
838			    ill->ill_name, ill->ill_isv6));
839			return (0);
840		}
841		ill = release_ill;
842	}
843	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
844	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
845	if (mp == NULL) {
846		err = ENOMEM;
847		goto done;
848	}
849
850	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
851	if (mp == NULL) {
852		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
853		err = ENOMEM;
854		goto done;
855	}
856	cp = mp->b_rptr;
857
858	switch (((union DL_primitives *)cp)->dl_primitive) {
859	case DL_ENABMULTI_REQ:
860		cp += ((dl_enabmulti_req_t *)cp)->dl_addr_offset;
861		if (!ip_mphysaddr_add(ill, cp)) {
862			freemsg(mp);
863			err = 0;
864			goto done;
865		}
866		mutex_enter(&ill->ill_lock);
867		/* Track the state if this is the first enabmulti */
868		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
869			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
870		mutex_exit(&ill->ill_lock);
871		break;
872	case DL_DISABMULTI_REQ:
873		cp += ((dl_disabmulti_req_t *)cp)->dl_addr_offset;
874		if (!ip_mphysaddr_del(ill, cp)) {
875			freemsg(mp);
876			err = 0;
877			goto done;
878		}
879	}
880	ill_dlpi_queue(ill, mp);
881done:
882	if (release_ill != NULL)
883		ill_refrele(release_ill);
884	return (err);
885}
886
887/*
888 * Send a multicast request to the driver for enabling multicast
889 * membership for v6group if appropriate.
890 */
891static int
892ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
893{
894	if (ill->ill_net_type != IRE_IF_RESOLVER ||
895	    ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
896		ip1dbg(("ip_ll_multireq: not resolver\n"));
897		return (0);	/* Must be IRE_IF_NORESOLVER */
898	}
899
900	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
901		ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
902		return (0);
903	}
904	return (ip_ll_send_multireq(ill, v6groupp, prim));
905}
906
907/*
908 * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
909 * being true.
910 */
911int
912ip_delmulti(ilm_t *ilm)
913{
914	ill_t *ill = ilm->ilm_ill;
915	int error;
916
917	/* Acquire serializer to keep assert in ilm_bld_flists happy */
918	mutex_enter(&ill->ill_mcast_serializer);
919	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
920	mutex_exit(&ill->ill_mcast_serializer);
921	/*
922	 * Now that all locks have been dropped, we can send any
923	 * deferred/queued DLPI or IP packets
924	 */
925	ill_mcast_send_queued(ill);
926	ill_dlpi_send_queued(ill);
927	return (error);
928}
929
930
931/*
932 * Delete the ilm.
933 * Assumes ill_mcast_serializer is held by the caller.
934 * Caller must send out queued dlpi/multicast packets after dropping
935 * all locks.
936 */
937static int
938ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
939{
940	ill_t *ill = ilm->ilm_ill;
941	int ret;
942
943	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
944	ASSERT(!(IS_UNDER_IPMP(ill)));
945
946	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
947	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
948	rw_exit(&ill->ill_mcast_lock);
949	ill_mcast_timer_start(ill->ill_ipst);
950	return (ret);
951}
952
/*
 * Drop the counters/references on the ilm; when the last reference goes
 * away, delete the ilm and back out the IGMP/MLD and driver state.
 * Caller must hold ill_mcast_lock as writer (asserted below).
 */
static int
ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
	ill_t *ill = ilm->ilm_ill;
	int error;
	in6_addr_t v6group;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* Other references remain; just recompute the filter state */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm));

	/* Save the group; the ilm is freed by ilm_delete() below */
	v6group = ilm->ilm_v6addr;

	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
		ilm_delete(ilm);
		/*
		 * If other ilms for this group remain, we should not
		 * tell the driver to leave.
		 */
		if (ilm_numentries(ill, &v6group) != 0)
			return (0);

		ill_leave_allmulti(ill);

		return (0);
	}

	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_leavegroup(ilm);
		else
			igmp_leavegroup(ilm);
	}

	ilm_delete(ilm);
	/*
	 * If other ilms for this group remain, we should not
	 * tell the driver to leave.
	 */
	if (ilm_numentries(ill, &v6group) != 0)
		return (0);

	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
	/* We ignore the case when ill_dl_up is not set */
	if (error == ENETDOWN) {
		char buf[INET6_ADDRSTRLEN];

		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
		    ill->ill_name));
	}
	return (error);
}
1014
1015/*
1016 * Make the driver pass up all multicast packets.
1017 */
1018int
1019ill_join_allmulti(ill_t *ill)
1020{
1021	mblk_t		*promiscon_mp, *promiscoff_mp = NULL;
1022	uint32_t	addrlen, addroff;
1023	ill_t		*release_ill = NULL;
1024
1025	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1026
1027	if (IS_LOOPBACK(ill))
1028		return (0);
1029
1030	if (!ill->ill_dl_up) {
1031		/*
1032		 * Nobody there. All multicast addresses will be re-joined
1033		 * when we get the DL_BIND_ACK bringing the interface up.
1034		 */
1035		return (ENETDOWN);
1036	}
1037
1038	if (IS_IPMP(ill)) {
1039		/* On the upper IPMP ill. */
1040		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
1041		if (release_ill == NULL) {
1042			/*
1043			 * Avoid sending it down to the ipmpstub.
1044			 * We will be called again once the members of the
1045			 * group are in place
1046			 */
1047			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
1048			    ill->ill_name, ill->ill_isv6));
1049			return (0);
1050		}
1051		ill = release_ill;
1052		if (!ill->ill_dl_up) {
1053			ill_refrele(ill);
1054			return (ENETDOWN);
1055		}
1056	}
1057
1058	/*
1059	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
1060	 * provider.  We don't need to do this for certain media types for
1061	 * which we never need to turn promiscuous mode on.  While we're here,
1062	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
1063	 * ill_leave_allmulti() will not fail due to low memory conditions.
1064	 */
1065	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1066	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1067		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1068		    &addrlen, &addroff);
1069		if (ill->ill_promiscoff_mp == NULL)
1070			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1071			    &addrlen, &addroff);
1072		if (promiscon_mp == NULL ||
1073		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
1074			freemsg(promiscon_mp);
1075			freemsg(promiscoff_mp);
1076			if (release_ill != NULL)
1077				ill_refrele(release_ill);
1078			return (ENOMEM);
1079		}
1080		if (ill->ill_promiscoff_mp == NULL)
1081			ill->ill_promiscoff_mp = promiscoff_mp;
1082		ill_dlpi_queue(ill, promiscon_mp);
1083	}
1084	if (release_ill != NULL)
1085		ill_refrele(release_ill);
1086	return (0);
1087}
1088
1089/*
1090 * Make the driver stop passing up all multicast packets
1091 */
1092void
1093ill_leave_allmulti(ill_t *ill)
1094{
1095	mblk_t	*promiscoff_mp;
1096	ill_t	*release_ill = NULL;
1097
1098	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1099
1100	if (IS_LOOPBACK(ill))
1101		return;
1102
1103	if (!ill->ill_dl_up) {
1104		/*
1105		 * Nobody there. All multicast addresses will be re-joined
1106		 * when we get the DL_BIND_ACK bringing the interface up.
1107		 */
1108		return;
1109	}
1110
1111	if (IS_IPMP(ill)) {
1112		/* On the upper IPMP ill. */
1113		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
1114		if (release_ill == NULL) {
1115			/*
1116			 * Avoid sending it down to the ipmpstub.
1117			 * We will be called again once the members of the
1118			 * group are in place
1119			 */
1120			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
1121			    ill->ill_name, ill->ill_isv6));
1122			return;
1123		}
1124		ill = release_ill;
1125		if (!ill->ill_dl_up)
1126			goto done;
1127	}
1128
1129	/*
1130	 * In the case of IPMP and ill_dl_up not being set when we joined
1131	 * we didn't allocate a promiscoff_mp. In that case we have
1132	 * nothing to do when we leave.
1133	 * Ditto for PHYI_MULTI_BCAST
1134	 */
1135	promiscoff_mp = ill->ill_promiscoff_mp;
1136	if (promiscoff_mp != NULL) {
1137		ill->ill_promiscoff_mp = NULL;
1138		ill_dlpi_queue(ill, promiscoff_mp);
1139	}
1140done:
1141	if (release_ill != NULL)
1142		ill_refrele(release_ill);
1143}
1144
1145int
1146ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1147{
1148	ill_t		*ill;
1149	int		ret;
1150	ilm_t		*ilm;
1151
1152	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1153	if (ill == NULL)
1154		return (ENODEV);
1155
1156	/*
1157	 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
1158	 * to join allmulti since only the nominated underlying interface in
1159	 * the group should receive multicast.  We silently succeed to avoid
1160	 * having to teach IPobs (currently the only caller of this routine)
1161	 * to ignore failures in this case.
1162	 */
1163	if (IS_UNDER_IPMP(ill)) {
1164		ill_refrele(ill);
1165		return (0);
1166	}
1167	mutex_enter(&ill->ill_lock);
1168	if (ill->ill_ipallmulti_cnt > 0) {
1169		/* Already joined */
1170		ASSERT(ill->ill_ipallmulti_ilm != NULL);
1171		ill->ill_ipallmulti_cnt++;
1172		mutex_exit(&ill->ill_lock);
1173		goto done;
1174	}
1175	mutex_exit(&ill->ill_lock);
1176
1177	ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
1178	if (ilm == NULL) {
1179		ASSERT(ret != 0);
1180		ill_refrele(ill);
1181		return (ret);
1182	}
1183
1184	mutex_enter(&ill->ill_lock);
1185	if (ill->ill_ipallmulti_cnt > 0) {
1186		/* Another thread added it concurrently */
1187		(void) ip_delmulti(ilm);
1188		mutex_exit(&ill->ill_lock);
1189		goto done;
1190	}
1191	ASSERT(ill->ill_ipallmulti_ilm == NULL);
1192	ill->ill_ipallmulti_ilm = ilm;
1193	ill->ill_ipallmulti_cnt++;
1194	mutex_exit(&ill->ill_lock);
1195done:
1196	ill_refrele(ill);
1197	return (0);
1198}
1199
1200int
1201ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1202{
1203	ill_t		*ill;
1204	ilm_t		*ilm;
1205
1206	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1207	if (ill == NULL)
1208		return (ENODEV);
1209
1210	if (IS_UNDER_IPMP(ill)) {
1211		ill_refrele(ill);
1212		return (0);
1213	}
1214
1215	mutex_enter(&ill->ill_lock);
1216	if (ill->ill_ipallmulti_cnt == 0) {
1217		/* ip_purge_allmulti could have removed them all */
1218		mutex_exit(&ill->ill_lock);
1219		goto done;
1220	}
1221	ill->ill_ipallmulti_cnt--;
1222	if (ill->ill_ipallmulti_cnt == 0) {
1223		/* Last one */
1224		ilm = ill->ill_ipallmulti_ilm;
1225		ill->ill_ipallmulti_ilm = NULL;
1226	} else {
1227		ilm = NULL;
1228	}
1229	mutex_exit(&ill->ill_lock);
1230	if (ilm != NULL)
1231		(void) ip_delmulti(ilm);
1232
1233done:
1234	ill_refrele(ill);
1235	return (0);
1236}
1237
1238/*
1239 * Delete the allmulti memberships that were added as part of
1240 * ip_join_allmulti().
1241 */
1242void
1243ip_purge_allmulti(ill_t *ill)
1244{
1245	ilm_t	*ilm;
1246
1247	ASSERT(IAM_WRITER_ILL(ill));
1248
1249	mutex_enter(&ill->ill_lock);
1250	ilm = ill->ill_ipallmulti_ilm;
1251	ill->ill_ipallmulti_ilm = NULL;
1252	ill->ill_ipallmulti_cnt = 0;
1253	mutex_exit(&ill->ill_lock);
1254
1255	if (ilm != NULL)
1256		(void) ip_delmulti(ilm);
1257}
1258
1259/*
1260 * Create a dlpi message with room for phys+sap. Later
1261 * we will strip the sap for those primitives which
1262 * only need a physical address.
1263 */
1264static mblk_t *
1265ill_create_dl(ill_t *ill, uint32_t dl_primitive,
1266    uint32_t *addr_lenp, uint32_t *addr_offp)
1267{
1268	mblk_t	*mp;
1269	uint32_t	hw_addr_length;
1270	char		*cp;
1271	uint32_t	offset;
1272	uint32_t	length;
1273	uint32_t 	size;
1274
1275	*addr_lenp = *addr_offp = 0;
1276
1277	hw_addr_length = ill->ill_phys_addr_length;
1278	if (!hw_addr_length) {
1279		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1280		return (NULL);
1281	}
1282
1283	switch (dl_primitive) {
1284	case DL_ENABMULTI_REQ:
1285		length = sizeof (dl_enabmulti_req_t);
1286		size = length + hw_addr_length;
1287		break;
1288	case DL_DISABMULTI_REQ:
1289		length = sizeof (dl_disabmulti_req_t);
1290		size = length + hw_addr_length;
1291		break;
1292	case DL_PROMISCON_REQ:
1293	case DL_PROMISCOFF_REQ:
1294		size = length = sizeof (dl_promiscon_req_t);
1295		break;
1296	default:
1297		return (NULL);
1298	}
1299	mp = allocb(size, BPRI_HI);
1300	if (!mp)
1301		return (NULL);
1302	mp->b_wptr += size;
1303	mp->b_datap->db_type = M_PROTO;
1304
1305	cp = (char *)mp->b_rptr;
1306	offset = length;
1307
1308	switch (dl_primitive) {
1309	case DL_ENABMULTI_REQ: {
1310		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1311
1312		dl->dl_primitive = dl_primitive;
1313		dl->dl_addr_offset = offset;
1314		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1315		*addr_offp = offset;
1316		break;
1317	}
1318	case DL_DISABMULTI_REQ: {
1319		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1320
1321		dl->dl_primitive = dl_primitive;
1322		dl->dl_addr_offset = offset;
1323		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1324		*addr_offp = offset;
1325		break;
1326	}
1327	case DL_PROMISCON_REQ:
1328	case DL_PROMISCOFF_REQ: {
1329		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1330
1331		dl->dl_primitive = dl_primitive;
1332		dl->dl_level = DL_PROMISC_MULTI;
1333		break;
1334	}
1335	}
1336	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1337	    *addr_lenp, *addr_offp));
1338	return (mp);
1339}
1340
1341/*
1342 * Rejoin any groups for which we have ilms.
1343 *
1344 * This is only needed for IPMP when the cast_ill changes since that
1345 * change is invisible to the ilm. Other interface changes are handled
1346 * by conn_update_ill.
1347 */
1348void
1349ill_recover_multicast(ill_t *ill)
1350{
1351	ilm_t	*ilm;
1352	char    addrbuf[INET6_ADDRSTRLEN];
1353
1354	ill->ill_need_recover_multicast = 0;
1355
1356	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1357	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1358		/*
1359		 * If we have more then one ilm for the group (e.g., with
1360		 * different zoneid) then we should not tell the driver
1361		 * to join unless this is the first ilm for the group.
1362		 */
1363		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1364		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1365			continue;
1366		}
1367
1368		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1369		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1370
1371		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1372			(void) ill_join_allmulti(ill);
1373		} else {
1374			if (ill->ill_isv6)
1375				mld_joingroup(ilm);
1376			else
1377				igmp_joingroup(ilm);
1378
1379			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1380			    DL_ENABMULTI_REQ);
1381		}
1382	}
1383	rw_exit(&ill->ill_mcast_lock);
1384	/* Send any deferred/queued DLPI or IP packets */
1385	ill_mcast_send_queued(ill);
1386	ill_dlpi_send_queued(ill);
1387	ill_mcast_timer_start(ill->ill_ipst);
1388}
1389
1390/*
1391 * The opposite of ill_recover_multicast() -- leaves all multicast groups
1392 * that were explicitly joined.
1393 *
1394 * This is only needed for IPMP when the cast_ill changes since that
1395 * change is invisible to the ilm. Other interface changes are handled
1396 * by conn_update_ill.
1397 */
1398void
1399ill_leave_multicast(ill_t *ill)
1400{
1401	ilm_t	*ilm;
1402	char    addrbuf[INET6_ADDRSTRLEN];
1403
1404	ill->ill_need_recover_multicast = 1;
1405
1406	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1407	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1408		/*
1409		 * If we have more then one ilm for the group (e.g., with
1410		 * different zoneid) then we should not tell the driver
1411		 * to leave unless this is the first ilm for the group.
1412		 */
1413		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1414		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1415			continue;
1416		}
1417
1418		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1419		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1420
1421		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1422			ill_leave_allmulti(ill);
1423		} else {
1424			if (ill->ill_isv6)
1425				mld_leavegroup(ilm);
1426			else
1427				igmp_leavegroup(ilm);
1428
1429			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1430			    DL_DISABMULTI_REQ);
1431		}
1432	}
1433	rw_exit(&ill->ill_mcast_lock);
1434	/* Send any deferred/queued DLPI or IP packets */
1435	ill_mcast_send_queued(ill);
1436	ill_dlpi_send_queued(ill);
1437	ill_mcast_timer_start(ill->ill_ipst);
1438}
1439
1440/*
1441 * Interface used by IP input/output.
1442 * Returns true if there is a member on the ill for any zoneid.
1443 */
1444boolean_t
1445ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
1446{
1447	ilm_t		*ilm;
1448
1449	rw_enter(&ill->ill_mcast_lock, RW_READER);
1450	ilm = ilm_lookup(ill, v6group, ALL_ZONES);
1451	rw_exit(&ill->ill_mcast_lock);
1452	return (ilm != NULL);
1453}
1454
1455/*
1456 * Interface used by IP input/output.
1457 * Returns true if there is a member on the ill for any zoneid.
1458 *
1459 * The group and source can't be INADDR_ANY here so no need to translate to
1460 * the unspecified IPv6 address.
1461 */
1462boolean_t
1463ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
1464{
1465	in6_addr_t	v6group;
1466
1467	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1468	return (ill_hasmembers_v6(ill, &v6group));
1469}
1470
1471/*
1472 * Interface used by IP input/output.
1473 * Returns true if there is a member on the ill for any zoneid except skipzone.
1474 */
1475boolean_t
1476ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
1477    zoneid_t skipzone)
1478{
1479	ilm_t		*ilm;
1480
1481	rw_enter(&ill->ill_mcast_lock, RW_READER);
1482	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1483		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1484		    ilm->ilm_zoneid != skipzone) {
1485			rw_exit(&ill->ill_mcast_lock);
1486			return (B_TRUE);
1487		}
1488	}
1489	rw_exit(&ill->ill_mcast_lock);
1490	return (B_FALSE);
1491}
1492
1493/*
1494 * Interface used by IP input/output.
1495 * Returns true if there is a member on the ill for any zoneid except skipzone.
1496 *
1497 * The group and source can't be INADDR_ANY here so no need to translate to
1498 * the unspecified IPv6 address.
1499 */
1500boolean_t
1501ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
1502{
1503	in6_addr_t	v6group;
1504
1505	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1506	return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
1507}
1508
1509/*
1510 * Interface used by IP input.
1511 * Returns the next numerically larger zoneid that has a member. If none exist
1512 * then returns -1 (ALL_ZONES).
1513 * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
1514 * to find the first zoneid which has a member, and then pass that in for
1515 * subsequent calls until ALL_ZONES is returned.
1516 *
1517 * The implementation of ill_hasmembers_nextzone() assumes the ilms
1518 * are sorted by zoneid for efficiency.
1519 */
1520zoneid_t
1521ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
1522    zoneid_t zoneid)
1523{
1524	ilm_t		*ilm;
1525
1526	rw_enter(&ill->ill_mcast_lock, RW_READER);
1527	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1528		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1529		    ilm->ilm_zoneid > zoneid) {
1530			zoneid = ilm->ilm_zoneid;
1531			rw_exit(&ill->ill_mcast_lock);
1532			return (zoneid);
1533		}
1534	}
1535	rw_exit(&ill->ill_mcast_lock);
1536	return (ALL_ZONES);
1537}
1538
1539/*
1540 * Interface used by IP input.
1541 * Returns the next numerically larger zoneid that has a member. If none exist
1542 * then returns -1 (ALL_ZONES).
1543 *
1544 * The group and source can't be INADDR_ANY here so no need to translate to
1545 * the unspecified IPv6 address.
1546 */
1547zoneid_t
1548ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1549{
1550	in6_addr_t	v6group;
1551
1552	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1553
1554	return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
1555}
1556
1557/*
1558 * Find an ilm matching the ill, group, and zoneid.
1559 */
1560static ilm_t *
1561ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1562{
1563	ilm_t	*ilm;
1564
1565	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1566
1567	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1568		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1569			continue;
1570		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1571			continue;
1572
1573		ASSERT(ilm->ilm_ill == ill);
1574		return (ilm);
1575	}
1576	return (NULL);
1577}
1578
1579/*
1580 * How many members on this ill?
1581 * Since each shared-IP zone has a separate ilm for the same group/ill
1582 * we can have several.
1583 */
1584static int
1585ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
1586{
1587	ilm_t	*ilm;
1588	int i = 0;
1589
1590	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1591	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1592		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1593			i++;
1594		}
1595	}
1596	return (i);
1597}
1598
/*
 * Allocate a new ilm for (ill, v6group, zoneid), initialize its filter
 * state from the ilg (if any), and insert it into the ill's list sorted
 * by zoneid.  Returns NULL on allocation failure.
 * Caller guarantees that the group is not already on the list and holds
 * ill_mcast_lock as writer.
 */
static ilm_t *
ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
{
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Only allocate a source filter when there is ilg state to copy */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	/* INFINITY means the timers start out disarmed */
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	ilm->ilm_ill = ill;
	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
	    (char *), "ilm", (void *), ilm);
	/* Account this ilm against the ill (see ilm_delete for the decr) */
	ill->ill_ilm_cnt++;

	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/* The ill/ipif could have just been marked as condemned */

	/*
	 * To make ill_hasmembers_nextzone_v6 work we keep the list
	 * sorted by zoneid.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	return (ilm);
}
1663
/*
 * Release all source-list storage attached to an ilm and free the ilm
 * itself.  The caller is expected to have already unlinked the ilm from
 * the ill's list (see ilm_delete).
 */
void
ilm_inactive(ilm_t *ilm)
{
	FREE_SLIST(ilm->ilm_filter);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}
1674
1675/*
1676 * Unlink ilm and free it.
1677 */
1678static void
1679ilm_delete(ilm_t *ilm)
1680{
1681	ill_t		*ill = ilm->ilm_ill;
1682	ilm_t		**ilmp;
1683	boolean_t	need_wakeup;
1684
1685	/*
1686	 * Delete under lock protection so that readers don't stumble
1687	 * on bad ilm_next
1688	 */
1689	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1690
1691	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1692		;
1693
1694	*ilmp = ilm->ilm_next;
1695
1696	mutex_enter(&ill->ill_lock);
1697	/*
1698	 * if we are the last reference to the ill, we may need to wakeup any
1699	 * pending FREE or unplumb operations. This is because conn_update_ill
1700	 * bails if there is a ilg_delete_all in progress.
1701	 */
1702	need_wakeup = B_FALSE;
1703	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1704	    (char *), "ilm", (void *), ilm);
1705	ASSERT(ill->ill_ilm_cnt > 0);
1706	ill->ill_ilm_cnt--;
1707	if (ILL_FREE_OK(ill))
1708		need_wakeup = B_TRUE;
1709
1710	ilm_inactive(ilm); /* frees this ilm */
1711
1712	if (need_wakeup) {
1713		/* drops ill lock */
1714		ipif_ill_refrele_tail(ill);
1715	} else {
1716		mutex_exit(&ill->ill_lock);
1717	}
1718}
1719
1720/*
1721 * Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
1722 * Applies to both IPv4 and IPv6, although ifaddr is only used with
1723 * IPv4.
1724 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1725 * On error it sets *errorp.
1726 */
1727static ill_t *
1728ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
1729    zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
1730{
1731	ill_t *ill;
1732	ipaddr_t v4group;
1733
1734	if (IN6_IS_ADDR_V4MAPPED(group)) {
1735		IN6_V4MAPPED_TO_IPADDR(group, v4group);
1736
1737		if (ifindex != 0) {
1738			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1739			    B_FALSE, ipst);
1740		} else if (ifaddr != INADDR_ANY) {
1741			ipif_t *ipif;
1742
1743			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
1744			if (ipif == NULL) {
1745				ill = NULL;
1746			} else {
1747				ill = ipif->ipif_ill;
1748				ill_refhold(ill);
1749				ipif_refrele(ipif);
1750			}
1751		} else {
1752			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
1753			    NULL);
1754		}
1755	} else {
1756		if (ifindex != 0) {
1757			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1758			    B_TRUE, ipst);
1759		} else {
1760			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
1761			    NULL);
1762		}
1763	}
1764	if (ill == NULL) {
1765		if (ifindex != 0)
1766			*errorp = ENXIO;
1767		else
1768			*errorp = EADDRNOTAVAIL;
1769		return (NULL);
1770	}
1771	/* operation not supported on the virtual network interface */
1772	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
1773		ill_refrele(ill);
1774		*errorp = EINVAL;
1775		return (NULL);
1776	}
1777	return (ill);
1778}
1779
1780/*
1781 * Looks up the appropriate ill given an interface index (or interface address)
1782 * and multicast group.  On success, returns 0, with *illpp pointing to the
1783 * found struct.  On failure, returns an errno and *illpp is set to NULL.
1784 *
1785 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1786 *
1787 * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
1788 * case of IPv4.
1789 */
1790int
1791ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
1792    const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
1793{
1794	boolean_t src_unspec;
1795	ill_t *ill = NULL;
1796	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1797	int error = 0;
1798
1799	*illpp = NULL;
1800
1801	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1802
1803	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1804		ipaddr_t v4group;
1805		ipaddr_t v4src;
1806
1807		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1808			return (EINVAL);
1809		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1810		if (src_unspec) {
1811			v4src = INADDR_ANY;
1812		} else {
1813			IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1814		}
1815		if (!CLASSD(v4group) || CLASSD(v4src))
1816			return (EINVAL);
1817	} else {
1818		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1819			return (EINVAL);
1820		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1821		    IN6_IS_ADDR_MULTICAST(v6src)) {
1822			return (EINVAL);
1823		}
1824	}
1825
1826	ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
1827	    ipst, &error);
1828	*illpp = ill;
1829	return (error);
1830}
1831
/*
 * Retrieve this conn's multicast source filter state (mode and source
 * list) for the given group, filling in either a v4-only ip_msfilter
 * (imsf != NULL, SIOC[G]IPMSFILTER) or a group_filter (gf != NULL,
 * SIOC[G]MSFILTER).  issin6 selects sockaddr_in6 entries in gf_slist.
 *
 * At most the caller-supplied number of sources is copied out, but the
 * full source count and mode are always reported so the caller can size
 * a retry.  Returns 0, or EADDRNOTAVAIL if the conn has no membership
 * matching (group, ifaddr/ifindex).
 */
static int
ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
{
	ilg_t *ilg;
	int i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *fp;
	boolean_t is_v4only_api;
	ipaddr_t ifaddr;
	uint_t ifindex;

	/* Normalize the two API flavors into outsrcs/ifaddr/ifindex */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!issin6);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
		ifaddr = imsf->imsf_interface.s_addr;
		ifindex = 0;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
		ifaddr = INADDR_ANY;
		ifindex = gf->gf_interface;
	}

	/* No need to use ill_mcast_serializer for the reader */
	rw_enter(&connp->conn_ilg_lock, RW_READER);
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* Copy out at most outsrcs entries, stopping at the list end */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (issin6) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				/* Internal storage is v4-mapped IPv6 */
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* Report the full count even if we copied fewer */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	rw_exit(&connp->conn_ilg_lock);

	return (0);
}
1913
1914/*
1915 * Common for IPv4 and IPv6.
1916 */
1917static int
1918ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
1919    struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
1920    boolean_t issin6)
1921{
1922	ilg_t *ilg;
1923	int i, err, infmode, new_fmode;
1924	uint_t insrcs;
1925	struct sockaddr_in *sin;
1926	struct sockaddr_in6 *sin6;
1927	struct in_addr *addrp;
1928	slist_t *orig_filter = NULL;
1929	slist_t *new_filter = NULL;
1930	mcast_record_t orig_fmode;
1931	boolean_t leave_group, is_v4only_api;
1932	ilg_stat_t ilgstat;
1933	ilm_t *ilm;
1934	ipaddr_t ifaddr;
1935	uint_t ifindex;
1936
1937	if (gf == NULL) {
1938		ASSERT(imsf != NULL);
1939		ASSERT(!issin6);
1940		is_v4only_api = B_TRUE;
1941		insrcs = imsf->imsf_numsrc;
1942		infmode = imsf->imsf_fmode;
1943		ifaddr = imsf->imsf_interface.s_addr;
1944		ifindex = 0;
1945	} else {
1946		ASSERT(imsf == NULL);
1947		is_v4only_api = B_FALSE;
1948		insrcs = gf->gf_numsrc;
1949		infmode = gf->gf_fmode;
1950		ifaddr = INADDR_ANY;
1951		ifindex = gf->gf_interface;
1952	}
1953
1954	/* Make sure we can handle the source list */
1955	if (insrcs > MAX_FILTER_SIZE)
1956		return (ENOBUFS);
1957
1958	/*
1959	 * setting the filter to (INCLUDE, NULL) is treated
1960	 * as a request to leave the group.
1961	 */
1962	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);
1963
1964	mutex_enter(&ill->ill_mcast_serializer);
1965	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1966	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1967	if (ilg == NULL) {
1968		/*
1969		 * if the request was actually to leave, and we
1970		 * didn't find an ilg, there's nothing to do.
1971		 */
1972		if (leave_group) {
1973			rw_exit(&connp->conn_ilg_lock);
1974			mutex_exit(&ill->ill_mcast_serializer);
1975			return (0);
1976		}
1977		ilg = conn_ilg_alloc(connp, &err);
1978		if (ilg == NULL) {
1979			rw_exit(&connp->conn_ilg_lock);
1980			mutex_exit(&ill->ill_mcast_serializer);
1981			return (err);
1982		}
1983		ilgstat = ILGSTAT_NEW;
1984		ilg->ilg_v6group = *group;
1985		ilg->ilg_ill = ill;
1986		ilg->ilg_ifaddr = ifaddr;
1987		ilg->ilg_ifindex = ifindex;
1988	} else if (leave_group) {
1989		/*
1990		 * Make sure we have the correct serializer. The ill argument
1991		 * might not match ilg_ill.
1992		 */
1993		ilg_refhold(ilg);
1994		mutex_exit(&ill->ill_mcast_serializer);
1995		ill = ilg->ilg_ill;
1996		rw_exit(&connp->conn_ilg_lock);
1997
1998		mutex_enter(&ill->ill_mcast_serializer);
1999		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2000		ilm = ilg->ilg_ilm;
2001		ilg->ilg_ilm = NULL;
2002		ilg_delete(connp, ilg, NULL);
2003		ilg_refrele(ilg);
2004		rw_exit(&connp->conn_ilg_lock);
2005		if (ilm != NULL)
2006			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2007		mutex_exit(&ill->ill_mcast_serializer);
2008		/*
2009		 * Now that all locks have been dropped, we can send any
2010		 * deferred/queued DLPI or IP packets
2011		 */
2012		ill_mcast_send_queued(ill);
2013		ill_dlpi_send_queued(ill);
2014		return (0);
2015	} else {
2016		ilgstat = ILGSTAT_CHANGE;
2017		/* Preserve existing state in case ip_addmulti() fails */
2018		orig_fmode = ilg->ilg_fmode;
2019		if (ilg->ilg_filter == NULL) {
2020			orig_filter = NULL;
2021		} else {
2022			orig_filter = l_alloc_copy(ilg->ilg_filter);
2023			if (orig_filter == NULL) {
2024				rw_exit(&connp->conn_ilg_lock);
2025				mutex_exit(&ill->ill_mcast_serializer);
2026				return (ENOMEM);
2027			}
2028		}
2029	}
2030
2031	/*
2032	 * Alloc buffer to copy new state into (see below) before
2033	 * we make any changes, so we can bail if it fails.
2034	 */
2035	if ((new_filter = l_alloc()) == NULL) {
2036		rw_exit(&connp->conn_ilg_lock);
2037		err = ENOMEM;
2038		goto free_and_exit;
2039	}
2040
2041	if (insrcs == 0) {
2042		CLEAR_SLIST(ilg->ilg_filter);
2043	} else {
2044		slist_t *fp;
2045		if (ilg->ilg_filter == NULL) {
2046			fp = l_alloc();
2047			if (fp == NULL) {
2048				if (ilgstat == ILGSTAT_NEW)
2049					ilg_delete(connp, ilg, NULL);
2050				rw_exit(&connp->conn_ilg_lock);
2051				err = ENOMEM;
2052				goto free_and_exit;
2053			}
2054		} else {
2055			fp = ilg->ilg_filter;
2056		}
2057		for (i = 0; i < insrcs; i++) {
2058			if (issin6) {
2059				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2060				fp->sl_addr[i] = sin6->sin6_addr;
2061			} else {
2062				if (is_v4only_api) {
2063					addrp = &imsf->imsf_slist[i];
2064				} else {
2065					sin = (struct sockaddr_in *)
2066					    &gf->gf_slist[i];
2067					addrp = &sin->sin_addr;
2068				}
2069				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2070			}
2071		}
2072		fp->sl_numsrc = insrcs;
2073		ilg->ilg_filter = fp;
2074	}
2075	/*
2076	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2077	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2078	 * So we need to translate here.
2079	 */
2080	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2081	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2082
2083	/*
2084	 * Save copy of ilg's filter state to pass to other functions,
2085	 * so we can release conn_ilg_lock now.
2086	 */
2087	new_fmode = ilg->ilg_fmode;
2088	l_copy(ilg->ilg_filter, new_filter);
2089
2090	rw_exit(&connp->conn_ilg_lock);
2091
2092	/*
2093	 * Now update the ill. We wait to do this until after the ilg
2094	 * has been updated because we need to update the src filter
2095	 * info for the ill, which involves looking at the status of
2096	 * all the ilgs associated with this group/interface pair.
2097	 */
2098	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
2099	    new_fmode, new_filter, &err);
2100
2101	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2102	/*
2103	 * Must look up the ilg again since we've not been holding
2104	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2105	 * having called conn_update_ill, which can run once we dropped the
2106	 * conn_ilg_lock above.
2107	 */
2108	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
2109	if (ilg == NULL) {
2110		rw_exit(&connp->conn_ilg_lock);
2111		if (ilm != NULL) {
2112			(void) ip_delmulti_serial(ilm, B_FALSE,
2113			    (ilgstat == ILGSTAT_NEW));
2114		}
2115		err = ENXIO;
2116		goto free_and_exit;
2117	}
2118
2119	if (ilm != NULL) {
2120		if (ilg->ilg_ill == NULL) {
2121			/* some other thread is re-attaching this.  */
2122			rw_exit(&connp->conn_ilg_lock);
2123			(void) ip_delmulti_serial(ilm, B_FALSE,
2124			    (ilgstat == ILGSTAT_NEW));
2125			err = 0;
2126			goto free_and_exit;
2127		}
2128		/* Succeeded. Update the ilg to point at the ilm */
2129		if (ilgstat == ILGSTAT_NEW) {
2130			if (ilg->ilg_ilm == NULL) {
2131				ilg->ilg_ilm = ilm;
2132				ilm->ilm_ifaddr = ifaddr; /* For netstat */
2133			} else {
2134				/* some other thread is re-attaching this. */
2135				rw_exit(&connp->conn_ilg_lock);
2136				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2137				err = 0;
2138				goto free_and_exit;
2139			}
2140		} else {
2141			/*
2142			 * ip_addmulti didn't get a held ilm for
2143			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2144			 */
2145			ASSERT(ilg->ilg_ilm == ilm);
2146		}
2147	} else {
2148		ASSERT(err != 0);
2149		/*
2150		 * Failed to allocate the ilm.
2151		 * Restore the original filter state, or delete the
2152		 * newly-created ilg.
2153		 * If ENETDOWN just clear ill_ilg since so that we
2154		 * will rejoin when the ill comes back; don't report ENETDOWN
2155		 * to application.
2156		 */
2157		if (ilgstat == ILGSTAT_NEW) {
2158			if (err == ENETDOWN) {
2159				ilg->ilg_ill = NULL;
2160				err = 0;
2161			} else {
2162				ilg_delete(connp, ilg, NULL);
2163			}
2164		} else {
2165			ilg->ilg_fmode = orig_fmode;
2166			if (SLIST_IS_EMPTY(orig_filter)) {
2167				CLEAR_SLIST(ilg->ilg_filter);
2168			} else {
2169				/*
2170				 * We didn't free the filter, even if we
2171				 * were trying to make the source list empty;
2172				 * so if orig_filter isn't empty, the ilg
2173				 * must still have a filter alloc'd.
2174				 */
2175				l_copy(orig_filter, ilg->ilg_filter);
2176			}
2177		}
2178	}
2179	rw_exit(&connp->conn_ilg_lock);
2180
2181free_and_exit:
2182	mutex_exit(&ill->ill_mcast_serializer);
2183	ill_mcast_send_queued(ill);
2184	ill_dlpi_send_queued(ill);
2185	l_free(orig_filter);
2186	l_free(new_filter);
2187
2188	return (err);
2189}
2190
2191/*
2192 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2193 */
2194/* ARGSUSED */
2195int
2196ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2197    ip_ioctl_cmd_t *ipip, void *ifreq)
2198{
2199	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2200	/* existence verified in ip_wput_nondata() */
2201	mblk_t *data_mp = mp->b_cont->b_cont;
2202	int datalen, err, cmd, minsize;
2203	uint_t expsize = 0;
2204	conn_t *connp;
2205	boolean_t isv6, is_v4only_api, getcmd;
2206	struct sockaddr_in *gsin;
2207	struct sockaddr_in6 *gsin6;
2208	ipaddr_t v4group;
2209	in6_addr_t v6group;
2210	struct group_filter *gf = NULL;
2211	struct ip_msfilter *imsf = NULL;
2212	mblk_t *ndp;
2213	ill_t *ill;
2214
2215	connp = Q_TO_CONN(q);
2216	err = ip_msfilter_ill(connp, mp, ipip, &ill);
2217	if (err != 0)
2218		return (err);
2219
2220	if (data_mp->b_cont != NULL) {
2221		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2222			return (ENOMEM);
2223		freemsg(data_mp);
2224		data_mp = ndp;
2225		mp->b_cont->b_cont = data_mp;
2226	}
2227
2228	cmd = iocp->ioc_cmd;
2229	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2230	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2231	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2232	datalen = MBLKL(data_mp);
2233
2234	if (datalen < minsize)
2235		return (EINVAL);
2236
2237	/*
2238	 * now we know we have at least have the initial structure,
2239	 * but need to check for the source list array.
2240	 */
2241	if (is_v4only_api) {
2242		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2243		isv6 = B_FALSE;
2244		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2245	} else {
2246		gf = (struct group_filter *)data_mp->b_rptr;
2247		if (gf->gf_group.ss_family == AF_INET6) {
2248			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2249			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2250		} else {
2251			isv6 = B_FALSE;
2252		}
2253		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2254	}
2255	if (datalen < expsize)
2256		return (EINVAL);
2257
2258	if (isv6) {
2259		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2260		v6group = gsin6->sin6_addr;
2261		if (getcmd) {
2262			err = ip_get_srcfilter(connp, gf, NULL, &v6group,
2263			    B_TRUE);
2264		} else {
2265			err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
2266			    B_TRUE);
2267		}
2268	} else {
2269		boolean_t issin6 = B_FALSE;
2270		if (is_v4only_api) {
2271			v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2272			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2273		} else {
2274			if (gf->gf_group.ss_family == AF_INET) {
2275				gsin = (struct sockaddr_in *)&gf->gf_group;
2276				v4group = (ipaddr_t)gsin->sin_addr.s_addr;
2277				IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2278			} else {
2279				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2280				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2281				    v4group);
2282				issin6 = B_TRUE;
2283			}
2284		}
2285		/*
2286		 * INADDR_ANY is represented as the IPv6 unspecifed addr.
2287		 */
2288		if (v4group == INADDR_ANY)
2289			v6group = ipv6_all_zeros;
2290		else
2291			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2292
2293		if (getcmd) {
2294			err = ip_get_srcfilter(connp, gf, imsf, &v6group,
2295			    issin6);
2296		} else {
2297			err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
2298			    issin6);
2299		}
2300	}
2301	ill_refrele(ill);
2302
2303	return (err);
2304}
2305
2306/*
2307 * Determine the ill for the SIOC*MSFILTER ioctls
2308 *
2309 * Returns an error for IS_UNDER_IPMP interfaces.
2310 *
2311 * Finds the ill based on information in the ioctl headers.
2312 */
2313static int
2314ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2315    ill_t **illp)
2316{
2317	int cmd = ipip->ipi_cmd;
2318	int err = 0;
2319	ill_t *ill;
2320	/* caller has verified this mblk exists */
2321	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2322	struct ip_msfilter *imsf;
2323	struct group_filter *gf;
2324	ipaddr_t v4addr, v4group;
2325	in6_addr_t v6group;
2326	uint32_t index;
2327	ip_stack_t *ipst;
2328
2329	ipst = connp->conn_netstack->netstack_ip;
2330
2331	*illp = NULL;
2332
2333	/* don't allow multicast operations on a tcp conn */
2334	if (IPCL_IS_TCP(connp))
2335		return (ENOPROTOOPT);
2336
2337	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2338		/* don't allow v4-specific ioctls on v6 socket */
2339		if (connp->conn_family == AF_INET6)
2340			return (EAFNOSUPPORT);
2341
2342		imsf = (struct ip_msfilter *)dbuf;
2343		v4addr = imsf->imsf_interface.s_addr;
2344		v4group = imsf->imsf_multiaddr.s_addr;
2345		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2346		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
2347		    ipst, &err);
2348		if (ill == NULL && v4addr != INADDR_ANY)
2349			err = ENXIO;
2350	} else {
2351		gf = (struct group_filter *)dbuf;
2352		index = gf->gf_interface;
2353		if (gf->gf_group.ss_family == AF_INET6) {
2354			struct sockaddr_in6 *sin6;
2355
2356			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2357			v6group = sin6->sin6_addr;
2358		} else if (gf->gf_group.ss_family == AF_INET) {
2359			struct sockaddr_in *sin;
2360
2361			sin = (struct sockaddr_in *)&gf->gf_group;
2362			v4group = sin->sin_addr.s_addr;
2363			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2364		} else {
2365			return (EAFNOSUPPORT);
2366		}
2367		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
2368		    IPCL_ZONEID(connp), ipst, &err);
2369	}
2370	*illp = ill;
2371	return (err);
2372}
2373
2374/*
2375 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2376 * in in two stages, as the first copyin tells us the size of the attached
2377 * source buffer.  This function is called by ip_wput_nondata() after the
2378 * first copyin has completed; it figures out how big the second stage
2379 * needs to be, and kicks it off.
2380 *
2381 * In some cases (numsrc < 2), the second copyin is not needed as the
2382 * first one gets a complete structure containing 1 source addr.
2383 *
2384 * The function returns 0 if a second copyin has been started (i.e. there's
2385 * no more work to be done right now), or 1 if the second copyin is not
2386 * needed and ip_wput_nondata() can continue its processing.
2387 */
2388int
2389ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2390{
2391	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2392	int cmd = iocp->ioc_cmd;
2393	/* validity of this checked in ip_wput_nondata() */
2394	mblk_t *mp1 = mp->b_cont->b_cont;
2395	int copysize = 0;
2396	int offset;
2397
2398	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2399		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2400		if (gf->gf_numsrc >= 2) {
2401			offset = sizeof (struct group_filter);
2402			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2403		}
2404	} else {
2405		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2406		if (imsf->imsf_numsrc >= 2) {
2407			offset = sizeof (struct ip_msfilter);
2408			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2409		}
2410	}
2411	if (copysize > 0) {
2412		mi_copyin_n(q, mp, offset, copysize);
2413		return (0);
2414	}
2415	return (1);
2416}
2417
2418/*
2419 * Handle the following optmgmt:
2420 *	IP_ADD_MEMBERSHIP		must not have joined already
2421 *	IPV6_JOIN_GROUP			must not have joined already
2422 *	MCAST_JOIN_GROUP		must not have joined already
2423 *	IP_BLOCK_SOURCE			must have joined already
2424 *	MCAST_BLOCK_SOURCE		must have joined already
2425 *	IP_JOIN_SOURCE_GROUP		may have joined already
2426 *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2427 *
2428 * fmode and src parameters may be used to determine which option is
2429 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2430 * are functionally equivalent):
2431 *	opt			fmode			v6src
2432 *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		unspecified
2433 *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2434 *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2435 *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		IPv4-mapped addr
2436 *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2437 *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		IPv4-mapped addr
2438 *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2439 *
2440 * Changing the filter mode is not allowed; if a matching ilg already
2441 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2442 *
2443 * Verifies that there is a source address of appropriate scope for
2444 * the group; if not, EADDRNOTAVAIL is returned.
2445 *
2446 * The interface to be used may be identified by an IPv4 address or by an
2447 * interface index.
2448 *
2449 * Handles IPv4-mapped IPv6 multicast addresses by associating them
2450 * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2451 * v6src is also v4-mapped.
2452 */
int
ip_opt_add_group(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	ill_t *ill;
	char buf[INET6_ADDRSTRLEN];
	int	err;

	/*
	 * Validate arguments and resolve group/ifaddr/ifindex to an ill.
	 * On success ip_opt_check returns a refheld ill; every path below
	 * must release it.
	 */
	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
	if (err != 0) {
		ip1dbg(("ip_opt_add_group: no ill for group %s/"
		    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
		    sizeof (buf)), ifindex));
		return (err);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ill_refrele(ill);
		return (0);
	}
	mutex_enter(&ill->ill_mcast_serializer);
	/*
	 * Multicast groups may not be joined on interfaces that are either
	 * already underlying interfaces in an IPMP group, or in the process
	 * of joining the IPMP group. The latter condition is enforced by
	 * checking the value of ill->ill_grp_pending under the
	 * ill_mcast_serializer lock.  We cannot serialize the
	 * ill_grp_pending check on the ill_g_lock across ilg_add() because
	 *  ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex
	 * will take the ill_g_lock itself. Instead, we hold the
	 * ill_mcast_serializer.
	 */
	if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) {
		DTRACE_PROBE2(group__add__on__under, ill_t *, ill,
		    in6_addr_t *, v6group);
		mutex_exit(&ill->ill_mcast_serializer);
		ill_refrele(ill);
		return (EADDRNOTAVAIL);
	}
	/* Do the actual join/source-filter update under the serializer */
	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * We have done an addmulti_impl and/or delmulti_impl.
	 * All locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_refrele(ill);
	return (err);
}
2510
2511/*
2512 * Common for IPv6 and IPv4.
2513 * Here we handle ilgs that are still attached to their original ill
2514 * (the one ifaddr/ifindex points at), as well as detached ones.
2515 * The detached ones might have been attached to some other ill.
2516 */
2517static int
2518ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
2519    ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
2520    const in6_addr_t *v6src)
2521{
2522	ilg_t	*ilg;
2523	boolean_t leaving;
2524	ilm_t *ilm;
2525	ill_t *ill;
2526	int err = 0;
2527
2528retry:
2529	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2530	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2531	if (ilg == NULL) {
2532		rw_exit(&connp->conn_ilg_lock);
2533		/*
2534		 * Since we didn't have any ilg we now do the error checks
2535		 * to determine the best errno.
2536		 */
2537		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2538		    &ill);
2539		if (ill != NULL) {
2540			/* The only error was a missing ilg for the group */
2541			ill_refrele(ill);
2542			err = EADDRNOTAVAIL;
2543		}
2544		return (err);
2545	}
2546
2547	/* If the ilg is attached then we serialize using that ill */
2548	ill = ilg->ilg_ill;
2549	if (ill != NULL) {
2550		/* Prevent the ill and ilg from being freed */
2551		ill_refhold(ill);
2552		ilg_refhold(ilg);
2553		rw_exit(&connp->conn_ilg_lock);
2554		mutex_enter(&ill->ill_mcast_serializer);
2555		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2556		if (ilg->ilg_condemned) {
2557			/* Disappeared */
2558			ilg_refrele(ilg);
2559			rw_exit(&connp->conn_ilg_lock);
2560			mutex_exit(&ill->ill_mcast_serializer);
2561			ill_refrele(ill);
2562			goto retry;
2563		}
2564	}
2565
2566	/*
2567	 * Decide if we're actually deleting the ilg or just removing a
2568	 * source filter address; if just removing an addr, make sure we
2569	 * aren't trying to change the filter mode, and that the addr is
2570	 * actually in our filter list already.  If we're removing the
2571	 * last src in an include list, just delete the ilg.
2572	 */
2573	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2574		leaving = B_TRUE;
2575	} else {
2576		if (fmode != ilg->ilg_fmode)
2577			err = EINVAL;
2578		else if (ilg->ilg_filter == NULL ||
2579		    !list_has_addr(ilg->ilg_filter, v6src))
2580			err = EADDRNOTAVAIL;
2581		if (err != 0) {
2582			if (ill != NULL)
2583				ilg_refrele(ilg);
2584			rw_exit(&connp->conn_ilg_lock);
2585			goto done;
2586		}
2587		if (fmode == MODE_IS_INCLUDE &&
2588		    ilg->ilg_filter->sl_numsrc == 1) {
2589			leaving = B_TRUE;
2590			v6src = NULL;
2591		} else {
2592			leaving = B_FALSE;
2593		}
2594	}
2595	ilm = ilg->ilg_ilm;
2596	if (leaving)
2597		ilg->ilg_ilm = NULL;
2598
2599	ilg_delete(connp, ilg, v6src);
2600	if (ill != NULL)
2601		ilg_refrele(ilg);
2602	rw_exit(&connp->conn_ilg_lock);
2603
2604	if (ilm != NULL) {
2605		ASSERT(ill != NULL);
2606		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
2607	}
2608done:
2609	if (ill != NULL) {
2610		mutex_exit(&ill->ill_mcast_serializer);
2611		/*
2612		 * Now that all locks have been dropped, we can
2613		 * send any deferred/queued DLPI or IP packets
2614		 */
2615		ill_mcast_send_queued(ill);
2616		ill_dlpi_send_queued(ill);
2617		ill_refrele(ill);
2618	}
2619	return (err);
2620}
2621
2622/*
2623 * Handle the following optmgmt:
2624 *	IP_DROP_MEMBERSHIP		will leave
2625 *	IPV6_LEAVE_GROUP		will leave
2626 *	MCAST_LEAVE_GROUP		will leave
2627 *	IP_UNBLOCK_SOURCE		will not leave
2628 *	MCAST_UNBLOCK_SOURCE		will not leave
2629 *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2630 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2631 *
2632 * fmode and src parameters may be used to determine which option is
2633 * being set, as follows:
2634 *	opt			 fmode			v6src
2635 *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	unspecified
2636 *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2637 *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2638 *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	IPv4-mapped addr
2639 *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
2640 *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	IPv4-mapped addr
2641 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
2642 *
2643 * Changing the filter mode is not allowed; if a matching ilg already
2644 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2645 *
2646 * The interface to be used may be identified by an IPv4 address or by an
2647 * interface index.
2648 *
2649 * Handles IPv4-mapped IPv6 multicast addresses by associating them
2650 * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2651 * v6src is also v4-mapped.
2652 */
2653int
2654ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
2655    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2656    mcast_record_t fmode, const in6_addr_t *v6src)
2657{
2658
2659	/*
2660	 * In the normal case below we don't check for the ill existing.
2661	 * Instead we look for an existing ilg in _excl.
2662	 * If checkonly we sanity check the arguments
2663	 */
2664	if (checkonly) {
2665		ill_t	*ill;
2666		int	err;
2667
2668		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2669		    &ill);
2670		/*
2671		 * do not do operation, just pretend to - new T_CHECK semantics.
2672		 * ip_opt_check is considered a good enough "check" here.
2673		 */
2674		if (ill != NULL)
2675			ill_refrele(ill);
2676		return (err);
2677	}
2678	return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex,
2679	    fmode, v6src));
2680}
2681
2682/*
2683 * Group mgmt for upper conn that passes things down
2684 * to the interface multicast list (and DLPI)
2685 * These routines can handle new style options that specify an interface name
2686 * as opposed to an interface address (needed for general handling of
2687 * unnumbered interfaces.)
2688 */
2689
2690/*
2691 * Add a group to an upper conn group data structure and pass things down
2692 * to the interface multicast list (and DLPI)
2693 * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
2694 */
2695static int
2696ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2697    uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2698{
2699	int	error = 0;
2700	ilg_t	*ilg;
2701	ilg_stat_t ilgstat;
2702	slist_t	*new_filter = NULL;
2703	int	new_fmode;
2704	ilm_t *ilm;
2705
2706	if (!(ill->ill_flags & ILLF_MULTICAST))
2707		return (EADDRNOTAVAIL);
2708
2709	/* conn_ilg_lock protects the ilg list. */
2710	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
2711	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2712	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2713
2714	/*
2715	 * Depending on the option we're handling, may or may not be okay
2716	 * if group has already been added.  Figure out our rules based
2717	 * on fmode and src params.  Also make sure there's enough room
2718	 * in the filter if we're adding a source to an existing filter.
2719	 */
2720	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2721		/* we're joining for all sources, must not have joined */
2722		if (ilg != NULL)
2723			error = EADDRINUSE;
2724	} else {
2725		if (fmode == MODE_IS_EXCLUDE) {
2726			/* (excl {addr}) => block source, must have joined */
2727			if (ilg == NULL)
2728				error = EADDRNOTAVAIL;
2729		}
2730		/* (incl {addr}) => join source, may have joined */
2731
2732		if (ilg != NULL &&
2733		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
2734			error = ENOBUFS;
2735	}
2736	if (error != 0) {
2737		rw_exit(&connp->conn_ilg_lock);
2738		return (error);
2739	}
2740
2741	/*
2742	 * Alloc buffer to copy new state into (see below) before
2743	 * we make any changes, so we can bail if it fails.
2744	 */
2745	if ((new_filter = l_alloc()) == NULL) {
2746		rw_exit(&connp->conn_ilg_lock);
2747		return (ENOMEM);
2748	}
2749
2750	if (ilg == NULL) {
2751		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
2752			rw_exit(&connp->conn_ilg_lock);
2753			l_free(new_filter);
2754			return (error);
2755		}
2756		ilg->ilg_ifindex = ifindex;
2757		ilg->ilg_ifaddr = ifaddr;
2758		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2759			ilg->ilg_filter = l_alloc();
2760			if (ilg->ilg_filter == NULL) {
2761				ilg_delete(connp, ilg, NULL);
2762				rw_exit(&connp->conn_ilg_lock);
2763				l_free(new_filter);
2764				return (ENOMEM);
2765			}
2766			ilg->ilg_filter->sl_numsrc = 1;
2767			ilg->ilg_filter->sl_addr[0] = *v6src;
2768		}
2769		ilgstat = ILGSTAT_NEW;
2770		ilg->ilg_v6group = *v6group;
2771		ilg->ilg_fmode = fmode;
2772		ilg->ilg_ill = ill;
2773	} else {
2774		int index;
2775
2776		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2777			rw_exit(&connp->conn_ilg_lock);
2778			l_free(new_filter);
2779			return (EINVAL);
2780		}
2781		if (ilg->ilg_filter == NULL) {
2782			ilg->ilg_filter = l_alloc();
2783			if (ilg->ilg_filter == NULL) {
2784				rw_exit(&connp->conn_ilg_lock);
2785				l_free(new_filter);
2786				return (ENOMEM);
2787			}
2788		}
2789		if (list_has_addr(ilg->ilg_filter, v6src)) {
2790			rw_exit(&connp->conn_ilg_lock);
2791			l_free(new_filter);
2792			return (EADDRNOTAVAIL);
2793		}
2794		ilgstat = ILGSTAT_CHANGE;
2795		index = ilg->ilg_filter->sl_numsrc++;
2796		ilg->ilg_filter->sl_addr[index] = *v6src;
2797	}
2798
2799	/*
2800	 * Save copy of ilg's filter state to pass to other functions,
2801	 * so we can release conn_ilg_lock now.
2802	 */
2803	new_fmode = ilg->ilg_fmode;
2804	l_copy(ilg->ilg_filter, new_filter);
2805
2806	rw_exit(&connp->conn_ilg_lock);
2807
2808	/*
2809	 * Now update the ill. We wait to do this until after the ilg
2810	 * has been updated because we need to update the src filter
2811	 * info for the ill, which involves looking at the status of
2812	 * all the ilgs associated with this group/interface pair.
2813	 */
2814	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
2815	    new_fmode, new_filter, &error);
2816
2817	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2818	/*
2819	 * Must look up the ilg again since we've not been holding
2820	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2821	 * having called conn_update_ill, which can run once we dropped the
2822	 * conn_ilg_lock above.
2823	 */
2824	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2825	if (ilg == NULL) {
2826		rw_exit(&connp->conn_ilg_lock);
2827		if (ilm != NULL) {
2828			(void) ip_delmulti_serial(ilm, B_FALSE,
2829			    (ilgstat == ILGSTAT_NEW));
2830		}
2831		error = ENXIO;
2832		goto free_and_exit;
2833	}
2834	if (ilm != NULL) {
2835		if (ilg->ilg_ill == NULL) {
2836			/* some other thread is re-attaching this.  */
2837			rw_exit(&connp->conn_ilg_lock);
2838			(void) ip_delmulti_serial(ilm, B_FALSE,
2839			    (ilgstat == ILGSTAT_NEW));
2840			error = 0;
2841			goto free_and_exit;
2842		}
2843		/* Succeeded. Update the ilg to point at the ilm */
2844		if (ilgstat == ILGSTAT_NEW) {
2845			if (ilg->ilg_ilm == NULL) {
2846				ilg->ilg_ilm = ilm;
2847				ilm->ilm_ifaddr = ifaddr; /* For netstat */
2848			} else {
2849				/* some other thread is re-attaching this. */
2850				rw_exit(&connp->conn_ilg_lock);
2851				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2852				error = 0;
2853				goto free_and_exit;
2854			}
2855		} else {
2856			/*
2857			 * ip_addmulti didn't get a held ilm for
2858			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2859			 */
2860			ASSERT(ilg->ilg_ilm == ilm);
2861		}
2862	} else {
2863		ASSERT(error != 0);
2864		/*
2865		 * Failed to allocate the ilm.
2866		 * Need to undo what we did before calling ip_addmulti()
2867		 * If ENETDOWN just clear ill_ilg since so that we
2868		 * will rejoin when the ill comes back; don't report ENETDOWN
2869		 * to application.
2870		 */
2871		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
2872			ilg->ilg_ill = NULL;
2873			error = 0;
2874		} else {
2875			in6_addr_t delsrc =
2876			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
2877
2878			ilg_delete(connp, ilg, &delsrc);
2879		}
2880	}
2881	rw_exit(&connp->conn_ilg_lock);
2882
2883free_and_exit:
2884	l_free(new_filter);
2885	return (error);
2886}
2887
2888/*
2889 * Find an IPv4 ilg matching group, ill and source.
2890 * The group and source can't be INADDR_ANY here so no need to translate to
2891 * the unspecified IPv6 address.
2892 */
2893boolean_t
2894conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
2895    ill_t *ill)
2896{
2897	in6_addr_t v6group, v6src;
2898	int i;
2899	boolean_t isinlist;
2900	ilg_t *ilg;
2901
2902	rw_enter(&connp->conn_ilg_lock, RW_READER);
2903	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
2904	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2905		if (ilg->ilg_condemned)
2906			continue;
2907
2908		/* ilg_ill could be NULL if an add is in progress */
2909		if (ilg->ilg_ill != ill)
2910			continue;
2911
2912		/* The callers use upper ill for IPMP */
2913		ASSERT(!IS_UNDER_IPMP(ill));
2914		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
2915			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2916				/* no source filter, so this is a match */
2917				rw_exit(&connp->conn_ilg_lock);
2918				return (B_TRUE);
2919			}
2920			break;
2921		}
2922	}
2923	if (ilg == NULL) {
2924		rw_exit(&connp->conn_ilg_lock);
2925		return (B_FALSE);
2926	}
2927
2928	/*
2929	 * we have an ilg with matching ill and group; but
2930	 * the ilg has a source list that we must check.
2931	 */
2932	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2933	isinlist = B_FALSE;
2934	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2935		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
2936			isinlist = B_TRUE;
2937			break;
2938		}
2939	}
2940
2941	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2942	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2943		rw_exit(&connp->conn_ilg_lock);
2944		return (B_TRUE);
2945	}
2946	rw_exit(&connp->conn_ilg_lock);
2947	return (B_FALSE);
2948}
2949
2950/*
2951 * Find an IPv6 ilg matching group, ill, and source
2952 */
2953boolean_t
2954conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
2955    const in6_addr_t *v6src, ill_t *ill)
2956{
2957	int i;
2958	boolean_t isinlist;
2959	ilg_t *ilg;
2960
2961	rw_enter(&connp->conn_ilg_lock, RW_READER);
2962	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2963		if (ilg->ilg_condemned)
2964			continue;
2965
2966		/* ilg_ill could be NULL if an add is in progress */
2967		if (ilg->ilg_ill != ill)
2968			continue;
2969
2970		/* The callers use upper ill for IPMP */
2971		ASSERT(!IS_UNDER_IPMP(ill));
2972		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
2973			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2974				/* no source filter, so this is a match */
2975				rw_exit(&connp->conn_ilg_lock);
2976				return (B_TRUE);
2977			}
2978			break;
2979		}
2980	}
2981	if (ilg == NULL) {
2982		rw_exit(&connp->conn_ilg_lock);
2983		return (B_FALSE);
2984	}
2985
2986	/*
2987	 * we have an ilg with matching ill and group; but
2988	 * the ilg has a source list that we must check.
2989	 */
2990	isinlist = B_FALSE;
2991	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2992		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
2993			isinlist = B_TRUE;
2994			break;
2995		}
2996	}
2997
2998	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2999	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
3000		rw_exit(&connp->conn_ilg_lock);
3001		return (B_TRUE);
3002	}
3003	rw_exit(&connp->conn_ilg_lock);
3004	return (B_FALSE);
3005}
3006
3007/*
3008 * Find an ilg matching group and ifaddr/ifindex.
3009 * We check both ifaddr and ifindex even though at most one of them
3010 * will be non-zero; that way we always find the right one.
3011 */
3012static ilg_t *
3013ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
3014    uint_t ifindex)
3015{
3016	ilg_t	*ilg;
3017
3018	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));
3019
3020	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3021		if (ilg->ilg_condemned)
3022			continue;
3023
3024		if (ilg->ilg_ifaddr == ifaddr &&
3025		    ilg->ilg_ifindex == ifindex &&
3026		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3027			return (ilg);
3028	}
3029	return (NULL);
3030}
3031
3032/*
3033 * If a source address is passed in (src != NULL and src is not
3034 * unspecified), remove the specified src addr from the given ilg's
3035 * filter list, else delete the ilg.
3036 */
3037static void
3038ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3039{
3040	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3041	ASSERT(ilg->ilg_ptpn != NULL);
3042	ASSERT(!ilg->ilg_condemned);
3043
3044	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3045		FREE_SLIST(ilg->ilg_filter);
3046		ilg->ilg_filter = NULL;
3047
3048		ASSERT(ilg->ilg_ilm == NULL);
3049		ilg->ilg_ill = NULL;
3050		ilg->ilg_condemned = B_TRUE;
3051
3052		/* ilg_inactive will unlink from the list */
3053		ilg_refrele(ilg);
3054	} else {
3055		l_remove(ilg->ilg_filter, src);
3056	}
3057}
3058
3059/*
3060 * Called from conn close. No new ilg can be added or removed
3061 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3062 * will return error if conn has started closing.
3063 *
3064 * We handle locking as follows.
3065 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3066 * proceed with the ilm part of the delete we hold a reference on both the ill
3067 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3068 * being deleted.
3069 *
3070 * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
3071 * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
3072 * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
3073 * that case we delete the ilg here, which makes ilg_add discover that the ilg
3074 * has disappeared when ip_addmulti returns, so it will discard the ilm it just
3075 * added.
3076 */
void
ilg_delete_all(conn_t *connp)
{
	ilg_t	*ilg, *next_ilg, *held_ilg;
	ilm_t	*ilm;
	ill_t	*ill;
	boolean_t need_refrele;

	/*
	 * Can not run if there is a conn_update_ill already running.
	 * Wait for it to complete. Caller should have already set CONN_CLOSING
	 * which prevents any new threads to run in conn_update_ill.
	 */
	mutex_enter(&connp->conn_lock);
	ASSERT(connp->conn_state_flags & CONN_CLOSING);
	while (connp->conn_state_flags & CONN_UPDATE_ILL)
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	mutex_exit(&connp->conn_lock);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = connp->conn_ilg;
	held_ilg = NULL;
	while (ilg != NULL) {
		if (ilg->ilg_condemned) {
			/* already being torn down by another thread */
			ilg = ilg->ilg_next;
			continue;
		}
		/* If the ilg is detached then no need to serialize */
		if (ilg->ilg_ilm == NULL) {
			next_ilg = ilg->ilg_next;
			ilg_delete(connp, ilg, NULL);
			ilg = next_ilg;
			continue;
		}
		ill = ilg->ilg_ilm->ilm_ill;

		/*
		 * In order to serialize on the ill we try to enter
		 * and if that fails we unlock and relock and then
		 * check that we still have an ilm.
		 */
		need_refrele = B_FALSE;
		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
			/*
			 * Hold the ill and the ilg so neither can go away
			 * while we block on the serializer with
			 * conn_ilg_lock dropped.
			 */
			ill_refhold(ill);
			need_refrele = B_TRUE;
			ilg_refhold(ilg);
			if (held_ilg != NULL)
				ilg_refrele(held_ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			mutex_enter(&ill->ill_mcast_serializer);
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			if (ilg->ilg_condemned) {
				/* deleted while we slept; move on */
				ilg = ilg->ilg_next;
				goto next;
			}
		}
		/* Detach the ilm from the ilg; we release it below */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		next_ilg = ilg->ilg_next;
		ilg_delete(connp, ilg, NULL);
		ilg = next_ilg;
		rw_exit(&connp->conn_ilg_lock);

		if (ilm != NULL)
			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);

	next:
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can send any
		 * deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		if (need_refrele) {
			/* Drop ill reference while we hold no locks */
			ill_refrele(ill);
		}
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
}
3162
3163/*
3164 * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
3165 * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock.
3166 */
3167static void
3168ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
3169{
3170	ilg_stat_t	ilgstat;
3171	slist_t		*new_filter;
3172	int		new_fmode;
3173	in6_addr_t	v6group;
3174	ipaddr_t	ifaddr;
3175	uint_t		ifindex;
3176	ilm_t		*ilm;
3177	int		error = 0;
3178
3179	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3180	/*
3181	 * Alloc buffer to copy new state into (see below) before
3182	 * we make any changes, so we can bail if it fails.
3183	 */
3184	if ((new_filter = l_alloc()) == NULL)
3185		return;
3186
3187	/*
3188	 * Save copy of ilg's filter state to pass to other functions, so
3189	 * we can release conn_ilg_lock now.
3190	 * Set ilg_ill so that an unplumb can find us.
3191	 */
3192	new_fmode = ilg->ilg_fmode;
3193	l_copy(ilg->ilg_filter, new_filter);
3194	v6group = ilg->ilg_v6group;
3195	ifaddr = ilg->ilg_ifaddr;
3196	ifindex = ilg->ilg_ifindex;
3197	ilgstat = ILGSTAT_NEW;
3198
3199	ilg->ilg_ill = ill;
3200	ASSERT(ilg->ilg_ilm == NULL);
3201	rw_exit(&connp->conn_ilg_lock);
3202
3203	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
3204	    new_fmode, new_filter, &error);
3205	l_free(new_filter);
3206
3207	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3208	/*
3209	 * Must look up the ilg again since we've not been holding
3210	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
3211	 * having called conn_update_ill, which can run once we dropped the
3212	 * conn_ilg_lock above. Alternatively, the ilg could have been attached
3213	 * when the lock was dropped
3214	 */
3215	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
3216	if (ilg == NULL || ilg->ilg_ilm != NULL) {
3217		if (ilm != NULL) {
3218			rw_exit(&connp->conn_ilg_lock);
3219			(void) ip_delmulti_serial(ilm, B_FALSE,
3220			    (ilgstat == ILGSTAT_NEW));
3221			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3222		}
3223		return;
3224	}
3225	if (ilm == NULL) {
3226		ilg->ilg_ill = NULL;
3227		return;
3228	}
3229	ilg->ilg_ilm = ilm;
3230	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
3231}
3232
3233/*
3234 * Called when an ill is unplumbed to make sure that there are no
3235 * dangling conn references to that ill. In that case ill is non-NULL and
3236 * we make sure we remove all references to it.
3237 * Also called when we should revisit the ilg_ill used for multicast
3238 * memberships, in which case ill is NULL.
3239 *
3240 * conn is held by caller.
3241 *
3242 * Note that ipcl_walk only walks conns that are not yet condemned.
3243 * condemned conns can't be refheld. For this reason, conn must become clean
3244 * first, i.e. it must not refer to any ill/ire and then only set
3245 * condemned flag.
3246 *
3247 * We leave ixa_multicast_ifindex in place. We prefer dropping
3248 * packets instead of sending them out the wrong interface.
3249 *
3250 * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
3251 * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
3252 * ilg_ifindex are zero, indicating that the system should pick the interface,
3253 * then we attempt to reselect the ill and join on it.
3254 *
3255 * Locking notes:
3256 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3257 * proceed with the ilm part of the delete we hold a reference on both the ill
3258 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3259 * being deleted.
3260 *
3261 * Note: if this function is called when new ill/ipif's arrive or change status
3262 * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
3263 * a NULL ilg_ill to an ill/ilm.
3264 */
3265static void
3266conn_update_ill(conn_t *connp, caddr_t arg)
3267{
3268	ill_t	*ill = (ill_t *)arg;
3269
3270	/*
3271	 * We have to prevent ip_close/ilg_delete_all from running at
3272	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
3273	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
3274	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
3275	 * If ilg_delete_all got here first, then we have nothing to do.
3276	 */
3277	mutex_enter(&connp->conn_lock);
3278	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
3279		/* Caller has to wait for ill_ilm_cnt to drop to zero */
3280		mutex_exit(&connp->conn_lock);
3281		return;
3282	}
3283	connp->conn_state_flags |= CONN_UPDATE_ILL;
3284	mutex_exit(&connp->conn_lock);
3285
3286	if (ill != NULL)
3287		ilg_check_detach(connp, ill);
3288
3289	ilg_check_reattach(connp, ill);
3290
3291	/* Do we need to wake up a thread in ilg_delete_all? */
3292	mutex_enter(&connp->conn_lock);
3293	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
3294	if (connp->conn_state_flags & CONN_CLOSING)
3295		cv_broadcast(&connp->conn_cv);
3296	mutex_exit(&connp->conn_lock);
3297}
3298
3299/* Detach from an ill that is going away */
3300static void
3301ilg_check_detach(conn_t *connp, ill_t *ill)
3302{
3303	char	group_buf[INET6_ADDRSTRLEN];
3304	ilg_t	*ilg, *held_ilg;
3305	ilm_t	*ilm;
3306
3307	mutex_enter(&ill->ill_mcast_serializer);
3308	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3309	held_ilg = NULL;
3310	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3311		if (ilg->ilg_condemned)
3312			continue;
3313
3314		if (ilg->ilg_ill != ill)
3315			continue;
3316
3317		/* Detach from current ill */
3318		ip1dbg(("ilg_check_detach: detach %s on %s\n",
3319		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3320		    group_buf, sizeof (group_buf)),
3321		    ilg->ilg_ill->ill_name));
3322
3323		/* Detach this ilg from the ill/ilm */
3324		ilm = ilg->ilg_ilm;
3325		ilg->ilg_ilm = NULL;
3326		ilg->ilg_ill = NULL;
3327		if (ilm == NULL)
3328			continue;
3329
3330		/* Prevent ilg from disappearing */
3331		ilg_transfer_hold(held_ilg, ilg);
3332		held_ilg = ilg;
3333		rw_exit(&connp->conn_ilg_lock);
3334
3335		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3336		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3337	}
3338	if (held_ilg != NULL)
3339		ilg_refrele(held_ilg);
3340	rw_exit(&connp->conn_ilg_lock);
3341	mutex_exit(&ill->ill_mcast_serializer);
3342	/*
3343	 * Now that all locks have been dropped, we can send any
3344	 * deferred/queued DLPI or IP packets
3345	 */
3346	ill_mcast_send_queued(ill);
3347	ill_dlpi_send_queued(ill);
3348}
3349
3350/*
3351 * Check if there is a place to attach the conn_ilgs. We do this for both
3352 * detached ilgs and attached ones, since for the latter there could be
3353 * a better ill to attach them to. oill is non-null if we just detached from
3354 * that ill.
3355 */
3356static void
3357ilg_check_reattach(conn_t *connp, ill_t *oill)
3358{
3359	ill_t	*ill;
3360	char	group_buf[INET6_ADDRSTRLEN];
3361	ilg_t	*ilg, *held_ilg;
3362	ilm_t	*ilm;
3363	zoneid_t zoneid = IPCL_ZONEID(connp);
3364	int	error;
3365	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3366
3367	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3368	held_ilg = NULL;
3369	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3370		if (ilg->ilg_condemned)
3371			continue;
3372
3373		/* Check if the conn_ill matches what we would pick now */
3374		ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
3375		    ilg->ilg_ifindex, zoneid, ipst, &error);
3376
3377		/*
3378		 * Make sure the ill is usable for multicast and that
3379		 * we can send the DL_ADDMULTI_REQ before we create an
3380		 * ilm.
3381		 */
3382		if (ill != NULL &&
3383		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
3384			/* Drop locks across ill_refrele */
3385			ilg_transfer_hold(held_ilg, ilg);
3386			held_ilg = ilg;
3387			rw_exit(&connp->conn_ilg_lock);
3388			ill_refrele(ill);
3389			ill = NULL;
3390			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3391			/* Note that ilg could have become condemned */
3392		}
3393
3394		/*
3395		 * Is the ill unchanged, even if both are NULL?
3396		 * Did we just detach from that ill?
3397		 */
3398		if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) {
3399			if (ill != NULL) {
3400				/* Drop locks across ill_refrele */
3401				ilg_transfer_hold(held_ilg, ilg);
3402				held_ilg = ilg;
3403				rw_exit(&connp->conn_ilg_lock);
3404				ill_refrele(ill);
3405				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3406			}
3407			continue;
3408		}
3409
3410		/* Something changed; detach from old first if needed */
3411		if (ilg->ilg_ill != NULL) {
3412			ill_t *ill2 = ilg->ilg_ill;
3413			boolean_t need_refrele = B_FALSE;
3414
3415			/*
3416			 * In order to serialize on the ill we try to enter
3417			 * and if that fails we unlock and relock.
3418			 */
3419			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
3420				ill_refhold(ill2);
3421				need_refrele = B_TRUE;
3422				ilg_transfer_hold(held_ilg, ilg);
3423				held_ilg = ilg;
3424				rw_exit(&connp->conn_ilg_lock);
3425				mutex_enter(&ill2->ill_mcast_serializer);
3426				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3427				/* Note that ilg could have become condemned */
3428			}
3429			/*
3430			 * Check that nobody else re-attached the ilg while we
3431			 * dropped the lock.
3432			 */
3433			if (ilg->ilg_ill == ill2) {
3434				ASSERT(!ilg->ilg_condemned);
3435				/* Detach from current ill */
3436				ip1dbg(("conn_check_reattach: detach %s/%s\n",
3437				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3438				    group_buf, sizeof (group_buf)),
3439				    ill2->ill_name));
3440
3441				ilm = ilg->ilg_ilm;
3442				ilg->ilg_ilm = NULL;
3443				ilg->ilg_ill = NULL;
3444			} else {
3445				ilm = NULL;
3446			}
3447			ilg_transfer_hold(held_ilg, ilg);
3448			held_ilg = ilg;
3449			rw_exit(&connp->conn_ilg_lock);
3450			if (ilm != NULL)
3451				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3452			mutex_exit(&ill2->ill_mcast_serializer);
3453			/*
3454			 * Now that all locks have been dropped, we can send any
3455			 * deferred/queued DLPI or IP packets
3456			 */
3457			ill_mcast_send_queued(ill2);
3458			ill_dlpi_send_queued(ill2);
3459			if (need_refrele) {
3460				/* Drop ill reference while we hold no locks */
3461				ill_refrele(ill2);
3462			}
3463			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3464			/*
3465			 * While we dropped conn_ilg_lock some other thread
3466			 * could have attached this ilg, thus we check again.
3467			 */
3468			if (ilg->ilg_ill != NULL) {
3469				if (ill != NULL) {
3470					/* Drop locks across ill_refrele */
3471					ilg_transfer_hold(held_ilg, ilg);
3472					held_ilg = ilg;
3473					rw_exit(&connp->conn_ilg_lock);
3474					ill_refrele(ill);
3475					rw_enter(&connp->conn_ilg_lock,
3476					    RW_WRITER);
3477				}
3478				continue;
3479			}
3480		}
3481		if (ill != NULL) {
3482			/*
3483			 * In order to serialize on the ill we try to enter
3484			 * and if that fails we unlock and relock.
3485			 */
3486			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3487				/* Already have a refhold on ill */
3488				ilg_transfer_hold(held_ilg, ilg);
3489				held_ilg = ilg;
3490				rw_exit(&connp->conn_ilg_lock);
3491				mutex_enter(&ill->ill_mcast_serializer);
3492				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3493				/* Note that ilg could have become condemned */
3494			}
3495			ilg_transfer_hold(held_ilg, ilg);
3496			held_ilg = ilg;
3497			/*
3498			 * Check that nobody else attached the ilg and that
3499			 * it wasn't condemned while we dropped the lock.
3500			 */
3501			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
3502				/*
3503				 * Attach to the new ill. Can fail in which
3504				 * case ilg_ill will remain NULL. ilg_attach
3505				 * drops and reacquires conn_ilg_lock.
3506				 */
3507				ip1dbg(("conn_check_reattach: attach %s/%s\n",
3508				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3509				    group_buf, sizeof (group_buf)),
3510				    ill->ill_name));
3511				ilg_attach(connp, ilg, ill);
3512				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3513			}
3514			/* Drop locks across ill_refrele */
3515			rw_exit(&connp->conn_ilg_lock);
3516			mutex_exit(&ill->ill_mcast_serializer);
3517			/*
3518			 * Now that all locks have been
3519			 * dropped, we can send any
3520			 * deferred/queued DLPI or IP packets
3521			 */
3522			ill_mcast_send_queued(ill);
3523			ill_dlpi_send_queued(ill);
3524			ill_refrele(ill);
3525			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3526		}
3527	}
3528	if (held_ilg != NULL)
3529		ilg_refrele(held_ilg);
3530	rw_exit(&connp->conn_ilg_lock);
3531}
3532
3533/*
3534 * Called when an ill is unplumbed to make sure that there are no
3535 * dangling conn references to that ill. In that case ill is non-NULL and
3536 * we make sure we remove all references to it.
3537 * Also called when we should revisit the ilg_ill used for multicast
3538 * memberships, in which case ill is NULL.
3539 */
3540void
3541update_conn_ill(ill_t *ill, ip_stack_t *ipst)
3542{
3543	ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
3544}
3545