1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2024 Oxide Computer Company
25  */
26 
27 #include <sys/sysmacros.h>
28 #include <sys/socket.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/tsol/tndb.h>
32 #include <sys/tsol/tnet.h>
33 
34 #include <netinet/in.h>
35 #include <netinet/ip6.h>
36 
37 #include <inet/common.h>
38 #include <inet/ip.h>
39 #include <inet/ip6.h>
40 #include <inet/ipclassifier.h>
41 #include <inet/ipsec_impl.h>
42 #include <inet/ipp_common.h>
43 #include <inet/sctp_ip.h>
44 
45 #include "sctp_impl.h"
46 #include "sctp_addr.h"
47 
/*
 * Default association hash size.  The size must be a power of 2.
 * sctp_hash_init() also uses this value as the minimum size of the
 * per-stack connection fanout table.
 */
#define	SCTP_CONN_HASH_SIZE	8192

/* Requested connection hash size; sctp_hash_init() starts from this value. */
uint_t		sctp_conn_hash_size = SCTP_CONN_HASH_SIZE; /* /etc/system */

/*
 * Cluster networking hook for traversing current assoc list.
 * This routine is used to extract the current list of live associations
 * which must continue to be dispatched to this node.
 */
int cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *), void *,
    boolean_t);
static int cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *,
    void *), void *arg, boolean_t cansleep, sctp_stack_t *sctps);
62 
63 void
sctp_hash_init(sctp_stack_t * sctps)64 sctp_hash_init(sctp_stack_t *sctps)
65 {
66 	int i;
67 
68 	/* Start with /etc/system value */
69 	sctps->sctps_conn_hash_size = sctp_conn_hash_size;
70 
71 	if (!ISP2(sctps->sctps_conn_hash_size)) {
72 		/* Not a power of two. Round up to nearest power of two */
73 		for (i = 0; i < 31; i++) {
74 			if (sctps->sctps_conn_hash_size < (1 << i))
75 				break;
76 		}
77 		sctps->sctps_conn_hash_size = 1 << i;
78 	}
79 	if (sctps->sctps_conn_hash_size < SCTP_CONN_HASH_SIZE) {
80 		sctps->sctps_conn_hash_size = SCTP_CONN_HASH_SIZE;
81 		cmn_err(CE_CONT, "using sctp_conn_hash_size = %u\n",
82 		    sctps->sctps_conn_hash_size);
83 	}
84 	sctps->sctps_conn_fanout =
85 	    (sctp_tf_t *)kmem_zalloc(sctps->sctps_conn_hash_size *
86 	    sizeof (sctp_tf_t), KM_SLEEP);
87 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
88 		mutex_init(&sctps->sctps_conn_fanout[i].tf_lock, NULL,
89 		    MUTEX_DEFAULT, NULL);
90 	}
91 	sctps->sctps_listen_fanout = kmem_zalloc(SCTP_LISTEN_FANOUT_SIZE *
92 	    sizeof (sctp_tf_t),	KM_SLEEP);
93 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
94 		mutex_init(&sctps->sctps_listen_fanout[i].tf_lock, NULL,
95 		    MUTEX_DEFAULT, NULL);
96 	}
97 	sctps->sctps_bind_fanout = kmem_zalloc(SCTP_BIND_FANOUT_SIZE *
98 	    sizeof (sctp_tf_t),	KM_SLEEP);
99 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
100 		mutex_init(&sctps->sctps_bind_fanout[i].tf_lock, NULL,
101 		    MUTEX_DEFAULT, NULL);
102 	}
103 }
104 
105 void
sctp_hash_destroy(sctp_stack_t * sctps)106 sctp_hash_destroy(sctp_stack_t *sctps)
107 {
108 	int i;
109 
110 	for (i = 0; i < sctps->sctps_conn_hash_size; i++) {
111 		mutex_destroy(&sctps->sctps_conn_fanout[i].tf_lock);
112 	}
113 	kmem_free(sctps->sctps_conn_fanout, sctps->sctps_conn_hash_size *
114 	    sizeof (sctp_tf_t));
115 	sctps->sctps_conn_fanout = NULL;
116 
117 	for (i = 0; i < SCTP_LISTEN_FANOUT_SIZE; i++) {
118 		mutex_destroy(&sctps->sctps_listen_fanout[i].tf_lock);
119 	}
120 	kmem_free(sctps->sctps_listen_fanout, SCTP_LISTEN_FANOUT_SIZE *
121 	    sizeof (sctp_tf_t));
122 	sctps->sctps_listen_fanout = NULL;
123 
124 	for (i = 0; i < SCTP_BIND_FANOUT_SIZE; i++) {
125 		mutex_destroy(&sctps->sctps_bind_fanout[i].tf_lock);
126 	}
127 	kmem_free(sctps->sctps_bind_fanout, SCTP_BIND_FANOUT_SIZE *
128 	    sizeof (sctp_tf_t));
129 	sctps->sctps_bind_fanout = NULL;
130 }
131 
132 /*
133  * Exported routine for extracting active SCTP associations.
134  * Like TCP, we terminate the walk if the callback returns non-zero.
135  *
136  * Need to walk all sctp_stack_t instances since this clustering
137  * interface is assumed global for all instances
138  */
139 int
cl_sctp_walk_list(int (* cl_callback)(cl_sctp_info_t *,void *),void * arg,boolean_t cansleep)140 cl_sctp_walk_list(int (*cl_callback)(cl_sctp_info_t *, void *),
141     void *arg, boolean_t cansleep)
142 {
143 	netstack_handle_t nh;
144 	netstack_t *ns;
145 	int ret = 0;
146 
147 	netstack_next_init(&nh);
148 	while ((ns = netstack_next(&nh)) != NULL) {
149 		ret = cl_sctp_walk_list_stack(cl_callback, arg, cansleep,
150 		    ns->netstack_sctp);
151 		netstack_rele(ns);
152 	}
153 	netstack_next_fini(&nh);
154 	return (ret);
155 }
156 
/*
 * Walk the global association list of one SCTP stack and hand a snapshot of
 * every eligible association to cl_callback.
 *
 * Associations that are condemned, or whose state is <= SCTPS_LISTEN, are
 * skipped.  For every other association the local and peer address lists
 * are copied into freshly allocated buffers and passed to the callback in a
 * cl_sctp_info_t together with the address family, IP version, state, ports
 * and the sctp_t pointer as an opaque handle.
 *
 * cansleep selects KM_SLEEP or KM_NOSLEEP for the address buffers.
 *
 * Returns 0 when the whole list has been walked, 1 if an allocation failed
 * or the callback returned non-zero.  When the callback returns 0 the
 * address buffers are left to the callback to free; in the two failure
 * cases they are freed here.
 */
static int
cl_sctp_walk_list_stack(int (*cl_callback)(cl_sctp_info_t *, void *),
    void *arg, boolean_t cansleep, sctp_stack_t *sctps)
{
	sctp_t		*sctp;
	sctp_t		*sctp_prev;
	cl_sctp_info_t	cl_sctpi;
	uchar_t		*slist;
	uchar_t		*flist;

	sctp_prev = NULL;
	mutex_enter(&sctps->sctps_g_lock);
	sctp = list_head(&sctps->sctps_g_list);
	while (sctp != NULL) {
		size_t	ssize;
		size_t	fsize;

		mutex_enter(&sctp->sctp_reflock);
		if (sctp->sctp_condemned || sctp->sctp_state <= SCTPS_LISTEN) {
			/* Not a live association; move on with g_lock held. */
			mutex_exit(&sctp->sctp_reflock);
			sctp = list_next(&sctps->sctps_g_list, sctp);
			continue;
		}
		/*
		 * Take a reference so the association stays valid once
		 * sctps_g_lock is dropped below.
		 */
		sctp->sctp_refcnt++;
		mutex_exit(&sctp->sctp_reflock);
		mutex_exit(&sctps->sctps_g_lock);
		/*
		 * The reference on the previously visited association is
		 * only dropped here, after sctps_g_lock has been released.
		 * NOTE(review): presumably because the final release may
		 * itself need sctps_g_lock -- confirm against SCTP_REFRELE.
		 */
		if (sctp_prev != NULL)
			SCTP_REFRELE(sctp_prev);
		/*
		 * RUN_SCTP/WAKE_SCTP bracket the section that reads the
		 * association's address lists, state and ports.
		 */
		RUN_SCTP(sctp);
		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
		fsize = sizeof (in6_addr_t) * sctp->sctp_nfaddrs;

		slist = kmem_alloc(ssize, cansleep ? KM_SLEEP : KM_NOSLEEP);
		flist = kmem_alloc(fsize, cansleep ? KM_SLEEP : KM_NOSLEEP);
		if (slist == NULL || flist == NULL) {
			/* Allocation failed: undo everything and abort. */
			WAKE_SCTP(sctp);
			if (slist != NULL)
				kmem_free(slist, ssize);
			if (flist != NULL)
				kmem_free(flist, fsize);
			SCTP_REFRELE(sctp);
			return (1);
		}
		cl_sctpi.cl_sctpi_version = CL_SCTPI_V1;
		sctp_get_saddr_list(sctp, slist, ssize);
		sctp_get_faddr_list(sctp, flist, fsize);
		cl_sctpi.cl_sctpi_nladdr = sctp->sctp_nsaddrs;
		cl_sctpi.cl_sctpi_nfaddr = sctp->sctp_nfaddrs;
		cl_sctpi.cl_sctpi_family = sctp->sctp_connp->conn_family;
		if (cl_sctpi.cl_sctpi_family == AF_INET)
			cl_sctpi.cl_sctpi_ipversion = IPV4_VERSION;
		else
			cl_sctpi.cl_sctpi_ipversion = IPV6_VERSION;
		cl_sctpi.cl_sctpi_state = sctp->sctp_state;
		cl_sctpi.cl_sctpi_lport = sctp->sctp_connp->conn_lport;
		cl_sctpi.cl_sctpi_fport = sctp->sctp_connp->conn_fport;
		cl_sctpi.cl_sctpi_handle = (cl_sctp_handle_t)sctp;
		WAKE_SCTP(sctp);
		cl_sctpi.cl_sctpi_laddrp = slist;
		cl_sctpi.cl_sctpi_faddrp = flist;
		/* The callback runs without sctps_g_lock held. */
		if ((*cl_callback)(&cl_sctpi, arg) != 0) {
			/* Callback asked to stop; the buffers are still ours. */
			kmem_free(slist, ssize);
			kmem_free(flist, fsize);
			SCTP_REFRELE(sctp);
			return (1);
		}
		/* list will be freed by cl_callback */
		sctp_prev = sctp;
		/* Re-take the list lock before advancing to the next entry. */
		mutex_enter(&sctps->sctps_g_lock);
		sctp = list_next(&sctps->sctps_g_list, sctp);
	}
	mutex_exit(&sctps->sctps_g_lock);
	/* Drop the reference still held on the last visited association. */
	if (sctp_prev != NULL)
		SCTP_REFRELE(sctp_prev);
	return (0);
}
233 
234 sctp_t *
sctp_conn_match(in6_addr_t ** faddrpp,uint32_t nfaddr,in6_addr_t * laddr,uint32_t ports,zoneid_t zoneid,iaflags_t iraflags,sctp_stack_t * sctps)235 sctp_conn_match(in6_addr_t **faddrpp, uint32_t nfaddr, in6_addr_t *laddr,
236     uint32_t ports, zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
237 {
238 	sctp_tf_t		*tf;
239 	sctp_t			*sctp;
240 	sctp_faddr_t		*fp;
241 	conn_t			*connp;
242 	in6_addr_t		**faddrs, **endaddrs = &faddrpp[nfaddr];
243 
244 	tf = &(sctps->sctps_conn_fanout[SCTP_CONN_HASH(sctps, ports)]);
245 	mutex_enter(&tf->tf_lock);
246 
247 	for (sctp = tf->tf_sctp; sctp != NULL; sctp =
248 	    sctp->sctp_conn_hash_next) {
249 		connp = sctp->sctp_connp;
250 		if (ports != connp->conn_ports)
251 			continue;
252 		if (!(connp->conn_zoneid == zoneid ||
253 		    connp->conn_allzones ||
254 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
255 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
256 		    (iraflags & IRAF_TX_SHARED_ADDR))))
257 			continue;
258 
259 		/* check for faddr match */
260 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
261 			for (faddrs = faddrpp; faddrs < endaddrs; faddrs++) {
262 				if (IN6_ARE_ADDR_EQUAL(*faddrs,
263 				    &fp->sf_faddr)) {
264 					/* check for laddr match */
265 					if (sctp_saddr_lookup(sctp, laddr, 0)
266 					    != NULL) {
267 						SCTP_REFHOLD(sctp);
268 						mutex_exit(&tf->tf_lock);
269 						return (sctp);
270 					}
271 				}
272 			}
273 		}
274 
275 		/* no match; continue to the next in the chain */
276 	}
277 
278 	mutex_exit(&tf->tf_lock);
279 	return (sctp);
280 }
281 
282 static sctp_t *
listen_match(in6_addr_t * laddr,uint32_t ports,zoneid_t zoneid,iaflags_t iraflags,sctp_stack_t * sctps)283 listen_match(in6_addr_t *laddr, uint32_t ports, zoneid_t zoneid,
284     iaflags_t iraflags, sctp_stack_t *sctps)
285 {
286 	sctp_t			*sctp;
287 	sctp_tf_t		*tf;
288 	uint16_t		lport;
289 	conn_t			*connp;
290 
291 	lport = ((uint16_t *)&ports)[1];
292 
293 	tf = &(sctps->sctps_listen_fanout[SCTP_LISTEN_HASH(ntohs(lport))]);
294 	mutex_enter(&tf->tf_lock);
295 
296 	for (sctp = tf->tf_sctp; sctp; sctp = sctp->sctp_listen_hash_next) {
297 		connp = sctp->sctp_connp;
298 		if (lport != connp->conn_lport)
299 			continue;
300 
301 		if (!(connp->conn_zoneid == zoneid ||
302 		    connp->conn_allzones ||
303 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
304 		    (iraflags & IRAF_TX_MAC_EXEMPTABLE) &&
305 		    (iraflags & IRAF_TX_SHARED_ADDR))))
306 			continue;
307 
308 		if (sctp_saddr_lookup(sctp, laddr, 0) != NULL) {
309 			SCTP_REFHOLD(sctp);
310 			goto done;
311 		}
312 		/* no match; continue to the next in the chain */
313 	}
314 
315 done:
316 	mutex_exit(&tf->tf_lock);
317 	return (sctp);
318 }
319 
320 /* called by ipsec_sctp_pol */
321 conn_t *
sctp_find_conn(in6_addr_t * src,in6_addr_t * dst,uint32_t ports,zoneid_t zoneid,iaflags_t iraflags,sctp_stack_t * sctps)322 sctp_find_conn(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
323     zoneid_t zoneid, iaflags_t iraflags, sctp_stack_t *sctps)
324 {
325 	sctp_t *sctp;
326 
327 	sctp = sctp_conn_match(&src, 1, dst, ports, zoneid, iraflags, sctps);
328 	if (sctp == NULL) {
329 		/* Not in conn fanout; check listen fanout */
330 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
331 		if (sctp == NULL)
332 			return (NULL);
333 	}
334 	return (sctp->sctp_connp);
335 }
336 
337 /*
338  * This is called from sctp_fanout() with IP header src & dst addresses.
339  * First call sctp_conn_match() to get a match by passing in src & dst
340  * addresses from IP header.
341  * However sctp_conn_match() can return no match under condition such as :
342  * A host can send an INIT ACK from a different address than the INIT was sent
343  * to (in a multi-homed env).
344  * According to RFC4960, a host can send additional addresses in an INIT
345  * ACK chunk.
346  * Therefore extract all addresses from the INIT ACK chunk, pass to
347  * sctp_conn_match() to get a match.
348  */
349 static sctp_t *
sctp_lookup_by_faddrs(mblk_t * mp,sctp_hdr_t * sctph,in6_addr_t * srcp,in6_addr_t * dstp,uint32_t ports,zoneid_t zoneid,sctp_stack_t * sctps,iaflags_t iraflags)350 sctp_lookup_by_faddrs(mblk_t *mp, sctp_hdr_t *sctph, in6_addr_t *srcp,
351     in6_addr_t *dstp, uint32_t ports, zoneid_t zoneid, sctp_stack_t *sctps,
352     iaflags_t iraflags)
353 {
354 	sctp_t			*sctp;
355 	sctp_chunk_hdr_t	*ich;
356 	sctp_init_chunk_t	*iack;
357 	sctp_parm_hdr_t		*ph;
358 	ssize_t			mlen, remaining;
359 	uint16_t		param_type, addr_len = PARM_ADDR4_LEN;
360 	in6_addr_t		src;
361 	in6_addr_t		**addrbuf = NULL, **faddrpp = NULL;
362 	boolean_t		isv4;
363 	uint32_t		totaddr, nfaddr = 0;
364 
365 	/*
366 	 * If we get a match with the passed-in IP header src & dst addresses,
367 	 * quickly return the matched sctp.
368 	 */
369 	if ((sctp = sctp_conn_match(&srcp, 1, dstp, ports, zoneid, iraflags,
370 	    sctps)) != NULL) {
371 		return (sctp);
372 	}
373 
374 	/*
375 	 * Currently sctph is set to NULL in icmp error fanout case
376 	 * (ip_fanout_sctp()).
377 	 * The above sctp_conn_match() should handle that, otherwise
378 	 * return no match found.
379 	 */
380 	if (sctph == NULL)
381 		return (NULL);
382 
383 	/*
384 	 * Do a pullup again in case the previous one was partially successful,
385 	 * so try to complete the pullup here and have a single contiguous
386 	 * chunk for processing of entire INIT ACK chunk below.
387 	 */
388 	if (mp->b_cont != NULL) {
389 		if (pullupmsg(mp, -1) == 0) {
390 			return (NULL);
391 		}
392 	}
393 
394 	mlen = mp->b_wptr - (uchar_t *)(sctph + 1);
395 	if ((ich = sctp_first_chunk((uchar_t *)(sctph + 1), mlen)) == NULL) {
396 		return (NULL);
397 	}
398 
399 	if (ich->sch_id == CHUNK_INIT_ACK) {
400 		remaining = ntohs(ich->sch_len) - sizeof (*ich) -
401 		    sizeof (*iack);
402 		if (remaining < sizeof (*ph)) {
403 			return (NULL);
404 		}
405 
406 		isv4 = (iraflags & IRAF_IS_IPV4) ? B_TRUE : B_FALSE;
407 		if (!isv4)
408 			addr_len = PARM_ADDR6_LEN;
409 		totaddr = remaining/addr_len;
410 
411 		iack = (sctp_init_chunk_t *)(ich + 1);
412 		ph = (sctp_parm_hdr_t *)(iack + 1);
413 
414 		addrbuf = (in6_addr_t **)
415 		    kmem_zalloc(totaddr * sizeof (in6_addr_t *), KM_NOSLEEP);
416 		if (addrbuf == NULL)
417 			return (NULL);
418 		faddrpp = addrbuf;
419 
420 		while (ph != NULL) {
421 			/*
422 			 * According to RFC4960 :
423 			 * All integer fields in an SCTP packet MUST be
424 			 * transmitted in network byte order,
425 			 * unless otherwise stated.
426 			 * Therefore convert the param type to host byte order.
427 			 * Also do not add src address present in IP header
428 			 * as it has already been thru sctp_conn_match() above.
429 			 */
430 			param_type = ntohs(ph->sph_type);
431 			switch (param_type) {
432 			case PARM_ADDR4:
433 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
434 				    (ph + 1), &src);
435 				if (IN6_ARE_ADDR_EQUAL(&src, srcp))
436 					break;
437 				*faddrpp = (in6_addr_t *)
438 				    kmem_zalloc(sizeof (in6_addr_t),
439 				    KM_NOSLEEP);
440 				if (*faddrpp == NULL)
441 					break;
442 				IN6_INADDR_TO_V4MAPPED((struct in_addr *)
443 				    (ph + 1), *faddrpp);
444 				nfaddr++;
445 				faddrpp++;
446 				break;
447 			case PARM_ADDR6:
448 				*faddrpp = (in6_addr_t *)(ph + 1);
449 				if (IN6_ARE_ADDR_EQUAL(*faddrpp, srcp))
450 					break;
451 				nfaddr++;
452 				faddrpp++;
453 				break;
454 			default:
455 				break;
456 			}
457 			ph = sctp_next_parm(ph, &remaining);
458 		}
459 
460 		ASSERT(nfaddr < totaddr);
461 
462 		if (nfaddr > 0) {
463 			sctp = sctp_conn_match(addrbuf, nfaddr, dstp, ports,
464 			    zoneid, iraflags, sctps);
465 
466 			if (isv4) {
467 				for (faddrpp = addrbuf; nfaddr > 0;
468 				    faddrpp++, nfaddr--) {
469 					if (IN6_IS_ADDR_V4MAPPED(*faddrpp)) {
470 						kmem_free(*faddrpp,
471 						    sizeof (in6_addr_t));
472 					}
473 				}
474 			}
475 		}
476 		kmem_free(addrbuf, totaddr * sizeof (in6_addr_t *));
477 	}
478 	return (sctp);
479 }
480 
481 /*
482  * Fanout to a sctp instance.
483  */
484 conn_t *
sctp_fanout(in6_addr_t * src,in6_addr_t * dst,uint32_t ports,ip_recv_attr_t * ira,mblk_t * mp,sctp_stack_t * sctps,sctp_hdr_t * sctph)485 sctp_fanout(in6_addr_t *src, in6_addr_t *dst, uint32_t ports,
486     ip_recv_attr_t *ira, mblk_t *mp, sctp_stack_t *sctps, sctp_hdr_t *sctph)
487 {
488 	zoneid_t zoneid = ira->ira_zoneid;
489 	iaflags_t iraflags = ira->ira_flags;
490 	sctp_t *sctp;
491 
492 	sctp = sctp_lookup_by_faddrs(mp, sctph, src, dst, ports, zoneid,
493 	    sctps, iraflags);
494 	if (sctp == NULL) {
495 		/* Not in conn fanout; check listen fanout */
496 		sctp = listen_match(dst, ports, zoneid, iraflags, sctps);
497 		if (sctp == NULL)
498 			return (NULL);
499 		/*
500 		 * On systems running trusted extensions, check if dst
501 		 * should accept the packet. "IPV6_VERSION" indicates
502 		 * that dst is in 16 byte AF_INET6 format. IPv4-mapped
503 		 * IPv6 addresses are supported.
504 		 */
505 		if ((iraflags & IRAF_SYSTEM_LABELED) &&
506 		    !tsol_receive_local(mp, dst, IPV6_VERSION, ira,
507 		    sctp->sctp_connp)) {
508 			DTRACE_PROBE3(
509 			    tx__ip__log__info__classify__sctp,
510 			    char *,
511 			    "connp(1) could not receive mp(2)",
512 			    conn_t *, sctp->sctp_connp, mblk_t *, mp);
513 			SCTP_REFRELE(sctp);
514 			return (NULL);
515 		}
516 	}
517 	/*
518 	 * For labeled systems, there's no need to check the
519 	 * label here.  It's known to be good as we checked
520 	 * before allowing the connection to become bound.
521 	 */
522 	return (sctp->sctp_connp);
523 }
524 
/*
 * Fanout for ICMP errors for SCTP
 * The caller puts <fport, lport> in the ports parameter.
 *
 * mp is the message to deliver; ipha or ip6h (selected by IRAF_IS_IPV4 in
 * ira->ira_flags) supplies the addresses used for the lookup.  If no SCTP
 * endpoint matches, the message is handed to ip_fanout_sctp_raw().
 * Otherwise it is checked against the conn's minimum TTL and, where
 * applicable, the inbound IPsec policy, and is then either processed right
 * here or queued on the association's receive queue when the association
 * is already being run by someone else.
 *
 * Except for the hand-off to ip_fanout_sctp_raw() and the case where
 * ipsec_check_inbound_policy() returns NULL, every early exit frees mp.
 * The reference obtained from sctp_fanout() is dropped before returning.
 */
void
ip_fanout_sctp(mblk_t *mp, ipha_t *ipha, ip6_t *ip6h, uint32_t ports,
    ip_recv_attr_t *ira)
{
	sctp_t		*sctp;
	conn_t		*connp;
	in6_addr_t	map_src, map_dst;
	in6_addr_t	*src, *dst;
	boolean_t	secure;
	ill_t		*ill = ira->ira_ill;
	ip_stack_t	*ipst = ill->ill_ipst;
	netstack_t	*ns = ipst->ips_netstack;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;
	sctp_stack_t	*sctps = ns->netstack_sctp;
	iaflags_t	iraflags = ira->ira_flags;
	ill_t		*rill = ira->ira_rill;

	ASSERT(iraflags & IRAF_ICMP_ERROR);

	secure = iraflags & IRAF_IPSEC_SECURE;

	/* Assume IP provides aligned packets - otherwise toss */
	if (!OK_32PTR(mp->b_rptr)) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ipIfStatsInDiscards", mp, ill);
		freemsg(mp);
		return;
	}

	/*
	 * The lookup works on in6_addr_t only, so IPv4 addresses are
	 * converted to their v4-mapped form first.
	 */
	if (!(iraflags & IRAF_IS_IPV4)) {
		src = &ip6h->ip6_src;
		dst = &ip6h->ip6_dst;
	} else {
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_src, &map_src);
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &map_dst);
		src = &map_src;
		dst = &map_dst;
	}
	/* No SCTP header is passed, so only the addresses/ports are used. */
	connp = sctp_fanout(src, dst, ports, ira, mp, sctps, NULL);
	if (connp == NULL) {
		/* No SCTP endpoint; let the raw fanout have the message. */
		ip_fanout_sctp_raw(mp, ipha, ip6h, ports, ira);
		return;
	}
	sctp = CONN2SCTP(connp);

	/* Drop the message if it arrived with a TTL below the conn's floor. */
	if (connp->conn_min_ttl != 0 && connp->conn_min_ttl > ira->ira_ttl) {
		BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
		ip_drop_input("ipIfStatsInDiscards", mp, ill);
		SCTP_REFRELE(sctp);
		freemsg(mp);
		return;
	}

	/*
	 * We check some fields in conn_t without holding a lock.
	 * This should be fine.
	 */
	if (((iraflags & IRAF_IS_IPV4) ?
	    CONN_INBOUND_POLICY_PRESENT(connp, ipss) :
	    CONN_INBOUND_POLICY_PRESENT_V6(connp, ipss)) ||
	    secure) {
		mp = ipsec_check_inbound_policy(mp, connp, ipha,
		    ip6h, ira);
		if (mp == NULL) {
			/*
			 * Nothing left to deliver or free here.
			 * NOTE(review): presumably the policy check has
			 * already consumed the message -- confirm.
			 */
			SCTP_REFRELE(sctp);
			return;
		}
	}

	/*
	 * The ill pointers are cleared in ira while the message is handed
	 * to SCTP and are restored from the saved copies before returning.
	 */
	ira->ira_ill = ira->ira_rill = NULL;

	mutex_enter(&sctp->sctp_lock);
	if (sctp->sctp_running) {
		/* Someone else is running this association; just queue. */
		sctp_add_recvq(sctp, mp, B_FALSE, ira);
		mutex_exit(&sctp->sctp_lock);
	} else {
		/* Claim the association for ourselves. */
		sctp->sctp_running = B_TRUE;
		mutex_exit(&sctp->sctp_lock);

		mutex_enter(&sctp->sctp_recvq_lock);
		if (sctp->sctp_recvq != NULL) {
			/*
			 * Messages are already queued; append this one
			 * behind them rather than processing it directly.
			 */
			sctp_add_recvq(sctp, mp, B_TRUE, ira);
			mutex_exit(&sctp->sctp_recvq_lock);
			WAKE_SCTP(sctp);
		} else {
			mutex_exit(&sctp->sctp_recvq_lock);
			/*
			 * Given the ASSERT at the top of this function the
			 * ICMP error branch is the one expected to run.
			 */
			if (ira->ira_flags & IRAF_ICMP_ERROR) {
				sctp_icmp_error(sctp, mp);
			} else {
				sctp_input_data(sctp, mp, ira);
			}
			WAKE_SCTP(sctp);
		}
	}
	SCTP_REFRELE(sctp);
	ira->ira_ill = ill;
	ira->ira_rill = rill;
}
627 
628 void
sctp_conn_hash_remove(sctp_t * sctp)629 sctp_conn_hash_remove(sctp_t *sctp)
630 {
631 	sctp_tf_t *tf = sctp->sctp_conn_tfp;
632 
633 	if (!tf) {
634 		return;
635 	}
636 	/*
637 	 * On a clustered note send this notification to the clustering
638 	 * subsystem.
639 	 */
640 	if (cl_sctp_disconnect != NULL) {
641 		(*cl_sctp_disconnect)(sctp->sctp_connp->conn_family,
642 		    (cl_sctp_handle_t)sctp);
643 	}
644 
645 	mutex_enter(&tf->tf_lock);
646 	ASSERT(tf->tf_sctp);
647 	if (tf->tf_sctp == sctp) {
648 		tf->tf_sctp = sctp->sctp_conn_hash_next;
649 		if (sctp->sctp_conn_hash_next) {
650 			ASSERT(tf->tf_sctp->sctp_conn_hash_prev == sctp);
651 			tf->tf_sctp->sctp_conn_hash_prev = NULL;
652 		}
653 	} else {
654 		ASSERT(sctp->sctp_conn_hash_prev);
655 		ASSERT(sctp->sctp_conn_hash_prev->sctp_conn_hash_next == sctp);
656 		sctp->sctp_conn_hash_prev->sctp_conn_hash_next =
657 		    sctp->sctp_conn_hash_next;
658 
659 		if (sctp->sctp_conn_hash_next) {
660 			ASSERT(sctp->sctp_conn_hash_next->sctp_conn_hash_prev
661 			    == sctp);
662 			sctp->sctp_conn_hash_next->sctp_conn_hash_prev =
663 			    sctp->sctp_conn_hash_prev;
664 		}
665 	}
666 	sctp->sctp_conn_hash_next = NULL;
667 	sctp->sctp_conn_hash_prev = NULL;
668 	sctp->sctp_conn_tfp = NULL;
669 	mutex_exit(&tf->tf_lock);
670 }
671 
672 void
sctp_conn_hash_insert(sctp_tf_t * tf,sctp_t * sctp,int caller_holds_lock)673 sctp_conn_hash_insert(sctp_tf_t *tf, sctp_t *sctp, int caller_holds_lock)
674 {
675 	if (sctp->sctp_conn_tfp) {
676 		sctp_conn_hash_remove(sctp);
677 	}
678 
679 	if (!caller_holds_lock) {
680 		mutex_enter(&tf->tf_lock);
681 	} else {
682 		ASSERT(MUTEX_HELD(&tf->tf_lock));
683 	}
684 
685 	sctp->sctp_conn_hash_next = tf->tf_sctp;
686 	if (tf->tf_sctp) {
687 		tf->tf_sctp->sctp_conn_hash_prev = sctp;
688 	}
689 	sctp->sctp_conn_hash_prev = NULL;
690 	tf->tf_sctp = sctp;
691 	sctp->sctp_conn_tfp = tf;
692 	if (!caller_holds_lock) {
693 		mutex_exit(&tf->tf_lock);
694 	}
695 }
696 
697 void
sctp_listen_hash_remove(sctp_t * sctp)698 sctp_listen_hash_remove(sctp_t *sctp)
699 {
700 	sctp_tf_t *tf = sctp->sctp_listen_tfp;
701 	conn_t	*connp = sctp->sctp_connp;
702 
703 	if (!tf) {
704 		return;
705 	}
706 	/*
707 	 * On a clustered note send this notification to the clustering
708 	 * subsystem.
709 	 */
710 	if (cl_sctp_unlisten != NULL) {
711 		uchar_t	*slist;
712 		ssize_t	ssize;
713 
714 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
715 		slist = kmem_alloc(ssize, KM_SLEEP);
716 		sctp_get_saddr_list(sctp, slist, ssize);
717 		(*cl_sctp_unlisten)(connp->conn_family, slist,
718 		    sctp->sctp_nsaddrs, connp->conn_lport);
719 		/* list will be freed by the clustering module */
720 	}
721 
722 	mutex_enter(&tf->tf_lock);
723 	ASSERT(tf->tf_sctp);
724 	if (tf->tf_sctp == sctp) {
725 		tf->tf_sctp = sctp->sctp_listen_hash_next;
726 		if (sctp->sctp_listen_hash_next != NULL) {
727 			ASSERT(tf->tf_sctp->sctp_listen_hash_prev == sctp);
728 			tf->tf_sctp->sctp_listen_hash_prev = NULL;
729 		}
730 	} else {
731 		ASSERT(sctp->sctp_listen_hash_prev);
732 		ASSERT(sctp->sctp_listen_hash_prev->sctp_listen_hash_next ==
733 		    sctp);
734 		ASSERT(sctp->sctp_listen_hash_next == NULL ||
735 		    sctp->sctp_listen_hash_next->sctp_listen_hash_prev == sctp);
736 
737 		sctp->sctp_listen_hash_prev->sctp_listen_hash_next =
738 		    sctp->sctp_listen_hash_next;
739 
740 		if (sctp->sctp_listen_hash_next != NULL) {
741 			sctp_t *next = sctp->sctp_listen_hash_next;
742 
743 			ASSERT(next->sctp_listen_hash_prev == sctp);
744 			next->sctp_listen_hash_prev =
745 			    sctp->sctp_listen_hash_prev;
746 		}
747 	}
748 	sctp->sctp_listen_hash_next = NULL;
749 	sctp->sctp_listen_hash_prev = NULL;
750 	sctp->sctp_listen_tfp = NULL;
751 	mutex_exit(&tf->tf_lock);
752 }
753 
754 void
sctp_listen_hash_insert(sctp_tf_t * tf,sctp_t * sctp)755 sctp_listen_hash_insert(sctp_tf_t *tf, sctp_t *sctp)
756 {
757 	conn_t	*connp = sctp->sctp_connp;
758 
759 	if (sctp->sctp_listen_tfp) {
760 		sctp_listen_hash_remove(sctp);
761 	}
762 
763 	mutex_enter(&tf->tf_lock);
764 	sctp->sctp_listen_hash_next = tf->tf_sctp;
765 	if (tf->tf_sctp) {
766 		tf->tf_sctp->sctp_listen_hash_prev = sctp;
767 	}
768 	sctp->sctp_listen_hash_prev = NULL;
769 	tf->tf_sctp = sctp;
770 	sctp->sctp_listen_tfp = tf;
771 	mutex_exit(&tf->tf_lock);
772 	/*
773 	 * On a clustered note send this notification to the clustering
774 	 * subsystem.
775 	 */
776 	if (cl_sctp_listen != NULL) {
777 		uchar_t	*slist;
778 		ssize_t	ssize;
779 
780 		ssize = sizeof (in6_addr_t) * sctp->sctp_nsaddrs;
781 		slist = kmem_alloc(ssize, KM_SLEEP);
782 		sctp_get_saddr_list(sctp, slist, ssize);
783 		(*cl_sctp_listen)(connp->conn_family, slist,
784 		    sctp->sctp_nsaddrs, connp->conn_lport);
785 		/* list will be freed by the clustering module */
786 	}
787 }
788 
789 /*
790  * Hash list insertion routine for sctp_t structures.
791  * Inserts entries with the ones bound to a specific IP address first
792  * followed by those bound to INADDR_ANY.
793  */
794 void
sctp_bind_hash_insert(sctp_tf_t * tbf,sctp_t * sctp,int caller_holds_lock)795 sctp_bind_hash_insert(sctp_tf_t *tbf, sctp_t *sctp, int caller_holds_lock)
796 {
797 	sctp_t	**sctpp;
798 	sctp_t	*sctpnext;
799 
800 	if (sctp->sctp_ptpbhn != NULL) {
801 		ASSERT(!caller_holds_lock);
802 		sctp_bind_hash_remove(sctp);
803 	}
804 	sctpp = &tbf->tf_sctp;
805 	if (!caller_holds_lock) {
806 		mutex_enter(&tbf->tf_lock);
807 	} else {
808 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
809 	}
810 	sctpnext = sctpp[0];
811 	if (sctpnext) {
812 		sctpnext->sctp_ptpbhn = &sctp->sctp_bind_hash;
813 	}
814 	sctp->sctp_bind_hash = sctpnext;
815 	sctp->sctp_ptpbhn = sctpp;
816 	sctpp[0] = sctp;
817 	/* For sctp_*_hash_remove */
818 	sctp->sctp_bind_lockp = &tbf->tf_lock;
819 	if (!caller_holds_lock)
820 		mutex_exit(&tbf->tf_lock);
821 }
822 
823 /*
824  * Hash list removal routine for sctp_t structures.
825  */
826 void
sctp_bind_hash_remove(sctp_t * sctp)827 sctp_bind_hash_remove(sctp_t *sctp)
828 {
829 	sctp_t	*sctpnext;
830 	kmutex_t *lockp;
831 
832 	lockp = sctp->sctp_bind_lockp;
833 
834 	if (sctp->sctp_ptpbhn == NULL)
835 		return;
836 
837 	ASSERT(lockp != NULL);
838 	mutex_enter(lockp);
839 	if (sctp->sctp_ptpbhn) {
840 		sctpnext = sctp->sctp_bind_hash;
841 		if (sctpnext) {
842 			sctpnext->sctp_ptpbhn = sctp->sctp_ptpbhn;
843 			sctp->sctp_bind_hash = NULL;
844 		}
845 		*sctp->sctp_ptpbhn = sctpnext;
846 		sctp->sctp_ptpbhn = NULL;
847 	}
848 	mutex_exit(lockp);
849 	sctp->sctp_bind_lockp = NULL;
850 }
851 
852 /*
853  * Similar to but different from sctp_conn_match().
854  *
855  * Matches sets of addresses as follows: if the argument addr set is
856  * a complete subset of the corresponding addr set in the sctp_t, it
857  * is a match.
858  *
859  * Caller must hold tf->tf_lock.
860  *
861  * Returns with a SCTP_REFHOLD sctp structure. Caller must do a SCTP_REFRELE.
862  */
863 sctp_t *
sctp_lookup(sctp_t * sctp1,in6_addr_t * faddr,sctp_tf_t * tf,uint32_t * ports,int min_state)864 sctp_lookup(sctp_t *sctp1, in6_addr_t *faddr, sctp_tf_t *tf, uint32_t *ports,
865     int min_state)
866 {
867 	sctp_t *sctp;
868 	sctp_faddr_t *fp;
869 
870 	ASSERT(MUTEX_HELD(&tf->tf_lock));
871 
872 	for (sctp = tf->tf_sctp; sctp != NULL;
873 	    sctp = sctp->sctp_conn_hash_next) {
874 		if (*ports != sctp->sctp_connp->conn_ports ||
875 		    sctp->sctp_state < min_state) {
876 			continue;
877 		}
878 
879 		/* check for faddr match */
880 		for (fp = sctp->sctp_faddrs; fp != NULL; fp = fp->sf_next) {
881 			if (IN6_ARE_ADDR_EQUAL(faddr, &fp->sf_faddr)) {
882 				break;
883 			}
884 		}
885 
886 		if (fp == NULL) {
887 			/* no faddr match; keep looking */
888 			continue;
889 		}
890 
891 		/*
892 		 * There is an existing association with the same peer
893 		 * address.  So now we need to check if our local address
894 		 * set overlaps with the one of the existing association.
895 		 * If they overlap, we should return it.
896 		 */
897 		if (sctp_compare_saddrs(sctp1, sctp) <= SCTP_ADDR_OVERLAP) {
898 			goto done;
899 		}
900 
901 		/* no match; continue searching */
902 	}
903 
904 done:
905 	if (sctp != NULL) {
906 		SCTP_REFHOLD(sctp);
907 	}
908 	return (sctp);
909 }
910