1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/stream.h>
32 #include <sys/cmn_err.h>
33 #include <sys/kmem.h>
34 #define	_SUN_TPI_VERSION 2
35 #include <sys/tihdr.h>
36 #include <sys/stropts.h>
37 #include <sys/strsubr.h>
38 #include <sys/socket.h>
39 
40 #include <netinet/in.h>
41 #include <netinet/ip6.h>
42 
43 #include <inet/common.h>
44 #include <inet/ip.h>
45 #include <inet/ip6.h>
46 #include <inet/ipclassifier.h>
47 #include <inet/ipsec_impl.h>
48 
49 #include "sctp_impl.h"
50 #include "sctp_addr.h"
51 
52 /*
53  * Common accept code.  Called by sctp_conn_request.
54  * cr_pkt is the INIT / INIT ACK packet.
55  */
56 static int
57 sctp_accept_comm(sctp_t *listener, sctp_t *acceptor, mblk_t *cr_pkt,
58     uint_t ip_hdr_len, sctp_init_chunk_t *iack)
59 {
60 
61 	sctp_hdr_t		*sctph;
62 	sctp_chunk_hdr_t	*ich;
63 	sctp_init_chunk_t	*init;
64 	int			err;
65 	uint_t			sctp_options;
66 
67 	sctph = (sctp_hdr_t *)(cr_pkt->b_rptr + ip_hdr_len);
68 	ASSERT(OK_32PTR(sctph));
69 
70 	acceptor->sctp_lport = listener->sctp_lport;
71 	acceptor->sctp_fport = sctph->sh_sport;
72 
73 	ich = (sctp_chunk_hdr_t *)(iack + 1);
74 	init = (sctp_init_chunk_t *)(ich + 1);
75 
76 	/* acceptor isn't in any fanouts yet, so don't need to hold locks */
77 	ASSERT(acceptor->sctp_faddrs == NULL);
78 	err = sctp_get_addrparams(acceptor, listener, cr_pkt, ich,
79 	    &sctp_options);
80 	if (err != 0)
81 		return (err);
82 
83 	if ((sctp_options & SCTP_PRSCTP_OPTION) &&
84 	    listener->sctp_prsctp_aware && sctp_prsctp_enabled) {
85 		acceptor->sctp_prsctp_aware = B_TRUE;
86 	} else {
87 		acceptor->sctp_prsctp_aware = B_FALSE;
88 	}
89 	/* The new sctp_t is fully bound now. */
90 	acceptor->sctp_connp->conn_fully_bound = B_TRUE;
91 
92 	sctp_set_hdraddrs(acceptor);
93 
94 	/* Get  initial TSNs */
95 	acceptor->sctp_ltsn = ntohl(iack->sic_inittsn);
96 	acceptor->sctp_recovery_tsn = acceptor->sctp_lastack_rxd =
97 	    acceptor->sctp_ltsn - 1;
98 	acceptor->sctp_adv_pap = acceptor->sctp_lastack_rxd;
99 	/* Serial numbers are initialized to the same value as the TSNs */
100 	acceptor->sctp_lcsn = acceptor->sctp_ltsn;
101 
102 	if (!sctp_initialize_params(acceptor, init, iack))
103 		return (ENOMEM);
104 
105 	/*
106 	 * Copy sctp_secret from the listener in case we need to validate
107 	 * a possibly delayed cookie.
108 	 */
109 	bcopy(listener->sctp_secret, acceptor->sctp_secret, SCTP_SECRET_LEN);
110 	bcopy(listener->sctp_old_secret, acceptor->sctp_old_secret,
111 	    SCTP_SECRET_LEN);
112 	acceptor->sctp_last_secret_update = lbolt64;
113 
114 	/*
115 	 * After acceptor is inserted in the hash list, it can be found.
116 	 * So we need to lock it here.
117 	 */
118 	RUN_SCTP(acceptor);
119 
120 	sctp_conn_hash_insert(&sctp_conn_fanout[
121 	    SCTP_CONN_HASH(acceptor->sctp_ports)], acceptor, 0);
122 	sctp_bind_hash_insert(&sctp_bind_fanout[
123 	    SCTP_BIND_HASH(ntohs(acceptor->sctp_lport))], acceptor, 0);
124 
125 	/*
126 	 * No need to check for multicast destination since ip will only pass
127 	 * up multicasts to those that have expressed interest
128 	 * TODO: what about rejecting broadcasts?
129 	 * Also check that source is not a multicast or broadcast address.
130 	 */
131 	/* XXXSCTP */
132 	acceptor->sctp_state = SCTPS_ESTABLISHED;
133 	acceptor->sctp_assoc_start_time = (uint32_t)lbolt;
134 	/*
135 	 * listener->sctp_rwnd should be the default window size or a
136 	 * window size changed via SO_RCVBUF option.
137 	 */
138 	acceptor->sctp_rwnd = MSS_ROUNDUP(listener->sctp_rwnd,
139 	    (acceptor->sctp_mss - sizeof (sctp_data_hdr_t)));
140 
141 	bcopy(&listener->sctp_upcalls, &acceptor->sctp_upcalls,
142 	    sizeof (sctp_upcalls_t));
143 
144 	return (0);
145 }
146 
147 /* Process the COOKIE packet, mp, directed at the listener 'sctp' */
148 sctp_t *
149 sctp_conn_request(sctp_t *sctp, mblk_t *mp, uint_t ifindex, uint_t ip_hdr_len,
150     sctp_init_chunk_t *iack, mblk_t *ipsec_mp)
151 {
152 	sctp_t	*eager;
153 	uint_t	ipvers;
154 	ip6_t	*ip6h;
155 	int	err;
156 	conn_t	*connp, *econnp;
157 
158 	/*
159 	 * No need to check for duplicate as this is the listener
160 	 * and we are holding the lock.  This means that no new
161 	 * connection can be created out of it.  And since the
162 	 * fanout already done cannot find a match, it means that
163 	 * there is no duplicate.
164 	 */
165 	ipvers = IPH_HDR_VERSION(mp->b_rptr);
166 	ASSERT(ipvers == IPV6_VERSION || ipvers == IPV4_VERSION);
167 	ASSERT(OK_32PTR(mp->b_rptr));
168 
169 	if ((eager = sctp_create_eager(sctp)) == NULL) {
170 		return (NULL);
171 	}
172 
173 	if (ipvers != IPV4_VERSION) {
174 		ip6h = (ip6_t *)mp->b_rptr;
175 		if (IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))
176 			eager->sctp_linklocal = 1;
177 		/*
178 		 * Record ifindex (might be zero) to tie this connection to
179 		 * that interface if either the listener was bound or
180 		 * if the connection is using link-local addresses.
181 		 */
182 		if (sctp->sctp_bound_if == ifindex ||
183 		    IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))
184 			eager->sctp_bound_if = ifindex;
185 		/*
186 		 * XXX broken. bound_if is always overwritten by statement
187 		 * below. What is the right thing to do here?
188 		 */
189 		eager->sctp_bound_if = sctp->sctp_bound_if;
190 	}
191 
192 	connp = sctp->sctp_connp;
193 	econnp = eager->sctp_connp;
194 
195 	if (connp->conn_policy != NULL) {
196 		ipsec_in_t *ii;
197 
198 		ASSERT(ipsec_mp != NULL);
199 		ii = (ipsec_in_t *)(ipsec_mp->b_rptr);
200 		ASSERT(ii->ipsec_in_policy == NULL);
201 		IPPH_REFHOLD(connp->conn_policy);
202 		ii->ipsec_in_policy = connp->conn_policy;
203 
204 		ipsec_mp->b_datap->db_type = IPSEC_POLICY_SET;
205 		if (!ip_bind_ipsec_policy_set(econnp, ipsec_mp)) {
206 			sctp_close_eager(eager);
207 			BUMP_MIB(&sctp_mib, sctpListenDrop);
208 			return (NULL);
209 		}
210 	}
211 
212 	if (ipsec_mp != NULL) {
213 		/*
214 		 * XXX need to fix the cached policy issue here.
215 		 * We temporarily set the conn_src/conn_rem here so
216 		 * that IPsec can use it for the latched policy
217 		 * selector.  This is obvioursly wrong as SCTP can
218 		 * use different addresses...
219 		 */
220 		if (ipvers == IPV4_VERSION) {
221 			ipha_t	*ipha;
222 
223 			ipha = (ipha_t *)mp->b_rptr;
224 			econnp->conn_src = ipha->ipha_dst;
225 			econnp->conn_rem = ipha->ipha_src;
226 		} else {
227 			econnp->conn_srcv6 = ip6h->ip6_dst;
228 			econnp->conn_remv6 = ip6h->ip6_src;
229 		}
230 	}
231 	if (ipsec_conn_cache_policy(econnp, ipvers == IPV4_VERSION) != 0) {
232 		sctp_close_eager(eager);
233 		BUMP_MIB(&sctp_mib, sctpListenDrop);
234 		return (NULL);
235 	}
236 
237 	err = sctp_accept_comm(sctp, eager, mp, ip_hdr_len, iack);
238 	if (err) {
239 		sctp_close_eager(eager);
240 		BUMP_MIB(&sctp_mib, sctpListenDrop);
241 		return (NULL);
242 	}
243 
244 	/* Connection established, so send up the conn_ind */
245 	if ((eager->sctp_ulpd = sctp->sctp_ulp_newconn(sctp->sctp_ulpd,
246 	    eager)) == NULL) {
247 		sctp_close_eager(eager);
248 		BUMP_MIB(&sctp_mib, sctpListenDrop);
249 		return (NULL);
250 	}
251 	ASSERT(SCTP_IS_DETACHED(eager));
252 	eager->sctp_detached = B_FALSE;
253 	if (eager->sctp_family == AF_INET) {
254 		eager->sctp_ulp_prop(eager->sctp_ulpd,
255 		    sctp_wroff_xtra + sizeof (sctp_data_hdr_t) +
256 		    sctp->sctp_hdr_len, strmsgsz);
257 	} else {
258 		eager->sctp_ulp_prop(eager->sctp_ulpd,
259 		    sctp_wroff_xtra + sizeof (sctp_data_hdr_t) +
260 		    sctp->sctp_hdr6_len, strmsgsz);
261 	}
262 	return (eager);
263 }
264 
265 /*
266  * Connect to a peer - this function inserts the sctp in the
267  * bind and conn fanouts, sends the INIT, and replies to the client
268  * with an OK ack.
269  */
270 /* ARGSUSED */
271 int
272 sctp_connect(sctp_t *sctp, const struct sockaddr *dst, uint32_t addrlen)
273 {
274 	sin_t		*sin;
275 	sin6_t		*sin6;
276 	in_port_t	lport;
277 	in6_addr_t	dstaddr;
278 	in_port_t	dstport;
279 	mblk_t		*initmp;
280 	sctp_tf_t	*tbf;
281 	sctp_t		*lsctp;
282 	char		buf[INET6_ADDRSTRLEN];
283 	int		sleep = sctp->sctp_cansleep ? KM_SLEEP : KM_NOSLEEP;
284 	int 		hdrlen;
285 	ip6_rthdr_t	*rth;
286 	sctp_faddr_t	*cur_fp;
287 
288 	/*
289 	 * Determine packet type based on type of address passed in
290 	 * the request should contain an IPv4 or IPv6 address.
291 	 * Make sure that address family matches the type of
292 	 * family of the the address passed down
293 	 */
294 	if (addrlen < sizeof (sin_t)) {
295 		return (EINVAL);
296 	}
297 	switch (dst->sa_family) {
298 	case AF_INET:
299 		sin = (sin_t *)dst;
300 
301 		/* Check for attempt to connect to non-unicast */
302 		if (IN_MULTICAST(sin->sin_addr.s_addr) ||
303 		    (sin->sin_addr.s_addr == INADDR_BROADCAST)) {
304 			ip0dbg(("sctp_connect: non-unicast\n"));
305 			return (EINVAL);
306 		}
307 		if (sctp->sctp_connp->conn_ipv6_v6only)
308 			return (EAFNOSUPPORT);
309 
310 		/* convert to v6 mapped */
311 		/* Check for attempt to connect to INADDR_ANY */
312 		if (sin->sin_addr.s_addr == INADDR_ANY)  {
313 			struct in_addr v4_addr;
314 			/*
315 			 * SunOS 4.x and 4.3 BSD allow an application
316 			 * to connect a TCP socket to INADDR_ANY.
317 			 * When they do this, the kernel picks the
318 			 * address of one interface and uses it
319 			 * instead.  The kernel usually ends up
320 			 * picking the address of the loopback
321 			 * interface.  This is an undocumented feature.
322 			 * However, we provide the same thing here
323 			 * in case any TCP apps that use this feature
324 			 * are being ported to SCTP...
325 			 */
326 			v4_addr.s_addr = htonl(INADDR_LOOPBACK);
327 			IN6_INADDR_TO_V4MAPPED(&v4_addr, &dstaddr);
328 		} else {
329 			IN6_INADDR_TO_V4MAPPED(&sin->sin_addr, &dstaddr);
330 		}
331 		dstport = sin->sin_port;
332 		if (sin->sin_family == AF_INET) {
333 			hdrlen = sctp->sctp_hdr_len;
334 		} else {
335 			hdrlen = sctp->sctp_hdr6_len;
336 		}
337 		break;
338 	case AF_INET6:
339 		sin6 = (sin6_t *)dst;
340 		/* Check for attempt to connect to non-unicast. */
341 		if ((addrlen < sizeof (sin6_t)) ||
342 		    IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
343 			ip0dbg(("sctp_connect: non-unicast\n"));
344 			return (EINVAL);
345 		}
346 		if (sctp->sctp_connp->conn_ipv6_v6only &&
347 		    IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
348 			return (EAFNOSUPPORT);
349 		}
350 		/* check for attempt to connect to unspec */
351 		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
352 			dstaddr = ipv6_loopback;
353 		} else {
354 			dstaddr = sin6->sin6_addr;
355 			if (IN6_IS_ADDR_LINKLOCAL(&dstaddr))
356 				sctp->sctp_linklocal = 1;
357 		}
358 		dstport = sin6->sin6_port;
359 		hdrlen = sctp->sctp_hdr6_len;
360 		break;
361 	default:
362 		dprint(1, ("sctp_connect: unknown family %d\n",
363 			dst->sa_family));
364 		return (EAFNOSUPPORT);
365 	}
366 
367 	(void) inet_ntop(AF_INET6, &dstaddr, buf, sizeof (buf));
368 	dprint(1, ("sctp_connect: attempting connect to %s...\n", buf));
369 
370 	RUN_SCTP(sctp);
371 
372 	if (sctp->sctp_family != dst->sa_family) {
373 		WAKE_SCTP(sctp);
374 		return (EINVAL);
375 	}
376 
377 	switch (sctp->sctp_state) {
378 	case SCTPS_IDLE: {
379 		int	err;
380 		/*
381 		 * We support a quick connect capability here, allowing
382 		 * clients to transition directly from IDLE to COOKIE_WAIT.
383 		 * sctp_bindi will pick an unused port, insert the connection
384 		 * in the bind hash and transition to BOUND state. SCTP
385 		 * picks and uses what it considers the optimal local address
386 		 * set (just like specifiying INADDR_ANY to bind()).
387 		 */
388 		dprint(1, ("sctp_connect: idle, attempting bind...\n"));
389 		ASSERT(sctp->sctp_nsaddrs == 0);
390 
391 		err = sctp_dup_saddrs(NULL, sctp, sleep);
392 		if (err != 0) {
393 			WAKE_SCTP(sctp);
394 			return (err);
395 		}
396 		lport = sctp_update_next_port(sctp_next_port_to_try);
397 		lport = sctp_bindi(sctp, lport, 0, 0);
398 		if (lport == 0) {
399 			WAKE_SCTP(sctp);
400 			sctp_free_saddrs(sctp);
401 			return (EADDRNOTAVAIL);
402 		}
403 		sctp->sctp_bound_to_all = 1;
404 		/* FALLTHRU */
405 	}
406 
407 	case SCTPS_BOUND:
408 		ASSERT(sctp->sctp_nsaddrs > 0);
409 
410 		/* do the connect */
411 		/* XXX check for attempt to connect to self */
412 		sctp->sctp_fport = dstport;
413 
414 		ASSERT(sctp->sctp_iphc);
415 		ASSERT(sctp->sctp_iphc6);
416 
417 		/*
418 		 * Don't allow this connection to completely duplicate
419 		 * an existing connection.
420 		 *
421 		 * Ensure that the duplicate check and insertion is atomic.
422 		 */
423 		sctp_conn_hash_remove(sctp);
424 		tbf = &sctp_conn_fanout[SCTP_CONN_HASH(sctp->sctp_ports)];
425 		mutex_enter(&tbf->tf_lock);
426 		lsctp = sctp_lookup(sctp, &dstaddr, tbf, &sctp->sctp_ports,
427 		    SCTPS_COOKIE_WAIT);
428 		if (lsctp != NULL) {
429 			/* found a duplicate connection */
430 			mutex_exit(&tbf->tf_lock);
431 			SCTP_REFRELE(lsctp);
432 			WAKE_SCTP(sctp);
433 			return (EADDRINUSE);
434 		}
435 		/*
436 		 * OK; set up the peer addr (this may grow after we get
437 		 * the INIT ACK from the peer with additional addresses).
438 		 */
439 		if (sctp_add_faddr(sctp, &dstaddr, sleep) < 0) {
440 			mutex_exit(&tbf->tf_lock);
441 			WAKE_SCTP(sctp);
442 			return (ENOMEM);
443 		}
444 		/* No valid src addr, return. */
445 		if (sctp->sctp_faddrs->state == SCTP_FADDRS_UNREACH) {
446 			mutex_exit(&tbf->tf_lock);
447 			WAKE_SCTP(sctp);
448 			return (EADDRNOTAVAIL);
449 		}
450 		sctp->sctp_primary = sctp->sctp_faddrs;
451 		sctp->sctp_current = sctp->sctp_faddrs;
452 		cur_fp = sctp->sctp_current;
453 		sctp->sctp_mss = sctp->sctp_faddrs->sfa_pmss;
454 		sctp_conn_hash_insert(tbf, sctp, 1);
455 		mutex_exit(&tbf->tf_lock);
456 
457 		/* initialize composite headers */
458 		sctp_set_hdraddrs(sctp);
459 
460 		/*
461 		 * Massage a routing header (if present) putting the first hop
462 		 * in ip6_dst.
463 		 */
464 		rth = ip_find_rthdr_v6(sctp->sctp_ip6h,
465 		    (uint8_t *)sctp->sctp_sctph6);
466 		if (rth != NULL)
467 			(void) ip_massage_options_v6(sctp->sctp_ip6h, rth);
468 
469 		/*
470 		 * Turn off the don't fragment bit on the (only) faddr,
471 		 * so that if one of the messages exchanged during the
472 		 * initialization sequence exceeds the path mtu, it
473 		 * at least has a chance to get there. SCTP does no
474 		 * fragmentation of initialization messages.  The DF bit
475 		 * will be turned on again in sctp_send_cookie_echo()
476 		 * (but the cookie echo will still be sent with the df bit
477 		 * off).
478 		 */
479 		cur_fp->df = B_FALSE;
480 
481 		/* Mark this address as alive */
482 		cur_fp->state = SCTP_FADDRS_ALIVE;
483 
484 		/* This sctp_t is fully bound now. */
485 		sctp->sctp_connp->conn_fully_bound = B_TRUE;
486 
487 		/* Send the INIT to the peer */
488 		SCTP_FADDR_TIMER_RESTART(sctp, cur_fp, cur_fp->rto);
489 		/*
490 		 * sctp_init_mp() could result in modifying the source
491 		 * address list, so take the hash lock.
492 		 */
493 		mutex_enter(&tbf->tf_lock);
494 		initmp = sctp_init_mp(sctp);
495 		if (initmp == NULL) {
496 			mutex_exit(&tbf->tf_lock);
497 			WAKE_SCTP(sctp);
498 			/* let timer retry */
499 			return (0);
500 		}
501 		mutex_exit(&tbf->tf_lock);
502 		sctp->sctp_state = SCTPS_COOKIE_WAIT;
503 		WAKE_SCTP(sctp);
504 		/* OK to call IP_PUT() here instead of sctp_add_sendq(). */
505 		CONN_INC_REF(sctp->sctp_connp);
506 		initmp->b_flag |= MSGHASREF;
507 		IP_PUT(initmp, sctp->sctp_connp, sctp->sctp_current->isv4);
508 		BUMP_LOCAL(sctp->sctp_opkts);
509 
510 		sctp->sctp_ulp_prop(sctp->sctp_ulpd,
511 		    sctp_wroff_xtra + hdrlen + sizeof (sctp_data_hdr_t), 0);
512 
513 		return (0);
514 	default:
515 		ip0dbg(("sctp_connect: invalid state. %d\n", sctp->sctp_state));
516 		WAKE_SCTP(sctp);
517 		return (EINVAL);
518 	}
519 }
520