xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_bind.c (revision bbf21555)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21721fffe3SKacheong Poon 
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */
27721fffe3SKacheong Poon 
28721fffe3SKacheong Poon #include <sys/types.h>
29721fffe3SKacheong Poon #include <sys/stream.h>
30721fffe3SKacheong Poon #include <sys/strsun.h>
31721fffe3SKacheong Poon #include <sys/strsubr.h>
32721fffe3SKacheong Poon #include <sys/stropts.h>
33721fffe3SKacheong Poon #include <sys/strlog.h>
34721fffe3SKacheong Poon #define	_SUN_TPI_VERSION 2
35721fffe3SKacheong Poon #include <sys/tihdr.h>
36721fffe3SKacheong Poon #include <sys/suntpi.h>
37721fffe3SKacheong Poon #include <sys/xti_inet.h>
38721fffe3SKacheong Poon #include <sys/policy.h>
39721fffe3SKacheong Poon #include <sys/squeue_impl.h>
40721fffe3SKacheong Poon #include <sys/squeue.h>
41721fffe3SKacheong Poon #include <sys/tsol/tnet.h>
42721fffe3SKacheong Poon 
43721fffe3SKacheong Poon #include <rpc/pmap_prot.h>
44721fffe3SKacheong Poon 
45721fffe3SKacheong Poon #include <inet/common.h>
46721fffe3SKacheong Poon #include <inet/ip.h>
47721fffe3SKacheong Poon #include <inet/tcp.h>
48721fffe3SKacheong Poon #include <inet/tcp_impl.h>
49721fffe3SKacheong Poon #include <inet/proto_set.h>
50721fffe3SKacheong Poon #include <inet/ipsec_impl.h>
51721fffe3SKacheong Poon 
52721fffe3SKacheong Poon /* Setable in /etc/system */
53721fffe3SKacheong Poon /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
54721fffe3SKacheong Poon static uint32_t tcp_random_anon_port = 1;
55721fffe3SKacheong Poon 
56721fffe3SKacheong Poon static int	tcp_bind_select_lport(tcp_t *, in_port_t *, boolean_t,
57721fffe3SKacheong Poon 		    cred_t *cr);
58721fffe3SKacheong Poon static in_port_t	tcp_get_next_priv_port(const tcp_t *);
59721fffe3SKacheong Poon 
60721fffe3SKacheong Poon /*
61721fffe3SKacheong Poon  * Hash list insertion routine for tcp_t structures. Each hash bucket
62721fffe3SKacheong Poon  * contains a list of tcp_t entries, and each entry is bound to a unique
63721fffe3SKacheong Poon  * port. If there are multiple tcp_t's that are bound to the same port, then
64721fffe3SKacheong Poon  * one of them will be linked into the hash bucket list, and the rest will
65721fffe3SKacheong Poon  * hang off of that one entry. For each port, entries bound to a specific IP
66721fffe3SKacheong Poon  * address will be inserted before those those bound to INADDR_ANY.
67721fffe3SKacheong Poon  */
68721fffe3SKacheong Poon void
tcp_bind_hash_insert(tf_t * tbf,tcp_t * tcp,int caller_holds_lock)69721fffe3SKacheong Poon tcp_bind_hash_insert(tf_t *tbf, tcp_t *tcp, int caller_holds_lock)
70721fffe3SKacheong Poon {
71721fffe3SKacheong Poon 	tcp_t	**tcpp;
72721fffe3SKacheong Poon 	tcp_t	*tcpnext;
73721fffe3SKacheong Poon 	tcp_t	*tcphash;
74721fffe3SKacheong Poon 	conn_t	*connp = tcp->tcp_connp;
75721fffe3SKacheong Poon 	conn_t	*connext;
76721fffe3SKacheong Poon 
77721fffe3SKacheong Poon 	if (tcp->tcp_ptpbhn != NULL) {
78721fffe3SKacheong Poon 		ASSERT(!caller_holds_lock);
79721fffe3SKacheong Poon 		tcp_bind_hash_remove(tcp);
80721fffe3SKacheong Poon 	}
81721fffe3SKacheong Poon 	tcpp = &tbf->tf_tcp;
82721fffe3SKacheong Poon 	if (!caller_holds_lock) {
83721fffe3SKacheong Poon 		mutex_enter(&tbf->tf_lock);
84721fffe3SKacheong Poon 	} else {
85721fffe3SKacheong Poon 		ASSERT(MUTEX_HELD(&tbf->tf_lock));
86721fffe3SKacheong Poon 	}
87721fffe3SKacheong Poon 	tcphash = tcpp[0];
88721fffe3SKacheong Poon 	tcpnext = NULL;
89721fffe3SKacheong Poon 	if (tcphash != NULL) {
90721fffe3SKacheong Poon 		/* Look for an entry using the same port */
91721fffe3SKacheong Poon 		while ((tcphash = tcpp[0]) != NULL &&
92721fffe3SKacheong Poon 		    connp->conn_lport != tcphash->tcp_connp->conn_lport)
93721fffe3SKacheong Poon 			tcpp = &(tcphash->tcp_bind_hash);
94721fffe3SKacheong Poon 
95721fffe3SKacheong Poon 		/* The port was not found, just add to the end */
96721fffe3SKacheong Poon 		if (tcphash == NULL)
97721fffe3SKacheong Poon 			goto insert;
98721fffe3SKacheong Poon 
99721fffe3SKacheong Poon 		/*
100721fffe3SKacheong Poon 		 * OK, there already exists an entry bound to the
101721fffe3SKacheong Poon 		 * same port.
102721fffe3SKacheong Poon 		 *
103721fffe3SKacheong Poon 		 * If the new tcp bound to the INADDR_ANY address
104721fffe3SKacheong Poon 		 * and the first one in the list is not bound to
105721fffe3SKacheong Poon 		 * INADDR_ANY we skip all entries until we find the
106721fffe3SKacheong Poon 		 * first one bound to INADDR_ANY.
107721fffe3SKacheong Poon 		 * This makes sure that applications binding to a
108721fffe3SKacheong Poon 		 * specific address get preference over those binding to
109721fffe3SKacheong Poon 		 * INADDR_ANY.
110721fffe3SKacheong Poon 		 */
111721fffe3SKacheong Poon 		tcpnext = tcphash;
112721fffe3SKacheong Poon 		connext = tcpnext->tcp_connp;
113721fffe3SKacheong Poon 		tcphash = NULL;
114721fffe3SKacheong Poon 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
115721fffe3SKacheong Poon 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
116721fffe3SKacheong Poon 			while ((tcpnext = tcpp[0]) != NULL) {
117721fffe3SKacheong Poon 				connext = tcpnext->tcp_connp;
118721fffe3SKacheong Poon 				if (!V6_OR_V4_INADDR_ANY(
119721fffe3SKacheong Poon 				    connext->conn_bound_addr_v6))
120721fffe3SKacheong Poon 					tcpp = &(tcpnext->tcp_bind_hash_port);
121721fffe3SKacheong Poon 				else
122721fffe3SKacheong Poon 					break;
123721fffe3SKacheong Poon 			}
124721fffe3SKacheong Poon 			if (tcpnext != NULL) {
125721fffe3SKacheong Poon 				tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
126721fffe3SKacheong Poon 				tcphash = tcpnext->tcp_bind_hash;
127721fffe3SKacheong Poon 				if (tcphash != NULL) {
128721fffe3SKacheong Poon 					tcphash->tcp_ptpbhn =
129721fffe3SKacheong Poon 					    &(tcp->tcp_bind_hash);
130721fffe3SKacheong Poon 					tcpnext->tcp_bind_hash = NULL;
131721fffe3SKacheong Poon 				}
132721fffe3SKacheong Poon 			}
133721fffe3SKacheong Poon 		} else {
134721fffe3SKacheong Poon 			tcpnext->tcp_ptpbhn = &tcp->tcp_bind_hash_port;
135721fffe3SKacheong Poon 			tcphash = tcpnext->tcp_bind_hash;
136721fffe3SKacheong Poon 			if (tcphash != NULL) {
137721fffe3SKacheong Poon 				tcphash->tcp_ptpbhn =
138721fffe3SKacheong Poon 				    &(tcp->tcp_bind_hash);
139721fffe3SKacheong Poon 				tcpnext->tcp_bind_hash = NULL;
140721fffe3SKacheong Poon 			}
141721fffe3SKacheong Poon 		}
142721fffe3SKacheong Poon 	}
143721fffe3SKacheong Poon insert:
144721fffe3SKacheong Poon 	tcp->tcp_bind_hash_port = tcpnext;
145721fffe3SKacheong Poon 	tcp->tcp_bind_hash = tcphash;
146721fffe3SKacheong Poon 	tcp->tcp_ptpbhn = tcpp;
147721fffe3SKacheong Poon 	tcpp[0] = tcp;
148721fffe3SKacheong Poon 	if (!caller_holds_lock)
149721fffe3SKacheong Poon 		mutex_exit(&tbf->tf_lock);
150721fffe3SKacheong Poon }
151721fffe3SKacheong Poon 
152721fffe3SKacheong Poon /*
153721fffe3SKacheong Poon  * Hash list removal routine for tcp_t structures.
154721fffe3SKacheong Poon  */
155721fffe3SKacheong Poon void
tcp_bind_hash_remove(tcp_t * tcp)156721fffe3SKacheong Poon tcp_bind_hash_remove(tcp_t *tcp)
157721fffe3SKacheong Poon {
158721fffe3SKacheong Poon 	tcp_t	*tcpnext;
159721fffe3SKacheong Poon 	kmutex_t *lockp;
160721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
161721fffe3SKacheong Poon 	conn_t		*connp = tcp->tcp_connp;
162721fffe3SKacheong Poon 
163721fffe3SKacheong Poon 	if (tcp->tcp_ptpbhn == NULL)
164721fffe3SKacheong Poon 		return;
165721fffe3SKacheong Poon 
166721fffe3SKacheong Poon 	/*
167721fffe3SKacheong Poon 	 * Extract the lock pointer in case there are concurrent
168721fffe3SKacheong Poon 	 * hash_remove's for this instance.
169721fffe3SKacheong Poon 	 */
170721fffe3SKacheong Poon 	ASSERT(connp->conn_lport != 0);
171721fffe3SKacheong Poon 	lockp = &tcps->tcps_bind_fanout[TCP_BIND_HASH(
172721fffe3SKacheong Poon 	    connp->conn_lport)].tf_lock;
173721fffe3SKacheong Poon 
174721fffe3SKacheong Poon 	ASSERT(lockp != NULL);
175721fffe3SKacheong Poon 	mutex_enter(lockp);
176721fffe3SKacheong Poon 	if (tcp->tcp_ptpbhn) {
177721fffe3SKacheong Poon 		tcpnext = tcp->tcp_bind_hash_port;
178721fffe3SKacheong Poon 		if (tcpnext != NULL) {
179721fffe3SKacheong Poon 			tcp->tcp_bind_hash_port = NULL;
180721fffe3SKacheong Poon 			tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
181721fffe3SKacheong Poon 			tcpnext->tcp_bind_hash = tcp->tcp_bind_hash;
182721fffe3SKacheong Poon 			if (tcpnext->tcp_bind_hash != NULL) {
183721fffe3SKacheong Poon 				tcpnext->tcp_bind_hash->tcp_ptpbhn =
184721fffe3SKacheong Poon 				    &(tcpnext->tcp_bind_hash);
185721fffe3SKacheong Poon 				tcp->tcp_bind_hash = NULL;
186721fffe3SKacheong Poon 			}
187721fffe3SKacheong Poon 		} else if ((tcpnext = tcp->tcp_bind_hash) != NULL) {
188721fffe3SKacheong Poon 			tcpnext->tcp_ptpbhn = tcp->tcp_ptpbhn;
189721fffe3SKacheong Poon 			tcp->tcp_bind_hash = NULL;
190721fffe3SKacheong Poon 		}
191721fffe3SKacheong Poon 		*tcp->tcp_ptpbhn = tcpnext;
192721fffe3SKacheong Poon 		tcp->tcp_ptpbhn = NULL;
193721fffe3SKacheong Poon 	}
194721fffe3SKacheong Poon 	mutex_exit(lockp);
195721fffe3SKacheong Poon }
196721fffe3SKacheong Poon 
197721fffe3SKacheong Poon /*
198721fffe3SKacheong Poon  * Don't let port fall into the privileged range.
199721fffe3SKacheong Poon  * Since the extra privileged ports can be arbitrary we also
200721fffe3SKacheong Poon  * ensure that we exclude those from consideration.
201721fffe3SKacheong Poon  * tcp_g_epriv_ports is not sorted thus we loop over it until
202721fffe3SKacheong Poon  * there are no changes.
203721fffe3SKacheong Poon  *
204721fffe3SKacheong Poon  * Note: No locks are held when inspecting tcp_g_*epriv_ports
205721fffe3SKacheong Poon  * but instead the code relies on:
206721fffe3SKacheong Poon  * - the fact that the address of the array and its size never changes
207721fffe3SKacheong Poon  * - the atomic assignment of the elements of the array
208721fffe3SKacheong Poon  *
209721fffe3SKacheong Poon  * Returns 0 if there are no more ports available.
210721fffe3SKacheong Poon  *
211721fffe3SKacheong Poon  * TS note: skip multilevel ports.
212721fffe3SKacheong Poon  */
213721fffe3SKacheong Poon in_port_t
tcp_update_next_port(in_port_t port,const tcp_t * tcp,boolean_t random)214721fffe3SKacheong Poon tcp_update_next_port(in_port_t port, const tcp_t *tcp, boolean_t random)
215721fffe3SKacheong Poon {
2167256a34eSDan McDonald 	int i, bump;
217721fffe3SKacheong Poon 	boolean_t restart = B_FALSE;
218721fffe3SKacheong Poon 	tcp_stack_t *tcps = tcp->tcp_tcps;
219721fffe3SKacheong Poon 
220721fffe3SKacheong Poon 	if (random && tcp_random_anon_port != 0) {
221721fffe3SKacheong Poon 		(void) random_get_pseudo_bytes((uint8_t *)&port,
222721fffe3SKacheong Poon 		    sizeof (in_port_t));
223721fffe3SKacheong Poon 		/*
224721fffe3SKacheong Poon 		 * Unless changed by a sys admin, the smallest anon port
225721fffe3SKacheong Poon 		 * is 32768 and the largest anon port is 65535.  It is
226721fffe3SKacheong Poon 		 * very likely (50%) for the random port to be smaller
227721fffe3SKacheong Poon 		 * than the smallest anon port.  When that happens,
228721fffe3SKacheong Poon 		 * add port % (anon port range) to the smallest anon
229721fffe3SKacheong Poon 		 * port to get the random port.  It should fall into the
230721fffe3SKacheong Poon 		 * valid anon port range.
231721fffe3SKacheong Poon 		 */
232452bd827SMatt Barden 		if ((port < tcps->tcps_smallest_anon_port) ||
233452bd827SMatt Barden 		    (port > tcps->tcps_largest_anon_port)) {
2347256a34eSDan McDonald 			if (tcps->tcps_smallest_anon_port ==
2357256a34eSDan McDonald 			    tcps->tcps_largest_anon_port) {
2367256a34eSDan McDonald 				bump = 0;
2377256a34eSDan McDonald 			} else {
2387256a34eSDan McDonald 				bump = port % (tcps->tcps_largest_anon_port -
2397256a34eSDan McDonald 				    tcps->tcps_smallest_anon_port);
2407256a34eSDan McDonald 			}
2417256a34eSDan McDonald 			port = tcps->tcps_smallest_anon_port + bump;
242721fffe3SKacheong Poon 		}
243721fffe3SKacheong Poon 	}
244721fffe3SKacheong Poon 
245721fffe3SKacheong Poon retry:
246721fffe3SKacheong Poon 	if (port < tcps->tcps_smallest_anon_port)
247721fffe3SKacheong Poon 		port = (in_port_t)tcps->tcps_smallest_anon_port;
248721fffe3SKacheong Poon 
249721fffe3SKacheong Poon 	if (port > tcps->tcps_largest_anon_port) {
250721fffe3SKacheong Poon 		if (restart)
251721fffe3SKacheong Poon 			return (0);
252721fffe3SKacheong Poon 		restart = B_TRUE;
253721fffe3SKacheong Poon 		port = (in_port_t)tcps->tcps_smallest_anon_port;
254721fffe3SKacheong Poon 	}
255721fffe3SKacheong Poon 
256721fffe3SKacheong Poon 	if (port < tcps->tcps_smallest_nonpriv_port)
257721fffe3SKacheong Poon 		port = (in_port_t)tcps->tcps_smallest_nonpriv_port;
258721fffe3SKacheong Poon 
259721fffe3SKacheong Poon 	for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
260721fffe3SKacheong Poon 		if (port == tcps->tcps_g_epriv_ports[i]) {
261721fffe3SKacheong Poon 			port++;
262721fffe3SKacheong Poon 			/*
263721fffe3SKacheong Poon 			 * Make sure whether the port is in the
264721fffe3SKacheong Poon 			 * valid range.
265721fffe3SKacheong Poon 			 */
266721fffe3SKacheong Poon 			goto retry;
267721fffe3SKacheong Poon 		}
268721fffe3SKacheong Poon 	}
269721fffe3SKacheong Poon 	if (is_system_labeled() &&
270721fffe3SKacheong Poon 	    (i = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred), port,
271721fffe3SKacheong Poon 	    IPPROTO_TCP, B_TRUE)) != 0) {
272721fffe3SKacheong Poon 		port = i;
273721fffe3SKacheong Poon 		goto retry;
274721fffe3SKacheong Poon 	}
275721fffe3SKacheong Poon 	return (port);
276721fffe3SKacheong Poon }
277721fffe3SKacheong Poon 
278721fffe3SKacheong Poon /*
279721fffe3SKacheong Poon  * Return the next anonymous port in the privileged port range for
280721fffe3SKacheong Poon  * bind checking.  It starts at IPPORT_RESERVED - 1 and goes
281721fffe3SKacheong Poon  * downwards.  This is the same behavior as documented in the userland
282*bbf21555SRichard Lowe  * library call rresvport(3SOCKET).
283721fffe3SKacheong Poon  *
284721fffe3SKacheong Poon  * TS note: skip multilevel ports.
285721fffe3SKacheong Poon  */
286721fffe3SKacheong Poon static in_port_t
tcp_get_next_priv_port(const tcp_t * tcp)287721fffe3SKacheong Poon tcp_get_next_priv_port(const tcp_t *tcp)
288721fffe3SKacheong Poon {
289721fffe3SKacheong Poon 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
290721fffe3SKacheong Poon 	in_port_t nextport;
291721fffe3SKacheong Poon 	boolean_t restart = B_FALSE;
292721fffe3SKacheong Poon 	tcp_stack_t *tcps = tcp->tcp_tcps;
293721fffe3SKacheong Poon retry:
294721fffe3SKacheong Poon 	if (next_priv_port < tcps->tcps_min_anonpriv_port ||
295721fffe3SKacheong Poon 	    next_priv_port >= IPPORT_RESERVED) {
296721fffe3SKacheong Poon 		next_priv_port = IPPORT_RESERVED - 1;
297721fffe3SKacheong Poon 		if (restart)
298721fffe3SKacheong Poon 			return (0);
299721fffe3SKacheong Poon 		restart = B_TRUE;
300721fffe3SKacheong Poon 	}
301721fffe3SKacheong Poon 	if (is_system_labeled() &&
302721fffe3SKacheong Poon 	    (nextport = tsol_next_port(crgetzone(tcp->tcp_connp->conn_cred),
303721fffe3SKacheong Poon 	    next_priv_port, IPPROTO_TCP, B_FALSE)) != 0) {
304721fffe3SKacheong Poon 		next_priv_port = nextport;
305721fffe3SKacheong Poon 		goto retry;
306721fffe3SKacheong Poon 	}
307721fffe3SKacheong Poon 	return (next_priv_port--);
308721fffe3SKacheong Poon }
309721fffe3SKacheong Poon 
310721fffe3SKacheong Poon static int
tcp_bind_select_lport(tcp_t * tcp,in_port_t * requested_port_ptr,boolean_t bind_to_req_port_only,cred_t * cr)311721fffe3SKacheong Poon tcp_bind_select_lport(tcp_t *tcp, in_port_t *requested_port_ptr,
312721fffe3SKacheong Poon     boolean_t bind_to_req_port_only, cred_t *cr)
313721fffe3SKacheong Poon {
314721fffe3SKacheong Poon 	in_port_t	mlp_port;
315ab82c29bSToomas Soome 	mlp_type_t	addrtype, mlptype;
316721fffe3SKacheong Poon 	boolean_t	user_specified;
317721fffe3SKacheong Poon 	in_port_t	allocated_port;
318721fffe3SKacheong Poon 	in_port_t	requested_port = *requested_port_ptr;
319721fffe3SKacheong Poon 	conn_t		*connp = tcp->tcp_connp;
320721fffe3SKacheong Poon 	zone_t		*zone;
321721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
322721fffe3SKacheong Poon 	in6_addr_t	v6addr = connp->conn_laddr_v6;
323721fffe3SKacheong Poon 
324ab82c29bSToomas Soome 	zone = NULL;
325721fffe3SKacheong Poon 	/*
326721fffe3SKacheong Poon 	 * XXX It's up to the caller to specify bind_to_req_port_only or not.
327721fffe3SKacheong Poon 	 */
328721fffe3SKacheong Poon 	ASSERT(cr != NULL);
329721fffe3SKacheong Poon 
330721fffe3SKacheong Poon 	/*
331721fffe3SKacheong Poon 	 * Get a valid port (within the anonymous range and should not
332721fffe3SKacheong Poon 	 * be a privileged one) to use if the user has not given a port.
333721fffe3SKacheong Poon 	 * If multiple threads are here, they may all start with
334721fffe3SKacheong Poon 	 * with the same initial port. But, it should be fine as long as
335721fffe3SKacheong Poon 	 * tcp_bindi will ensure that no two threads will be assigned
336721fffe3SKacheong Poon 	 * the same port.
337721fffe3SKacheong Poon 	 *
338721fffe3SKacheong Poon 	 * NOTE: XXX If a privileged process asks for an anonymous port, we
339721fffe3SKacheong Poon 	 * still check for ports only in the range > tcp_smallest_non_priv_port,
340721fffe3SKacheong Poon 	 * unless TCP_ANONPRIVBIND option is set.
341721fffe3SKacheong Poon 	 */
342721fffe3SKacheong Poon 	mlptype = mlptSingle;
343721fffe3SKacheong Poon 	mlp_port = requested_port;
344721fffe3SKacheong Poon 	if (requested_port == 0) {
345721fffe3SKacheong Poon 		requested_port = connp->conn_anon_priv_bind ?
346721fffe3SKacheong Poon 		    tcp_get_next_priv_port(tcp) :
347721fffe3SKacheong Poon 		    tcp_update_next_port(tcps->tcps_next_port_to_try,
348721fffe3SKacheong Poon 		    tcp, B_TRUE);
349721fffe3SKacheong Poon 		if (requested_port == 0) {
350721fffe3SKacheong Poon 			return (-TNOADDR);
351721fffe3SKacheong Poon 		}
352721fffe3SKacheong Poon 		user_specified = B_FALSE;
353721fffe3SKacheong Poon 
354721fffe3SKacheong Poon 		/*
355721fffe3SKacheong Poon 		 * If the user went through one of the RPC interfaces to create
35648bbca81SDaniel Hoffman 		 * this socket and RPC is MLP in this zone, then give them an
357721fffe3SKacheong Poon 		 * anonymous MLP.
358721fffe3SKacheong Poon 		 */
359721fffe3SKacheong Poon 		if (connp->conn_anon_mlp && is_system_labeled()) {
360721fffe3SKacheong Poon 			zone = crgetzone(cr);
361721fffe3SKacheong Poon 			addrtype = tsol_mlp_addr_type(
362721fffe3SKacheong Poon 			    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
363721fffe3SKacheong Poon 			    IPV6_VERSION, &v6addr,
364721fffe3SKacheong Poon 			    tcps->tcps_netstack->netstack_ip);
365721fffe3SKacheong Poon 			if (addrtype == mlptSingle) {
366721fffe3SKacheong Poon 				return (-TNOADDR);
367721fffe3SKacheong Poon 			}
368721fffe3SKacheong Poon 			mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
369721fffe3SKacheong Poon 			    PMAPPORT, addrtype);
370721fffe3SKacheong Poon 			mlp_port = PMAPPORT;
371721fffe3SKacheong Poon 		}
372721fffe3SKacheong Poon 	} else {
373721fffe3SKacheong Poon 		int i;
374721fffe3SKacheong Poon 		boolean_t priv = B_FALSE;
375721fffe3SKacheong Poon 
376721fffe3SKacheong Poon 		/*
377721fffe3SKacheong Poon 		 * If the requested_port is in the well-known privileged range,
378721fffe3SKacheong Poon 		 * verify that the stream was opened by a privileged user.
379721fffe3SKacheong Poon 		 * Note: No locks are held when inspecting tcp_g_*epriv_ports
380721fffe3SKacheong Poon 		 * but instead the code relies on:
381721fffe3SKacheong Poon 		 * - the fact that the address of the array and its size never
382721fffe3SKacheong Poon 		 *   changes
383721fffe3SKacheong Poon 		 * - the atomic assignment of the elements of the array
384721fffe3SKacheong Poon 		 */
385721fffe3SKacheong Poon 		if (requested_port < tcps->tcps_smallest_nonpriv_port) {
386721fffe3SKacheong Poon 			priv = B_TRUE;
387721fffe3SKacheong Poon 		} else {
388721fffe3SKacheong Poon 			for (i = 0; i < tcps->tcps_g_num_epriv_ports; i++) {
389721fffe3SKacheong Poon 				if (requested_port ==
390721fffe3SKacheong Poon 				    tcps->tcps_g_epriv_ports[i]) {
391721fffe3SKacheong Poon 					priv = B_TRUE;
392721fffe3SKacheong Poon 					break;
393721fffe3SKacheong Poon 				}
394721fffe3SKacheong Poon 			}
395721fffe3SKacheong Poon 		}
396721fffe3SKacheong Poon 		if (priv) {
397721fffe3SKacheong Poon 			if (secpolicy_net_privaddr(cr, requested_port,
398721fffe3SKacheong Poon 			    IPPROTO_TCP) != 0) {
399721fffe3SKacheong Poon 				if (connp->conn_debug) {
400721fffe3SKacheong Poon 					(void) strlog(TCP_MOD_ID, 0, 1,
401721fffe3SKacheong Poon 					    SL_ERROR|SL_TRACE,
402721fffe3SKacheong Poon 					    "tcp_bind: no priv for port %d",
403721fffe3SKacheong Poon 					    requested_port);
404721fffe3SKacheong Poon 				}
405721fffe3SKacheong Poon 				return (-TACCES);
406721fffe3SKacheong Poon 			}
407721fffe3SKacheong Poon 		}
408721fffe3SKacheong Poon 		user_specified = B_TRUE;
409721fffe3SKacheong Poon 
410721fffe3SKacheong Poon 		connp = tcp->tcp_connp;
411721fffe3SKacheong Poon 		if (is_system_labeled()) {
412721fffe3SKacheong Poon 			zone = crgetzone(cr);
413721fffe3SKacheong Poon 			addrtype = tsol_mlp_addr_type(
414721fffe3SKacheong Poon 			    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
415721fffe3SKacheong Poon 			    IPV6_VERSION, &v6addr,
416721fffe3SKacheong Poon 			    tcps->tcps_netstack->netstack_ip);
417721fffe3SKacheong Poon 			if (addrtype == mlptSingle) {
418721fffe3SKacheong Poon 				return (-TNOADDR);
419721fffe3SKacheong Poon 			}
420721fffe3SKacheong Poon 			mlptype = tsol_mlp_port_type(zone, IPPROTO_TCP,
421721fffe3SKacheong Poon 			    requested_port, addrtype);
422721fffe3SKacheong Poon 		}
423721fffe3SKacheong Poon 	}
424721fffe3SKacheong Poon 
425721fffe3SKacheong Poon 	if (mlptype != mlptSingle) {
426721fffe3SKacheong Poon 		if (secpolicy_net_bindmlp(cr) != 0) {
427721fffe3SKacheong Poon 			if (connp->conn_debug) {
428721fffe3SKacheong Poon 				(void) strlog(TCP_MOD_ID, 0, 1,
429721fffe3SKacheong Poon 				    SL_ERROR|SL_TRACE,
430721fffe3SKacheong Poon 				    "tcp_bind: no priv for multilevel port %d",
431721fffe3SKacheong Poon 				    requested_port);
432721fffe3SKacheong Poon 			}
433721fffe3SKacheong Poon 			return (-TACCES);
434721fffe3SKacheong Poon 		}
435721fffe3SKacheong Poon 
436721fffe3SKacheong Poon 		/*
437721fffe3SKacheong Poon 		 * If we're specifically binding a shared IP address and the
438721fffe3SKacheong Poon 		 * port is MLP on shared addresses, then check to see if this
439721fffe3SKacheong Poon 		 * zone actually owns the MLP.  Reject if not.
440721fffe3SKacheong Poon 		 */
441721fffe3SKacheong Poon 		if (mlptype == mlptShared && addrtype == mlptShared) {
442721fffe3SKacheong Poon 			/*
443721fffe3SKacheong Poon 			 * No need to handle exclusive-stack zones since
444721fffe3SKacheong Poon 			 * ALL_ZONES only applies to the shared stack.
445721fffe3SKacheong Poon 			 */
446721fffe3SKacheong Poon 			zoneid_t mlpzone;
447721fffe3SKacheong Poon 
448721fffe3SKacheong Poon 			mlpzone = tsol_mlp_findzone(IPPROTO_TCP,
449721fffe3SKacheong Poon 			    htons(mlp_port));
450721fffe3SKacheong Poon 			if (connp->conn_zoneid != mlpzone) {
451721fffe3SKacheong Poon 				if (connp->conn_debug) {
452721fffe3SKacheong Poon 					(void) strlog(TCP_MOD_ID, 0, 1,
453721fffe3SKacheong Poon 					    SL_ERROR|SL_TRACE,
454721fffe3SKacheong Poon 					    "tcp_bind: attempt to bind port "
455721fffe3SKacheong Poon 					    "%d on shared addr in zone %d "
456721fffe3SKacheong Poon 					    "(should be %d)",
457721fffe3SKacheong Poon 					    mlp_port, connp->conn_zoneid,
458721fffe3SKacheong Poon 					    mlpzone);
459721fffe3SKacheong Poon 				}
460721fffe3SKacheong Poon 				return (-TACCES);
461721fffe3SKacheong Poon 			}
462721fffe3SKacheong Poon 		}
463721fffe3SKacheong Poon 
464721fffe3SKacheong Poon 		if (!user_specified) {
465721fffe3SKacheong Poon 			int err;
466721fffe3SKacheong Poon 			err = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
467721fffe3SKacheong Poon 			    requested_port, B_TRUE);
468721fffe3SKacheong Poon 			if (err != 0) {
469721fffe3SKacheong Poon 				if (connp->conn_debug) {
470721fffe3SKacheong Poon 					(void) strlog(TCP_MOD_ID, 0, 1,
471721fffe3SKacheong Poon 					    SL_ERROR|SL_TRACE,
472721fffe3SKacheong Poon 					    "tcp_bind: cannot establish anon "
473721fffe3SKacheong Poon 					    "MLP for port %d",
474721fffe3SKacheong Poon 					    requested_port);
475721fffe3SKacheong Poon 				}
476721fffe3SKacheong Poon 				return (err);
477721fffe3SKacheong Poon 			}
478721fffe3SKacheong Poon 			connp->conn_anon_port = B_TRUE;
479721fffe3SKacheong Poon 		}
480721fffe3SKacheong Poon 		connp->conn_mlp_type = mlptype;
481721fffe3SKacheong Poon 	}
482721fffe3SKacheong Poon 
483721fffe3SKacheong Poon 	allocated_port = tcp_bindi(tcp, requested_port, &v6addr,
484721fffe3SKacheong Poon 	    connp->conn_reuseaddr, B_FALSE, bind_to_req_port_only,
485721fffe3SKacheong Poon 	    user_specified);
486721fffe3SKacheong Poon 
487721fffe3SKacheong Poon 	if (allocated_port == 0) {
488721fffe3SKacheong Poon 		connp->conn_mlp_type = mlptSingle;
489721fffe3SKacheong Poon 		if (connp->conn_anon_port) {
490721fffe3SKacheong Poon 			connp->conn_anon_port = B_FALSE;
491721fffe3SKacheong Poon 			(void) tsol_mlp_anon(zone, mlptype, connp->conn_proto,
492721fffe3SKacheong Poon 			    requested_port, B_FALSE);
493721fffe3SKacheong Poon 		}
494721fffe3SKacheong Poon 		if (bind_to_req_port_only) {
495721fffe3SKacheong Poon 			if (connp->conn_debug) {
496721fffe3SKacheong Poon 				(void) strlog(TCP_MOD_ID, 0, 1,
497721fffe3SKacheong Poon 				    SL_ERROR|SL_TRACE,
498721fffe3SKacheong Poon 				    "tcp_bind: requested addr busy");
499721fffe3SKacheong Poon 			}
500721fffe3SKacheong Poon 			return (-TADDRBUSY);
501721fffe3SKacheong Poon 		} else {
502721fffe3SKacheong Poon 			/* If we are out of ports, fail the bind. */
503721fffe3SKacheong Poon 			if (connp->conn_debug) {
504721fffe3SKacheong Poon 				(void) strlog(TCP_MOD_ID, 0, 1,
505721fffe3SKacheong Poon 				    SL_ERROR|SL_TRACE,
506721fffe3SKacheong Poon 				    "tcp_bind: out of ports?");
507721fffe3SKacheong Poon 			}
508721fffe3SKacheong Poon 			return (-TNOADDR);
509721fffe3SKacheong Poon 		}
510721fffe3SKacheong Poon 	}
511721fffe3SKacheong Poon 
512721fffe3SKacheong Poon 	/* Pass the allocated port back */
513721fffe3SKacheong Poon 	*requested_port_ptr = allocated_port;
514721fffe3SKacheong Poon 	return (0);
515721fffe3SKacheong Poon }
516721fffe3SKacheong Poon 
517721fffe3SKacheong Poon /*
518721fffe3SKacheong Poon  * Check the address and check/pick a local port number.
519721fffe3SKacheong Poon  */
520721fffe3SKacheong Poon int
tcp_bind_check(conn_t * connp,struct sockaddr * sa,socklen_t len,cred_t * cr,boolean_t bind_to_req_port_only)521721fffe3SKacheong Poon tcp_bind_check(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
522721fffe3SKacheong Poon     boolean_t bind_to_req_port_only)
523721fffe3SKacheong Poon {
524721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
525721fffe3SKacheong Poon 	sin_t	*sin;
526721fffe3SKacheong Poon 	sin6_t  *sin6;
527721fffe3SKacheong Poon 	in_port_t	requested_port;
528721fffe3SKacheong Poon 	ipaddr_t	v4addr;
529721fffe3SKacheong Poon 	in6_addr_t	v6addr;
530721fffe3SKacheong Poon 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
531721fffe3SKacheong Poon 	zoneid_t	zoneid = IPCL_ZONEID(connp);
532721fffe3SKacheong Poon 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
533721fffe3SKacheong Poon 	uint_t		scopeid = 0;
534721fffe3SKacheong Poon 	int		error = 0;
535721fffe3SKacheong Poon 	ip_xmit_attr_t	*ixa = connp->conn_ixa;
536721fffe3SKacheong Poon 
537721fffe3SKacheong Poon 	ASSERT((uintptr_t)len <= (uintptr_t)INT_MAX);
538721fffe3SKacheong Poon 
539721fffe3SKacheong Poon 	if (tcp->tcp_state == TCPS_BOUND) {
540721fffe3SKacheong Poon 		return (0);
541721fffe3SKacheong Poon 	} else if (tcp->tcp_state > TCPS_BOUND) {
542721fffe3SKacheong Poon 		if (connp->conn_debug) {
543721fffe3SKacheong Poon 			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
544721fffe3SKacheong Poon 			    "tcp_bind: bad state, %d", tcp->tcp_state);
545721fffe3SKacheong Poon 		}
546721fffe3SKacheong Poon 		return (-TOUTSTATE);
547721fffe3SKacheong Poon 	}
548721fffe3SKacheong Poon 
549721fffe3SKacheong Poon 	ASSERT(sa != NULL && len != 0);
550721fffe3SKacheong Poon 
551721fffe3SKacheong Poon 	if (!OK_32PTR((char *)sa)) {
552721fffe3SKacheong Poon 		if (connp->conn_debug) {
553721fffe3SKacheong Poon 			(void) strlog(TCP_MOD_ID, 0, 1,
554721fffe3SKacheong Poon 			    SL_ERROR|SL_TRACE,
555721fffe3SKacheong Poon 			    "tcp_bind: bad address parameter, "
556721fffe3SKacheong Poon 			    "address %p, len %d",
557721fffe3SKacheong Poon 			    (void *)sa, len);
558721fffe3SKacheong Poon 		}
559721fffe3SKacheong Poon 		return (-TPROTO);
560721fffe3SKacheong Poon 	}
561721fffe3SKacheong Poon 
562721fffe3SKacheong Poon 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
563721fffe3SKacheong Poon 	if (error != 0) {
564721fffe3SKacheong Poon 		return (error);
565721fffe3SKacheong Poon 	}
566721fffe3SKacheong Poon 
567721fffe3SKacheong Poon 	switch (len) {
568721fffe3SKacheong Poon 	case sizeof (sin_t):	/* Complete IPv4 address */
569721fffe3SKacheong Poon 		sin = (sin_t *)sa;
570721fffe3SKacheong Poon 		requested_port = ntohs(sin->sin_port);
571721fffe3SKacheong Poon 		v4addr = sin->sin_addr.s_addr;
572721fffe3SKacheong Poon 		IN6_IPADDR_TO_V4MAPPED(v4addr, &v6addr);
573721fffe3SKacheong Poon 		if (v4addr != INADDR_ANY) {
574721fffe3SKacheong Poon 			laddr_type = ip_laddr_verify_v4(v4addr, zoneid, ipst,
575721fffe3SKacheong Poon 			    B_FALSE);
576721fffe3SKacheong Poon 		}
577721fffe3SKacheong Poon 		break;
578721fffe3SKacheong Poon 
579721fffe3SKacheong Poon 	case sizeof (sin6_t): /* Complete IPv6 address */
580721fffe3SKacheong Poon 		sin6 = (sin6_t *)sa;
581721fffe3SKacheong Poon 		v6addr = sin6->sin6_addr;
582721fffe3SKacheong Poon 		requested_port = ntohs(sin6->sin6_port);
583721fffe3SKacheong Poon 		if (IN6_IS_ADDR_V4MAPPED(&v6addr)) {
584721fffe3SKacheong Poon 			if (connp->conn_ipv6_v6only)
585721fffe3SKacheong Poon 				return (EADDRNOTAVAIL);
586721fffe3SKacheong Poon 
587721fffe3SKacheong Poon 			IN6_V4MAPPED_TO_IPADDR(&v6addr, v4addr);
588721fffe3SKacheong Poon 			if (v4addr != INADDR_ANY) {
589721fffe3SKacheong Poon 				laddr_type = ip_laddr_verify_v4(v4addr,
590721fffe3SKacheong Poon 				    zoneid, ipst, B_FALSE);
591721fffe3SKacheong Poon 			}
592721fffe3SKacheong Poon 		} else {
593721fffe3SKacheong Poon 			if (!IN6_IS_ADDR_UNSPECIFIED(&v6addr)) {
594721fffe3SKacheong Poon 				if (IN6_IS_ADDR_LINKSCOPE(&v6addr))
595721fffe3SKacheong Poon 					scopeid = sin6->sin6_scope_id;
596721fffe3SKacheong Poon 				laddr_type = ip_laddr_verify_v6(&v6addr,
597721fffe3SKacheong Poon 				    zoneid, ipst, B_FALSE, scopeid);
598721fffe3SKacheong Poon 			}
599721fffe3SKacheong Poon 		}
600721fffe3SKacheong Poon 		break;
601721fffe3SKacheong Poon 
602721fffe3SKacheong Poon 	default:
603721fffe3SKacheong Poon 		if (connp->conn_debug) {
604721fffe3SKacheong Poon 			(void) strlog(TCP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
605721fffe3SKacheong Poon 			    "tcp_bind: bad address length, %d", len);
606721fffe3SKacheong Poon 		}
607721fffe3SKacheong Poon 		return (EAFNOSUPPORT);
608721fffe3SKacheong Poon 		/* return (-TBADADDR); */
609721fffe3SKacheong Poon 	}
610721fffe3SKacheong Poon 
611721fffe3SKacheong Poon 	/* Is the local address a valid unicast address? */
612721fffe3SKacheong Poon 	if (laddr_type == IPVL_BAD)
613721fffe3SKacheong Poon 		return (EADDRNOTAVAIL);
614721fffe3SKacheong Poon 
615721fffe3SKacheong Poon 	connp->conn_bound_addr_v6 = v6addr;
616721fffe3SKacheong Poon 	if (scopeid != 0) {
617721fffe3SKacheong Poon 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
618721fffe3SKacheong Poon 		ixa->ixa_scopeid = scopeid;
619721fffe3SKacheong Poon 		connp->conn_incoming_ifindex = scopeid;
620721fffe3SKacheong Poon 	} else {
621721fffe3SKacheong Poon 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
622721fffe3SKacheong Poon 		connp->conn_incoming_ifindex = connp->conn_bound_if;
623721fffe3SKacheong Poon 	}
624721fffe3SKacheong Poon 
625721fffe3SKacheong Poon 	connp->conn_laddr_v6 = v6addr;
626721fffe3SKacheong Poon 	connp->conn_saddr_v6 = v6addr;
627721fffe3SKacheong Poon 
628721fffe3SKacheong Poon 	bind_to_req_port_only = requested_port != 0 && bind_to_req_port_only;
629721fffe3SKacheong Poon 
630721fffe3SKacheong Poon 	error = tcp_bind_select_lport(tcp, &requested_port,
631721fffe3SKacheong Poon 	    bind_to_req_port_only, cr);
632721fffe3SKacheong Poon 	if (error != 0) {
633721fffe3SKacheong Poon 		connp->conn_laddr_v6 = ipv6_all_zeros;
634721fffe3SKacheong Poon 		connp->conn_saddr_v6 = ipv6_all_zeros;
635721fffe3SKacheong Poon 		connp->conn_bound_addr_v6 = ipv6_all_zeros;
636721fffe3SKacheong Poon 	}
637721fffe3SKacheong Poon 	return (error);
638721fffe3SKacheong Poon }
639721fffe3SKacheong Poon 
640721fffe3SKacheong Poon /*
641721fffe3SKacheong Poon  * If the "bind_to_req_port_only" parameter is set, if the requested port
642721fffe3SKacheong Poon  * number is available, return it, If not return 0
643721fffe3SKacheong Poon  *
644721fffe3SKacheong Poon  * If "bind_to_req_port_only" parameter is not set and
645721fffe3SKacheong Poon  * If the requested port number is available, return it.  If not, return
646721fffe3SKacheong Poon  * the first anonymous port we happen across.  If no anonymous ports are
647721fffe3SKacheong Poon  * available, return 0. addr is the requested local address, if any.
648721fffe3SKacheong Poon  *
649721fffe3SKacheong Poon  * In either case, when succeeding update the tcp_t to record the port number
650721fffe3SKacheong Poon  * and insert it in the bind hash table.
651721fffe3SKacheong Poon  *
652721fffe3SKacheong Poon  * Note that TCP over IPv4 and IPv6 sockets can use the same port number
653721fffe3SKacheong Poon  * without setting SO_REUSEADDR. This is needed so that they
654721fffe3SKacheong Poon  * can be viewed as two independent transport protocols.
655721fffe3SKacheong Poon  */
656721fffe3SKacheong Poon in_port_t
tcp_bindi(tcp_t * tcp,in_port_t port,const in6_addr_t * laddr,int reuseaddr,boolean_t quick_connect,boolean_t bind_to_req_port_only,boolean_t user_specified)657721fffe3SKacheong Poon tcp_bindi(tcp_t *tcp, in_port_t port, const in6_addr_t *laddr,
658721fffe3SKacheong Poon     int reuseaddr, boolean_t quick_connect,
659721fffe3SKacheong Poon     boolean_t bind_to_req_port_only, boolean_t user_specified)
660721fffe3SKacheong Poon {
661721fffe3SKacheong Poon 	/* number of times we have run around the loop */
662721fffe3SKacheong Poon 	int count = 0;
663721fffe3SKacheong Poon 	/* maximum number of times to run around the loop */
664721fffe3SKacheong Poon 	int loopmax;
665721fffe3SKacheong Poon 	conn_t *connp = tcp->tcp_connp;
666721fffe3SKacheong Poon 	tcp_stack_t	*tcps = tcp->tcp_tcps;
667721fffe3SKacheong Poon 
668721fffe3SKacheong Poon 	/*
669721fffe3SKacheong Poon 	 * Lookup for free addresses is done in a loop and "loopmax"
670721fffe3SKacheong Poon 	 * influences how long we spin in the loop
671721fffe3SKacheong Poon 	 */
672721fffe3SKacheong Poon 	if (bind_to_req_port_only) {
673721fffe3SKacheong Poon 		/*
674721fffe3SKacheong Poon 		 * If the requested port is busy, don't bother to look
675721fffe3SKacheong Poon 		 * for a new one. Setting loop maximum count to 1 has
676721fffe3SKacheong Poon 		 * that effect.
677721fffe3SKacheong Poon 		 */
678721fffe3SKacheong Poon 		loopmax = 1;
679721fffe3SKacheong Poon 	} else {
680721fffe3SKacheong Poon 		/*
681721fffe3SKacheong Poon 		 * If the requested port is busy, look for a free one
682721fffe3SKacheong Poon 		 * in the anonymous port range.
683721fffe3SKacheong Poon 		 * Set loopmax appropriately so that one does not look
684721fffe3SKacheong Poon 		 * forever in the case all of the anonymous ports are in use.
685721fffe3SKacheong Poon 		 */
686721fffe3SKacheong Poon 		if (connp->conn_anon_priv_bind) {
687721fffe3SKacheong Poon 			/*
688721fffe3SKacheong Poon 			 * loopmax =
689ab82c29bSToomas Soome 			 *	(IPPORT_RESERVED-1) - tcp_min_anonpriv_port + 1
690721fffe3SKacheong Poon 			 */
691721fffe3SKacheong Poon 			loopmax = IPPORT_RESERVED -
692721fffe3SKacheong Poon 			    tcps->tcps_min_anonpriv_port;
693721fffe3SKacheong Poon 		} else {
694721fffe3SKacheong Poon 			loopmax = (tcps->tcps_largest_anon_port -
695721fffe3SKacheong Poon 			    tcps->tcps_smallest_anon_port + 1);
696721fffe3SKacheong Poon 		}
697721fffe3SKacheong Poon 	}
698721fffe3SKacheong Poon 	do {
699721fffe3SKacheong Poon 		uint16_t	lport;
700721fffe3SKacheong Poon 		tf_t		*tbf;
701721fffe3SKacheong Poon 		tcp_t		*ltcp;
702721fffe3SKacheong Poon 		conn_t		*lconnp;
703721fffe3SKacheong Poon 
704721fffe3SKacheong Poon 		lport = htons(port);
705721fffe3SKacheong Poon 
706721fffe3SKacheong Poon 		/*
707721fffe3SKacheong Poon 		 * Ensure that the tcp_t is not currently in the bind hash.
708721fffe3SKacheong Poon 		 * Hold the lock on the hash bucket to ensure that
709721fffe3SKacheong Poon 		 * the duplicate check plus the insertion is an atomic
710721fffe3SKacheong Poon 		 * operation.
711721fffe3SKacheong Poon 		 *
712721fffe3SKacheong Poon 		 * This function does an inline lookup on the bind hash list
713721fffe3SKacheong Poon 		 * Make sure that we access only members of tcp_t
714721fffe3SKacheong Poon 		 * and that we don't look at tcp_tcp, since we are not
715721fffe3SKacheong Poon 		 * doing a CONN_INC_REF.
716721fffe3SKacheong Poon 		 */
717721fffe3SKacheong Poon 		tcp_bind_hash_remove(tcp);
718721fffe3SKacheong Poon 		tbf = &tcps->tcps_bind_fanout[TCP_BIND_HASH(lport)];
719721fffe3SKacheong Poon 		mutex_enter(&tbf->tf_lock);
720721fffe3SKacheong Poon 		for (ltcp = tbf->tf_tcp; ltcp != NULL;
721721fffe3SKacheong Poon 		    ltcp = ltcp->tcp_bind_hash) {
722721fffe3SKacheong Poon 			if (lport == ltcp->tcp_connp->conn_lport)
723721fffe3SKacheong Poon 				break;
724721fffe3SKacheong Poon 		}
725721fffe3SKacheong Poon 
726721fffe3SKacheong Poon 		for (; ltcp != NULL; ltcp = ltcp->tcp_bind_hash_port) {
727721fffe3SKacheong Poon 			boolean_t not_socket;
728721fffe3SKacheong Poon 			boolean_t exclbind;
729721fffe3SKacheong Poon 
730721fffe3SKacheong Poon 			lconnp = ltcp->tcp_connp;
731721fffe3SKacheong Poon 
732721fffe3SKacheong Poon 			/*
733721fffe3SKacheong Poon 			 * On a labeled system, we must treat bindings to ports
734721fffe3SKacheong Poon 			 * on shared IP addresses by sockets with MAC exemption
735721fffe3SKacheong Poon 			 * privilege as being in all zones, as there's
736721fffe3SKacheong Poon 			 * otherwise no way to identify the right receiver.
737721fffe3SKacheong Poon 			 */
738721fffe3SKacheong Poon 			if (!IPCL_BIND_ZONE_MATCH(lconnp, connp))
739721fffe3SKacheong Poon 				continue;
740721fffe3SKacheong Poon 
741721fffe3SKacheong Poon 			/*
742721fffe3SKacheong Poon 			 * If TCP_EXCLBIND is set for either the bound or
743721fffe3SKacheong Poon 			 * binding endpoint, the semantics of bind
744721fffe3SKacheong Poon 			 * is changed according to the following.
745721fffe3SKacheong Poon 			 *
746721fffe3SKacheong Poon 			 * spec = specified address (v4 or v6)
747721fffe3SKacheong Poon 			 * unspec = unspecified address (v4 or v6)
748721fffe3SKacheong Poon 			 * A = specified addresses are different for endpoints
749721fffe3SKacheong Poon 			 *
750721fffe3SKacheong Poon 			 * bound	bind to		allowed
751721fffe3SKacheong Poon 			 * -------------------------------------
752721fffe3SKacheong Poon 			 * unspec	unspec		no
753721fffe3SKacheong Poon 			 * unspec	spec		no
754721fffe3SKacheong Poon 			 * spec		unspec		no
755721fffe3SKacheong Poon 			 * spec		spec		yes if A
756721fffe3SKacheong Poon 			 *
757721fffe3SKacheong Poon 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
758721fffe3SKacheong Poon 			 * as TCP_EXCLBIND, except that zoneid is ignored.
759721fffe3SKacheong Poon 			 *
760721fffe3SKacheong Poon 			 * Note:
761721fffe3SKacheong Poon 			 *
762721fffe3SKacheong Poon 			 * 1. Because of TLI semantics, an endpoint can go
763721fffe3SKacheong Poon 			 * back from, say TCP_ESTABLISHED to TCPS_LISTEN or
764721fffe3SKacheong Poon 			 * TCPS_BOUND, depending on whether it is originally
765721fffe3SKacheong Poon 			 * a listener or not.  That is why we need to check
766721fffe3SKacheong Poon 			 * for states greater than or equal to TCPS_BOUND
767721fffe3SKacheong Poon 			 * here.
768721fffe3SKacheong Poon 			 *
769721fffe3SKacheong Poon 			 * 2. Ideally, we should only check for state equals
770721fffe3SKacheong Poon 			 * to TCPS_LISTEN. And the following check should be
771721fffe3SKacheong Poon 			 * added.
772721fffe3SKacheong Poon 			 *
773721fffe3SKacheong Poon 			 * if (ltcp->tcp_state == TCPS_LISTEN ||
774721fffe3SKacheong Poon 			 *	!reuseaddr || !lconnp->conn_reuseaddr) {
775721fffe3SKacheong Poon 			 *		...
776721fffe3SKacheong Poon 			 * }
777721fffe3SKacheong Poon 			 *
778721fffe3SKacheong Poon 			 * The semantics will be changed to this.  If the
779721fffe3SKacheong Poon 			 * endpoint on the list is in state not equal to
780721fffe3SKacheong Poon 			 * TCPS_LISTEN and both endpoints have SO_REUSEADDR
781721fffe3SKacheong Poon 			 * set, let the bind succeed.
782721fffe3SKacheong Poon 			 *
783721fffe3SKacheong Poon 			 * Because of (1), we cannot do that for TLI
784721fffe3SKacheong Poon 			 * endpoints.  But we can do that for socket endpoints.
785721fffe3SKacheong Poon 			 * If in future, we can change this going back
786721fffe3SKacheong Poon 			 * semantics, we can use the above check for TLI also.
787721fffe3SKacheong Poon 			 */
788721fffe3SKacheong Poon 			not_socket = !(TCP_IS_SOCKET(ltcp) &&
789721fffe3SKacheong Poon 			    TCP_IS_SOCKET(tcp));
790721fffe3SKacheong Poon 			exclbind = lconnp->conn_exclbind ||
791721fffe3SKacheong Poon 			    connp->conn_exclbind;
792721fffe3SKacheong Poon 
793721fffe3SKacheong Poon 			if ((lconnp->conn_mac_mode != CONN_MAC_DEFAULT) ||
794721fffe3SKacheong Poon 			    (connp->conn_mac_mode != CONN_MAC_DEFAULT) ||
795721fffe3SKacheong Poon 			    (exclbind && (not_socket ||
796721fffe3SKacheong Poon 			    ltcp->tcp_state <= TCPS_ESTABLISHED))) {
797721fffe3SKacheong Poon 				if (V6_OR_V4_INADDR_ANY(
798721fffe3SKacheong Poon 				    lconnp->conn_bound_addr_v6) ||
799721fffe3SKacheong Poon 				    V6_OR_V4_INADDR_ANY(*laddr) ||
800721fffe3SKacheong Poon 				    IN6_ARE_ADDR_EQUAL(laddr,
801721fffe3SKacheong Poon 				    &lconnp->conn_bound_addr_v6)) {
802721fffe3SKacheong Poon 					break;
803721fffe3SKacheong Poon 				}
804721fffe3SKacheong Poon 				continue;
805721fffe3SKacheong Poon 			}
806721fffe3SKacheong Poon 
807721fffe3SKacheong Poon 			/*
808721fffe3SKacheong Poon 			 * Check ipversion to allow IPv4 and IPv6 sockets to
809721fffe3SKacheong Poon 			 * have disjoint port number spaces, if *_EXCLBIND
810721fffe3SKacheong Poon 			 * is not set and only if the application binds to a
811721fffe3SKacheong Poon 			 * specific port. We use the same autoassigned port
812721fffe3SKacheong Poon 			 * number space for IPv4 and IPv6 sockets.
813721fffe3SKacheong Poon 			 */
814721fffe3SKacheong Poon 			if (connp->conn_ipversion != lconnp->conn_ipversion &&
815721fffe3SKacheong Poon 			    bind_to_req_port_only)
816721fffe3SKacheong Poon 				continue;
817721fffe3SKacheong Poon 
818721fffe3SKacheong Poon 			/*
819721fffe3SKacheong Poon 			 * Ideally, we should make sure that the source
820721fffe3SKacheong Poon 			 * address, remote address, and remote port in the
821721fffe3SKacheong Poon 			 * four tuple for this tcp-connection is unique.
822721fffe3SKacheong Poon 			 * However, trying to find out the local source
823721fffe3SKacheong Poon 			 * address would require too much code duplication
824721fffe3SKacheong Poon 			 * with IP, since IP needs needs to have that code
825721fffe3SKacheong Poon 			 * to support userland TCP implementations.
826721fffe3SKacheong Poon 			 */
827721fffe3SKacheong Poon 			if (quick_connect &&
828721fffe3SKacheong Poon 			    (ltcp->tcp_state > TCPS_LISTEN) &&
829721fffe3SKacheong Poon 			    ((connp->conn_fport != lconnp->conn_fport) ||
830721fffe3SKacheong Poon 			    !IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
831721fffe3SKacheong Poon 			    &lconnp->conn_faddr_v6)))
832721fffe3SKacheong Poon 				continue;
833721fffe3SKacheong Poon 
834721fffe3SKacheong Poon 			if (!reuseaddr) {
835721fffe3SKacheong Poon 				/*
836721fffe3SKacheong Poon 				 * No socket option SO_REUSEADDR.
837721fffe3SKacheong Poon 				 * If existing port is bound to
838721fffe3SKacheong Poon 				 * a non-wildcard IP address
839721fffe3SKacheong Poon 				 * and the requesting stream is
840721fffe3SKacheong Poon 				 * bound to a distinct
841721fffe3SKacheong Poon 				 * different IP addresses
842721fffe3SKacheong Poon 				 * (non-wildcard, also), keep
843721fffe3SKacheong Poon 				 * going.
844721fffe3SKacheong Poon 				 */
845721fffe3SKacheong Poon 				if (!V6_OR_V4_INADDR_ANY(*laddr) &&
846721fffe3SKacheong Poon 				    !V6_OR_V4_INADDR_ANY(
847721fffe3SKacheong Poon 				    lconnp->conn_bound_addr_v6) &&
848721fffe3SKacheong Poon 				    !IN6_ARE_ADDR_EQUAL(laddr,
849721fffe3SKacheong Poon 				    &lconnp->conn_bound_addr_v6))
850721fffe3SKacheong Poon 					continue;
851721fffe3SKacheong Poon 				if (ltcp->tcp_state >= TCPS_BOUND) {
852721fffe3SKacheong Poon 					/*
853721fffe3SKacheong Poon 					 * This port is being used and
854721fffe3SKacheong Poon 					 * its state is >= TCPS_BOUND,
855721fffe3SKacheong Poon 					 * so we can't bind to it.
856721fffe3SKacheong Poon 					 */
857721fffe3SKacheong Poon 					break;
858721fffe3SKacheong Poon 				}
859721fffe3SKacheong Poon 			} else {
860721fffe3SKacheong Poon 				/*
861721fffe3SKacheong Poon 				 * socket option SO_REUSEADDR is set on the
862721fffe3SKacheong Poon 				 * binding tcp_t.
863721fffe3SKacheong Poon 				 *
864721fffe3SKacheong Poon 				 * If two streams are bound to
865721fffe3SKacheong Poon 				 * same IP address or both addr
866721fffe3SKacheong Poon 				 * and bound source are wildcards
867721fffe3SKacheong Poon 				 * (INADDR_ANY), we want to stop
868721fffe3SKacheong Poon 				 * searching.
869721fffe3SKacheong Poon 				 * We have found a match of IP source
870721fffe3SKacheong Poon 				 * address and source port, which is
871721fffe3SKacheong Poon 				 * refused regardless of the
872721fffe3SKacheong Poon 				 * SO_REUSEADDR setting, so we break.
873721fffe3SKacheong Poon 				 */
874721fffe3SKacheong Poon 				if (IN6_ARE_ADDR_EQUAL(laddr,
875721fffe3SKacheong Poon 				    &lconnp->conn_bound_addr_v6) &&
876721fffe3SKacheong Poon 				    (ltcp->tcp_state == TCPS_LISTEN ||
877721fffe3SKacheong Poon 				    ltcp->tcp_state == TCPS_BOUND))
878721fffe3SKacheong Poon 					break;
879721fffe3SKacheong Poon 			}
880721fffe3SKacheong Poon 		}
881721fffe3SKacheong Poon 		if (ltcp != NULL) {
882721fffe3SKacheong Poon 			/* The port number is busy */
883721fffe3SKacheong Poon 			mutex_exit(&tbf->tf_lock);
884721fffe3SKacheong Poon 		} else {
885721fffe3SKacheong Poon 			/*
886721fffe3SKacheong Poon 			 * This port is ours. Insert in fanout and mark as
887721fffe3SKacheong Poon 			 * bound to prevent others from getting the port
888721fffe3SKacheong Poon 			 * number.
889721fffe3SKacheong Poon 			 */
890721fffe3SKacheong Poon 			tcp->tcp_state = TCPS_BOUND;
8919cd928feSAlan Maguire 			DTRACE_TCP6(state__change, void, NULL,
8929cd928feSAlan Maguire 			    ip_xmit_attr_t *, connp->conn_ixa,
8939cd928feSAlan Maguire 			    void, NULL, tcp_t *, tcp, void, NULL,
8949cd928feSAlan Maguire 			    int32_t, TCPS_IDLE);
8959cd928feSAlan Maguire 
896721fffe3SKacheong Poon 			connp->conn_lport = htons(port);
897721fffe3SKacheong Poon 
898721fffe3SKacheong Poon 			ASSERT(&tcps->tcps_bind_fanout[TCP_BIND_HASH(
899721fffe3SKacheong Poon 			    connp->conn_lport)] == tbf);
900721fffe3SKacheong Poon 			tcp_bind_hash_insert(tbf, tcp, 1);
901721fffe3SKacheong Poon 
902721fffe3SKacheong Poon 			mutex_exit(&tbf->tf_lock);
903721fffe3SKacheong Poon 
904721fffe3SKacheong Poon 			/*
905721fffe3SKacheong Poon 			 * We don't want tcp_next_port_to_try to "inherit"
906721fffe3SKacheong Poon 			 * a port number supplied by the user in a bind.
907721fffe3SKacheong Poon 			 */
908721fffe3SKacheong Poon 			if (user_specified)
909721fffe3SKacheong Poon 				return (port);
910721fffe3SKacheong Poon 
911721fffe3SKacheong Poon 			/*
912721fffe3SKacheong Poon 			 * This is the only place where tcp_next_port_to_try
913721fffe3SKacheong Poon 			 * is updated. After the update, it may or may not
914721fffe3SKacheong Poon 			 * be in the valid range.
915721fffe3SKacheong Poon 			 */
916721fffe3SKacheong Poon 			if (!connp->conn_anon_priv_bind)
917721fffe3SKacheong Poon 				tcps->tcps_next_port_to_try = port + 1;
918721fffe3SKacheong Poon 			return (port);
919721fffe3SKacheong Poon 		}
920721fffe3SKacheong Poon 
921721fffe3SKacheong Poon 		if (connp->conn_anon_priv_bind) {
922721fffe3SKacheong Poon 			port = tcp_get_next_priv_port(tcp);
923721fffe3SKacheong Poon 		} else {
924721fffe3SKacheong Poon 			if (count == 0 && user_specified) {
925721fffe3SKacheong Poon 				/*
926721fffe3SKacheong Poon 				 * We may have to return an anonymous port. So
927721fffe3SKacheong Poon 				 * get one to start with.
928721fffe3SKacheong Poon 				 */
929721fffe3SKacheong Poon 				port =
930721fffe3SKacheong Poon 				    tcp_update_next_port(
931721fffe3SKacheong Poon 				    tcps->tcps_next_port_to_try,
932721fffe3SKacheong Poon 				    tcp, B_TRUE);
933721fffe3SKacheong Poon 				user_specified = B_FALSE;
934721fffe3SKacheong Poon 			} else {
935721fffe3SKacheong Poon 				port = tcp_update_next_port(port + 1, tcp,
936721fffe3SKacheong Poon 				    B_FALSE);
937721fffe3SKacheong Poon 			}
938721fffe3SKacheong Poon 		}
939721fffe3SKacheong Poon 		if (port == 0)
940721fffe3SKacheong Poon 			break;
941721fffe3SKacheong Poon 
942721fffe3SKacheong Poon 		/*
943721fffe3SKacheong Poon 		 * Don't let this loop run forever in the case where
944721fffe3SKacheong Poon 		 * all of the anonymous ports are in use.
945721fffe3SKacheong Poon 		 */
946721fffe3SKacheong Poon 	} while (++count < loopmax);
947721fffe3SKacheong Poon 	return (0);
948721fffe3SKacheong Poon }
949