xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_socket.c (revision dd49f125507979bb2ab505a8daf2a46d1be27051)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/* This file contains all TCP kernel socket related functions. */
27721fffe3SKacheong Poon 
28721fffe3SKacheong Poon #include <sys/types.h>
29721fffe3SKacheong Poon #include <sys/strlog.h>
30721fffe3SKacheong Poon #include <sys/policy.h>
31721fffe3SKacheong Poon #include <sys/sockio.h>
32721fffe3SKacheong Poon #include <sys/strsubr.h>
33721fffe3SKacheong Poon #include <sys/strsun.h>
34721fffe3SKacheong Poon #include <sys/squeue_impl.h>
35721fffe3SKacheong Poon #include <sys/squeue.h>
363e95bd4aSAnders Persson #define	_SUN_TPI_VERSION 2
37721fffe3SKacheong Poon #include <sys/tihdr.h>
38721fffe3SKacheong Poon #include <sys/timod.h>
39721fffe3SKacheong Poon #include <sys/tpicommon.h>
40721fffe3SKacheong Poon #include <sys/socketvar.h>
41721fffe3SKacheong Poon 
42721fffe3SKacheong Poon #include <inet/common.h>
43721fffe3SKacheong Poon #include <inet/proto_set.h>
44721fffe3SKacheong Poon #include <inet/ip.h>
45721fffe3SKacheong Poon #include <inet/tcp.h>
46721fffe3SKacheong Poon #include <inet/tcp_impl.h>
47721fffe3SKacheong Poon 
48721fffe3SKacheong Poon static void	tcp_activate(sock_lower_handle_t, sock_upper_handle_t,
49721fffe3SKacheong Poon 		    sock_upcalls_t *, int, cred_t *);
50721fffe3SKacheong Poon static int	tcp_accept(sock_lower_handle_t, sock_lower_handle_t,
51721fffe3SKacheong Poon 		    sock_upper_handle_t, cred_t *);
52721fffe3SKacheong Poon static int	tcp_bind(sock_lower_handle_t, struct sockaddr *,
53721fffe3SKacheong Poon 		    socklen_t, cred_t *);
54721fffe3SKacheong Poon static int	tcp_listen(sock_lower_handle_t, int, cred_t *);
55721fffe3SKacheong Poon static int	tcp_connect(sock_lower_handle_t, const struct sockaddr *,
56721fffe3SKacheong Poon 		    socklen_t, sock_connid_t *, cred_t *);
57721fffe3SKacheong Poon static int	tcp_getsockopt(sock_lower_handle_t, int, int, void *,
58721fffe3SKacheong Poon 		    socklen_t *, cred_t *);
59721fffe3SKacheong Poon static int	tcp_setsockopt(sock_lower_handle_t, int, int, const void *,
60721fffe3SKacheong Poon 		    socklen_t, cred_t *);
61721fffe3SKacheong Poon static int	tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
62721fffe3SKacheong Poon 		    cred_t *cr);
63721fffe3SKacheong Poon static int	tcp_shutdown(sock_lower_handle_t, int, cred_t *);
64721fffe3SKacheong Poon static void	tcp_clr_flowctrl(sock_lower_handle_t);
65721fffe3SKacheong Poon static int	tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
66721fffe3SKacheong Poon 		    cred_t *);
67721fffe3SKacheong Poon static int	tcp_close(sock_lower_handle_t, int, cred_t *);
68721fffe3SKacheong Poon 
69721fffe3SKacheong Poon sock_downcalls_t sock_tcp_downcalls = {
70721fffe3SKacheong Poon 	tcp_activate,
71721fffe3SKacheong Poon 	tcp_accept,
72721fffe3SKacheong Poon 	tcp_bind,
73721fffe3SKacheong Poon 	tcp_listen,
74721fffe3SKacheong Poon 	tcp_connect,
75721fffe3SKacheong Poon 	tcp_getpeername,
76721fffe3SKacheong Poon 	tcp_getsockname,
77721fffe3SKacheong Poon 	tcp_getsockopt,
78721fffe3SKacheong Poon 	tcp_setsockopt,
79721fffe3SKacheong Poon 	tcp_sendmsg,
80721fffe3SKacheong Poon 	NULL,
81721fffe3SKacheong Poon 	NULL,
82721fffe3SKacheong Poon 	NULL,
83721fffe3SKacheong Poon 	tcp_shutdown,
84721fffe3SKacheong Poon 	tcp_clr_flowctrl,
85721fffe3SKacheong Poon 	tcp_ioctl,
86721fffe3SKacheong Poon 	tcp_close,
87721fffe3SKacheong Poon };
88721fffe3SKacheong Poon 
89721fffe3SKacheong Poon /* ARGSUSED */
90721fffe3SKacheong Poon static void
91721fffe3SKacheong Poon tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
92721fffe3SKacheong Poon     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
93721fffe3SKacheong Poon {
94721fffe3SKacheong Poon 	conn_t *connp = (conn_t *)proto_handle;
95721fffe3SKacheong Poon 	struct sock_proto_props sopp;
96721fffe3SKacheong Poon 	extern struct module_info tcp_rinfo;
97721fffe3SKacheong Poon 
98721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle == NULL);
99721fffe3SKacheong Poon 
100721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
101721fffe3SKacheong Poon 	ASSERT(cr != NULL);
102721fffe3SKacheong Poon 
103721fffe3SKacheong Poon 	sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
104721fffe3SKacheong Poon 	    SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER |
105721fffe3SKacheong Poon 	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ;
106721fffe3SKacheong Poon 
107721fffe3SKacheong Poon 	sopp.sopp_rxhiwat = SOCKET_RECVHIWATER;
108721fffe3SKacheong Poon 	sopp.sopp_rxlowat = SOCKET_RECVLOWATER;
109721fffe3SKacheong Poon 	sopp.sopp_maxpsz = INFPSZ;
110721fffe3SKacheong Poon 	sopp.sopp_maxblk = INFPSZ;
111721fffe3SKacheong Poon 	sopp.sopp_rcvtimer = SOCKET_TIMER_INTERVAL;
112721fffe3SKacheong Poon 	sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3;
113721fffe3SKacheong Poon 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
114721fffe3SKacheong Poon 	sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 :
115721fffe3SKacheong Poon 	    tcp_rinfo.mi_minpsz;
116721fffe3SKacheong Poon 
117721fffe3SKacheong Poon 	connp->conn_upcalls = sock_upcalls;
118721fffe3SKacheong Poon 	connp->conn_upper_handle = sock_handle;
119721fffe3SKacheong Poon 
120721fffe3SKacheong Poon 	ASSERT(connp->conn_rcvbuf != 0 &&
121721fffe3SKacheong Poon 	    connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd);
122721fffe3SKacheong Poon 	(*sock_upcalls->su_set_proto_props)(sock_handle, &sopp);
123721fffe3SKacheong Poon }
124721fffe3SKacheong Poon 
1253e95bd4aSAnders Persson /*ARGSUSED*/
126721fffe3SKacheong Poon static int
127721fffe3SKacheong Poon tcp_accept(sock_lower_handle_t lproto_handle,
128721fffe3SKacheong Poon     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
129721fffe3SKacheong Poon     cred_t *cr)
130721fffe3SKacheong Poon {
131721fffe3SKacheong Poon 	conn_t *lconnp, *econnp;
132721fffe3SKacheong Poon 	tcp_t *listener, *eager;
133721fffe3SKacheong Poon 
134*dd49f125SAnders Persson 	/*
135*dd49f125SAnders Persson 	 * KSSL can move a socket from one listener to another, in which
136*dd49f125SAnders Persson 	 * case `lproto_handle' points to the new listener. To ensure that
137*dd49f125SAnders Persson 	 * the original listener is used the information is obtained from
138*dd49f125SAnders Persson 	 * the eager.
139*dd49f125SAnders Persson 	 */
140721fffe3SKacheong Poon 	econnp = (conn_t *)eproto_handle;
141721fffe3SKacheong Poon 	eager = econnp->conn_tcp;
1423e95bd4aSAnders Persson 	ASSERT(IPCL_IS_NONSTR(econnp));
143*dd49f125SAnders Persson 	ASSERT(eager->tcp_listener != NULL);
144*dd49f125SAnders Persson 	listener = eager->tcp_listener;
145*dd49f125SAnders Persson 	lconnp = (conn_t *)listener->tcp_connp;
146*dd49f125SAnders Persson 	ASSERT(listener->tcp_state == TCPS_LISTEN);
1473e95bd4aSAnders Persson 	ASSERT(lconnp->conn_upper_handle != NULL);
148721fffe3SKacheong Poon 
149721fffe3SKacheong Poon 	/*
1503e95bd4aSAnders Persson 	 * It is possible for the accept thread to race with the thread that
1513e95bd4aSAnders Persson 	 * made the su_newconn upcall in tcp_newconn_notify. Both
1523e95bd4aSAnders Persson 	 * tcp_newconn_notify and tcp_accept require that conn_upper_handle
1533e95bd4aSAnders Persson 	 * and conn_upcalls be set before returning, so they both write to
1543e95bd4aSAnders Persson 	 * them. However, we're guaranteed that the value written is the same
1553e95bd4aSAnders Persson 	 * for both threads.
156721fffe3SKacheong Poon 	 */
1573e95bd4aSAnders Persson 	ASSERT(econnp->conn_upper_handle == NULL ||
1583e95bd4aSAnders Persson 	    econnp->conn_upper_handle == sock_handle);
1593e95bd4aSAnders Persson 	ASSERT(econnp->conn_upcalls == NULL ||
1603e95bd4aSAnders Persson 	    econnp->conn_upcalls == lconnp->conn_upcalls);
161721fffe3SKacheong Poon 	econnp->conn_upper_handle = sock_handle;
162721fffe3SKacheong Poon 	econnp->conn_upcalls = lconnp->conn_upcalls;
1633e95bd4aSAnders Persson 
1643e95bd4aSAnders Persson 	ASSERT(econnp->conn_netstack ==
1653e95bd4aSAnders Persson 	    listener->tcp_connp->conn_netstack);
1663e95bd4aSAnders Persson 	ASSERT(eager->tcp_tcps == listener->tcp_tcps);
1673e95bd4aSAnders Persson 
1683e95bd4aSAnders Persson 	/*
1693e95bd4aSAnders Persson 	 * We should have a minimum of 2 references on the conn at this
1703e95bd4aSAnders Persson 	 * point. One for TCP and one for the newconn notification
1713e95bd4aSAnders Persson 	 * (which is now taken over by IP). In the normal case we would
1723e95bd4aSAnders Persson 	 * also have another reference (making a total of 3) for the conn
1733e95bd4aSAnders Persson 	 * being in the classifier hash list. However the eager could have
1743e95bd4aSAnders Persson 	 * received an RST subsequently and tcp_closei_local could have
1753e95bd4aSAnders Persson 	 * removed the eager from the classifier hash list, hence we can't
1763e95bd4aSAnders Persson 	 * assert that reference.
1773e95bd4aSAnders Persson 	 */
1783e95bd4aSAnders Persson 	ASSERT(econnp->conn_ref >= 2);
1793e95bd4aSAnders Persson 
1803e95bd4aSAnders Persson 	/*
1813e95bd4aSAnders Persson 	 * An error is returned if this conn has been reset, which will
1823e95bd4aSAnders Persson 	 * cause the socket to be closed immediately. The eager will be
1833e95bd4aSAnders Persson 	 * unlinked from the listener during close.
1843e95bd4aSAnders Persson 	 */
1853e95bd4aSAnders Persson 	if (eager->tcp_state < TCPS_ESTABLISHED)
1863e95bd4aSAnders Persson 		return (ECONNABORTED);
1873e95bd4aSAnders Persson 
1883e95bd4aSAnders Persson 	mutex_enter(&listener->tcp_eager_lock);
1893e95bd4aSAnders Persson 	/*
1903e95bd4aSAnders Persson 	 * Non-STREAMS listeners never defer the notification of new
1913e95bd4aSAnders Persson 	 * connections.
1923e95bd4aSAnders Persson 	 */
1933e95bd4aSAnders Persson 	ASSERT(!listener->tcp_eager_prev_q0->tcp_conn_def_q0);
1943e95bd4aSAnders Persson 	tcp_eager_unlink(eager);
1953e95bd4aSAnders Persson 	mutex_exit(&listener->tcp_eager_lock);
1963e95bd4aSAnders Persson 	CONN_DEC_REF(listener->tcp_connp);
1973e95bd4aSAnders Persson 
1983e95bd4aSAnders Persson 	return (0);
199721fffe3SKacheong Poon }
200721fffe3SKacheong Poon 
201721fffe3SKacheong Poon static int
202721fffe3SKacheong Poon tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
203721fffe3SKacheong Poon     socklen_t len, cred_t *cr)
204721fffe3SKacheong Poon {
205721fffe3SKacheong Poon 	int 		error;
206721fffe3SKacheong Poon 	conn_t		*connp = (conn_t *)proto_handle;
207721fffe3SKacheong Poon 
208721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
209721fffe3SKacheong Poon 	ASSERT(cr != NULL);
210721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
211721fffe3SKacheong Poon 
2129ee3959aSAnders Persson 	error = squeue_synch_enter(connp, NULL);
213721fffe3SKacheong Poon 	if (error != 0) {
214721fffe3SKacheong Poon 		/* failed to enter */
215721fffe3SKacheong Poon 		return (ENOSR);
216721fffe3SKacheong Poon 	}
217721fffe3SKacheong Poon 
218721fffe3SKacheong Poon 	/* binding to a NULL address really means unbind */
219721fffe3SKacheong Poon 	if (sa == NULL) {
220721fffe3SKacheong Poon 		if (connp->conn_tcp->tcp_state < TCPS_LISTEN)
221721fffe3SKacheong Poon 			error = tcp_do_unbind(connp);
222721fffe3SKacheong Poon 		else
223721fffe3SKacheong Poon 			error = EINVAL;
224721fffe3SKacheong Poon 	} else {
225721fffe3SKacheong Poon 		error = tcp_do_bind(connp, sa, len, cr, B_TRUE);
226721fffe3SKacheong Poon 	}
227721fffe3SKacheong Poon 
2289ee3959aSAnders Persson 	squeue_synch_exit(connp);
229721fffe3SKacheong Poon 
230721fffe3SKacheong Poon 	if (error < 0) {
231721fffe3SKacheong Poon 		if (error == -TOUTSTATE)
232721fffe3SKacheong Poon 			error = EINVAL;
233721fffe3SKacheong Poon 		else
234721fffe3SKacheong Poon 			error = proto_tlitosyserr(-error);
235721fffe3SKacheong Poon 	}
236721fffe3SKacheong Poon 
237721fffe3SKacheong Poon 	return (error);
238721fffe3SKacheong Poon }
239721fffe3SKacheong Poon 
240721fffe3SKacheong Poon /* ARGSUSED */
241721fffe3SKacheong Poon static int
242721fffe3SKacheong Poon tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
243721fffe3SKacheong Poon {
244721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)proto_handle;
2453e95bd4aSAnders Persson 	tcp_t	*tcp = connp->conn_tcp;
246721fffe3SKacheong Poon 	int 	error;
247721fffe3SKacheong Poon 
248721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
249721fffe3SKacheong Poon 
250721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
251721fffe3SKacheong Poon 	ASSERT(cr != NULL);
252721fffe3SKacheong Poon 
2539ee3959aSAnders Persson 	error = squeue_synch_enter(connp, NULL);
254721fffe3SKacheong Poon 	if (error != 0) {
255721fffe3SKacheong Poon 		/* failed to enter */
256721fffe3SKacheong Poon 		return (ENOBUFS);
257721fffe3SKacheong Poon 	}
258721fffe3SKacheong Poon 
259721fffe3SKacheong Poon 	error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE);
260721fffe3SKacheong Poon 	if (error == 0) {
2613e95bd4aSAnders Persson 		/*
2623e95bd4aSAnders Persson 		 * sockfs needs to know what's the maximum number of socket
2633e95bd4aSAnders Persson 		 * that can be queued on the listener.
2643e95bd4aSAnders Persson 		 */
265721fffe3SKacheong Poon 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
2663e95bd4aSAnders Persson 		    SOCK_OPCTL_ENAB_ACCEPT,
2673e95bd4aSAnders Persson 		    (uintptr_t)(tcp->tcp_conn_req_max +
2683e95bd4aSAnders Persson 		    tcp->tcp_tcps->tcps_conn_req_max_q0));
269721fffe3SKacheong Poon 	} else if (error < 0) {
270721fffe3SKacheong Poon 		if (error == -TOUTSTATE)
271721fffe3SKacheong Poon 			error = EINVAL;
272721fffe3SKacheong Poon 		else
273721fffe3SKacheong Poon 			error = proto_tlitosyserr(-error);
274721fffe3SKacheong Poon 	}
2759ee3959aSAnders Persson 	squeue_synch_exit(connp);
276721fffe3SKacheong Poon 	return (error);
277721fffe3SKacheong Poon }
278721fffe3SKacheong Poon 
279721fffe3SKacheong Poon static int
280721fffe3SKacheong Poon tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
281721fffe3SKacheong Poon     socklen_t len, sock_connid_t *id, cred_t *cr)
282721fffe3SKacheong Poon {
283721fffe3SKacheong Poon 	conn_t		*connp = (conn_t *)proto_handle;
284721fffe3SKacheong Poon 	int		error;
285721fffe3SKacheong Poon 
286721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
287721fffe3SKacheong Poon 
288721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
289721fffe3SKacheong Poon 	ASSERT(cr != NULL);
290721fffe3SKacheong Poon 
291721fffe3SKacheong Poon 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
292721fffe3SKacheong Poon 	if (error != 0) {
293721fffe3SKacheong Poon 		return (error);
294721fffe3SKacheong Poon 	}
295721fffe3SKacheong Poon 
2969ee3959aSAnders Persson 	error = squeue_synch_enter(connp, NULL);
297721fffe3SKacheong Poon 	if (error != 0) {
298721fffe3SKacheong Poon 		/* failed to enter */
299721fffe3SKacheong Poon 		return (ENOSR);
300721fffe3SKacheong Poon 	}
301721fffe3SKacheong Poon 
302721fffe3SKacheong Poon 	/*
303721fffe3SKacheong Poon 	 * TCP supports quick connect, so no need to do an implicit bind
304721fffe3SKacheong Poon 	 */
305721fffe3SKacheong Poon 	error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid);
306721fffe3SKacheong Poon 	if (error == 0) {
307721fffe3SKacheong Poon 		*id = connp->conn_tcp->tcp_connid;
308721fffe3SKacheong Poon 	} else if (error < 0) {
309721fffe3SKacheong Poon 		if (error == -TOUTSTATE) {
310721fffe3SKacheong Poon 			switch (connp->conn_tcp->tcp_state) {
311721fffe3SKacheong Poon 			case TCPS_SYN_SENT:
312721fffe3SKacheong Poon 				error = EALREADY;
313721fffe3SKacheong Poon 				break;
314721fffe3SKacheong Poon 			case TCPS_ESTABLISHED:
315721fffe3SKacheong Poon 				error = EISCONN;
316721fffe3SKacheong Poon 				break;
317721fffe3SKacheong Poon 			case TCPS_LISTEN:
318721fffe3SKacheong Poon 				error = EOPNOTSUPP;
319721fffe3SKacheong Poon 				break;
320721fffe3SKacheong Poon 			default:
321721fffe3SKacheong Poon 				error = EINVAL;
322721fffe3SKacheong Poon 				break;
323721fffe3SKacheong Poon 			}
324721fffe3SKacheong Poon 		} else {
325721fffe3SKacheong Poon 			error = proto_tlitosyserr(-error);
326721fffe3SKacheong Poon 		}
327721fffe3SKacheong Poon 	}
328721fffe3SKacheong Poon 
329721fffe3SKacheong Poon 	if (connp->conn_tcp->tcp_loopback) {
330721fffe3SKacheong Poon 		struct sock_proto_props sopp;
331721fffe3SKacheong Poon 
332721fffe3SKacheong Poon 		sopp.sopp_flags = SOCKOPT_LOOPBACK;
333721fffe3SKacheong Poon 		sopp.sopp_loopback = B_TRUE;
334721fffe3SKacheong Poon 
335721fffe3SKacheong Poon 		(*connp->conn_upcalls->su_set_proto_props)(
336721fffe3SKacheong Poon 		    connp->conn_upper_handle, &sopp);
337721fffe3SKacheong Poon 	}
338721fffe3SKacheong Poon done:
3399ee3959aSAnders Persson 	squeue_synch_exit(connp);
340721fffe3SKacheong Poon 
341721fffe3SKacheong Poon 	return ((error == 0) ? EINPROGRESS : error);
342721fffe3SKacheong Poon }
343721fffe3SKacheong Poon 
344721fffe3SKacheong Poon /* ARGSUSED3 */
345721fffe3SKacheong Poon int
346721fffe3SKacheong Poon tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
347721fffe3SKacheong Poon     socklen_t *addrlenp, cred_t *cr)
348721fffe3SKacheong Poon {
349721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)proto_handle;
350721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
351721fffe3SKacheong Poon 
352721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
353721fffe3SKacheong Poon 	ASSERT(cr != NULL);
354721fffe3SKacheong Poon 
355721fffe3SKacheong Poon 	ASSERT(tcp != NULL);
356721fffe3SKacheong Poon 	if (tcp->tcp_state < TCPS_SYN_RCVD)
357721fffe3SKacheong Poon 		return (ENOTCONN);
358721fffe3SKacheong Poon 
359721fffe3SKacheong Poon 	return (conn_getpeername(connp, addr, addrlenp));
360721fffe3SKacheong Poon }
361721fffe3SKacheong Poon 
362721fffe3SKacheong Poon /* ARGSUSED3 */
363721fffe3SKacheong Poon int
364721fffe3SKacheong Poon tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
365721fffe3SKacheong Poon     socklen_t *addrlenp, cred_t *cr)
366721fffe3SKacheong Poon {
367721fffe3SKacheong Poon 	conn_t	*connp = (conn_t *)proto_handle;
368721fffe3SKacheong Poon 
369721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
370721fffe3SKacheong Poon 	ASSERT(cr != NULL);
371721fffe3SKacheong Poon 
372721fffe3SKacheong Poon 	return (conn_getsockname(connp, addr, addrlenp));
373721fffe3SKacheong Poon }
374721fffe3SKacheong Poon 
375721fffe3SKacheong Poon /* returns UNIX error, the optlen is a value-result arg */
376721fffe3SKacheong Poon static int
377721fffe3SKacheong Poon tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
378721fffe3SKacheong Poon     void *optvalp, socklen_t *optlen, cred_t *cr)
379721fffe3SKacheong Poon {
380721fffe3SKacheong Poon 	conn_t		*connp = (conn_t *)proto_handle;
381721fffe3SKacheong Poon 	int		error;
382721fffe3SKacheong Poon 	t_uscalar_t	max_optbuf_len;
383721fffe3SKacheong Poon 	void		*optvalp_buf;
384721fffe3SKacheong Poon 	int		len;
385721fffe3SKacheong Poon 
386721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
387721fffe3SKacheong Poon 
388721fffe3SKacheong Poon 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
389721fffe3SKacheong Poon 	    tcp_opt_obj.odb_opt_des_arr,
390721fffe3SKacheong Poon 	    tcp_opt_obj.odb_opt_arr_cnt,
391721fffe3SKacheong Poon 	    B_FALSE, B_TRUE, cr);
392721fffe3SKacheong Poon 	if (error != 0) {
393721fffe3SKacheong Poon 		if (error < 0) {
394721fffe3SKacheong Poon 			error = proto_tlitosyserr(-error);
395721fffe3SKacheong Poon 		}
396721fffe3SKacheong Poon 		return (error);
397721fffe3SKacheong Poon 	}
398721fffe3SKacheong Poon 
399721fffe3SKacheong Poon 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
400721fffe3SKacheong Poon 
4019ee3959aSAnders Persson 	error = squeue_synch_enter(connp, NULL);
402721fffe3SKacheong Poon 	if (error == ENOMEM) {
403721fffe3SKacheong Poon 		kmem_free(optvalp_buf, max_optbuf_len);
404721fffe3SKacheong Poon 		return (ENOMEM);
405721fffe3SKacheong Poon 	}
406721fffe3SKacheong Poon 
407721fffe3SKacheong Poon 	len = tcp_opt_get(connp, level, option_name, optvalp_buf);
4089ee3959aSAnders Persson 	squeue_synch_exit(connp);
409721fffe3SKacheong Poon 
410721fffe3SKacheong Poon 	if (len == -1) {
411721fffe3SKacheong Poon 		kmem_free(optvalp_buf, max_optbuf_len);
412721fffe3SKacheong Poon 		return (EINVAL);
413721fffe3SKacheong Poon 	}
414721fffe3SKacheong Poon 
415721fffe3SKacheong Poon 	/*
416721fffe3SKacheong Poon 	 * update optlen and copy option value
417721fffe3SKacheong Poon 	 */
418721fffe3SKacheong Poon 	t_uscalar_t size = MIN(len, *optlen);
419721fffe3SKacheong Poon 
420721fffe3SKacheong Poon 	bcopy(optvalp_buf, optvalp, size);
421721fffe3SKacheong Poon 	bcopy(&size, optlen, sizeof (size));
422721fffe3SKacheong Poon 
423721fffe3SKacheong Poon 	kmem_free(optvalp_buf, max_optbuf_len);
424721fffe3SKacheong Poon 	return (0);
425721fffe3SKacheong Poon }
426721fffe3SKacheong Poon 
427721fffe3SKacheong Poon static int
428721fffe3SKacheong Poon tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
429721fffe3SKacheong Poon     const void *optvalp, socklen_t optlen, cred_t *cr)
430721fffe3SKacheong Poon {
431721fffe3SKacheong Poon 	conn_t		*connp = (conn_t *)proto_handle;
432721fffe3SKacheong Poon 	int		error;
433721fffe3SKacheong Poon 
434721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
435721fffe3SKacheong Poon 	/*
436721fffe3SKacheong Poon 	 * Entering the squeue synchronously can result in a context switch,
437721fffe3SKacheong Poon 	 * which can cause a rather sever performance degradation. So we try to
438721fffe3SKacheong Poon 	 * handle whatever options we can without entering the squeue.
439721fffe3SKacheong Poon 	 */
440721fffe3SKacheong Poon 	if (level == IPPROTO_TCP) {
441721fffe3SKacheong Poon 		switch (option_name) {
442721fffe3SKacheong Poon 		case TCP_NODELAY:
443721fffe3SKacheong Poon 			if (optlen != sizeof (int32_t))
444721fffe3SKacheong Poon 				return (EINVAL);
445721fffe3SKacheong Poon 			mutex_enter(&connp->conn_tcp->tcp_non_sq_lock);
446721fffe3SKacheong Poon 			connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 :
447721fffe3SKacheong Poon 			    connp->conn_tcp->tcp_mss;
448721fffe3SKacheong Poon 			mutex_exit(&connp->conn_tcp->tcp_non_sq_lock);
449721fffe3SKacheong Poon 			return (0);
450721fffe3SKacheong Poon 		default:
451721fffe3SKacheong Poon 			break;
452721fffe3SKacheong Poon 		}
453721fffe3SKacheong Poon 	}
454721fffe3SKacheong Poon 
4559ee3959aSAnders Persson 	error = squeue_synch_enter(connp, NULL);
456721fffe3SKacheong Poon 	if (error == ENOMEM) {
457721fffe3SKacheong Poon 		return (ENOMEM);
458721fffe3SKacheong Poon 	}
459721fffe3SKacheong Poon 
460721fffe3SKacheong Poon 	error = proto_opt_check(level, option_name, optlen, NULL,
461721fffe3SKacheong Poon 	    tcp_opt_obj.odb_opt_des_arr,
462721fffe3SKacheong Poon 	    tcp_opt_obj.odb_opt_arr_cnt,
463721fffe3SKacheong Poon 	    B_TRUE, B_FALSE, cr);
464721fffe3SKacheong Poon 
465721fffe3SKacheong Poon 	if (error != 0) {
466721fffe3SKacheong Poon 		if (error < 0) {
467721fffe3SKacheong Poon 			error = proto_tlitosyserr(-error);
468721fffe3SKacheong Poon 		}
4699ee3959aSAnders Persson 		squeue_synch_exit(connp);
470721fffe3SKacheong Poon 		return (error);
471721fffe3SKacheong Poon 	}
472721fffe3SKacheong Poon 
473721fffe3SKacheong Poon 	error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
474721fffe3SKacheong Poon 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
475721fffe3SKacheong Poon 	    NULL, cr);
4769ee3959aSAnders Persson 	squeue_synch_exit(connp);
477721fffe3SKacheong Poon 
478721fffe3SKacheong Poon 	ASSERT(error >= 0);
479721fffe3SKacheong Poon 
480721fffe3SKacheong Poon 	return (error);
481721fffe3SKacheong Poon }
482721fffe3SKacheong Poon 
483721fffe3SKacheong Poon /* ARGSUSED */
484721fffe3SKacheong Poon static int
485721fffe3SKacheong Poon tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
486721fffe3SKacheong Poon     cred_t *cr)
487721fffe3SKacheong Poon {
488721fffe3SKacheong Poon 	tcp_t		*tcp;
489721fffe3SKacheong Poon 	uint32_t	msize;
490721fffe3SKacheong Poon 	conn_t *connp = (conn_t *)proto_handle;
491721fffe3SKacheong Poon 	int32_t		tcpstate;
492721fffe3SKacheong Poon 
493721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
494721fffe3SKacheong Poon 	ASSERT(cr != NULL);
495721fffe3SKacheong Poon 
496721fffe3SKacheong Poon 	ASSERT(connp->conn_ref >= 2);
497721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
498721fffe3SKacheong Poon 
499721fffe3SKacheong Poon 	if (msg->msg_controllen != 0) {
500721fffe3SKacheong Poon 		freemsg(mp);
501721fffe3SKacheong Poon 		return (EOPNOTSUPP);
502721fffe3SKacheong Poon 	}
503721fffe3SKacheong Poon 
504721fffe3SKacheong Poon 	switch (DB_TYPE(mp)) {
505721fffe3SKacheong Poon 	case M_DATA:
506721fffe3SKacheong Poon 		tcp = connp->conn_tcp;
507721fffe3SKacheong Poon 		ASSERT(tcp != NULL);
508721fffe3SKacheong Poon 
509721fffe3SKacheong Poon 		tcpstate = tcp->tcp_state;
510721fffe3SKacheong Poon 		if (tcpstate < TCPS_ESTABLISHED) {
511721fffe3SKacheong Poon 			freemsg(mp);
512721fffe3SKacheong Poon 			/*
513721fffe3SKacheong Poon 			 * We return ENOTCONN if the endpoint is trying to
514721fffe3SKacheong Poon 			 * connect or has never been connected, and EPIPE if it
515721fffe3SKacheong Poon 			 * has been disconnected. The connection id helps us
516721fffe3SKacheong Poon 			 * distinguish between the last two cases.
517721fffe3SKacheong Poon 			 */
518721fffe3SKacheong Poon 			return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN :
519721fffe3SKacheong Poon 			    ((tcp->tcp_connid > 0) ? EPIPE : ENOTCONN));
520721fffe3SKacheong Poon 		} else if (tcpstate > TCPS_CLOSE_WAIT) {
521721fffe3SKacheong Poon 			freemsg(mp);
522721fffe3SKacheong Poon 			return (EPIPE);
523721fffe3SKacheong Poon 		}
524721fffe3SKacheong Poon 
525721fffe3SKacheong Poon 		msize = msgdsize(mp);
526721fffe3SKacheong Poon 
527721fffe3SKacheong Poon 		mutex_enter(&tcp->tcp_non_sq_lock);
528721fffe3SKacheong Poon 		tcp->tcp_squeue_bytes += msize;
529721fffe3SKacheong Poon 		/*
530721fffe3SKacheong Poon 		 * Squeue Flow Control
531721fffe3SKacheong Poon 		 */
532721fffe3SKacheong Poon 		if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) {
533721fffe3SKacheong Poon 			tcp_setqfull(tcp);
534721fffe3SKacheong Poon 		}
535721fffe3SKacheong Poon 		mutex_exit(&tcp->tcp_non_sq_lock);
536721fffe3SKacheong Poon 
537721fffe3SKacheong Poon 		/*
538721fffe3SKacheong Poon 		 * The application may pass in an address in the msghdr, but
539721fffe3SKacheong Poon 		 * we ignore the address on connection-oriented sockets.
540721fffe3SKacheong Poon 		 * Just like BSD this code does not generate an error for
541721fffe3SKacheong Poon 		 * TCP (a CONNREQUIRED socket) when sending to an address
542721fffe3SKacheong Poon 		 * passed in with sendto/sendmsg. Instead the data is
543721fffe3SKacheong Poon 		 * delivered on the connection as if no address had been
544721fffe3SKacheong Poon 		 * supplied.
545721fffe3SKacheong Poon 		 */
546721fffe3SKacheong Poon 		CONN_INC_REF(connp);
547721fffe3SKacheong Poon 
548721fffe3SKacheong Poon 		if (msg->msg_flags & MSG_OOB) {
549721fffe3SKacheong Poon 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent,
550721fffe3SKacheong Poon 			    connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
551721fffe3SKacheong Poon 		} else {
552721fffe3SKacheong Poon 			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output,
553721fffe3SKacheong Poon 			    connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
554721fffe3SKacheong Poon 		}
555721fffe3SKacheong Poon 
556721fffe3SKacheong Poon 		return (0);
557721fffe3SKacheong Poon 
558721fffe3SKacheong Poon 	default:
559721fffe3SKacheong Poon 		ASSERT(0);
560721fffe3SKacheong Poon 	}
561721fffe3SKacheong Poon 
562721fffe3SKacheong Poon 	freemsg(mp);
563721fffe3SKacheong Poon 	return (0);
564721fffe3SKacheong Poon }
565721fffe3SKacheong Poon 
566721fffe3SKacheong Poon /* ARGSUSED */
567721fffe3SKacheong Poon static int
568721fffe3SKacheong Poon tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
569721fffe3SKacheong Poon {
570721fffe3SKacheong Poon 	conn_t  *connp = (conn_t *)proto_handle;
571721fffe3SKacheong Poon 	tcp_t   *tcp = connp->conn_tcp;
572721fffe3SKacheong Poon 
573721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
574721fffe3SKacheong Poon 
575721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
576721fffe3SKacheong Poon 	ASSERT(cr != NULL);
577721fffe3SKacheong Poon 
578721fffe3SKacheong Poon 	/*
579721fffe3SKacheong Poon 	 * X/Open requires that we check the connected state.
580721fffe3SKacheong Poon 	 */
581721fffe3SKacheong Poon 	if (tcp->tcp_state < TCPS_SYN_SENT)
582721fffe3SKacheong Poon 		return (ENOTCONN);
583721fffe3SKacheong Poon 
584721fffe3SKacheong Poon 	/* shutdown the send side */
585721fffe3SKacheong Poon 	if (how != SHUT_RD) {
586721fffe3SKacheong Poon 		mblk_t *bp;
587721fffe3SKacheong Poon 
588721fffe3SKacheong Poon 		bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL);
589721fffe3SKacheong Poon 		CONN_INC_REF(connp);
590721fffe3SKacheong Poon 		SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output,
591721fffe3SKacheong Poon 		    connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT);
592721fffe3SKacheong Poon 
593721fffe3SKacheong Poon 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
594721fffe3SKacheong Poon 		    SOCK_OPCTL_SHUT_SEND, 0);
595721fffe3SKacheong Poon 	}
596721fffe3SKacheong Poon 
597721fffe3SKacheong Poon 	/* shutdown the recv side */
598721fffe3SKacheong Poon 	if (how != SHUT_WR)
599721fffe3SKacheong Poon 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
600721fffe3SKacheong Poon 		    SOCK_OPCTL_SHUT_RECV, 0);
601721fffe3SKacheong Poon 
602721fffe3SKacheong Poon 	return (0);
603721fffe3SKacheong Poon }
604721fffe3SKacheong Poon 
605721fffe3SKacheong Poon static void
606721fffe3SKacheong Poon tcp_clr_flowctrl(sock_lower_handle_t proto_handle)
607721fffe3SKacheong Poon {
608721fffe3SKacheong Poon 	conn_t  *connp = (conn_t *)proto_handle;
609721fffe3SKacheong Poon 	tcp_t	*tcp = connp->conn_tcp;
610721fffe3SKacheong Poon 	mblk_t *mp;
611721fffe3SKacheong Poon 	int error;
612721fffe3SKacheong Poon 
613721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
614721fffe3SKacheong Poon 
615721fffe3SKacheong Poon 	/*
616721fffe3SKacheong Poon 	 * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl()
617721fffe3SKacheong Poon 	 * is currently running.
618721fffe3SKacheong Poon 	 */
619721fffe3SKacheong Poon 	mutex_enter(&tcp->tcp_rsrv_mp_lock);
620721fffe3SKacheong Poon 	if ((mp = tcp->tcp_rsrv_mp) == NULL) {
621721fffe3SKacheong Poon 		mutex_exit(&tcp->tcp_rsrv_mp_lock);
622721fffe3SKacheong Poon 		return;
623721fffe3SKacheong Poon 	}
624721fffe3SKacheong Poon 	tcp->tcp_rsrv_mp = NULL;
625721fffe3SKacheong Poon 	mutex_exit(&tcp->tcp_rsrv_mp_lock);
626721fffe3SKacheong Poon 
6279ee3959aSAnders Persson 	error = squeue_synch_enter(connp, mp);
628721fffe3SKacheong Poon 	ASSERT(error == 0);
629721fffe3SKacheong Poon 
630721fffe3SKacheong Poon 	mutex_enter(&tcp->tcp_rsrv_mp_lock);
631721fffe3SKacheong Poon 	tcp->tcp_rsrv_mp = mp;
632721fffe3SKacheong Poon 	mutex_exit(&tcp->tcp_rsrv_mp_lock);
633721fffe3SKacheong Poon 
634721fffe3SKacheong Poon 	if (tcp->tcp_fused) {
635721fffe3SKacheong Poon 		tcp_fuse_backenable(tcp);
636721fffe3SKacheong Poon 	} else {
637721fffe3SKacheong Poon 		tcp->tcp_rwnd = connp->conn_rcvbuf;
638721fffe3SKacheong Poon 		/*
639721fffe3SKacheong Poon 		 * Send back a window update immediately if TCP is above
640721fffe3SKacheong Poon 		 * ESTABLISHED state and the increase of the rcv window
641721fffe3SKacheong Poon 		 * that the other side knows is at least 1 MSS after flow
642721fffe3SKacheong Poon 		 * control is lifted.
643721fffe3SKacheong Poon 		 */
644721fffe3SKacheong Poon 		if (tcp->tcp_state >= TCPS_ESTABLISHED &&
645721fffe3SKacheong Poon 		    tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
646721fffe3SKacheong Poon 			tcp_xmit_ctl(NULL, tcp,
647721fffe3SKacheong Poon 			    (tcp->tcp_swnd == 0) ? tcp->tcp_suna :
648721fffe3SKacheong Poon 			    tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
649721fffe3SKacheong Poon 		}
650721fffe3SKacheong Poon 	}
651721fffe3SKacheong Poon 
6529ee3959aSAnders Persson 	squeue_synch_exit(connp);
653721fffe3SKacheong Poon }
654721fffe3SKacheong Poon 
655721fffe3SKacheong Poon /* ARGSUSED */
656721fffe3SKacheong Poon static int
657721fffe3SKacheong Poon tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
658721fffe3SKacheong Poon     int mode, int32_t *rvalp, cred_t *cr)
659721fffe3SKacheong Poon {
660721fffe3SKacheong Poon 	conn_t  	*connp = (conn_t *)proto_handle;
661721fffe3SKacheong Poon 	int		error;
662721fffe3SKacheong Poon 
663721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
664721fffe3SKacheong Poon 
665721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
666721fffe3SKacheong Poon 	ASSERT(cr != NULL);
667721fffe3SKacheong Poon 
668721fffe3SKacheong Poon 	/*
669721fffe3SKacheong Poon 	 * If we don't have a helper stream then create one.
670721fffe3SKacheong Poon 	 * ip_create_helper_stream takes care of locking the conn_t,
671721fffe3SKacheong Poon 	 * so this check for NULL is just a performance optimization.
672721fffe3SKacheong Poon 	 */
673721fffe3SKacheong Poon 	if (connp->conn_helper_info == NULL) {
674721fffe3SKacheong Poon 		tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps;
675721fffe3SKacheong Poon 
676721fffe3SKacheong Poon 		/*
677721fffe3SKacheong Poon 		 * Create a helper stream for non-STREAMS socket.
678721fffe3SKacheong Poon 		 */
679721fffe3SKacheong Poon 		error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident);
680721fffe3SKacheong Poon 		if (error != 0) {
681721fffe3SKacheong Poon 			ip0dbg(("tcp_ioctl: create of IP helper stream "
682721fffe3SKacheong Poon 			    "failed %d\n", error));
683721fffe3SKacheong Poon 			return (error);
684721fffe3SKacheong Poon 		}
685721fffe3SKacheong Poon 	}
686721fffe3SKacheong Poon 
687721fffe3SKacheong Poon 	switch (cmd) {
688721fffe3SKacheong Poon 		case ND_SET:
689721fffe3SKacheong Poon 		case ND_GET:
690721fffe3SKacheong Poon 		case _SIOCSOCKFALLBACK:
691721fffe3SKacheong Poon 		case TCP_IOC_ABORT_CONN:
692721fffe3SKacheong Poon 		case TI_GETPEERNAME:
693721fffe3SKacheong Poon 		case TI_GETMYNAME:
694721fffe3SKacheong Poon 			ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket",
695721fffe3SKacheong Poon 			    cmd));
696721fffe3SKacheong Poon 			error = EINVAL;
697721fffe3SKacheong Poon 			break;
698721fffe3SKacheong Poon 		default:
699721fffe3SKacheong Poon 			/*
700721fffe3SKacheong Poon 			 * If the conn is not closing, pass on to IP using
701721fffe3SKacheong Poon 			 * helper stream. Bump the ioctlref to prevent tcp_close
702721fffe3SKacheong Poon 			 * from closing the rq/wq out from underneath the ioctl
703721fffe3SKacheong Poon 			 * if it ends up queued or aborted/interrupted.
704721fffe3SKacheong Poon 			 */
705721fffe3SKacheong Poon 			mutex_enter(&connp->conn_lock);
706721fffe3SKacheong Poon 			if (connp->conn_state_flags & (CONN_CLOSING)) {
707721fffe3SKacheong Poon 				mutex_exit(&connp->conn_lock);
708721fffe3SKacheong Poon 				error = EINVAL;
709721fffe3SKacheong Poon 				break;
710721fffe3SKacheong Poon 			}
711721fffe3SKacheong Poon 			CONN_INC_IOCTLREF_LOCKED(connp);
712721fffe3SKacheong Poon 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
713721fffe3SKacheong Poon 			    cmd, arg, mode, cr, rvalp);
714721fffe3SKacheong Poon 			CONN_DEC_IOCTLREF(connp);
715721fffe3SKacheong Poon 			break;
716721fffe3SKacheong Poon 	}
717721fffe3SKacheong Poon 	return (error);
718721fffe3SKacheong Poon }
719721fffe3SKacheong Poon 
720721fffe3SKacheong Poon /* ARGSUSED */
721721fffe3SKacheong Poon static int
722721fffe3SKacheong Poon tcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
723721fffe3SKacheong Poon {
724721fffe3SKacheong Poon 	conn_t *connp = (conn_t *)proto_handle;
725721fffe3SKacheong Poon 
726721fffe3SKacheong Poon 	ASSERT(connp->conn_upper_handle != NULL);
727721fffe3SKacheong Poon 
728721fffe3SKacheong Poon 	/* All Solaris components should pass a cred for this operation. */
729721fffe3SKacheong Poon 	ASSERT(cr != NULL);
730721fffe3SKacheong Poon 
731721fffe3SKacheong Poon 	tcp_close_common(connp, flags);
732721fffe3SKacheong Poon 
733721fffe3SKacheong Poon 	ip_free_helper_stream(connp);
734721fffe3SKacheong Poon 
735721fffe3SKacheong Poon 	/*
736721fffe3SKacheong Poon 	 * Drop IP's reference on the conn. This is the last reference
737721fffe3SKacheong Poon 	 * on the connp if the state was less than established. If the
738721fffe3SKacheong Poon 	 * connection has gone into timewait state, then we will have
739721fffe3SKacheong Poon 	 * one ref for the TCP and one more ref (total of two) for the
740721fffe3SKacheong Poon 	 * classifier connected hash list (a timewait connections stays
741721fffe3SKacheong Poon 	 * in connected hash till closed).
742721fffe3SKacheong Poon 	 *
743721fffe3SKacheong Poon 	 * We can't assert the references because there might be other
744721fffe3SKacheong Poon 	 * transient reference places because of some walkers or queued
745721fffe3SKacheong Poon 	 * packets in squeue for the timewait state.
746721fffe3SKacheong Poon 	 */
747721fffe3SKacheong Poon 	CONN_DEC_REF(connp);
7483e95bd4aSAnders Persson 
7493e95bd4aSAnders Persson 	/*
7503e95bd4aSAnders Persson 	 * EINPROGRESS tells sockfs to wait for a 'closed' upcall before
7513e95bd4aSAnders Persson 	 * freeing the socket.
7523e95bd4aSAnders Persson 	 */
7533e95bd4aSAnders Persson 	return (EINPROGRESS);
754721fffe3SKacheong Poon }
755721fffe3SKacheong Poon 
756721fffe3SKacheong Poon /* ARGSUSED */
757721fffe3SKacheong Poon sock_lower_handle_t
758721fffe3SKacheong Poon tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
759721fffe3SKacheong Poon     uint_t *smodep, int *errorp, int flags, cred_t *credp)
760721fffe3SKacheong Poon {
761721fffe3SKacheong Poon 	conn_t		*connp;
762721fffe3SKacheong Poon 	boolean_t	isv6 = family == AF_INET6;
763721fffe3SKacheong Poon 	if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) ||
764721fffe3SKacheong Poon 	    (proto != 0 && proto != IPPROTO_TCP)) {
765721fffe3SKacheong Poon 		*errorp = EPROTONOSUPPORT;
766721fffe3SKacheong Poon 		return (NULL);
767721fffe3SKacheong Poon 	}
768721fffe3SKacheong Poon 
769721fffe3SKacheong Poon 	connp = tcp_create_common(credp, isv6, B_TRUE, errorp);
770721fffe3SKacheong Poon 	if (connp == NULL) {
771721fffe3SKacheong Poon 		return (NULL);
772721fffe3SKacheong Poon 	}
773721fffe3SKacheong Poon 
774721fffe3SKacheong Poon 	/*
775721fffe3SKacheong Poon 	 * Put the ref for TCP. Ref for IP was already put
776721fffe3SKacheong Poon 	 * by ipcl_conn_create. Also Make the conn_t globally
777721fffe3SKacheong Poon 	 * visible to walkers
778721fffe3SKacheong Poon 	 */
779721fffe3SKacheong Poon 	mutex_enter(&connp->conn_lock);
780721fffe3SKacheong Poon 	CONN_INC_REF_LOCKED(connp);
781721fffe3SKacheong Poon 	ASSERT(connp->conn_ref == 2);
782721fffe3SKacheong Poon 	connp->conn_state_flags &= ~CONN_INCIPIENT;
783721fffe3SKacheong Poon 
784721fffe3SKacheong Poon 	connp->conn_flags |= IPCL_NONSTR;
785721fffe3SKacheong Poon 	mutex_exit(&connp->conn_lock);
786721fffe3SKacheong Poon 
787721fffe3SKacheong Poon 	ASSERT(errorp != NULL);
788721fffe3SKacheong Poon 	*errorp = 0;
789721fffe3SKacheong Poon 	*sock_downcalls = &sock_tcp_downcalls;
790721fffe3SKacheong Poon 	*smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP |
791721fffe3SKacheong Poon 	    SM_SENDFILESUPP;
792721fffe3SKacheong Poon 
793721fffe3SKacheong Poon 	return ((sock_lower_handle_t)connp);
794721fffe3SKacheong Poon }
795721fffe3SKacheong Poon 
/*
 * tcp_fallback
 *
 * A direct socket is falling back to using STREAMS. The queue
 * that is being passed down was created using tcp_open() with
 * the SO_FALLBACK flag set. As a result, the queue is not
 * associated with a conn, and the q_ptrs instead contain the
 * dev and minor arena that should be used.
 *
 * The 'issocket' flag indicates whether the FireEngine
 * optimizations should be used. The common case would be that
 * optimizations are enabled, and they might be subsequently
 * disabled using the _SIOCSOCKFALLBACK ioctl.
 */
8103e95bd4aSAnders Persson 
8113e95bd4aSAnders Persson /*
8123e95bd4aSAnders Persson  * An active connection is falling back to TPI. Gather all the information
8133e95bd4aSAnders Persson  * required by the STREAM head and TPI sonode and send it up.
8143e95bd4aSAnders Persson  */
8153e95bd4aSAnders Persson static void
8163e95bd4aSAnders Persson tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q,
8173e95bd4aSAnders Persson     boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
8183e95bd4aSAnders Persson     sock_quiesce_arg_t *arg)
8193e95bd4aSAnders Persson {
8203e95bd4aSAnders Persson 	conn_t			*connp = tcp->tcp_connp;
8213e95bd4aSAnders Persson 	struct stroptions	*stropt;
8223e95bd4aSAnders Persson 	struct T_capability_ack tca;
8233e95bd4aSAnders Persson 	struct sockaddr_in6	laddr, faddr;
8243e95bd4aSAnders Persson 	socklen_t 		laddrlen, faddrlen;
8253e95bd4aSAnders Persson 	short			opts;
8263e95bd4aSAnders Persson 	int			error;
8273e95bd4aSAnders Persson 	mblk_t			*mp, *mpnext;
8283e95bd4aSAnders Persson 
8293e95bd4aSAnders Persson 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
8303e95bd4aSAnders Persson 	connp->conn_minor_arena = WR(q)->q_ptr;
8313e95bd4aSAnders Persson 
8323e95bd4aSAnders Persson 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
8333e95bd4aSAnders Persson 
8343e95bd4aSAnders Persson 	connp->conn_rq = RD(q);
8353e95bd4aSAnders Persson 	connp->conn_wq = WR(q);
8363e95bd4aSAnders Persson 
8373e95bd4aSAnders Persson 	WR(q)->q_qinfo = &tcp_sock_winit;
8383e95bd4aSAnders Persson 
8393e95bd4aSAnders Persson 	if (!issocket)
8403e95bd4aSAnders Persson 		tcp_use_pure_tpi(tcp);
8413e95bd4aSAnders Persson 
8423e95bd4aSAnders Persson 	/*
8433e95bd4aSAnders Persson 	 * free the helper stream
8443e95bd4aSAnders Persson 	 */
8453e95bd4aSAnders Persson 	ip_free_helper_stream(connp);
8463e95bd4aSAnders Persson 
8473e95bd4aSAnders Persson 	/*
8483e95bd4aSAnders Persson 	 * Notify the STREAM head about options
8493e95bd4aSAnders Persson 	 */
8503e95bd4aSAnders Persson 	DB_TYPE(stropt_mp) = M_SETOPTS;
8513e95bd4aSAnders Persson 	stropt = (struct stroptions *)stropt_mp->b_rptr;
8523e95bd4aSAnders Persson 	stropt_mp->b_wptr += sizeof (struct stroptions);
8533e95bd4aSAnders Persson 	stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
8543e95bd4aSAnders Persson 
8553e95bd4aSAnders Persson 	stropt->so_wroff = connp->conn_ht_iphc_len + (tcp->tcp_loopback ? 0 :
8563e95bd4aSAnders Persson 	    tcp->tcp_tcps->tcps_wroff_xtra);
8573e95bd4aSAnders Persson 	if (tcp->tcp_snd_sack_ok)
8583e95bd4aSAnders Persson 		stropt->so_wroff += TCPOPT_MAX_SACK_LEN;
8593e95bd4aSAnders Persson 	stropt->so_hiwat = connp->conn_rcvbuf;
8603e95bd4aSAnders Persson 	stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
8613e95bd4aSAnders Persson 
8623e95bd4aSAnders Persson 	putnext(RD(q), stropt_mp);
8633e95bd4aSAnders Persson 
8643e95bd4aSAnders Persson 	/*
8653e95bd4aSAnders Persson 	 * Collect the information needed to sync with the sonode
8663e95bd4aSAnders Persson 	 */
8673e95bd4aSAnders Persson 	tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID);
8683e95bd4aSAnders Persson 
8693e95bd4aSAnders Persson 	laddrlen = faddrlen = sizeof (sin6_t);
8703e95bd4aSAnders Persson 	(void) tcp_getsockname((sock_lower_handle_t)connp,
8713e95bd4aSAnders Persson 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
8723e95bd4aSAnders Persson 	error = tcp_getpeername((sock_lower_handle_t)connp,
8733e95bd4aSAnders Persson 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
8743e95bd4aSAnders Persson 	if (error != 0)
8753e95bd4aSAnders Persson 		faddrlen = 0;
8763e95bd4aSAnders Persson 
8773e95bd4aSAnders Persson 	opts = 0;
8783e95bd4aSAnders Persson 	if (connp->conn_oobinline)
8793e95bd4aSAnders Persson 		opts |= SO_OOBINLINE;
8803e95bd4aSAnders Persson 	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
8813e95bd4aSAnders Persson 		opts |= SO_DONTROUTE;
8823e95bd4aSAnders Persson 
8833e95bd4aSAnders Persson 	/*
8843e95bd4aSAnders Persson 	 * Notify the socket that the protocol is now quiescent,
8853e95bd4aSAnders Persson 	 * and it's therefore safe move data from the socket
8863e95bd4aSAnders Persson 	 * to the stream head.
8873e95bd4aSAnders Persson 	 */
8883e95bd4aSAnders Persson 	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
8893e95bd4aSAnders Persson 	    (struct sockaddr *)&laddr, laddrlen,
8903e95bd4aSAnders Persson 	    (struct sockaddr *)&faddr, faddrlen, opts);
8913e95bd4aSAnders Persson 
8923e95bd4aSAnders Persson 	while (mp != NULL) {
8933e95bd4aSAnders Persson 		mpnext = mp->b_next;
8943e95bd4aSAnders Persson 		tcp->tcp_rcv_list = mp->b_next;
8953e95bd4aSAnders Persson 		mp->b_next = NULL;
8963e95bd4aSAnders Persson 		putnext(q, mp);
8973e95bd4aSAnders Persson 		mp = mpnext;
8983e95bd4aSAnders Persson 	}
8993e95bd4aSAnders Persson 	ASSERT(tcp->tcp_rcv_last_head == NULL);
9003e95bd4aSAnders Persson 	ASSERT(tcp->tcp_rcv_last_tail == NULL);
9013e95bd4aSAnders Persson 	ASSERT(tcp->tcp_rcv_cnt == 0);
9023e95bd4aSAnders Persson 
9033e95bd4aSAnders Persson 	/*
9043e95bd4aSAnders Persson 	 * All eagers in q0 are marked as being non-STREAM, so they will
9053e95bd4aSAnders Persson 	 * make su_newconn upcalls when the handshake completes, which
9063e95bd4aSAnders Persson 	 * will fail (resulting in the conn being closed). So we just blow
9073e95bd4aSAnders Persson 	 * off everything in q0 instead of waiting for the inevitable.
9083e95bd4aSAnders Persson 	 */
9093e95bd4aSAnders Persson 	if (tcp->tcp_conn_req_cnt_q0 != 0)
9103e95bd4aSAnders Persson 		tcp_eager_cleanup(tcp, B_TRUE);
9113e95bd4aSAnders Persson }
9123e95bd4aSAnders Persson 
9133e95bd4aSAnders Persson /*
9143e95bd4aSAnders Persson  * An eager is falling back to TPI. All we have to do is send
9153e95bd4aSAnders Persson  * up a T_CONN_IND.
9163e95bd4aSAnders Persson  */
9173e95bd4aSAnders Persson static void
9183e95bd4aSAnders Persson tcp_fallback_eager(tcp_t *eager, boolean_t issocket,
9193e95bd4aSAnders Persson     so_proto_quiesced_cb_t quiesced_cb, sock_quiesce_arg_t *arg)
9203e95bd4aSAnders Persson {
9213e95bd4aSAnders Persson 	conn_t *connp = eager->tcp_connp;
9223e95bd4aSAnders Persson 	tcp_t *listener = eager->tcp_listener;
9233e95bd4aSAnders Persson 	mblk_t *mp;
9243e95bd4aSAnders Persson 
9253e95bd4aSAnders Persson 	ASSERT(listener != NULL);
9263e95bd4aSAnders Persson 
9273e95bd4aSAnders Persson 	/*
9283e95bd4aSAnders Persson 	 * Notify the socket that the protocol is now quiescent,
9293e95bd4aSAnders Persson 	 * and it's therefore safe move data from the socket
9303e95bd4aSAnders Persson 	 * to tcp's rcv queue.
9313e95bd4aSAnders Persson 	 */
9323e95bd4aSAnders Persson 	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, NULL, NULL, 0,
9333e95bd4aSAnders Persson 	    NULL, 0, 0);
9343e95bd4aSAnders Persson 
9353e95bd4aSAnders Persson 	if (mp != NULL) {
9363e95bd4aSAnders Persson 		ASSERT(eager->tcp_rcv_cnt == 0);
9373e95bd4aSAnders Persson 
9383e95bd4aSAnders Persson 		eager->tcp_rcv_list = mp;
9393e95bd4aSAnders Persson 		eager->tcp_rcv_cnt = msgdsize(mp);
9403e95bd4aSAnders Persson 		while (mp->b_next != NULL) {
9413e95bd4aSAnders Persson 			mp = mp->b_next;
9423e95bd4aSAnders Persson 			eager->tcp_rcv_cnt += msgdsize(mp);
9433e95bd4aSAnders Persson 		}
9443e95bd4aSAnders Persson 		eager->tcp_rcv_last_head = mp;
9453e95bd4aSAnders Persson 		while (mp->b_cont)
9463e95bd4aSAnders Persson 			mp = mp->b_cont;
9473e95bd4aSAnders Persson 		eager->tcp_rcv_last_tail = mp;
9483e95bd4aSAnders Persson 		if (eager->tcp_rcv_cnt > eager->tcp_rwnd)
9493e95bd4aSAnders Persson 			eager->tcp_rwnd = 0;
9503e95bd4aSAnders Persson 		else
9513e95bd4aSAnders Persson 			eager->tcp_rwnd -= eager->tcp_rcv_cnt;
9523e95bd4aSAnders Persson 	}
9533e95bd4aSAnders Persson 
9543e95bd4aSAnders Persson 	if (!issocket)
9553e95bd4aSAnders Persson 		eager->tcp_issocket = B_FALSE;
9563e95bd4aSAnders Persson 	/*
9573e95bd4aSAnders Persson 	 * The stream for this eager does not yet exist, so mark it as
9583e95bd4aSAnders Persson 	 * being detached.
9593e95bd4aSAnders Persson 	 */
9603e95bd4aSAnders Persson 	eager->tcp_detached = B_TRUE;
9613e95bd4aSAnders Persson 	eager->tcp_hard_binding = B_TRUE;
9623e95bd4aSAnders Persson 	connp->conn_rq = listener->tcp_connp->conn_rq;
9633e95bd4aSAnders Persson 	connp->conn_wq = listener->tcp_connp->conn_wq;
9643e95bd4aSAnders Persson 
9653e95bd4aSAnders Persson 	/* Send up the connection indication */
9663e95bd4aSAnders Persson 	mp = eager->tcp_conn.tcp_eager_conn_ind;
9673e95bd4aSAnders Persson 	ASSERT(mp != NULL);
9683e95bd4aSAnders Persson 	eager->tcp_conn.tcp_eager_conn_ind = NULL;
9693e95bd4aSAnders Persson 
9703e95bd4aSAnders Persson 	/*
9713e95bd4aSAnders Persson 	 * TLI/XTI applications will get confused by
9723e95bd4aSAnders Persson 	 * sending eager as an option since it violates
9733e95bd4aSAnders Persson 	 * the option semantics. So remove the eager as
9743e95bd4aSAnders Persson 	 * option since TLI/XTI app doesn't need it anyway.
9753e95bd4aSAnders Persson 	 */
9763e95bd4aSAnders Persson 	if (!issocket) {
9773e95bd4aSAnders Persson 		struct T_conn_ind *conn_ind;
9783e95bd4aSAnders Persson 
9793e95bd4aSAnders Persson 		conn_ind = (struct T_conn_ind *)mp->b_rptr;
9803e95bd4aSAnders Persson 		conn_ind->OPT_length = 0;
9813e95bd4aSAnders Persson 		conn_ind->OPT_offset = 0;
9823e95bd4aSAnders Persson 	}
9833e95bd4aSAnders Persson 
9843e95bd4aSAnders Persson 	/*
9853e95bd4aSAnders Persson 	 * Sockfs guarantees that the listener will not be closed
9863e95bd4aSAnders Persson 	 * during fallback. So we can safely use the listener's queue.
9873e95bd4aSAnders Persson 	 */
9883e95bd4aSAnders Persson 	putnext(listener->tcp_connp->conn_rq, mp);
9893e95bd4aSAnders Persson }
9903e95bd4aSAnders Persson 
9913e95bd4aSAnders Persson 
992721fffe3SKacheong Poon int
993721fffe3SKacheong Poon tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
9943e95bd4aSAnders Persson     boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb,
9953e95bd4aSAnders Persson     sock_quiesce_arg_t *arg)
996721fffe3SKacheong Poon {
997721fffe3SKacheong Poon 	tcp_t			*tcp;
998721fffe3SKacheong Poon 	conn_t 			*connp = (conn_t *)proto_handle;
999721fffe3SKacheong Poon 	int			error;
1000721fffe3SKacheong Poon 	mblk_t			*stropt_mp;
1001721fffe3SKacheong Poon 	mblk_t			*ordrel_mp;
1002721fffe3SKacheong Poon 
1003721fffe3SKacheong Poon 	tcp = connp->conn_tcp;
1004721fffe3SKacheong Poon 
1005721fffe3SKacheong Poon 	stropt_mp = allocb_wait(sizeof (struct stroptions), BPRI_HI, STR_NOSIG,
1006721fffe3SKacheong Poon 	    NULL);
1007721fffe3SKacheong Poon 
1008721fffe3SKacheong Poon 	/* Pre-allocate the T_ordrel_ind mblk. */
1009721fffe3SKacheong Poon 	ASSERT(tcp->tcp_ordrel_mp == NULL);
1010721fffe3SKacheong Poon 	ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI,
1011721fffe3SKacheong Poon 	    STR_NOSIG, NULL);
1012721fffe3SKacheong Poon 	ordrel_mp->b_datap->db_type = M_PROTO;
1013721fffe3SKacheong Poon 	((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND;
1014721fffe3SKacheong Poon 	ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind);
1015721fffe3SKacheong Poon 
1016721fffe3SKacheong Poon 	/*
1017721fffe3SKacheong Poon 	 * Enter the squeue so that no new packets can come in
1018721fffe3SKacheong Poon 	 */
10199ee3959aSAnders Persson 	error = squeue_synch_enter(connp, NULL);
1020721fffe3SKacheong Poon 	if (error != 0) {
1021721fffe3SKacheong Poon 		/* failed to enter, free all the pre-allocated messages. */
1022721fffe3SKacheong Poon 		freeb(stropt_mp);
1023721fffe3SKacheong Poon 		freeb(ordrel_mp);
1024721fffe3SKacheong Poon 		return (ENOMEM);
1025721fffe3SKacheong Poon 	}
1026721fffe3SKacheong Poon 
1027721fffe3SKacheong Poon 	/*
1028721fffe3SKacheong Poon 	 * Both endpoints must be of the same type (either STREAMS or
1029721fffe3SKacheong Poon 	 * non-STREAMS) for fusion to be enabled. So if we are fused,
1030721fffe3SKacheong Poon 	 * we have to unfuse.
1031721fffe3SKacheong Poon 	 */
1032721fffe3SKacheong Poon 	if (tcp->tcp_fused)
1033721fffe3SKacheong Poon 		tcp_unfuse(tcp);
1034721fffe3SKacheong Poon 
1035721fffe3SKacheong Poon 	if (tcp->tcp_listener != NULL) {
1036721fffe3SKacheong Poon 		/* The eager will deal with opts when accept() is called */
1037721fffe3SKacheong Poon 		freeb(stropt_mp);
10383e95bd4aSAnders Persson 		tcp_fallback_eager(tcp, direct_sockfs, quiesced_cb, arg);
1039721fffe3SKacheong Poon 	} else {
1040721fffe3SKacheong Poon 		tcp_fallback_noneager(tcp, stropt_mp, q, direct_sockfs,
10413e95bd4aSAnders Persson 		    quiesced_cb, arg);
1042721fffe3SKacheong Poon 	}
1043721fffe3SKacheong Poon 
10443e95bd4aSAnders Persson 	/*
10453e95bd4aSAnders Persson 	 * No longer a direct socket
10463e95bd4aSAnders Persson 	 *
10473e95bd4aSAnders Persson 	 * Note that we intentionally leave the upper_handle and upcalls
10483e95bd4aSAnders Persson 	 * intact, since eagers may still be using them.
10493e95bd4aSAnders Persson 	 */
10503e95bd4aSAnders Persson 	connp->conn_flags &= ~IPCL_NONSTR;
10513e95bd4aSAnders Persson 	tcp->tcp_ordrel_mp = ordrel_mp;
10523e95bd4aSAnders Persson 
1053721fffe3SKacheong Poon 	/*
1054721fffe3SKacheong Poon 	 * There should be atleast two ref's (IP + TCP)
1055721fffe3SKacheong Poon 	 */
1056721fffe3SKacheong Poon 	ASSERT(connp->conn_ref >= 2);
10579ee3959aSAnders Persson 	squeue_synch_exit(connp);
1058721fffe3SKacheong Poon 
1059721fffe3SKacheong Poon 	return (0);
1060721fffe3SKacheong Poon }
10613e95bd4aSAnders Persson 
10623e95bd4aSAnders Persson /*
10633e95bd4aSAnders Persson  * Notifies a non-STREAMS based listener about a new connection. This
10643e95bd4aSAnders Persson  * function is executed on the *eager*'s squeue once the 3 way handshake
10653e95bd4aSAnders Persson  * has completed. Note that the behavior differs from STREAMS, where the
10663e95bd4aSAnders Persson  * T_CONN_IND is sent up by tcp_send_conn_ind while on the *listener*'s
10673e95bd4aSAnders Persson  * squeue.
10683e95bd4aSAnders Persson  *
10693e95bd4aSAnders Persson  * Returns B_TRUE if the notification succeeded, in which case `tcp' will
10703e95bd4aSAnders Persson  * be moved over to the ESTABLISHED list (q) of the listener. Othwerise,
10713e95bd4aSAnders Persson  * B_FALSE is returned and `tcp' is killed.
10723e95bd4aSAnders Persson  */
10733e95bd4aSAnders Persson boolean_t
10743e95bd4aSAnders Persson tcp_newconn_notify(tcp_t *tcp, ip_recv_attr_t *ira)
10753e95bd4aSAnders Persson {
10763e95bd4aSAnders Persson 	tcp_t *listener = tcp->tcp_listener;
10773e95bd4aSAnders Persson 	conn_t *lconnp = listener->tcp_connp;
10783e95bd4aSAnders Persson 	conn_t *econnp = tcp->tcp_connp;
10793e95bd4aSAnders Persson 	tcp_t *tail;
10803e95bd4aSAnders Persson 	ipaddr_t *addr_cache;
10813e95bd4aSAnders Persson 	sock_upper_handle_t upper;
10823e95bd4aSAnders Persson 	struct sock_proto_props sopp;
10833e95bd4aSAnders Persson 	mblk_t *mp;
10843e95bd4aSAnders Persson 
10853e95bd4aSAnders Persson 	mutex_enter(&listener->tcp_eager_lock);
10863e95bd4aSAnders Persson 	/*
10873e95bd4aSAnders Persson 	 * Take the eager out, if it is in the list of droppable eagers
10883e95bd4aSAnders Persson 	 * as we are here because the 3W handshake is over.
10893e95bd4aSAnders Persson 	 */
10903e95bd4aSAnders Persson 	MAKE_UNDROPPABLE(tcp);
10913e95bd4aSAnders Persson 	/*
10923e95bd4aSAnders Persson 	 * The eager already has an extra ref put in tcp_input_data
10933e95bd4aSAnders Persson 	 * so that it stays till accept comes back even though it
10943e95bd4aSAnders Persson 	 * might get into TCPS_CLOSED as a result of a TH_RST etc.
10953e95bd4aSAnders Persson 	 */
10963e95bd4aSAnders Persson 	ASSERT(listener->tcp_conn_req_cnt_q0 > 0);
10973e95bd4aSAnders Persson 	listener->tcp_conn_req_cnt_q0--;
10983e95bd4aSAnders Persson 	listener->tcp_conn_req_cnt_q++;
10993e95bd4aSAnders Persson 
11003e95bd4aSAnders Persson 	/* Move from SYN_RCVD to ESTABLISHED list  */
11013e95bd4aSAnders Persson 	tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = tcp->tcp_eager_prev_q0;
11023e95bd4aSAnders Persson 	tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp->tcp_eager_next_q0;
11033e95bd4aSAnders Persson 	tcp->tcp_eager_prev_q0 = NULL;
11043e95bd4aSAnders Persson 	tcp->tcp_eager_next_q0 = NULL;
11053e95bd4aSAnders Persson 
11063e95bd4aSAnders Persson 	/*
11073e95bd4aSAnders Persson 	 * Insert at end of the queue because connections are accepted
11083e95bd4aSAnders Persson 	 * in chronological order. Leaving the older connections at front
11093e95bd4aSAnders Persson 	 * of the queue helps reducing search time.
11103e95bd4aSAnders Persson 	 */
11113e95bd4aSAnders Persson 	tail = listener->tcp_eager_last_q;
11123e95bd4aSAnders Persson 	if (tail != NULL)
11133e95bd4aSAnders Persson 		tail->tcp_eager_next_q = tcp;
11143e95bd4aSAnders Persson 	else
11153e95bd4aSAnders Persson 		listener->tcp_eager_next_q = tcp;
11163e95bd4aSAnders Persson 	listener->tcp_eager_last_q = tcp;
11173e95bd4aSAnders Persson 	tcp->tcp_eager_next_q = NULL;
11183e95bd4aSAnders Persson 
11193e95bd4aSAnders Persson 	/* we have timed out before */
11203e95bd4aSAnders Persson 	if (tcp->tcp_syn_rcvd_timeout != 0) {
11213e95bd4aSAnders Persson 		tcp->tcp_syn_rcvd_timeout = 0;
11223e95bd4aSAnders Persson 		listener->tcp_syn_rcvd_timeout--;
11233e95bd4aSAnders Persson 		if (listener->tcp_syn_defense &&
11243e95bd4aSAnders Persson 		    listener->tcp_syn_rcvd_timeout <=
11253e95bd4aSAnders Persson 		    (listener->tcp_tcps->tcps_conn_req_max_q0 >> 5) &&
11263e95bd4aSAnders Persson 		    10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() -
11273e95bd4aSAnders Persson 		    listener->tcp_last_rcv_lbolt)) {
11283e95bd4aSAnders Persson 			/*
11293e95bd4aSAnders Persson 			 * Turn off the defense mode if we
11303e95bd4aSAnders Persson 			 * believe the SYN attack is over.
11313e95bd4aSAnders Persson 			 */
11323e95bd4aSAnders Persson 			listener->tcp_syn_defense = B_FALSE;
11333e95bd4aSAnders Persson 			if (listener->tcp_ip_addr_cache) {
11343e95bd4aSAnders Persson 				kmem_free((void *)listener->tcp_ip_addr_cache,
11353e95bd4aSAnders Persson 				    IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t));
11363e95bd4aSAnders Persson 				listener->tcp_ip_addr_cache = NULL;
11373e95bd4aSAnders Persson 			}
11383e95bd4aSAnders Persson 		}
11393e95bd4aSAnders Persson 	}
11403e95bd4aSAnders Persson 	addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache);
11413e95bd4aSAnders Persson 	if (addr_cache != NULL) {
11423e95bd4aSAnders Persson 		/*
11433e95bd4aSAnders Persson 		 * We have finished a 3-way handshake with this
11443e95bd4aSAnders Persson 		 * remote host. This proves the IP addr is good.
11453e95bd4aSAnders Persson 		 * Cache it!
11463e95bd4aSAnders Persson 		 */
11473e95bd4aSAnders Persson 		addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] =
11483e95bd4aSAnders Persson 		    tcp->tcp_connp->conn_faddr_v4;
11493e95bd4aSAnders Persson 	}
11503e95bd4aSAnders Persson 	mutex_exit(&listener->tcp_eager_lock);
11513e95bd4aSAnders Persson 
11523e95bd4aSAnders Persson 	/*
11533e95bd4aSAnders Persson 	 * Notify the ULP about the newconn. It is guaranteed that no
11543e95bd4aSAnders Persson 	 * tcp_accept() call will be made for the eager if the
11553e95bd4aSAnders Persson 	 * notification fails.
11563e95bd4aSAnders Persson 	 */
11573e95bd4aSAnders Persson 	if ((upper = (*lconnp->conn_upcalls->su_newconn)
11583e95bd4aSAnders Persson 	    (lconnp->conn_upper_handle, (sock_lower_handle_t)econnp,
11593e95bd4aSAnders Persson 	    &sock_tcp_downcalls, ira->ira_cred, ira->ira_cpid,
11603e95bd4aSAnders Persson 	    &econnp->conn_upcalls)) == NULL) {
11613e95bd4aSAnders Persson 		/*
11623e95bd4aSAnders Persson 		 * Normally this should not happen, but the listener might
11633e95bd4aSAnders Persson 		 * have done a fallback to TPI followed by a close(), in
11643e95bd4aSAnders Persson 		 * which case tcp_closemp for this conn might have been
11653e95bd4aSAnders Persson 		 * used by tcp_eager_cleanup().
11663e95bd4aSAnders Persson 		 */
11673e95bd4aSAnders Persson 		mutex_enter(&listener->tcp_eager_lock);
11683e95bd4aSAnders Persson 		if (tcp->tcp_closemp_used) {
11693e95bd4aSAnders Persson 			mutex_exit(&listener->tcp_eager_lock);
11703e95bd4aSAnders Persson 			return (B_FALSE);
11713e95bd4aSAnders Persson 		}
11723e95bd4aSAnders Persson 		tcp->tcp_closemp_used = B_TRUE;
11733e95bd4aSAnders Persson 		TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15);
11743e95bd4aSAnders Persson 		mp = &tcp->tcp_closemp;
11753e95bd4aSAnders Persson 		mutex_exit(&listener->tcp_eager_lock);
11763e95bd4aSAnders Persson 		tcp_eager_kill(econnp, mp, NULL, NULL);
11773e95bd4aSAnders Persson 		return (B_FALSE);
11783e95bd4aSAnders Persson 	}
11793e95bd4aSAnders Persson 	econnp->conn_upper_handle = upper;
11803e95bd4aSAnders Persson 
11813e95bd4aSAnders Persson 	tcp->tcp_detached = B_FALSE;
11823e95bd4aSAnders Persson 	tcp->tcp_hard_binding = B_FALSE;
11833e95bd4aSAnders Persson 	tcp->tcp_tconnind_started = B_TRUE;
11843e95bd4aSAnders Persson 
11853e95bd4aSAnders Persson 	if (econnp->conn_keepalive) {
11863e95bd4aSAnders Persson 		tcp->tcp_ka_last_intrvl = 0;
11873e95bd4aSAnders Persson 		tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer,
11883e95bd4aSAnders Persson 		    tcp->tcp_ka_interval);
11893e95bd4aSAnders Persson 	}
11903e95bd4aSAnders Persson 
11913e95bd4aSAnders Persson 	/* Update the necessary parameters */
11923e95bd4aSAnders Persson 	tcp_get_proto_props(tcp, &sopp);
11933e95bd4aSAnders Persson 
11943e95bd4aSAnders Persson 	(*econnp->conn_upcalls->su_set_proto_props)
11953e95bd4aSAnders Persson 	    (econnp->conn_upper_handle, &sopp);
11963e95bd4aSAnders Persson 
11973e95bd4aSAnders Persson 	return (B_TRUE);
11983e95bd4aSAnders Persson }
1199