1721fffeKacheong Poon/*
2721fffeKacheong Poon * CDDL HEADER START
3721fffeKacheong Poon *
4721fffeKacheong Poon * The contents of this file are subject to the terms of the
5721fffeKacheong Poon * Common Development and Distribution License (the "License").
6721fffeKacheong Poon * You may not use this file except in compliance with the License.
7721fffeKacheong Poon *
8721fffeKacheong Poon * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9721fffeKacheong Poon * or http://www.opensolaris.org/os/licensing.
10721fffeKacheong Poon * See the License for the specific language governing permissions
11721fffeKacheong Poon * and limitations under the License.
12721fffeKacheong Poon *
13721fffeKacheong Poon * When distributing Covered Code, include this CDDL HEADER in each
14721fffeKacheong Poon * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15721fffeKacheong Poon * If applicable, add the following below this CDDL HEADER, with the
16721fffeKacheong Poon * fields enclosed by brackets "[]" replaced with your own identifying
17721fffeKacheong Poon * information: Portions Copyright [yyyy] [name of copyright owner]
18721fffeKacheong Poon *
19721fffeKacheong Poon * CDDL HEADER END
20721fffeKacheong Poon */
21721fffeKacheong Poon
22721fffeKacheong Poon/*
239ee3959Anders Persson * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24721fffeKacheong Poon */
25721fffeKacheong Poon
26721fffeKacheong Poon/* This file contains all TCP kernel socket related functions. */
27721fffeKacheong Poon
28721fffeKacheong Poon#include <sys/types.h>
29721fffeKacheong Poon#include <sys/strlog.h>
30721fffeKacheong Poon#include <sys/policy.h>
31721fffeKacheong Poon#include <sys/sockio.h>
32721fffeKacheong Poon#include <sys/strsubr.h>
33721fffeKacheong Poon#include <sys/strsun.h>
34721fffeKacheong Poon#include <sys/squeue_impl.h>
35721fffeKacheong Poon#include <sys/squeue.h>
363e95bd4Anders Persson#define	_SUN_TPI_VERSION 2
37721fffeKacheong Poon#include <sys/tihdr.h>
38721fffeKacheong Poon#include <sys/timod.h>
39721fffeKacheong Poon#include <sys/tpicommon.h>
40721fffeKacheong Poon#include <sys/socketvar.h>
41721fffeKacheong Poon
42721fffeKacheong Poon#include <inet/common.h>
43721fffeKacheong Poon#include <inet/proto_set.h>
44721fffeKacheong Poon#include <inet/ip.h>
45721fffeKacheong Poon#include <inet/tcp.h>
46721fffeKacheong Poon#include <inet/tcp_impl.h>
47721fffeKacheong Poon
48721fffeKacheong Poonstatic void	tcp_activate(sock_lower_handle_t, sock_upper_handle_t,
49721fffeKacheong Poon		    sock_upcalls_t *, int, cred_t *);
50721fffeKacheong Poonstatic int	tcp_accept(sock_lower_handle_t, sock_lower_handle_t,
51721fffeKacheong Poon		    sock_upper_handle_t, cred_t *);
52721fffeKacheong Poonstatic int	tcp_bind(sock_lower_handle_t, struct sockaddr *,
53721fffeKacheong Poon		    socklen_t, cred_t *);
54721fffeKacheong Poonstatic int	tcp_listen(sock_lower_handle_t, int, cred_t *);
55721fffeKacheong Poonstatic int	tcp_connect(sock_lower_handle_t, const struct sockaddr *,
56721fffeKacheong Poon		    socklen_t, sock_connid_t *, cred_t *);
57ca3c8f4David Höppnerstatic int	tcp_getpeername(sock_lower_handle_t, struct sockaddr *,
58ca3c8f4David Höppner		    socklen_t *, cred_t *);
59ca3c8f4David Höppnerstatic int	tcp_getsockname(sock_lower_handle_t, struct sockaddr *,
60ca3c8f4David Höppner		    socklen_t *, cred_t *);
61721fffeKacheong Poonstatic int	tcp_getsockopt(sock_lower_handle_t, int, int, void *,
62721fffeKacheong Poon		    socklen_t *, cred_t *);
63721fffeKacheong Poonstatic int	tcp_setsockopt(sock_lower_handle_t, int, int, const void *,
64721fffeKacheong Poon		    socklen_t, cred_t *);
65721fffeKacheong Poonstatic int	tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *,
66ca3c8f4David Höppner		    cred_t *);
67721fffeKacheong Poonstatic int	tcp_shutdown(sock_lower_handle_t, int, cred_t *);
68721fffeKacheong Poonstatic void	tcp_clr_flowctrl(sock_lower_handle_t);
69721fffeKacheong Poonstatic int	tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *,
70721fffeKacheong Poon		    cred_t *);
71721fffeKacheong Poonstatic int	tcp_close(sock_lower_handle_t, int, cred_t *);
72721fffeKacheong Poon
73721fffeKacheong Poonsock_downcalls_t sock_tcp_downcalls = {
74721fffeKacheong Poon	tcp_activate,
75721fffeKacheong Poon	tcp_accept,
76721fffeKacheong Poon	tcp_bind,
77721fffeKacheong Poon	tcp_listen,
78721fffeKacheong Poon	tcp_connect,
79721fffeKacheong Poon	tcp_getpeername,
80721fffeKacheong Poon	tcp_getsockname,
81721fffeKacheong Poon	tcp_getsockopt,
82721fffeKacheong Poon	tcp_setsockopt,
83721fffeKacheong Poon	tcp_sendmsg,
84721fffeKacheong Poon	NULL,
85721fffeKacheong Poon	NULL,
86721fffeKacheong Poon	NULL,
87721fffeKacheong Poon	tcp_shutdown,
88721fffeKacheong Poon	tcp_clr_flowctrl,
89721fffeKacheong Poon	tcp_ioctl,
90721fffeKacheong Poon	tcp_close,
91721fffeKacheong Poon};
92721fffeKacheong Poon
93721fffeKacheong Poon/* ARGSUSED */
94721fffeKacheong Poonstatic void
95721fffeKacheong Poontcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
96721fffeKacheong Poon    sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
97721fffeKacheong Poon{
98721fffeKacheong Poon	conn_t *connp = (conn_t *)proto_handle;
99721fffeKacheong Poon	struct sock_proto_props sopp;
100721fffeKacheong Poon	extern struct module_info tcp_rinfo;
101721fffeKacheong Poon
102721fffeKacheong Poon	ASSERT(connp->conn_upper_handle == NULL);
103721fffeKacheong Poon
104721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
105721fffeKacheong Poon	ASSERT(cr != NULL);
106721fffeKacheong Poon
107721fffeKacheong Poon	sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
110721fffeKacheong Poon
111721fffeKacheong Poon	sopp.sopp_rxhiwat = SOCKET_RECVHIWATER;
112721fffeKacheong Poon	sopp.sopp_rxlowat = SOCKET_RECVLOWATER;
113721fffeKacheong Poon	sopp.sopp_maxpsz = INFPSZ;
114721fffeKacheong Poon	sopp.sopp_maxblk = INFPSZ;
115721fffeKacheong Poon	sopp.sopp_rcvtimer = SOCKET_TIMER_INTERVAL;
116721fffeKacheong Poon	sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3;
117721fffeKacheong Poon	sopp.sopp_maxaddrlen = sizeof (sin6_t);
118721fffeKacheong Poon	sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 :
119721fffeKacheong Poon	    tcp_rinfo.mi_minpsz;
120721fffeKacheong Poon
121721fffeKacheong Poon	connp->conn_upcalls = sock_upcalls;
122721fffeKacheong Poon	connp->conn_upper_handle = sock_handle;
123721fffeKacheong Poon
124721fffeKacheong Poon	ASSERT(connp->conn_rcvbuf != 0 &&
125721fffeKacheong Poon	    connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd);
126721fffeKacheong Poon	(*sock_upcalls->su_set_proto_props)(sock_handle, &sopp);
127721fffeKacheong Poon}
128721fffeKacheong Poon
1293e95bd4Anders Persson/*ARGSUSED*/
130721fffeKacheong Poonstatic int
131721fffeKacheong Poontcp_accept(sock_lower_handle_t lproto_handle,
132721fffeKacheong Poon    sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
133721fffeKacheong Poon    cred_t *cr)
134721fffeKacheong Poon{
135721fffeKacheong Poon	conn_t *lconnp, *econnp;
136721fffeKacheong Poon	tcp_t *listener, *eager;
137721fffeKacheong Poon
138dd49f12Anders Persson	/*
139dd49f12Anders Persson	 * KSSL can move a socket from one listener to another, in which
140dd49f12Anders Persson	 * case `lproto_handle' points to the new listener. To ensure that
141dd49f12Anders Persson	 * the original listener is used the information is obtained from
142dd49f12Anders Persson	 * the eager.
143dd49f12Anders Persson	 */
144721fffeKacheong Poon	econnp = (conn_t *)eproto_handle;
145721fffeKacheong Poon	eager = econnp->conn_tcp;
1463e95bd4Anders Persson	ASSERT(IPCL_IS_NONSTR(econnp));
147dd49f12Anders Persson	ASSERT(eager->tcp_listener != NULL);
148dd49f12Anders Persson	listener = eager->tcp_listener;
149dd49f12Anders Persson	lconnp = (conn_t *)listener->tcp_connp;
150dd49f12Anders Persson	ASSERT(listener->tcp_state == TCPS_LISTEN);
1513e95bd4Anders Persson	ASSERT(lconnp->conn_upper_handle != NULL);
152721fffeKacheong Poon
153721fffeKacheong Poon	/*
1543e95bd4Anders Persson	 * It is possible for the accept thread to race with the thread that
1553e95bd4Anders Persson	 * made the su_newconn upcall in tcp_newconn_notify. Both
1563e95bd4Anders Persson	 * tcp_newconn_notify and tcp_accept require that conn_upper_handle
1573e95bd4Anders Persson	 * and conn_upcalls be set before returning, so they both write to
1583e95bd4Anders Persson	 * them. However, we're guaranteed that the value written is the same
1593e95bd4Anders Persson	 * for both threads.
160721fffeKacheong Poon	 */
1613e95bd4Anders Persson	ASSERT(econnp->conn_upper_handle == NULL ||
1623e95bd4Anders Persson	    econnp->conn_upper_handle == sock_handle);
1633e95bd4Anders Persson	ASSERT(econnp->conn_upcalls == NULL ||
1643e95bd4Anders Persson	    econnp->conn_upcalls == lconnp->conn_upcalls);
165721fffeKacheong Poon	econnp->conn_upper_handle = sock_handle;
166721fffeKacheong Poon	econnp->conn_upcalls = lconnp->conn_upcalls;
1673e95bd4Anders Persson
1683e95bd4Anders Persson	ASSERT(econnp->conn_netstack ==
1693e95bd4Anders Persson	    listener->tcp_connp->conn_netstack);
1703e95bd4Anders Persson	ASSERT(eager->tcp_tcps == listener->tcp_tcps);
1713e95bd4Anders Persson
1723e95bd4Anders Persson	/*
1733e95bd4Anders Persson	 * We should have a minimum of 2 references on the conn at this
1743e95bd4Anders Persson	 * point. One for TCP and one for the newconn notification
1753e95bd4Anders Persson	 * (which is now taken over by IP). In the normal case we would
1763e95bd4Anders Persson	 * also have another reference (making a total of 3) for the conn
1773e95bd4Anders Persson	 * being in the classifier hash list. However the eager could have
1783e95bd4Anders Persson	 * received an RST subsequently and tcp_closei_local could have
1793e95bd4Anders Persson	 * removed the eager from the classifier hash list, hence we can't
1803e95bd4Anders Persson	 * assert that reference.
1813e95bd4Anders Persson	 */
1823e95bd4Anders Persson	ASSERT(econnp->conn_ref >= 2);
1833e95bd4Anders Persson
1843e95bd4Anders Persson	mutex_enter(&listener->tcp_eager_lock);
1853e95bd4Anders Persson	/*
1863e95bd4Anders Persson	 * Non-STREAMS listeners never defer the notification of new
1873e95bd4Anders Persson	 * connections.
1883e95bd4Anders Persson	 */
1893e95bd4Anders Persson	ASSERT(!listener->tcp_eager_prev_q0->tcp_conn_def_q0);
1903e95bd4Anders Persson	tcp_eager_unlink(eager);
1913e95bd4Anders Persson	mutex_exit(&listener->tcp_eager_lock);
1923e95bd4Anders Persson	CONN_DEC_REF(listener->tcp_connp);
1933e95bd4Anders Persson
194b1cd787Anders Persson	return ((eager->tcp_state < TCPS_ESTABLISHED) ? ECONNABORTED : 0);
195721fffeKacheong Poon}
196721fffeKacheong Poon
197721fffeKacheong Poonstatic int
198721fffeKacheong Poontcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
199721fffeKacheong Poon    socklen_t len, cred_t *cr)
200721fffeKacheong Poon{
201721fffeKacheong Poon	int 		error;
202721fffeKacheong Poon	conn_t		*connp = (conn_t *)proto_handle;
203721fffeKacheong Poon
204721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
205721fffeKacheong Poon	ASSERT(cr != NULL);
206721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
207721fffeKacheong Poon
2089ee3959Anders Persson	error = squeue_synch_enter(connp, NULL);
209721fffeKacheong Poon	if (error != 0) {
210721fffeKacheong Poon		/* failed to enter */
211721fffeKacheong Poon		return (ENOSR);
212721fffeKacheong Poon	}
213721fffeKacheong Poon
214721fffeKacheong Poon	/* binding to a NULL address really means unbind */
215721fffeKacheong Poon	if (sa == NULL) {
216721fffeKacheong Poon		if (connp->conn_tcp->tcp_state < TCPS_LISTEN)
217721fffeKacheong Poon			error = tcp_do_unbind(connp);
218721fffeKacheong Poon		else
219721fffeKacheong Poon			error = EINVAL;
220721fffeKacheong Poon	} else {
221721fffeKacheong Poon		error = tcp_do_bind(connp, sa, len, cr, B_TRUE);
222721fffeKacheong Poon	}
223721fffeKacheong Poon
2249ee3959Anders Persson	squeue_synch_exit(connp);
225721fffeKacheong Poon
226721fffeKacheong Poon	if (error < 0) {
227721fffeKacheong Poon		if (error == -TOUTSTATE)
228721fffeKacheong Poon			error = EINVAL;
229721fffeKacheong Poon		else
230721fffeKacheong Poon			error = proto_tlitosyserr(-error);
231721fffeKacheong Poon	}
232721fffeKacheong Poon
233721fffeKacheong Poon	return (error);
234721fffeKacheong Poon}
235721fffeKacheong Poon
236721fffeKacheong Poon/* ARGSUSED */
237721fffeKacheong Poonstatic int
238721fffeKacheong Poontcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
239721fffeKacheong Poon{
240721fffeKacheong Poon	conn_t	*connp = (conn_t *)proto_handle;
2413e95bd4Anders Persson	tcp_t	*tcp = connp->conn_tcp;
242721fffeKacheong Poon	int 	error;
243721fffeKacheong Poon
244721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
245721fffeKacheong Poon
246721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
247721fffeKacheong Poon	ASSERT(cr != NULL);
248721fffeKacheong Poon
2499ee3959Anders Persson	error = squeue_synch_enter(connp, NULL);
250721fffeKacheong Poon	if (error != 0) {
251721fffeKacheong Poon		/* failed to enter */
252721fffeKacheong Poon		return (ENOBUFS);
253721fffeKacheong Poon	}
254721fffeKacheong Poon
255721fffeKacheong Poon	error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE);
256721fffeKacheong Poon	if (error == 0) {
2573e95bd4Anders Persson		/*
2583e95bd4Anders Persson		 * sockfs needs to know what's the maximum number of socket
2593e95bd4Anders Persson		 * that can be queued on the listener.
2603e95bd4Anders Persson		 */
261721fffeKacheong Poon		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
2623e95bd4Anders Persson		    SOCK_OPCTL_ENAB_ACCEPT,
2633e95bd4Anders Persson		    (uintptr_t)(tcp->tcp_conn_req_max +
2643e95bd4Anders Persson		    tcp->tcp_tcps->tcps_conn_req_max_q0));
265721fffeKacheong Poon	} else if (error < 0) {
266721fffeKacheong Poon		if (error == -TOUTSTATE)
267721fffeKacheong Poon			error = EINVAL;
268721fffeKacheong Poon		else
269721fffeKacheong Poon			error = proto_tlitosyserr(-error);
270721fffeKacheong Poon	}
2719ee3959Anders Persson	squeue_synch_exit(connp);
272721fffeKacheong Poon	return (error);
273721fffeKacheong Poon}
274721fffeKacheong Poon
275721fffeKacheong Poonstatic int
276721fffeKacheong Poontcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
277721fffeKacheong Poon    socklen_t len, sock_connid_t *id, cred_t *cr)
278721fffeKacheong Poon{
279721fffeKacheong Poon	conn_t		*connp = (conn_t *)proto_handle;
280721fffeKacheong Poon	int		error;
281721fffeKacheong Poon
282721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
283721fffeKacheong Poon
284721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
285721fffeKacheong Poon	ASSERT(cr != NULL);
286721fffeKacheong Poon
287721fffeKacheong Poon	error = proto_verify_ip_addr(connp->conn_family, sa, len);
288721fffeKacheong Poon	if (error != 0) {
289721fffeKacheong Poon		return (error);
290721fffeKacheong Poon	}
291721fffeKacheong Poon
2929ee3959Anders Persson	error = squeue_synch_enter(connp, NULL);
293721fffeKacheong Poon	if (error != 0) {
294721fffeKacheong Poon		/* failed to enter */
295721fffeKacheong Poon		return (ENOSR);
296721fffeKacheong Poon	}
297721fffeKacheong Poon
298721fffeKacheong Poon	/*
299721fffeKacheong Poon	 * TCP supports quick connect, so no need to do an implicit bind
300721fffeKacheong Poon	 */
301721fffeKacheong Poon	error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid);
302721fffeKacheong Poon	if (error == 0) {
303721fffeKacheong Poon		*id = connp->conn_tcp->tcp_connid;
304721fffeKacheong Poon	} else if (error < 0) {
305721fffeKacheong Poon		if (error == -TOUTSTATE) {
306721fffeKacheong Poon			switch (connp->conn_tcp->tcp_state) {
307721fffeKacheong Poon			case TCPS_SYN_SENT:
308721fffeKacheong Poon				error = EALREADY;
309721fffeKacheong Poon				break;
310721fffeKacheong Poon			case TCPS_ESTABLISHED:
311721fffeKacheong Poon				error = EISCONN;
312721fffeKacheong Poon				break;
313721fffeKacheong Poon			case TCPS_LISTEN:
314721fffeKacheong Poon				error = EOPNOTSUPP;
315721fffeKacheong Poon				break;
316721fffeKacheong Poon			default:
317721fffeKacheong Poon				error = EINVAL;
318721fffeKacheong Poon				break;
319721fffeKacheong Poon			}
320721fffeKacheong Poon		} else {
321721fffeKacheong Poon			error = proto_tlitosyserr(-error);
322721fffeKacheong Poon		}
323721fffeKacheong Poon	}
324721fffeKacheong Poon
325721fffeKacheong Poon	if (connp->conn_tcp->tcp_loopback) {
326721fffeKacheong Poon		struct sock_proto_props sopp;
327721fffeKacheong Poon
328721fffeKacheong Poon		sopp.sopp_flags = SOCKOPT_LOOPBACK;
329721fffeKacheong Poon		sopp.sopp_loopback = B_TRUE;
330721fffeKacheong Poon
331721fffeKacheong Poon		(*connp->conn_upcalls->su_set_proto_props)(
332721fffeKacheong Poon		    connp->conn_upper_handle, &sopp);
333721fffeKacheong Poon	}
334721fffeKacheong Poondone:
3359ee3959Anders Persson	squeue_synch_exit(connp);
336721fffeKacheong Poon
337721fffeKacheong Poon	return ((error == 0) ? EINPROGRESS : error);
338721fffeKacheong Poon}
339721fffeKacheong Poon
340721fffeKacheong Poon/* ARGSUSED3 */
341ca3c8f4David Höppnerstatic int
342721fffeKacheong Poontcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr,
343721fffeKacheong Poon    socklen_t *addrlenp, cred_t *cr)
344721fffeKacheong Poon{
345721fffeKacheong Poon	conn_t	*connp = (conn_t *)proto_handle;
346721fffeKacheong Poon	tcp_t	*tcp = connp->conn_tcp;
347721fffeKacheong Poon
348721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
349721fffeKacheong Poon	ASSERT(cr != NULL);
350721fffeKacheong Poon
351721fffeKacheong Poon	ASSERT(tcp != NULL);
352721fffeKacheong Poon	if (tcp->tcp_state < TCPS_SYN_RCVD)
353721fffeKacheong Poon		return (ENOTCONN);
354721fffeKacheong Poon
355721fffeKacheong Poon	return (conn_getpeername(connp, addr, addrlenp));
356721fffeKacheong Poon}
357721fffeKacheong Poon
358721fffeKacheong Poon/* ARGSUSED3 */
359ca3c8f4David Höppnerstatic int
360721fffeKacheong Poontcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr,
361721fffeKacheong Poon    socklen_t *addrlenp, cred_t *cr)
362721fffeKacheong Poon{
363721fffeKacheong Poon	conn_t	*connp = (conn_t *)proto_handle;
364721fffeKacheong Poon
365721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
366721fffeKacheong Poon	ASSERT(cr != NULL);
367721fffeKacheong Poon
368721fffeKacheong Poon	return (conn_getsockname(connp, addr, addrlenp));
369721fffeKacheong Poon}
370721fffeKacheong Poon
371721fffeKacheong Poon/* returns UNIX error, the optlen is a value-result arg */
372721fffeKacheong Poonstatic int
373721fffeKacheong Poontcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
374721fffeKacheong Poon    void *optvalp, socklen_t *optlen, cred_t *cr)
375721fffeKacheong Poon{
376721fffeKacheong Poon	conn_t		*connp = (conn_t *)proto_handle;
377721fffeKacheong Poon	int		error;
378721fffeKacheong Poon	t_uscalar_t	max_optbuf_len;
379721fffeKacheong Poon	void		*optvalp_buf;
380721fffeKacheong Poon	int		len;
381721fffeKacheong Poon
382721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
383721fffeKacheong Poon
384721fffeKacheong Poon	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
385721fffeKacheong Poon	    tcp_opt_obj.odb_opt_des_arr,
386721fffeKacheong Poon	    tcp_opt_obj.odb_opt_arr_cnt,
387721fffeKacheong Poon	    B_FALSE, B_TRUE, cr);
388721fffeKacheong Poon	if (error != 0) {
389721fffeKacheong Poon		if (error < 0) {
390721fffeKacheong Poon			error = proto_tlitosyserr(-error);
391721fffeKacheong Poon		}
392721fffeKacheong Poon		return (error);
393721fffeKacheong Poon	}
394721fffeKacheong Poon
395721fffeKacheong Poon	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
396721fffeKacheong Poon
3979ee3959Anders Persson	error = squeue_synch_enter(connp, NULL);
398721fffeKacheong Poon	if (error == ENOMEM) {
399721fffeKacheong Poon		kmem_free(optvalp_buf, max_optbuf_len);
400721fffeKacheong Poon		return (ENOMEM);
401721fffeKacheong Poon	}
402721fffeKacheong Poon
403721fffeKacheong Poon	len = tcp_opt_get(connp, level, option_name, optvalp_buf);
4049ee3959Anders Persson	squeue_synch_exit(connp);
405721fffeKacheong Poon
406721fffeKacheong Poon	if (len == -1) {
407721fffeKacheong Poon		kmem_free(optvalp_buf, max_optbuf_len);
408721fffeKacheong Poon		return (EINVAL);
409721fffeKacheong Poon	}
410721fffeKacheong Poon
411721fffeKacheong Poon	/*
412721fffeKacheong Poon	 * update optlen and copy option value
413721fffeKacheong Poon	 */
414721fffeKacheong Poon	t_uscalar_t size = MIN(len, *optlen);
415721fffeKacheong Poon
416721fffeKacheong Poon	bcopy(optvalp_buf, optvalp, size);
417721fffeKacheong Poon	bcopy(&size, optlen, sizeof (size));
418721fffeKacheong Poon
419721fffeKacheong Poon	kmem_free(optvalp_buf, max_optbuf_len);
420721fffeKacheong Poon	return (0);
421721fffeKacheong Poon}
422721fffeKacheong Poon
423721fffeKacheong Poonstatic int
424721fffeKacheong Poontcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
425721fffeKacheong Poon    const void *optvalp, socklen_t optlen, cred_t *cr)
426721fffeKacheong Poon{
427721fffeKacheong Poon	conn_t		*connp = (conn_t *)proto_handle;
428721fffeKacheong Poon	int		error;
429721fffeKacheong Poon
430721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
431721fffeKacheong Poon	/*
432721fffeKacheong Poon	 * Entering the squeue synchronously can result in a context switch,
433721fffeKacheong Poon	 * which can cause a rather sever performance degradation. So we try to
434721fffeKacheong Poon	 * handle whatever options we can without entering the squeue.
435721fffeKacheong Poon	 */
436721fffeKacheong Poon	if (level == IPPROTO_TCP) {
437721fffeKacheong Poon		switch (option_name) {
438721fffeKacheong Poon		case TCP_NODELAY:
439721fffeKacheong Poon			if (optlen != sizeof (int32_t))
440721fffeKacheong Poon				return (EINVAL);
441721fffeKacheong Poon			mutex_enter(&connp->conn_tcp->tcp_non_sq_lock);
442721fffeKacheong Poon			connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 :
443721fffeKacheong Poon			    connp->conn_tcp->tcp_mss;
444721fffeKacheong Poon			mutex_exit(&connp->conn_tcp->tcp_non_sq_lock);
445721fffeKacheong Poon			return (0);
446721fffeKacheong Poon		default:
447721fffeKacheong Poon			break;
448721fffeKacheong Poon		}
449721fffeKacheong Poon	}
450721fffeKacheong Poon
4519ee3959Anders Persson	error = squeue_synch_enter(connp, NULL);
452721fffeKacheong Poon	if (error == ENOMEM) {
453721fffeKacheong Poon		return (ENOMEM);
454721fffeKacheong Poon	}
455721fffeKacheong Poon
456721fffeKacheong Poon	error = proto_opt_check(level, option_name, optlen, NULL,
457721fffeKacheong Poon	    tcp_opt_obj.odb_opt_des_arr,
458721fffeKacheong Poon	    tcp_opt_obj.odb_opt_arr_cnt,
459721fffeKacheong Poon	    B_TRUE, B_FALSE, cr);
460721fffeKacheong Poon
461721fffeKacheong Poon	if (error != 0) {
462721fffeKacheong Poon		if (error < 0) {
463721fffeKacheong Poon			error = proto_tlitosyserr(-error);
464721fffeKacheong Poon		}
4659ee3959Anders Persson		squeue_synch_exit(connp);
466721fffeKacheong Poon		return (error);
467721fffeKacheong Poon	}
468721fffeKacheong Poon
469721fffeKacheong Poon	error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
470721fffeKacheong Poon	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
471721fffeKacheong Poon	    NULL, cr);
4729ee3959Anders Persson	squeue_synch_exit(connp);
473721fffeKacheong Poon
474721fffeKacheong Poon	ASSERT(error >= 0);
475721fffeKacheong Poon
476721fffeKacheong Poon	return (error);
477721fffeKacheong Poon}
478721fffeKacheong Poon
479721fffeKacheong Poon/* ARGSUSED */
480721fffeKacheong Poonstatic int
481721fffeKacheong Poontcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
482721fffeKacheong Poon    cred_t *cr)
483721fffeKacheong Poon{
484721fffeKacheong Poon	tcp_t		*tcp;
485721fffeKacheong Poon	uint32_t	msize;
486721fffeKacheong Poon	conn_t *connp = (conn_t *)proto_handle;
487721fffeKacheong Poon	int32_t		tcpstate;
488721fffeKacheong Poon
489721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
490721fffeKacheong Poon	ASSERT(cr != NULL);
491721fffeKacheong Poon
492721fffeKacheong Poon	ASSERT(connp->conn_ref >= 2);
493721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
494721fffeKacheong Poon
495721fffeKacheong Poon	if (msg->msg_controllen != 0) {
496721fffeKacheong Poon		freemsg(mp);
497721fffeKacheong Poon		return (EOPNOTSUPP);
498721fffeKacheong Poon	}
499721fffeKacheong Poon
500721fffeKacheong Poon	switch (DB_TYPE(mp)) {
501721fffeKacheong Poon	case M_DATA:
502721fffeKacheong Poon		tcp = connp->conn_tcp;
503721fffeKacheong Poon		ASSERT(tcp != NULL);
504721fffeKacheong Poon
505721fffeKacheong Poon		tcpstate = tcp->tcp_state;
506721fffeKacheong Poon		if (tcpstate < TCPS_ESTABLISHED) {
507721fffeKacheong Poon			freemsg(mp);
508721fffeKacheong Poon			/*
509721fffeKacheong Poon			 * We return ENOTCONN if the endpoint is trying to
510721fffeKacheong Poon			 * connect or has never been connected, and EPIPE if it
511721fffeKacheong Poon			 * has been disconnected. The connection id helps us
512721fffeKacheong Poon			 * distinguish between the last two cases.
513721fffeKacheong Poon			 */
514721fffeKacheong Poon			return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN :
515721fffeKacheong Poon			    ((tcp->tcp_connid > 0) ? EPIPE : ENOTCONN));
516721fffeKacheong Poon		} else if (tcpstate > TCPS_CLOSE_WAIT) {
517721fffeKacheong Poon			freemsg(mp);
518721fffeKacheong Poon			return (EPIPE);
519721fffeKacheong Poon		}
520721fffeKacheong Poon
521721fffeKacheong Poon		msize = msgdsize(mp);
522721fffeKacheong Poon
523721fffeKacheong Poon		mutex_enter(&tcp->tcp_non_sq_lock);
524721fffeKacheong Poon		tcp->tcp_squeue_bytes += msize;
525721fffeKacheong Poon		/*
526721fffeKacheong Poon		 * Squeue Flow Control
527721fffeKacheong Poon		 */
528721fffeKacheong Poon		if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) {
529721fffeKacheong Poon			tcp_setqfull(tcp);
530721fffeKacheong Poon		}
531721fffeKacheong Poon		mutex_exit(&tcp->tcp_non_sq_lock);
532721fffeKacheong Poon
533721fffeKacheong Poon		/*
534721fffeKacheong Poon		 * The application may pass in an address in the msghdr, but
535721fffeKacheong Poon		 * we ignore the address on connection-oriented sockets.
536721fffeKacheong Poon		 * Just like BSD this code does not generate an error for
537721fffeKacheong Poon		 * TCP (a CONNREQUIRED socket) when sending to an address
538721fffeKacheong Poon		 * passed in with sendto/sendmsg. Instead the data is
539721fffeKacheong Poon		 * delivered on the connection as if no address had been
540721fffeKacheong Poon		 * supplied.
541721fffeKacheong Poon		 */
542721fffeKacheong Poon		CONN_INC_REF(connp);
543721fffeKacheong Poon
544721fffeKacheong Poon		if (msg->msg_flags & MSG_OOB) {
545721fffeKacheong Poon			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent,
546721fffeKacheong Poon			    connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
547721fffeKacheong Poon		} else {
548721fffeKacheong Poon			SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output,
549721fffeKacheong Poon			    connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT);
550721fffeKacheong Poon		}
551721fffeKacheong Poon
552721fffeKacheong Poon		return (0);
553721fffeKacheong Poon
554721fffeKacheong Poon	default:
555721fffeKacheong Poon		ASSERT(0);
556721fffeKacheong Poon	}
557721fffeKacheong Poon
558721fffeKacheong Poon	freemsg(mp);
559721fffeKacheong Poon	return (0);
560721fffeKacheong Poon}
561721fffeKacheong Poon
562721fffeKacheong Poon/* ARGSUSED */
563721fffeKacheong Poonstatic int
564721fffeKacheong Poontcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
565721fffeKacheong Poon{
566721fffeKacheong Poon	conn_t  *connp = (conn_t *)proto_handle;
567721fffeKacheong Poon	tcp_t   *tcp = connp->conn_tcp;
568721fffeKacheong Poon
569721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
570721fffeKacheong Poon
571721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
572721fffeKacheong Poon	ASSERT(cr != NULL);
573721fffeKacheong Poon
574721fffeKacheong Poon	/*
575721fffeKacheong Poon	 * X/Open requires that we check the connected state.
576721fffeKacheong Poon	 */
577721fffeKacheong Poon	if (tcp->tcp_state < TCPS_SYN_SENT)
578721fffeKacheong Poon		return (ENOTCONN);
579721fffeKacheong Poon
580721fffeKacheong Poon	/* shutdown the send side */
581721fffeKacheong Poon	if (how != SHUT_RD) {
582721fffeKacheong Poon		mblk_t *bp;
583721fffeKacheong Poon
584721fffeKacheong Poon		bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL);
585721fffeKacheong Poon		CONN_INC_REF(connp);
586721fffeKacheong Poon		SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output,
587721fffeKacheong Poon		    connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT);
588721fffeKacheong Poon
589721fffeKacheong Poon		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
590721fffeKacheong Poon		    SOCK_OPCTL_SHUT_SEND, 0);
591721fffeKacheong Poon	}
592721fffeKacheong Poon
593721fffeKacheong Poon	/* shutdown the recv side */
594721fffeKacheong Poon	if (how != SHUT_WR)
595721fffeKacheong Poon		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
596721fffeKacheong Poon		    SOCK_OPCTL_SHUT_RECV, 0);
597721fffeKacheong Poon
598721fffeKacheong Poon	return (0);
599721fffeKacheong Poon}
600721fffeKacheong Poon
601721fffeKacheong Poonstatic void
602721fffeKacheong Poontcp_clr_flowctrl(sock_lower_handle_t proto_handle)
603721fffeKacheong Poon{
604721fffeKacheong Poon	conn_t  *connp = (conn_t *)proto_handle;
605721fffeKacheong Poon	tcp_t	*tcp = connp->conn_tcp;
606721fffeKacheong Poon	mblk_t *mp;
607721fffeKacheong Poon	int error;
608721fffeKacheong Poon
609721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
610721fffeKacheong Poon
611721fffeKacheong Poon	/*
612721fffeKacheong Poon	 * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl()
613721fffeKacheong Poon	 * is currently running.
614721fffeKacheong Poon	 */
615721fffeKacheong Poon	mutex_enter(&tcp->tcp_rsrv_mp_lock);
616721fffeKacheong Poon	if ((mp = tcp->tcp_rsrv_mp) == NULL) {
617721fffeKacheong Poon		mutex_exit(&tcp->tcp_rsrv_mp_lock);
618721fffeKacheong Poon		return;
619721fffeKacheong Poon	}
620721fffeKacheong Poon	tcp->tcp_rsrv_mp = NULL;
621721fffeKacheong Poon	mutex_exit(&tcp->tcp_rsrv_mp_lock);
622721fffeKacheong Poon
6239ee3959Anders Persson	error = squeue_synch_enter(connp, mp);
624721fffeKacheong Poon	ASSERT(error == 0);
625721fffeKacheong Poon
626721fffeKacheong Poon	mutex_enter(&tcp->tcp_rsrv_mp_lock);
627721fffeKacheong Poon	tcp->tcp_rsrv_mp = mp;
628721fffeKacheong Poon	mutex_exit(&tcp->tcp_rsrv_mp_lock);
629721fffeKacheong Poon
630721fffeKacheong Poon	if (tcp->tcp_fused) {
631721fffeKacheong Poon		tcp_fuse_backenable(tcp);
632721fffeKacheong Poon	} else {
633721fffeKacheong Poon		tcp->tcp_rwnd = connp->conn_rcvbuf;
634721fffeKacheong Poon		/*
635721fffeKacheong Poon		 * Send back a window update immediately if TCP is above
636721fffeKacheong Poon		 * ESTABLISHED state and the increase of the rcv window
637721fffeKacheong Poon		 * that the other side knows is at least 1 MSS after flow
638721fffeKacheong Poon		 * control is lifted.
639721fffeKacheong Poon		 */
640721fffeKacheong Poon		if (tcp->tcp_state >= TCPS_ESTABLISHED &&
641721fffeKacheong Poon		    tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) {
642721fffeKacheong Poon			tcp_xmit_ctl(NULL, tcp,
643721fffeKacheong Poon			    (tcp->tcp_swnd == 0) ? tcp->tcp_suna :
644721fffeKacheong Poon			    tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK);
645721fffeKacheong Poon		}
646721fffeKacheong Poon	}
647721fffeKacheong Poon
6489ee3959Anders Persson	squeue_synch_exit(connp);
649721fffeKacheong Poon}
650721fffeKacheong Poon
651721fffeKacheong Poon/* ARGSUSED */
652721fffeKacheong Poonstatic int
653721fffeKacheong Poontcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
654721fffeKacheong Poon    int mode, int32_t *rvalp, cred_t *cr)
655721fffeKacheong Poon{
656721fffeKacheong Poon	conn_t  	*connp = (conn_t *)proto_handle;
657721fffeKacheong Poon	int		error;
658721fffeKacheong Poon
659721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
660721fffeKacheong Poon
661721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
662721fffeKacheong Poon	ASSERT(cr != NULL);
663721fffeKacheong Poon
664721fffeKacheong Poon	/*
665721fffeKacheong Poon	 * If we don't have a helper stream then create one.
666721fffeKacheong Poon	 * ip_create_helper_stream takes care of locking the conn_t,
667721fffeKacheong Poon	 * so this check for NULL is just a performance optimization.
668721fffeKacheong Poon	 */
669721fffeKacheong Poon	if (connp->conn_helper_info == NULL) {
670721fffeKacheong Poon		tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps;
671721fffeKacheong Poon
672721fffeKacheong Poon		/*
673721fffeKacheong Poon		 * Create a helper stream for non-STREAMS socket.
674721fffeKacheong Poon		 */
675721fffeKacheong Poon		error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident);
676721fffeKacheong Poon		if (error != 0) {
677721fffeKacheong Poon			ip0dbg(("tcp_ioctl: create of IP helper stream "
678721fffeKacheong Poon			    "failed %d\n", error));
679721fffeKacheong Poon			return (error);
680721fffeKacheong Poon		}
681721fffeKacheong Poon	}
682721fffeKacheong Poon
683721fffeKacheong Poon	switch (cmd) {
684721fffeKacheong Poon		case ND_SET:
685721fffeKacheong Poon		case ND_GET:
686721fffeKacheong Poon		case _SIOCSOCKFALLBACK:
687721fffeKacheong Poon		case TCP_IOC_ABORT_CONN:
688721fffeKacheong Poon		case TI_GETPEERNAME:
689721fffeKacheong Poon		case TI_GETMYNAME:
690721fffeKacheong Poon			ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket",
691721fffeKacheong Poon			    cmd));
692721fffeKacheong Poon			error = EINVAL;
693721fffeKacheong Poon			break;
694721fffeKacheong Poon		default:
695721fffeKacheong Poon			/*
696721fffeKacheong Poon			 * If the conn is not closing, pass on to IP using
697721fffeKacheong Poon			 * helper stream. Bump the ioctlref to prevent tcp_close
698721fffeKacheong Poon			 * from closing the rq/wq out from underneath the ioctl
699721fffeKacheong Poon			 * if it ends up queued or aborted/interrupted.
700721fffeKacheong Poon			 */
701721fffeKacheong Poon			mutex_enter(&connp->conn_lock);
702721fffeKacheong Poon			if (connp->conn_state_flags & (CONN_CLOSING)) {
703721fffeKacheong Poon				mutex_exit(&connp->conn_lock);
704721fffeKacheong Poon				error = EINVAL;
705721fffeKacheong Poon				break;
706721fffeKacheong Poon			}
707721fffeKacheong Poon			CONN_INC_IOCTLREF_LOCKED(connp);
708721fffeKacheong Poon			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
709721fffeKacheong Poon			    cmd, arg, mode, cr, rvalp);
710721fffeKacheong Poon			CONN_DEC_IOCTLREF(connp);
711721fffeKacheong Poon			break;
712721fffeKacheong Poon	}
713721fffeKacheong Poon	return (error);
714721fffeKacheong Poon}
715721fffeKacheong Poon
716721fffeKacheong Poon/* ARGSUSED */
717721fffeKacheong Poonstatic int
718721fffeKacheong Poontcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
719721fffeKacheong Poon{
720721fffeKacheong Poon	conn_t *connp = (conn_t *)proto_handle;
721721fffeKacheong Poon
722721fffeKacheong Poon	ASSERT(connp->conn_upper_handle != NULL);
723721fffeKacheong Poon
724721fffeKacheong Poon	/* All Solaris components should pass a cred for this operation. */
725721fffeKacheong Poon	ASSERT(cr != NULL);
726721fffeKacheong Poon
727721fffeKacheong Poon	tcp_close_common(connp, flags);
728721fffeKacheong Poon
729721fffeKacheong Poon	ip_free_helper_stream(connp);
730721fffeKacheong Poon
731721fffeKacheong Poon	/*
732721fffeKacheong Poon	 * Drop IP's reference on the conn. This is the last reference
733721fffeKacheong Poon	 * on the connp if the state was less than established. If the
734721fffeKacheong Poon	 * connection has gone into timewait state, then we will have
735721fffeKacheong Poon	 * one ref for the TCP and one more ref (total of two) for the
736721fffeKacheong Poon	 * classifier connected hash list (a timewait connections stays
737721fffeKacheong Poon	 * in connected hash till closed).
738721fffeKacheong Poon	 *
739721fffeKacheong Poon	 * We can't assert the references because there might be other
740721fffeKacheong Poon	 * transient reference places because of some walkers or queued
741721fffeKacheong Poon	 * packets in squeue for the timewait state.
742721fffeKacheong Poon	 */
743721fffeKacheong Poon	CONN_DEC_REF(connp);
7443e95bd4Anders Persson
7453e95bd4Anders Persson	/*
7463e95bd4Anders Persson	 * EINPROGRESS tells sockfs to wait for a 'closed' upcall before
7473e95bd4Anders Persson	 * freeing the socket.
7483e95bd4Anders Persson	 */
7493e95bd4Anders Persson	return (EINPROGRESS);
750721fffeKacheong Poon}
751721fffeKacheong Poon
752721fffeKacheong Poon/* ARGSUSED */
753721fffeKacheong Poonsock_lower_handle_t
754721fffeKacheong Poontcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
755721fffeKacheong Poon    uint_t *smodep, int *errorp, int flags, cred_t *credp)
756721fffeKacheong Poon{
757721fffeKacheong Poon	conn_t		*connp;
758721fffeKacheong Poon	boolean_t	isv6 = family == AF_INET6;
759ca3c8f4David Höppner
760721fffeKacheong Poon	if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) ||
761721fffeKacheong Poon	    (proto != 0 && proto != IPPROTO_TCP)) {
762721fffeKacheong Poon		*errorp = EPROTONOSUPPORT;
763721fffeKacheong Poon		return (NULL);
764721fffeKacheong Poon	}
765721fffeKacheong Poon
766721fffeKacheong Poon	connp = tcp_create_common(credp, isv6, B_TRUE, errorp);
767721fffeKacheong Poon	if (connp == NULL) {
768721fffeKacheong Poon		return (NULL);
769721fffeKacheong Poon	}
770721fffeKacheong Poon
771721fffeKacheong Poon	/*
772721fffeKacheong Poon	 * Put the ref for TCP. Ref for IP was already put
773ca3c8f4David Höppner	 * by ipcl_conn_create. Also make the conn_t globally
774721fffeKacheong Poon	 * visible to walkers
775721fffeKacheong Poon	 */
776721fffeKacheong Poon	mutex_enter(&connp->conn_lock);
777721fffeKacheong Poon	CONN_INC_REF_LOCKED(connp);
778721fffeKacheong Poon	ASSERT(connp->conn_ref == 2);
779721fffeKacheong Poon	connp->conn_state_flags &= ~CONN_INCIPIENT;
780721fffeKacheong Poon
781721fffeKacheong Poon	connp->conn_flags |= IPCL_NONSTR;
782721fffeKacheong Poon	mutex_exit(&connp->conn_lock);
783721fffeKacheong Poon
784721fffeKacheong Poon	ASSERT(errorp != NULL);
785721fffeKacheong Poon	*errorp = 0;
786721fffeKacheong Poon	*sock_downcalls = &sock_tcp_downcalls;
787721fffeKacheong Poon	*smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP |
788721fffeKacheong Poon	    SM_SENDFILESUPP;
789721fffeKacheong Poon
790721fffeKacheong Poon	return ((sock_lower_handle_t)connp);
791721fffeKacheong Poon}
792721fffeKacheong Poon
7933e95bd4Anders Persson/*
7943e95bd4Anders Persson * tcp_fallback
7953e95bd4Anders Persson *
7963e95bd4Anders Persson * A direct socket is falling back to using STREAMS. The queue
7973e95bd4Anders Persson * that is being passed down was created using tcp_open() with
7983e95bd4Anders Persson * the SO_FALLBACK flag set. As a result, the queue is not
7993e95bd4Anders Persson * associated with a conn, and the q_ptrs instead contain the
 * dev and minor arena that should be used.
8013e95bd4Anders Persson *
8023e95bd4Anders Persson * The 'issocket' flag indicates whether the FireEngine
8033e95bd4Anders Persson * optimizations should be used. The common case would be that
8043e95bd4Anders Persson * optimizations are enabled, and they might be subsequently
8053e95bd4Anders Persson * disabled using the _SIOCSOCKFALLBACK ioctl.
8063e95bd4Anders Persson */
8073e95bd4Anders Persson
8083e95bd4Anders Persson/*
8093e95bd4Anders Persson * An active connection is falling back to TPI. Gather all the information
8103e95bd4Anders Persson * required by the STREAM head and TPI sonode and send it up.
8113e95bd4Anders Persson */
8123e95bd4Anders Perssonstatic void
8133e95bd4Anders Perssontcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q,
8143e95bd4Anders Persson    boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
8153e95bd4Anders Persson    sock_quiesce_arg_t *arg)
8163e95bd4Anders Persson{
8173e95bd4Anders Persson	conn_t			*connp = tcp->tcp_connp;
8183e95bd4Anders Persson	struct stroptions	*stropt;
8193e95bd4Anders Persson	struct T_capability_ack tca;
8203e95bd4Anders Persson	struct sockaddr_in6	laddr, faddr;
8213e95bd4Anders Persson	socklen_t 		laddrlen, faddrlen;
8223e95bd4Anders Persson	short			opts;
8233e95bd4Anders Persson	int			error;
8243e95bd4Anders Persson	mblk_t			*mp, *mpnext;
8253e95bd4Anders Persson
8263e95bd4Anders Persson	connp->conn_dev = (dev_t)RD(q)->q_ptr;
8273e95bd4Anders Persson	connp->conn_minor_arena = WR(q)->q_ptr;
8283e95bd4Anders Persson
8293e95bd4Anders Persson	RD(q)->q_ptr = WR(q)->q_ptr = connp;
8303e95bd4Anders Persson
8313e95bd4Anders Persson	connp->conn_rq = RD(q);
8323e95bd4Anders Persson	connp->conn_wq = WR(q);
8333e95bd4Anders Persson
8343e95bd4Anders Persson	WR(q)->q_qinfo = &tcp_sock_winit;
8353e95bd4Anders Persson
8363e95bd4Anders Persson	if (!issocket)
8373e95bd4Anders Persson		tcp_use_pure_tpi(tcp);
8383e95bd4Anders Persson
8393e95bd4Anders Persson	/*
8403e95bd4Anders Persson	 * free the helper stream
8413e95bd4Anders Persson	 */
8423e95bd4Anders Persson	ip_free_helper_stream(connp);
8433e95bd4Anders Persson
8443e95bd4Anders Persson	/*
8453e95bd4Anders Persson	 * Notify the STREAM head about options
8463e95bd4Anders Persson	 */
8473e95bd4Anders Persson	DB_TYPE(stropt_mp) = M_SETOPTS;
8483e95bd4Anders Persson	stropt = (struct stroptions *)stropt_mp->b_rptr;
8493e95bd4Anders Persson	stropt_mp->b_wptr += sizeof (struct stroptions);
8503e95bd4Anders Persson	stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK;
8513e95bd4Anders Persson
8523e95bd4Anders Persson	stropt->so_wroff = connp->conn_ht_iphc_len + (tcp->tcp_loopback ? 0 :
8533e95bd4Anders Persson	    tcp->tcp_tcps->tcps_wroff_xtra);
8543e95bd4Anders Persson	if (tcp->tcp_snd_sack_ok)
8553e95bd4Anders Persson		stropt->so_wroff += TCPOPT_MAX_SACK_LEN;
8563e95bd4Anders Persson	stropt->so_hiwat = connp->conn_rcvbuf;
8573e95bd4Anders Persson	stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE);
8583e95bd4Anders Persson
8593e95bd4Anders Persson	putnext(RD(q), stropt_mp);
8603e95bd4Anders Persson
8613e95bd4Anders Persson	/*
8623e95bd4Anders Persson	 * Collect the information needed to sync with the sonode
8633e95bd4Anders Persson	 */
8643e95bd4Anders Persson	tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID);
8653e95bd4Anders Persson
8663e95bd4Anders Persson	laddrlen = faddrlen = sizeof (sin6_t);
8673e95bd4Anders Persson	(void) tcp_getsockname((sock_lower_handle_t)connp,
8683e95bd4Anders Persson	    (struct sockaddr *)&laddr, &laddrlen, CRED());
8693e95bd4Anders Persson	error = tcp_getpeername((sock_lower_handle_t)connp,
8703e95bd4Anders Persson	    (struct sockaddr *)&faddr, &faddrlen, CRED());
8713e95bd4Anders Persson	if (error != 0)
8723e95bd4Anders Persson		faddrlen = 0;
8733e95bd4Anders Persson
8743e95bd4Anders Persson	opts = 0;
8753e95bd4Anders Persson	if (connp->conn_oobinline)
8763e95bd4Anders Persson		opts |= SO_OOBINLINE;
8773e95bd4Anders Persson	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
8783e95bd4Anders Persson		opts |= SO_DONTROUTE;
8793e95bd4Anders Persson
8803e95bd4Anders Persson	/*
8813e95bd4Anders Persson	 * Notify the socket that the protocol is now quiescent,
8823e95bd4Anders Persson	 * and it's therefore safe move data from the socket
8833e95bd4Anders Persson	 * to the stream head.
8843e95bd4Anders Persson	 */
8853e95bd4Anders Persson	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
8863e95bd4Anders Persson	    (struct sockaddr *)&laddr, laddrlen,
8873e95bd4Anders Persson	    (struct sockaddr *)&faddr, faddrlen, opts);
8883e95bd4Anders Persson
8893e95bd4Anders Persson	while (mp != NULL) {
8903e95bd4Anders Persson		mpnext = mp->b_next;
8913e95bd4Anders Persson		tcp->tcp_rcv_list = mp->b_next;
8923e95bd4Anders Persson		mp->b_next = NULL;
8933e95bd4Anders Persson		putnext(q, mp);
8943e95bd4Anders Persson		mp = mpnext;
8953e95bd4Anders Persson	}
8963e95bd4Anders Persson	ASSERT(tcp->tcp_rcv_last_head == NULL);
8973e95bd4Anders Persson	ASSERT(tcp->tcp_rcv_last_tail == NULL);
8983e95bd4Anders Persson	ASSERT(tcp->tcp_rcv_cnt == 0);
8993e95bd4Anders Persson
9003e95bd4Anders Persson	/*
9013e95bd4Anders Persson	 * All eagers in q0 are marked as being non-STREAM, so they will
9023e95bd4Anders Persson	 * make su_newconn upcalls when the handshake completes, which
9033e95bd4Anders Persson	 * will fail (resulting in the conn being closed). So we just blow
9043e95bd4Anders Persson	 * off everything in q0 instead of waiting for the inevitable.
9053e95bd4Anders Persson	 */
9063e95bd4Anders Persson	if (tcp->tcp_conn_req_cnt_q0 != 0)
9073e95bd4Anders Persson		tcp_eager_cleanup(tcp, B_TRUE);
9083e95bd4Anders Persson}
9093e95bd4Anders Persson
9103e95bd4Anders Persson/*
9113e95bd4Anders Persson * An eager is falling back to TPI. All we have to do is send
9123e95bd4Anders Persson * up a T_CONN_IND.
9133e95bd4Anders Persson */
9143e95bd4Anders Perssonstatic void
9153e95bd4Anders Perssontcp_fallback_eager(tcp_t *eager, boolean_t issocket,
9163e95bd4Anders Persson    so_proto_quiesced_cb_t quiesced_cb, sock_quiesce_arg_t *arg)
9173e95bd4Anders Persson{
9183e95bd4Anders Persson	conn_t *connp = eager->tcp_connp;
9193e95bd4Anders Persson	tcp_t *listener = eager->tcp_listener;
9203e95bd4Anders Persson	mblk_t *mp;
9213e95bd4Anders Persson
9223e95bd4Anders Persson	ASSERT(listener != NULL);
9233e95bd4Anders Persson
9243e95bd4Anders Persson	/*
9253e95bd4Anders Persson	 * Notify the socket that the protocol is now quiescent,