1721fffe3SKacheong Poon /* 2721fffe3SKacheong Poon * CDDL HEADER START 3721fffe3SKacheong Poon * 4721fffe3SKacheong Poon * The contents of this file are subject to the terms of the 5721fffe3SKacheong Poon * Common Development and Distribution License (the "License"). 6721fffe3SKacheong Poon * You may not use this file except in compliance with the License. 7721fffe3SKacheong Poon * 8721fffe3SKacheong Poon * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9721fffe3SKacheong Poon * or http://www.opensolaris.org/os/licensing. 10721fffe3SKacheong Poon * See the License for the specific language governing permissions 11721fffe3SKacheong Poon * and limitations under the License. 12721fffe3SKacheong Poon * 13721fffe3SKacheong Poon * When distributing Covered Code, include this CDDL HEADER in each 14721fffe3SKacheong Poon * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15721fffe3SKacheong Poon * If applicable, add the following below this CDDL HEADER, with the 16721fffe3SKacheong Poon * fields enclosed by brackets "[]" replaced with your own identifying 17721fffe3SKacheong Poon * information: Portions Copyright [yyyy] [name of copyright owner] 18721fffe3SKacheong Poon * 19721fffe3SKacheong Poon * CDDL HEADER END 20721fffe3SKacheong Poon */ 21721fffe3SKacheong Poon 22721fffe3SKacheong Poon /* 239ee3959aSAnders Persson * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 24721fffe3SKacheong Poon */ 25721fffe3SKacheong Poon 26721fffe3SKacheong Poon /* This file contains all TCP kernel socket related functions. */ 27721fffe3SKacheong Poon 28721fffe3SKacheong Poon #include <sys/types.h> 29721fffe3SKacheong Poon #include <sys/strlog.h> 30721fffe3SKacheong Poon #include <sys/policy.h> 31721fffe3SKacheong Poon #include <sys/sockio.h> 32721fffe3SKacheong Poon #include <sys/strsubr.h> 33721fffe3SKacheong Poon #include <sys/strsun.h> 34721fffe3SKacheong Poon #include <sys/squeue_impl.h> 35721fffe3SKacheong Poon #include <sys/squeue.h> 363e95bd4aSAnders Persson #define _SUN_TPI_VERSION 2 37721fffe3SKacheong Poon #include <sys/tihdr.h> 38721fffe3SKacheong Poon #include <sys/timod.h> 39721fffe3SKacheong Poon #include <sys/tpicommon.h> 40721fffe3SKacheong Poon #include <sys/socketvar.h> 41721fffe3SKacheong Poon 42721fffe3SKacheong Poon #include <inet/common.h> 43721fffe3SKacheong Poon #include <inet/proto_set.h> 44721fffe3SKacheong Poon #include <inet/ip.h> 45721fffe3SKacheong Poon #include <inet/tcp.h> 46721fffe3SKacheong Poon #include <inet/tcp_impl.h> 47721fffe3SKacheong Poon 48721fffe3SKacheong Poon static void tcp_activate(sock_lower_handle_t, sock_upper_handle_t, 49721fffe3SKacheong Poon sock_upcalls_t *, int, cred_t *); 50721fffe3SKacheong Poon static int tcp_accept(sock_lower_handle_t, sock_lower_handle_t, 51721fffe3SKacheong Poon sock_upper_handle_t, cred_t *); 52721fffe3SKacheong Poon static int tcp_bind(sock_lower_handle_t, struct sockaddr *, 53721fffe3SKacheong Poon socklen_t, cred_t *); 54721fffe3SKacheong Poon static int tcp_listen(sock_lower_handle_t, int, cred_t *); 55721fffe3SKacheong Poon static int tcp_connect(sock_lower_handle_t, const struct sockaddr *, 56721fffe3SKacheong Poon socklen_t, sock_connid_t *, cred_t *); 57721fffe3SKacheong Poon static int tcp_getsockopt(sock_lower_handle_t, int, int, void *, 58721fffe3SKacheong Poon socklen_t *, cred_t *); 59721fffe3SKacheong Poon static int tcp_setsockopt(sock_lower_handle_t, int, int, const void *, 60721fffe3SKacheong Poon socklen_t, cred_t *); 61721fffe3SKacheong Poon static int tcp_sendmsg(sock_lower_handle_t, mblk_t *, struct nmsghdr *, 62721fffe3SKacheong Poon cred_t *cr); 63721fffe3SKacheong Poon static int tcp_shutdown(sock_lower_handle_t, int, cred_t *); 64721fffe3SKacheong Poon static void tcp_clr_flowctrl(sock_lower_handle_t); 65721fffe3SKacheong Poon static int tcp_ioctl(sock_lower_handle_t, int, intptr_t, int, int32_t *, 66721fffe3SKacheong Poon cred_t *); 67721fffe3SKacheong Poon static int tcp_close(sock_lower_handle_t, int, cred_t *); 68721fffe3SKacheong Poon 69721fffe3SKacheong Poon sock_downcalls_t sock_tcp_downcalls = { 70721fffe3SKacheong Poon tcp_activate, 71721fffe3SKacheong Poon tcp_accept, 72721fffe3SKacheong Poon tcp_bind, 73721fffe3SKacheong Poon tcp_listen, 74721fffe3SKacheong Poon tcp_connect, 75721fffe3SKacheong Poon tcp_getpeername, 76721fffe3SKacheong Poon tcp_getsockname, 77721fffe3SKacheong Poon tcp_getsockopt, 78721fffe3SKacheong Poon tcp_setsockopt, 79721fffe3SKacheong Poon tcp_sendmsg, 80721fffe3SKacheong Poon NULL, 81721fffe3SKacheong Poon NULL, 82721fffe3SKacheong Poon NULL, 83721fffe3SKacheong Poon tcp_shutdown, 84721fffe3SKacheong Poon tcp_clr_flowctrl, 85721fffe3SKacheong Poon tcp_ioctl, 86721fffe3SKacheong Poon tcp_close, 87721fffe3SKacheong Poon }; 88721fffe3SKacheong Poon 89721fffe3SKacheong Poon /* ARGSUSED */ 90721fffe3SKacheong Poon static void 91721fffe3SKacheong Poon tcp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 92721fffe3SKacheong Poon sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 93721fffe3SKacheong Poon { 94721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 95721fffe3SKacheong Poon struct sock_proto_props sopp; 96721fffe3SKacheong Poon extern struct module_info tcp_rinfo; 97721fffe3SKacheong Poon 98721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle == NULL); 99721fffe3SKacheong Poon 100721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 101721fffe3SKacheong Poon ASSERT(cr != NULL); 102721fffe3SKacheong Poon 103721fffe3SKacheong Poon sopp.sopp_flags = SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 104721fffe3SKacheong Poon SOCKOPT_MAXPSZ | SOCKOPT_MAXBLK | SOCKOPT_RCVTIMER | 105721fffe3SKacheong Poon SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ; 106721fffe3SKacheong Poon 107721fffe3SKacheong Poon sopp.sopp_rxhiwat = SOCKET_RECVHIWATER; 108721fffe3SKacheong Poon sopp.sopp_rxlowat = SOCKET_RECVLOWATER; 109721fffe3SKacheong Poon sopp.sopp_maxpsz = INFPSZ; 110721fffe3SKacheong Poon sopp.sopp_maxblk = INFPSZ; 111721fffe3SKacheong Poon sopp.sopp_rcvtimer = SOCKET_TIMER_INTERVAL; 112721fffe3SKacheong Poon sopp.sopp_rcvthresh = SOCKET_RECVHIWATER >> 3; 113721fffe3SKacheong Poon sopp.sopp_maxaddrlen = sizeof (sin6_t); 114721fffe3SKacheong Poon sopp.sopp_minpsz = (tcp_rinfo.mi_minpsz == 1) ? 0 : 115721fffe3SKacheong Poon tcp_rinfo.mi_minpsz; 116721fffe3SKacheong Poon 117721fffe3SKacheong Poon connp->conn_upcalls = sock_upcalls; 118721fffe3SKacheong Poon connp->conn_upper_handle = sock_handle; 119721fffe3SKacheong Poon 120721fffe3SKacheong Poon ASSERT(connp->conn_rcvbuf != 0 && 121721fffe3SKacheong Poon connp->conn_rcvbuf == connp->conn_tcp->tcp_rwnd); 122721fffe3SKacheong Poon (*sock_upcalls->su_set_proto_props)(sock_handle, &sopp); 123721fffe3SKacheong Poon } 124721fffe3SKacheong Poon 1253e95bd4aSAnders Persson /*ARGSUSED*/ 126721fffe3SKacheong Poon static int 127721fffe3SKacheong Poon tcp_accept(sock_lower_handle_t lproto_handle, 128721fffe3SKacheong Poon sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 129721fffe3SKacheong Poon cred_t *cr) 130721fffe3SKacheong Poon { 131721fffe3SKacheong Poon conn_t *lconnp, *econnp; 132721fffe3SKacheong Poon tcp_t *listener, *eager; 133721fffe3SKacheong Poon 134*dd49f125SAnders Persson /* 135*dd49f125SAnders Persson * KSSL can move a socket from one listener to another, in which 136*dd49f125SAnders Persson * case `lproto_handle' points to the new listener. To ensure that 137*dd49f125SAnders Persson * the original listener is used the information is obtained from 138*dd49f125SAnders Persson * the eager. 139*dd49f125SAnders Persson */ 140721fffe3SKacheong Poon econnp = (conn_t *)eproto_handle; 141721fffe3SKacheong Poon eager = econnp->conn_tcp; 1423e95bd4aSAnders Persson ASSERT(IPCL_IS_NONSTR(econnp)); 143*dd49f125SAnders Persson ASSERT(eager->tcp_listener != NULL); 144*dd49f125SAnders Persson listener = eager->tcp_listener; 145*dd49f125SAnders Persson lconnp = (conn_t *)listener->tcp_connp; 146*dd49f125SAnders Persson ASSERT(listener->tcp_state == TCPS_LISTEN); 1473e95bd4aSAnders Persson ASSERT(lconnp->conn_upper_handle != NULL); 148721fffe3SKacheong Poon 149721fffe3SKacheong Poon /* 1503e95bd4aSAnders Persson * It is possible for the accept thread to race with the thread that 1513e95bd4aSAnders Persson * made the su_newconn upcall in tcp_newconn_notify. Both 1523e95bd4aSAnders Persson * tcp_newconn_notify and tcp_accept require that conn_upper_handle 1533e95bd4aSAnders Persson * and conn_upcalls be set before returning, so they both write to 1543e95bd4aSAnders Persson * them. However, we're guaranteed that the value written is the same 1553e95bd4aSAnders Persson * for both threads. 156721fffe3SKacheong Poon */ 1573e95bd4aSAnders Persson ASSERT(econnp->conn_upper_handle == NULL || 1583e95bd4aSAnders Persson econnp->conn_upper_handle == sock_handle); 1593e95bd4aSAnders Persson ASSERT(econnp->conn_upcalls == NULL || 1603e95bd4aSAnders Persson econnp->conn_upcalls == lconnp->conn_upcalls); 161721fffe3SKacheong Poon econnp->conn_upper_handle = sock_handle; 162721fffe3SKacheong Poon econnp->conn_upcalls = lconnp->conn_upcalls; 1633e95bd4aSAnders Persson 1643e95bd4aSAnders Persson ASSERT(econnp->conn_netstack == 1653e95bd4aSAnders Persson listener->tcp_connp->conn_netstack); 1663e95bd4aSAnders Persson ASSERT(eager->tcp_tcps == listener->tcp_tcps); 1673e95bd4aSAnders Persson 1683e95bd4aSAnders Persson /* 1693e95bd4aSAnders Persson * We should have a minimum of 2 references on the conn at this 1703e95bd4aSAnders Persson * point. One for TCP and one for the newconn notification 1713e95bd4aSAnders Persson * (which is now taken over by IP). In the normal case we would 1723e95bd4aSAnders Persson * also have another reference (making a total of 3) for the conn 1733e95bd4aSAnders Persson * being in the classifier hash list. However the eager could have 1743e95bd4aSAnders Persson * received an RST subsequently and tcp_closei_local could have 1753e95bd4aSAnders Persson * removed the eager from the classifier hash list, hence we can't 1763e95bd4aSAnders Persson * assert that reference. 1773e95bd4aSAnders Persson */ 1783e95bd4aSAnders Persson ASSERT(econnp->conn_ref >= 2); 1793e95bd4aSAnders Persson 1803e95bd4aSAnders Persson /* 1813e95bd4aSAnders Persson * An error is returned if this conn has been reset, which will 1823e95bd4aSAnders Persson * cause the socket to be closed immediately. The eager will be 1833e95bd4aSAnders Persson * unlinked from the listener during close. 1843e95bd4aSAnders Persson */ 1853e95bd4aSAnders Persson if (eager->tcp_state < TCPS_ESTABLISHED) 1863e95bd4aSAnders Persson return (ECONNABORTED); 1873e95bd4aSAnders Persson 1883e95bd4aSAnders Persson mutex_enter(&listener->tcp_eager_lock); 1893e95bd4aSAnders Persson /* 1903e95bd4aSAnders Persson * Non-STREAMS listeners never defer the notification of new 1913e95bd4aSAnders Persson * connections. 1923e95bd4aSAnders Persson */ 1933e95bd4aSAnders Persson ASSERT(!listener->tcp_eager_prev_q0->tcp_conn_def_q0); 1943e95bd4aSAnders Persson tcp_eager_unlink(eager); 1953e95bd4aSAnders Persson mutex_exit(&listener->tcp_eager_lock); 1963e95bd4aSAnders Persson CONN_DEC_REF(listener->tcp_connp); 1973e95bd4aSAnders Persson 1983e95bd4aSAnders Persson return (0); 199721fffe3SKacheong Poon } 200721fffe3SKacheong Poon 201721fffe3SKacheong Poon static int 202721fffe3SKacheong Poon tcp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 203721fffe3SKacheong Poon socklen_t len, cred_t *cr) 204721fffe3SKacheong Poon { 205721fffe3SKacheong Poon int error; 206721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 207721fffe3SKacheong Poon 208721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 209721fffe3SKacheong Poon ASSERT(cr != NULL); 210721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 211721fffe3SKacheong Poon 2129ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 213721fffe3SKacheong Poon if (error != 0) { 214721fffe3SKacheong Poon /* failed to enter */ 215721fffe3SKacheong Poon return (ENOSR); 216721fffe3SKacheong Poon } 217721fffe3SKacheong Poon 218721fffe3SKacheong Poon /* binding to a NULL address really means unbind */ 219721fffe3SKacheong Poon if (sa == NULL) { 220721fffe3SKacheong Poon if (connp->conn_tcp->tcp_state < TCPS_LISTEN) 221721fffe3SKacheong Poon error = tcp_do_unbind(connp); 222721fffe3SKacheong Poon else 223721fffe3SKacheong Poon error = EINVAL; 224721fffe3SKacheong Poon } else { 225721fffe3SKacheong Poon error = tcp_do_bind(connp, sa, len, cr, B_TRUE); 226721fffe3SKacheong Poon } 227721fffe3SKacheong Poon 2289ee3959aSAnders Persson squeue_synch_exit(connp); 229721fffe3SKacheong Poon 230721fffe3SKacheong Poon if (error < 0) { 231721fffe3SKacheong Poon if (error == -TOUTSTATE) 232721fffe3SKacheong Poon error = EINVAL; 233721fffe3SKacheong Poon else 234721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 235721fffe3SKacheong Poon } 236721fffe3SKacheong Poon 237721fffe3SKacheong Poon return (error); 238721fffe3SKacheong Poon } 239721fffe3SKacheong Poon 240721fffe3SKacheong Poon /* ARGSUSED */ 241721fffe3SKacheong Poon static int 242721fffe3SKacheong Poon tcp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 243721fffe3SKacheong Poon { 244721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 2453e95bd4aSAnders Persson tcp_t *tcp = connp->conn_tcp; 246721fffe3SKacheong Poon int error; 247721fffe3SKacheong Poon 248721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 249721fffe3SKacheong Poon 250721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 251721fffe3SKacheong Poon ASSERT(cr != NULL); 252721fffe3SKacheong Poon 2539ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 254721fffe3SKacheong Poon if (error != 0) { 255721fffe3SKacheong Poon /* failed to enter */ 256721fffe3SKacheong Poon return (ENOBUFS); 257721fffe3SKacheong Poon } 258721fffe3SKacheong Poon 259721fffe3SKacheong Poon error = tcp_do_listen(connp, NULL, 0, backlog, cr, B_FALSE); 260721fffe3SKacheong Poon if (error == 0) { 2613e95bd4aSAnders Persson /* 2623e95bd4aSAnders Persson * sockfs needs to know what's the maximum number of socket 2633e95bd4aSAnders Persson * that can be queued on the listener. 2643e95bd4aSAnders Persson */ 265721fffe3SKacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 2663e95bd4aSAnders Persson SOCK_OPCTL_ENAB_ACCEPT, 2673e95bd4aSAnders Persson (uintptr_t)(tcp->tcp_conn_req_max + 2683e95bd4aSAnders Persson tcp->tcp_tcps->tcps_conn_req_max_q0)); 269721fffe3SKacheong Poon } else if (error < 0) { 270721fffe3SKacheong Poon if (error == -TOUTSTATE) 271721fffe3SKacheong Poon error = EINVAL; 272721fffe3SKacheong Poon else 273721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 274721fffe3SKacheong Poon } 2759ee3959aSAnders Persson squeue_synch_exit(connp); 276721fffe3SKacheong Poon return (error); 277721fffe3SKacheong Poon } 278721fffe3SKacheong Poon 279721fffe3SKacheong Poon static int 280721fffe3SKacheong Poon tcp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 281721fffe3SKacheong Poon socklen_t len, sock_connid_t *id, cred_t *cr) 282721fffe3SKacheong Poon { 283721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 284721fffe3SKacheong Poon int error; 285721fffe3SKacheong Poon 286721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 287721fffe3SKacheong Poon 288721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 289721fffe3SKacheong Poon ASSERT(cr != NULL); 290721fffe3SKacheong Poon 291721fffe3SKacheong Poon error = proto_verify_ip_addr(connp->conn_family, sa, len); 292721fffe3SKacheong Poon if (error != 0) { 293721fffe3SKacheong Poon return (error); 294721fffe3SKacheong Poon } 295721fffe3SKacheong Poon 2969ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 297721fffe3SKacheong Poon if (error != 0) { 298721fffe3SKacheong Poon /* failed to enter */ 299721fffe3SKacheong Poon return (ENOSR); 300721fffe3SKacheong Poon } 301721fffe3SKacheong Poon 302721fffe3SKacheong Poon /* 303721fffe3SKacheong Poon * TCP supports quick connect, so no need to do an implicit bind 304721fffe3SKacheong Poon */ 305721fffe3SKacheong Poon error = tcp_do_connect(connp, sa, len, cr, curproc->p_pid); 306721fffe3SKacheong Poon if (error == 0) { 307721fffe3SKacheong Poon *id = connp->conn_tcp->tcp_connid; 308721fffe3SKacheong Poon } else if (error < 0) { 309721fffe3SKacheong Poon if (error == -TOUTSTATE) { 310721fffe3SKacheong Poon switch (connp->conn_tcp->tcp_state) { 311721fffe3SKacheong Poon case TCPS_SYN_SENT: 312721fffe3SKacheong Poon error = EALREADY; 313721fffe3SKacheong Poon break; 314721fffe3SKacheong Poon case TCPS_ESTABLISHED: 315721fffe3SKacheong Poon error = EISCONN; 316721fffe3SKacheong Poon break; 317721fffe3SKacheong Poon case TCPS_LISTEN: 318721fffe3SKacheong Poon error = EOPNOTSUPP; 319721fffe3SKacheong Poon break; 320721fffe3SKacheong Poon default: 321721fffe3SKacheong Poon error = EINVAL; 322721fffe3SKacheong Poon break; 323721fffe3SKacheong Poon } 324721fffe3SKacheong Poon } else { 325721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 326721fffe3SKacheong Poon } 327721fffe3SKacheong Poon } 328721fffe3SKacheong Poon 329721fffe3SKacheong Poon if (connp->conn_tcp->tcp_loopback) { 330721fffe3SKacheong Poon struct sock_proto_props sopp; 331721fffe3SKacheong Poon 332721fffe3SKacheong Poon sopp.sopp_flags = SOCKOPT_LOOPBACK; 333721fffe3SKacheong Poon sopp.sopp_loopback = B_TRUE; 334721fffe3SKacheong Poon 335721fffe3SKacheong Poon (*connp->conn_upcalls->su_set_proto_props)( 336721fffe3SKacheong Poon connp->conn_upper_handle, &sopp); 337721fffe3SKacheong Poon } 338721fffe3SKacheong Poon done: 3399ee3959aSAnders Persson squeue_synch_exit(connp); 340721fffe3SKacheong Poon 341721fffe3SKacheong Poon return ((error == 0) ? EINPROGRESS : error); 342721fffe3SKacheong Poon } 343721fffe3SKacheong Poon 344721fffe3SKacheong Poon /* ARGSUSED3 */ 345721fffe3SKacheong Poon int 346721fffe3SKacheong Poon tcp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *addr, 347721fffe3SKacheong Poon socklen_t *addrlenp, cred_t *cr) 348721fffe3SKacheong Poon { 349721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 350721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 351721fffe3SKacheong Poon 352721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 353721fffe3SKacheong Poon ASSERT(cr != NULL); 354721fffe3SKacheong Poon 355721fffe3SKacheong Poon ASSERT(tcp != NULL); 356721fffe3SKacheong Poon if (tcp->tcp_state < TCPS_SYN_RCVD) 357721fffe3SKacheong Poon return (ENOTCONN); 358721fffe3SKacheong Poon 359721fffe3SKacheong Poon return (conn_getpeername(connp, addr, addrlenp)); 360721fffe3SKacheong Poon } 361721fffe3SKacheong Poon 362721fffe3SKacheong Poon /* ARGSUSED3 */ 363721fffe3SKacheong Poon int 364721fffe3SKacheong Poon tcp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *addr, 365721fffe3SKacheong Poon socklen_t *addrlenp, cred_t *cr) 366721fffe3SKacheong Poon { 367721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 368721fffe3SKacheong Poon 369721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 370721fffe3SKacheong Poon ASSERT(cr != NULL); 371721fffe3SKacheong Poon 372721fffe3SKacheong Poon return (conn_getsockname(connp, addr, addrlenp)); 373721fffe3SKacheong Poon } 374721fffe3SKacheong Poon 375721fffe3SKacheong Poon /* returns UNIX error, the optlen is a value-result arg */ 376721fffe3SKacheong Poon static int 377721fffe3SKacheong Poon tcp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 378721fffe3SKacheong Poon void *optvalp, socklen_t *optlen, cred_t *cr) 379721fffe3SKacheong Poon { 380721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 381721fffe3SKacheong Poon int error; 382721fffe3SKacheong Poon t_uscalar_t max_optbuf_len; 383721fffe3SKacheong Poon void *optvalp_buf; 384721fffe3SKacheong Poon int len; 385721fffe3SKacheong Poon 386721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 387721fffe3SKacheong Poon 388721fffe3SKacheong Poon error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 389721fffe3SKacheong Poon tcp_opt_obj.odb_opt_des_arr, 390721fffe3SKacheong Poon tcp_opt_obj.odb_opt_arr_cnt, 391721fffe3SKacheong Poon B_FALSE, B_TRUE, cr); 392721fffe3SKacheong Poon if (error != 0) { 393721fffe3SKacheong Poon if (error < 0) { 394721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 395721fffe3SKacheong Poon } 396721fffe3SKacheong Poon return (error); 397721fffe3SKacheong Poon } 398721fffe3SKacheong Poon 399721fffe3SKacheong Poon optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 400721fffe3SKacheong Poon 4019ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 402721fffe3SKacheong Poon if (error == ENOMEM) { 403721fffe3SKacheong Poon kmem_free(optvalp_buf, max_optbuf_len); 404721fffe3SKacheong Poon return (ENOMEM); 405721fffe3SKacheong Poon } 406721fffe3SKacheong Poon 407721fffe3SKacheong Poon len = tcp_opt_get(connp, level, option_name, optvalp_buf); 4089ee3959aSAnders Persson squeue_synch_exit(connp); 409721fffe3SKacheong Poon 410721fffe3SKacheong Poon if (len == -1) { 411721fffe3SKacheong Poon kmem_free(optvalp_buf, max_optbuf_len); 412721fffe3SKacheong Poon return (EINVAL); 413721fffe3SKacheong Poon } 414721fffe3SKacheong Poon 415721fffe3SKacheong Poon /* 416721fffe3SKacheong Poon * update optlen and copy option value 417721fffe3SKacheong Poon */ 418721fffe3SKacheong Poon t_uscalar_t size = MIN(len, *optlen); 419721fffe3SKacheong Poon 420721fffe3SKacheong Poon bcopy(optvalp_buf, optvalp, size); 421721fffe3SKacheong Poon bcopy(&size, optlen, sizeof (size)); 422721fffe3SKacheong Poon 423721fffe3SKacheong Poon kmem_free(optvalp_buf, max_optbuf_len); 424721fffe3SKacheong Poon return (0); 425721fffe3SKacheong Poon } 426721fffe3SKacheong Poon 427721fffe3SKacheong Poon static int 428721fffe3SKacheong Poon tcp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 429721fffe3SKacheong Poon const void *optvalp, socklen_t optlen, cred_t *cr) 430721fffe3SKacheong Poon { 431721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 432721fffe3SKacheong Poon int error; 433721fffe3SKacheong Poon 434721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 435721fffe3SKacheong Poon /* 436721fffe3SKacheong Poon * Entering the squeue synchronously can result in a context switch, 437721fffe3SKacheong Poon * which can cause a rather sever performance degradation. So we try to 438721fffe3SKacheong Poon * handle whatever options we can without entering the squeue. 439721fffe3SKacheong Poon */ 440721fffe3SKacheong Poon if (level == IPPROTO_TCP) { 441721fffe3SKacheong Poon switch (option_name) { 442721fffe3SKacheong Poon case TCP_NODELAY: 443721fffe3SKacheong Poon if (optlen != sizeof (int32_t)) 444721fffe3SKacheong Poon return (EINVAL); 445721fffe3SKacheong Poon mutex_enter(&connp->conn_tcp->tcp_non_sq_lock); 446721fffe3SKacheong Poon connp->conn_tcp->tcp_naglim = *(int *)optvalp ? 1 : 447721fffe3SKacheong Poon connp->conn_tcp->tcp_mss; 448721fffe3SKacheong Poon mutex_exit(&connp->conn_tcp->tcp_non_sq_lock); 449721fffe3SKacheong Poon return (0); 450721fffe3SKacheong Poon default: 451721fffe3SKacheong Poon break; 452721fffe3SKacheong Poon } 453721fffe3SKacheong Poon } 454721fffe3SKacheong Poon 4559ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 456721fffe3SKacheong Poon if (error == ENOMEM) { 457721fffe3SKacheong Poon return (ENOMEM); 458721fffe3SKacheong Poon } 459721fffe3SKacheong Poon 460721fffe3SKacheong Poon error = proto_opt_check(level, option_name, optlen, NULL, 461721fffe3SKacheong Poon tcp_opt_obj.odb_opt_des_arr, 462721fffe3SKacheong Poon tcp_opt_obj.odb_opt_arr_cnt, 463721fffe3SKacheong Poon B_TRUE, B_FALSE, cr); 464721fffe3SKacheong Poon 465721fffe3SKacheong Poon if (error != 0) { 466721fffe3SKacheong Poon if (error < 0) { 467721fffe3SKacheong Poon error = proto_tlitosyserr(-error); 468721fffe3SKacheong Poon } 4699ee3959aSAnders Persson squeue_synch_exit(connp); 470721fffe3SKacheong Poon return (error); 471721fffe3SKacheong Poon } 472721fffe3SKacheong Poon 473721fffe3SKacheong Poon error = tcp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 474721fffe3SKacheong Poon optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 475721fffe3SKacheong Poon NULL, cr); 4769ee3959aSAnders Persson squeue_synch_exit(connp); 477721fffe3SKacheong Poon 478721fffe3SKacheong Poon ASSERT(error >= 0); 479721fffe3SKacheong Poon 480721fffe3SKacheong Poon return (error); 481721fffe3SKacheong Poon } 482721fffe3SKacheong Poon 483721fffe3SKacheong Poon /* ARGSUSED */ 484721fffe3SKacheong Poon static int 485721fffe3SKacheong Poon tcp_sendmsg(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 486721fffe3SKacheong Poon cred_t *cr) 487721fffe3SKacheong Poon { 488721fffe3SKacheong Poon tcp_t *tcp; 489721fffe3SKacheong Poon uint32_t msize; 490721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 491721fffe3SKacheong Poon int32_t tcpstate; 492721fffe3SKacheong Poon 493721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 494721fffe3SKacheong Poon ASSERT(cr != NULL); 495721fffe3SKacheong Poon 496721fffe3SKacheong Poon ASSERT(connp->conn_ref >= 2); 497721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 498721fffe3SKacheong Poon 499721fffe3SKacheong Poon if (msg->msg_controllen != 0) { 500721fffe3SKacheong Poon freemsg(mp); 501721fffe3SKacheong Poon return (EOPNOTSUPP); 502721fffe3SKacheong Poon } 503721fffe3SKacheong Poon 504721fffe3SKacheong Poon switch (DB_TYPE(mp)) { 505721fffe3SKacheong Poon case M_DATA: 506721fffe3SKacheong Poon tcp = connp->conn_tcp; 507721fffe3SKacheong Poon ASSERT(tcp != NULL); 508721fffe3SKacheong Poon 509721fffe3SKacheong Poon tcpstate = tcp->tcp_state; 510721fffe3SKacheong Poon if (tcpstate < TCPS_ESTABLISHED) { 511721fffe3SKacheong Poon freemsg(mp); 512721fffe3SKacheong Poon /* 513721fffe3SKacheong Poon * We return ENOTCONN if the endpoint is trying to 514721fffe3SKacheong Poon * connect or has never been connected, and EPIPE if it 515721fffe3SKacheong Poon * has been disconnected. The connection id helps us 516721fffe3SKacheong Poon * distinguish between the last two cases. 517721fffe3SKacheong Poon */ 518721fffe3SKacheong Poon return ((tcpstate == TCPS_SYN_SENT) ? ENOTCONN : 519721fffe3SKacheong Poon ((tcp->tcp_connid > 0) ? EPIPE : ENOTCONN)); 520721fffe3SKacheong Poon } else if (tcpstate > TCPS_CLOSE_WAIT) { 521721fffe3SKacheong Poon freemsg(mp); 522721fffe3SKacheong Poon return (EPIPE); 523721fffe3SKacheong Poon } 524721fffe3SKacheong Poon 525721fffe3SKacheong Poon msize = msgdsize(mp); 526721fffe3SKacheong Poon 527721fffe3SKacheong Poon mutex_enter(&tcp->tcp_non_sq_lock); 528721fffe3SKacheong Poon tcp->tcp_squeue_bytes += msize; 529721fffe3SKacheong Poon /* 530721fffe3SKacheong Poon * Squeue Flow Control 531721fffe3SKacheong Poon */ 532721fffe3SKacheong Poon if (TCP_UNSENT_BYTES(tcp) > connp->conn_sndbuf) { 533721fffe3SKacheong Poon tcp_setqfull(tcp); 534721fffe3SKacheong Poon } 535721fffe3SKacheong Poon mutex_exit(&tcp->tcp_non_sq_lock); 536721fffe3SKacheong Poon 537721fffe3SKacheong Poon /* 538721fffe3SKacheong Poon * The application may pass in an address in the msghdr, but 539721fffe3SKacheong Poon * we ignore the address on connection-oriented sockets. 540721fffe3SKacheong Poon * Just like BSD this code does not generate an error for 541721fffe3SKacheong Poon * TCP (a CONNREQUIRED socket) when sending to an address 542721fffe3SKacheong Poon * passed in with sendto/sendmsg. Instead the data is 543721fffe3SKacheong Poon * delivered on the connection as if no address had been 544721fffe3SKacheong Poon * supplied. 545721fffe3SKacheong Poon */ 546721fffe3SKacheong Poon CONN_INC_REF(connp); 547721fffe3SKacheong Poon 548721fffe3SKacheong Poon if (msg->msg_flags & MSG_OOB) { 549721fffe3SKacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output_urgent, 550721fffe3SKacheong Poon connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 551721fffe3SKacheong Poon } else { 552721fffe3SKacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, mp, tcp_output, 553721fffe3SKacheong Poon connp, NULL, tcp_squeue_flag, SQTAG_TCP_OUTPUT); 554721fffe3SKacheong Poon } 555721fffe3SKacheong Poon 556721fffe3SKacheong Poon return (0); 557721fffe3SKacheong Poon 558721fffe3SKacheong Poon default: 559721fffe3SKacheong Poon ASSERT(0); 560721fffe3SKacheong Poon } 561721fffe3SKacheong Poon 562721fffe3SKacheong Poon freemsg(mp); 563721fffe3SKacheong Poon return (0); 564721fffe3SKacheong Poon } 565721fffe3SKacheong Poon 566721fffe3SKacheong Poon /* ARGSUSED */ 567721fffe3SKacheong Poon static int 568721fffe3SKacheong Poon tcp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 569721fffe3SKacheong Poon { 570721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 571721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 572721fffe3SKacheong Poon 573721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 574721fffe3SKacheong Poon 575721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 576721fffe3SKacheong Poon ASSERT(cr != NULL); 577721fffe3SKacheong Poon 578721fffe3SKacheong Poon /* 579721fffe3SKacheong Poon * X/Open requires that we check the connected state. 580721fffe3SKacheong Poon */ 581721fffe3SKacheong Poon if (tcp->tcp_state < TCPS_SYN_SENT) 582721fffe3SKacheong Poon return (ENOTCONN); 583721fffe3SKacheong Poon 584721fffe3SKacheong Poon /* shutdown the send side */ 585721fffe3SKacheong Poon if (how != SHUT_RD) { 586721fffe3SKacheong Poon mblk_t *bp; 587721fffe3SKacheong Poon 588721fffe3SKacheong Poon bp = allocb_wait(0, BPRI_HI, STR_NOSIG, NULL); 589721fffe3SKacheong Poon CONN_INC_REF(connp); 590721fffe3SKacheong Poon SQUEUE_ENTER_ONE(connp->conn_sqp, bp, tcp_shutdown_output, 591721fffe3SKacheong Poon connp, NULL, SQ_NODRAIN, SQTAG_TCP_SHUTDOWN_OUTPUT); 592721fffe3SKacheong Poon 593721fffe3SKacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 594721fffe3SKacheong Poon SOCK_OPCTL_SHUT_SEND, 0); 595721fffe3SKacheong Poon } 596721fffe3SKacheong Poon 597721fffe3SKacheong Poon /* shutdown the recv side */ 598721fffe3SKacheong Poon if (how != SHUT_WR) 599721fffe3SKacheong Poon (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 600721fffe3SKacheong Poon SOCK_OPCTL_SHUT_RECV, 0); 601721fffe3SKacheong Poon 602721fffe3SKacheong Poon return (0); 603721fffe3SKacheong Poon } 604721fffe3SKacheong Poon 605721fffe3SKacheong Poon static void 606721fffe3SKacheong Poon tcp_clr_flowctrl(sock_lower_handle_t proto_handle) 607721fffe3SKacheong Poon { 608721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 609721fffe3SKacheong Poon tcp_t *tcp = connp->conn_tcp; 610721fffe3SKacheong Poon mblk_t *mp; 611721fffe3SKacheong Poon int error; 612721fffe3SKacheong Poon 613721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 614721fffe3SKacheong Poon 615721fffe3SKacheong Poon /* 616721fffe3SKacheong Poon * If tcp->tcp_rsrv_mp == NULL, it means that tcp_clr_flowctrl() 617721fffe3SKacheong Poon * is currently running. 618721fffe3SKacheong Poon */ 619721fffe3SKacheong Poon mutex_enter(&tcp->tcp_rsrv_mp_lock); 620721fffe3SKacheong Poon if ((mp = tcp->tcp_rsrv_mp) == NULL) { 621721fffe3SKacheong Poon mutex_exit(&tcp->tcp_rsrv_mp_lock); 622721fffe3SKacheong Poon return; 623721fffe3SKacheong Poon } 624721fffe3SKacheong Poon tcp->tcp_rsrv_mp = NULL; 625721fffe3SKacheong Poon mutex_exit(&tcp->tcp_rsrv_mp_lock); 626721fffe3SKacheong Poon 6279ee3959aSAnders Persson error = squeue_synch_enter(connp, mp); 628721fffe3SKacheong Poon ASSERT(error == 0); 629721fffe3SKacheong Poon 630721fffe3SKacheong Poon mutex_enter(&tcp->tcp_rsrv_mp_lock); 631721fffe3SKacheong Poon tcp->tcp_rsrv_mp = mp; 632721fffe3SKacheong Poon mutex_exit(&tcp->tcp_rsrv_mp_lock); 633721fffe3SKacheong Poon 634721fffe3SKacheong Poon if (tcp->tcp_fused) { 635721fffe3SKacheong Poon tcp_fuse_backenable(tcp); 636721fffe3SKacheong Poon } else { 637721fffe3SKacheong Poon tcp->tcp_rwnd = connp->conn_rcvbuf; 638721fffe3SKacheong Poon /* 639721fffe3SKacheong Poon * Send back a window update immediately if TCP is above 640721fffe3SKacheong Poon * ESTABLISHED state and the increase of the rcv window 641721fffe3SKacheong Poon * that the other side knows is at least 1 MSS after flow 642721fffe3SKacheong Poon * control is lifted. 643721fffe3SKacheong Poon */ 644721fffe3SKacheong Poon if (tcp->tcp_state >= TCPS_ESTABLISHED && 645721fffe3SKacheong Poon tcp_rwnd_reopen(tcp) == TH_ACK_NEEDED) { 646721fffe3SKacheong Poon tcp_xmit_ctl(NULL, tcp, 647721fffe3SKacheong Poon (tcp->tcp_swnd == 0) ? tcp->tcp_suna : 648721fffe3SKacheong Poon tcp->tcp_snxt, tcp->tcp_rnxt, TH_ACK); 649721fffe3SKacheong Poon } 650721fffe3SKacheong Poon } 651721fffe3SKacheong Poon 6529ee3959aSAnders Persson squeue_synch_exit(connp); 653721fffe3SKacheong Poon } 654721fffe3SKacheong Poon 655721fffe3SKacheong Poon /* ARGSUSED */ 656721fffe3SKacheong Poon static int 657721fffe3SKacheong Poon tcp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 658721fffe3SKacheong Poon int mode, int32_t *rvalp, cred_t *cr) 659721fffe3SKacheong Poon { 660721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 661721fffe3SKacheong Poon int error; 662721fffe3SKacheong Poon 663721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 664721fffe3SKacheong Poon 665721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 666721fffe3SKacheong Poon ASSERT(cr != NULL); 667721fffe3SKacheong Poon 668721fffe3SKacheong Poon /* 669721fffe3SKacheong Poon * If we don't have a helper stream then create one. 670721fffe3SKacheong Poon * ip_create_helper_stream takes care of locking the conn_t, 671721fffe3SKacheong Poon * so this check for NULL is just a performance optimization. 672721fffe3SKacheong Poon */ 673721fffe3SKacheong Poon if (connp->conn_helper_info == NULL) { 674721fffe3SKacheong Poon tcp_stack_t *tcps = connp->conn_tcp->tcp_tcps; 675721fffe3SKacheong Poon 676721fffe3SKacheong Poon /* 677721fffe3SKacheong Poon * Create a helper stream for non-STREAMS socket. 678721fffe3SKacheong Poon */ 679721fffe3SKacheong Poon error = ip_create_helper_stream(connp, tcps->tcps_ldi_ident); 680721fffe3SKacheong Poon if (error != 0) { 681721fffe3SKacheong Poon ip0dbg(("tcp_ioctl: create of IP helper stream " 682721fffe3SKacheong Poon "failed %d\n", error)); 683721fffe3SKacheong Poon return (error); 684721fffe3SKacheong Poon } 685721fffe3SKacheong Poon } 686721fffe3SKacheong Poon 687721fffe3SKacheong Poon switch (cmd) { 688721fffe3SKacheong Poon case ND_SET: 689721fffe3SKacheong Poon case ND_GET: 690721fffe3SKacheong Poon case _SIOCSOCKFALLBACK: 691721fffe3SKacheong Poon case TCP_IOC_ABORT_CONN: 692721fffe3SKacheong Poon case TI_GETPEERNAME: 693721fffe3SKacheong Poon case TI_GETMYNAME: 694721fffe3SKacheong Poon ip1dbg(("tcp_ioctl: cmd 0x%x on non streams socket", 695721fffe3SKacheong Poon cmd)); 696721fffe3SKacheong Poon error = EINVAL; 697721fffe3SKacheong Poon break; 698721fffe3SKacheong Poon default: 699721fffe3SKacheong Poon /* 700721fffe3SKacheong Poon * If the conn is not closing, pass on to IP using 701721fffe3SKacheong Poon * helper stream. Bump the ioctlref to prevent tcp_close 702721fffe3SKacheong Poon * from closing the rq/wq out from underneath the ioctl 703721fffe3SKacheong Poon * if it ends up queued or aborted/interrupted. 704721fffe3SKacheong Poon */ 705721fffe3SKacheong Poon mutex_enter(&connp->conn_lock); 706721fffe3SKacheong Poon if (connp->conn_state_flags & (CONN_CLOSING)) { 707721fffe3SKacheong Poon mutex_exit(&connp->conn_lock); 708721fffe3SKacheong Poon error = EINVAL; 709721fffe3SKacheong Poon break; 710721fffe3SKacheong Poon } 711721fffe3SKacheong Poon CONN_INC_IOCTLREF_LOCKED(connp); 712721fffe3SKacheong Poon error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 713721fffe3SKacheong Poon cmd, arg, mode, cr, rvalp); 714721fffe3SKacheong Poon CONN_DEC_IOCTLREF(connp); 715721fffe3SKacheong Poon break; 716721fffe3SKacheong Poon } 717721fffe3SKacheong Poon return (error); 718721fffe3SKacheong Poon } 719721fffe3SKacheong Poon 720721fffe3SKacheong Poon /* ARGSUSED */ 721721fffe3SKacheong Poon static int 722721fffe3SKacheong Poon tcp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 723721fffe3SKacheong Poon { 724721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 725721fffe3SKacheong Poon 726721fffe3SKacheong Poon ASSERT(connp->conn_upper_handle != NULL); 727721fffe3SKacheong Poon 728721fffe3SKacheong Poon /* All Solaris components should pass a cred for this operation. */ 729721fffe3SKacheong Poon ASSERT(cr != NULL); 730721fffe3SKacheong Poon 731721fffe3SKacheong Poon tcp_close_common(connp, flags); 732721fffe3SKacheong Poon 733721fffe3SKacheong Poon ip_free_helper_stream(connp); 734721fffe3SKacheong Poon 735721fffe3SKacheong Poon /* 736721fffe3SKacheong Poon * Drop IP's reference on the conn. This is the last reference 737721fffe3SKacheong Poon * on the connp if the state was less than established. If the 738721fffe3SKacheong Poon * connection has gone into timewait state, then we will have 739721fffe3SKacheong Poon * one ref for the TCP and one more ref (total of two) for the 740721fffe3SKacheong Poon * classifier connected hash list (a timewait connections stays 741721fffe3SKacheong Poon * in connected hash till closed). 742721fffe3SKacheong Poon * 743721fffe3SKacheong Poon * We can't assert the references because there might be other 744721fffe3SKacheong Poon * transient reference places because of some walkers or queued 745721fffe3SKacheong Poon * packets in squeue for the timewait state. 746721fffe3SKacheong Poon */ 747721fffe3SKacheong Poon CONN_DEC_REF(connp); 7483e95bd4aSAnders Persson 7493e95bd4aSAnders Persson /* 7503e95bd4aSAnders Persson * EINPROGRESS tells sockfs to wait for a 'closed' upcall before 7513e95bd4aSAnders Persson * freeing the socket. 7523e95bd4aSAnders Persson */ 7533e95bd4aSAnders Persson return (EINPROGRESS); 754721fffe3SKacheong Poon } 755721fffe3SKacheong Poon 756721fffe3SKacheong Poon /* ARGSUSED */ 757721fffe3SKacheong Poon sock_lower_handle_t 758721fffe3SKacheong Poon tcp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 759721fffe3SKacheong Poon uint_t *smodep, int *errorp, int flags, cred_t *credp) 760721fffe3SKacheong Poon { 761721fffe3SKacheong Poon conn_t *connp; 762721fffe3SKacheong Poon boolean_t isv6 = family == AF_INET6; 763721fffe3SKacheong Poon if (type != SOCK_STREAM || (family != AF_INET && family != AF_INET6) || 764721fffe3SKacheong Poon (proto != 0 && proto != IPPROTO_TCP)) { 765721fffe3SKacheong Poon *errorp = EPROTONOSUPPORT; 766721fffe3SKacheong Poon return (NULL); 767721fffe3SKacheong Poon } 768721fffe3SKacheong Poon 769721fffe3SKacheong Poon connp = tcp_create_common(credp, isv6, B_TRUE, errorp); 770721fffe3SKacheong Poon if (connp == NULL) { 771721fffe3SKacheong Poon return (NULL); 772721fffe3SKacheong Poon } 773721fffe3SKacheong Poon 774721fffe3SKacheong Poon /* 775721fffe3SKacheong Poon * Put the ref for TCP. Ref for IP was already put 776721fffe3SKacheong Poon * by ipcl_conn_create. Also Make the conn_t globally 777721fffe3SKacheong Poon * visible to walkers 778721fffe3SKacheong Poon */ 779721fffe3SKacheong Poon mutex_enter(&connp->conn_lock); 780721fffe3SKacheong Poon CONN_INC_REF_LOCKED(connp); 781721fffe3SKacheong Poon ASSERT(connp->conn_ref == 2); 782721fffe3SKacheong Poon connp->conn_state_flags &= ~CONN_INCIPIENT; 783721fffe3SKacheong Poon 784721fffe3SKacheong Poon connp->conn_flags |= IPCL_NONSTR; 785721fffe3SKacheong Poon mutex_exit(&connp->conn_lock); 786721fffe3SKacheong Poon 787721fffe3SKacheong Poon ASSERT(errorp != NULL); 788721fffe3SKacheong Poon *errorp = 0; 789721fffe3SKacheong Poon *sock_downcalls = &sock_tcp_downcalls; 790721fffe3SKacheong Poon *smodep = SM_CONNREQUIRED | SM_EXDATA | SM_ACCEPTSUPP | 791721fffe3SKacheong Poon SM_SENDFILESUPP; 792721fffe3SKacheong Poon 793721fffe3SKacheong Poon return ((sock_lower_handle_t)connp); 794721fffe3SKacheong Poon } 795721fffe3SKacheong Poon 7963e95bd4aSAnders Persson /* 7973e95bd4aSAnders Persson * tcp_fallback 7983e95bd4aSAnders Persson * 7993e95bd4aSAnders Persson * A direct socket is falling back to using STREAMS. The queue 8003e95bd4aSAnders Persson * that is being passed down was created using tcp_open() with 8013e95bd4aSAnders Persson * the SO_FALLBACK flag set. As a result, the queue is not 8023e95bd4aSAnders Persson * associated with a conn, and the q_ptrs instead contain the 8033e95bd4aSAnders Persson * dev and minor area that should be used. 8043e95bd4aSAnders Persson * 8053e95bd4aSAnders Persson * The 'issocket' flag indicates whether the FireEngine 8063e95bd4aSAnders Persson * optimizations should be used. The common case would be that 8073e95bd4aSAnders Persson * optimizations are enabled, and they might be subsequently 8083e95bd4aSAnders Persson * disabled using the _SIOCSOCKFALLBACK ioctl. 8093e95bd4aSAnders Persson */ 8103e95bd4aSAnders Persson 8113e95bd4aSAnders Persson /* 8123e95bd4aSAnders Persson * An active connection is falling back to TPI. Gather all the information 8133e95bd4aSAnders Persson * required by the STREAM head and TPI sonode and send it up. 8143e95bd4aSAnders Persson */ 8153e95bd4aSAnders Persson static void 8163e95bd4aSAnders Persson tcp_fallback_noneager(tcp_t *tcp, mblk_t *stropt_mp, queue_t *q, 8173e95bd4aSAnders Persson boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb, 8183e95bd4aSAnders Persson sock_quiesce_arg_t *arg) 8193e95bd4aSAnders Persson { 8203e95bd4aSAnders Persson conn_t *connp = tcp->tcp_connp; 8213e95bd4aSAnders Persson struct stroptions *stropt; 8223e95bd4aSAnders Persson struct T_capability_ack tca; 8233e95bd4aSAnders Persson struct sockaddr_in6 laddr, faddr; 8243e95bd4aSAnders Persson socklen_t laddrlen, faddrlen; 8253e95bd4aSAnders Persson short opts; 8263e95bd4aSAnders Persson int error; 8273e95bd4aSAnders Persson mblk_t *mp, *mpnext; 8283e95bd4aSAnders Persson 8293e95bd4aSAnders Persson connp->conn_dev = (dev_t)RD(q)->q_ptr; 8303e95bd4aSAnders Persson connp->conn_minor_arena = WR(q)->q_ptr; 8313e95bd4aSAnders Persson 8323e95bd4aSAnders Persson RD(q)->q_ptr = WR(q)->q_ptr = connp; 8333e95bd4aSAnders Persson 8343e95bd4aSAnders Persson connp->conn_rq = RD(q); 8353e95bd4aSAnders Persson connp->conn_wq = WR(q); 8363e95bd4aSAnders Persson 8373e95bd4aSAnders Persson WR(q)->q_qinfo = &tcp_sock_winit; 8383e95bd4aSAnders Persson 8393e95bd4aSAnders Persson if (!issocket) 8403e95bd4aSAnders Persson tcp_use_pure_tpi(tcp); 8413e95bd4aSAnders Persson 8423e95bd4aSAnders Persson /* 8433e95bd4aSAnders Persson * free the helper stream 8443e95bd4aSAnders Persson */ 8453e95bd4aSAnders Persson ip_free_helper_stream(connp); 8463e95bd4aSAnders Persson 8473e95bd4aSAnders Persson /* 8483e95bd4aSAnders Persson * Notify the STREAM head about options 8493e95bd4aSAnders Persson */ 8503e95bd4aSAnders Persson DB_TYPE(stropt_mp) = M_SETOPTS; 8513e95bd4aSAnders Persson stropt = (struct stroptions *)stropt_mp->b_rptr; 8523e95bd4aSAnders Persson stropt_mp->b_wptr += sizeof (struct stroptions); 8533e95bd4aSAnders Persson stropt->so_flags = SO_HIWAT | SO_WROFF | SO_MAXBLK; 8543e95bd4aSAnders Persson 8553e95bd4aSAnders Persson stropt->so_wroff = connp->conn_ht_iphc_len + (tcp->tcp_loopback ? 0 : 8563e95bd4aSAnders Persson tcp->tcp_tcps->tcps_wroff_xtra); 8573e95bd4aSAnders Persson if (tcp->tcp_snd_sack_ok) 8583e95bd4aSAnders Persson stropt->so_wroff += TCPOPT_MAX_SACK_LEN; 8593e95bd4aSAnders Persson stropt->so_hiwat = connp->conn_rcvbuf; 8603e95bd4aSAnders Persson stropt->so_maxblk = tcp_maxpsz_set(tcp, B_FALSE); 8613e95bd4aSAnders Persson 8623e95bd4aSAnders Persson putnext(RD(q), stropt_mp); 8633e95bd4aSAnders Persson 8643e95bd4aSAnders Persson /* 8653e95bd4aSAnders Persson * Collect the information needed to sync with the sonode 8663e95bd4aSAnders Persson */ 8673e95bd4aSAnders Persson tcp_do_capability_ack(tcp, &tca, TC1_INFO|TC1_ACCEPTOR_ID); 8683e95bd4aSAnders Persson 8693e95bd4aSAnders Persson laddrlen = faddrlen = sizeof (sin6_t); 8703e95bd4aSAnders Persson (void) tcp_getsockname((sock_lower_handle_t)connp, 8713e95bd4aSAnders Persson (struct sockaddr *)&laddr, &laddrlen, CRED()); 8723e95bd4aSAnders Persson error = tcp_getpeername((sock_lower_handle_t)connp, 8733e95bd4aSAnders Persson (struct sockaddr *)&faddr, &faddrlen, CRED()); 8743e95bd4aSAnders Persson if (error != 0) 8753e95bd4aSAnders Persson faddrlen = 0; 8763e95bd4aSAnders Persson 8773e95bd4aSAnders Persson opts = 0; 8783e95bd4aSAnders Persson if (connp->conn_oobinline) 8793e95bd4aSAnders Persson opts |= SO_OOBINLINE; 8803e95bd4aSAnders Persson if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 8813e95bd4aSAnders Persson opts |= SO_DONTROUTE; 8823e95bd4aSAnders Persson 8833e95bd4aSAnders Persson /* 8843e95bd4aSAnders Persson * Notify the socket that the protocol is now quiescent, 8853e95bd4aSAnders Persson * and it's therefore safe move data from the socket 8863e95bd4aSAnders Persson * to the stream head. 8873e95bd4aSAnders Persson */ 8883e95bd4aSAnders Persson mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca, 8893e95bd4aSAnders Persson (struct sockaddr *)&laddr, laddrlen, 8903e95bd4aSAnders Persson (struct sockaddr *)&faddr, faddrlen, opts); 8913e95bd4aSAnders Persson 8923e95bd4aSAnders Persson while (mp != NULL) { 8933e95bd4aSAnders Persson mpnext = mp->b_next; 8943e95bd4aSAnders Persson tcp->tcp_rcv_list = mp->b_next; 8953e95bd4aSAnders Persson mp->b_next = NULL; 8963e95bd4aSAnders Persson putnext(q, mp); 8973e95bd4aSAnders Persson mp = mpnext; 8983e95bd4aSAnders Persson } 8993e95bd4aSAnders Persson ASSERT(tcp->tcp_rcv_last_head == NULL); 9003e95bd4aSAnders Persson ASSERT(tcp->tcp_rcv_last_tail == NULL); 9013e95bd4aSAnders Persson ASSERT(tcp->tcp_rcv_cnt == 0); 9023e95bd4aSAnders Persson 9033e95bd4aSAnders Persson /* 9043e95bd4aSAnders Persson * All eagers in q0 are marked as being non-STREAM, so they will 9053e95bd4aSAnders Persson * make su_newconn upcalls when the handshake completes, which 9063e95bd4aSAnders Persson * will fail (resulting in the conn being closed). So we just blow 9073e95bd4aSAnders Persson * off everything in q0 instead of waiting for the inevitable. 9083e95bd4aSAnders Persson */ 9093e95bd4aSAnders Persson if (tcp->tcp_conn_req_cnt_q0 != 0) 9103e95bd4aSAnders Persson tcp_eager_cleanup(tcp, B_TRUE); 9113e95bd4aSAnders Persson } 9123e95bd4aSAnders Persson 9133e95bd4aSAnders Persson /* 9143e95bd4aSAnders Persson * An eager is falling back to TPI. All we have to do is send 9153e95bd4aSAnders Persson * up a T_CONN_IND. 9163e95bd4aSAnders Persson */ 9173e95bd4aSAnders Persson static void 9183e95bd4aSAnders Persson tcp_fallback_eager(tcp_t *eager, boolean_t issocket, 9193e95bd4aSAnders Persson so_proto_quiesced_cb_t quiesced_cb, sock_quiesce_arg_t *arg) 9203e95bd4aSAnders Persson { 9213e95bd4aSAnders Persson conn_t *connp = eager->tcp_connp; 9223e95bd4aSAnders Persson tcp_t *listener = eager->tcp_listener; 9233e95bd4aSAnders Persson mblk_t *mp; 9243e95bd4aSAnders Persson 9253e95bd4aSAnders Persson ASSERT(listener != NULL); 9263e95bd4aSAnders Persson 9273e95bd4aSAnders Persson /* 9283e95bd4aSAnders Persson * Notify the socket that the protocol is now quiescent, 9293e95bd4aSAnders Persson * and it's therefore safe move data from the socket 9303e95bd4aSAnders Persson * to tcp's rcv queue. 9313e95bd4aSAnders Persson */ 9323e95bd4aSAnders Persson mp = (*quiesced_cb)(connp->conn_upper_handle, arg, NULL, NULL, 0, 9333e95bd4aSAnders Persson NULL, 0, 0); 9343e95bd4aSAnders Persson 9353e95bd4aSAnders Persson if (mp != NULL) { 9363e95bd4aSAnders Persson ASSERT(eager->tcp_rcv_cnt == 0); 9373e95bd4aSAnders Persson 9383e95bd4aSAnders Persson eager->tcp_rcv_list = mp; 9393e95bd4aSAnders Persson eager->tcp_rcv_cnt = msgdsize(mp); 9403e95bd4aSAnders Persson while (mp->b_next != NULL) { 9413e95bd4aSAnders Persson mp = mp->b_next; 9423e95bd4aSAnders Persson eager->tcp_rcv_cnt += msgdsize(mp); 9433e95bd4aSAnders Persson } 9443e95bd4aSAnders Persson eager->tcp_rcv_last_head = mp; 9453e95bd4aSAnders Persson while (mp->b_cont) 9463e95bd4aSAnders Persson mp = mp->b_cont; 9473e95bd4aSAnders Persson eager->tcp_rcv_last_tail = mp; 9483e95bd4aSAnders Persson if (eager->tcp_rcv_cnt > eager->tcp_rwnd) 9493e95bd4aSAnders Persson eager->tcp_rwnd = 0; 9503e95bd4aSAnders Persson else 9513e95bd4aSAnders Persson eager->tcp_rwnd -= eager->tcp_rcv_cnt; 9523e95bd4aSAnders Persson } 9533e95bd4aSAnders Persson 9543e95bd4aSAnders Persson if (!issocket) 9553e95bd4aSAnders Persson eager->tcp_issocket = B_FALSE; 9563e95bd4aSAnders Persson /* 9573e95bd4aSAnders Persson * The stream for this eager does not yet exist, so mark it as 9583e95bd4aSAnders Persson * being detached. 9593e95bd4aSAnders Persson */ 9603e95bd4aSAnders Persson eager->tcp_detached = B_TRUE; 9613e95bd4aSAnders Persson eager->tcp_hard_binding = B_TRUE; 9623e95bd4aSAnders Persson connp->conn_rq = listener->tcp_connp->conn_rq; 9633e95bd4aSAnders Persson connp->conn_wq = listener->tcp_connp->conn_wq; 9643e95bd4aSAnders Persson 9653e95bd4aSAnders Persson /* Send up the connection indication */ 9663e95bd4aSAnders Persson mp = eager->tcp_conn.tcp_eager_conn_ind; 9673e95bd4aSAnders Persson ASSERT(mp != NULL); 9683e95bd4aSAnders Persson eager->tcp_conn.tcp_eager_conn_ind = NULL; 9693e95bd4aSAnders Persson 9703e95bd4aSAnders Persson /* 9713e95bd4aSAnders Persson * TLI/XTI applications will get confused by 9723e95bd4aSAnders Persson * sending eager as an option since it violates 9733e95bd4aSAnders Persson * the option semantics. So remove the eager as 9743e95bd4aSAnders Persson * option since TLI/XTI app doesn't need it anyway. 9753e95bd4aSAnders Persson */ 9763e95bd4aSAnders Persson if (!issocket) { 9773e95bd4aSAnders Persson struct T_conn_ind *conn_ind; 9783e95bd4aSAnders Persson 9793e95bd4aSAnders Persson conn_ind = (struct T_conn_ind *)mp->b_rptr; 9803e95bd4aSAnders Persson conn_ind->OPT_length = 0; 9813e95bd4aSAnders Persson conn_ind->OPT_offset = 0; 9823e95bd4aSAnders Persson } 9833e95bd4aSAnders Persson 9843e95bd4aSAnders Persson /* 9853e95bd4aSAnders Persson * Sockfs guarantees that the listener will not be closed 9863e95bd4aSAnders Persson * during fallback. So we can safely use the listener's queue. 9873e95bd4aSAnders Persson */ 9883e95bd4aSAnders Persson putnext(listener->tcp_connp->conn_rq, mp); 9893e95bd4aSAnders Persson } 9903e95bd4aSAnders Persson 9913e95bd4aSAnders Persson 992721fffe3SKacheong Poon int 993721fffe3SKacheong Poon tcp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9943e95bd4aSAnders Persson boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb, 9953e95bd4aSAnders Persson sock_quiesce_arg_t *arg) 996721fffe3SKacheong Poon { 997721fffe3SKacheong Poon tcp_t *tcp; 998721fffe3SKacheong Poon conn_t *connp = (conn_t *)proto_handle; 999721fffe3SKacheong Poon int error; 1000721fffe3SKacheong Poon mblk_t *stropt_mp; 1001721fffe3SKacheong Poon mblk_t *ordrel_mp; 1002721fffe3SKacheong Poon 1003721fffe3SKacheong Poon tcp = connp->conn_tcp; 1004721fffe3SKacheong Poon 1005721fffe3SKacheong Poon stropt_mp = allocb_wait(sizeof (struct stroptions), BPRI_HI, STR_NOSIG, 1006721fffe3SKacheong Poon NULL); 1007721fffe3SKacheong Poon 1008721fffe3SKacheong Poon /* Pre-allocate the T_ordrel_ind mblk. */ 1009721fffe3SKacheong Poon ASSERT(tcp->tcp_ordrel_mp == NULL); 1010721fffe3SKacheong Poon ordrel_mp = allocb_wait(sizeof (struct T_ordrel_ind), BPRI_HI, 1011721fffe3SKacheong Poon STR_NOSIG, NULL); 1012721fffe3SKacheong Poon ordrel_mp->b_datap->db_type = M_PROTO; 1013721fffe3SKacheong Poon ((struct T_ordrel_ind *)ordrel_mp->b_rptr)->PRIM_type = T_ORDREL_IND; 1014721fffe3SKacheong Poon ordrel_mp->b_wptr += sizeof (struct T_ordrel_ind); 1015721fffe3SKacheong Poon 1016721fffe3SKacheong Poon /* 1017721fffe3SKacheong Poon * Enter the squeue so that no new packets can come in 1018721fffe3SKacheong Poon */ 10199ee3959aSAnders Persson error = squeue_synch_enter(connp, NULL); 1020721fffe3SKacheong Poon if (error != 0) { 1021721fffe3SKacheong Poon /* failed to enter, free all the pre-allocated messages. */ 1022721fffe3SKacheong Poon freeb(stropt_mp); 1023721fffe3SKacheong Poon freeb(ordrel_mp); 1024721fffe3SKacheong Poon return (ENOMEM); 1025721fffe3SKacheong Poon } 1026721fffe3SKacheong Poon 1027721fffe3SKacheong Poon /* 1028721fffe3SKacheong Poon * Both endpoints must be of the same type (either STREAMS or 1029721fffe3SKacheong Poon * non-STREAMS) for fusion to be enabled. So if we are fused, 1030721fffe3SKacheong Poon * we have to unfuse. 1031721fffe3SKacheong Poon */ 1032721fffe3SKacheong Poon if (tcp->tcp_fused) 1033721fffe3SKacheong Poon tcp_unfuse(tcp); 1034721fffe3SKacheong Poon 1035721fffe3SKacheong Poon if (tcp->tcp_listener != NULL) { 1036721fffe3SKacheong Poon /* The eager will deal with opts when accept() is called */ 1037721fffe3SKacheong Poon freeb(stropt_mp); 10383e95bd4aSAnders Persson tcp_fallback_eager(tcp, direct_sockfs, quiesced_cb, arg); 1039721fffe3SKacheong Poon } else { 1040721fffe3SKacheong Poon tcp_fallback_noneager(tcp, stropt_mp, q, direct_sockfs, 10413e95bd4aSAnders Persson quiesced_cb, arg); 1042721fffe3SKacheong Poon } 1043721fffe3SKacheong Poon 10443e95bd4aSAnders Persson /* 10453e95bd4aSAnders Persson * No longer a direct socket 10463e95bd4aSAnders Persson * 10473e95bd4aSAnders Persson * Note that we intentionally leave the upper_handle and upcalls 10483e95bd4aSAnders Persson * intact, since eagers may still be using them. 10493e95bd4aSAnders Persson */ 10503e95bd4aSAnders Persson connp->conn_flags &= ~IPCL_NONSTR; 10513e95bd4aSAnders Persson tcp->tcp_ordrel_mp = ordrel_mp; 10523e95bd4aSAnders Persson 1053721fffe3SKacheong Poon /* 1054721fffe3SKacheong Poon * There should be atleast two ref's (IP + TCP) 1055721fffe3SKacheong Poon */ 1056721fffe3SKacheong Poon ASSERT(connp->conn_ref >= 2); 10579ee3959aSAnders Persson squeue_synch_exit(connp); 1058721fffe3SKacheong Poon 1059721fffe3SKacheong Poon return (0); 1060721fffe3SKacheong Poon } 10613e95bd4aSAnders Persson 10623e95bd4aSAnders Persson /* 10633e95bd4aSAnders Persson * Notifies a non-STREAMS based listener about a new connection. This 10643e95bd4aSAnders Persson * function is executed on the *eager*'s squeue once the 3 way handshake 10653e95bd4aSAnders Persson * has completed. Note that the behavior differs from STREAMS, where the 10663e95bd4aSAnders Persson * T_CONN_IND is sent up by tcp_send_conn_ind while on the *listener*'s 10673e95bd4aSAnders Persson * squeue. 10683e95bd4aSAnders Persson * 10693e95bd4aSAnders Persson * Returns B_TRUE if the notification succeeded, in which case `tcp' will 10703e95bd4aSAnders Persson * be moved over to the ESTABLISHED list (q) of the listener. Othwerise, 10713e95bd4aSAnders Persson * B_FALSE is returned and `tcp' is killed. 10723e95bd4aSAnders Persson */ 10733e95bd4aSAnders Persson boolean_t 10743e95bd4aSAnders Persson tcp_newconn_notify(tcp_t *tcp, ip_recv_attr_t *ira) 10753e95bd4aSAnders Persson { 10763e95bd4aSAnders Persson tcp_t *listener = tcp->tcp_listener; 10773e95bd4aSAnders Persson conn_t *lconnp = listener->tcp_connp; 10783e95bd4aSAnders Persson conn_t *econnp = tcp->tcp_connp; 10793e95bd4aSAnders Persson tcp_t *tail; 10803e95bd4aSAnders Persson ipaddr_t *addr_cache; 10813e95bd4aSAnders Persson sock_upper_handle_t upper; 10823e95bd4aSAnders Persson struct sock_proto_props sopp; 10833e95bd4aSAnders Persson mblk_t *mp; 10843e95bd4aSAnders Persson 10853e95bd4aSAnders Persson mutex_enter(&listener->tcp_eager_lock); 10863e95bd4aSAnders Persson /* 10873e95bd4aSAnders Persson * Take the eager out, if it is in the list of droppable eagers 10883e95bd4aSAnders Persson * as we are here because the 3W handshake is over. 10893e95bd4aSAnders Persson */ 10903e95bd4aSAnders Persson MAKE_UNDROPPABLE(tcp); 10913e95bd4aSAnders Persson /* 10923e95bd4aSAnders Persson * The eager already has an extra ref put in tcp_input_data 10933e95bd4aSAnders Persson * so that it stays till accept comes back even though it 10943e95bd4aSAnders Persson * might get into TCPS_CLOSED as a result of a TH_RST etc. 10953e95bd4aSAnders Persson */ 10963e95bd4aSAnders Persson ASSERT(listener->tcp_conn_req_cnt_q0 > 0); 10973e95bd4aSAnders Persson listener->tcp_conn_req_cnt_q0--; 10983e95bd4aSAnders Persson listener->tcp_conn_req_cnt_q++; 10993e95bd4aSAnders Persson 11003e95bd4aSAnders Persson /* Move from SYN_RCVD to ESTABLISHED list */ 11013e95bd4aSAnders Persson tcp->tcp_eager_next_q0->tcp_eager_prev_q0 = tcp->tcp_eager_prev_q0; 11023e95bd4aSAnders Persson tcp->tcp_eager_prev_q0->tcp_eager_next_q0 = tcp->tcp_eager_next_q0; 11033e95bd4aSAnders Persson tcp->tcp_eager_prev_q0 = NULL; 11043e95bd4aSAnders Persson tcp->tcp_eager_next_q0 = NULL; 11053e95bd4aSAnders Persson 11063e95bd4aSAnders Persson /* 11073e95bd4aSAnders Persson * Insert at end of the queue because connections are accepted 11083e95bd4aSAnders Persson * in chronological order. Leaving the older connections at front 11093e95bd4aSAnders Persson * of the queue helps reducing search time. 11103e95bd4aSAnders Persson */ 11113e95bd4aSAnders Persson tail = listener->tcp_eager_last_q; 11123e95bd4aSAnders Persson if (tail != NULL) 11133e95bd4aSAnders Persson tail->tcp_eager_next_q = tcp; 11143e95bd4aSAnders Persson else 11153e95bd4aSAnders Persson listener->tcp_eager_next_q = tcp; 11163e95bd4aSAnders Persson listener->tcp_eager_last_q = tcp; 11173e95bd4aSAnders Persson tcp->tcp_eager_next_q = NULL; 11183e95bd4aSAnders Persson 11193e95bd4aSAnders Persson /* we have timed out before */ 11203e95bd4aSAnders Persson if (tcp->tcp_syn_rcvd_timeout != 0) { 11213e95bd4aSAnders Persson tcp->tcp_syn_rcvd_timeout = 0; 11223e95bd4aSAnders Persson listener->tcp_syn_rcvd_timeout--; 11233e95bd4aSAnders Persson if (listener->tcp_syn_defense && 11243e95bd4aSAnders Persson listener->tcp_syn_rcvd_timeout <= 11253e95bd4aSAnders Persson (listener->tcp_tcps->tcps_conn_req_max_q0 >> 5) && 11263e95bd4aSAnders Persson 10*MINUTES < TICK_TO_MSEC(ddi_get_lbolt64() - 11273e95bd4aSAnders Persson listener->tcp_last_rcv_lbolt)) { 11283e95bd4aSAnders Persson /* 11293e95bd4aSAnders Persson * Turn off the defense mode if we 11303e95bd4aSAnders Persson * believe the SYN attack is over. 11313e95bd4aSAnders Persson */ 11323e95bd4aSAnders Persson listener->tcp_syn_defense = B_FALSE; 11333e95bd4aSAnders Persson if (listener->tcp_ip_addr_cache) { 11343e95bd4aSAnders Persson kmem_free((void *)listener->tcp_ip_addr_cache, 11353e95bd4aSAnders Persson IP_ADDR_CACHE_SIZE * sizeof (ipaddr_t)); 11363e95bd4aSAnders Persson listener->tcp_ip_addr_cache = NULL; 11373e95bd4aSAnders Persson } 11383e95bd4aSAnders Persson } 11393e95bd4aSAnders Persson } 11403e95bd4aSAnders Persson addr_cache = (ipaddr_t *)(listener->tcp_ip_addr_cache); 11413e95bd4aSAnders Persson if (addr_cache != NULL) { 11423e95bd4aSAnders Persson /* 11433e95bd4aSAnders Persson * We have finished a 3-way handshake with this 11443e95bd4aSAnders Persson * remote host. This proves the IP addr is good. 11453e95bd4aSAnders Persson * Cache it! 11463e95bd4aSAnders Persson */ 11473e95bd4aSAnders Persson addr_cache[IP_ADDR_CACHE_HASH(tcp->tcp_connp->conn_faddr_v4)] = 11483e95bd4aSAnders Persson tcp->tcp_connp->conn_faddr_v4; 11493e95bd4aSAnders Persson } 11503e95bd4aSAnders Persson mutex_exit(&listener->tcp_eager_lock); 11513e95bd4aSAnders Persson 11523e95bd4aSAnders Persson /* 11533e95bd4aSAnders Persson * Notify the ULP about the newconn. It is guaranteed that no 11543e95bd4aSAnders Persson * tcp_accept() call will be made for the eager if the 11553e95bd4aSAnders Persson * notification fails. 11563e95bd4aSAnders Persson */ 11573e95bd4aSAnders Persson if ((upper = (*lconnp->conn_upcalls->su_newconn) 11583e95bd4aSAnders Persson (lconnp->conn_upper_handle, (sock_lower_handle_t)econnp, 11593e95bd4aSAnders Persson &sock_tcp_downcalls, ira->ira_cred, ira->ira_cpid, 11603e95bd4aSAnders Persson &econnp->conn_upcalls)) == NULL) { 11613e95bd4aSAnders Persson /* 11623e95bd4aSAnders Persson * Normally this should not happen, but the listener might 11633e95bd4aSAnders Persson * have done a fallback to TPI followed by a close(), in 11643e95bd4aSAnders Persson * which case tcp_closemp for this conn might have been 11653e95bd4aSAnders Persson * used by tcp_eager_cleanup(). 11663e95bd4aSAnders Persson */ 11673e95bd4aSAnders Persson mutex_enter(&listener->tcp_eager_lock); 11683e95bd4aSAnders Persson if (tcp->tcp_closemp_used) { 11693e95bd4aSAnders Persson mutex_exit(&listener->tcp_eager_lock); 11703e95bd4aSAnders Persson return (B_FALSE); 11713e95bd4aSAnders Persson } 11723e95bd4aSAnders Persson tcp->tcp_closemp_used = B_TRUE; 11733e95bd4aSAnders Persson TCP_DEBUG_GETPCSTACK(tcp->tcmp_stk, 15); 11743e95bd4aSAnders Persson mp = &tcp->tcp_closemp; 11753e95bd4aSAnders Persson mutex_exit(&listener->tcp_eager_lock); 11763e95bd4aSAnders Persson tcp_eager_kill(econnp, mp, NULL, NULL); 11773e95bd4aSAnders Persson return (B_FALSE); 11783e95bd4aSAnders Persson } 11793e95bd4aSAnders Persson econnp->conn_upper_handle = upper; 11803e95bd4aSAnders Persson 11813e95bd4aSAnders Persson tcp->tcp_detached = B_FALSE; 11823e95bd4aSAnders Persson tcp->tcp_hard_binding = B_FALSE; 11833e95bd4aSAnders Persson tcp->tcp_tconnind_started = B_TRUE; 11843e95bd4aSAnders Persson 11853e95bd4aSAnders Persson if (econnp->conn_keepalive) { 11863e95bd4aSAnders Persson tcp->tcp_ka_last_intrvl = 0; 11873e95bd4aSAnders Persson tcp->tcp_ka_tid = TCP_TIMER(tcp, tcp_keepalive_timer, 11883e95bd4aSAnders Persson tcp->tcp_ka_interval); 11893e95bd4aSAnders Persson } 11903e95bd4aSAnders Persson 11913e95bd4aSAnders Persson /* Update the necessary parameters */ 11923e95bd4aSAnders Persson tcp_get_proto_props(tcp, &sopp); 11933e95bd4aSAnders Persson 11943e95bd4aSAnders Persson (*econnp->conn_upcalls->su_set_proto_props) 11953e95bd4aSAnders Persson (econnp->conn_upper_handle, &sopp); 11963e95bd4aSAnders Persson 11973e95bd4aSAnders Persson return (B_TRUE); 11983e95bd4aSAnders Persson } 1199