17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ee4701baSericheng * Common Development and Distribution License (the "License"). 6ee4701baSericheng * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22e11c3f44Smeem * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * IP PACKET CLASSIFIER 287c478bd9Sstevel@tonic-gate * 297c478bd9Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent 307c478bd9Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides 317c478bd9Sstevel@tonic-gate * interface for managing connection states. 
327c478bd9Sstevel@tonic-gate * 337c478bd9Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among 347c478bd9Sstevel@tonic-gate * other things: 357c478bd9Sstevel@tonic-gate * 367c478bd9Sstevel@tonic-gate * o local/remote address and ports 377c478bd9Sstevel@tonic-gate * o Transport protocol 387c478bd9Sstevel@tonic-gate * o squeue for the connection (for TCP only) 397c478bd9Sstevel@tonic-gate * o reference counter 407c478bd9Sstevel@tonic-gate * o Connection state 417c478bd9Sstevel@tonic-gate * o hash table linkage 427c478bd9Sstevel@tonic-gate * o interface/ire information 437c478bd9Sstevel@tonic-gate * o credentials 447c478bd9Sstevel@tonic-gate * o ipsec policy 457c478bd9Sstevel@tonic-gate * o send and receive functions. 467c478bd9Sstevel@tonic-gate * o mutex lock. 477c478bd9Sstevel@tonic-gate * 487c478bd9Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the 497c478bd9Sstevel@tonic-gate * reference counter drops to zero. A reference is incremented when connection 507c478bd9Sstevel@tonic-gate * is placed in a list or table, when incoming packet for the connection arrives 517c478bd9Sstevel@tonic-gate * and when connection is processed via squeue (squeue processing may be 527c478bd9Sstevel@tonic-gate * asynchronous and the reference protects the connection from being destroyed 537c478bd9Sstevel@tonic-gate * before its processing is finished). 547c478bd9Sstevel@tonic-gate * 55*bd670b35SErik Nordmark * conn_recv is used to pass up packets to the ULP. 56*bd670b35SErik Nordmark * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for 57*bd670b35SErik Nordmark * a listener, and changes to tcp_input_listener as the listener has picked a 58*bd670b35SErik Nordmark * good squeue. For other cases it is set to tcp_input_data. 59*bd670b35SErik Nordmark * 60*bd670b35SErik Nordmark * conn_recvicmp is used to pass up ICMP errors to the ULP. 
617c478bd9Sstevel@tonic-gate * 627c478bd9Sstevel@tonic-gate * Classifier uses several hash tables: 637c478bd9Sstevel@tonic-gate * 647c478bd9Sstevel@tonic-gate * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 657c478bd9Sstevel@tonic-gate * ipcl_bind_fanout: contains all connections in BOUND state 667c478bd9Sstevel@tonic-gate * ipcl_proto_fanout: IPv4 protocol fanout 677c478bd9Sstevel@tonic-gate * ipcl_proto_fanout_v6: IPv6 protocol fanout 687c478bd9Sstevel@tonic-gate * ipcl_udp_fanout: contains all UDP connections 692b24ab6bSSebastien Roy * ipcl_iptun_fanout: contains all IP tunnel connections 707c478bd9Sstevel@tonic-gate * ipcl_globalhash_fanout: contains all connections 717c478bd9Sstevel@tonic-gate * 727c478bd9Sstevel@tonic-gate * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 737c478bd9Sstevel@tonic-gate * which need to view all existing connections. 747c478bd9Sstevel@tonic-gate * 757c478bd9Sstevel@tonic-gate * All tables are protected by per-bucket locks. When both per-bucket lock and 767c478bd9Sstevel@tonic-gate * connection lock need to be held, the per-bucket lock should be acquired 777c478bd9Sstevel@tonic-gate * first, followed by the connection lock. 787c478bd9Sstevel@tonic-gate * 797c478bd9Sstevel@tonic-gate * All functions doing search in one of these tables increment a reference 807c478bd9Sstevel@tonic-gate * counter on the connection found (if any). This reference should be dropped 817c478bd9Sstevel@tonic-gate * when the caller has finished processing the connection. 
827c478bd9Sstevel@tonic-gate * 837c478bd9Sstevel@tonic-gate * 847c478bd9Sstevel@tonic-gate * INTERFACES: 857c478bd9Sstevel@tonic-gate * =========== 867c478bd9Sstevel@tonic-gate * 877c478bd9Sstevel@tonic-gate * Connection Lookup: 887c478bd9Sstevel@tonic-gate * ------------------ 897c478bd9Sstevel@tonic-gate * 90*bd670b35SErik Nordmark * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack) 91*bd670b35SErik Nordmark * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack) 927c478bd9Sstevel@tonic-gate * 937c478bd9Sstevel@tonic-gate * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 947c478bd9Sstevel@tonic-gate * it can't find any associated connection. If the connection is found, its 957c478bd9Sstevel@tonic-gate * reference counter is incremented. 967c478bd9Sstevel@tonic-gate * 977c478bd9Sstevel@tonic-gate * mp: mblock, containing packet header. The full header should fit 987c478bd9Sstevel@tonic-gate * into a single mblock. It should also contain at least full IP 997c478bd9Sstevel@tonic-gate * and TCP or UDP header. 1007c478bd9Sstevel@tonic-gate * 1017c478bd9Sstevel@tonic-gate * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 1027c478bd9Sstevel@tonic-gate * 1037c478bd9Sstevel@tonic-gate * hdr_len: The size of IP header. It is used to find TCP or UDP header in 1047c478bd9Sstevel@tonic-gate * the packet. 1057c478bd9Sstevel@tonic-gate * 106*bd670b35SErik Nordmark * ira->ira_zoneid: The zone in which the returned connection must be; the 107*bd670b35SErik Nordmark * zoneid corresponding to the ire_zoneid on the IRE located for 108*bd670b35SErik Nordmark * the packet's destination address. 
 *
 *	ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
 *		IRAF_TX_SHARED_ADDR flags
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, local port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment.  This is used only in a
 * labeled environment.  The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone.  Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked.  For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet.  For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set.  This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary.  (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
 *					 zoneid, ip_stack);
 * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *					 zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp);
 * int ipcl_conn_insert_v4(connp);
 * int ipcl_conn_insert_v6(connp);
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp);
 * int ipcl_bind_insert_v4(connp);
 * int ipcl_bind_insert_v6(connp);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created i.e., which kmem_cache it comes from.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_SCTPCONN	indicates a SCTP connection
 *		IPCL_UDPCONN	indicates a UDP conn_t.
 *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
 *		IPCL_RTSCONN	indicates a RTS conn_t.
 *		IPCL_IPCCONN	indicates all other connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
2337c478bd9Sstevel@tonic-gate */ 2347c478bd9Sstevel@tonic-gate 2357c478bd9Sstevel@tonic-gate #include <sys/types.h> 2367c478bd9Sstevel@tonic-gate #include <sys/stream.h> 2377c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 2387c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 2397c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 2407c478bd9Sstevel@tonic-gate #include <sys/strsun.h> 2417c478bd9Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2427c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 2437c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 2447c478bd9Sstevel@tonic-gate #include <sys/debug.h> 2457c478bd9Sstevel@tonic-gate 2467c478bd9Sstevel@tonic-gate #include <sys/systm.h> 2477c478bd9Sstevel@tonic-gate #include <sys/param.h> 2487c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 2497c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h> 2507c478bd9Sstevel@tonic-gate #include <inet/common.h> 2517c478bd9Sstevel@tonic-gate #include <netinet/ip6.h> 2527c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h> 2537c478bd9Sstevel@tonic-gate 2547c478bd9Sstevel@tonic-gate #include <inet/ip.h> 255*bd670b35SErik Nordmark #include <inet/ip_if.h> 256*bd670b35SErik Nordmark #include <inet/ip_ire.h> 2577c478bd9Sstevel@tonic-gate #include <inet/ip6.h> 2587c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h> 2590f1702c5SYu Xiangning #include <inet/ip_impl.h> 260ff550d0eSmasputra #include <inet/udp_impl.h> 2617c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h> 262f4b3ec61Sdh #include <inet/sctp/sctp_impl.h> 263fc80c0dfSnordmark #include <inet/rawip_impl.h> 264fc80c0dfSnordmark #include <inet/rts_impl.h> 2652b24ab6bSSebastien Roy #include <inet/iptun/iptun_impl.h> 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 2687c478bd9Sstevel@tonic-gate 2697c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h> 2700f1702c5SYu Xiangning #include <inet/tcp.h> 2717c478bd9Sstevel@tonic-gate #include <inet/ipsec_impl.h> 2727c478bd9Sstevel@tonic-gate 27345916cd2Sjpk 
#include <sys/tsol/tnet.h>
#include <sys/sockio.h>

/*
 * Old name for the connection fanout size tunable, kept for compatibility.
 * Settable in /etc/system.  ipcl_init() consults it only when
 * ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/*
 * New value.  Zero means choose automatically.  Settable in /etc/system.
 *
 * When both this and tcp_conn_hash_size are zero, ipcl_init() derives the
 * size as (free memory in bytes) / ipcl_conn_hash_memfactor, capped at
 * ipcl_conn_hash_maxsize, and then rounds it to an entry of the P2Ps() table.
 */
uint_t ipcl_conn_hash_size = 0;
uint_t ipcl_conn_hash_memfactor = 8192;
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
 * expect that most large deployments would have hundreds of tunnels, and
 * thousands in the extreme case.
 */
uint_t ipcl_iptun_fanout_size = 6143;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The entries for N of 0-2 and the trailing sentinel are 0; ipcl_init()
 * treats a selected value of 0 as "out of range".
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 */
typedef union itc_s {
	conn_t	itc_conn;
	char	itcu_filler[CACHE_ALIGN(conn_s)];
} itc_t;

/*
 * kmem caches that conn_t's are allocated from.  The ones defined here are
 * created in ipcl_g_init(); the extern ones are defined elsewhere.
 */
struct kmem_cache  *tcp_conn_cache;
struct kmem_cache  *ip_conn_cache;
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
struct kmem_cache  *udp_conn_cache;
struct kmem_cache  *rawip_conn_cache;
struct kmem_cache  *rts_conn_cache;

extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* kmem cache constructor/destructor pairs, one per conn cache. */
static int	ip_conn_constructor(void *, void *, int);
static void	ip_conn_destructor(void *, void *);

static int	tcp_conn_constructor(void *, void *, int);
static void	tcp_conn_destructor(void *, void *);

static int	udp_conn_constructor(void *, void *, int);
static void	udp_conn_destructor(void *, void *);
static int rawip_conn_constructor(void *, void *, int); 338fc80c0dfSnordmark static void rawip_conn_destructor(void *, void *); 339fc80c0dfSnordmark 340fc80c0dfSnordmark static int rts_conn_constructor(void *, void *, int); 341fc80c0dfSnordmark static void rts_conn_destructor(void *, void *); 3427c478bd9Sstevel@tonic-gate 3437c478bd9Sstevel@tonic-gate /* 344f4b3ec61Sdh * Global (for all stack instances) init routine 3457c478bd9Sstevel@tonic-gate */ 3467c478bd9Sstevel@tonic-gate void 347f4b3ec61Sdh ipcl_g_init(void) 3487c478bd9Sstevel@tonic-gate { 349fc80c0dfSnordmark ip_conn_cache = kmem_cache_create("ip_conn_cache", 3507c478bd9Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 351fc80c0dfSnordmark ip_conn_constructor, ip_conn_destructor, 352fc80c0dfSnordmark NULL, NULL, NULL, 0); 353fc80c0dfSnordmark 354fc80c0dfSnordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 355fc80c0dfSnordmark sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 356fc80c0dfSnordmark tcp_conn_constructor, tcp_conn_destructor, 357fc80c0dfSnordmark NULL, NULL, NULL, 0); 358fc80c0dfSnordmark 359fc80c0dfSnordmark udp_conn_cache = kmem_cache_create("udp_conn_cache", 360fc80c0dfSnordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 361fc80c0dfSnordmark udp_conn_constructor, udp_conn_destructor, 362fc80c0dfSnordmark NULL, NULL, NULL, 0); 3637c478bd9Sstevel@tonic-gate 364fc80c0dfSnordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 365fc80c0dfSnordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 366fc80c0dfSnordmark rawip_conn_constructor, rawip_conn_destructor, 367fc80c0dfSnordmark NULL, NULL, NULL, 0); 368fc80c0dfSnordmark 369fc80c0dfSnordmark rts_conn_cache = kmem_cache_create("rts_conn_cache", 370fc80c0dfSnordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 371fc80c0dfSnordmark rts_conn_constructor, rts_conn_destructor, 3727c478bd9Sstevel@tonic-gate NULL, NULL, NULL, 0); 373f4b3ec61Sdh } 374f4b3ec61Sdh 375f4b3ec61Sdh /* 376f4b3ec61Sdh * ipclassifier 
intialization routine, sets up hash tables. 377f4b3ec61Sdh */ 378f4b3ec61Sdh void 379f4b3ec61Sdh ipcl_init(ip_stack_t *ipst) 380f4b3ec61Sdh { 381f4b3ec61Sdh int i; 382f4b3ec61Sdh int sizes[] = P2Ps(); 3837c478bd9Sstevel@tonic-gate 3847c478bd9Sstevel@tonic-gate /* 385f4b3ec61Sdh * Calculate size of conn fanout table from /etc/system settings 3867c478bd9Sstevel@tonic-gate */ 3877c478bd9Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 388f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 3897c478bd9Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 390f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 3917c478bd9Sstevel@tonic-gate } else { 3927c478bd9Sstevel@tonic-gate extern pgcnt_t freemem; 3937c478bd9Sstevel@tonic-gate 394f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 3957c478bd9Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 3967c478bd9Sstevel@tonic-gate 397f4b3ec61Sdh if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 398f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 399f4b3ec61Sdh ipcl_conn_hash_maxsize; 400f4b3ec61Sdh } 4017c478bd9Sstevel@tonic-gate } 4027c478bd9Sstevel@tonic-gate 4037c478bd9Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 404f4b3ec61Sdh if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 4057c478bd9Sstevel@tonic-gate break; 4067c478bd9Sstevel@tonic-gate } 4077c478bd9Sstevel@tonic-gate } 408f4b3ec61Sdh if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 4097c478bd9Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 410f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = sizes[16]; 4117c478bd9Sstevel@tonic-gate } 4127c478bd9Sstevel@tonic-gate 413f4b3ec61Sdh /* Take values from /etc/system */ 414f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 415f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 416f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 4172b24ab6bSSebastien Roy 
ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size; 418f4b3ec61Sdh 419f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 420f4b3ec61Sdh 421f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = kmem_zalloc( 422f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 423f4b3ec61Sdh 424f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 425f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 4267c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4277c478bd9Sstevel@tonic-gate } 4287c478bd9Sstevel@tonic-gate 429f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = kmem_zalloc( 430f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 4317c478bd9Sstevel@tonic-gate 432f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 433f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 4347c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4357c478bd9Sstevel@tonic-gate } 4367c478bd9Sstevel@tonic-gate 437*bd670b35SErik Nordmark ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX * 438f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 439f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 440*bd670b35SErik Nordmark mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL, 4417c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4427c478bd9Sstevel@tonic-gate } 443f4b3ec61Sdh 444f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 445f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 446f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 447f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 4487c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4497c478bd9Sstevel@tonic-gate } 4507c478bd9Sstevel@tonic-gate 451f4b3ec61Sdh ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 452f4b3ec61Sdh mutex_init(&ipst->ips_rts_clients->connf_lock, 453f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 4547c478bd9Sstevel@tonic-gate 455f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = kmem_zalloc( 
456f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 457f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 458f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 4597c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4607c478bd9Sstevel@tonic-gate } 4617c478bd9Sstevel@tonic-gate 4622b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout = kmem_zalloc( 4632b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP); 4642b24ab6bSSebastien Roy for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 4652b24ab6bSSebastien Roy mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL, 4662b24ab6bSSebastien Roy MUTEX_DEFAULT, NULL); 4672b24ab6bSSebastien Roy } 4682b24ab6bSSebastien Roy 469f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = kmem_zalloc( 470f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 471f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 472f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 4737c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4747c478bd9Sstevel@tonic-gate } 4757c478bd9Sstevel@tonic-gate 476f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 477f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 4787c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 479f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 480f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 4817c478bd9Sstevel@tonic-gate } 4827c478bd9Sstevel@tonic-gate } 4837c478bd9Sstevel@tonic-gate 4847c478bd9Sstevel@tonic-gate void 485f4b3ec61Sdh ipcl_g_destroy(void) 4867c478bd9Sstevel@tonic-gate { 487fc80c0dfSnordmark kmem_cache_destroy(ip_conn_cache); 488fc80c0dfSnordmark kmem_cache_destroy(tcp_conn_cache); 489fc80c0dfSnordmark kmem_cache_destroy(udp_conn_cache); 490fc80c0dfSnordmark kmem_cache_destroy(rawip_conn_cache); 491fc80c0dfSnordmark kmem_cache_destroy(rts_conn_cache); 492f4b3ec61Sdh } 493f4b3ec61Sdh 
494f4b3ec61Sdh /* 495f4b3ec61Sdh * All user-level and kernel use of the stack must be gone 496f4b3ec61Sdh * by now. 497f4b3ec61Sdh */ 498f4b3ec61Sdh void 499f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst) 500f4b3ec61Sdh { 501f4b3ec61Sdh int i; 502f4b3ec61Sdh 503f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 504f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 505f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 506f4b3ec61Sdh } 507f4b3ec61Sdh kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 508f4b3ec61Sdh sizeof (connf_t)); 509f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = NULL; 510f4b3ec61Sdh 511f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 512f4b3ec61Sdh ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 513f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 514f4b3ec61Sdh } 515f4b3ec61Sdh kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 516f4b3ec61Sdh sizeof (connf_t)); 517f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = NULL; 518f4b3ec61Sdh 519f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 520*bd670b35SErik Nordmark ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL); 521*bd670b35SErik Nordmark mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock); 522f4b3ec61Sdh } 523*bd670b35SErik Nordmark kmem_free(ipst->ips_ipcl_proto_fanout_v4, 524*bd670b35SErik Nordmark IPPROTO_MAX * sizeof (connf_t)); 525*bd670b35SErik Nordmark ipst->ips_ipcl_proto_fanout_v4 = NULL; 526f4b3ec61Sdh 527f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 528f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 529f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 530f4b3ec61Sdh } 531f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout_v6, 532f4b3ec61Sdh IPPROTO_MAX * sizeof (connf_t)); 533f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = NULL; 534f4b3ec61Sdh 535f4b3ec61Sdh for (i = 0; i < 
ipst->ips_ipcl_udp_fanout_size; i++) { 536f4b3ec61Sdh ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 537f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 538f4b3ec61Sdh } 539f4b3ec61Sdh kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 540f4b3ec61Sdh sizeof (connf_t)); 541f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = NULL; 542f4b3ec61Sdh 5432b24ab6bSSebastien Roy for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) { 5442b24ab6bSSebastien Roy ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL); 5452b24ab6bSSebastien Roy mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock); 5462b24ab6bSSebastien Roy } 5472b24ab6bSSebastien Roy kmem_free(ipst->ips_ipcl_iptun_fanout, 5482b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t)); 5492b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout = NULL; 5502b24ab6bSSebastien Roy 551f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 552f4b3ec61Sdh ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 553f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 554f4b3ec61Sdh } 555f4b3ec61Sdh kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 556f4b3ec61Sdh sizeof (connf_t)); 557f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = NULL; 558f4b3ec61Sdh 559f4b3ec61Sdh for (i = 0; i < CONN_G_HASH_SIZE; i++) { 560f4b3ec61Sdh ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 561f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 562f4b3ec61Sdh } 563f4b3ec61Sdh kmem_free(ipst->ips_ipcl_globalhash_fanout, 564f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE); 565f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = NULL; 566f4b3ec61Sdh 567f4b3ec61Sdh ASSERT(ipst->ips_rts_clients->connf_head == NULL); 568f4b3ec61Sdh mutex_destroy(&ipst->ips_rts_clients->connf_lock); 569f4b3ec61Sdh kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 570f4b3ec61Sdh ipst->ips_rts_clients = NULL; 5717c478bd9Sstevel@tonic-gate 
} 5727c478bd9Sstevel@tonic-gate 5737c478bd9Sstevel@tonic-gate /* 5747c478bd9Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 5757c478bd9Sstevel@tonic-gate * and inserts it in the global hash table. 5767c478bd9Sstevel@tonic-gate */ 5777c478bd9Sstevel@tonic-gate conn_t * 578f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 5797c478bd9Sstevel@tonic-gate { 5807c478bd9Sstevel@tonic-gate conn_t *connp; 581fc80c0dfSnordmark struct kmem_cache *conn_cache; 5827c478bd9Sstevel@tonic-gate 5837c478bd9Sstevel@tonic-gate switch (type) { 5847c478bd9Sstevel@tonic-gate case IPCL_SCTPCONN: 5857c478bd9Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 5867c478bd9Sstevel@tonic-gate return (NULL); 587121e5416Skcpoon sctp_conn_init(connp); 588f4b3ec61Sdh netstack_hold(ns); 589f4b3ec61Sdh connp->conn_netstack = ns; 590*bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = ns->netstack_ip; 591*bd670b35SErik Nordmark ipcl_globalhash_insert(connp); 592fc80c0dfSnordmark return (connp); 593fc80c0dfSnordmark 594fc80c0dfSnordmark case IPCL_TCPCONN: 595fc80c0dfSnordmark conn_cache = tcp_conn_cache; 5967c478bd9Sstevel@tonic-gate break; 597fc80c0dfSnordmark 598fc80c0dfSnordmark case IPCL_UDPCONN: 599fc80c0dfSnordmark conn_cache = udp_conn_cache; 600fc80c0dfSnordmark break; 601fc80c0dfSnordmark 602fc80c0dfSnordmark case IPCL_RAWIPCONN: 603fc80c0dfSnordmark conn_cache = rawip_conn_cache; 604fc80c0dfSnordmark break; 605fc80c0dfSnordmark 606fc80c0dfSnordmark case IPCL_RTSCONN: 607fc80c0dfSnordmark conn_cache = rts_conn_cache; 608fc80c0dfSnordmark break; 609fc80c0dfSnordmark 6107c478bd9Sstevel@tonic-gate case IPCL_IPCCONN: 611fc80c0dfSnordmark conn_cache = ip_conn_cache; 6127c478bd9Sstevel@tonic-gate break; 613fc80c0dfSnordmark 614ff550d0eSmasputra default: 615ff550d0eSmasputra connp = NULL; 616ff550d0eSmasputra ASSERT(0); 6177c478bd9Sstevel@tonic-gate } 6187c478bd9Sstevel@tonic-gate 619fc80c0dfSnordmark if ((connp = 
kmem_cache_alloc(conn_cache, sleep)) == NULL) 620fc80c0dfSnordmark return (NULL); 621fc80c0dfSnordmark 622fc80c0dfSnordmark connp->conn_ref = 1; 623fc80c0dfSnordmark netstack_hold(ns); 624fc80c0dfSnordmark connp->conn_netstack = ns; 625*bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = ns->netstack_ip; 626fc80c0dfSnordmark ipcl_globalhash_insert(connp); 6277c478bd9Sstevel@tonic-gate return (connp); 6287c478bd9Sstevel@tonic-gate } 6297c478bd9Sstevel@tonic-gate 6307c478bd9Sstevel@tonic-gate void 6317c478bd9Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 6327c478bd9Sstevel@tonic-gate { 6337c478bd9Sstevel@tonic-gate mblk_t *mp; 634f4b3ec61Sdh netstack_t *ns = connp->conn_netstack; 6357c478bd9Sstevel@tonic-gate 6367c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 6377c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 6387c478bd9Sstevel@tonic-gate 639fab254e2SAruna Ramakrishna DTRACE_PROBE1(conn__destroy, conn_t *, connp); 640fab254e2SAruna Ramakrishna 64145916cd2Sjpk if (connp->conn_cred != NULL) { 64245916cd2Sjpk crfree(connp->conn_cred); 64345916cd2Sjpk connp->conn_cred = NULL; 64445916cd2Sjpk } 64545916cd2Sjpk 646*bd670b35SErik Nordmark if (connp->conn_ht_iphc != NULL) { 647*bd670b35SErik Nordmark kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated); 648*bd670b35SErik Nordmark connp->conn_ht_iphc = NULL; 649*bd670b35SErik Nordmark connp->conn_ht_iphc_allocated = 0; 650*bd670b35SErik Nordmark connp->conn_ht_iphc_len = 0; 651*bd670b35SErik Nordmark connp->conn_ht_ulp = NULL; 652*bd670b35SErik Nordmark connp->conn_ht_ulp_len = 0; 653*bd670b35SErik Nordmark } 654*bd670b35SErik Nordmark ip_pkt_free(&connp->conn_xmit_ipp); 655*bd670b35SErik Nordmark 6567c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(connp); 6577c478bd9Sstevel@tonic-gate 658*bd670b35SErik Nordmark if (connp->conn_latch != NULL) { 659*bd670b35SErik Nordmark IPLATCH_REFRELE(connp->conn_latch); 660*bd670b35SErik Nordmark connp->conn_latch = NULL; 661*bd670b35SErik Nordmark 
} 662*bd670b35SErik Nordmark if (connp->conn_latch_in_policy != NULL) { 663*bd670b35SErik Nordmark IPPOL_REFRELE(connp->conn_latch_in_policy); 664*bd670b35SErik Nordmark connp->conn_latch_in_policy = NULL; 665*bd670b35SErik Nordmark } 666*bd670b35SErik Nordmark if (connp->conn_latch_in_action != NULL) { 667*bd670b35SErik Nordmark IPACT_REFRELE(connp->conn_latch_in_action); 668*bd670b35SErik Nordmark connp->conn_latch_in_action = NULL; 669*bd670b35SErik Nordmark } 670*bd670b35SErik Nordmark if (connp->conn_policy != NULL) { 671*bd670b35SErik Nordmark IPPH_REFRELE(connp->conn_policy, ns); 672*bd670b35SErik Nordmark connp->conn_policy = NULL; 673*bd670b35SErik Nordmark } 674*bd670b35SErik Nordmark 675*bd670b35SErik Nordmark if (connp->conn_ipsec_opt_mp != NULL) { 676*bd670b35SErik Nordmark freemsg(connp->conn_ipsec_opt_mp); 677*bd670b35SErik Nordmark connp->conn_ipsec_opt_mp = NULL; 678*bd670b35SErik Nordmark } 679*bd670b35SErik Nordmark 6807c478bd9Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) { 681*bd670b35SErik Nordmark tcp_t *tcp = connp->conn_tcp; 682ff550d0eSmasputra 6837c478bd9Sstevel@tonic-gate tcp_free(tcp); 6847c478bd9Sstevel@tonic-gate mp = tcp->tcp_timercache; 685*bd670b35SErik Nordmark 686*bd670b35SErik Nordmark tcp->tcp_tcps = NULL; 6877c478bd9Sstevel@tonic-gate 6887c478bd9Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 6897c478bd9Sstevel@tonic-gate bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 6907c478bd9Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 6917c478bd9Sstevel@tonic-gate tcp->tcp_sack_info); 6927c478bd9Sstevel@tonic-gate } 6937c478bd9Sstevel@tonic-gate 694f7f8e53dSKacheong Poon /* 695f7f8e53dSKacheong Poon * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 696f7f8e53dSKacheong Poon * the mblk. 
697f7f8e53dSKacheong Poon */ 698f7f8e53dSKacheong Poon if (tcp->tcp_rsrv_mp != NULL) { 699f7f8e53dSKacheong Poon freeb(tcp->tcp_rsrv_mp); 700f7f8e53dSKacheong Poon tcp->tcp_rsrv_mp = NULL; 701f7f8e53dSKacheong Poon mutex_destroy(&tcp->tcp_rsrv_mp_lock); 702f7f8e53dSKacheong Poon } 703f7f8e53dSKacheong Poon 704*bd670b35SErik Nordmark ipcl_conn_cleanup(connp); 705*bd670b35SErik Nordmark connp->conn_flags = IPCL_TCPCONN; 706f4b3ec61Sdh if (ns != NULL) { 707f4b3ec61Sdh ASSERT(tcp->tcp_tcps == NULL); 708f4b3ec61Sdh connp->conn_netstack = NULL; 709*bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = NULL; 710f4b3ec61Sdh netstack_rele(ns); 711f4b3ec61Sdh } 712fc80c0dfSnordmark 713fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t)); 714fc80c0dfSnordmark 715fc80c0dfSnordmark tcp->tcp_timercache = mp; 716fc80c0dfSnordmark tcp->tcp_connp = connp; 717fc80c0dfSnordmark kmem_cache_free(tcp_conn_cache, connp); 718fc80c0dfSnordmark return; 719fc80c0dfSnordmark } 720fc80c0dfSnordmark 721fc80c0dfSnordmark if (connp->conn_flags & IPCL_SCTPCONN) { 722f4b3ec61Sdh ASSERT(ns != NULL); 7237c478bd9Sstevel@tonic-gate sctp_free(connp); 724fc80c0dfSnordmark return; 725fc80c0dfSnordmark } 726fc80c0dfSnordmark 727*bd670b35SErik Nordmark ipcl_conn_cleanup(connp); 728fc80c0dfSnordmark if (ns != NULL) { 729fc80c0dfSnordmark connp->conn_netstack = NULL; 730*bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = NULL; 731fc80c0dfSnordmark netstack_rele(ns); 732fc80c0dfSnordmark } 7330f1702c5SYu Xiangning 734fc80c0dfSnordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 735fc80c0dfSnordmark if (connp->conn_flags & IPCL_UDPCONN) { 736fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN; 737fc80c0dfSnordmark kmem_cache_free(udp_conn_cache, connp); 738fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) { 739fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN; 740*bd670b35SErik Nordmark connp->conn_proto = IPPROTO_ICMP; 741*bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto; 742fc80c0dfSnordmark kmem_cache_free(rawip_conn_cache, connp); 743fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RTSCONN) { 744fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN; 745fc80c0dfSnordmark kmem_cache_free(rts_conn_cache, connp); 7467c478bd9Sstevel@tonic-gate } else { 747fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN; 748fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 749fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL); 750fc80c0dfSnordmark kmem_cache_free(ip_conn_cache, connp); 7517c478bd9Sstevel@tonic-gate } 7527c478bd9Sstevel@tonic-gate } 7537c478bd9Sstevel@tonic-gate 7547c478bd9Sstevel@tonic-gate /* 7557c478bd9Sstevel@tonic-gate * Running in cluster mode - deregister listener information 7567c478bd9Sstevel@tonic-gate */ 7577c478bd9Sstevel@tonic-gate static void 7587c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 7597c478bd9Sstevel@tonic-gate { 7607c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 7617c478bd9Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 7627c478bd9Sstevel@tonic-gate 7637c478bd9Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 7647c478bd9Sstevel@tonic-gate sa_family_t addr_family; 7657c478bd9Sstevel@tonic-gate uint8_t *laddrp; 7667c478bd9Sstevel@tonic-gate 767*bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION) { 7687c478bd9Sstevel@tonic-gate addr_family = AF_INET6; 769*bd670b35SErik Nordmark laddrp = (uint8_t *)&connp->conn_bound_addr_v6; 7707c478bd9Sstevel@tonic-gate } else { 7717c478bd9Sstevel@tonic-gate 
addr_family = AF_INET; 772*bd670b35SErik Nordmark laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 7737c478bd9Sstevel@tonic-gate } 7748e4b770fSLu Huafeng (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid, 7758e4b770fSLu Huafeng IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL); 7767c478bd9Sstevel@tonic-gate } 7777c478bd9Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER; 7787c478bd9Sstevel@tonic-gate } 7797c478bd9Sstevel@tonic-gate 7807c478bd9Sstevel@tonic-gate /* 7817c478bd9Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 7827c478bd9Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 7837c478bd9Sstevel@tonic-gate * table this connection was in. 7847c478bd9Sstevel@tonic-gate */ 7857c478bd9Sstevel@tonic-gate #define IPCL_HASH_REMOVE(connp) { \ 7867c478bd9Sstevel@tonic-gate connf_t *connfp = (connp)->conn_fanout; \ 7877c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 7887c478bd9Sstevel@tonic-gate if (connfp != NULL) { \ 7897c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); \ 7907c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) \ 7917c478bd9Sstevel@tonic-gate (connp)->conn_next->conn_prev = \ 7927c478bd9Sstevel@tonic-gate (connp)->conn_prev; \ 7937c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) \ 7947c478bd9Sstevel@tonic-gate (connp)->conn_prev->conn_next = \ 7957c478bd9Sstevel@tonic-gate (connp)->conn_next; \ 7967c478bd9Sstevel@tonic-gate else \ 7977c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; \ 7987c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; \ 7997c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; \ 8007c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; \ 8017c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; \ 8027c478bd9Sstevel@tonic-gate if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 8037c478bd9Sstevel@tonic-gate ipcl_conn_unlisten((connp)); \ 
8047c478bd9Sstevel@tonic-gate CONN_DEC_REF((connp)); \ 8057c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); \ 8067c478bd9Sstevel@tonic-gate } \ 8077c478bd9Sstevel@tonic-gate } 8087c478bd9Sstevel@tonic-gate 8097c478bd9Sstevel@tonic-gate void 8107c478bd9Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp) 8117c478bd9Sstevel@tonic-gate { 812*bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto; 813*bd670b35SErik Nordmark 8147c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 815*bd670b35SErik Nordmark if (protocol == IPPROTO_RSVP) 816*bd670b35SErik Nordmark ill_set_inputfn_all(connp->conn_netstack->netstack_ip); 8177c478bd9Sstevel@tonic-gate } 8187c478bd9Sstevel@tonic-gate 8197c478bd9Sstevel@tonic-gate /* 8207c478bd9Sstevel@tonic-gate * The whole purpose of this function is allow removal of 8217c478bd9Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim. 8227c478bd9Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 8237c478bd9Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 8247c478bd9Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 8257c478bd9Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 8267c478bd9Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 8277c478bd9Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 8287c478bd9Sstevel@tonic-gate * improved performance. 
8297c478bd9Sstevel@tonic-gate */ 8307c478bd9Sstevel@tonic-gate void 8317c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 8327c478bd9Sstevel@tonic-gate { 8337c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 8347c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 8357c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 8367c478bd9Sstevel@tonic-gate 8377c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 838121e5416Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev; 8397c478bd9Sstevel@tonic-gate } 8407c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 841121e5416Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next; 8427c478bd9Sstevel@tonic-gate } else { 8437c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 8447c478bd9Sstevel@tonic-gate } 8457c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; 8467c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; 8477c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; 8487c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 8497c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 8507c478bd9Sstevel@tonic-gate (connp)->conn_ref--; 8517c478bd9Sstevel@tonic-gate } 8527c478bd9Sstevel@tonic-gate 8537c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 8547c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 8557c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_next == NULL); \ 8567c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 8577c478bd9Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 8587c478bd9Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 8597c478bd9Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 8607c478bd9Sstevel@tonic-gate } \ 8617c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8627c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 
8637c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8647c478bd9Sstevel@tonic-gate IPCL_CONNECTED; \ 8657c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 8667c478bd9Sstevel@tonic-gate } 8677c478bd9Sstevel@tonic-gate 8687c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 8697c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8707c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8717c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 8727c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 8737c478bd9Sstevel@tonic-gate } 8747c478bd9Sstevel@tonic-gate 8757c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 8767c478bd9Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 8777c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8787c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8797c478bd9Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 8803d1c78fbSethindra while (nconnp != NULL && \ 881*bd670b35SErik Nordmark !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6)) { \ 8823d1c78fbSethindra pconnp = nconnp; \ 8833d1c78fbSethindra nconnp = nconnp->conn_next; \ 8847c478bd9Sstevel@tonic-gate } \ 8857c478bd9Sstevel@tonic-gate if (pconnp != NULL) { \ 8867c478bd9Sstevel@tonic-gate pconnp->conn_next = (connp); \ 8877c478bd9Sstevel@tonic-gate (connp)->conn_prev = pconnp; \ 8887c478bd9Sstevel@tonic-gate } else { \ 8897c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 8907c478bd9Sstevel@tonic-gate } \ 8917c478bd9Sstevel@tonic-gate if (nconnp != NULL) { \ 8927c478bd9Sstevel@tonic-gate (connp)->conn_next = nconnp; \ 8937c478bd9Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 8947c478bd9Sstevel@tonic-gate } \ 8957c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8967c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8977c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 
8987c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 8997c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9007c478bd9Sstevel@tonic-gate } 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 9037c478bd9Sstevel@tonic-gate conn_t **list, *prev, *next; \ 9047c478bd9Sstevel@tonic-gate boolean_t isv4mapped = \ 905*bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6); \ 9067c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9077c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9087c478bd9Sstevel@tonic-gate list = &(connfp)->connf_head; \ 9097c478bd9Sstevel@tonic-gate prev = NULL; \ 9107c478bd9Sstevel@tonic-gate while ((next = *list) != NULL) { \ 9117c478bd9Sstevel@tonic-gate if (isv4mapped && \ 912*bd670b35SErik Nordmark IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) && \ 9137c478bd9Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 9147c478bd9Sstevel@tonic-gate (connp)->conn_next = next; \ 9157c478bd9Sstevel@tonic-gate if (prev != NULL) \ 9167c478bd9Sstevel@tonic-gate prev = next->conn_prev; \ 9177c478bd9Sstevel@tonic-gate next->conn_prev = (connp); \ 9187c478bd9Sstevel@tonic-gate break; \ 9197c478bd9Sstevel@tonic-gate } \ 9207c478bd9Sstevel@tonic-gate list = &next->conn_next; \ 9217c478bd9Sstevel@tonic-gate prev = next; \ 9227c478bd9Sstevel@tonic-gate } \ 9237c478bd9Sstevel@tonic-gate (connp)->conn_prev = prev; \ 9247c478bd9Sstevel@tonic-gate *list = (connp); \ 9257c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9267c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9277c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 9287c478bd9Sstevel@tonic-gate CONN_INC_REF((connp)); \ 9297c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9307c478bd9Sstevel@tonic-gate } 9317c478bd9Sstevel@tonic-gate 9327c478bd9Sstevel@tonic-gate void 9337c478bd9Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, 
conn_t *connp) 9347c478bd9Sstevel@tonic-gate { 9357c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9367c478bd9Sstevel@tonic-gate } 9377c478bd9Sstevel@tonic-gate 9382b24ab6bSSebastien Roy /* 9392b24ab6bSSebastien Roy * Because the classifier is used to classify inbound packets, the destination 9402b24ab6bSSebastien Roy * address is meant to be our local tunnel address (tunnel source), and the 9412b24ab6bSSebastien Roy * source the remote tunnel address (tunnel destination). 942*bd670b35SErik Nordmark * 943*bd670b35SErik Nordmark * Note that conn_proto can't be used for fanout since the upper protocol 944*bd670b35SErik Nordmark * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel. 9452b24ab6bSSebastien Roy */ 9462b24ab6bSSebastien Roy conn_t * 9472b24ab6bSSebastien Roy ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst) 9482b24ab6bSSebastien Roy { 9492b24ab6bSSebastien Roy connf_t *connfp; 9502b24ab6bSSebastien Roy conn_t *connp; 9512b24ab6bSSebastien Roy 9522b24ab6bSSebastien Roy /* first look for IPv4 tunnel links */ 9532b24ab6bSSebastien Roy connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)]; 9542b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock); 9552b24ab6bSSebastien Roy for (connp = connfp->connf_head; connp != NULL; 9562b24ab6bSSebastien Roy connp = connp->conn_next) { 9572b24ab6bSSebastien Roy if (IPCL_IPTUN_MATCH(connp, *dst, *src)) 9582b24ab6bSSebastien Roy break; 9592b24ab6bSSebastien Roy } 9602b24ab6bSSebastien Roy if (connp != NULL) 9612b24ab6bSSebastien Roy goto done; 9622b24ab6bSSebastien Roy 9632b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock); 9642b24ab6bSSebastien Roy 9652b24ab6bSSebastien Roy /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */ 9662b24ab6bSSebastien Roy connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, 9672b24ab6bSSebastien Roy INADDR_ANY)]; 9682b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock); 9692b24ab6bSSebastien Roy for (connp = 
connfp->connf_head; connp != NULL; 9702b24ab6bSSebastien Roy connp = connp->conn_next) { 9712b24ab6bSSebastien Roy if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY)) 9722b24ab6bSSebastien Roy break; 9732b24ab6bSSebastien Roy } 9742b24ab6bSSebastien Roy done: 9752b24ab6bSSebastien Roy if (connp != NULL) 9762b24ab6bSSebastien Roy CONN_INC_REF(connp); 9772b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock); 9782b24ab6bSSebastien Roy return (connp); 9792b24ab6bSSebastien Roy } 9802b24ab6bSSebastien Roy 9812b24ab6bSSebastien Roy conn_t * 9822b24ab6bSSebastien Roy ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst) 9832b24ab6bSSebastien Roy { 9842b24ab6bSSebastien Roy connf_t *connfp; 9852b24ab6bSSebastien Roy conn_t *connp; 9862b24ab6bSSebastien Roy 9872b24ab6bSSebastien Roy /* Look for an IPv6 tunnel link */ 9882b24ab6bSSebastien Roy connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)]; 9892b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock); 9902b24ab6bSSebastien Roy for (connp = connfp->connf_head; connp != NULL; 9912b24ab6bSSebastien Roy connp = connp->conn_next) { 9922b24ab6bSSebastien Roy if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) { 9932b24ab6bSSebastien Roy CONN_INC_REF(connp); 9942b24ab6bSSebastien Roy break; 9952b24ab6bSSebastien Roy } 9962b24ab6bSSebastien Roy } 9972b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock); 9982b24ab6bSSebastien Roy return (connp); 9992b24ab6bSSebastien Roy } 10002b24ab6bSSebastien Roy 10017c478bd9Sstevel@tonic-gate /* 10027c478bd9Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now. 10037c478bd9Sstevel@tonic-gate * This may change later. 10047c478bd9Sstevel@tonic-gate * 10057c478bd9Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 10067c478bd9Sstevel@tonic-gate * lport is in network byte order. 
10077c478bd9Sstevel@tonic-gate */ 10087c478bd9Sstevel@tonic-gate static int 10097c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 10107c478bd9Sstevel@tonic-gate { 10117c478bd9Sstevel@tonic-gate connf_t *connfp; 10127c478bd9Sstevel@tonic-gate conn_t *oconnp; 1013f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10147c478bd9Sstevel@tonic-gate 1015f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 10167c478bd9Sstevel@tonic-gate 10177c478bd9Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. */ 10187c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 10197c478bd9Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 10207c0c0508Skcpoon oconnp = oconnp->conn_next) { 10217c478bd9Sstevel@tonic-gate if (oconnp->conn_lport == lport && 10227c478bd9Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 1023*bd670b35SErik Nordmark oconnp->conn_family == connp->conn_family && 1024*bd670b35SErik Nordmark ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 1025*bd670b35SErik Nordmark IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) || 1026*bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) || 1027*bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) || 1028*bd670b35SErik Nordmark IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6, 1029*bd670b35SErik Nordmark &connp->conn_laddr_v6))) { 10307c478bd9Sstevel@tonic-gate break; 10317c478bd9Sstevel@tonic-gate } 10327c478bd9Sstevel@tonic-gate } 10337c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 10347c478bd9Sstevel@tonic-gate if (oconnp != NULL) 10357c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL); 10367c478bd9Sstevel@tonic-gate 1037*bd670b35SErik Nordmark if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) || 1038*bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1039*bd670b35SErik Nordmark if 
(IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) || 1040*bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) { 10417c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10427c478bd9Sstevel@tonic-gate } else { 10437c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10447c478bd9Sstevel@tonic-gate } 10457c478bd9Sstevel@tonic-gate } else { 10467c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10477c478bd9Sstevel@tonic-gate } 10487c478bd9Sstevel@tonic-gate return (0); 10497c478bd9Sstevel@tonic-gate } 10507c478bd9Sstevel@tonic-gate 10512b24ab6bSSebastien Roy static int 1052*bd670b35SErik Nordmark ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst) 10532b24ab6bSSebastien Roy { 10542b24ab6bSSebastien Roy connf_t *connfp; 10552b24ab6bSSebastien Roy conn_t *tconnp; 1056*bd670b35SErik Nordmark ipaddr_t laddr = connp->conn_laddr_v4; 1057*bd670b35SErik Nordmark ipaddr_t faddr = connp->conn_faddr_v4; 10582b24ab6bSSebastien Roy 1059*bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)]; 10602b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock); 10612b24ab6bSSebastien Roy for (tconnp = connfp->connf_head; tconnp != NULL; 10622b24ab6bSSebastien Roy tconnp = tconnp->conn_next) { 1063*bd670b35SErik Nordmark if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) { 10642b24ab6bSSebastien Roy /* A tunnel is already bound to these addresses. 
*/ 10652b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock); 10662b24ab6bSSebastien Roy return (EADDRINUSE); 10672b24ab6bSSebastien Roy } 10682b24ab6bSSebastien Roy } 10692b24ab6bSSebastien Roy IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 10702b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock); 10712b24ab6bSSebastien Roy return (0); 10722b24ab6bSSebastien Roy } 10732b24ab6bSSebastien Roy 10742b24ab6bSSebastien Roy static int 1075*bd670b35SErik Nordmark ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst) 10762b24ab6bSSebastien Roy { 10772b24ab6bSSebastien Roy connf_t *connfp; 10782b24ab6bSSebastien Roy conn_t *tconnp; 1079*bd670b35SErik Nordmark in6_addr_t *laddr = &connp->conn_laddr_v6; 1080*bd670b35SErik Nordmark in6_addr_t *faddr = &connp->conn_faddr_v6; 10812b24ab6bSSebastien Roy 1082*bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)]; 10832b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock); 10842b24ab6bSSebastien Roy for (tconnp = connfp->connf_head; tconnp != NULL; 10852b24ab6bSSebastien Roy tconnp = tconnp->conn_next) { 1086*bd670b35SErik Nordmark if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) { 10872b24ab6bSSebastien Roy /* A tunnel is already bound to these addresses. */ 10882b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock); 10892b24ab6bSSebastien Roy return (EADDRINUSE); 10902b24ab6bSSebastien Roy } 10912b24ab6bSSebastien Roy } 10922b24ab6bSSebastien Roy IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 10932b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock); 10942b24ab6bSSebastien Roy return (0); 10952b24ab6bSSebastien Roy } 10962b24ab6bSSebastien Roy 109745916cd2Sjpk /* 109845916cd2Sjpk * Check for a MAC exemption conflict on a labeled system. Note that for 109945916cd2Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 110045916cd2Sjpk * transport layer. This check is for binding all other protocols. 
110145916cd2Sjpk * 110245916cd2Sjpk * Returns true if there's a conflict. 110345916cd2Sjpk */ 110445916cd2Sjpk static boolean_t 1105f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 110645916cd2Sjpk { 110745916cd2Sjpk connf_t *connfp; 110845916cd2Sjpk conn_t *tconn; 110945916cd2Sjpk 1110*bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto]; 111145916cd2Sjpk mutex_enter(&connfp->connf_lock); 111245916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 111345916cd2Sjpk tconn = tconn->conn_next) { 111445916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */ 1115*bd670b35SErik Nordmark if (connp->conn_family != tconn->conn_family) 111645916cd2Sjpk continue; 111745916cd2Sjpk /* If neither is exempt, then there's no conflict */ 11185d3b8cb7SBill Sommerfeld if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 11195d3b8cb7SBill Sommerfeld (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 112045916cd2Sjpk continue; 11215f9878b0Sken Powell - Sun Microsystem /* We are only concerned about sockets for a different zone */ 11225f9878b0Sken Powell - Sun Microsystem if (connp->conn_zoneid == tconn->conn_zoneid) 11235f9878b0Sken Powell - Sun Microsystem continue; 112445916cd2Sjpk /* If both are bound to different specific addrs, ok */ 1125*bd670b35SErik Nordmark if (connp->conn_laddr_v4 != INADDR_ANY && 1126*bd670b35SErik Nordmark tconn->conn_laddr_v4 != INADDR_ANY && 1127*bd670b35SErik Nordmark connp->conn_laddr_v4 != tconn->conn_laddr_v4) 112845916cd2Sjpk continue; 112945916cd2Sjpk /* These two conflict; fail */ 113045916cd2Sjpk break; 113145916cd2Sjpk } 113245916cd2Sjpk mutex_exit(&connfp->connf_lock); 113345916cd2Sjpk return (tconn != NULL); 113445916cd2Sjpk } 113545916cd2Sjpk 113645916cd2Sjpk static boolean_t 1137f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 113845916cd2Sjpk { 113945916cd2Sjpk connf_t *connfp; 114045916cd2Sjpk conn_t *tconn; 114145916cd2Sjpk 1142*bd670b35SErik Nordmark connfp = 
&ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto]; 114345916cd2Sjpk mutex_enter(&connfp->connf_lock); 114445916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 114545916cd2Sjpk tconn = tconn->conn_next) { 114645916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */ 1147*bd670b35SErik Nordmark if (connp->conn_family != tconn->conn_family) 114845916cd2Sjpk continue; 114945916cd2Sjpk /* If neither is exempt, then there's no conflict */ 11505d3b8cb7SBill Sommerfeld if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) && 11515d3b8cb7SBill Sommerfeld (tconn->conn_mac_mode == CONN_MAC_DEFAULT)) 115245916cd2Sjpk continue; 11535f9878b0Sken Powell - Sun Microsystem /* We are only concerned about sockets for a different zone */ 11545f9878b0Sken Powell - Sun Microsystem if (connp->conn_zoneid == tconn->conn_zoneid) 11555f9878b0Sken Powell - Sun Microsystem continue; 115645916cd2Sjpk /* If both are bound to different addrs, ok */ 1157*bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) && 1158*bd670b35SErik Nordmark !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) && 1159*bd670b35SErik Nordmark !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 1160*bd670b35SErik Nordmark &tconn->conn_laddr_v6)) 116145916cd2Sjpk continue; 116245916cd2Sjpk /* These two conflict; fail */ 116345916cd2Sjpk break; 116445916cd2Sjpk } 116545916cd2Sjpk mutex_exit(&connfp->connf_lock); 116645916cd2Sjpk return (tconn != NULL); 116745916cd2Sjpk } 116845916cd2Sjpk 11697c478bd9Sstevel@tonic-gate /* 11707c478bd9Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 1171*bd670b35SErik Nordmark * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport) 11727c478bd9Sstevel@tonic-gate */ 1173*bd670b35SErik Nordmark 11747c478bd9Sstevel@tonic-gate int 1175*bd670b35SErik Nordmark ipcl_bind_insert(conn_t *connp) 1176*bd670b35SErik Nordmark { 1177*bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION) 1178*bd670b35SErik Nordmark return 
(ipcl_bind_insert_v6(connp)); 1179*bd670b35SErik Nordmark else 1180*bd670b35SErik Nordmark return (ipcl_bind_insert_v4(connp)); 1181*bd670b35SErik Nordmark } 1182*bd670b35SErik Nordmark 1183*bd670b35SErik Nordmark int 1184*bd670b35SErik Nordmark ipcl_bind_insert_v4(conn_t *connp) 11857c478bd9Sstevel@tonic-gate { 11867c478bd9Sstevel@tonic-gate connf_t *connfp; 11877c478bd9Sstevel@tonic-gate int ret = 0; 1188f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1189*bd670b35SErik Nordmark uint16_t lport = connp->conn_lport; 1190*bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto; 11917c478bd9Sstevel@tonic-gate 11922b24ab6bSSebastien Roy if (IPCL_IS_IPTUN(connp)) 1193*bd670b35SErik Nordmark return (ipcl_iptun_hash_insert(connp, ipst)); 11942b24ab6bSSebastien Roy 11957c478bd9Sstevel@tonic-gate switch (protocol) { 11967c478bd9Sstevel@tonic-gate default: 1197f4b3ec61Sdh if (is_system_labeled() && 1198f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 119945916cd2Sjpk return (EADDRINUSE); 120045916cd2Sjpk /* FALLTHROUGH */ 120145916cd2Sjpk case IPPROTO_UDP: 12027c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1203f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1204f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)]; 12057c478bd9Sstevel@tonic-gate } else { 1206*bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol]; 12077c478bd9Sstevel@tonic-gate } 12087c478bd9Sstevel@tonic-gate 1209*bd670b35SErik Nordmark if (connp->conn_faddr_v4 != INADDR_ANY) { 12107c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1211*bd670b35SErik Nordmark } else if (connp->conn_laddr_v4 != INADDR_ANY) { 12127c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12137c478bd9Sstevel@tonic-gate } else { 12147c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12157c478bd9Sstevel@tonic-gate } 1216*bd670b35SErik Nordmark if (protocol == IPPROTO_RSVP) 1217*bd670b35SErik Nordmark ill_set_inputfn_all(ipst); 
12187c478bd9Sstevel@tonic-gate break; 12197c478bd9Sstevel@tonic-gate 12207c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 12217c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 122245916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1223f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[ 1224f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)]; 1225*bd670b35SErik Nordmark if (connp->conn_laddr_v4 != INADDR_ANY) { 12267c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12277c478bd9Sstevel@tonic-gate } else { 12287c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12297c478bd9Sstevel@tonic-gate } 12307c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) { 1231*bd670b35SErik Nordmark ASSERT(connp->conn_ipversion == IPV4_VERSION); 12327c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 12338e4b770fSLu Huafeng (*cl_inet_listen)( 12348e4b770fSLu Huafeng connp->conn_netstack->netstack_stackid, 12358e4b770fSLu Huafeng IPPROTO_TCP, AF_INET, 1236*bd670b35SErik Nordmark (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL); 12377c478bd9Sstevel@tonic-gate } 12387c478bd9Sstevel@tonic-gate break; 12397c478bd9Sstevel@tonic-gate 12407c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 12417c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12427c478bd9Sstevel@tonic-gate break; 12437c478bd9Sstevel@tonic-gate } 12447c478bd9Sstevel@tonic-gate 12457c478bd9Sstevel@tonic-gate return (ret); 12467c478bd9Sstevel@tonic-gate } 12477c478bd9Sstevel@tonic-gate 12487c478bd9Sstevel@tonic-gate int 1249*bd670b35SErik Nordmark ipcl_bind_insert_v6(conn_t *connp) 12507c478bd9Sstevel@tonic-gate { 12512b24ab6bSSebastien Roy connf_t *connfp; 12522b24ab6bSSebastien Roy int ret = 0; 1253f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1254*bd670b35SErik Nordmark uint16_t lport = connp->conn_lport; 1255*bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto; 12567c478bd9Sstevel@tonic-gate 12572b24ab6bSSebastien Roy if 
(IPCL_IS_IPTUN(connp)) { 1258*bd670b35SErik Nordmark return (ipcl_iptun_hash_insert_v6(connp, ipst)); 12592b24ab6bSSebastien Roy } 12602b24ab6bSSebastien Roy 12617c478bd9Sstevel@tonic-gate switch (protocol) { 12627c478bd9Sstevel@tonic-gate default: 1263f4b3ec61Sdh if (is_system_labeled() && 1264f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst)) 126545916cd2Sjpk return (EADDRINUSE); 126645916cd2Sjpk /* FALLTHROUGH */ 126745916cd2Sjpk case IPPROTO_UDP: 12687c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1269f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1270f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)]; 12717c478bd9Sstevel@tonic-gate } else { 1272f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 12737c478bd9Sstevel@tonic-gate } 12747c478bd9Sstevel@tonic-gate 1275*bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 12767c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1277*bd670b35SErik Nordmark } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 12787c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12797c478bd9Sstevel@tonic-gate } else { 12807c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12817c478bd9Sstevel@tonic-gate } 12827c478bd9Sstevel@tonic-gate break; 12837c478bd9Sstevel@tonic-gate 12847c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 12857c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 128645916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1287f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[ 1288f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)]; 1289*bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 12907c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12917c478bd9Sstevel@tonic-gate } else { 12927c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12937c478bd9Sstevel@tonic-gate } 12947c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) { 12957c478bd9Sstevel@tonic-gate sa_family_t 
addr_family; 12967c478bd9Sstevel@tonic-gate uint8_t *laddrp; 12977c478bd9Sstevel@tonic-gate 1298*bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION) { 12997c478bd9Sstevel@tonic-gate addr_family = AF_INET6; 13007c478bd9Sstevel@tonic-gate laddrp = 1301*bd670b35SErik Nordmark (uint8_t *)&connp->conn_bound_addr_v6; 13027c478bd9Sstevel@tonic-gate } else { 13037c478bd9Sstevel@tonic-gate addr_family = AF_INET; 1304*bd670b35SErik Nordmark laddrp = (uint8_t *)&connp->conn_bound_addr_v4; 13057c478bd9Sstevel@tonic-gate } 13067c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 13078e4b770fSLu Huafeng (*cl_inet_listen)( 13088e4b770fSLu Huafeng connp->conn_netstack->netstack_stackid, 13098e4b770fSLu Huafeng IPPROTO_TCP, addr_family, laddrp, lport, NULL); 13107c478bd9Sstevel@tonic-gate } 13117c478bd9Sstevel@tonic-gate break; 13127c478bd9Sstevel@tonic-gate 13137c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 13147c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13157c478bd9Sstevel@tonic-gate break; 13167c478bd9Sstevel@tonic-gate } 13177c478bd9Sstevel@tonic-gate 13187c478bd9Sstevel@tonic-gate return (ret); 13197c478bd9Sstevel@tonic-gate } 13207c478bd9Sstevel@tonic-gate 13217c478bd9Sstevel@tonic-gate /* 13227c478bd9Sstevel@tonic-gate * ipcl_conn_hash insertion routines. 1323*bd670b35SErik Nordmark * The caller has already set conn_proto and the addresses/ports in the conn_t. 
13247c478bd9Sstevel@tonic-gate */ 1325*bd670b35SErik Nordmark 1326*bd670b35SErik Nordmark int 1327*bd670b35SErik Nordmark ipcl_conn_insert(conn_t *connp) 1328*bd670b35SErik Nordmark { 1329*bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION) 1330*bd670b35SErik Nordmark return (ipcl_conn_insert_v6(connp)); 1331*bd670b35SErik Nordmark else 1332*bd670b35SErik Nordmark return (ipcl_conn_insert_v4(connp)); 1333*bd670b35SErik Nordmark } 1334*bd670b35SErik Nordmark 13357c478bd9Sstevel@tonic-gate int 1336*bd670b35SErik Nordmark ipcl_conn_insert_v4(conn_t *connp) 13377c478bd9Sstevel@tonic-gate { 13387c478bd9Sstevel@tonic-gate connf_t *connfp; 13397c478bd9Sstevel@tonic-gate conn_t *tconnp; 13407c478bd9Sstevel@tonic-gate int ret = 0; 1341f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1342*bd670b35SErik Nordmark uint16_t lport = connp->conn_lport; 1343*bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto; 13447c478bd9Sstevel@tonic-gate 13452b24ab6bSSebastien Roy if (IPCL_IS_IPTUN(connp)) 1346*bd670b35SErik Nordmark return (ipcl_iptun_hash_insert(connp, ipst)); 13472b24ab6bSSebastien Roy 13487c478bd9Sstevel@tonic-gate switch (protocol) { 13497c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 1350a12220b3SJon Anderson /* 1351*bd670b35SErik Nordmark * For TCP, we check whether the connection tuple already 1352a12220b3SJon Anderson * exists before allowing the connection to proceed. We 1353a12220b3SJon Anderson * also allow indexing on the zoneid. This is to allow 1354a12220b3SJon Anderson * multiple shared stack zones to have the same tcp 1355a12220b3SJon Anderson * connection tuple. In practice this only happens for 1356a12220b3SJon Anderson * INADDR_LOOPBACK as it's the only local address which 1357a12220b3SJon Anderson * doesn't have to be unique. 
1358a12220b3SJon Anderson */ 1359f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1360*bd670b35SErik Nordmark IPCL_CONN_HASH(connp->conn_faddr_v4, 1361f4b3ec61Sdh connp->conn_ports, ipst)]; 13627c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13637c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 13647c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 1365*bd670b35SErik Nordmark if (IPCL_CONN_MATCH(tconnp, connp->conn_proto, 1366*bd670b35SErik Nordmark connp->conn_faddr_v4, connp->conn_laddr_v4, 1367*bd670b35SErik Nordmark connp->conn_ports) && 1368*bd670b35SErik Nordmark IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 13697c478bd9Sstevel@tonic-gate /* Already have a conn. bail out */ 13707c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13717c478bd9Sstevel@tonic-gate return (EADDRINUSE); 13727c478bd9Sstevel@tonic-gate } 13737c478bd9Sstevel@tonic-gate } 13747c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 13757c478bd9Sstevel@tonic-gate /* 13767c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 13777c478bd9Sstevel@tonic-gate * rebind. Let it happen. 13787c478bd9Sstevel@tonic-gate */ 13797c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13807c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 13817c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13827c478bd9Sstevel@tonic-gate } 1383866ba9ddSjprakash 1384866ba9ddSjprakash ASSERT(connp->conn_recv != NULL); 1385*bd670b35SErik Nordmark ASSERT(connp->conn_recvicmp != NULL); 1386866ba9ddSjprakash 13877c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 13887c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13897c478bd9Sstevel@tonic-gate break; 13907c478bd9Sstevel@tonic-gate 13917c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 13927c0c0508Skcpoon /* 13937c0c0508Skcpoon * The raw socket may have already been bound, remove it 13947c0c0508Skcpoon * from the hash first. 
13957c0c0508Skcpoon */ 13967c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 13977c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13987c478bd9Sstevel@tonic-gate break; 13997c478bd9Sstevel@tonic-gate 14007c478bd9Sstevel@tonic-gate default: 140145916cd2Sjpk /* 140245916cd2Sjpk * Check for conflicts among MAC exempt bindings. For 140345916cd2Sjpk * transports with port numbers, this is done by the upper 140445916cd2Sjpk * level per-transport binding logic. For all others, it's 140545916cd2Sjpk * done here. 140645916cd2Sjpk */ 1407f4b3ec61Sdh if (is_system_labeled() && 1408f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 140945916cd2Sjpk return (EADDRINUSE); 141045916cd2Sjpk /* FALLTHROUGH */ 141145916cd2Sjpk 141245916cd2Sjpk case IPPROTO_UDP: 14137c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1414f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1415*bd670b35SErik Nordmark IPCL_UDP_HASH(lport, ipst)]; 14167c478bd9Sstevel@tonic-gate } else { 1417*bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol]; 14187c478bd9Sstevel@tonic-gate } 14197c478bd9Sstevel@tonic-gate 1420*bd670b35SErik Nordmark if (connp->conn_faddr_v4 != INADDR_ANY) { 14217c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1422*bd670b35SErik Nordmark } else if (connp->conn_laddr_v4 != INADDR_ANY) { 14237c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 14247c478bd9Sstevel@tonic-gate } else { 14257c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 14267c478bd9Sstevel@tonic-gate } 14277c478bd9Sstevel@tonic-gate break; 14287c478bd9Sstevel@tonic-gate } 14297c478bd9Sstevel@tonic-gate 14307c478bd9Sstevel@tonic-gate return (ret); 14317c478bd9Sstevel@tonic-gate } 14327c478bd9Sstevel@tonic-gate 14337c478bd9Sstevel@tonic-gate int 1434*bd670b35SErik Nordmark ipcl_conn_insert_v6(conn_t *connp) 14357c478bd9Sstevel@tonic-gate { 14367c478bd9Sstevel@tonic-gate connf_t *connfp; 14377c478bd9Sstevel@tonic-gate conn_t *tconnp; 
14387c478bd9Sstevel@tonic-gate int ret = 0; 1439f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1440*bd670b35SErik Nordmark uint16_t lport = connp->conn_lport; 1441*bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto; 1442*bd670b35SErik Nordmark uint_t ifindex = connp->conn_bound_if; 14437c478bd9Sstevel@tonic-gate 14442b24ab6bSSebastien Roy if (IPCL_IS_IPTUN(connp)) 1445*bd670b35SErik Nordmark return (ipcl_iptun_hash_insert_v6(connp, ipst)); 14462b24ab6bSSebastien Roy 14477c478bd9Sstevel@tonic-gate switch (protocol) { 14487c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 1449a12220b3SJon Anderson 1450a12220b3SJon Anderson /* 1451a12220b3SJon Anderson * For tcp, we check whether the connection tuple already 1452a12220b3SJon Anderson * exists before allowing the connection to proceed. We 1453a12220b3SJon Anderson * also allow indexing on the zoneid. This is to allow 1454a12220b3SJon Anderson * multiple shared stack zones to have the same tcp 1455a12220b3SJon Anderson * connection tuple. In practice this only happens for 1456a12220b3SJon Anderson * ipv6_loopback as it's the only local address which 1457a12220b3SJon Anderson * doesn't have to be unique. 1458a12220b3SJon Anderson */ 1459f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1460*bd670b35SErik Nordmark IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports, 1461f4b3ec61Sdh ipst)]; 14627c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14637c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 14647c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 1465*bd670b35SErik Nordmark /* NOTE: need to match zoneid. 
Bug in onnv-gate */ 1466*bd670b35SErik Nordmark if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto, 1467*bd670b35SErik Nordmark connp->conn_faddr_v6, connp->conn_laddr_v6, 14687c478bd9Sstevel@tonic-gate connp->conn_ports) && 1469*bd670b35SErik Nordmark (tconnp->conn_bound_if == 0 || 1470*bd670b35SErik Nordmark tconnp->conn_bound_if == ifindex) && 1471*bd670b35SErik Nordmark IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) { 14727c478bd9Sstevel@tonic-gate /* Already have a conn. bail out */ 14737c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14747c478bd9Sstevel@tonic-gate return (EADDRINUSE); 14757c478bd9Sstevel@tonic-gate } 14767c478bd9Sstevel@tonic-gate } 14777c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 14787c478bd9Sstevel@tonic-gate /* 14797c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 14807c478bd9Sstevel@tonic-gate * rebind. Let it happen. 14817c478bd9Sstevel@tonic-gate */ 14827c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14837c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 14847c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14857c478bd9Sstevel@tonic-gate } 14867c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 14877c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14887c478bd9Sstevel@tonic-gate break; 14897c478bd9Sstevel@tonic-gate 14907c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 14917c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 14927c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 14937c478bd9Sstevel@tonic-gate break; 14947c478bd9Sstevel@tonic-gate 14957c478bd9Sstevel@tonic-gate default: 1496f4b3ec61Sdh if (is_system_labeled() && 1497f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst)) 149845916cd2Sjpk return (EADDRINUSE); 149945916cd2Sjpk /* FALLTHROUGH */ 150045916cd2Sjpk case IPPROTO_UDP: 15017c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1502f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1503*bd670b35SErik Nordmark 
IPCL_UDP_HASH(lport, ipst)]; 15047c478bd9Sstevel@tonic-gate } else { 1505f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 15067c478bd9Sstevel@tonic-gate } 15077c478bd9Sstevel@tonic-gate 1508*bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) { 15097c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 1510*bd670b35SErik Nordmark } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) { 15117c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 15127c478bd9Sstevel@tonic-gate } else { 15137c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 15147c478bd9Sstevel@tonic-gate } 15157c478bd9Sstevel@tonic-gate break; 15167c478bd9Sstevel@tonic-gate } 15177c478bd9Sstevel@tonic-gate 15187c478bd9Sstevel@tonic-gate return (ret); 15197c478bd9Sstevel@tonic-gate } 15207c478bd9Sstevel@tonic-gate 15217c478bd9Sstevel@tonic-gate /* 15227c478bd9Sstevel@tonic-gate * v4 packet classifying function. looks up the fanout table to 15237c478bd9Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 15247c478bd9Sstevel@tonic-gate * the reference held, null otherwise. 152545916cd2Sjpk * 152645916cd2Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 152745916cd2Sjpk * Lookup" comment block are applied. Labels are also checked as described 152845916cd2Sjpk * above. If the packet is from the inside (looped back), and is from the same 152945916cd2Sjpk * zone, then label checks are omitted. 
15307c478bd9Sstevel@tonic-gate */ 15317c478bd9Sstevel@tonic-gate conn_t * 1532*bd670b35SErik Nordmark ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 1533*bd670b35SErik Nordmark ip_recv_attr_t *ira, ip_stack_t *ipst) 15347c478bd9Sstevel@tonic-gate { 15357c478bd9Sstevel@tonic-gate ipha_t *ipha; 15367c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 15377c478bd9Sstevel@tonic-gate uint16_t lport; 15387c478bd9Sstevel@tonic-gate uint16_t fport; 15397c478bd9Sstevel@tonic-gate uint32_t ports; 15407c478bd9Sstevel@tonic-gate conn_t *connp; 15417c478bd9Sstevel@tonic-gate uint16_t *up; 1542*bd670b35SErik Nordmark zoneid_t zoneid = ira->ira_zoneid; 15437c478bd9Sstevel@tonic-gate 15447c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 15457c478bd9Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 15467c478bd9Sstevel@tonic-gate 15477c478bd9Sstevel@tonic-gate switch (protocol) { 15487c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 15497c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 15507c478bd9Sstevel@tonic-gate connfp = 1551f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 1552f4b3ec61Sdh ports, ipst)]; 15537c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 15547c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 15557c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 1556*bd670b35SErik Nordmark if (IPCL_CONN_MATCH(connp, protocol, 1557*bd670b35SErik Nordmark ipha->ipha_src, ipha->ipha_dst, ports) && 1558*bd670b35SErik Nordmark (connp->conn_zoneid == zoneid || 1559*bd670b35SErik Nordmark connp->conn_allzones || 1560*bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1561*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1562*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 15637c478bd9Sstevel@tonic-gate break; 15647c478bd9Sstevel@tonic-gate } 15657c478bd9Sstevel@tonic-gate 15667c478bd9Sstevel@tonic-gate if (connp 
!= NULL) { 156745916cd2Sjpk /* 156845916cd2Sjpk * We have a fully-bound TCP connection. 156945916cd2Sjpk * 157045916cd2Sjpk * For labeled systems, there's no need to check the 157145916cd2Sjpk * label here. It's known to be good as we checked 157245916cd2Sjpk * before allowing the connection to become bound. 157345916cd2Sjpk */ 15747c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 15757c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15767c478bd9Sstevel@tonic-gate return (connp); 15777c478bd9Sstevel@tonic-gate } 15787c478bd9Sstevel@tonic-gate 15797c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15807c478bd9Sstevel@tonic-gate lport = up[1]; 1581f4b3ec61Sdh bind_connfp = 1582f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 15837c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 15847c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 15857c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 158645916cd2Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 1587*bd670b35SErik Nordmark lport) && 1588*bd670b35SErik Nordmark (connp->conn_zoneid == zoneid || 1589*bd670b35SErik Nordmark connp->conn_allzones || 1590*bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1591*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1592*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 15937c478bd9Sstevel@tonic-gate break; 15947c478bd9Sstevel@tonic-gate } 15957c478bd9Sstevel@tonic-gate 159645916cd2Sjpk /* 159745916cd2Sjpk * If the matching connection is SLP on a private address, then 159845916cd2Sjpk * the label on the packet must match the local zone's label. 159945916cd2Sjpk * Otherwise, it must be in the label range defined by tnrh. 1600*bd670b35SErik Nordmark * This is ensured by tsol_receive_local. 
1601*bd670b35SErik Nordmark * 1602*bd670b35SErik Nordmark * Note that we don't check tsol_receive_local for 1603*bd670b35SErik Nordmark * the connected case. 160445916cd2Sjpk */ 1605*bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 160645916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1607*bd670b35SErik Nordmark ira, connp)) { 1608*bd670b35SErik Nordmark DTRACE_PROBE3(tx__ip__log__info__classify__tcp, 1609*bd670b35SErik Nordmark char *, "connp(1) could not receive mp(2)", 1610*bd670b35SErik Nordmark conn_t *, connp, mblk_t *, mp); 161145916cd2Sjpk connp = NULL; 161245916cd2Sjpk } 161345916cd2Sjpk 16147c478bd9Sstevel@tonic-gate if (connp != NULL) { 161545916cd2Sjpk /* Have a listener at least */ 16167c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 16177c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16187c478bd9Sstevel@tonic-gate return (connp); 16197c478bd9Sstevel@tonic-gate } 16207c478bd9Sstevel@tonic-gate 16217c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16227c478bd9Sstevel@tonic-gate break; 16237c478bd9Sstevel@tonic-gate 16247c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 16257c478bd9Sstevel@tonic-gate lport = up[1]; 16267c478bd9Sstevel@tonic-gate fport = up[0]; 1627f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 16287c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16297c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16307c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 16317c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 16327c478bd9Sstevel@tonic-gate fport, ipha->ipha_src) && 1633*bd670b35SErik Nordmark (connp->conn_zoneid == zoneid || 1634*bd670b35SErik Nordmark connp->conn_allzones || 1635*bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1636*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE)))) 16377c478bd9Sstevel@tonic-gate break; 
16387c478bd9Sstevel@tonic-gate } 16397c478bd9Sstevel@tonic-gate 1640*bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 164145916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 1642*bd670b35SErik Nordmark ira, connp)) { 164345916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp, 164445916cd2Sjpk char *, "connp(1) could not receive mp(2)", 164545916cd2Sjpk conn_t *, connp, mblk_t *, mp); 164645916cd2Sjpk connp = NULL; 164745916cd2Sjpk } 164845916cd2Sjpk 16497c478bd9Sstevel@tonic-gate if (connp != NULL) { 16507c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 16517c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16527c478bd9Sstevel@tonic-gate return (connp); 16537c478bd9Sstevel@tonic-gate } 16547c478bd9Sstevel@tonic-gate 16557c478bd9Sstevel@tonic-gate /* 16567c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 16577c478bd9Sstevel@tonic-gate */ 16587c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 1659*bd670b35SErik Nordmark 16607c478bd9Sstevel@tonic-gate break; 16612b24ab6bSSebastien Roy 16622b24ab6bSSebastien Roy case IPPROTO_ENCAP: 16632b24ab6bSSebastien Roy case IPPROTO_IPV6: 16642b24ab6bSSebastien Roy return (ipcl_iptun_classify_v4(&ipha->ipha_src, 16652b24ab6bSSebastien Roy &ipha->ipha_dst, ipst)); 16667c478bd9Sstevel@tonic-gate } 16677c478bd9Sstevel@tonic-gate 16687c478bd9Sstevel@tonic-gate return (NULL); 16697c478bd9Sstevel@tonic-gate } 16707c478bd9Sstevel@tonic-gate 16717c478bd9Sstevel@tonic-gate conn_t * 1672*bd670b35SErik Nordmark ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, 1673*bd670b35SErik Nordmark ip_recv_attr_t *ira, ip_stack_t *ipst) 16747c478bd9Sstevel@tonic-gate { 16757c478bd9Sstevel@tonic-gate ip6_t *ip6h; 16767c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 16777c478bd9Sstevel@tonic-gate uint16_t lport; 16787c478bd9Sstevel@tonic-gate uint16_t fport; 1679*bd670b35SErik Nordmark tcpha_t *tcpha; 
16807c478bd9Sstevel@tonic-gate uint32_t ports; 16817c478bd9Sstevel@tonic-gate conn_t *connp; 16827c478bd9Sstevel@tonic-gate uint16_t *up; 1683*bd670b35SErik Nordmark zoneid_t zoneid = ira->ira_zoneid; 16847c478bd9Sstevel@tonic-gate 16857c478bd9Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 16867c478bd9Sstevel@tonic-gate 16877c478bd9Sstevel@tonic-gate switch (protocol) { 16887c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 1689*bd670b35SErik Nordmark tcpha = (tcpha_t *)&mp->b_rptr[hdr_len]; 1690*bd670b35SErik Nordmark up = &tcpha->tha_lport; 16917c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 16927c478bd9Sstevel@tonic-gate 16937c478bd9Sstevel@tonic-gate connfp = 1694f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 1695f4b3ec61Sdh ports, ipst)]; 16967c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16977c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16987c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 1699*bd670b35SErik Nordmark if (IPCL_CONN_MATCH_V6(connp, protocol, 1700*bd670b35SErik Nordmark ip6h->ip6_src, ip6h->ip6_dst, ports) && 1701*bd670b35SErik Nordmark (connp->conn_zoneid == zoneid || 1702*bd670b35SErik Nordmark connp->conn_allzones || 1703*bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1704*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1705*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 17067c478bd9Sstevel@tonic-gate break; 17077c478bd9Sstevel@tonic-gate } 17087c478bd9Sstevel@tonic-gate 17097c478bd9Sstevel@tonic-gate if (connp != NULL) { 171045916cd2Sjpk /* 171145916cd2Sjpk * We have a fully-bound TCP connection. 171245916cd2Sjpk * 171345916cd2Sjpk * For labeled systems, there's no need to check the 171445916cd2Sjpk * label here. It's known to be good as we checked 171545916cd2Sjpk * before allowing the connection to become bound. 
171645916cd2Sjpk */ 17177c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 17187c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17197c478bd9Sstevel@tonic-gate return (connp); 17207c478bd9Sstevel@tonic-gate } 17217c478bd9Sstevel@tonic-gate 17227c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17237c478bd9Sstevel@tonic-gate 17247c478bd9Sstevel@tonic-gate lport = up[1]; 1725f4b3ec61Sdh bind_connfp = 1726f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 17277c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 17287c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 17297c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 17307c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 17317c478bd9Sstevel@tonic-gate ip6h->ip6_dst, lport) && 1732*bd670b35SErik Nordmark (connp->conn_zoneid == zoneid || 1733*bd670b35SErik Nordmark connp->conn_allzones || 1734*bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1735*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1736*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 17377c478bd9Sstevel@tonic-gate break; 17387c478bd9Sstevel@tonic-gate } 17397c478bd9Sstevel@tonic-gate 1740*bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 174145916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1742*bd670b35SErik Nordmark ira, connp)) { 174345916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 174445916cd2Sjpk char *, "connp(1) could not receive mp(2)", 174545916cd2Sjpk conn_t *, connp, mblk_t *, mp); 174645916cd2Sjpk connp = NULL; 174745916cd2Sjpk } 174845916cd2Sjpk 17497c478bd9Sstevel@tonic-gate if (connp != NULL) { 17507c478bd9Sstevel@tonic-gate /* Have a listner at least */ 17517c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 17527c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17537c478bd9Sstevel@tonic-gate return (connp); 
17547c478bd9Sstevel@tonic-gate } 17557c478bd9Sstevel@tonic-gate 17567c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17577c478bd9Sstevel@tonic-gate break; 17587c478bd9Sstevel@tonic-gate 17597c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 17607c478bd9Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len]; 17617c478bd9Sstevel@tonic-gate lport = up[1]; 17627c478bd9Sstevel@tonic-gate fport = up[0]; 1763f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 17647c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 17657c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 17667c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 17677c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 17687c478bd9Sstevel@tonic-gate fport, ip6h->ip6_src) && 1769*bd670b35SErik Nordmark (connp->conn_zoneid == zoneid || 1770*bd670b35SErik Nordmark connp->conn_allzones || 1771*bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1772*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1773*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR)))) 17747c478bd9Sstevel@tonic-gate break; 17757c478bd9Sstevel@tonic-gate } 17767c478bd9Sstevel@tonic-gate 1777*bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 177845916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 1779*bd670b35SErik Nordmark ira, connp)) { 178045916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 178145916cd2Sjpk char *, "connp(1) could not receive mp(2)", 178245916cd2Sjpk conn_t *, connp, mblk_t *, mp); 178345916cd2Sjpk connp = NULL; 178445916cd2Sjpk } 178545916cd2Sjpk 17867c478bd9Sstevel@tonic-gate if (connp != NULL) { 17877c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 17887c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17897c478bd9Sstevel@tonic-gate return (connp); 17907c478bd9Sstevel@tonic-gate } 17917c478bd9Sstevel@tonic-gate 
17927c478bd9Sstevel@tonic-gate /* 17937c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 17947c478bd9Sstevel@tonic-gate */ 17957c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17967c478bd9Sstevel@tonic-gate break; 17972b24ab6bSSebastien Roy case IPPROTO_ENCAP: 17982b24ab6bSSebastien Roy case IPPROTO_IPV6: 17992b24ab6bSSebastien Roy return (ipcl_iptun_classify_v6(&ip6h->ip6_src, 18002b24ab6bSSebastien Roy &ip6h->ip6_dst, ipst)); 18017c478bd9Sstevel@tonic-gate } 18027c478bd9Sstevel@tonic-gate 18037c478bd9Sstevel@tonic-gate return (NULL); 18047c478bd9Sstevel@tonic-gate } 18057c478bd9Sstevel@tonic-gate 18067c478bd9Sstevel@tonic-gate /* 18077c478bd9Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 18087c478bd9Sstevel@tonic-gate */ 18097c478bd9Sstevel@tonic-gate conn_t * 1810*bd670b35SErik Nordmark ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst) 18117c478bd9Sstevel@tonic-gate { 1812*bd670b35SErik Nordmark if (ira->ira_flags & IRAF_IS_IPV4) { 1813*bd670b35SErik Nordmark return (ipcl_classify_v4(mp, ira->ira_protocol, 1814*bd670b35SErik Nordmark ira->ira_ip_hdr_length, ira, ipst)); 1815*bd670b35SErik Nordmark } else { 1816*bd670b35SErik Nordmark return (ipcl_classify_v6(mp, ira->ira_protocol, 1817*bd670b35SErik Nordmark ira->ira_ip_hdr_length, ira, ipst)); 18187c478bd9Sstevel@tonic-gate } 18197c478bd9Sstevel@tonic-gate } 18207c478bd9Sstevel@tonic-gate 1821*bd670b35SErik Nordmark /* 1822*bd670b35SErik Nordmark * Only used to classify SCTP RAW sockets 1823*bd670b35SErik Nordmark */ 18247c478bd9Sstevel@tonic-gate conn_t * 1825*bd670b35SErik Nordmark ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports, 1826*bd670b35SErik Nordmark ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst) 18277c478bd9Sstevel@tonic-gate { 182845916cd2Sjpk connf_t *connfp; 18297c478bd9Sstevel@tonic-gate conn_t *connp; 18307c478bd9Sstevel@tonic-gate in_port_t lport; 1831*bd670b35SErik Nordmark int 
ipversion; 183245916cd2Sjpk const void *dst; 1833*bd670b35SErik Nordmark zoneid_t zoneid = ira->ira_zoneid; 18347c478bd9Sstevel@tonic-gate 18357c478bd9Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 1836*bd670b35SErik Nordmark if (ira->ira_flags & IRAF_IS_IPV4) { 1837*bd670b35SErik Nordmark dst = (const void *)&ipha->ipha_dst; 1838*bd670b35SErik Nordmark ipversion = IPV4_VERSION; 1839*bd670b35SErik Nordmark } else { 1840*bd670b35SErik Nordmark dst = (const void *)&ip6h->ip6_dst; 1841*bd670b35SErik Nordmark ipversion = IPV6_VERSION; 184245916cd2Sjpk } 184345916cd2Sjpk 1844f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 18457c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 18467c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 18477c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 18487c478bd9Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 1849*bd670b35SErik Nordmark if (ipversion != connp->conn_ipversion) 18507c478bd9Sstevel@tonic-gate continue; 1851*bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 1852*bd670b35SErik Nordmark !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1853*bd670b35SErik Nordmark if (ipversion == IPV4_VERSION) { 185445916cd2Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 1855*bd670b35SErik Nordmark ipha->ipha_src, ipha->ipha_dst, ports)) 185645916cd2Sjpk continue; 18577c478bd9Sstevel@tonic-gate } else { 185845916cd2Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 1859*bd670b35SErik Nordmark ip6h->ip6_src, ip6h->ip6_dst, ports)) 186045916cd2Sjpk continue; 18617c478bd9Sstevel@tonic-gate } 18627c478bd9Sstevel@tonic-gate } else { 1863*bd670b35SErik Nordmark if (ipversion == IPV4_VERSION) { 186445916cd2Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 1865*bd670b35SErik Nordmark ipha->ipha_dst, lport)) 186645916cd2Sjpk continue; 18677c478bd9Sstevel@tonic-gate } else { 186845916cd2Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 
1869*bd670b35SErik Nordmark ip6h->ip6_dst, lport)) 187045916cd2Sjpk continue; 18717c478bd9Sstevel@tonic-gate } 18727c478bd9Sstevel@tonic-gate } 187345916cd2Sjpk 1874*bd670b35SErik Nordmark if (connp->conn_zoneid == zoneid || 1875*bd670b35SErik Nordmark connp->conn_allzones || 1876*bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) && 1877*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) && 1878*bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR))) 187945916cd2Sjpk break; 188045916cd2Sjpk } 1881*bd670b35SErik Nordmark 1882*bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) && 1883*bd670b35SErik Nordmark !tsol_receive_local(mp, dst, ipversion, ira, connp)) { 188445916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 188545916cd2Sjpk char *, "connp(1) could not receive mp(2)", 188645916cd2Sjpk conn_t *, connp, mblk_t *, mp); 188745916cd2Sjpk connp = NULL; 18887c478bd9Sstevel@tonic-gate } 18897c0c0508Skcpoon 18907c0c0508Skcpoon if (connp != NULL) 18917c0c0508Skcpoon goto found; 18927c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 18937c0c0508Skcpoon 1894*bd670b35SErik Nordmark /* Try to look for a wildcard SCTP RAW socket match. */ 1895f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 18967c0c0508Skcpoon mutex_enter(&connfp->connf_lock); 18977c0c0508Skcpoon for (connp = connfp->connf_head; connp != NULL; 18987c0c0508Skcpoon connp = connp->conn_next) { 18997c0c0508Skcpoon /* We don't allow v4 fallback for v6 raw socket. 
*/ 1900*bd670b35SErik Nordmark if (ipversion != connp->conn_ipversion) 19017c0c0508Skcpoon continue; 1902*bd670b35SErik Nordmark if (!IPCL_ZONE_MATCH(connp, zoneid)) 1903*bd670b35SErik Nordmark continue; 1904*bd670b35SErik Nordmark 1905*bd670b35SErik Nordmark if (ipversion == IPV4_VERSION) { 1906*bd670b35SErik Nordmark if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst)) 19077c0c0508Skcpoon break; 19087c0c0508Skcpoon } else { 1909*bd670b35SErik Nordmark if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) { 19107c0c0508Skcpoon break; 19117c0c0508Skcpoon } 19127c0c0508Skcpoon } 19137c478bd9Sstevel@tonic-gate } 19147c0c0508Skcpoon 19157c0c0508Skcpoon if (connp != NULL) 19167c0c0508Skcpoon goto found; 19177c0c0508Skcpoon 19187c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 19197c478bd9Sstevel@tonic-gate return (NULL); 19207c0c0508Skcpoon 19217c0c0508Skcpoon found: 19227c0c0508Skcpoon ASSERT(connp != NULL); 19237c0c0508Skcpoon CONN_INC_REF(connp); 19247c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 19257c0c0508Skcpoon return (connp); 19267c478bd9Sstevel@tonic-gate } 19277c478bd9Sstevel@tonic-gate 19287c478bd9Sstevel@tonic-gate /* ARGSUSED */ 19297c478bd9Sstevel@tonic-gate static int 1930fc80c0dfSnordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 19317c478bd9Sstevel@tonic-gate { 19327c478bd9Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 19337c478bd9Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 1934fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1]; 1935fc80c0dfSnordmark 1936fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 1937fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t)); 1938fc80c0dfSnordmark 1939fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 1940fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 19410f1702c5SYu Xiangning cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL); 1942*bd670b35SErik Nordmark tcp->tcp_timercache = tcp_timermp_alloc(kmflags); 1943*bd670b35SErik Nordmark if 
(tcp->tcp_timercache == NULL) 1944*bd670b35SErik Nordmark return (ENOMEM); 19457c478bd9Sstevel@tonic-gate connp->conn_tcp = tcp; 19467c478bd9Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 1947*bd670b35SErik Nordmark connp->conn_proto = IPPROTO_TCP; 19487c478bd9Sstevel@tonic-gate tcp->tcp_connp = connp; 1949*bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 1950*bd670b35SErik Nordmark 1951*bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 1952*bd670b35SErik Nordmark if (connp->conn_ixa == NULL) { 1953*bd670b35SErik Nordmark tcp_timermp_free(tcp); 1954*bd670b35SErik Nordmark return (ENOMEM); 1955*bd670b35SErik Nordmark } 1956*bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1; 1957*bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto; 1958*bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 19597c478bd9Sstevel@tonic-gate return (0); 19607c478bd9Sstevel@tonic-gate } 19617c478bd9Sstevel@tonic-gate 19627c478bd9Sstevel@tonic-gate /* ARGSUSED */ 19637c478bd9Sstevel@tonic-gate static void 1964fc80c0dfSnordmark tcp_conn_destructor(void *buf, void *cdrarg) 1965fc80c0dfSnordmark { 1966fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 1967fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 1968fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1]; 1969fc80c0dfSnordmark 1970fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_TCPCONN); 1971fc80c0dfSnordmark ASSERT(tcp->tcp_connp == connp); 1972fc80c0dfSnordmark ASSERT(connp->conn_tcp == tcp); 1973fc80c0dfSnordmark tcp_timermp_free(tcp); 1974fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 1975fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 19760f1702c5SYu Xiangning cv_destroy(&connp->conn_sq_cv); 1977*bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock); 1978*bd670b35SErik Nordmark 1979*bd670b35SErik Nordmark /* Can be NULL if constructor failed */ 1980*bd670b35SErik Nordmark if (connp->conn_ixa != NULL) { 
1981*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1); 1982*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL); 1983*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL); 1984*bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa); 1985*bd670b35SErik Nordmark } 1986fc80c0dfSnordmark } 1987fc80c0dfSnordmark 1988fc80c0dfSnordmark /* ARGSUSED */ 1989fc80c0dfSnordmark static int 1990fc80c0dfSnordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 1991fc80c0dfSnordmark { 1992fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 1993fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 1994fc80c0dfSnordmark 1995fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 1996fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 1997fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 1998fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN; 1999*bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2000fc80c0dfSnordmark 2001*bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2002*bd670b35SErik Nordmark if (connp->conn_ixa == NULL) 2003*bd670b35SErik Nordmark return (ENOMEM); 2004*bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1; 2005*bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2006fc80c0dfSnordmark return (0); 2007fc80c0dfSnordmark } 2008fc80c0dfSnordmark 2009fc80c0dfSnordmark /* ARGSUSED */ 2010fc80c0dfSnordmark static void 2011fc80c0dfSnordmark ip_conn_destructor(void *buf, void *cdrarg) 2012fc80c0dfSnordmark { 2013fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2014fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2015fc80c0dfSnordmark 2016fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 2017fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL); 2018fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2019fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2020*bd670b35SErik Nordmark 
rw_destroy(&connp->conn_ilg_lock); 2021*bd670b35SErik Nordmark 2022*bd670b35SErik Nordmark /* Can be NULL if constructor failed */ 2023*bd670b35SErik Nordmark if (connp->conn_ixa != NULL) { 2024*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2025*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL); 2026*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL); 2027*bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa); 2028*bd670b35SErik Nordmark } 2029fc80c0dfSnordmark } 2030fc80c0dfSnordmark 2031fc80c0dfSnordmark /* ARGSUSED */ 2032fc80c0dfSnordmark static int 2033fc80c0dfSnordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2034fc80c0dfSnordmark { 2035fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2036fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2037fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1]; 2038fc80c0dfSnordmark 2039fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2040fc80c0dfSnordmark bzero(udp, sizeof (udp_t)); 2041fc80c0dfSnordmark 2042fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2043fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2044fc80c0dfSnordmark connp->conn_udp = udp; 2045fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN; 2046*bd670b35SErik Nordmark connp->conn_proto = IPPROTO_UDP; 2047fc80c0dfSnordmark udp->udp_connp = connp; 2048*bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2049*bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2050*bd670b35SErik Nordmark if (connp->conn_ixa == NULL) 2051*bd670b35SErik Nordmark return (ENOMEM); 2052*bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1; 2053*bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto; 2054*bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2055fc80c0dfSnordmark return (0); 2056fc80c0dfSnordmark } 2057fc80c0dfSnordmark 2058fc80c0dfSnordmark /* ARGSUSED */ 
2059fc80c0dfSnordmark static void 2060fc80c0dfSnordmark udp_conn_destructor(void *buf, void *cdrarg) 2061fc80c0dfSnordmark { 2062fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2063fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2064fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1]; 2065fc80c0dfSnordmark 2066fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_UDPCONN); 2067fc80c0dfSnordmark ASSERT(udp->udp_connp == connp); 2068fc80c0dfSnordmark ASSERT(connp->conn_udp == udp); 2069fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2070fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2071*bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock); 2072*bd670b35SErik Nordmark 2073*bd670b35SErik Nordmark /* Can be NULL if constructor failed */ 2074*bd670b35SErik Nordmark if (connp->conn_ixa != NULL) { 2075*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2076*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL); 2077*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL); 2078*bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa); 2079*bd670b35SErik Nordmark } 2080fc80c0dfSnordmark } 2081fc80c0dfSnordmark 2082fc80c0dfSnordmark /* ARGSUSED */ 2083fc80c0dfSnordmark static int 2084fc80c0dfSnordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2085fc80c0dfSnordmark { 2086fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2087fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2088fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2089fc80c0dfSnordmark 2090fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2091fc80c0dfSnordmark bzero(icmp, sizeof (icmp_t)); 2092fc80c0dfSnordmark 2093fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2094fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2095fc80c0dfSnordmark connp->conn_icmp = icmp; 2096fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN; 2097*bd670b35SErik Nordmark connp->conn_proto = IPPROTO_ICMP; 2098fc80c0dfSnordmark icmp->icmp_connp = connp; 
2099*bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2100*bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2101*bd670b35SErik Nordmark if (connp->conn_ixa == NULL) 2102*bd670b35SErik Nordmark return (ENOMEM); 2103*bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1; 2104*bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto; 2105*bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2106fc80c0dfSnordmark return (0); 2107fc80c0dfSnordmark } 2108fc80c0dfSnordmark 2109fc80c0dfSnordmark /* ARGSUSED */ 2110fc80c0dfSnordmark static void 2111fc80c0dfSnordmark rawip_conn_destructor(void *buf, void *cdrarg) 2112fc80c0dfSnordmark { 2113fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2114fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2115fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2116fc80c0dfSnordmark 2117fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2118fc80c0dfSnordmark ASSERT(icmp->icmp_connp == connp); 2119fc80c0dfSnordmark ASSERT(connp->conn_icmp == icmp); 2120fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2121fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2122*bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock); 2123*bd670b35SErik Nordmark 2124*bd670b35SErik Nordmark /* Can be NULL if constructor failed */ 2125*bd670b35SErik Nordmark if (connp->conn_ixa != NULL) { 2126*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1); 2127*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL); 2128*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL); 2129*bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa); 2130*bd670b35SErik Nordmark } 2131fc80c0dfSnordmark } 2132fc80c0dfSnordmark 2133fc80c0dfSnordmark /* ARGSUSED */ 2134fc80c0dfSnordmark static int 2135fc80c0dfSnordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 2136fc80c0dfSnordmark { 2137fc80c0dfSnordmark itc_t *itc = (itc_t 
*)buf; 2138fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2139fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1]; 2140fc80c0dfSnordmark 2141fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2142fc80c0dfSnordmark bzero(rts, sizeof (rts_t)); 2143fc80c0dfSnordmark 2144fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2145fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2146fc80c0dfSnordmark connp->conn_rts = rts; 2147fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN; 2148fc80c0dfSnordmark rts->rts_connp = connp; 2149*bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL); 2150*bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags); 2151*bd670b35SErik Nordmark if (connp->conn_ixa == NULL) 2152*bd670b35SErik Nordmark return (ENOMEM); 2153*bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1; 2154*bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp); 2155fc80c0dfSnordmark return (0); 2156fc80c0dfSnordmark } 2157fc80c0dfSnordmark 2158fc80c0dfSnordmark /* ARGSUSED */ 2159fc80c0dfSnordmark static void 2160fc80c0dfSnordmark rts_conn_destructor(void *buf, void *cdrarg) 21617c478bd9Sstevel@tonic-gate { 2162fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2163fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2164fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1]; 2165fc80c0dfSnordmark 2166fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RTSCONN); 2167fc80c0dfSnordmark ASSERT(rts->rts_connp == connp); 2168fc80c0dfSnordmark ASSERT(connp->conn_rts == rts); 2169fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2170fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2171*bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock); 2172*bd670b35SErik Nordmark 2173*bd670b35SErik Nordmark /* Can be NULL if constructor failed */ 2174*bd670b35SErik Nordmark if (connp->conn_ixa != NULL) { 2175*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1); 
2176*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL); 2177*bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL); 2178*bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa); 21790f1702c5SYu Xiangning } 21800f1702c5SYu Xiangning } 21810f1702c5SYu Xiangning 2182fc80c0dfSnordmark /* 2183fc80c0dfSnordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers 2184fc80c0dfSnordmark * in the conn_t. 2185*bd670b35SErik Nordmark * 2186*bd670b35SErik Nordmark * Below we list all the pointers in the conn_t as a documentation aid. 2187*bd670b35SErik Nordmark * The ones that we can not ASSERT to be NULL are #ifdef'ed out. 2188*bd670b35SErik Nordmark * If you add any pointers to the conn_t please add an ASSERT here 2189*bd670b35SErik Nordmark * and #ifdef it out if it can't be actually asserted to be NULL. 2190*bd670b35SErik Nordmark * In any case, we bzero most of the conn_t at the end of the function. 2191fc80c0dfSnordmark */ 2192fc80c0dfSnordmark void 2193fc80c0dfSnordmark ipcl_conn_cleanup(conn_t *connp) 2194fc80c0dfSnordmark { 2195*bd670b35SErik Nordmark ip_xmit_attr_t *ixa; 2196*bd670b35SErik Nordmark 2197fc80c0dfSnordmark ASSERT(connp->conn_latch == NULL); 2198*bd670b35SErik Nordmark ASSERT(connp->conn_latch_in_policy == NULL); 2199*bd670b35SErik Nordmark ASSERT(connp->conn_latch_in_action == NULL); 2200fc80c0dfSnordmark #ifdef notdef 2201fc80c0dfSnordmark ASSERT(connp->conn_rq == NULL); 2202fc80c0dfSnordmark ASSERT(connp->conn_wq == NULL); 2203fc80c0dfSnordmark #endif 2204fc80c0dfSnordmark ASSERT(connp->conn_cred == NULL); 2205fc80c0dfSnordmark ASSERT(connp->conn_g_fanout == NULL); 2206fc80c0dfSnordmark ASSERT(connp->conn_g_next == NULL); 2207fc80c0dfSnordmark ASSERT(connp->conn_g_prev == NULL); 2208fc80c0dfSnordmark ASSERT(connp->conn_policy == NULL); 2209fc80c0dfSnordmark ASSERT(connp->conn_fanout == NULL); 2210fc80c0dfSnordmark ASSERT(connp->conn_next == NULL); 2211fc80c0dfSnordmark ASSERT(connp->conn_prev == NULL); 
2212fc80c0dfSnordmark ASSERT(connp->conn_oper_pending_ill == NULL); 2213fc80c0dfSnordmark ASSERT(connp->conn_ilg == NULL); 2214fc80c0dfSnordmark ASSERT(connp->conn_drain_next == NULL); 2215fc80c0dfSnordmark ASSERT(connp->conn_drain_prev == NULL); 2216a9737be2Snordmark #ifdef notdef 2217a9737be2Snordmark /* conn_idl is not cleared when removed from idl list */ 2218fc80c0dfSnordmark ASSERT(connp->conn_idl == NULL); 2219a9737be2Snordmark #endif 2220fc80c0dfSnordmark ASSERT(connp->conn_ipsec_opt_mp == NULL); 2221*bd670b35SErik Nordmark #ifdef notdef 2222*bd670b35SErik Nordmark /* conn_netstack is cleared by the caller; needed by ixa_cleanup */ 2223fc80c0dfSnordmark ASSERT(connp->conn_netstack == NULL); 2224*bd670b35SErik Nordmark #endif 2225fc80c0dfSnordmark 22260f1702c5SYu Xiangning ASSERT(connp->conn_helper_info == NULL); 2227*bd670b35SErik Nordmark ASSERT(connp->conn_ixa != NULL); 2228*bd670b35SErik Nordmark ixa = connp->conn_ixa; 2229*bd670b35SErik Nordmark ASSERT(ixa->ixa_refcnt == 1); 2230*bd670b35SErik Nordmark /* Need to preserve ixa_protocol */ 2231*bd670b35SErik Nordmark ixa_cleanup(ixa); 2232*bd670b35SErik Nordmark ixa->ixa_flags = 0; 2233*bd670b35SErik Nordmark 2234fc80c0dfSnordmark /* Clear out the conn_t fields that are not preserved */ 2235fc80c0dfSnordmark bzero(&connp->conn_start_clr, 2236fc80c0dfSnordmark sizeof (conn_t) - 2237fc80c0dfSnordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 22387c478bd9Sstevel@tonic-gate } 22397c478bd9Sstevel@tonic-gate 22407c478bd9Sstevel@tonic-gate /* 22417c478bd9Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 22427c478bd9Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 22437c478bd9Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 22447c478bd9Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved 22457c478bd9Sstevel@tonic-gate * as follows. 
22467c478bd9Sstevel@tonic-gate * 22477c478bd9Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 22487c478bd9Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 22497c478bd9Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this 22507c478bd9Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 22517c478bd9Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note 22527c478bd9Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 22537c478bd9Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 22547c478bd9Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 22557c478bd9Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 22567c478bd9Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 22577c478bd9Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 22587c478bd9Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 22597c478bd9Sstevel@tonic-gate */ 22607c478bd9Sstevel@tonic-gate void 22617c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 22627c478bd9Sstevel@tonic-gate { 22637c478bd9Sstevel@tonic-gate int index; 2264f4b3ec61Sdh struct connf_s *connfp; 2265f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 22667c478bd9Sstevel@tonic-gate 22677c478bd9Sstevel@tonic-gate /* 22687c478bd9Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 22697c478bd9Sstevel@tonic-gate * in the global lists is sufficient. 
22707c478bd9Sstevel@tonic-gate */ 2271f4b3ec61Sdh ipst->ips_conn_g_index++; 2272f4b3ec61Sdh index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 22737c478bd9Sstevel@tonic-gate 22747c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 22757c478bd9Sstevel@tonic-gate /* 22767c478bd9Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 22777c478bd9Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 22787c478bd9Sstevel@tonic-gate */ 22797c478bd9Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 22807c478bd9Sstevel@tonic-gate 2281f4b3ec61Sdh connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 22827c478bd9Sstevel@tonic-gate /* Insert at the head of the list */ 2283f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 2284f4b3ec61Sdh connp->conn_g_next = connfp->connf_head; 22857c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 22867c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 2287f4b3ec61Sdh connfp->connf_head = connp; 22887c478bd9Sstevel@tonic-gate 22897c478bd9Sstevel@tonic-gate /* The fanout bucket this conn points to */ 2290f4b3ec61Sdh connp->conn_g_fanout = connfp; 22917c478bd9Sstevel@tonic-gate 2292f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 22937c478bd9Sstevel@tonic-gate } 22947c478bd9Sstevel@tonic-gate 22957c478bd9Sstevel@tonic-gate void 22967c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 22977c478bd9Sstevel@tonic-gate { 2298f4b3ec61Sdh struct connf_s *connfp; 2299f4b3ec61Sdh 23007c478bd9Sstevel@tonic-gate /* 23017c478bd9Sstevel@tonic-gate * We were never inserted in the global multi list. 23027c478bd9Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 23037c478bd9Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 
23047c478bd9Sstevel@tonic-gate */ 23057c478bd9Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 23067c478bd9Sstevel@tonic-gate return; 23077c478bd9Sstevel@tonic-gate 2308f4b3ec61Sdh connfp = connp->conn_g_fanout; 2309f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 23107c478bd9Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 23117c478bd9Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 23127c478bd9Sstevel@tonic-gate else 2313f4b3ec61Sdh connfp->connf_head = connp->conn_g_next; 23147c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 23157c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2316f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 23177c478bd9Sstevel@tonic-gate 23187c478bd9Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 23197c478bd9Sstevel@tonic-gate connp->conn_g_next = NULL; 23207c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 2321fc80c0dfSnordmark connp->conn_g_fanout = NULL; 23227c478bd9Sstevel@tonic-gate } 23237c478bd9Sstevel@tonic-gate 23247c478bd9Sstevel@tonic-gate /* 23257c478bd9Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 2326*bd670b35SErik Nordmark * With the specified argument for each. 23277c478bd9Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 23287c478bd9Sstevel@tonic-gate * 2329*bd670b35SErik Nordmark * CONNs may hold pointers to ills (conn_dhcpinit_ill and 2330*bd670b35SErik Nordmark * conn_oper_pending_ill). To guard against stale pointers 23317c478bd9Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 23327c478bd9Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking 23337c478bd9Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 23347c478bd9Sstevel@tonic-gate * at the tail of the list. 
They are new conn_t's and thus don't have any 23357c478bd9Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference 23367c478bd9Sstevel@tonic-gate * is created to the struct that is going away. 23377c478bd9Sstevel@tonic-gate */ 23387c478bd9Sstevel@tonic-gate void 2339f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 23407c478bd9Sstevel@tonic-gate { 23417c478bd9Sstevel@tonic-gate int i; 23427c478bd9Sstevel@tonic-gate conn_t *connp; 23437c478bd9Sstevel@tonic-gate conn_t *prev_connp; 23447c478bd9Sstevel@tonic-gate 23457c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2346f4b3ec61Sdh mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23477c478bd9Sstevel@tonic-gate prev_connp = NULL; 2348f4b3ec61Sdh connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 23497c478bd9Sstevel@tonic-gate while (connp != NULL) { 23507c478bd9Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 23517c478bd9Sstevel@tonic-gate if (connp->conn_state_flags & 23527c478bd9Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 23537c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 23547c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 23557c478bd9Sstevel@tonic-gate continue; 23567c478bd9Sstevel@tonic-gate } 23577c478bd9Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 23587c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 2359f4b3ec61Sdh mutex_exit( 2360f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23617c478bd9Sstevel@tonic-gate (*func)(connp, arg); 23627c478bd9Sstevel@tonic-gate if (prev_connp != NULL) 23637c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 2364f4b3ec61Sdh mutex_enter( 2365f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23667c478bd9Sstevel@tonic-gate prev_connp = connp; 23677c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 23687c478bd9Sstevel@tonic-gate } 2369f4b3ec61Sdh mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23707c478bd9Sstevel@tonic-gate 
if (prev_connp != NULL) 23717c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 23727c478bd9Sstevel@tonic-gate } 23737c478bd9Sstevel@tonic-gate } 23747c478bd9Sstevel@tonic-gate 23757c478bd9Sstevel@tonic-gate /* 23767c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 23777c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 23787c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 2379d0ab37afSethindra * (peer tcp in ESTABLISHED state). 23807c478bd9Sstevel@tonic-gate */ 23817c478bd9Sstevel@tonic-gate conn_t * 2382*bd670b35SErik Nordmark ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha, 2383f4b3ec61Sdh ip_stack_t *ipst) 23847c478bd9Sstevel@tonic-gate { 23857c478bd9Sstevel@tonic-gate uint32_t ports; 23867c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 23877c478bd9Sstevel@tonic-gate connf_t *connfp; 23887c478bd9Sstevel@tonic-gate conn_t *tconnp; 23897c478bd9Sstevel@tonic-gate boolean_t zone_chk; 23907c478bd9Sstevel@tonic-gate 23917c478bd9Sstevel@tonic-gate /* 23927c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 23937c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 23947c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 23957c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 
23967c478bd9Sstevel@tonic-gate */ 23977c478bd9Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 23987c478bd9Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 23997c478bd9Sstevel@tonic-gate 2400*bd670b35SErik Nordmark pports[0] = tcpha->tha_fport; 2401*bd670b35SErik Nordmark pports[1] = tcpha->tha_lport; 24027c478bd9Sstevel@tonic-gate 2403f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2404f4b3ec61Sdh ports, ipst)]; 24057c478bd9Sstevel@tonic-gate 24067c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24077c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24087c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24097c478bd9Sstevel@tonic-gate 24107c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 24117c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 2412d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24137c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24147c478bd9Sstevel@tonic-gate 24157c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 24167c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 24177c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24187c478bd9Sstevel@tonic-gate return (tconnp); 24197c478bd9Sstevel@tonic-gate } 24207c478bd9Sstevel@tonic-gate } 24217c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24227c478bd9Sstevel@tonic-gate return (NULL); 24237c478bd9Sstevel@tonic-gate } 24247c478bd9Sstevel@tonic-gate 24257c478bd9Sstevel@tonic-gate /* 24267c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 24277c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 24287c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 2429d0ab37afSethindra * (peer tcp in ESTABLISHED state). 
24307c478bd9Sstevel@tonic-gate */ 24317c478bd9Sstevel@tonic-gate conn_t * 2432*bd670b35SErik Nordmark ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha, 2433f4b3ec61Sdh ip_stack_t *ipst) 24347c478bd9Sstevel@tonic-gate { 24357c478bd9Sstevel@tonic-gate uint32_t ports; 24367c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 24377c478bd9Sstevel@tonic-gate connf_t *connfp; 24387c478bd9Sstevel@tonic-gate conn_t *tconnp; 24397c478bd9Sstevel@tonic-gate boolean_t zone_chk; 24407c478bd9Sstevel@tonic-gate 24417c478bd9Sstevel@tonic-gate /* 24427c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 24437c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 24447c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 24457c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 24467c478bd9Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 24477c478bd9Sstevel@tonic-gate * current Zone implementation treats each link local address as 24487c478bd9Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
24497c478bd9Sstevel@tonic-gate */ 24507c478bd9Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 24517c478bd9Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 24527c478bd9Sstevel@tonic-gate 2453*bd670b35SErik Nordmark pports[0] = tcpha->tha_fport; 2454*bd670b35SErik Nordmark pports[1] = tcpha->tha_lport; 24557c478bd9Sstevel@tonic-gate 2456f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2457f4b3ec61Sdh ports, ipst)]; 24587c478bd9Sstevel@tonic-gate 24597c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24607c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24617c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24627c478bd9Sstevel@tonic-gate 2463*bd670b35SErik Nordmark /* We skip conn_bound_if check here as this is loopback tcp */ 24647c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 24657c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 2466d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24677c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24687c478bd9Sstevel@tonic-gate 24697c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 24707c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 24717c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24727c478bd9Sstevel@tonic-gate return (tconnp); 24737c478bd9Sstevel@tonic-gate } 24747c478bd9Sstevel@tonic-gate } 24757c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24767c478bd9Sstevel@tonic-gate return (NULL); 24777c478bd9Sstevel@tonic-gate } 24787c478bd9Sstevel@tonic-gate 24797c478bd9Sstevel@tonic-gate /* 24807c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 24817c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 24827c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 
24837c478bd9Sstevel@tonic-gate */ 24847c478bd9Sstevel@tonic-gate conn_t * 2485*bd670b35SErik Nordmark ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state, 2486f4b3ec61Sdh ip_stack_t *ipst) 24877c478bd9Sstevel@tonic-gate { 24887c478bd9Sstevel@tonic-gate uint32_t ports; 24897c478bd9Sstevel@tonic-gate uint16_t *pports; 24907c478bd9Sstevel@tonic-gate connf_t *connfp; 24917c478bd9Sstevel@tonic-gate conn_t *tconnp; 24927c478bd9Sstevel@tonic-gate 24937c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 2494*bd670b35SErik Nordmark pports[0] = tcpha->tha_fport; 2495*bd670b35SErik Nordmark pports[1] = tcpha->tha_lport; 24967c478bd9Sstevel@tonic-gate 2497f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2498121e5416Skcpoon ports, ipst)]; 24997c478bd9Sstevel@tonic-gate 25007c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25017c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25027c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25037c478bd9Sstevel@tonic-gate 25047c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 25057c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 25067c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 25077c478bd9Sstevel@tonic-gate 25087c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 25097c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25107c478bd9Sstevel@tonic-gate return (tconnp); 25117c478bd9Sstevel@tonic-gate } 25127c478bd9Sstevel@tonic-gate } 25137c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25147c478bd9Sstevel@tonic-gate return (NULL); 25157c478bd9Sstevel@tonic-gate } 25167c478bd9Sstevel@tonic-gate 25177c478bd9Sstevel@tonic-gate /* 25187c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 25197c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 
25207c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 25217c478bd9Sstevel@tonic-gate * Match on ifindex in addition to addresses. 25227c478bd9Sstevel@tonic-gate */ 25237c478bd9Sstevel@tonic-gate conn_t * 25247c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2525f4b3ec61Sdh uint_t ifindex, ip_stack_t *ipst) 25267c478bd9Sstevel@tonic-gate { 25277c478bd9Sstevel@tonic-gate tcp_t *tcp; 25287c478bd9Sstevel@tonic-gate uint32_t ports; 25297c478bd9Sstevel@tonic-gate uint16_t *pports; 25307c478bd9Sstevel@tonic-gate connf_t *connfp; 25317c478bd9Sstevel@tonic-gate conn_t *tconnp; 25327c478bd9Sstevel@tonic-gate 25337c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 25347c478bd9Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 25357c478bd9Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 25367c478bd9Sstevel@tonic-gate 2537f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2538121e5416Skcpoon ports, ipst)]; 25397c478bd9Sstevel@tonic-gate 25407c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25417c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25427c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25437c478bd9Sstevel@tonic-gate 25447c478bd9Sstevel@tonic-gate tcp = tconnp->conn_tcp; 25457c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 25467c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 25477c478bd9Sstevel@tonic-gate tcp->tcp_state >= min_state && 2548*bd670b35SErik Nordmark (tconnp->conn_bound_if == 0 || 2549*bd670b35SErik Nordmark tconnp->conn_bound_if == ifindex)) { 25507c478bd9Sstevel@tonic-gate 25517c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 25527c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25537c478bd9Sstevel@tonic-gate return (tconnp); 25547c478bd9Sstevel@tonic-gate } 25557c478bd9Sstevel@tonic-gate } 25567c478bd9Sstevel@tonic-gate 
mutex_exit(&connfp->connf_lock); 25577c478bd9Sstevel@tonic-gate return (NULL); 25587c478bd9Sstevel@tonic-gate } 25597c478bd9Sstevel@tonic-gate 25607c478bd9Sstevel@tonic-gate /* 256145916cd2Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 256245916cd2Sjpk * a listener when changing state. 25637c478bd9Sstevel@tonic-gate */ 25647c478bd9Sstevel@tonic-gate conn_t * 2565f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2566f4b3ec61Sdh ip_stack_t *ipst) 25677c478bd9Sstevel@tonic-gate { 25687c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 25697c478bd9Sstevel@tonic-gate conn_t *connp; 25707c478bd9Sstevel@tonic-gate tcp_t *tcp; 25717c478bd9Sstevel@tonic-gate 25727c478bd9Sstevel@tonic-gate /* 25737c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 25747c478bd9Sstevel@tonic-gate * all zeros. 25757c478bd9Sstevel@tonic-gate */ 25767c478bd9Sstevel@tonic-gate if (laddr == 0) 25777c478bd9Sstevel@tonic-gate return (NULL); 25787c478bd9Sstevel@tonic-gate 257945916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 258045916cd2Sjpk 2581f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 25827c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 25837c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 25847c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 25857c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 25867c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 25875d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 25887c478bd9Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 25897c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 25907c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 25917c478bd9Sstevel@tonic-gate return (connp); 25927c478bd9Sstevel@tonic-gate } 25937c478bd9Sstevel@tonic-gate } 25947c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 
25957c478bd9Sstevel@tonic-gate return (NULL); 25967c478bd9Sstevel@tonic-gate } 25977c478bd9Sstevel@tonic-gate 259845916cd2Sjpk /* 259945916cd2Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 260045916cd2Sjpk * a listener when changing state. 260145916cd2Sjpk */ 26027c478bd9Sstevel@tonic-gate conn_t * 26037c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2604f4b3ec61Sdh zoneid_t zoneid, ip_stack_t *ipst) 26057c478bd9Sstevel@tonic-gate { 26067c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 26077c478bd9Sstevel@tonic-gate conn_t *connp = NULL; 26087c478bd9Sstevel@tonic-gate tcp_t *tcp; 26097c478bd9Sstevel@tonic-gate 26107c478bd9Sstevel@tonic-gate /* 26117c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 26127c478bd9Sstevel@tonic-gate * all zeros. 26137c478bd9Sstevel@tonic-gate */ 26147c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 26157c478bd9Sstevel@tonic-gate return (NULL); 26167c478bd9Sstevel@tonic-gate 261745916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 26187c478bd9Sstevel@tonic-gate 2619f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 26207c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 26217c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 26227c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 26237c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 26247c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 26255d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 2626*bd670b35SErik Nordmark (connp->conn_bound_if == 0 || 2627*bd670b35SErik Nordmark connp->conn_bound_if == ifindex) && 26287c478bd9Sstevel@tonic-gate tcp->tcp_listener == NULL) { 26297c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 26307c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26317c478bd9Sstevel@tonic-gate return (connp); 
26327c478bd9Sstevel@tonic-gate } 26337c478bd9Sstevel@tonic-gate } 26347c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26357c478bd9Sstevel@tonic-gate return (NULL); 26367c478bd9Sstevel@tonic-gate } 26377c478bd9Sstevel@tonic-gate 2638ff550d0eSmasputra /* 2639ff550d0eSmasputra * ipcl_get_next_conn 2640ff550d0eSmasputra * get the next entry in the conn global list 2641ff550d0eSmasputra * and put a reference on the next_conn. 2642ff550d0eSmasputra * decrement the reference on the current conn. 2643ff550d0eSmasputra * 2644ff550d0eSmasputra * This is an iterator based walker function that also provides for 2645ff550d0eSmasputra * some selection by the caller. It walks through the conn_hash bucket 2646ff550d0eSmasputra * searching for the next valid connp in the list, and selects connections 2647ff550d0eSmasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2648ff550d0eSmasputra * thus ensuring that the conn exists when the caller uses the conn. 2649ff550d0eSmasputra */ 2650ff550d0eSmasputra conn_t * 2651ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2652ff550d0eSmasputra { 2653ff550d0eSmasputra conn_t *next_connp; 2654ff550d0eSmasputra 2655ff550d0eSmasputra if (connfp == NULL) 2656ff550d0eSmasputra return (NULL); 2657ff550d0eSmasputra 2658ff550d0eSmasputra mutex_enter(&connfp->connf_lock); 2659ff550d0eSmasputra 2660ff550d0eSmasputra next_connp = (connp == NULL) ? 
2661ff550d0eSmasputra connfp->connf_head : connp->conn_g_next; 2662ff550d0eSmasputra 2663ff550d0eSmasputra while (next_connp != NULL) { 2664ff550d0eSmasputra mutex_enter(&next_connp->conn_lock); 2665ff550d0eSmasputra if (!(next_connp->conn_flags & conn_flags) || 2666ff550d0eSmasputra (next_connp->conn_state_flags & 2667ff550d0eSmasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2668ff550d0eSmasputra /* 2669ff550d0eSmasputra * This conn has been condemned or 2670ff550d0eSmasputra * is closing, or the flags don't match 2671ff550d0eSmasputra */ 2672ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2673ff550d0eSmasputra next_connp = next_connp->conn_g_next; 2674ff550d0eSmasputra continue; 2675ff550d0eSmasputra } 2676ff550d0eSmasputra CONN_INC_REF_LOCKED(next_connp); 2677ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2678ff550d0eSmasputra break; 2679ff550d0eSmasputra } 2680ff550d0eSmasputra 2681ff550d0eSmasputra mutex_exit(&connfp->connf_lock); 2682ff550d0eSmasputra 2683ff550d0eSmasputra if (connp != NULL) 2684ff550d0eSmasputra CONN_DEC_REF(connp); 2685ff550d0eSmasputra 2686ff550d0eSmasputra return (next_connp); 2687ff550d0eSmasputra } 2688ff550d0eSmasputra 26897c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG 26907c478bd9Sstevel@tonic-gate /* 26917c478bd9Sstevel@tonic-gate * Trace of the last NBUF refhold/refrele 26927c478bd9Sstevel@tonic-gate */ 26937c478bd9Sstevel@tonic-gate int 26947c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 26957c478bd9Sstevel@tonic-gate { 26967c478bd9Sstevel@tonic-gate int last; 26977c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 26987c478bd9Sstevel@tonic-gate 26997c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27007c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 27017c478bd9Sstevel@tonic-gate last++; 27027c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27037c478bd9Sstevel@tonic-gate last = 0; 27047c478bd9Sstevel@tonic-gate 27057c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 
27066a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27077c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 27087c478bd9Sstevel@tonic-gate return (1); 27097c478bd9Sstevel@tonic-gate } 27107c478bd9Sstevel@tonic-gate 27117c478bd9Sstevel@tonic-gate int 27127c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 27137c478bd9Sstevel@tonic-gate { 27147c478bd9Sstevel@tonic-gate int last; 27157c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 27167c478bd9Sstevel@tonic-gate 27177c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27187c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 27197c478bd9Sstevel@tonic-gate last++; 27207c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27217c478bd9Sstevel@tonic-gate last = 0; 27227c478bd9Sstevel@tonic-gate 27237c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 27246a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27257c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 27267c478bd9Sstevel@tonic-gate return (1); 27277c478bd9Sstevel@tonic-gate } 27287c478bd9Sstevel@tonic-gate #endif 2729