/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * IP PACKET CLASSIFIER
 *
 * The IP packet classifier provides mapping between IP packets and persistent
 * connection state for connection-oriented protocols. It also provides an
 * interface for managing connection states.
 *
 * The connection state is kept in the conn_t data structure and contains,
 * among other things:
 *
 *	o local/remote address and ports
 *	o Transport protocol
 *	o squeue for the connection (for TCP only)
 *	o reference counter
 *	o Connection state
 *	o hash table linkage
 *	o interface/ire information
 *	o credentials
 *	o ipsec policy
 *	o send and receive functions.
 *	o mutex lock.
 *
 * Connections use a reference counting scheme. They are freed when the
 * reference counter drops to zero. A reference is incremented when connection
 * is placed in a list or table, when incoming packet for the connection arrives
 * and when connection is processed via squeue (squeue processing may be
 * asynchronous and the reference protects the connection from being destroyed
 * before its processing is finished).
 *
 * send and receive functions are currently used for TCP only. The send function
 * determines the IP entry point for the packet once it leaves TCP to be sent to
 * the destination address. The receive function is used by IP when the packet
 * should be passed for TCP processing. When a new connection is created these
 * are set to ip_output() and tcp_input() respectively.
 * During the lifetime of the connection the send and receive functions may
 * change depending on the changes in the connection state. For example, once
 * the connection is bound to an address, the receive function for this
 * connection is set to tcp_conn_request(). This allows incoming SYNs to go
 * directly into the listener SYN processing function without going to
 * tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any).
 * This reference should be dropped when the caller has finished processing
 * the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
 *
 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
 * it can't find any associated connection. If the connection is found, its
 * reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	zoneid: The zone in which the returned connection must be; the zoneid
 *		corresponding to the ire_zoneid on the IRE located for the
 *		packet's destination address.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment. This is used only in a
 * labeled environment. The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone. Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked. For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet. For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set. This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary. (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
 *				   zoneid, ip_stack);
 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *				   zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created i.e., which kmem_cache it comes from.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_SCTPCONN	indicates a SCTP connection
 *		IPCL_UDPCONN	indicates a UDP conn_t.
 *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
 *		IPCL_RTSCONN	indicates a RTS conn_t.
 *		IPCL_IPCCONN	indicates all other connections.
 *
 * void ipcl_conn_destroy(connp)
 *
 *	Destroys the connection state, removes it from the global
 *	connection hash table and frees its memory.
2417c478bd9Sstevel@tonic-gate */ 2427c478bd9Sstevel@tonic-gate 2437c478bd9Sstevel@tonic-gate #include <sys/types.h> 2447c478bd9Sstevel@tonic-gate #include <sys/stream.h> 2457c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 2467c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 2477c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 2487c478bd9Sstevel@tonic-gate #include <sys/strsun.h> 2497c478bd9Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2507c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 2517c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 2527c478bd9Sstevel@tonic-gate #include <sys/debug.h> 2537c478bd9Sstevel@tonic-gate 2547c478bd9Sstevel@tonic-gate #include <sys/systm.h> 2557c478bd9Sstevel@tonic-gate #include <sys/param.h> 2567c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 2577c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h> 2587c478bd9Sstevel@tonic-gate #include <inet/common.h> 2597c478bd9Sstevel@tonic-gate #include <netinet/ip6.h> 2607c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h> 2617c478bd9Sstevel@tonic-gate 2627c478bd9Sstevel@tonic-gate #include <inet/ip.h> 2637c478bd9Sstevel@tonic-gate #include <inet/ip6.h> 2647c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h> 265*0f1702c5SYu Xiangning #include <inet/ip_impl.h> 266ff550d0eSmasputra #include <inet/udp_impl.h> 2677c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h> 268f4b3ec61Sdh #include <inet/sctp/sctp_impl.h> 269fc80c0dfSnordmark #include <inet/rawip_impl.h> 270fc80c0dfSnordmark #include <inet/rts_impl.h> 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 2737c478bd9Sstevel@tonic-gate 2747c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h> 275*0f1702c5SYu Xiangning #include <inet/tcp.h> 2767c478bd9Sstevel@tonic-gate #include <inet/ipsec_impl.h> 2777c478bd9Sstevel@tonic-gate 27845916cd2Sjpk #include <sys/tsol/tnet.h> 279*0f1702c5SYu Xiangning #include <sys/sockio.h> 28045916cd2Sjpk 2817c478bd9Sstevel@tonic-gate #ifdef DEBUG 
2827c478bd9Sstevel@tonic-gate #define IPCL_DEBUG 2837c478bd9Sstevel@tonic-gate #else 2847c478bd9Sstevel@tonic-gate #undef IPCL_DEBUG 2857c478bd9Sstevel@tonic-gate #endif 2867c478bd9Sstevel@tonic-gate 2877c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 2887c478bd9Sstevel@tonic-gate int ipcl_debug_level = 0; 2897c478bd9Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) \ 2907c478bd9Sstevel@tonic-gate if (ipcl_debug_level & level) { printf args; } 2917c478bd9Sstevel@tonic-gate #else 2927c478bd9Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) {; } 2937c478bd9Sstevel@tonic-gate #endif 294f4b3ec61Sdh /* Old value for compatibility. Setable in /etc/system */ 2957c478bd9Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0; 2967c478bd9Sstevel@tonic-gate 297f4b3ec61Sdh /* New value. Zero means choose automatically. Setable in /etc/system */ 2987c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0; 2997c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192; 3007c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500; 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate /* bind/udp fanout table size */ 3037c478bd9Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512; 304ee4701baSericheng uint_t ipcl_udp_fanout_size = 16384; 3057c478bd9Sstevel@tonic-gate 3067c478bd9Sstevel@tonic-gate /* Raw socket fanout size. Must be a power of 2. */ 3077c478bd9Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256; 3087c478bd9Sstevel@tonic-gate 3097c478bd9Sstevel@tonic-gate /* 3107c478bd9Sstevel@tonic-gate * Power of 2^N Primes useful for hashing for N of 0-28, 3117c478bd9Sstevel@tonic-gate * these primes are the nearest prime <= 2^N - 2^(N-2). 
3127c478bd9Sstevel@tonic-gate */ 3137c478bd9Sstevel@tonic-gate 3147c478bd9Sstevel@tonic-gate #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 3157c478bd9Sstevel@tonic-gate 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 3167c478bd9Sstevel@tonic-gate 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 3177c478bd9Sstevel@tonic-gate 50331599, 100663291, 201326557, 0} 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate /* 320fc80c0dfSnordmark * wrapper structure to ensure that conn and what follows it (tcp_t, etc) 321fc80c0dfSnordmark * are aligned on cache lines. 3227c478bd9Sstevel@tonic-gate */ 323fc80c0dfSnordmark typedef union itc_s { 324fc80c0dfSnordmark conn_t itc_conn; 325fc80c0dfSnordmark char itcu_filler[CACHE_ALIGN(conn_s)]; 3267c478bd9Sstevel@tonic-gate } itc_t; 3277c478bd9Sstevel@tonic-gate 328fc80c0dfSnordmark struct kmem_cache *tcp_conn_cache; 329fc80c0dfSnordmark struct kmem_cache *ip_conn_cache; 330*0f1702c5SYu Xiangning struct kmem_cache *ip_helper_stream_cache; 3317c478bd9Sstevel@tonic-gate extern struct kmem_cache *sctp_conn_cache; 3327c478bd9Sstevel@tonic-gate extern struct kmem_cache *tcp_sack_info_cache; 3337c478bd9Sstevel@tonic-gate extern struct kmem_cache *tcp_iphc_cache; 334fc80c0dfSnordmark struct kmem_cache *udp_conn_cache; 335fc80c0dfSnordmark struct kmem_cache *rawip_conn_cache; 336fc80c0dfSnordmark struct kmem_cache *rts_conn_cache; 3377c478bd9Sstevel@tonic-gate 3387c478bd9Sstevel@tonic-gate extern void tcp_timermp_free(tcp_t *); 3397c478bd9Sstevel@tonic-gate extern mblk_t *tcp_timermp_alloc(int); 3407c478bd9Sstevel@tonic-gate 341fc80c0dfSnordmark static int ip_conn_constructor(void *, void *, int); 342fc80c0dfSnordmark static void ip_conn_destructor(void *, void *); 343fc80c0dfSnordmark 344fc80c0dfSnordmark static int tcp_conn_constructor(void *, void *, int); 345fc80c0dfSnordmark static void tcp_conn_destructor(void *, void *); 346fc80c0dfSnordmark 347fc80c0dfSnordmark static int 
udp_conn_constructor(void *, void *, int); 348fc80c0dfSnordmark static void udp_conn_destructor(void *, void *); 349fc80c0dfSnordmark 350fc80c0dfSnordmark static int rawip_conn_constructor(void *, void *, int); 351fc80c0dfSnordmark static void rawip_conn_destructor(void *, void *); 352fc80c0dfSnordmark 353fc80c0dfSnordmark static int rts_conn_constructor(void *, void *, int); 354fc80c0dfSnordmark static void rts_conn_destructor(void *, void *); 3557c478bd9Sstevel@tonic-gate 356*0f1702c5SYu Xiangning static int ip_helper_stream_constructor(void *, void *, int); 357*0f1702c5SYu Xiangning static void ip_helper_stream_destructor(void *, void *); 358*0f1702c5SYu Xiangning 359*0f1702c5SYu Xiangning boolean_t ip_use_helper_cache = B_TRUE; 360*0f1702c5SYu Xiangning 3617c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 3627c478bd9Sstevel@tonic-gate #define INET_NTOA_BUFSIZE 18 3637c478bd9Sstevel@tonic-gate 3647c478bd9Sstevel@tonic-gate static char * 3657c478bd9Sstevel@tonic-gate inet_ntoa_r(uint32_t in, char *b) 3667c478bd9Sstevel@tonic-gate { 3677c478bd9Sstevel@tonic-gate unsigned char *p; 3687c478bd9Sstevel@tonic-gate 3697c478bd9Sstevel@tonic-gate p = (unsigned char *)∈ 3707c478bd9Sstevel@tonic-gate (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 3717c478bd9Sstevel@tonic-gate return (b); 3727c478bd9Sstevel@tonic-gate } 3737c478bd9Sstevel@tonic-gate #endif 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate /* 376f4b3ec61Sdh * Global (for all stack instances) init routine 3777c478bd9Sstevel@tonic-gate */ 3787c478bd9Sstevel@tonic-gate void 379f4b3ec61Sdh ipcl_g_init(void) 3807c478bd9Sstevel@tonic-gate { 381fc80c0dfSnordmark ip_conn_cache = kmem_cache_create("ip_conn_cache", 3827c478bd9Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 383fc80c0dfSnordmark ip_conn_constructor, ip_conn_destructor, 384fc80c0dfSnordmark NULL, NULL, NULL, 0); 385fc80c0dfSnordmark 386fc80c0dfSnordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 387fc80c0dfSnordmark sizeof 
(itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 388fc80c0dfSnordmark tcp_conn_constructor, tcp_conn_destructor, 389fc80c0dfSnordmark NULL, NULL, NULL, 0); 390fc80c0dfSnordmark 391fc80c0dfSnordmark udp_conn_cache = kmem_cache_create("udp_conn_cache", 392fc80c0dfSnordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 393fc80c0dfSnordmark udp_conn_constructor, udp_conn_destructor, 394fc80c0dfSnordmark NULL, NULL, NULL, 0); 3957c478bd9Sstevel@tonic-gate 396fc80c0dfSnordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 397fc80c0dfSnordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 398fc80c0dfSnordmark rawip_conn_constructor, rawip_conn_destructor, 399fc80c0dfSnordmark NULL, NULL, NULL, 0); 400fc80c0dfSnordmark 401fc80c0dfSnordmark rts_conn_cache = kmem_cache_create("rts_conn_cache", 402fc80c0dfSnordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 403fc80c0dfSnordmark rts_conn_constructor, rts_conn_destructor, 4047c478bd9Sstevel@tonic-gate NULL, NULL, NULL, 0); 405*0f1702c5SYu Xiangning 406*0f1702c5SYu Xiangning if (ip_use_helper_cache) { 407*0f1702c5SYu Xiangning ip_helper_stream_cache = kmem_cache_create 408*0f1702c5SYu Xiangning ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t), 409*0f1702c5SYu Xiangning CACHE_ALIGN_SIZE, ip_helper_stream_constructor, 410*0f1702c5SYu Xiangning ip_helper_stream_destructor, NULL, NULL, NULL, 0); 411*0f1702c5SYu Xiangning } else { 412*0f1702c5SYu Xiangning ip_helper_stream_cache = NULL; 413*0f1702c5SYu Xiangning } 414f4b3ec61Sdh } 415f4b3ec61Sdh 416f4b3ec61Sdh /* 417f4b3ec61Sdh * ipclassifier intialization routine, sets up hash tables. 
418f4b3ec61Sdh */ 419f4b3ec61Sdh void 420f4b3ec61Sdh ipcl_init(ip_stack_t *ipst) 421f4b3ec61Sdh { 422f4b3ec61Sdh int i; 423f4b3ec61Sdh int sizes[] = P2Ps(); 4247c478bd9Sstevel@tonic-gate 4257c478bd9Sstevel@tonic-gate /* 426f4b3ec61Sdh * Calculate size of conn fanout table from /etc/system settings 4277c478bd9Sstevel@tonic-gate */ 4287c478bd9Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 429f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 4307c478bd9Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 431f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 4327c478bd9Sstevel@tonic-gate } else { 4337c478bd9Sstevel@tonic-gate extern pgcnt_t freemem; 4347c478bd9Sstevel@tonic-gate 435f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 4367c478bd9Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 4377c478bd9Sstevel@tonic-gate 438f4b3ec61Sdh if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 439f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 440f4b3ec61Sdh ipcl_conn_hash_maxsize; 441f4b3ec61Sdh } 4427c478bd9Sstevel@tonic-gate } 4437c478bd9Sstevel@tonic-gate 4447c478bd9Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 445f4b3ec61Sdh if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 4467c478bd9Sstevel@tonic-gate break; 4477c478bd9Sstevel@tonic-gate } 4487c478bd9Sstevel@tonic-gate } 449f4b3ec61Sdh if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 4507c478bd9Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 451f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = sizes[16]; 4527c478bd9Sstevel@tonic-gate } 4537c478bd9Sstevel@tonic-gate 454f4b3ec61Sdh /* Take values from /etc/system */ 455f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 456f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 457f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 458f4b3ec61Sdh 459f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 460f4b3ec61Sdh 
461f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = kmem_zalloc( 462f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 463f4b3ec61Sdh 464f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 465f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 4667c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4677c478bd9Sstevel@tonic-gate } 4687c478bd9Sstevel@tonic-gate 469f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = kmem_zalloc( 470f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 4717c478bd9Sstevel@tonic-gate 472f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 473f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 4747c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4757c478bd9Sstevel@tonic-gate } 4767c478bd9Sstevel@tonic-gate 477f4b3ec61Sdh ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 478f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 479f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 480f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 4817c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4827c478bd9Sstevel@tonic-gate } 483f4b3ec61Sdh 484f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 485f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 486f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 487f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 4887c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4897c478bd9Sstevel@tonic-gate } 4907c478bd9Sstevel@tonic-gate 491f4b3ec61Sdh ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 492f4b3ec61Sdh mutex_init(&ipst->ips_rts_clients->connf_lock, 493f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 4947c478bd9Sstevel@tonic-gate 495f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = kmem_zalloc( 496f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 497f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 498f4b3ec61Sdh 
mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 4997c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 5007c478bd9Sstevel@tonic-gate } 5017c478bd9Sstevel@tonic-gate 502f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = kmem_zalloc( 503f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 504f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 505f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 5067c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 5077c478bd9Sstevel@tonic-gate } 5087c478bd9Sstevel@tonic-gate 509f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 510f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 5117c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 512f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 513f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 5147c478bd9Sstevel@tonic-gate } 5157c478bd9Sstevel@tonic-gate } 5167c478bd9Sstevel@tonic-gate 5177c478bd9Sstevel@tonic-gate void 518f4b3ec61Sdh ipcl_g_destroy(void) 5197c478bd9Sstevel@tonic-gate { 520fc80c0dfSnordmark kmem_cache_destroy(ip_conn_cache); 521fc80c0dfSnordmark kmem_cache_destroy(tcp_conn_cache); 522fc80c0dfSnordmark kmem_cache_destroy(udp_conn_cache); 523fc80c0dfSnordmark kmem_cache_destroy(rawip_conn_cache); 524fc80c0dfSnordmark kmem_cache_destroy(rts_conn_cache); 525f4b3ec61Sdh } 526f4b3ec61Sdh 527f4b3ec61Sdh /* 528f4b3ec61Sdh * All user-level and kernel use of the stack must be gone 529f4b3ec61Sdh * by now. 
530f4b3ec61Sdh */ 531f4b3ec61Sdh void 532f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst) 533f4b3ec61Sdh { 534f4b3ec61Sdh int i; 535f4b3ec61Sdh 536f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 537f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 538f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 539f4b3ec61Sdh } 540f4b3ec61Sdh kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 541f4b3ec61Sdh sizeof (connf_t)); 542f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = NULL; 543f4b3ec61Sdh 544f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 545f4b3ec61Sdh ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 546f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 547f4b3ec61Sdh } 548f4b3ec61Sdh kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 549f4b3ec61Sdh sizeof (connf_t)); 550f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = NULL; 551f4b3ec61Sdh 552f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 553f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 554f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 555f4b3ec61Sdh } 556f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 557f4b3ec61Sdh ipst->ips_ipcl_proto_fanout = NULL; 558f4b3ec61Sdh 559f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 560f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 561f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 562f4b3ec61Sdh } 563f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout_v6, 564f4b3ec61Sdh IPPROTO_MAX * sizeof (connf_t)); 565f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = NULL; 566f4b3ec61Sdh 567f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 568f4b3ec61Sdh ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 569f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 570f4b3ec61Sdh } 571f4b3ec61Sdh 
kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 572f4b3ec61Sdh sizeof (connf_t)); 573f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = NULL; 574f4b3ec61Sdh 575f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 576f4b3ec61Sdh ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 577f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 578f4b3ec61Sdh } 579f4b3ec61Sdh kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 580f4b3ec61Sdh sizeof (connf_t)); 581f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = NULL; 582f4b3ec61Sdh 583f4b3ec61Sdh for (i = 0; i < CONN_G_HASH_SIZE; i++) { 584f4b3ec61Sdh ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 585f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 586f4b3ec61Sdh } 587f4b3ec61Sdh kmem_free(ipst->ips_ipcl_globalhash_fanout, 588f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE); 589f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = NULL; 590f4b3ec61Sdh 591f4b3ec61Sdh ASSERT(ipst->ips_rts_clients->connf_head == NULL); 592f4b3ec61Sdh mutex_destroy(&ipst->ips_rts_clients->connf_lock); 593f4b3ec61Sdh kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 594f4b3ec61Sdh ipst->ips_rts_clients = NULL; 5957c478bd9Sstevel@tonic-gate } 5967c478bd9Sstevel@tonic-gate 5977c478bd9Sstevel@tonic-gate /* 5987c478bd9Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 5997c478bd9Sstevel@tonic-gate * and inserts it in the global hash table. 
6007c478bd9Sstevel@tonic-gate */ 6017c478bd9Sstevel@tonic-gate conn_t * 602f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 6037c478bd9Sstevel@tonic-gate { 6047c478bd9Sstevel@tonic-gate conn_t *connp; 605f4b3ec61Sdh sctp_stack_t *sctps; 606fc80c0dfSnordmark struct kmem_cache *conn_cache; 6077c478bd9Sstevel@tonic-gate 6087c478bd9Sstevel@tonic-gate switch (type) { 6097c478bd9Sstevel@tonic-gate case IPCL_SCTPCONN: 6107c478bd9Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 6117c478bd9Sstevel@tonic-gate return (NULL); 612121e5416Skcpoon sctp_conn_init(connp); 613f4b3ec61Sdh sctps = ns->netstack_sctp; 614f4b3ec61Sdh SCTP_G_Q_REFHOLD(sctps); 615f4b3ec61Sdh netstack_hold(ns); 616f4b3ec61Sdh connp->conn_netstack = ns; 617fc80c0dfSnordmark return (connp); 618fc80c0dfSnordmark 619fc80c0dfSnordmark case IPCL_TCPCONN: 620fc80c0dfSnordmark conn_cache = tcp_conn_cache; 6217c478bd9Sstevel@tonic-gate break; 622fc80c0dfSnordmark 623fc80c0dfSnordmark case IPCL_UDPCONN: 624fc80c0dfSnordmark conn_cache = udp_conn_cache; 625fc80c0dfSnordmark break; 626fc80c0dfSnordmark 627fc80c0dfSnordmark case IPCL_RAWIPCONN: 628fc80c0dfSnordmark conn_cache = rawip_conn_cache; 629fc80c0dfSnordmark break; 630fc80c0dfSnordmark 631fc80c0dfSnordmark case IPCL_RTSCONN: 632fc80c0dfSnordmark conn_cache = rts_conn_cache; 633fc80c0dfSnordmark break; 634fc80c0dfSnordmark 6357c478bd9Sstevel@tonic-gate case IPCL_IPCCONN: 636fc80c0dfSnordmark conn_cache = ip_conn_cache; 6377c478bd9Sstevel@tonic-gate break; 638fc80c0dfSnordmark 639ff550d0eSmasputra default: 640ff550d0eSmasputra connp = NULL; 641ff550d0eSmasputra ASSERT(0); 6427c478bd9Sstevel@tonic-gate } 6437c478bd9Sstevel@tonic-gate 644fc80c0dfSnordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 645fc80c0dfSnordmark return (NULL); 646fc80c0dfSnordmark 647fc80c0dfSnordmark connp->conn_ref = 1; 648fc80c0dfSnordmark netstack_hold(ns); 649fc80c0dfSnordmark connp->conn_netstack = ns; 
650fc80c0dfSnordmark ipcl_globalhash_insert(connp); 6517c478bd9Sstevel@tonic-gate return (connp); 6527c478bd9Sstevel@tonic-gate } 6537c478bd9Sstevel@tonic-gate 6547c478bd9Sstevel@tonic-gate void 6557c478bd9Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 6567c478bd9Sstevel@tonic-gate { 6577c478bd9Sstevel@tonic-gate mblk_t *mp; 658f4b3ec61Sdh netstack_t *ns = connp->conn_netstack; 6597c478bd9Sstevel@tonic-gate 6607c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 6617c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 6627c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ire_cache == NULL); 6637c478bd9Sstevel@tonic-gate 664fab254e2SAruna Ramakrishna DTRACE_PROBE1(conn__destroy, conn_t *, connp); 665fab254e2SAruna Ramakrishna 66645916cd2Sjpk if (connp->conn_peercred != NULL && 66745916cd2Sjpk connp->conn_peercred != connp->conn_cred) 66845916cd2Sjpk crfree(connp->conn_peercred); 66945916cd2Sjpk connp->conn_peercred = NULL; 67045916cd2Sjpk 67145916cd2Sjpk if (connp->conn_cred != NULL) { 67245916cd2Sjpk crfree(connp->conn_cred); 67345916cd2Sjpk connp->conn_cred = NULL; 67445916cd2Sjpk } 67545916cd2Sjpk 6767c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(connp); 6777c478bd9Sstevel@tonic-gate 678fc80c0dfSnordmark /* FIXME: add separate tcp_conn_free()? 
*/ 6797c478bd9Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) { 680ff550d0eSmasputra tcp_t *tcp = connp->conn_tcp; 681f4b3ec61Sdh tcp_stack_t *tcps; 682f4b3ec61Sdh 683f4b3ec61Sdh ASSERT(tcp != NULL); 684f4b3ec61Sdh tcps = tcp->tcp_tcps; 685f4b3ec61Sdh if (tcps != NULL) { 686f4b3ec61Sdh if (connp->conn_latch != NULL) { 687f4b3ec61Sdh IPLATCH_REFRELE(connp->conn_latch, ns); 688f4b3ec61Sdh connp->conn_latch = NULL; 689f4b3ec61Sdh } 690f4b3ec61Sdh if (connp->conn_policy != NULL) { 691f4b3ec61Sdh IPPH_REFRELE(connp->conn_policy, ns); 692f4b3ec61Sdh connp->conn_policy = NULL; 693f4b3ec61Sdh } 694f4b3ec61Sdh tcp->tcp_tcps = NULL; 695f4b3ec61Sdh TCPS_REFRELE(tcps); 696f4b3ec61Sdh } 697ff550d0eSmasputra 6987c478bd9Sstevel@tonic-gate tcp_free(tcp); 6997c478bd9Sstevel@tonic-gate mp = tcp->tcp_timercache; 70045916cd2Sjpk tcp->tcp_cred = NULL; 7017c478bd9Sstevel@tonic-gate 7027c478bd9Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 7037c478bd9Sstevel@tonic-gate bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 7047c478bd9Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 7057c478bd9Sstevel@tonic-gate tcp->tcp_sack_info); 7067c478bd9Sstevel@tonic-gate } 7077c478bd9Sstevel@tonic-gate if (tcp->tcp_iphc != NULL) { 7087c478bd9Sstevel@tonic-gate if (tcp->tcp_hdr_grown) { 7097c478bd9Sstevel@tonic-gate kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 7107c478bd9Sstevel@tonic-gate } else { 7117c478bd9Sstevel@tonic-gate bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 7127c478bd9Sstevel@tonic-gate kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 7137c478bd9Sstevel@tonic-gate } 7147c478bd9Sstevel@tonic-gate tcp->tcp_iphc_len = 0; 7157c478bd9Sstevel@tonic-gate } 7167c478bd9Sstevel@tonic-gate ASSERT(tcp->tcp_iphc_len == 0); 7177c478bd9Sstevel@tonic-gate 718f7f8e53dSKacheong Poon /* 719f7f8e53dSKacheong Poon * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate 720f7f8e53dSKacheong Poon * the mblk. 
721f7f8e53dSKacheong Poon */ 722f7f8e53dSKacheong Poon if (tcp->tcp_rsrv_mp != NULL) { 723f7f8e53dSKacheong Poon freeb(tcp->tcp_rsrv_mp); 724f7f8e53dSKacheong Poon tcp->tcp_rsrv_mp = NULL; 725f7f8e53dSKacheong Poon mutex_destroy(&tcp->tcp_rsrv_mp_lock); 726f7f8e53dSKacheong Poon } 727f7f8e53dSKacheong Poon 728f4b3ec61Sdh ASSERT(connp->conn_latch == NULL); 729f4b3ec61Sdh ASSERT(connp->conn_policy == NULL); 730f4b3ec61Sdh 731f4b3ec61Sdh if (ns != NULL) { 732f4b3ec61Sdh ASSERT(tcp->tcp_tcps == NULL); 733f4b3ec61Sdh connp->conn_netstack = NULL; 734f4b3ec61Sdh netstack_rele(ns); 735f4b3ec61Sdh } 736fc80c0dfSnordmark 737fc80c0dfSnordmark ipcl_conn_cleanup(connp); 738fc80c0dfSnordmark connp->conn_flags = IPCL_TCPCONN; 739fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t)); 740fc80c0dfSnordmark 741fc80c0dfSnordmark tcp->tcp_timercache = mp; 742fc80c0dfSnordmark tcp->tcp_connp = connp; 743fc80c0dfSnordmark kmem_cache_free(tcp_conn_cache, connp); 744fc80c0dfSnordmark return; 745fc80c0dfSnordmark } 746fc80c0dfSnordmark if (connp->conn_latch != NULL) { 747fc80c0dfSnordmark IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); 748fc80c0dfSnordmark connp->conn_latch = NULL; 749fc80c0dfSnordmark } 750fc80c0dfSnordmark if (connp->conn_policy != NULL) { 751fc80c0dfSnordmark IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 752fc80c0dfSnordmark connp->conn_policy = NULL; 753fc80c0dfSnordmark } 754fc80c0dfSnordmark if (connp->conn_ipsec_opt_mp != NULL) { 755fc80c0dfSnordmark freemsg(connp->conn_ipsec_opt_mp); 756fc80c0dfSnordmark connp->conn_ipsec_opt_mp = NULL; 757fc80c0dfSnordmark } 758fc80c0dfSnordmark 759fc80c0dfSnordmark if (connp->conn_flags & IPCL_SCTPCONN) { 760f4b3ec61Sdh ASSERT(ns != NULL); 7617c478bd9Sstevel@tonic-gate sctp_free(connp); 762fc80c0dfSnordmark return; 763fc80c0dfSnordmark } 764fc80c0dfSnordmark 765fc80c0dfSnordmark if (ns != NULL) { 766fc80c0dfSnordmark connp->conn_netstack = NULL; 767fc80c0dfSnordmark netstack_rele(ns); 768fc80c0dfSnordmark } 
769*0f1702c5SYu Xiangning 770fc80c0dfSnordmark ipcl_conn_cleanup(connp); 771fc80c0dfSnordmark 772fc80c0dfSnordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */ 773fc80c0dfSnordmark if (connp->conn_flags & IPCL_UDPCONN) { 774fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN; 775fc80c0dfSnordmark kmem_cache_free(udp_conn_cache, connp); 776fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) { 777*0f1702c5SYu Xiangning 778fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN; 779fc80c0dfSnordmark connp->conn_ulp = IPPROTO_ICMP; 780fc80c0dfSnordmark kmem_cache_free(rawip_conn_cache, connp); 781fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RTSCONN) { 782fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN; 783fc80c0dfSnordmark kmem_cache_free(rts_conn_cache, connp); 7847c478bd9Sstevel@tonic-gate } else { 785fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN; 786fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 787fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL); 788fc80c0dfSnordmark kmem_cache_free(ip_conn_cache, connp); 7897c478bd9Sstevel@tonic-gate } 7907c478bd9Sstevel@tonic-gate } 7917c478bd9Sstevel@tonic-gate 7927c478bd9Sstevel@tonic-gate /* 7937c478bd9Sstevel@tonic-gate * Running in cluster mode - deregister listener information 7947c478bd9Sstevel@tonic-gate */ 7957c478bd9Sstevel@tonic-gate 7967c478bd9Sstevel@tonic-gate static void 7977c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 7987c478bd9Sstevel@tonic-gate { 7997c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 8007c478bd9Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 8017c478bd9Sstevel@tonic-gate 8027c478bd9Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 8037c478bd9Sstevel@tonic-gate sa_family_t addr_family; 8047c478bd9Sstevel@tonic-gate uint8_t *laddrp; 8057c478bd9Sstevel@tonic-gate 8067c478bd9Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 8077c478bd9Sstevel@tonic-gate addr_family = AF_INET6; 
8087c478bd9Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source_v6; 8097c478bd9Sstevel@tonic-gate } else { 8107c478bd9Sstevel@tonic-gate addr_family = AF_INET; 8117c478bd9Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 8127c478bd9Sstevel@tonic-gate } 8137c478bd9Sstevel@tonic-gate (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 8147c478bd9Sstevel@tonic-gate connp->conn_lport); 8157c478bd9Sstevel@tonic-gate } 8167c478bd9Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER; 8177c478bd9Sstevel@tonic-gate } 8187c478bd9Sstevel@tonic-gate 8197c478bd9Sstevel@tonic-gate /* 8207c478bd9Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 8217c478bd9Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 8227c478bd9Sstevel@tonic-gate * table this connection was in. 8237c478bd9Sstevel@tonic-gate */ 8247c478bd9Sstevel@tonic-gate #define IPCL_HASH_REMOVE(connp) { \ 8257c478bd9Sstevel@tonic-gate connf_t *connfp = (connp)->conn_fanout; \ 8267c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 8277c478bd9Sstevel@tonic-gate if (connfp != NULL) { \ 8287c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 8297c478bd9Sstevel@tonic-gate (void *)(connp))); \ 8307c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); \ 8317c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) \ 8327c478bd9Sstevel@tonic-gate (connp)->conn_next->conn_prev = \ 8337c478bd9Sstevel@tonic-gate (connp)->conn_prev; \ 8347c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) \ 8357c478bd9Sstevel@tonic-gate (connp)->conn_prev->conn_next = \ 8367c478bd9Sstevel@tonic-gate (connp)->conn_next; \ 8377c478bd9Sstevel@tonic-gate else \ 8387c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; \ 8397c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; \ 8407c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; \ 
8417c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; \ 8427c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; \ 8437c478bd9Sstevel@tonic-gate if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 8447c478bd9Sstevel@tonic-gate ipcl_conn_unlisten((connp)); \ 8457c478bd9Sstevel@tonic-gate CONN_DEC_REF((connp)); \ 8467c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); \ 8477c478bd9Sstevel@tonic-gate } \ 8487c478bd9Sstevel@tonic-gate } 8497c478bd9Sstevel@tonic-gate 8507c478bd9Sstevel@tonic-gate void 8517c478bd9Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp) 8527c478bd9Sstevel@tonic-gate { 8537c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 8547c478bd9Sstevel@tonic-gate } 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate /* 8577c478bd9Sstevel@tonic-gate * The whole purpose of this function is allow removal of 8587c478bd9Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim. 8597c478bd9Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 8607c478bd9Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 8617c478bd9Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 8627c478bd9Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 8637c478bd9Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 8647c478bd9Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 8657c478bd9Sstevel@tonic-gate * improved performance. 
8667c478bd9Sstevel@tonic-gate */ 8677c478bd9Sstevel@tonic-gate void 8687c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 8697c478bd9Sstevel@tonic-gate { 8707c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 8717c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 8727c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 8737c478bd9Sstevel@tonic-gate 8747c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 875121e5416Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev; 8767c478bd9Sstevel@tonic-gate } 8777c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 878121e5416Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next; 8797c478bd9Sstevel@tonic-gate } else { 8807c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 8817c478bd9Sstevel@tonic-gate } 8827c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; 8837c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; 8847c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; 8857c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 8867c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 8877c478bd9Sstevel@tonic-gate (connp)->conn_ref--; 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate 8907c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 8917c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 8927c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_next == NULL); \ 8937c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 8947c478bd9Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 8957c478bd9Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 8967c478bd9Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 8977c478bd9Sstevel@tonic-gate } \ 8987c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8997c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 
9007c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9017c478bd9Sstevel@tonic-gate IPCL_CONNECTED; \ 9027c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 9037c478bd9Sstevel@tonic-gate } 9047c478bd9Sstevel@tonic-gate 9057c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 9067c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 9077c478bd9Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 9087c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9097c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9107c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 9117c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9127c478bd9Sstevel@tonic-gate } 9137c478bd9Sstevel@tonic-gate 9147c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 9157c478bd9Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 9167c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 9177c478bd9Sstevel@tonic-gate "connp %p", (void *)connfp, (void *)(connp))); \ 9187c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9197c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9207c478bd9Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 9213d1c78fbSethindra while (nconnp != NULL && \ 9223d1c78fbSethindra !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 9233d1c78fbSethindra pconnp = nconnp; \ 9243d1c78fbSethindra nconnp = nconnp->conn_next; \ 9257c478bd9Sstevel@tonic-gate } \ 9267c478bd9Sstevel@tonic-gate if (pconnp != NULL) { \ 9277c478bd9Sstevel@tonic-gate pconnp->conn_next = (connp); \ 9287c478bd9Sstevel@tonic-gate (connp)->conn_prev = pconnp; \ 9297c478bd9Sstevel@tonic-gate } else { \ 9307c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 9317c478bd9Sstevel@tonic-gate } \ 9327c478bd9Sstevel@tonic-gate if (nconnp != NULL) { \ 9337c478bd9Sstevel@tonic-gate 
(connp)->conn_next = nconnp; \ 9347c478bd9Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 9357c478bd9Sstevel@tonic-gate } \ 9367c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9377c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9387c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 9397c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 9407c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9417c478bd9Sstevel@tonic-gate } 9427c478bd9Sstevel@tonic-gate 9437c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 9447c478bd9Sstevel@tonic-gate conn_t **list, *prev, *next; \ 9457c478bd9Sstevel@tonic-gate boolean_t isv4mapped = \ 9467c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 9477c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 9487c478bd9Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 9497c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9507c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9517c478bd9Sstevel@tonic-gate list = &(connfp)->connf_head; \ 9527c478bd9Sstevel@tonic-gate prev = NULL; \ 9537c478bd9Sstevel@tonic-gate while ((next = *list) != NULL) { \ 9547c478bd9Sstevel@tonic-gate if (isv4mapped && \ 9557c478bd9Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 9567c478bd9Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 9577c478bd9Sstevel@tonic-gate (connp)->conn_next = next; \ 9587c478bd9Sstevel@tonic-gate if (prev != NULL) \ 9597c478bd9Sstevel@tonic-gate prev = next->conn_prev; \ 9607c478bd9Sstevel@tonic-gate next->conn_prev = (connp); \ 9617c478bd9Sstevel@tonic-gate break; \ 9627c478bd9Sstevel@tonic-gate } \ 9637c478bd9Sstevel@tonic-gate list = &next->conn_next; \ 9647c478bd9Sstevel@tonic-gate prev = next; \ 9657c478bd9Sstevel@tonic-gate } \ 9667c478bd9Sstevel@tonic-gate (connp)->conn_prev = prev; \ 9677c478bd9Sstevel@tonic-gate *list = (connp); 
\ 9687c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9697c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9707c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 9717c478bd9Sstevel@tonic-gate CONN_INC_REF((connp)); \ 9727c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9737c478bd9Sstevel@tonic-gate } 9747c478bd9Sstevel@tonic-gate 9757c478bd9Sstevel@tonic-gate void 9767c478bd9Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 9777c478bd9Sstevel@tonic-gate { 9787c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9797c478bd9Sstevel@tonic-gate } 9807c478bd9Sstevel@tonic-gate 9817c478bd9Sstevel@tonic-gate void 9827c478bd9Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol) 9837c478bd9Sstevel@tonic-gate { 9847c478bd9Sstevel@tonic-gate connf_t *connfp; 985f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9867c478bd9Sstevel@tonic-gate 9877c478bd9Sstevel@tonic-gate ASSERT(connp != NULL); 98845916cd2Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 98945916cd2Sjpk protocol == IPPROTO_ESP); 9907c478bd9Sstevel@tonic-gate 9917c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 9927c478bd9Sstevel@tonic-gate 9937c478bd9Sstevel@tonic-gate /* Insert it in the protocol hash */ 994f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 9957c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9967c478bd9Sstevel@tonic-gate } 9977c478bd9Sstevel@tonic-gate 9987c478bd9Sstevel@tonic-gate void 9997c478bd9Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 10007c478bd9Sstevel@tonic-gate { 10017c478bd9Sstevel@tonic-gate connf_t *connfp; 1002f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10037c478bd9Sstevel@tonic-gate 10047c478bd9Sstevel@tonic-gate ASSERT(connp != NULL); 100545916cd2Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 100645916cd2Sjpk protocol == 
IPPROTO_ESP); 10077c478bd9Sstevel@tonic-gate 10087c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 10097c478bd9Sstevel@tonic-gate 10107c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 1011f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 10127c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10137c478bd9Sstevel@tonic-gate } 10147c478bd9Sstevel@tonic-gate 10157c478bd9Sstevel@tonic-gate /* 10167c478bd9Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now. 10177c478bd9Sstevel@tonic-gate * This may change later. 10187c478bd9Sstevel@tonic-gate * 10197c478bd9Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 10207c478bd9Sstevel@tonic-gate * lport is in network byte order. 10217c478bd9Sstevel@tonic-gate */ 10227c478bd9Sstevel@tonic-gate static int 10237c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 10247c478bd9Sstevel@tonic-gate { 10257c478bd9Sstevel@tonic-gate connf_t *connfp; 10267c478bd9Sstevel@tonic-gate conn_t *oconnp; 1027f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10287c478bd9Sstevel@tonic-gate 1029f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 10307c478bd9Sstevel@tonic-gate 10317c478bd9Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. 
*/ 10327c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 10337c478bd9Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 10347c0c0508Skcpoon oconnp = oconnp->conn_next) { 10357c478bd9Sstevel@tonic-gate if (oconnp->conn_lport == lport && 10367c478bd9Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 10377c478bd9Sstevel@tonic-gate oconnp->conn_af_isv6 == connp->conn_af_isv6 && 10387c478bd9Sstevel@tonic-gate ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10397c478bd9Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 10407c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 10417c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 10427c478bd9Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 10437c478bd9Sstevel@tonic-gate &connp->conn_srcv6))) { 10447c478bd9Sstevel@tonic-gate break; 10457c478bd9Sstevel@tonic-gate } 10467c478bd9Sstevel@tonic-gate } 10477c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 10487c478bd9Sstevel@tonic-gate if (oconnp != NULL) 10497c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL); 10507c478bd9Sstevel@tonic-gate 10517c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 10527c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 10537c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10547c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 10557c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10567c478bd9Sstevel@tonic-gate } else { 10577c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10587c478bd9Sstevel@tonic-gate } 10597c478bd9Sstevel@tonic-gate } else { 10607c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10617c478bd9Sstevel@tonic-gate } 10627c478bd9Sstevel@tonic-gate return (0); 10637c478bd9Sstevel@tonic-gate } 10647c478bd9Sstevel@tonic-gate 106545916cd2Sjpk /* 106645916cd2Sjpk * 
Check for a MAC exemption conflict on a labeled system. Note that for 106745916cd2Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 106845916cd2Sjpk * transport layer. This check is for binding all other protocols. 106945916cd2Sjpk * 107045916cd2Sjpk * Returns true if there's a conflict. 107145916cd2Sjpk */ 107245916cd2Sjpk static boolean_t 1073f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 107445916cd2Sjpk { 107545916cd2Sjpk connf_t *connfp; 107645916cd2Sjpk conn_t *tconn; 107745916cd2Sjpk 1078f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 107945916cd2Sjpk mutex_enter(&connfp->connf_lock); 108045916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 108145916cd2Sjpk tconn = tconn->conn_next) { 108245916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */ 108345916cd2Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 108445916cd2Sjpk continue; 108545916cd2Sjpk /* If neither is exempt, then there's no conflict */ 108645916cd2Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 108745916cd2Sjpk continue; 108845916cd2Sjpk /* If both are bound to different specific addrs, ok */ 108945916cd2Sjpk if (connp->conn_src != INADDR_ANY && 109045916cd2Sjpk tconn->conn_src != INADDR_ANY && 109145916cd2Sjpk connp->conn_src != tconn->conn_src) 109245916cd2Sjpk continue; 109345916cd2Sjpk /* These two conflict; fail */ 109445916cd2Sjpk break; 109545916cd2Sjpk } 109645916cd2Sjpk mutex_exit(&connfp->connf_lock); 109745916cd2Sjpk return (tconn != NULL); 109845916cd2Sjpk } 109945916cd2Sjpk 110045916cd2Sjpk static boolean_t 1101f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 110245916cd2Sjpk { 110345916cd2Sjpk connf_t *connfp; 110445916cd2Sjpk conn_t *tconn; 110545916cd2Sjpk 1106f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 110745916cd2Sjpk mutex_enter(&connfp->connf_lock); 110845916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 
110945916cd2Sjpk tconn = tconn->conn_next) { 111045916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */ 111145916cd2Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 111245916cd2Sjpk continue; 111345916cd2Sjpk /* If neither is exempt, then there's no conflict */ 111445916cd2Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 111545916cd2Sjpk continue; 111645916cd2Sjpk /* If both are bound to different addrs, ok */ 111745916cd2Sjpk if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 111845916cd2Sjpk !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 111945916cd2Sjpk !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6)) 112045916cd2Sjpk continue; 112145916cd2Sjpk /* These two conflict; fail */ 112245916cd2Sjpk break; 112345916cd2Sjpk } 112445916cd2Sjpk mutex_exit(&connfp->connf_lock); 112545916cd2Sjpk return (tconn != NULL); 112645916cd2Sjpk } 112745916cd2Sjpk 11287c478bd9Sstevel@tonic-gate /* 11297c478bd9Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 11307c478bd9Sstevel@tonic-gate */ 11317c478bd9Sstevel@tonic-gate int 11327c478bd9Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 11337c478bd9Sstevel@tonic-gate { 11347c478bd9Sstevel@tonic-gate connf_t *connfp; 11357c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 11367c478bd9Sstevel@tonic-gate char buf[INET_NTOA_BUFSIZE]; 11377c478bd9Sstevel@tonic-gate #endif 11387c478bd9Sstevel@tonic-gate int ret = 0; 1139f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 11407c478bd9Sstevel@tonic-gate 11417c478bd9Sstevel@tonic-gate ASSERT(connp); 11427c478bd9Sstevel@tonic-gate 11437c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 11447c478bd9Sstevel@tonic-gate "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 11457c478bd9Sstevel@tonic-gate 11467c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 11477c478bd9Sstevel@tonic-gate IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 
11487c478bd9Sstevel@tonic-gate connp->conn_lport = lport; 11497c478bd9Sstevel@tonic-gate 11507c478bd9Sstevel@tonic-gate switch (protocol) { 11517c478bd9Sstevel@tonic-gate default: 1152f4b3ec61Sdh if (is_system_labeled() && 1153f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 115445916cd2Sjpk return (EADDRINUSE); 115545916cd2Sjpk /* FALLTHROUGH */ 115645916cd2Sjpk case IPPROTO_UDP: 11577c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 11587c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11597c478bd9Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - udp\n", 11607c478bd9Sstevel@tonic-gate (void *)connp)); 1161f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1162f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)]; 11637c478bd9Sstevel@tonic-gate } else { 11647c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11657c478bd9Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - protocol\n", 11667c478bd9Sstevel@tonic-gate (void *)connp)); 1167f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 11687c478bd9Sstevel@tonic-gate } 11697c478bd9Sstevel@tonic-gate 11707c478bd9Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 11717c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 11727c478bd9Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 11737c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11747c478bd9Sstevel@tonic-gate } else { 11757c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11767c478bd9Sstevel@tonic-gate } 11777c478bd9Sstevel@tonic-gate break; 11787c478bd9Sstevel@tonic-gate 11797c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 118245916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1183f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[ 1184f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)]; 11857c478bd9Sstevel@tonic-gate if (connp->conn_src != INADDR_ANY) { 11867c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 
11877c478bd9Sstevel@tonic-gate } else { 11887c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11897c478bd9Sstevel@tonic-gate } 11907c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) { 11917c478bd9Sstevel@tonic-gate ASSERT(!connp->conn_pkt_isv6); 11927c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 11937c478bd9Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 11947c478bd9Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source, lport); 11957c478bd9Sstevel@tonic-gate } 11967c478bd9Sstevel@tonic-gate break; 11977c478bd9Sstevel@tonic-gate 11987c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 11997c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12007c478bd9Sstevel@tonic-gate break; 12017c478bd9Sstevel@tonic-gate } 12027c478bd9Sstevel@tonic-gate 12037c478bd9Sstevel@tonic-gate return (ret); 12047c478bd9Sstevel@tonic-gate } 12057c478bd9Sstevel@tonic-gate 12067c478bd9Sstevel@tonic-gate int 12077c478bd9Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 12087c478bd9Sstevel@tonic-gate uint16_t lport) 12097c478bd9Sstevel@tonic-gate { 12107c478bd9Sstevel@tonic-gate connf_t *connfp; 12117c478bd9Sstevel@tonic-gate int ret = 0; 1212f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12137c478bd9Sstevel@tonic-gate 12147c478bd9Sstevel@tonic-gate ASSERT(connp); 12157c478bd9Sstevel@tonic-gate 12167c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 12177c478bd9Sstevel@tonic-gate connp->conn_srcv6 = *src; 12187c478bd9Sstevel@tonic-gate connp->conn_lport = lport; 12197c478bd9Sstevel@tonic-gate 12207c478bd9Sstevel@tonic-gate switch (protocol) { 12217c478bd9Sstevel@tonic-gate default: 1222f4b3ec61Sdh if (is_system_labeled() && 1223f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst)) 122445916cd2Sjpk return (EADDRINUSE); 122545916cd2Sjpk /* FALLTHROUGH */ 122645916cd2Sjpk case IPPROTO_UDP: 12277c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 
12287c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 12297c478bd9Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - udp\n", 12307c478bd9Sstevel@tonic-gate (void *)connp)); 1231f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1232f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)]; 12337c478bd9Sstevel@tonic-gate } else { 12347c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 12357c478bd9Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - protocol\n", 12367c478bd9Sstevel@tonic-gate (void *)connp)); 1237f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 12387c478bd9Sstevel@tonic-gate } 12397c478bd9Sstevel@tonic-gate 12407c478bd9Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 12417c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 12427c478bd9Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 12437c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12447c478bd9Sstevel@tonic-gate } else { 12457c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12467c478bd9Sstevel@tonic-gate } 12477c478bd9Sstevel@tonic-gate break; 12487c478bd9Sstevel@tonic-gate 12497c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 12507c478bd9Sstevel@tonic-gate /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? 
*/ 12517c478bd9Sstevel@tonic-gate 12527c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 125345916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1254f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[ 1255f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)]; 12567c478bd9Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 12577c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12587c478bd9Sstevel@tonic-gate } else { 12597c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12607c478bd9Sstevel@tonic-gate } 12617c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) { 12627c478bd9Sstevel@tonic-gate sa_family_t addr_family; 12637c478bd9Sstevel@tonic-gate uint8_t *laddrp; 12647c478bd9Sstevel@tonic-gate 12657c478bd9Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 12667c478bd9Sstevel@tonic-gate addr_family = AF_INET6; 12677c478bd9Sstevel@tonic-gate laddrp = 12687c478bd9Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source_v6; 12697c478bd9Sstevel@tonic-gate } else { 12707c478bd9Sstevel@tonic-gate addr_family = AF_INET; 12717c478bd9Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 12727c478bd9Sstevel@tonic-gate } 12737c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 12747c478bd9Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 12757c478bd9Sstevel@tonic-gate lport); 12767c478bd9Sstevel@tonic-gate } 12777c478bd9Sstevel@tonic-gate break; 12787c478bd9Sstevel@tonic-gate 12797c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 12807c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12817c478bd9Sstevel@tonic-gate break; 12827c478bd9Sstevel@tonic-gate } 12837c478bd9Sstevel@tonic-gate 12847c478bd9Sstevel@tonic-gate return (ret); 12857c478bd9Sstevel@tonic-gate } 12867c478bd9Sstevel@tonic-gate 12877c478bd9Sstevel@tonic-gate /* 12887c478bd9Sstevel@tonic-gate * ipcl_conn_hash insertion routines. 
12897c478bd9Sstevel@tonic-gate */ 12907c478bd9Sstevel@tonic-gate int 12917c478bd9Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 12927c478bd9Sstevel@tonic-gate ipaddr_t rem, uint32_t ports) 12937c478bd9Sstevel@tonic-gate { 12947c478bd9Sstevel@tonic-gate connf_t *connfp; 12957c478bd9Sstevel@tonic-gate uint16_t *up; 12967c478bd9Sstevel@tonic-gate conn_t *tconnp; 12977c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 12987c478bd9Sstevel@tonic-gate char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 12997c478bd9Sstevel@tonic-gate #endif 13007c478bd9Sstevel@tonic-gate in_port_t lport; 13017c478bd9Sstevel@tonic-gate int ret = 0; 1302f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13037c478bd9Sstevel@tonic-gate 13047c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 13057c478bd9Sstevel@tonic-gate "dst = %s, ports = %x, protocol = %x", (void *)connp, 13067c478bd9Sstevel@tonic-gate inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 13077c478bd9Sstevel@tonic-gate ports, protocol)); 13087c478bd9Sstevel@tonic-gate 13097c478bd9Sstevel@tonic-gate switch (protocol) { 13107c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 13117c478bd9Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 13127c478bd9Sstevel@tonic-gate /* 13137c478bd9Sstevel@tonic-gate * for a eager connection, i.e connections which 13147c478bd9Sstevel@tonic-gate * have just been created, the initialization is 13157c478bd9Sstevel@tonic-gate * already done in ip at conn_creation time, so 13167c478bd9Sstevel@tonic-gate * we can skip the checks here. 
13177c478bd9Sstevel@tonic-gate */ 13187c478bd9Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 13197c478bd9Sstevel@tonic-gate } 1320f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1321f4b3ec61Sdh IPCL_CONN_HASH(connp->conn_rem, 1322f4b3ec61Sdh connp->conn_ports, ipst)]; 13237c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13247c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 13257c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 13267c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 13277c478bd9Sstevel@tonic-gate connp->conn_rem, connp->conn_src, 13287c478bd9Sstevel@tonic-gate connp->conn_ports)) { 13297c478bd9Sstevel@tonic-gate 13307c478bd9Sstevel@tonic-gate /* Already have a conn. bail out */ 13317c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13327c478bd9Sstevel@tonic-gate return (EADDRINUSE); 13337c478bd9Sstevel@tonic-gate } 13347c478bd9Sstevel@tonic-gate } 13357c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 13367c478bd9Sstevel@tonic-gate /* 13377c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 13387c478bd9Sstevel@tonic-gate * rebind. Let it happen. 13397c478bd9Sstevel@tonic-gate */ 13407c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13417c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 13427c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13437c478bd9Sstevel@tonic-gate } 1344866ba9ddSjprakash 1345866ba9ddSjprakash ASSERT(connp->conn_recv != NULL); 1346866ba9ddSjprakash 13477c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 13487c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13497c478bd9Sstevel@tonic-gate break; 13507c478bd9Sstevel@tonic-gate 13517c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 13527c0c0508Skcpoon /* 13537c0c0508Skcpoon * The raw socket may have already been bound, remove it 13547c0c0508Skcpoon * from the hash first. 
13557c0c0508Skcpoon */ 13567c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 13577c0c0508Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 13587c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13597c478bd9Sstevel@tonic-gate break; 13607c478bd9Sstevel@tonic-gate 13617c478bd9Sstevel@tonic-gate default: 136245916cd2Sjpk /* 136345916cd2Sjpk * Check for conflicts among MAC exempt bindings. For 136445916cd2Sjpk * transports with port numbers, this is done by the upper 136545916cd2Sjpk * level per-transport binding logic. For all others, it's 136645916cd2Sjpk * done here. 136745916cd2Sjpk */ 1368f4b3ec61Sdh if (is_system_labeled() && 1369f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 137045916cd2Sjpk return (EADDRINUSE); 137145916cd2Sjpk /* FALLTHROUGH */ 137245916cd2Sjpk 137345916cd2Sjpk case IPPROTO_UDP: 13747c478bd9Sstevel@tonic-gate up = (uint16_t *)&ports; 13757c478bd9Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 13767c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1377f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1378f4b3ec61Sdh IPCL_UDP_HASH(up[1], ipst)]; 13797c478bd9Sstevel@tonic-gate } else { 1380f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 13817c478bd9Sstevel@tonic-gate } 13827c478bd9Sstevel@tonic-gate 13837c478bd9Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 13847c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 13857c478bd9Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 13867c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 13877c478bd9Sstevel@tonic-gate } else { 13887c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 13897c478bd9Sstevel@tonic-gate } 13907c478bd9Sstevel@tonic-gate break; 13917c478bd9Sstevel@tonic-gate } 13927c478bd9Sstevel@tonic-gate 13937c478bd9Sstevel@tonic-gate return (ret); 13947c478bd9Sstevel@tonic-gate } 13957c478bd9Sstevel@tonic-gate 13967c478bd9Sstevel@tonic-gate int 
13977c478bd9Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 13987c478bd9Sstevel@tonic-gate const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 13997c478bd9Sstevel@tonic-gate { 14007c478bd9Sstevel@tonic-gate connf_t *connfp; 14017c478bd9Sstevel@tonic-gate uint16_t *up; 14027c478bd9Sstevel@tonic-gate conn_t *tconnp; 14037c478bd9Sstevel@tonic-gate in_port_t lport; 14047c478bd9Sstevel@tonic-gate int ret = 0; 1405f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 14067c478bd9Sstevel@tonic-gate 14077c478bd9Sstevel@tonic-gate switch (protocol) { 14087c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 14097c478bd9Sstevel@tonic-gate /* Just need to insert a conn struct */ 14107c478bd9Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 14117c478bd9Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 14127c478bd9Sstevel@tonic-gate } 1413f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1414f4b3ec61Sdh IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports, 1415f4b3ec61Sdh ipst)]; 14167c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14177c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 14187c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 14197c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 14207c478bd9Sstevel@tonic-gate connp->conn_remv6, connp->conn_srcv6, 14217c478bd9Sstevel@tonic-gate connp->conn_ports) && 14227c478bd9Sstevel@tonic-gate (tconnp->conn_tcp->tcp_bound_if == 0 || 14237c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_bound_if == ifindex)) { 14247c478bd9Sstevel@tonic-gate /* Already have a conn. 
bail out */ 14257c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14267c478bd9Sstevel@tonic-gate return (EADDRINUSE); 14277c478bd9Sstevel@tonic-gate } 14287c478bd9Sstevel@tonic-gate } 14297c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 14307c478bd9Sstevel@tonic-gate /* 14317c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 14327c478bd9Sstevel@tonic-gate * rebind. Let it happen. 14337c478bd9Sstevel@tonic-gate */ 14347c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14357c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 14367c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14377c478bd9Sstevel@tonic-gate } 14387c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 14397c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14407c478bd9Sstevel@tonic-gate break; 14417c478bd9Sstevel@tonic-gate 14427c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 14437c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 14447c0c0508Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 14457c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 14467c478bd9Sstevel@tonic-gate break; 14477c478bd9Sstevel@tonic-gate 14487c478bd9Sstevel@tonic-gate default: 1449f4b3ec61Sdh if (is_system_labeled() && 1450f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst)) 145145916cd2Sjpk return (EADDRINUSE); 145245916cd2Sjpk /* FALLTHROUGH */ 145345916cd2Sjpk case IPPROTO_UDP: 14547c478bd9Sstevel@tonic-gate up = (uint16_t *)&ports; 14557c478bd9Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 14567c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1457f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1458f4b3ec61Sdh IPCL_UDP_HASH(up[1], ipst)]; 14597c478bd9Sstevel@tonic-gate } else { 1460f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 14617c478bd9Sstevel@tonic-gate } 14627c478bd9Sstevel@tonic-gate 14637c478bd9Sstevel@tonic-gate if 
(!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 14647c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 14657c478bd9Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 14667c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 14677c478bd9Sstevel@tonic-gate } else { 14687c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 14697c478bd9Sstevel@tonic-gate } 14707c478bd9Sstevel@tonic-gate break; 14717c478bd9Sstevel@tonic-gate } 14727c478bd9Sstevel@tonic-gate 14737c478bd9Sstevel@tonic-gate return (ret); 14747c478bd9Sstevel@tonic-gate } 14757c478bd9Sstevel@tonic-gate 14767c478bd9Sstevel@tonic-gate /* 14777c478bd9Sstevel@tonic-gate * v4 packet classifying function. looks up the fanout table to 14787c478bd9Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 14797c478bd9Sstevel@tonic-gate * the reference held, null otherwise. 148045916cd2Sjpk * 148145916cd2Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 148245916cd2Sjpk * Lookup" comment block are applied. Labels are also checked as described 148345916cd2Sjpk * above. If the packet is from the inside (looped back), and is from the same 148445916cd2Sjpk * zone, then label checks are omitted. 
14857c478bd9Sstevel@tonic-gate */ 14867c478bd9Sstevel@tonic-gate conn_t * 1487f4b3ec61Sdh ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1488f4b3ec61Sdh ip_stack_t *ipst) 14897c478bd9Sstevel@tonic-gate { 14907c478bd9Sstevel@tonic-gate ipha_t *ipha; 14917c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 14927c478bd9Sstevel@tonic-gate uint16_t lport; 14937c478bd9Sstevel@tonic-gate uint16_t fport; 14947c478bd9Sstevel@tonic-gate uint32_t ports; 14957c478bd9Sstevel@tonic-gate conn_t *connp; 14967c478bd9Sstevel@tonic-gate uint16_t *up; 149745916cd2Sjpk boolean_t shared_addr; 149845916cd2Sjpk boolean_t unlabeled; 14997c478bd9Sstevel@tonic-gate 15007c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 15017c478bd9Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 15027c478bd9Sstevel@tonic-gate 15037c478bd9Sstevel@tonic-gate switch (protocol) { 15047c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 15057c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 15067c478bd9Sstevel@tonic-gate connfp = 1507f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 1508f4b3ec61Sdh ports, ipst)]; 15097c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 15107c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 15117c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 15127c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(connp, protocol, 15137c478bd9Sstevel@tonic-gate ipha->ipha_src, ipha->ipha_dst, ports)) 15147c478bd9Sstevel@tonic-gate break; 15157c478bd9Sstevel@tonic-gate } 15167c478bd9Sstevel@tonic-gate 15177c478bd9Sstevel@tonic-gate if (connp != NULL) { 151845916cd2Sjpk /* 151945916cd2Sjpk * We have a fully-bound TCP connection. 152045916cd2Sjpk * 152145916cd2Sjpk * For labeled systems, there's no need to check the 152245916cd2Sjpk * label here. It's known to be good as we checked 152345916cd2Sjpk * before allowing the connection to become bound. 
152445916cd2Sjpk */ 15257c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 15267c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15277c478bd9Sstevel@tonic-gate return (connp); 15287c478bd9Sstevel@tonic-gate } 15297c478bd9Sstevel@tonic-gate 15307c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15317c478bd9Sstevel@tonic-gate 15327c478bd9Sstevel@tonic-gate lport = up[1]; 153345916cd2Sjpk unlabeled = B_FALSE; 153445916cd2Sjpk /* Cred cannot be null on IPv4 */ 153545916cd2Sjpk if (is_system_labeled()) 153645916cd2Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 153745916cd2Sjpk TSLF_UNLABELED) != 0; 153845916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 153945916cd2Sjpk if (shared_addr) { 1540f4b3ec61Sdh /* 1541f4b3ec61Sdh * No need to handle exclusive-stack zones since 1542f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1543f4b3ec61Sdh */ 154445916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 154545916cd2Sjpk /* 154645916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 154745916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 154845916cd2Sjpk * search for the zone based on the packet label. 154945916cd2Sjpk * 155045916cd2Sjpk * If there is such a zone, we prefer to find a 155145916cd2Sjpk * connection in it. Otherwise, we look for a 155245916cd2Sjpk * MAC-exempt connection in any zone whose label 155345916cd2Sjpk * dominates the default label on the packet. 
155445916cd2Sjpk */ 155545916cd2Sjpk if (zoneid == ALL_ZONES) 155645916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 155745916cd2Sjpk else 155845916cd2Sjpk unlabeled = B_FALSE; 155945916cd2Sjpk } 156045916cd2Sjpk 1561f4b3ec61Sdh bind_connfp = 1562f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 15637c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 15647c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 15657c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 156645916cd2Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 15675d0bc3edSsommerfe lport) && (IPCL_ZONE_MATCH(connp, zoneid) || 156845916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 15697c478bd9Sstevel@tonic-gate break; 15707c478bd9Sstevel@tonic-gate } 15717c478bd9Sstevel@tonic-gate 157245916cd2Sjpk /* 157345916cd2Sjpk * If the matching connection is SLP on a private address, then 157445916cd2Sjpk * the label on the packet must match the local zone's label. 157545916cd2Sjpk * Otherwise, it must be in the label range defined by tnrh. 157645916cd2Sjpk * This is ensured by tsol_receive_label. 
157745916cd2Sjpk */ 157845916cd2Sjpk if (connp != NULL && is_system_labeled() && 157945916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 158045916cd2Sjpk shared_addr, connp)) { 158145916cd2Sjpk DTRACE_PROBE3( 158245916cd2Sjpk tx__ip__log__info__classify__tcp, 158345916cd2Sjpk char *, 158445916cd2Sjpk "connp(1) could not receive mp(2)", 158545916cd2Sjpk conn_t *, connp, mblk_t *, mp); 158645916cd2Sjpk connp = NULL; 158745916cd2Sjpk } 158845916cd2Sjpk 15897c478bd9Sstevel@tonic-gate if (connp != NULL) { 159045916cd2Sjpk /* Have a listener at least */ 15917c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 15927c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15937c478bd9Sstevel@tonic-gate return (connp); 15947c478bd9Sstevel@tonic-gate } 15957c478bd9Sstevel@tonic-gate 15967c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15977c478bd9Sstevel@tonic-gate 15987c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 15997c478bd9Sstevel@tonic-gate ("ipcl_classify: couldn't classify mp = %p\n", 16007c478bd9Sstevel@tonic-gate (void *)mp)); 16017c478bd9Sstevel@tonic-gate break; 16027c478bd9Sstevel@tonic-gate 16037c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 16047c478bd9Sstevel@tonic-gate lport = up[1]; 160545916cd2Sjpk unlabeled = B_FALSE; 160645916cd2Sjpk /* Cred cannot be null on IPv4 */ 160745916cd2Sjpk if (is_system_labeled()) 160845916cd2Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 160945916cd2Sjpk TSLF_UNLABELED) != 0; 161045916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 161145916cd2Sjpk if (shared_addr) { 1612f4b3ec61Sdh /* 1613f4b3ec61Sdh * No need to handle exclusive-stack zones since 1614f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1615f4b3ec61Sdh */ 161645916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 161745916cd2Sjpk /* 161845916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 161945916cd2Sjpk * ALL_ZONES. 
In that case, we assume it's SLP, and 162045916cd2Sjpk * search for the zone based on the packet label. 162145916cd2Sjpk * 162245916cd2Sjpk * If there is such a zone, we prefer to find a 162345916cd2Sjpk * connection in it. Otherwise, we look for a 162445916cd2Sjpk * MAC-exempt connection in any zone whose label 162545916cd2Sjpk * dominates the default label on the packet. 162645916cd2Sjpk */ 162745916cd2Sjpk if (zoneid == ALL_ZONES) 162845916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 162945916cd2Sjpk else 163045916cd2Sjpk unlabeled = B_FALSE; 163145916cd2Sjpk } 16327c478bd9Sstevel@tonic-gate fport = up[0]; 16337c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1634f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 16357c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16367c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16377c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 16387c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 16397c478bd9Sstevel@tonic-gate fport, ipha->ipha_src) && 16405d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 164145916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 16427c478bd9Sstevel@tonic-gate break; 16437c478bd9Sstevel@tonic-gate } 16447c478bd9Sstevel@tonic-gate 164545916cd2Sjpk if (connp != NULL && is_system_labeled() && 164645916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 164745916cd2Sjpk shared_addr, connp)) { 164845916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp, 164945916cd2Sjpk char *, "connp(1) could not receive mp(2)", 165045916cd2Sjpk conn_t *, connp, mblk_t *, mp); 165145916cd2Sjpk connp = NULL; 165245916cd2Sjpk } 165345916cd2Sjpk 16547c478bd9Sstevel@tonic-gate if (connp != NULL) { 16557c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 16567c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16577c478bd9Sstevel@tonic-gate return (connp); 
16587c478bd9Sstevel@tonic-gate } 16597c478bd9Sstevel@tonic-gate 16607c478bd9Sstevel@tonic-gate /* 16617c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 16627c478bd9Sstevel@tonic-gate */ 16637c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16647c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 16657c478bd9Sstevel@tonic-gate ("ipcl_classify: cant find udp conn_t for ports : %x %x", 16667c478bd9Sstevel@tonic-gate lport, fport)); 16677c478bd9Sstevel@tonic-gate break; 16687c478bd9Sstevel@tonic-gate } 16697c478bd9Sstevel@tonic-gate 16707c478bd9Sstevel@tonic-gate return (NULL); 16717c478bd9Sstevel@tonic-gate } 16727c478bd9Sstevel@tonic-gate 16737c478bd9Sstevel@tonic-gate conn_t * 1674f4b3ec61Sdh ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1675f4b3ec61Sdh ip_stack_t *ipst) 16767c478bd9Sstevel@tonic-gate { 16777c478bd9Sstevel@tonic-gate ip6_t *ip6h; 16787c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 16797c478bd9Sstevel@tonic-gate uint16_t lport; 16807c478bd9Sstevel@tonic-gate uint16_t fport; 16817c478bd9Sstevel@tonic-gate tcph_t *tcph; 16827c478bd9Sstevel@tonic-gate uint32_t ports; 16837c478bd9Sstevel@tonic-gate conn_t *connp; 16847c478bd9Sstevel@tonic-gate uint16_t *up; 168545916cd2Sjpk boolean_t shared_addr; 168645916cd2Sjpk boolean_t unlabeled; 16877c478bd9Sstevel@tonic-gate 16887c478bd9Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 16897c478bd9Sstevel@tonic-gate 16907c478bd9Sstevel@tonic-gate switch (protocol) { 16917c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 16927c478bd9Sstevel@tonic-gate tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 16937c478bd9Sstevel@tonic-gate up = (uint16_t *)tcph->th_lport; 16947c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 16957c478bd9Sstevel@tonic-gate 16967c478bd9Sstevel@tonic-gate connfp = 1697f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 1698f4b3ec61Sdh ports, ipst)]; 16997c478bd9Sstevel@tonic-gate 
mutex_enter(&connfp->connf_lock); 17007c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 17017c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 17027c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(connp, protocol, 17037c478bd9Sstevel@tonic-gate ip6h->ip6_src, ip6h->ip6_dst, ports)) 17047c478bd9Sstevel@tonic-gate break; 17057c478bd9Sstevel@tonic-gate } 17067c478bd9Sstevel@tonic-gate 17077c478bd9Sstevel@tonic-gate if (connp != NULL) { 170845916cd2Sjpk /* 170945916cd2Sjpk * We have a fully-bound TCP connection. 171045916cd2Sjpk * 171145916cd2Sjpk * For labeled systems, there's no need to check the 171245916cd2Sjpk * label here. It's known to be good as we checked 171345916cd2Sjpk * before allowing the connection to become bound. 171445916cd2Sjpk */ 17157c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 17167c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17177c478bd9Sstevel@tonic-gate return (connp); 17187c478bd9Sstevel@tonic-gate } 17197c478bd9Sstevel@tonic-gate 17207c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17217c478bd9Sstevel@tonic-gate 17227c478bd9Sstevel@tonic-gate lport = up[1]; 172345916cd2Sjpk unlabeled = B_FALSE; 172445916cd2Sjpk /* Cred can be null on IPv6 */ 172545916cd2Sjpk if (is_system_labeled()) { 172645916cd2Sjpk cred_t *cr = DB_CRED(mp); 172745916cd2Sjpk 172845916cd2Sjpk unlabeled = (cr != NULL && 172945916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 173045916cd2Sjpk } 173145916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 173245916cd2Sjpk if (shared_addr) { 1733f4b3ec61Sdh /* 1734f4b3ec61Sdh * No need to handle exclusive-stack zones since 1735f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1736f4b3ec61Sdh */ 173745916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 173845916cd2Sjpk /* 173945916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 174045916cd2Sjpk * ALL_ZONES. 
In that case, we assume it's SLP, and 174145916cd2Sjpk * search for the zone based on the packet label. 174245916cd2Sjpk * 174345916cd2Sjpk * If there is such a zone, we prefer to find a 174445916cd2Sjpk * connection in it. Otherwise, we look for a 174545916cd2Sjpk * MAC-exempt connection in any zone whose label 174645916cd2Sjpk * dominates the default label on the packet. 174745916cd2Sjpk */ 174845916cd2Sjpk if (zoneid == ALL_ZONES) 174945916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 175045916cd2Sjpk else 175145916cd2Sjpk unlabeled = B_FALSE; 175245916cd2Sjpk } 175345916cd2Sjpk 1754f4b3ec61Sdh bind_connfp = 1755f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 17567c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 17577c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 17587c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 17597c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 17607c478bd9Sstevel@tonic-gate ip6h->ip6_dst, lport) && 17615d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 176245916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 17637c478bd9Sstevel@tonic-gate break; 17647c478bd9Sstevel@tonic-gate } 17657c478bd9Sstevel@tonic-gate 176645916cd2Sjpk if (connp != NULL && is_system_labeled() && 176745916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 176845916cd2Sjpk shared_addr, connp)) { 176945916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 177045916cd2Sjpk char *, "connp(1) could not receive mp(2)", 177145916cd2Sjpk conn_t *, connp, mblk_t *, mp); 177245916cd2Sjpk connp = NULL; 177345916cd2Sjpk } 177445916cd2Sjpk 17757c478bd9Sstevel@tonic-gate if (connp != NULL) { 17767c478bd9Sstevel@tonic-gate /* Have a listner at least */ 17777c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 17787c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17797c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 17807c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: 
found listner " 17817c478bd9Sstevel@tonic-gate "connp = %p\n", (void *)connp)); 17827c478bd9Sstevel@tonic-gate 17837c478bd9Sstevel@tonic-gate return (connp); 17847c478bd9Sstevel@tonic-gate } 17857c478bd9Sstevel@tonic-gate 17867c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17877c478bd9Sstevel@tonic-gate 17887c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 17897c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: couldn't classify mp = %p\n", 17907c478bd9Sstevel@tonic-gate (void *)mp)); 17917c478bd9Sstevel@tonic-gate break; 17927c478bd9Sstevel@tonic-gate 17937c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 17947c478bd9Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len]; 17957c478bd9Sstevel@tonic-gate lport = up[1]; 179645916cd2Sjpk unlabeled = B_FALSE; 179745916cd2Sjpk /* Cred can be null on IPv6 */ 179845916cd2Sjpk if (is_system_labeled()) { 179945916cd2Sjpk cred_t *cr = DB_CRED(mp); 180045916cd2Sjpk 180145916cd2Sjpk unlabeled = (cr != NULL && 180245916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 180345916cd2Sjpk } 180445916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 180545916cd2Sjpk if (shared_addr) { 1806f4b3ec61Sdh /* 1807f4b3ec61Sdh * No need to handle exclusive-stack zones since 1808f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1809f4b3ec61Sdh */ 181045916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 181145916cd2Sjpk /* 181245916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 181345916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 181445916cd2Sjpk * search for the zone based on the packet label. 181545916cd2Sjpk * 181645916cd2Sjpk * If there is such a zone, we prefer to find a 181745916cd2Sjpk * connection in it. Otherwise, we look for a 181845916cd2Sjpk * MAC-exempt connection in any zone whose label 181945916cd2Sjpk * dominates the default label on the packet. 
182045916cd2Sjpk */ 182145916cd2Sjpk if (zoneid == ALL_ZONES) 182245916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 182345916cd2Sjpk else 182445916cd2Sjpk unlabeled = B_FALSE; 182545916cd2Sjpk } 182645916cd2Sjpk 18277c478bd9Sstevel@tonic-gate fport = up[0]; 18287c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 18297c478bd9Sstevel@tonic-gate fport)); 1830f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 18317c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 18327c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 18337c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 18347c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 18357c478bd9Sstevel@tonic-gate fport, ip6h->ip6_src) && 18365d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 183745916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 18387c478bd9Sstevel@tonic-gate break; 18397c478bd9Sstevel@tonic-gate } 18407c478bd9Sstevel@tonic-gate 184145916cd2Sjpk if (connp != NULL && is_system_labeled() && 184245916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 184345916cd2Sjpk shared_addr, connp)) { 184445916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 184545916cd2Sjpk char *, "connp(1) could not receive mp(2)", 184645916cd2Sjpk conn_t *, connp, mblk_t *, mp); 184745916cd2Sjpk connp = NULL; 184845916cd2Sjpk } 184945916cd2Sjpk 18507c478bd9Sstevel@tonic-gate if (connp != NULL) { 18517c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 18527c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18537c478bd9Sstevel@tonic-gate return (connp); 18547c478bd9Sstevel@tonic-gate } 18557c478bd9Sstevel@tonic-gate 18567c478bd9Sstevel@tonic-gate /* 18577c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 18587c478bd9Sstevel@tonic-gate */ 18597c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18607c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 
18617c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 18627c478bd9Sstevel@tonic-gate lport, fport)); 18637c478bd9Sstevel@tonic-gate break; 18647c478bd9Sstevel@tonic-gate } 18657c478bd9Sstevel@tonic-gate 18667c478bd9Sstevel@tonic-gate return (NULL); 18677c478bd9Sstevel@tonic-gate } 18687c478bd9Sstevel@tonic-gate 18697c478bd9Sstevel@tonic-gate /* 18707c478bd9Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 18717c478bd9Sstevel@tonic-gate */ 18727c478bd9Sstevel@tonic-gate conn_t * 1873f4b3ec61Sdh ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) 18747c478bd9Sstevel@tonic-gate { 18757c478bd9Sstevel@tonic-gate uint16_t hdr_len; 18767c478bd9Sstevel@tonic-gate ipha_t *ipha; 18777c478bd9Sstevel@tonic-gate uint8_t *nexthdrp; 18787c478bd9Sstevel@tonic-gate 18797c478bd9Sstevel@tonic-gate if (MBLKL(mp) < sizeof (ipha_t)) 18807c478bd9Sstevel@tonic-gate return (NULL); 18817c478bd9Sstevel@tonic-gate 18827c478bd9Sstevel@tonic-gate switch (IPH_HDR_VERSION(mp->b_rptr)) { 18837c478bd9Sstevel@tonic-gate case IPV4_VERSION: 18847c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 18857c478bd9Sstevel@tonic-gate hdr_len = IPH_HDR_LENGTH(ipha); 18867c478bd9Sstevel@tonic-gate return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1887f4b3ec61Sdh zoneid, ipst)); 18887c478bd9Sstevel@tonic-gate case IPV6_VERSION: 18897c478bd9Sstevel@tonic-gate if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 18907c478bd9Sstevel@tonic-gate &hdr_len, &nexthdrp)) 18917c478bd9Sstevel@tonic-gate return (NULL); 18927c478bd9Sstevel@tonic-gate 1893f4b3ec61Sdh return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); 18947c478bd9Sstevel@tonic-gate } 18957c478bd9Sstevel@tonic-gate 18967c478bd9Sstevel@tonic-gate return (NULL); 18977c478bd9Sstevel@tonic-gate } 18987c478bd9Sstevel@tonic-gate 18997c478bd9Sstevel@tonic-gate conn_t * 190045916cd2Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 1901f4b3ec61Sdh uint32_t 
ports, ipha_t *hdr, ip_stack_t *ipst) 19027c478bd9Sstevel@tonic-gate { 190345916cd2Sjpk connf_t *connfp; 19047c478bd9Sstevel@tonic-gate conn_t *connp; 19057c478bd9Sstevel@tonic-gate in_port_t lport; 19067c478bd9Sstevel@tonic-gate int af; 190745916cd2Sjpk boolean_t shared_addr; 190845916cd2Sjpk boolean_t unlabeled; 190945916cd2Sjpk const void *dst; 19107c478bd9Sstevel@tonic-gate 19117c478bd9Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 191245916cd2Sjpk 191345916cd2Sjpk unlabeled = B_FALSE; 191445916cd2Sjpk /* Cred can be null on IPv6 */ 191545916cd2Sjpk if (is_system_labeled()) { 191645916cd2Sjpk cred_t *cr = DB_CRED(mp); 191745916cd2Sjpk 191845916cd2Sjpk unlabeled = (cr != NULL && 191945916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 192045916cd2Sjpk } 192145916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 192245916cd2Sjpk if (shared_addr) { 1923f4b3ec61Sdh /* 1924f4b3ec61Sdh * No need to handle exclusive-stack zones since ALL_ZONES 1925f4b3ec61Sdh * only applies to the shared stack. 1926f4b3ec61Sdh */ 192745916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 192845916cd2Sjpk /* 192945916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 193045916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and search for 193145916cd2Sjpk * the zone based on the packet label. 193245916cd2Sjpk * 193345916cd2Sjpk * If there is such a zone, we prefer to find a connection in 193445916cd2Sjpk * it. Otherwise, we look for a MAC-exempt connection in any 193545916cd2Sjpk * zone whose label dominates the default label on the packet. 193645916cd2Sjpk */ 193745916cd2Sjpk if (zoneid == ALL_ZONES) 193845916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 193945916cd2Sjpk else 194045916cd2Sjpk unlabeled = B_FALSE; 194145916cd2Sjpk } 194245916cd2Sjpk 19437c478bd9Sstevel@tonic-gate af = IPH_HDR_VERSION(hdr); 194445916cd2Sjpk dst = af == IPV4_VERSION ? 
(const void *)&hdr->ipha_dst : 194545916cd2Sjpk (const void *)&((ip6_t *)hdr)->ip6_dst; 1946f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 19477c478bd9Sstevel@tonic-gate 19487c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 19497c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 19507c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 19517c478bd9Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 195245916cd2Sjpk if (af == (connp->conn_af_isv6 ? IPV4_VERSION : 195345916cd2Sjpk IPV6_VERSION)) 19547c478bd9Sstevel@tonic-gate continue; 19557c478bd9Sstevel@tonic-gate if (connp->conn_fully_bound) { 19567c478bd9Sstevel@tonic-gate if (af == IPV4_VERSION) { 195745916cd2Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 195845916cd2Sjpk hdr->ipha_src, hdr->ipha_dst, ports)) 195945916cd2Sjpk continue; 19607c478bd9Sstevel@tonic-gate } else { 196145916cd2Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 19627c478bd9Sstevel@tonic-gate ((ip6_t *)hdr)->ip6_src, 196345916cd2Sjpk ((ip6_t *)hdr)->ip6_dst, ports)) 196445916cd2Sjpk continue; 19657c478bd9Sstevel@tonic-gate } 19667c478bd9Sstevel@tonic-gate } else { 19677c478bd9Sstevel@tonic-gate if (af == IPV4_VERSION) { 196845916cd2Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 196945916cd2Sjpk hdr->ipha_dst, lport)) 197045916cd2Sjpk continue; 19717c478bd9Sstevel@tonic-gate } else { 197245916cd2Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 197345916cd2Sjpk ((ip6_t *)hdr)->ip6_dst, lport)) 197445916cd2Sjpk continue; 19757c478bd9Sstevel@tonic-gate } 19767c478bd9Sstevel@tonic-gate } 197745916cd2Sjpk 19785d0bc3edSsommerfe if (IPCL_ZONE_MATCH(connp, zoneid) || 197945916cd2Sjpk (unlabeled && connp->conn_mac_exempt)) 198045916cd2Sjpk break; 198145916cd2Sjpk } 198245916cd2Sjpk /* 198345916cd2Sjpk * If the connection is fully-bound and connection-oriented (TCP or 198445916cd2Sjpk * SCTP), then we've already validated the remote system's label. 
198545916cd2Sjpk * There's no need to do it again for every packet. 198645916cd2Sjpk */ 198745916cd2Sjpk if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 198845916cd2Sjpk !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 198945916cd2Sjpk !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 199045916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 199145916cd2Sjpk char *, "connp(1) could not receive mp(2)", 199245916cd2Sjpk conn_t *, connp, mblk_t *, mp); 199345916cd2Sjpk connp = NULL; 19947c478bd9Sstevel@tonic-gate } 19957c0c0508Skcpoon 19967c0c0508Skcpoon if (connp != NULL) 19977c0c0508Skcpoon goto found; 19987c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 19997c0c0508Skcpoon 20007c0c0508Skcpoon /* Try to look for a wildcard match. */ 2001f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 20027c0c0508Skcpoon mutex_enter(&connfp->connf_lock); 20037c0c0508Skcpoon for (connp = connfp->connf_head; connp != NULL; 20047c0c0508Skcpoon connp = connp->conn_next) { 20057c0c0508Skcpoon /* We don't allow v4 fallback for v6 raw socket. */ 20067c0c0508Skcpoon if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 20075d0bc3edSsommerfe IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) { 20087c0c0508Skcpoon continue; 20097c0c0508Skcpoon } 20107c0c0508Skcpoon if (af == IPV4_VERSION) { 20117c0c0508Skcpoon if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 20127c0c0508Skcpoon break; 20137c0c0508Skcpoon } else { 20147c0c0508Skcpoon if (IPCL_RAW_MATCH_V6(connp, protocol, 20157c0c0508Skcpoon ((ip6_t *)hdr)->ip6_dst)) { 20167c0c0508Skcpoon break; 20177c0c0508Skcpoon } 20187c0c0508Skcpoon } 20197c478bd9Sstevel@tonic-gate } 20207c0c0508Skcpoon 20217c0c0508Skcpoon if (connp != NULL) 20227c0c0508Skcpoon goto found; 20237c0c0508Skcpoon 20247c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 20257c478bd9Sstevel@tonic-gate return (NULL); 20267c0c0508Skcpoon 20277c0c0508Skcpoon found: 20287c0c0508Skcpoon ASSERT(connp != NULL); 20297c0c0508Skcpoon CONN_INC_REF(connp); 20307c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 20317c0c0508Skcpoon return (connp); 20327c478bd9Sstevel@tonic-gate } 20337c478bd9Sstevel@tonic-gate 20347c478bd9Sstevel@tonic-gate /* ARGSUSED */ 20357c478bd9Sstevel@tonic-gate static int 2036fc80c0dfSnordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 20377c478bd9Sstevel@tonic-gate { 20387c478bd9Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 20397c478bd9Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 2040fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1]; 2041fc80c0dfSnordmark 2042fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2043fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t)); 2044fc80c0dfSnordmark 2045fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2046fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2047*0f1702c5SYu Xiangning cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL); 20487c478bd9Sstevel@tonic-gate tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 20497c478bd9Sstevel@tonic-gate connp->conn_tcp = tcp; 20507c478bd9Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 
20517c478bd9Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 20527c478bd9Sstevel@tonic-gate tcp->tcp_connp = connp; 20537c478bd9Sstevel@tonic-gate return (0); 20547c478bd9Sstevel@tonic-gate } 20557c478bd9Sstevel@tonic-gate 20567c478bd9Sstevel@tonic-gate /* ARGSUSED */ 20577c478bd9Sstevel@tonic-gate static void 2058fc80c0dfSnordmark tcp_conn_destructor(void *buf, void *cdrarg) 2059fc80c0dfSnordmark { 2060fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2061fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2062fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1]; 2063fc80c0dfSnordmark 2064fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_TCPCONN); 2065fc80c0dfSnordmark ASSERT(tcp->tcp_connp == connp); 2066fc80c0dfSnordmark ASSERT(connp->conn_tcp == tcp); 2067fc80c0dfSnordmark tcp_timermp_free(tcp); 2068fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2069fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2070*0f1702c5SYu Xiangning cv_destroy(&connp->conn_sq_cv); 2071fc80c0dfSnordmark } 2072fc80c0dfSnordmark 2073fc80c0dfSnordmark /* ARGSUSED */ 2074fc80c0dfSnordmark static int 2075fc80c0dfSnordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2076fc80c0dfSnordmark { 2077fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2078fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2079fc80c0dfSnordmark 2080fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2081fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2082fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2083fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN; 2084fc80c0dfSnordmark 2085fc80c0dfSnordmark return (0); 2086fc80c0dfSnordmark } 2087fc80c0dfSnordmark 2088fc80c0dfSnordmark /* ARGSUSED */ 2089fc80c0dfSnordmark static void 2090fc80c0dfSnordmark ip_conn_destructor(void *buf, void *cdrarg) 2091fc80c0dfSnordmark { 2092fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2093fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2094fc80c0dfSnordmark 2095fc80c0dfSnordmark 
ASSERT(connp->conn_flags & IPCL_IPCCONN); 2096fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL); 2097fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2098fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2099fc80c0dfSnordmark } 2100fc80c0dfSnordmark 2101fc80c0dfSnordmark /* ARGSUSED */ 2102fc80c0dfSnordmark static int 2103fc80c0dfSnordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2104fc80c0dfSnordmark { 2105fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2106fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2107fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1]; 2108fc80c0dfSnordmark 2109fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2110fc80c0dfSnordmark bzero(udp, sizeof (udp_t)); 2111fc80c0dfSnordmark 2112fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2113fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2114fc80c0dfSnordmark connp->conn_udp = udp; 2115fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN; 2116fc80c0dfSnordmark connp->conn_ulp = IPPROTO_UDP; 2117fc80c0dfSnordmark udp->udp_connp = connp; 2118fc80c0dfSnordmark return (0); 2119fc80c0dfSnordmark } 2120fc80c0dfSnordmark 2121fc80c0dfSnordmark /* ARGSUSED */ 2122fc80c0dfSnordmark static void 2123fc80c0dfSnordmark udp_conn_destructor(void *buf, void *cdrarg) 2124fc80c0dfSnordmark { 2125fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2126fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2127fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1]; 2128fc80c0dfSnordmark 2129fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_UDPCONN); 2130fc80c0dfSnordmark ASSERT(udp->udp_connp == connp); 2131fc80c0dfSnordmark ASSERT(connp->conn_udp == udp); 2132fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2133fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2134fc80c0dfSnordmark } 2135fc80c0dfSnordmark 2136fc80c0dfSnordmark /* ARGSUSED */ 2137fc80c0dfSnordmark static int 2138fc80c0dfSnordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 
2139fc80c0dfSnordmark { 2140fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2141fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2142fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2143fc80c0dfSnordmark 2144fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2145fc80c0dfSnordmark bzero(icmp, sizeof (icmp_t)); 2146fc80c0dfSnordmark 2147fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2148fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2149fc80c0dfSnordmark connp->conn_icmp = icmp; 2150fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN; 2151fc80c0dfSnordmark connp->conn_ulp = IPPROTO_ICMP; 2152fc80c0dfSnordmark icmp->icmp_connp = connp; 2153fc80c0dfSnordmark return (0); 2154fc80c0dfSnordmark } 2155fc80c0dfSnordmark 2156fc80c0dfSnordmark /* ARGSUSED */ 2157fc80c0dfSnordmark static void 2158fc80c0dfSnordmark rawip_conn_destructor(void *buf, void *cdrarg) 2159fc80c0dfSnordmark { 2160fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2161fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2162fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2163fc80c0dfSnordmark 2164fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2165fc80c0dfSnordmark ASSERT(icmp->icmp_connp == connp); 2166fc80c0dfSnordmark ASSERT(connp->conn_icmp == icmp); 2167fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2168fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2169fc80c0dfSnordmark } 2170fc80c0dfSnordmark 2171fc80c0dfSnordmark /* ARGSUSED */ 2172fc80c0dfSnordmark static int 2173fc80c0dfSnordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 2174fc80c0dfSnordmark { 2175fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2176fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2177fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1]; 2178fc80c0dfSnordmark 2179fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2180fc80c0dfSnordmark bzero(rts, sizeof (rts_t)); 2181fc80c0dfSnordmark 2182fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, 
NULL); 2183fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2184fc80c0dfSnordmark connp->conn_rts = rts; 2185fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN; 2186fc80c0dfSnordmark rts->rts_connp = connp; 2187fc80c0dfSnordmark return (0); 2188fc80c0dfSnordmark } 2189fc80c0dfSnordmark 2190fc80c0dfSnordmark /* ARGSUSED */ 2191fc80c0dfSnordmark static void 2192fc80c0dfSnordmark rts_conn_destructor(void *buf, void *cdrarg) 21937c478bd9Sstevel@tonic-gate { 2194fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2195fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2196fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1]; 2197fc80c0dfSnordmark 2198fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RTSCONN); 2199fc80c0dfSnordmark ASSERT(rts->rts_connp == connp); 2200fc80c0dfSnordmark ASSERT(connp->conn_rts == rts); 2201fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2202fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2203fc80c0dfSnordmark } 2204fc80c0dfSnordmark 2205*0f1702c5SYu Xiangning /* ARGSUSED */ 2206*0f1702c5SYu Xiangning int 2207*0f1702c5SYu Xiangning ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags) 2208*0f1702c5SYu Xiangning { 2209*0f1702c5SYu Xiangning int error; 2210*0f1702c5SYu Xiangning netstack_t *ns; 2211*0f1702c5SYu Xiangning int ret; 2212*0f1702c5SYu Xiangning tcp_stack_t *tcps; 2213*0f1702c5SYu Xiangning ip_helper_stream_info_t *ip_helper_str; 2214*0f1702c5SYu Xiangning ip_stack_t *ipst; 2215*0f1702c5SYu Xiangning 2216*0f1702c5SYu Xiangning ns = netstack_find_by_cred(kcred); 2217*0f1702c5SYu Xiangning ASSERT(ns != NULL); 2218*0f1702c5SYu Xiangning tcps = ns->netstack_tcp; 2219*0f1702c5SYu Xiangning ipst = ns->netstack_ip; 2220*0f1702c5SYu Xiangning ASSERT(tcps != NULL); 2221*0f1702c5SYu Xiangning ip_helper_str = (ip_helper_stream_info_t *)buf; 2222*0f1702c5SYu Xiangning 2223*0f1702c5SYu Xiangning error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred, 2224*0f1702c5SYu Xiangning &ip_helper_str->ip_helper_stream_handle, 
ipst->ips_ldi_ident); 2225*0f1702c5SYu Xiangning if (error != 0) { 2226*0f1702c5SYu Xiangning goto done; 2227*0f1702c5SYu Xiangning } 2228*0f1702c5SYu Xiangning error = ldi_ioctl(ip_helper_str->ip_helper_stream_handle, 2229*0f1702c5SYu Xiangning SIOCSQPTR, (intptr_t)buf, FKIOCTL, kcred, &ret); 2230*0f1702c5SYu Xiangning if (error != 0) { 2231*0f1702c5SYu Xiangning (void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0, 2232*0f1702c5SYu Xiangning kcred); 2233*0f1702c5SYu Xiangning } 2234*0f1702c5SYu Xiangning done: 2235*0f1702c5SYu Xiangning netstack_rele(ipst->ips_netstack); 2236*0f1702c5SYu Xiangning return (error); 2237*0f1702c5SYu Xiangning } 2238*0f1702c5SYu Xiangning 2239*0f1702c5SYu Xiangning /* ARGSUSED */ 2240*0f1702c5SYu Xiangning static void 2241*0f1702c5SYu Xiangning ip_helper_stream_destructor(void *buf, void *cdrarg) 2242*0f1702c5SYu Xiangning { 2243*0f1702c5SYu Xiangning ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf; 2244*0f1702c5SYu Xiangning 2245*0f1702c5SYu Xiangning ip_helper_str->ip_helper_stream_rq->q_ptr = 2246*0f1702c5SYu Xiangning ip_helper_str->ip_helper_stream_wq->q_ptr = 2247*0f1702c5SYu Xiangning ip_helper_str->ip_helper_stream_minfo; 2248*0f1702c5SYu Xiangning (void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0, kcred); 2249*0f1702c5SYu Xiangning } 2250*0f1702c5SYu Xiangning 2251*0f1702c5SYu Xiangning 2252fc80c0dfSnordmark /* 2253fc80c0dfSnordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers 2254fc80c0dfSnordmark * in the conn_t. 
2255fc80c0dfSnordmark */ 2256fc80c0dfSnordmark void 2257fc80c0dfSnordmark ipcl_conn_cleanup(conn_t *connp) 2258fc80c0dfSnordmark { 2259fc80c0dfSnordmark ASSERT(connp->conn_ire_cache == NULL); 2260fc80c0dfSnordmark ASSERT(connp->conn_latch == NULL); 2261fc80c0dfSnordmark #ifdef notdef 2262fc80c0dfSnordmark ASSERT(connp->conn_rq == NULL); 2263fc80c0dfSnordmark ASSERT(connp->conn_wq == NULL); 2264fc80c0dfSnordmark #endif 2265fc80c0dfSnordmark ASSERT(connp->conn_cred == NULL); 2266fc80c0dfSnordmark ASSERT(connp->conn_g_fanout == NULL); 2267fc80c0dfSnordmark ASSERT(connp->conn_g_next == NULL); 2268fc80c0dfSnordmark ASSERT(connp->conn_g_prev == NULL); 2269fc80c0dfSnordmark ASSERT(connp->conn_policy == NULL); 2270fc80c0dfSnordmark ASSERT(connp->conn_fanout == NULL); 2271fc80c0dfSnordmark ASSERT(connp->conn_next == NULL); 2272fc80c0dfSnordmark ASSERT(connp->conn_prev == NULL); 2273fc80c0dfSnordmark #ifdef notdef 2274fc80c0dfSnordmark /* 2275fc80c0dfSnordmark * The ill and ipif pointers are not cleared before the conn_t 2276fc80c0dfSnordmark * goes away since they do not hold a reference on the ill/ipif. 2277fc80c0dfSnordmark * We should replace these pointers with ifindex/ipaddr_t to 2278fc80c0dfSnordmark * make the code less complex. 
2279fc80c0dfSnordmark */ 2280fc80c0dfSnordmark ASSERT(connp->conn_xmit_if_ill == NULL); 2281fc80c0dfSnordmark ASSERT(connp->conn_nofailover_ill == NULL); 2282fc80c0dfSnordmark ASSERT(connp->conn_outgoing_ill == NULL); 2283fc80c0dfSnordmark ASSERT(connp->conn_incoming_ill == NULL); 2284fc80c0dfSnordmark ASSERT(connp->conn_outgoing_pill == NULL); 2285fc80c0dfSnordmark ASSERT(connp->conn_multicast_ipif == NULL); 2286fc80c0dfSnordmark ASSERT(connp->conn_multicast_ill == NULL); 2287fc80c0dfSnordmark #endif 2288fc80c0dfSnordmark ASSERT(connp->conn_oper_pending_ill == NULL); 2289fc80c0dfSnordmark ASSERT(connp->conn_ilg == NULL); 2290fc80c0dfSnordmark ASSERT(connp->conn_drain_next == NULL); 2291fc80c0dfSnordmark ASSERT(connp->conn_drain_prev == NULL); 2292a9737be2Snordmark #ifdef notdef 2293a9737be2Snordmark /* conn_idl is not cleared when removed from idl list */ 2294fc80c0dfSnordmark ASSERT(connp->conn_idl == NULL); 2295a9737be2Snordmark #endif 2296fc80c0dfSnordmark ASSERT(connp->conn_ipsec_opt_mp == NULL); 2297fc80c0dfSnordmark ASSERT(connp->conn_peercred == NULL); 2298fc80c0dfSnordmark ASSERT(connp->conn_netstack == NULL); 2299fc80c0dfSnordmark 2300*0f1702c5SYu Xiangning ASSERT(connp->conn_helper_info == NULL); 2301fc80c0dfSnordmark /* Clear out the conn_t fields that are not preserved */ 2302fc80c0dfSnordmark bzero(&connp->conn_start_clr, 2303fc80c0dfSnordmark sizeof (conn_t) - 2304fc80c0dfSnordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 23057c478bd9Sstevel@tonic-gate } 23067c478bd9Sstevel@tonic-gate 23077c478bd9Sstevel@tonic-gate /* 23087c478bd9Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 23097c478bd9Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 23107c478bd9Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 23117c478bd9Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. 
This is achieved 23127c478bd9Sstevel@tonic-gate * as follows. 23137c478bd9Sstevel@tonic-gate * 23147c478bd9Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 23157c478bd9Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 23167c478bd9Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this 23177c478bd9Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 23187c478bd9Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note 23197c478bd9Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 23207c478bd9Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 23217c478bd9Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 23227c478bd9Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 23237c478bd9Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 23247c478bd9Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 23257c478bd9Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 23267c478bd9Sstevel@tonic-gate */ 23277c478bd9Sstevel@tonic-gate void 23287c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 23297c478bd9Sstevel@tonic-gate { 23307c478bd9Sstevel@tonic-gate int index; 2331f4b3ec61Sdh struct connf_s *connfp; 2332f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 23337c478bd9Sstevel@tonic-gate 23347c478bd9Sstevel@tonic-gate /* 23357c478bd9Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 23367c478bd9Sstevel@tonic-gate * in the global lists is sufficient. 
23377c478bd9Sstevel@tonic-gate */ 2338f4b3ec61Sdh ipst->ips_conn_g_index++; 2339f4b3ec61Sdh index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 23407c478bd9Sstevel@tonic-gate 23417c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 23427c478bd9Sstevel@tonic-gate /* 23437c478bd9Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 23447c478bd9Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 23457c478bd9Sstevel@tonic-gate */ 23467c478bd9Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 23477c478bd9Sstevel@tonic-gate 2348f4b3ec61Sdh connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 23497c478bd9Sstevel@tonic-gate /* Insert at the head of the list */ 2350f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 2351f4b3ec61Sdh connp->conn_g_next = connfp->connf_head; 23527c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 23537c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 2354f4b3ec61Sdh connfp->connf_head = connp; 23557c478bd9Sstevel@tonic-gate 23567c478bd9Sstevel@tonic-gate /* The fanout bucket this conn points to */ 2357f4b3ec61Sdh connp->conn_g_fanout = connfp; 23587c478bd9Sstevel@tonic-gate 2359f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 23607c478bd9Sstevel@tonic-gate } 23617c478bd9Sstevel@tonic-gate 23627c478bd9Sstevel@tonic-gate void 23637c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 23647c478bd9Sstevel@tonic-gate { 2365f4b3ec61Sdh struct connf_s *connfp; 2366f4b3ec61Sdh 23677c478bd9Sstevel@tonic-gate /* 23687c478bd9Sstevel@tonic-gate * We were never inserted in the global multi list. 23697c478bd9Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 23707c478bd9Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 
23717c478bd9Sstevel@tonic-gate */ 23727c478bd9Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 23737c478bd9Sstevel@tonic-gate return; 23747c478bd9Sstevel@tonic-gate 2375f4b3ec61Sdh connfp = connp->conn_g_fanout; 2376f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 23777c478bd9Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 23787c478bd9Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 23797c478bd9Sstevel@tonic-gate else 2380f4b3ec61Sdh connfp->connf_head = connp->conn_g_next; 23817c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 23827c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2383f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 23847c478bd9Sstevel@tonic-gate 23857c478bd9Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 23867c478bd9Sstevel@tonic-gate connp->conn_g_next = NULL; 23877c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 2388fc80c0dfSnordmark connp->conn_g_fanout = NULL; 23897c478bd9Sstevel@tonic-gate } 23907c478bd9Sstevel@tonic-gate 23917c478bd9Sstevel@tonic-gate /* 23927c478bd9Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 23937c478bd9Sstevel@tonic-gate * with the specified argument for each. 23947c478bd9Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 23957c478bd9Sstevel@tonic-gate * 23967c478bd9Sstevel@tonic-gate * IPCs may hold pointers to ipif/ill. To guard against stale pointers 23977c478bd9Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 23987c478bd9Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking 23997c478bd9Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 24007c478bd9Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any 24017c478bd9Sstevel@tonic-gate * stale pointers. 
The CONN_CLOSING flag ensures that no new reference 24027c478bd9Sstevel@tonic-gate * is created to the struct that is going away. 24037c478bd9Sstevel@tonic-gate */ 24047c478bd9Sstevel@tonic-gate void 2405f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 24067c478bd9Sstevel@tonic-gate { 24077c478bd9Sstevel@tonic-gate int i; 24087c478bd9Sstevel@tonic-gate conn_t *connp; 24097c478bd9Sstevel@tonic-gate conn_t *prev_connp; 24107c478bd9Sstevel@tonic-gate 24117c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2412f4b3ec61Sdh mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24137c478bd9Sstevel@tonic-gate prev_connp = NULL; 2414f4b3ec61Sdh connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 24157c478bd9Sstevel@tonic-gate while (connp != NULL) { 24167c478bd9Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 24177c478bd9Sstevel@tonic-gate if (connp->conn_state_flags & 24187c478bd9Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 24197c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 24207c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 24217c478bd9Sstevel@tonic-gate continue; 24227c478bd9Sstevel@tonic-gate } 24237c478bd9Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 24247c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 2425f4b3ec61Sdh mutex_exit( 2426f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24277c478bd9Sstevel@tonic-gate (*func)(connp, arg); 24287c478bd9Sstevel@tonic-gate if (prev_connp != NULL) 24297c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 2430f4b3ec61Sdh mutex_enter( 2431f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24327c478bd9Sstevel@tonic-gate prev_connp = connp; 24337c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 24347c478bd9Sstevel@tonic-gate } 2435f4b3ec61Sdh mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 24367c478bd9Sstevel@tonic-gate if (prev_connp != NULL) 24377c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 
24387c478bd9Sstevel@tonic-gate } 24397c478bd9Sstevel@tonic-gate } 24407c478bd9Sstevel@tonic-gate 24417c478bd9Sstevel@tonic-gate /* 24427c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 24437c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 24447c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 2445d0ab37afSethindra * (peer tcp in ESTABLISHED state). 24467c478bd9Sstevel@tonic-gate */ 24477c478bd9Sstevel@tonic-gate conn_t * 2448f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, 2449f4b3ec61Sdh ip_stack_t *ipst) 24507c478bd9Sstevel@tonic-gate { 24517c478bd9Sstevel@tonic-gate uint32_t ports; 24527c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 24537c478bd9Sstevel@tonic-gate connf_t *connfp; 24547c478bd9Sstevel@tonic-gate conn_t *tconnp; 24557c478bd9Sstevel@tonic-gate boolean_t zone_chk; 24567c478bd9Sstevel@tonic-gate 24577c478bd9Sstevel@tonic-gate /* 24587c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 24597c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 24607c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 24617c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 
24627c478bd9Sstevel@tonic-gate */ 24637c478bd9Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 24647c478bd9Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 24657c478bd9Sstevel@tonic-gate 24667c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 24677c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 24687c478bd9Sstevel@tonic-gate 2469f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2470f4b3ec61Sdh ports, ipst)]; 24717c478bd9Sstevel@tonic-gate 24727c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24737c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24747c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24757c478bd9Sstevel@tonic-gate 24767c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 24777c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 2478d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24797c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24807c478bd9Sstevel@tonic-gate 24817c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 24827c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 24837c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24847c478bd9Sstevel@tonic-gate return (tconnp); 24857c478bd9Sstevel@tonic-gate } 24867c478bd9Sstevel@tonic-gate } 24877c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24887c478bd9Sstevel@tonic-gate return (NULL); 24897c478bd9Sstevel@tonic-gate } 24907c478bd9Sstevel@tonic-gate 24917c478bd9Sstevel@tonic-gate /* 24927c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 24937c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 24947c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. 
Only checks for connected entries 2495d0ab37afSethindra * (peer tcp in ESTABLISHED state). 24967c478bd9Sstevel@tonic-gate */ 24977c478bd9Sstevel@tonic-gate conn_t * 2498f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, 2499f4b3ec61Sdh ip_stack_t *ipst) 25007c478bd9Sstevel@tonic-gate { 25017c478bd9Sstevel@tonic-gate uint32_t ports; 25027c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 25037c478bd9Sstevel@tonic-gate connf_t *connfp; 25047c478bd9Sstevel@tonic-gate conn_t *tconnp; 25057c478bd9Sstevel@tonic-gate boolean_t zone_chk; 25067c478bd9Sstevel@tonic-gate 25077c478bd9Sstevel@tonic-gate /* 25087c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 25097c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 25107c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 25117c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 25127c478bd9Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 25137c478bd9Sstevel@tonic-gate * current Zone implementation treats each link local address as 25147c478bd9Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
25157c478bd9Sstevel@tonic-gate */ 25167c478bd9Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 25177c478bd9Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 25187c478bd9Sstevel@tonic-gate 25197c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 25207c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 25217c478bd9Sstevel@tonic-gate 2522f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2523f4b3ec61Sdh ports, ipst)]; 25247c478bd9Sstevel@tonic-gate 25257c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25267c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25277c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25287c478bd9Sstevel@tonic-gate 25297c478bd9Sstevel@tonic-gate /* We skip tcp_bound_if check here as this is loopback tcp */ 25307c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 25317c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 2532d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 25337c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 25347c478bd9Sstevel@tonic-gate 25357c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 25367c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 25377c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25387c478bd9Sstevel@tonic-gate return (tconnp); 25397c478bd9Sstevel@tonic-gate } 25407c478bd9Sstevel@tonic-gate } 25417c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25427c478bd9Sstevel@tonic-gate return (NULL); 25437c478bd9Sstevel@tonic-gate } 25447c478bd9Sstevel@tonic-gate 25457c478bd9Sstevel@tonic-gate /* 25467c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 25477c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 
25487c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 25497c478bd9Sstevel@tonic-gate */ 25507c478bd9Sstevel@tonic-gate conn_t * 2551f4b3ec61Sdh ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 2552f4b3ec61Sdh ip_stack_t *ipst) 25537c478bd9Sstevel@tonic-gate { 25547c478bd9Sstevel@tonic-gate uint32_t ports; 25557c478bd9Sstevel@tonic-gate uint16_t *pports; 25567c478bd9Sstevel@tonic-gate connf_t *connfp; 25577c478bd9Sstevel@tonic-gate conn_t *tconnp; 25587c478bd9Sstevel@tonic-gate 25597c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 25607c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 25617c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 25627c478bd9Sstevel@tonic-gate 2563f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2564121e5416Skcpoon ports, ipst)]; 25657c478bd9Sstevel@tonic-gate 25667c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25677c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25687c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25697c478bd9Sstevel@tonic-gate 25707c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 25717c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 25727c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 25737c478bd9Sstevel@tonic-gate 25747c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 25757c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25767c478bd9Sstevel@tonic-gate return (tconnp); 25777c478bd9Sstevel@tonic-gate } 25787c478bd9Sstevel@tonic-gate } 25797c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25807c478bd9Sstevel@tonic-gate return (NULL); 25817c478bd9Sstevel@tonic-gate } 25827c478bd9Sstevel@tonic-gate 25837c478bd9Sstevel@tonic-gate /* 25847c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 
25857c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 25867c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 25877c478bd9Sstevel@tonic-gate * Match on ifindex in addition to addresses. 25887c478bd9Sstevel@tonic-gate */ 25897c478bd9Sstevel@tonic-gate conn_t * 25907c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2591f4b3ec61Sdh uint_t ifindex, ip_stack_t *ipst) 25927c478bd9Sstevel@tonic-gate { 25937c478bd9Sstevel@tonic-gate tcp_t *tcp; 25947c478bd9Sstevel@tonic-gate uint32_t ports; 25957c478bd9Sstevel@tonic-gate uint16_t *pports; 25967c478bd9Sstevel@tonic-gate connf_t *connfp; 25977c478bd9Sstevel@tonic-gate conn_t *tconnp; 25987c478bd9Sstevel@tonic-gate 25997c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 26007c478bd9Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 26017c478bd9Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 26027c478bd9Sstevel@tonic-gate 2603f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2604121e5416Skcpoon ports, ipst)]; 26057c478bd9Sstevel@tonic-gate 26067c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 26077c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 26087c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 26097c478bd9Sstevel@tonic-gate 26107c478bd9Sstevel@tonic-gate tcp = tconnp->conn_tcp; 26117c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 26127c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 26137c478bd9Sstevel@tonic-gate tcp->tcp_state >= min_state && 26147c478bd9Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 26157c478bd9Sstevel@tonic-gate tcp->tcp_bound_if == ifindex)) { 26167c478bd9Sstevel@tonic-gate 26177c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 26187c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 26197c478bd9Sstevel@tonic-gate return (tconnp); 
26207c478bd9Sstevel@tonic-gate } 26217c478bd9Sstevel@tonic-gate } 26227c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 26237c478bd9Sstevel@tonic-gate return (NULL); 26247c478bd9Sstevel@tonic-gate } 26257c478bd9Sstevel@tonic-gate 26267c478bd9Sstevel@tonic-gate /* 262745916cd2Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 262845916cd2Sjpk * a listener when changing state. 26297c478bd9Sstevel@tonic-gate */ 26307c478bd9Sstevel@tonic-gate conn_t * 2631f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2632f4b3ec61Sdh ip_stack_t *ipst) 26337c478bd9Sstevel@tonic-gate { 26347c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 26357c478bd9Sstevel@tonic-gate conn_t *connp; 26367c478bd9Sstevel@tonic-gate tcp_t *tcp; 26377c478bd9Sstevel@tonic-gate 26387c478bd9Sstevel@tonic-gate /* 26397c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 26407c478bd9Sstevel@tonic-gate * all zeros. 26417c478bd9Sstevel@tonic-gate */ 26427c478bd9Sstevel@tonic-gate if (laddr == 0) 26437c478bd9Sstevel@tonic-gate return (NULL); 26447c478bd9Sstevel@tonic-gate 264545916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 264645916cd2Sjpk 2647f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 26487c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 26497c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 26507c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 26517c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 26527c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 26535d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 26547c478bd9Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 26557c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 26567c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26577c478bd9Sstevel@tonic-gate return (connp); 26587c478bd9Sstevel@tonic-gate } 
26597c478bd9Sstevel@tonic-gate } 26607c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26617c478bd9Sstevel@tonic-gate return (NULL); 26627c478bd9Sstevel@tonic-gate } 26637c478bd9Sstevel@tonic-gate 266445916cd2Sjpk /* 266545916cd2Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 266645916cd2Sjpk * a listener when changing state. 266745916cd2Sjpk */ 26687c478bd9Sstevel@tonic-gate conn_t * 26697c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2670f4b3ec61Sdh zoneid_t zoneid, ip_stack_t *ipst) 26717c478bd9Sstevel@tonic-gate { 26727c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 26737c478bd9Sstevel@tonic-gate conn_t *connp = NULL; 26747c478bd9Sstevel@tonic-gate tcp_t *tcp; 26757c478bd9Sstevel@tonic-gate 26767c478bd9Sstevel@tonic-gate /* 26777c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 26787c478bd9Sstevel@tonic-gate * all zeros. 26797c478bd9Sstevel@tonic-gate */ 26807c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 26817c478bd9Sstevel@tonic-gate return (NULL); 26827c478bd9Sstevel@tonic-gate 268345916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 26847c478bd9Sstevel@tonic-gate 2685f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 26867c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 26877c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 26887c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 26897c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 26907c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 26915d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 26927c478bd9Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 26937c478bd9Sstevel@tonic-gate tcp->tcp_bound_if == ifindex) && 26947c478bd9Sstevel@tonic-gate tcp->tcp_listener == NULL) { 26957c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 
26967c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26977c478bd9Sstevel@tonic-gate return (connp); 26987c478bd9Sstevel@tonic-gate } 26997c478bd9Sstevel@tonic-gate } 27007c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 27017c478bd9Sstevel@tonic-gate return (NULL); 27027c478bd9Sstevel@tonic-gate } 27037c478bd9Sstevel@tonic-gate 2704ff550d0eSmasputra /* 2705ff550d0eSmasputra * ipcl_get_next_conn 2706ff550d0eSmasputra * get the next entry in the conn global list 2707ff550d0eSmasputra * and put a reference on the next_conn. 2708ff550d0eSmasputra * decrement the reference on the current conn. 2709ff550d0eSmasputra * 2710ff550d0eSmasputra * This is an iterator based walker function that also provides for 2711ff550d0eSmasputra * some selection by the caller. It walks through the conn_hash bucket 2712ff550d0eSmasputra * searching for the next valid connp in the list, and selects connections 2713ff550d0eSmasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2714ff550d0eSmasputra * thus ensuring that the conn exists when the caller uses the conn. 2715ff550d0eSmasputra */ 2716ff550d0eSmasputra conn_t * 2717ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2718ff550d0eSmasputra { 2719ff550d0eSmasputra conn_t *next_connp; 2720ff550d0eSmasputra 2721ff550d0eSmasputra if (connfp == NULL) 2722ff550d0eSmasputra return (NULL); 2723ff550d0eSmasputra 2724ff550d0eSmasputra mutex_enter(&connfp->connf_lock); 2725ff550d0eSmasputra 2726ff550d0eSmasputra next_connp = (connp == NULL) ? 
2727ff550d0eSmasputra connfp->connf_head : connp->conn_g_next; 2728ff550d0eSmasputra 2729ff550d0eSmasputra while (next_connp != NULL) { 2730ff550d0eSmasputra mutex_enter(&next_connp->conn_lock); 2731ff550d0eSmasputra if (!(next_connp->conn_flags & conn_flags) || 2732ff550d0eSmasputra (next_connp->conn_state_flags & 2733ff550d0eSmasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2734ff550d0eSmasputra /* 2735ff550d0eSmasputra * This conn has been condemned or 2736ff550d0eSmasputra * is closing, or the flags don't match 2737ff550d0eSmasputra */ 2738ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2739ff550d0eSmasputra next_connp = next_connp->conn_g_next; 2740ff550d0eSmasputra continue; 2741ff550d0eSmasputra } 2742ff550d0eSmasputra CONN_INC_REF_LOCKED(next_connp); 2743ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2744ff550d0eSmasputra break; 2745ff550d0eSmasputra } 2746ff550d0eSmasputra 2747ff550d0eSmasputra mutex_exit(&connfp->connf_lock); 2748ff550d0eSmasputra 2749ff550d0eSmasputra if (connp != NULL) 2750ff550d0eSmasputra CONN_DEC_REF(connp); 2751ff550d0eSmasputra 2752ff550d0eSmasputra return (next_connp); 2753ff550d0eSmasputra } 2754ff550d0eSmasputra 27557c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG 27567c478bd9Sstevel@tonic-gate /* 27577c478bd9Sstevel@tonic-gate * Trace of the last NBUF refhold/refrele 27587c478bd9Sstevel@tonic-gate */ 27597c478bd9Sstevel@tonic-gate int 27607c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 27617c478bd9Sstevel@tonic-gate { 27627c478bd9Sstevel@tonic-gate int last; 27637c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 27647c478bd9Sstevel@tonic-gate 27657c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27667c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 27677c478bd9Sstevel@tonic-gate last++; 27687c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27697c478bd9Sstevel@tonic-gate last = 0; 27707c478bd9Sstevel@tonic-gate 27717c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 
27726a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27737c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 27747c478bd9Sstevel@tonic-gate return (1); 27757c478bd9Sstevel@tonic-gate } 27767c478bd9Sstevel@tonic-gate 27777c478bd9Sstevel@tonic-gate int 27787c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 27797c478bd9Sstevel@tonic-gate { 27807c478bd9Sstevel@tonic-gate int last; 27817c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 27827c478bd9Sstevel@tonic-gate 27837c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27847c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 27857c478bd9Sstevel@tonic-gate last++; 27867c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27877c478bd9Sstevel@tonic-gate last = 0; 27887c478bd9Sstevel@tonic-gate 27897c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 27906a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27917c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 27927c478bd9Sstevel@tonic-gate return (1); 27937c478bd9Sstevel@tonic-gate } 27947c478bd9Sstevel@tonic-gate #endif 2795