17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ee4701baSericheng * Common Development and Distribution License (the "License"). 6ee4701baSericheng * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 2274e20cfeSnh * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 2845916cd2Sjpk const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * IP PACKET CLASSIFIER 327c478bd9Sstevel@tonic-gate * 337c478bd9Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent 347c478bd9Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides 357c478bd9Sstevel@tonic-gate * interface for managing connection states. 367c478bd9Sstevel@tonic-gate * 377c478bd9Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among 387c478bd9Sstevel@tonic-gate * other things: 397c478bd9Sstevel@tonic-gate * 407c478bd9Sstevel@tonic-gate * o local/remote address and ports 417c478bd9Sstevel@tonic-gate * o Transport protocol 427c478bd9Sstevel@tonic-gate * o squeue for the connection (for TCP only) 437c478bd9Sstevel@tonic-gate * o reference counter 447c478bd9Sstevel@tonic-gate * o Connection state 457c478bd9Sstevel@tonic-gate * o hash table linkage 467c478bd9Sstevel@tonic-gate * o interface/ire information 477c478bd9Sstevel@tonic-gate * o credentials 487c478bd9Sstevel@tonic-gate * o ipsec policy 497c478bd9Sstevel@tonic-gate * o send and receive functions. 507c478bd9Sstevel@tonic-gate * o mutex lock. 517c478bd9Sstevel@tonic-gate * 527c478bd9Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the 537c478bd9Sstevel@tonic-gate * reference counter drops to zero. 
 * A reference is incremented when connection
 * is placed in a list or table, when incoming packet for the connection arrives
 * and when connection is processed via squeue (squeue processing may be
 * asynchronous and the reference protects the connection from being destroyed
 * before its processing is finished).
 *
 * send and receive functions are currently used for TCP only. The send function
 * determines the IP entry point for the packet once it leaves TCP to be sent to
 * the destination address. The receive function is used by IP when the packet
 * should be passed for TCP processing. When a new connection is created these
 * are set to ip_output() and tcp_input() respectively. During the lifetime of
 * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
 * tcp_conn_request(). This allows incoming SYNs to go directly into the
 * listener SYN processing function without going to tcp_input() first.
 *
 * Classifier uses several hash tables:
 *
 *	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
 *	ipcl_bind_fanout:	contains all connections in BOUND state
 *	ipcl_proto_fanout:	IPv4 protocol fanout
 *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
 *	ipcl_udp_fanout:	contains all UDP connections
 *	ipcl_globalhash_fanout:	contains all connections
 *
 * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
 * which need to view all existing connections.
 *
 * All tables are protected by per-bucket locks. When both per-bucket lock and
 * connection lock need to be held, the per-bucket lock should be acquired
 * first, followed by the connection lock.
 *
 * All functions doing search in one of these tables increment a reference
 * counter on the connection found (if any). This reference should be dropped
 * when the caller has finished processing the connection.
 *
 *
 * INTERFACES:
 * ===========
 *
 * Connection Lookup:
 * ------------------
 *
 * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
 * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
 *
 * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
 * it can't find any associated connection. If the connection is found, its
 * reference counter is incremented.
 *
 *	mp:	mblock, containing packet header. The full header should fit
 *		into a single mblock. It should also contain at least full IP
 *		and TCP or UDP header.
 *
 *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
 *
 *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
 *		 the packet.
 *
 *	zoneid: The zone in which the returned connection must be; the zoneid
 *		corresponding to the ire_zoneid on the IRE located for the
 *		packet's destination address.
 *
 *	For TCP connections, the lookup order is as follows:
 *		5-tuple {src, dst, protocol, local port, remote port}
 *			lookup in ipcl_conn_fanout table.
 *		3-tuple {dst, remote port, protocol} lookup in
 *			ipcl_bind_fanout table.
 *
 *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
 *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
 *	to multiple UDP clients, which is handled in IP itself.
 *
 * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
 * determine which actual zone gets the segment. This is used only in a
 * labeled environment. The matching rules are:
 *
 *	- If it's not a multilevel port, then the label on the packet selects
 *	  the zone. Unlabeled packets are delivered to the global zone.
 *
 *	- If it's a multilevel port, then only the zone registered to receive
 *	  packets on that port matches.
 *
 * Also, in a labeled environment, packet labels need to be checked. For fully
 * bound TCP connections, we can assume that the packet label was checked
 * during connection establishment, and doesn't need to be checked on each
 * packet. For others, though, we need to check for strict equality or, for
 * multilevel ports, membership in the range or set. This part currently does
 * a tnrh lookup on each packet, but could be optimized to use cached results
 * if that were necessary. (SCTP doesn't come through here, but if it did,
 * we would apply the same rules as TCP.)
 *
 * An implication of the above is that fully-bound TCP sockets must always use
 * distinct 4-tuples; they can't be discriminated by label alone.
 *
 * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
 * as there's no connection set-up handshake and no shared state.
 *
 * Labels on looped-back packets within a single zone do not need to be
 * checked, as all processes in the same zone have the same label.
 *
 * Finally, for unlabeled packets received by a labeled system, special rules
 * apply. We consider only the MLP if there is one. Otherwise, we prefer a
 * socket in the zone whose label matches the default label of the sender, if
 * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
 * receiver's label must dominate the sender's default label.
 *
 * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
 * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
 *					 ip_stack);
 *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
 *	ports are read from the IP and TCP header respectively.
 *
 * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
 *				   zoneid, ip_stack);
 * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
 *				   zoneid, ip_stack);
 *
 *	Lookup routine to find a listener with the tuple {lport, laddr,
 *	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
 *	parameter interface index is also compared.
 *
 * void ipcl_walk(func, arg, ip_stack)
 *
 *	Apply 'func' to every connection available. The 'func' is called as
 *	(*func)(connp, arg). The walk is non-atomic so connections may be
 *	created and destroyed during the walk. The CONN_CONDEMNED and
 *	CONN_INCIPIENT flags ensure that connections which are newly created
 *	or being destroyed are not selected by the walker.
 *
 * Table Updates
 * -------------
 *
 * int ipcl_conn_insert(connp, protocol, src, dst, ports)
 * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
 *
 *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address
 *		dst		destination address
 *		ports		local and remote port
 *		ifindex		interface index for IPv6 connections
 *
 *	Return value :
 *		0		if connp was inserted
 *		EADDRINUSE	if the connection with the same tuple
 *				already exists.
 *
 * int ipcl_bind_insert(connp, protocol, src, lport);
 * int ipcl_bind_insert_v6(connp, protocol, src, lport);
 *
 *	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
 *		connp		conn_t to be inserted
 *		protocol	connection protocol
 *		src		source address connection wants
 *				to bind to
 *		lport		local port connection wants to
 *				bind to
 *
 *
 * void ipcl_hash_remove(connp);
 *
 *	Removes the 'connp' from the connection fanout table.
 *
 * Connection Creation/Destruction
 * -------------------------------
 *
 * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
 *
 *	Creates a new conn based on the type flag, inserts it into
 *	globalhash table.
 *
 *	type:	This flag determines the type of conn_t which needs to be
 *		created i.e., which kmem_cache it comes from.
 *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_SCTPCONN	indicates a SCTP connection
 *		IPCL_UDPCONN	indicates a UDP conn_t.
 *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
 *		IPCL_RTSCONN	indicates a RTS conn_t.
 *		IPCL_IPCCONN	indicates all other connections.
2407c478bd9Sstevel@tonic-gate * 2417c478bd9Sstevel@tonic-gate * void ipcl_conn_destroy(connp) 2427c478bd9Sstevel@tonic-gate * 2437c478bd9Sstevel@tonic-gate * Destroys the connection state, removes it from the global 2447c478bd9Sstevel@tonic-gate * connection hash table and frees its memory. 2457c478bd9Sstevel@tonic-gate */ 2467c478bd9Sstevel@tonic-gate 2477c478bd9Sstevel@tonic-gate #include <sys/types.h> 2487c478bd9Sstevel@tonic-gate #include <sys/stream.h> 2497c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 2507c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 2517c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 2527c478bd9Sstevel@tonic-gate #include <sys/strsun.h> 2537c478bd9Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2547c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 2557c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 2567c478bd9Sstevel@tonic-gate #include <sys/debug.h> 2577c478bd9Sstevel@tonic-gate 2587c478bd9Sstevel@tonic-gate #include <sys/systm.h> 2597c478bd9Sstevel@tonic-gate #include <sys/param.h> 2607c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 2617c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h> 2627c478bd9Sstevel@tonic-gate #include <inet/common.h> 2637c478bd9Sstevel@tonic-gate #include <netinet/ip6.h> 2647c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h> 2657c478bd9Sstevel@tonic-gate 2667c478bd9Sstevel@tonic-gate #include <inet/ip.h> 2677c478bd9Sstevel@tonic-gate #include <inet/ip6.h> 2687c478bd9Sstevel@tonic-gate #include <inet/tcp.h> 2697c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h> 270ff550d0eSmasputra #include <inet/udp_impl.h> 2717c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h> 272f4b3ec61Sdh #include <inet/sctp/sctp_impl.h> 273*fc80c0dfSnordmark #include <inet/rawip_impl.h> 274*fc80c0dfSnordmark #include <inet/rts_impl.h> 2757c478bd9Sstevel@tonic-gate 2767c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 2777c478bd9Sstevel@tonic-gate 2787c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h> 
#include <inet/ipsec_impl.h>

#include <sys/tsol/tnet.h>

/* Classifier debug output is compiled in only when DEBUG is defined. */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

#ifdef	IPCL_DEBUG
/* Bit mask tested by IPCL_DEBUG_LVL(); zero prints nothing. */
int	ipcl_debug_level = 0;
/*
 * Print 'args' (a parenthesized printf argument list) when 'level' shares
 * at least one bit with ipcl_debug_level.  Expands to an empty block when
 * IPCL_DEBUG is not defined.
 */
#define	IPCL_DEBUG_LVL(level, args)	\
	if (ipcl_debug_level & level) { printf args; }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif
/* Old value for compatibility. Settable in /etc/system */
uint_t tcp_conn_hash_size = 0;

/* New value. Zero means choose automatically. Settable in /etc/system */
uint_t ipcl_conn_hash_size = 0;
/* Divisor applied to free memory (in bytes) when sizing automatically. */
uint_t ipcl_conn_hash_memfactor = 8192;
/* Upper bound on the automatically chosen size. */
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size. Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;

/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The initializer is indexed by N.  Entries 0-2 are zero, and a final
 * zero entry follows the N = 28 value; ipcl_init() treats a zero result
 * as "out of range".
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}

/*
 * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
 * are aligned on cache lines.
 */
typedef union itc_s {
	conn_t	itc_conn;
	char	itcu_filler[CACHE_ALIGN(conn_s)];
} itc_t;

/*
 * kmem caches that conn_t structures are allocated from.  The caches
 * without 'extern' are defined here and created in ipcl_g_init(); the
 * 'extern' ones are defined elsewhere.
 */
struct kmem_cache  *tcp_conn_cache;
struct kmem_cache  *ip_conn_cache;
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
extern struct kmem_cache  *tcp_iphc_cache;
struct kmem_cache  *udp_conn_cache;
struct kmem_cache  *rawip_conn_cache;
struct kmem_cache  *rts_conn_cache;

extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* Constructor/destructor pairs handed to kmem_cache_create(). */
static int	ip_conn_constructor(void *, void *, int);
static void	ip_conn_destructor(void *, void *);

static int	tcp_conn_constructor(void *, void *, int);
static void	tcp_conn_destructor(void *, void *);

static int	udp_conn_constructor(void *, void *, int);
static void udp_conn_destructor(void *, void *); 350*fc80c0dfSnordmark 351*fc80c0dfSnordmark static int rawip_conn_constructor(void *, void *, int); 352*fc80c0dfSnordmark static void rawip_conn_destructor(void *, void *); 353*fc80c0dfSnordmark 354*fc80c0dfSnordmark static int rts_conn_constructor(void *, void *, int); 355*fc80c0dfSnordmark static void rts_conn_destructor(void *, void *); 3567c478bd9Sstevel@tonic-gate 3577c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 3587c478bd9Sstevel@tonic-gate #define INET_NTOA_BUFSIZE 18 3597c478bd9Sstevel@tonic-gate 3607c478bd9Sstevel@tonic-gate static char * 3617c478bd9Sstevel@tonic-gate inet_ntoa_r(uint32_t in, char *b) 3627c478bd9Sstevel@tonic-gate { 3637c478bd9Sstevel@tonic-gate unsigned char *p; 3647c478bd9Sstevel@tonic-gate 3657c478bd9Sstevel@tonic-gate p = (unsigned char *)∈ 3667c478bd9Sstevel@tonic-gate (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 3677c478bd9Sstevel@tonic-gate return (b); 3687c478bd9Sstevel@tonic-gate } 3697c478bd9Sstevel@tonic-gate #endif 3707c478bd9Sstevel@tonic-gate 3717c478bd9Sstevel@tonic-gate /* 372f4b3ec61Sdh * Global (for all stack instances) init routine 3737c478bd9Sstevel@tonic-gate */ 3747c478bd9Sstevel@tonic-gate void 375f4b3ec61Sdh ipcl_g_init(void) 3767c478bd9Sstevel@tonic-gate { 377*fc80c0dfSnordmark ip_conn_cache = kmem_cache_create("ip_conn_cache", 3787c478bd9Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 379*fc80c0dfSnordmark ip_conn_constructor, ip_conn_destructor, 380*fc80c0dfSnordmark NULL, NULL, NULL, 0); 381*fc80c0dfSnordmark 382*fc80c0dfSnordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache", 383*fc80c0dfSnordmark sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE, 384*fc80c0dfSnordmark tcp_conn_constructor, tcp_conn_destructor, 385*fc80c0dfSnordmark NULL, NULL, NULL, 0); 386*fc80c0dfSnordmark 387*fc80c0dfSnordmark udp_conn_cache = kmem_cache_create("udp_conn_cache", 388*fc80c0dfSnordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE, 
389*fc80c0dfSnordmark udp_conn_constructor, udp_conn_destructor, 390*fc80c0dfSnordmark NULL, NULL, NULL, 0); 3917c478bd9Sstevel@tonic-gate 392*fc80c0dfSnordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache", 393*fc80c0dfSnordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE, 394*fc80c0dfSnordmark rawip_conn_constructor, rawip_conn_destructor, 395*fc80c0dfSnordmark NULL, NULL, NULL, 0); 396*fc80c0dfSnordmark 397*fc80c0dfSnordmark rts_conn_cache = kmem_cache_create("rts_conn_cache", 398*fc80c0dfSnordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE, 399*fc80c0dfSnordmark rts_conn_constructor, rts_conn_destructor, 4007c478bd9Sstevel@tonic-gate NULL, NULL, NULL, 0); 401f4b3ec61Sdh } 402f4b3ec61Sdh 403f4b3ec61Sdh /* 404f4b3ec61Sdh * ipclassifier intialization routine, sets up hash tables. 405f4b3ec61Sdh */ 406f4b3ec61Sdh void 407f4b3ec61Sdh ipcl_init(ip_stack_t *ipst) 408f4b3ec61Sdh { 409f4b3ec61Sdh int i; 410f4b3ec61Sdh int sizes[] = P2Ps(); 4117c478bd9Sstevel@tonic-gate 4127c478bd9Sstevel@tonic-gate /* 413f4b3ec61Sdh * Calculate size of conn fanout table from /etc/system settings 4147c478bd9Sstevel@tonic-gate */ 4157c478bd9Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 416f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 4177c478bd9Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 418f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 4197c478bd9Sstevel@tonic-gate } else { 4207c478bd9Sstevel@tonic-gate extern pgcnt_t freemem; 4217c478bd9Sstevel@tonic-gate 422f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 4237c478bd9Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 4247c478bd9Sstevel@tonic-gate 425f4b3ec61Sdh if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 426f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 427f4b3ec61Sdh ipcl_conn_hash_maxsize; 428f4b3ec61Sdh } 4297c478bd9Sstevel@tonic-gate } 4307c478bd9Sstevel@tonic-gate 4317c478bd9Sstevel@tonic-gate for (i = 9; i < sizeof 
(sizes) / sizeof (*sizes) - 1; i++) { 432f4b3ec61Sdh if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 4337c478bd9Sstevel@tonic-gate break; 4347c478bd9Sstevel@tonic-gate } 4357c478bd9Sstevel@tonic-gate } 436f4b3ec61Sdh if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 4377c478bd9Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 438f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = sizes[16]; 4397c478bd9Sstevel@tonic-gate } 4407c478bd9Sstevel@tonic-gate 441f4b3ec61Sdh /* Take values from /etc/system */ 442f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 443f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 444f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 445f4b3ec61Sdh 446f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout == NULL); 447f4b3ec61Sdh 448f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = kmem_zalloc( 449f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 450f4b3ec61Sdh 451f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 452f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 4537c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4547c478bd9Sstevel@tonic-gate } 4557c478bd9Sstevel@tonic-gate 456f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = kmem_zalloc( 457f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 4587c478bd9Sstevel@tonic-gate 459f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 460f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 4617c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4627c478bd9Sstevel@tonic-gate } 4637c478bd9Sstevel@tonic-gate 464f4b3ec61Sdh ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 465f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 466f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 467f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 4687c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4697c478bd9Sstevel@tonic-gate } 470f4b3ec61Sdh 
471f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 472f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 473f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 474f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 4757c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4767c478bd9Sstevel@tonic-gate } 4777c478bd9Sstevel@tonic-gate 478f4b3ec61Sdh ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 479f4b3ec61Sdh mutex_init(&ipst->ips_rts_clients->connf_lock, 480f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 4817c478bd9Sstevel@tonic-gate 482f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = kmem_zalloc( 483f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 484f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 485f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 4867c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4877c478bd9Sstevel@tonic-gate } 4887c478bd9Sstevel@tonic-gate 489f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = kmem_zalloc( 490f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 491f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 492f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 4937c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4947c478bd9Sstevel@tonic-gate } 4957c478bd9Sstevel@tonic-gate 496f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 497f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 4987c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 499f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 500f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 5017c478bd9Sstevel@tonic-gate } 5027c478bd9Sstevel@tonic-gate } 5037c478bd9Sstevel@tonic-gate 5047c478bd9Sstevel@tonic-gate void 505f4b3ec61Sdh ipcl_g_destroy(void) 5067c478bd9Sstevel@tonic-gate { 507*fc80c0dfSnordmark kmem_cache_destroy(ip_conn_cache); 508*fc80c0dfSnordmark kmem_cache_destroy(tcp_conn_cache); 
509*fc80c0dfSnordmark kmem_cache_destroy(udp_conn_cache); 510*fc80c0dfSnordmark kmem_cache_destroy(rawip_conn_cache); 511*fc80c0dfSnordmark kmem_cache_destroy(rts_conn_cache); 512f4b3ec61Sdh } 513f4b3ec61Sdh 514f4b3ec61Sdh /* 515f4b3ec61Sdh * All user-level and kernel use of the stack must be gone 516f4b3ec61Sdh * by now. 517f4b3ec61Sdh */ 518f4b3ec61Sdh void 519f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst) 520f4b3ec61Sdh { 521f4b3ec61Sdh int i; 522f4b3ec61Sdh 523f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 524f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 525f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 526f4b3ec61Sdh } 527f4b3ec61Sdh kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 528f4b3ec61Sdh sizeof (connf_t)); 529f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = NULL; 530f4b3ec61Sdh 531f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 532f4b3ec61Sdh ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 533f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 534f4b3ec61Sdh } 535f4b3ec61Sdh kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 536f4b3ec61Sdh sizeof (connf_t)); 537f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = NULL; 538f4b3ec61Sdh 539f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 540f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 541f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 542f4b3ec61Sdh } 543f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 544f4b3ec61Sdh ipst->ips_ipcl_proto_fanout = NULL; 545f4b3ec61Sdh 546f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 547f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 548f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 549f4b3ec61Sdh } 550f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout_v6, 551f4b3ec61Sdh IPPROTO_MAX * sizeof (connf_t)); 
552f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = NULL; 553f4b3ec61Sdh 554f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 555f4b3ec61Sdh ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 556f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 557f4b3ec61Sdh } 558f4b3ec61Sdh kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 559f4b3ec61Sdh sizeof (connf_t)); 560f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = NULL; 561f4b3ec61Sdh 562f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 563f4b3ec61Sdh ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 564f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 565f4b3ec61Sdh } 566f4b3ec61Sdh kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 567f4b3ec61Sdh sizeof (connf_t)); 568f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = NULL; 569f4b3ec61Sdh 570f4b3ec61Sdh for (i = 0; i < CONN_G_HASH_SIZE; i++) { 571f4b3ec61Sdh ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 572f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 573f4b3ec61Sdh } 574f4b3ec61Sdh kmem_free(ipst->ips_ipcl_globalhash_fanout, 575f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE); 576f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = NULL; 577f4b3ec61Sdh 578f4b3ec61Sdh ASSERT(ipst->ips_rts_clients->connf_head == NULL); 579f4b3ec61Sdh mutex_destroy(&ipst->ips_rts_clients->connf_lock); 580f4b3ec61Sdh kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 581f4b3ec61Sdh ipst->ips_rts_clients = NULL; 5827c478bd9Sstevel@tonic-gate } 5837c478bd9Sstevel@tonic-gate 5847c478bd9Sstevel@tonic-gate /* 5857c478bd9Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 5867c478bd9Sstevel@tonic-gate * and inserts it in the global hash table. 
5877c478bd9Sstevel@tonic-gate */ 5887c478bd9Sstevel@tonic-gate conn_t * 589f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 5907c478bd9Sstevel@tonic-gate { 5917c478bd9Sstevel@tonic-gate conn_t *connp; 592f4b3ec61Sdh sctp_stack_t *sctps; 593*fc80c0dfSnordmark struct kmem_cache *conn_cache; 5947c478bd9Sstevel@tonic-gate 5957c478bd9Sstevel@tonic-gate switch (type) { 5967c478bd9Sstevel@tonic-gate case IPCL_SCTPCONN: 5977c478bd9Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 5987c478bd9Sstevel@tonic-gate return (NULL); 599121e5416Skcpoon sctp_conn_init(connp); 600f4b3ec61Sdh sctps = ns->netstack_sctp; 601f4b3ec61Sdh SCTP_G_Q_REFHOLD(sctps); 602f4b3ec61Sdh netstack_hold(ns); 603f4b3ec61Sdh connp->conn_netstack = ns; 604*fc80c0dfSnordmark return (connp); 605*fc80c0dfSnordmark 606*fc80c0dfSnordmark case IPCL_TCPCONN: 607*fc80c0dfSnordmark conn_cache = tcp_conn_cache; 6087c478bd9Sstevel@tonic-gate break; 609*fc80c0dfSnordmark 610*fc80c0dfSnordmark case IPCL_UDPCONN: 611*fc80c0dfSnordmark conn_cache = udp_conn_cache; 612*fc80c0dfSnordmark break; 613*fc80c0dfSnordmark 614*fc80c0dfSnordmark case IPCL_RAWIPCONN: 615*fc80c0dfSnordmark conn_cache = rawip_conn_cache; 616*fc80c0dfSnordmark break; 617*fc80c0dfSnordmark 618*fc80c0dfSnordmark case IPCL_RTSCONN: 619*fc80c0dfSnordmark conn_cache = rts_conn_cache; 620*fc80c0dfSnordmark break; 621*fc80c0dfSnordmark 6227c478bd9Sstevel@tonic-gate case IPCL_IPCCONN: 623*fc80c0dfSnordmark conn_cache = ip_conn_cache; 6247c478bd9Sstevel@tonic-gate break; 625*fc80c0dfSnordmark 626ff550d0eSmasputra default: 627ff550d0eSmasputra connp = NULL; 628ff550d0eSmasputra ASSERT(0); 6297c478bd9Sstevel@tonic-gate } 6307c478bd9Sstevel@tonic-gate 631*fc80c0dfSnordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL) 632*fc80c0dfSnordmark return (NULL); 633*fc80c0dfSnordmark 634*fc80c0dfSnordmark connp->conn_ref = 1; 635*fc80c0dfSnordmark netstack_hold(ns); 636*fc80c0dfSnordmark 
connp->conn_netstack = ns; 637*fc80c0dfSnordmark ipcl_globalhash_insert(connp); 6387c478bd9Sstevel@tonic-gate return (connp); 6397c478bd9Sstevel@tonic-gate } 6407c478bd9Sstevel@tonic-gate 6417c478bd9Sstevel@tonic-gate void 6427c478bd9Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 6437c478bd9Sstevel@tonic-gate { 6447c478bd9Sstevel@tonic-gate mblk_t *mp; 645f4b3ec61Sdh netstack_t *ns = connp->conn_netstack; 6467c478bd9Sstevel@tonic-gate 6477c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 6487c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 6497c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ire_cache == NULL); 6507c478bd9Sstevel@tonic-gate 65145916cd2Sjpk if (connp->conn_peercred != NULL && 65245916cd2Sjpk connp->conn_peercred != connp->conn_cred) 65345916cd2Sjpk crfree(connp->conn_peercred); 65445916cd2Sjpk connp->conn_peercred = NULL; 65545916cd2Sjpk 65645916cd2Sjpk if (connp->conn_cred != NULL) { 65745916cd2Sjpk crfree(connp->conn_cred); 65845916cd2Sjpk connp->conn_cred = NULL; 65945916cd2Sjpk } 66045916cd2Sjpk 6617c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(connp); 6627c478bd9Sstevel@tonic-gate 663*fc80c0dfSnordmark /* FIXME: add separate tcp_conn_free()? 
*/ 6647c478bd9Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) { 665ff550d0eSmasputra tcp_t *tcp = connp->conn_tcp; 666f4b3ec61Sdh tcp_stack_t *tcps; 667f4b3ec61Sdh 668f4b3ec61Sdh ASSERT(tcp != NULL); 669f4b3ec61Sdh tcps = tcp->tcp_tcps; 670f4b3ec61Sdh if (tcps != NULL) { 671f4b3ec61Sdh if (connp->conn_latch != NULL) { 672f4b3ec61Sdh IPLATCH_REFRELE(connp->conn_latch, ns); 673f4b3ec61Sdh connp->conn_latch = NULL; 674f4b3ec61Sdh } 675f4b3ec61Sdh if (connp->conn_policy != NULL) { 676f4b3ec61Sdh IPPH_REFRELE(connp->conn_policy, ns); 677f4b3ec61Sdh connp->conn_policy = NULL; 678f4b3ec61Sdh } 679f4b3ec61Sdh tcp->tcp_tcps = NULL; 680f4b3ec61Sdh TCPS_REFRELE(tcps); 681f4b3ec61Sdh } 682ff550d0eSmasputra 6837c478bd9Sstevel@tonic-gate tcp_free(tcp); 6847c478bd9Sstevel@tonic-gate mp = tcp->tcp_timercache; 68545916cd2Sjpk tcp->tcp_cred = NULL; 6867c478bd9Sstevel@tonic-gate 6877c478bd9Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 6887c478bd9Sstevel@tonic-gate bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 6897c478bd9Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 6907c478bd9Sstevel@tonic-gate tcp->tcp_sack_info); 6917c478bd9Sstevel@tonic-gate } 6927c478bd9Sstevel@tonic-gate if (tcp->tcp_iphc != NULL) { 6937c478bd9Sstevel@tonic-gate if (tcp->tcp_hdr_grown) { 6947c478bd9Sstevel@tonic-gate kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 6957c478bd9Sstevel@tonic-gate } else { 6967c478bd9Sstevel@tonic-gate bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 6977c478bd9Sstevel@tonic-gate kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 6987c478bd9Sstevel@tonic-gate } 6997c478bd9Sstevel@tonic-gate tcp->tcp_iphc_len = 0; 7007c478bd9Sstevel@tonic-gate } 7017c478bd9Sstevel@tonic-gate ASSERT(tcp->tcp_iphc_len == 0); 7027c478bd9Sstevel@tonic-gate 703f4b3ec61Sdh ASSERT(connp->conn_latch == NULL); 704f4b3ec61Sdh ASSERT(connp->conn_policy == NULL); 705f4b3ec61Sdh 706f4b3ec61Sdh if (ns != NULL) { 707f4b3ec61Sdh ASSERT(tcp->tcp_tcps == NULL); 708f4b3ec61Sdh connp->conn_netstack = 
NULL; 709f4b3ec61Sdh netstack_rele(ns); 710f4b3ec61Sdh } 711*fc80c0dfSnordmark 712*fc80c0dfSnordmark ipcl_conn_cleanup(connp); 713*fc80c0dfSnordmark connp->conn_flags = IPCL_TCPCONN; 714*fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t)); 715*fc80c0dfSnordmark 716*fc80c0dfSnordmark tcp->tcp_timercache = mp; 717*fc80c0dfSnordmark tcp->tcp_connp = connp; 718*fc80c0dfSnordmark kmem_cache_free(tcp_conn_cache, connp); 719*fc80c0dfSnordmark return; 720*fc80c0dfSnordmark } 721*fc80c0dfSnordmark if (connp->conn_latch != NULL) { 722*fc80c0dfSnordmark IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack); 723*fc80c0dfSnordmark connp->conn_latch = NULL; 724*fc80c0dfSnordmark } 725*fc80c0dfSnordmark if (connp->conn_policy != NULL) { 726*fc80c0dfSnordmark IPPH_REFRELE(connp->conn_policy, connp->conn_netstack); 727*fc80c0dfSnordmark connp->conn_policy = NULL; 728*fc80c0dfSnordmark } 729*fc80c0dfSnordmark if (connp->conn_ipsec_opt_mp != NULL) { 730*fc80c0dfSnordmark freemsg(connp->conn_ipsec_opt_mp); 731*fc80c0dfSnordmark connp->conn_ipsec_opt_mp = NULL; 732*fc80c0dfSnordmark } 733*fc80c0dfSnordmark 734*fc80c0dfSnordmark if (connp->conn_flags & IPCL_SCTPCONN) { 735f4b3ec61Sdh ASSERT(ns != NULL); 7367c478bd9Sstevel@tonic-gate sctp_free(connp); 737*fc80c0dfSnordmark return; 738*fc80c0dfSnordmark } 739*fc80c0dfSnordmark 740*fc80c0dfSnordmark if (ns != NULL) { 741*fc80c0dfSnordmark connp->conn_netstack = NULL; 742*fc80c0dfSnordmark netstack_rele(ns); 743*fc80c0dfSnordmark } 744*fc80c0dfSnordmark ipcl_conn_cleanup(connp); 745*fc80c0dfSnordmark 746*fc80c0dfSnordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. 
*/ 747*fc80c0dfSnordmark if (connp->conn_flags & IPCL_UDPCONN) { 748*fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN; 749*fc80c0dfSnordmark kmem_cache_free(udp_conn_cache, connp); 750*fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) { 751*fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN; 752*fc80c0dfSnordmark connp->conn_ulp = IPPROTO_ICMP; 753*fc80c0dfSnordmark kmem_cache_free(rawip_conn_cache, connp); 754*fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RTSCONN) { 755*fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN; 756*fc80c0dfSnordmark kmem_cache_free(rts_conn_cache, connp); 7577c478bd9Sstevel@tonic-gate } else { 758*fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN; 759*fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 760*fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL); 761*fc80c0dfSnordmark kmem_cache_free(ip_conn_cache, connp); 7627c478bd9Sstevel@tonic-gate } 7637c478bd9Sstevel@tonic-gate } 7647c478bd9Sstevel@tonic-gate 7657c478bd9Sstevel@tonic-gate /* 7667c478bd9Sstevel@tonic-gate * Running in cluster mode - deregister listener information 7677c478bd9Sstevel@tonic-gate */ 7687c478bd9Sstevel@tonic-gate 7697c478bd9Sstevel@tonic-gate static void 7707c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 7717c478bd9Sstevel@tonic-gate { 7727c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 7737c478bd9Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 7747c478bd9Sstevel@tonic-gate 7757c478bd9Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 7767c478bd9Sstevel@tonic-gate sa_family_t addr_family; 7777c478bd9Sstevel@tonic-gate uint8_t *laddrp; 7787c478bd9Sstevel@tonic-gate 7797c478bd9Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 7807c478bd9Sstevel@tonic-gate addr_family = AF_INET6; 7817c478bd9Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source_v6; 7827c478bd9Sstevel@tonic-gate } else { 7837c478bd9Sstevel@tonic-gate addr_family = AF_INET; 7847c478bd9Sstevel@tonic-gate 
laddrp = (uint8_t *)&connp->conn_bound_source; 7857c478bd9Sstevel@tonic-gate } 7867c478bd9Sstevel@tonic-gate (*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 7877c478bd9Sstevel@tonic-gate connp->conn_lport); 7887c478bd9Sstevel@tonic-gate } 7897c478bd9Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER; 7907c478bd9Sstevel@tonic-gate } 7917c478bd9Sstevel@tonic-gate 7927c478bd9Sstevel@tonic-gate /* 7937c478bd9Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 7947c478bd9Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 7957c478bd9Sstevel@tonic-gate * table this connection was in. 7967c478bd9Sstevel@tonic-gate */ 7977c478bd9Sstevel@tonic-gate #define IPCL_HASH_REMOVE(connp) { \ 7987c478bd9Sstevel@tonic-gate connf_t *connfp = (connp)->conn_fanout; \ 7997c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 8007c478bd9Sstevel@tonic-gate if (connfp != NULL) { \ 8017c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 8027c478bd9Sstevel@tonic-gate (void *)(connp))); \ 8037c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); \ 8047c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) \ 8057c478bd9Sstevel@tonic-gate (connp)->conn_next->conn_prev = \ 8067c478bd9Sstevel@tonic-gate (connp)->conn_prev; \ 8077c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) \ 8087c478bd9Sstevel@tonic-gate (connp)->conn_prev->conn_next = \ 8097c478bd9Sstevel@tonic-gate (connp)->conn_next; \ 8107c478bd9Sstevel@tonic-gate else \ 8117c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; \ 8127c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; \ 8137c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; \ 8147c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; \ 8157c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; \ 8167c478bd9Sstevel@tonic-gate if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 
8177c478bd9Sstevel@tonic-gate ipcl_conn_unlisten((connp)); \ 8187c478bd9Sstevel@tonic-gate CONN_DEC_REF((connp)); \ 8197c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); \ 8207c478bd9Sstevel@tonic-gate } \ 8217c478bd9Sstevel@tonic-gate } 8227c478bd9Sstevel@tonic-gate 8237c478bd9Sstevel@tonic-gate void 8247c478bd9Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp) 8257c478bd9Sstevel@tonic-gate { 8267c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 8277c478bd9Sstevel@tonic-gate } 8287c478bd9Sstevel@tonic-gate 8297c478bd9Sstevel@tonic-gate /* 8307c478bd9Sstevel@tonic-gate * The whole purpose of this function is allow removal of 8317c478bd9Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim. 8327c478bd9Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 8337c478bd9Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 8347c478bd9Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 8357c478bd9Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 8367c478bd9Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 8377c478bd9Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 8387c478bd9Sstevel@tonic-gate * improved performance. 
8397c478bd9Sstevel@tonic-gate */ 8407c478bd9Sstevel@tonic-gate void 8417c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 8427c478bd9Sstevel@tonic-gate { 8437c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 8447c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 8457c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 8467c478bd9Sstevel@tonic-gate 8477c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 848121e5416Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev; 8497c478bd9Sstevel@tonic-gate } 8507c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 851121e5416Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next; 8527c478bd9Sstevel@tonic-gate } else { 8537c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 8547c478bd9Sstevel@tonic-gate } 8557c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; 8567c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; 8577c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; 8587c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 8597c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 8607c478bd9Sstevel@tonic-gate (connp)->conn_ref--; 8617c478bd9Sstevel@tonic-gate } 8627c478bd9Sstevel@tonic-gate 8637c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 8647c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 8657c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_next == NULL); \ 8667c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 8677c478bd9Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 8687c478bd9Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 8697c478bd9Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 8707c478bd9Sstevel@tonic-gate } \ 8717c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8727c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 
8737c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8747c478bd9Sstevel@tonic-gate IPCL_CONNECTED; \ 8757c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 8767c478bd9Sstevel@tonic-gate } 8777c478bd9Sstevel@tonic-gate 8787c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 8797c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 8807c478bd9Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 8817c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8827c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8837c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 8847c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 8857c478bd9Sstevel@tonic-gate } 8867c478bd9Sstevel@tonic-gate 8877c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 8887c478bd9Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 8897c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 8907c478bd9Sstevel@tonic-gate "connp %p", (void *)connfp, (void *)(connp))); \ 8917c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8927c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8937c478bd9Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 8943d1c78fbSethindra while (nconnp != NULL && \ 8953d1c78fbSethindra !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 8963d1c78fbSethindra pconnp = nconnp; \ 8973d1c78fbSethindra nconnp = nconnp->conn_next; \ 8987c478bd9Sstevel@tonic-gate } \ 8997c478bd9Sstevel@tonic-gate if (pconnp != NULL) { \ 9007c478bd9Sstevel@tonic-gate pconnp->conn_next = (connp); \ 9017c478bd9Sstevel@tonic-gate (connp)->conn_prev = pconnp; \ 9027c478bd9Sstevel@tonic-gate } else { \ 9037c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 9047c478bd9Sstevel@tonic-gate } \ 9057c478bd9Sstevel@tonic-gate if (nconnp != NULL) { \ 9067c478bd9Sstevel@tonic-gate 
(connp)->conn_next = nconnp; \ 9077c478bd9Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 9087c478bd9Sstevel@tonic-gate } \ 9097c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9107c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9117c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 9127c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 9137c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9147c478bd9Sstevel@tonic-gate } 9157c478bd9Sstevel@tonic-gate 9167c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 9177c478bd9Sstevel@tonic-gate conn_t **list, *prev, *next; \ 9187c478bd9Sstevel@tonic-gate boolean_t isv4mapped = \ 9197c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 9207c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 9217c478bd9Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 9227c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 9237c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 9247c478bd9Sstevel@tonic-gate list = &(connfp)->connf_head; \ 9257c478bd9Sstevel@tonic-gate prev = NULL; \ 9267c478bd9Sstevel@tonic-gate while ((next = *list) != NULL) { \ 9277c478bd9Sstevel@tonic-gate if (isv4mapped && \ 9287c478bd9Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 9297c478bd9Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 9307c478bd9Sstevel@tonic-gate (connp)->conn_next = next; \ 9317c478bd9Sstevel@tonic-gate if (prev != NULL) \ 9327c478bd9Sstevel@tonic-gate prev = next->conn_prev; \ 9337c478bd9Sstevel@tonic-gate next->conn_prev = (connp); \ 9347c478bd9Sstevel@tonic-gate break; \ 9357c478bd9Sstevel@tonic-gate } \ 9367c478bd9Sstevel@tonic-gate list = &next->conn_next; \ 9377c478bd9Sstevel@tonic-gate prev = next; \ 9387c478bd9Sstevel@tonic-gate } \ 9397c478bd9Sstevel@tonic-gate (connp)->conn_prev = prev; \ 9407c478bd9Sstevel@tonic-gate *list = (connp); 
\ 9417c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 9427c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 9437c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 9447c478bd9Sstevel@tonic-gate CONN_INC_REF((connp)); \ 9457c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 9467c478bd9Sstevel@tonic-gate } 9477c478bd9Sstevel@tonic-gate 9487c478bd9Sstevel@tonic-gate void 9497c478bd9Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 9507c478bd9Sstevel@tonic-gate { 9517c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9527c478bd9Sstevel@tonic-gate } 9537c478bd9Sstevel@tonic-gate 9547c478bd9Sstevel@tonic-gate void 9557c478bd9Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol) 9567c478bd9Sstevel@tonic-gate { 9577c478bd9Sstevel@tonic-gate connf_t *connfp; 958f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9597c478bd9Sstevel@tonic-gate 9607c478bd9Sstevel@tonic-gate ASSERT(connp != NULL); 96145916cd2Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 96245916cd2Sjpk protocol == IPPROTO_ESP); 9637c478bd9Sstevel@tonic-gate 9647c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 9657c478bd9Sstevel@tonic-gate 9667c478bd9Sstevel@tonic-gate /* Insert it in the protocol hash */ 967f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 9687c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9697c478bd9Sstevel@tonic-gate } 9707c478bd9Sstevel@tonic-gate 9717c478bd9Sstevel@tonic-gate void 9727c478bd9Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 9737c478bd9Sstevel@tonic-gate { 9747c478bd9Sstevel@tonic-gate connf_t *connfp; 975f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9767c478bd9Sstevel@tonic-gate 9777c478bd9Sstevel@tonic-gate ASSERT(connp != NULL); 97845916cd2Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 97945916cd2Sjpk protocol == IPPROTO_ESP); 
9807c478bd9Sstevel@tonic-gate 9817c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 9827c478bd9Sstevel@tonic-gate 9837c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 984f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 9857c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9867c478bd9Sstevel@tonic-gate } 9877c478bd9Sstevel@tonic-gate 9887c478bd9Sstevel@tonic-gate /* 9897c478bd9Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now. 9907c478bd9Sstevel@tonic-gate * This may change later. 9917c478bd9Sstevel@tonic-gate * 9927c478bd9Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 9937c478bd9Sstevel@tonic-gate * lport is in network byte order. 9947c478bd9Sstevel@tonic-gate */ 9957c478bd9Sstevel@tonic-gate static int 9967c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 9977c478bd9Sstevel@tonic-gate { 9987c478bd9Sstevel@tonic-gate connf_t *connfp; 9997c478bd9Sstevel@tonic-gate conn_t *oconnp; 1000f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10017c478bd9Sstevel@tonic-gate 1002f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 10037c478bd9Sstevel@tonic-gate 10047c478bd9Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. 
*/ 10057c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 10067c478bd9Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 10077c0c0508Skcpoon oconnp = oconnp->conn_next) { 10087c478bd9Sstevel@tonic-gate if (oconnp->conn_lport == lport && 10097c478bd9Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 10107c478bd9Sstevel@tonic-gate oconnp->conn_af_isv6 == connp->conn_af_isv6 && 10117c478bd9Sstevel@tonic-gate ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10127c478bd9Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 10137c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 10147c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 10157c478bd9Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 10167c478bd9Sstevel@tonic-gate &connp->conn_srcv6))) { 10177c478bd9Sstevel@tonic-gate break; 10187c478bd9Sstevel@tonic-gate } 10197c478bd9Sstevel@tonic-gate } 10207c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 10217c478bd9Sstevel@tonic-gate if (oconnp != NULL) 10227c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL); 10237c478bd9Sstevel@tonic-gate 10247c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 10257c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 10267c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 10277c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 10287c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10297c478bd9Sstevel@tonic-gate } else { 10307c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10317c478bd9Sstevel@tonic-gate } 10327c478bd9Sstevel@tonic-gate } else { 10337c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10347c478bd9Sstevel@tonic-gate } 10357c478bd9Sstevel@tonic-gate return (0); 10367c478bd9Sstevel@tonic-gate } 10377c478bd9Sstevel@tonic-gate 103845916cd2Sjpk /* 103945916cd2Sjpk * 
Check for a MAC exemption conflict on a labeled system. Note that for 104045916cd2Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 104145916cd2Sjpk * transport layer. This check is for binding all other protocols. 104245916cd2Sjpk * 104345916cd2Sjpk * Returns true if there's a conflict. 104445916cd2Sjpk */ 104545916cd2Sjpk static boolean_t 1046f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 104745916cd2Sjpk { 104845916cd2Sjpk connf_t *connfp; 104945916cd2Sjpk conn_t *tconn; 105045916cd2Sjpk 1051f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 105245916cd2Sjpk mutex_enter(&connfp->connf_lock); 105345916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 105445916cd2Sjpk tconn = tconn->conn_next) { 105545916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */ 105645916cd2Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 105745916cd2Sjpk continue; 105845916cd2Sjpk /* If neither is exempt, then there's no conflict */ 105945916cd2Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 106045916cd2Sjpk continue; 106145916cd2Sjpk /* If both are bound to different specific addrs, ok */ 106245916cd2Sjpk if (connp->conn_src != INADDR_ANY && 106345916cd2Sjpk tconn->conn_src != INADDR_ANY && 106445916cd2Sjpk connp->conn_src != tconn->conn_src) 106545916cd2Sjpk continue; 106645916cd2Sjpk /* These two conflict; fail */ 106745916cd2Sjpk break; 106845916cd2Sjpk } 106945916cd2Sjpk mutex_exit(&connfp->connf_lock); 107045916cd2Sjpk return (tconn != NULL); 107145916cd2Sjpk } 107245916cd2Sjpk 107345916cd2Sjpk static boolean_t 1074f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 107545916cd2Sjpk { 107645916cd2Sjpk connf_t *connfp; 107745916cd2Sjpk conn_t *tconn; 107845916cd2Sjpk 1079f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 108045916cd2Sjpk mutex_enter(&connfp->connf_lock); 108145916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 
108245916cd2Sjpk tconn = tconn->conn_next) { 108345916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */ 108445916cd2Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 108545916cd2Sjpk continue; 108645916cd2Sjpk /* If neither is exempt, then there's no conflict */ 108745916cd2Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 108845916cd2Sjpk continue; 108945916cd2Sjpk /* If both are bound to different addrs, ok */ 109045916cd2Sjpk if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 109145916cd2Sjpk !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 109245916cd2Sjpk !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6)) 109345916cd2Sjpk continue; 109445916cd2Sjpk /* These two conflict; fail */ 109545916cd2Sjpk break; 109645916cd2Sjpk } 109745916cd2Sjpk mutex_exit(&connfp->connf_lock); 109845916cd2Sjpk return (tconn != NULL); 109945916cd2Sjpk } 110045916cd2Sjpk 11017c478bd9Sstevel@tonic-gate /* 11027c478bd9Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 11037c478bd9Sstevel@tonic-gate */ 11047c478bd9Sstevel@tonic-gate int 11057c478bd9Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 11067c478bd9Sstevel@tonic-gate { 11077c478bd9Sstevel@tonic-gate connf_t *connfp; 11087c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 11097c478bd9Sstevel@tonic-gate char buf[INET_NTOA_BUFSIZE]; 11107c478bd9Sstevel@tonic-gate #endif 11117c478bd9Sstevel@tonic-gate int ret = 0; 1112f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 11137c478bd9Sstevel@tonic-gate 11147c478bd9Sstevel@tonic-gate ASSERT(connp); 11157c478bd9Sstevel@tonic-gate 11167c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 11177c478bd9Sstevel@tonic-gate "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 11187c478bd9Sstevel@tonic-gate 11197c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 11207c478bd9Sstevel@tonic-gate IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 
11217c478bd9Sstevel@tonic-gate connp->conn_lport = lport; 11227c478bd9Sstevel@tonic-gate 11237c478bd9Sstevel@tonic-gate switch (protocol) { 11247c478bd9Sstevel@tonic-gate default: 1125f4b3ec61Sdh if (is_system_labeled() && 1126f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 112745916cd2Sjpk return (EADDRINUSE); 112845916cd2Sjpk /* FALLTHROUGH */ 112945916cd2Sjpk case IPPROTO_UDP: 11307c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 11317c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11327c478bd9Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - udp\n", 11337c478bd9Sstevel@tonic-gate (void *)connp)); 1134f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1135f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)]; 11367c478bd9Sstevel@tonic-gate } else { 11377c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 11387c478bd9Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - protocol\n", 11397c478bd9Sstevel@tonic-gate (void *)connp)); 1140f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 11417c478bd9Sstevel@tonic-gate } 11427c478bd9Sstevel@tonic-gate 11437c478bd9Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 11447c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 11457c478bd9Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 11467c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11477c478bd9Sstevel@tonic-gate } else { 11487c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11497c478bd9Sstevel@tonic-gate } 11507c478bd9Sstevel@tonic-gate break; 11517c478bd9Sstevel@tonic-gate 11527c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 11537c478bd9Sstevel@tonic-gate 11547c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 115545916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1156f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[ 1157f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)]; 11587c478bd9Sstevel@tonic-gate if (connp->conn_src != INADDR_ANY) { 11597c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 
11607c478bd9Sstevel@tonic-gate } else { 11617c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11627c478bd9Sstevel@tonic-gate } 11637c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) { 11647c478bd9Sstevel@tonic-gate ASSERT(!connp->conn_pkt_isv6); 11657c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 11667c478bd9Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 11677c478bd9Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source, lport); 11687c478bd9Sstevel@tonic-gate } 11697c478bd9Sstevel@tonic-gate break; 11707c478bd9Sstevel@tonic-gate 11717c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 11727c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 11737c478bd9Sstevel@tonic-gate break; 11747c478bd9Sstevel@tonic-gate } 11757c478bd9Sstevel@tonic-gate 11767c478bd9Sstevel@tonic-gate return (ret); 11777c478bd9Sstevel@tonic-gate } 11787c478bd9Sstevel@tonic-gate

/*
 * Record the protocol, IPv6 source address and local port on connp and
 * insert it into the fanout table selected by the protocol:
 *
 *   TCP	bind fanout, hashed on lport; the cluster listen hook
 *		(cl_inet_listen) is called when one is registered.
 *   SCTP	handed to ipcl_sctp_hash_insert().
 *   UDP	UDP fanout, hashed on lport.
 *   other	per-protocol IPv6 fanout slot, after the labeled-system
 *		exempt-binding conflict check.
 *
 * Returns 0 on success, EADDRINUSE when check_exempt_conflict_v6() reports a
 * conflict, or the value returned by ipcl_sctp_hash_insert() for SCTP.
 */
int
ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
    uint16_t lport)
{
	connf_t		*connfp;
	int		ret = 0;
	ip_stack_t	*ipst;

	/*
	 * Check the pointer before it is dereferenced; the netstack lookup
	 * used to precede this assertion, which made the assertion useless.
	 */
	ASSERT(connp != NULL);
	ipst = connp->conn_netstack->netstack_ip;

	connp->conn_ulp = protocol;
	connp->conn_srcv6 = *src;
	connp->conn_lport = lport;

	switch (protocol) {
	default:
		/* Only protocols other than TCP/UDP/SCTP are checked here. */
		if (is_system_labeled() &&
		    check_exempt_conflict_v6(connp, ipst))
			return (EADDRINUSE);
		/* FALLTHROUGH */
	case IPPROTO_UDP:
		if (protocol == IPPROTO_UDP) {
			IPCL_DEBUG_LVL(128,
			    ("ipcl_bind_insert_v6: connp %p - udp\n",
			    (void *)connp));
			connfp = &ipst->ips_ipcl_udp_fanout[
			    IPCL_UDP_HASH(lport, ipst)];
		} else {
			IPCL_DEBUG_LVL(128,
			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
			    (void *)connp));
			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
		}

		/*
		 * Pick the insertion flavour from how much of the address
		 * pair is specified: remote set -> connected, only local
		 * set -> bound, neither -> wildcard.
		 */
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		break;

	case IPPROTO_TCP:
		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */

		/* Insert it in the Bind Hash */
		ASSERT(connp->conn_zoneid != ALL_ZONES);
		connfp = &ipst->ips_ipcl_bind_fanout[
		    IPCL_BIND_HASH(lport, ipst)];
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
			IPCL_HASH_INSERT_BOUND(connfp, connp);
		} else {
			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
		}
		if (cl_inet_listen != NULL) {
			sa_family_t	addr_family;
			uint8_t		*laddrp;

			/*
			 * The hook is given the bound source address in the
			 * family that matches conn_pkt_isv6.
			 */
			if (connp->conn_pkt_isv6) {
				addr_family = AF_INET6;
				laddrp =
				    (uint8_t *)&connp->conn_bound_source_v6;
			} else {
				addr_family = AF_INET;
				laddrp = (uint8_t *)&connp->conn_bound_source;
			}
			connp->conn_flags |= IPCL_CL_LISTENER;
			(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp,
			    lport);
		}
		break;

	case IPPROTO_SCTP:
		ret = ipcl_sctp_hash_insert(connp, lport);
		break;
	}

	return (ret);
}

12597c478bd9Sstevel@tonic-gate 12607c478bd9Sstevel@tonic-gate /* 12617c478bd9Sstevel@tonic-gate * ipcl_conn_hash insertion routines.
12627c478bd9Sstevel@tonic-gate */ 12637c478bd9Sstevel@tonic-gate int 12647c478bd9Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 12657c478bd9Sstevel@tonic-gate ipaddr_t rem, uint32_t ports) 12667c478bd9Sstevel@tonic-gate { 12677c478bd9Sstevel@tonic-gate connf_t *connfp; 12687c478bd9Sstevel@tonic-gate uint16_t *up; 12697c478bd9Sstevel@tonic-gate conn_t *tconnp; 12707c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 12717c478bd9Sstevel@tonic-gate char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 12727c478bd9Sstevel@tonic-gate #endif 12737c478bd9Sstevel@tonic-gate in_port_t lport; 12747c478bd9Sstevel@tonic-gate int ret = 0; 1275f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12767c478bd9Sstevel@tonic-gate 12777c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 12787c478bd9Sstevel@tonic-gate "dst = %s, ports = %x, protocol = %x", (void *)connp, 12797c478bd9Sstevel@tonic-gate inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 12807c478bd9Sstevel@tonic-gate ports, protocol)); 12817c478bd9Sstevel@tonic-gate 12827c478bd9Sstevel@tonic-gate switch (protocol) { 12837c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 12847c478bd9Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 12857c478bd9Sstevel@tonic-gate /* 12867c478bd9Sstevel@tonic-gate * for a eager connection, i.e connections which 12877c478bd9Sstevel@tonic-gate * have just been created, the initialization is 12887c478bd9Sstevel@tonic-gate * already done in ip at conn_creation time, so 12897c478bd9Sstevel@tonic-gate * we can skip the checks here. 
12907c478bd9Sstevel@tonic-gate */ 12917c478bd9Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 12927c478bd9Sstevel@tonic-gate } 1293f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1294f4b3ec61Sdh IPCL_CONN_HASH(connp->conn_rem, 1295f4b3ec61Sdh connp->conn_ports, ipst)]; 12967c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 12977c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 12987c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 12997c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 13007c478bd9Sstevel@tonic-gate connp->conn_rem, connp->conn_src, 13017c478bd9Sstevel@tonic-gate connp->conn_ports)) { 13027c478bd9Sstevel@tonic-gate 13037c478bd9Sstevel@tonic-gate /* Already have a conn. bail out */ 13047c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13057c478bd9Sstevel@tonic-gate return (EADDRINUSE); 13067c478bd9Sstevel@tonic-gate } 13077c478bd9Sstevel@tonic-gate } 13087c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 13097c478bd9Sstevel@tonic-gate /* 13107c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 13117c478bd9Sstevel@tonic-gate * rebind. Let it happen. 13127c478bd9Sstevel@tonic-gate */ 13137c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13147c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 13157c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13167c478bd9Sstevel@tonic-gate } 1317866ba9ddSjprakash 1318866ba9ddSjprakash ASSERT(connp->conn_recv != NULL); 1319866ba9ddSjprakash 13207c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 13217c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13227c478bd9Sstevel@tonic-gate break; 13237c478bd9Sstevel@tonic-gate 13247c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 13257c0c0508Skcpoon /* 13267c0c0508Skcpoon * The raw socket may have already been bound, remove it 13277c0c0508Skcpoon * from the hash first. 
13287c0c0508Skcpoon */ 13297c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 13307c0c0508Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 13317c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13327c478bd9Sstevel@tonic-gate break; 13337c478bd9Sstevel@tonic-gate 13347c478bd9Sstevel@tonic-gate default: 133545916cd2Sjpk /* 133645916cd2Sjpk * Check for conflicts among MAC exempt bindings. For 133745916cd2Sjpk * transports with port numbers, this is done by the upper 133845916cd2Sjpk * level per-transport binding logic. For all others, it's 133945916cd2Sjpk * done here. 134045916cd2Sjpk */ 1341f4b3ec61Sdh if (is_system_labeled() && 1342f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 134345916cd2Sjpk return (EADDRINUSE); 134445916cd2Sjpk /* FALLTHROUGH */ 134545916cd2Sjpk 134645916cd2Sjpk case IPPROTO_UDP: 13477c478bd9Sstevel@tonic-gate up = (uint16_t *)&ports; 13487c478bd9Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 13497c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1350f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1351f4b3ec61Sdh IPCL_UDP_HASH(up[1], ipst)]; 13527c478bd9Sstevel@tonic-gate } else { 1353f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 13547c478bd9Sstevel@tonic-gate } 13557c478bd9Sstevel@tonic-gate 13567c478bd9Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 13577c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 13587c478bd9Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 13597c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 13607c478bd9Sstevel@tonic-gate } else { 13617c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 13627c478bd9Sstevel@tonic-gate } 13637c478bd9Sstevel@tonic-gate break; 13647c478bd9Sstevel@tonic-gate } 13657c478bd9Sstevel@tonic-gate 13667c478bd9Sstevel@tonic-gate return (ret); 13677c478bd9Sstevel@tonic-gate } 13687c478bd9Sstevel@tonic-gate 13697c478bd9Sstevel@tonic-gate int 
13707c478bd9Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 13717c478bd9Sstevel@tonic-gate const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 13727c478bd9Sstevel@tonic-gate { 13737c478bd9Sstevel@tonic-gate connf_t *connfp; 13747c478bd9Sstevel@tonic-gate uint16_t *up; 13757c478bd9Sstevel@tonic-gate conn_t *tconnp; 13767c478bd9Sstevel@tonic-gate in_port_t lport; 13777c478bd9Sstevel@tonic-gate int ret = 0; 1378f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13797c478bd9Sstevel@tonic-gate 13807c478bd9Sstevel@tonic-gate switch (protocol) { 13817c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 13827c478bd9Sstevel@tonic-gate /* Just need to insert a conn struct */ 13837c478bd9Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 13847c478bd9Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 13857c478bd9Sstevel@tonic-gate } 1386f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1387f4b3ec61Sdh IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports, 1388f4b3ec61Sdh ipst)]; 13897c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13907c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 13917c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 13927c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 13937c478bd9Sstevel@tonic-gate connp->conn_remv6, connp->conn_srcv6, 13947c478bd9Sstevel@tonic-gate connp->conn_ports) && 13957c478bd9Sstevel@tonic-gate (tconnp->conn_tcp->tcp_bound_if == 0 || 13967c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_bound_if == ifindex)) { 13977c478bd9Sstevel@tonic-gate /* Already have a conn. 
bail out */ 13987c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13997c478bd9Sstevel@tonic-gate return (EADDRINUSE); 14007c478bd9Sstevel@tonic-gate } 14017c478bd9Sstevel@tonic-gate } 14027c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 14037c478bd9Sstevel@tonic-gate /* 14047c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 14057c478bd9Sstevel@tonic-gate * rebind. Let it happen. 14067c478bd9Sstevel@tonic-gate */ 14077c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14087c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 14097c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14107c478bd9Sstevel@tonic-gate } 14117c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 14127c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14137c478bd9Sstevel@tonic-gate break; 14147c478bd9Sstevel@tonic-gate 14157c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 14167c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 14177c0c0508Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 14187c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 14197c478bd9Sstevel@tonic-gate break; 14207c478bd9Sstevel@tonic-gate 14217c478bd9Sstevel@tonic-gate default: 1422f4b3ec61Sdh if (is_system_labeled() && 1423f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst)) 142445916cd2Sjpk return (EADDRINUSE); 142545916cd2Sjpk /* FALLTHROUGH */ 142645916cd2Sjpk case IPPROTO_UDP: 14277c478bd9Sstevel@tonic-gate up = (uint16_t *)&ports; 14287c478bd9Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 14297c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1430f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1431f4b3ec61Sdh IPCL_UDP_HASH(up[1], ipst)]; 14327c478bd9Sstevel@tonic-gate } else { 1433f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 14347c478bd9Sstevel@tonic-gate } 14357c478bd9Sstevel@tonic-gate 14367c478bd9Sstevel@tonic-gate if 
(!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 14377c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 14387c478bd9Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 14397c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 14407c478bd9Sstevel@tonic-gate } else { 14417c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 14427c478bd9Sstevel@tonic-gate } 14437c478bd9Sstevel@tonic-gate break; 14447c478bd9Sstevel@tonic-gate } 14457c478bd9Sstevel@tonic-gate 14467c478bd9Sstevel@tonic-gate return (ret); 14477c478bd9Sstevel@tonic-gate } 14487c478bd9Sstevel@tonic-gate 14497c478bd9Sstevel@tonic-gate /* 14507c478bd9Sstevel@tonic-gate * v4 packet classifying function. looks up the fanout table to 14517c478bd9Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 14527c478bd9Sstevel@tonic-gate * the reference held, null otherwise. 145345916cd2Sjpk * 145445916cd2Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 145545916cd2Sjpk * Lookup" comment block are applied. Labels are also checked as described 145645916cd2Sjpk * above. If the packet is from the inside (looped back), and is from the same 145745916cd2Sjpk * zone, then label checks are omitted. 
14587c478bd9Sstevel@tonic-gate */ 14597c478bd9Sstevel@tonic-gate conn_t * 1460f4b3ec61Sdh ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1461f4b3ec61Sdh ip_stack_t *ipst) 14627c478bd9Sstevel@tonic-gate { 14637c478bd9Sstevel@tonic-gate ipha_t *ipha; 14647c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 14657c478bd9Sstevel@tonic-gate uint16_t lport; 14667c478bd9Sstevel@tonic-gate uint16_t fport; 14677c478bd9Sstevel@tonic-gate uint32_t ports; 14687c478bd9Sstevel@tonic-gate conn_t *connp; 14697c478bd9Sstevel@tonic-gate uint16_t *up; 147045916cd2Sjpk boolean_t shared_addr; 147145916cd2Sjpk boolean_t unlabeled; 14727c478bd9Sstevel@tonic-gate 14737c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 14747c478bd9Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 14757c478bd9Sstevel@tonic-gate 14767c478bd9Sstevel@tonic-gate switch (protocol) { 14777c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 14787c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 14797c478bd9Sstevel@tonic-gate connfp = 1480f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 1481f4b3ec61Sdh ports, ipst)]; 14827c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14837c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 14847c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 14857c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(connp, protocol, 14867c478bd9Sstevel@tonic-gate ipha->ipha_src, ipha->ipha_dst, ports)) 14877c478bd9Sstevel@tonic-gate break; 14887c478bd9Sstevel@tonic-gate } 14897c478bd9Sstevel@tonic-gate 14907c478bd9Sstevel@tonic-gate if (connp != NULL) { 149145916cd2Sjpk /* 149245916cd2Sjpk * We have a fully-bound TCP connection. 149345916cd2Sjpk * 149445916cd2Sjpk * For labeled systems, there's no need to check the 149545916cd2Sjpk * label here. It's known to be good as we checked 149645916cd2Sjpk * before allowing the connection to become bound. 
149745916cd2Sjpk */ 14987c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 14997c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15007c478bd9Sstevel@tonic-gate return (connp); 15017c478bd9Sstevel@tonic-gate } 15027c478bd9Sstevel@tonic-gate 15037c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15047c478bd9Sstevel@tonic-gate 15057c478bd9Sstevel@tonic-gate lport = up[1]; 150645916cd2Sjpk unlabeled = B_FALSE; 150745916cd2Sjpk /* Cred cannot be null on IPv4 */ 150845916cd2Sjpk if (is_system_labeled()) 150945916cd2Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 151045916cd2Sjpk TSLF_UNLABELED) != 0; 151145916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 151245916cd2Sjpk if (shared_addr) { 1513f4b3ec61Sdh /* 1514f4b3ec61Sdh * No need to handle exclusive-stack zones since 1515f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1516f4b3ec61Sdh */ 151745916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 151845916cd2Sjpk /* 151945916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 152045916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 152145916cd2Sjpk * search for the zone based on the packet label. 152245916cd2Sjpk * 152345916cd2Sjpk * If there is such a zone, we prefer to find a 152445916cd2Sjpk * connection in it. Otherwise, we look for a 152545916cd2Sjpk * MAC-exempt connection in any zone whose label 152645916cd2Sjpk * dominates the default label on the packet. 
152745916cd2Sjpk */ 152845916cd2Sjpk if (zoneid == ALL_ZONES) 152945916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 153045916cd2Sjpk else 153145916cd2Sjpk unlabeled = B_FALSE; 153245916cd2Sjpk } 153345916cd2Sjpk 1534f4b3ec61Sdh bind_connfp = 1535f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 15367c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 15377c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 15387c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 153945916cd2Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 15405d0bc3edSsommerfe lport) && (IPCL_ZONE_MATCH(connp, zoneid) || 154145916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 15427c478bd9Sstevel@tonic-gate break; 15437c478bd9Sstevel@tonic-gate } 15447c478bd9Sstevel@tonic-gate 154545916cd2Sjpk /* 154645916cd2Sjpk * If the matching connection is SLP on a private address, then 154745916cd2Sjpk * the label on the packet must match the local zone's label. 154845916cd2Sjpk * Otherwise, it must be in the label range defined by tnrh. 154945916cd2Sjpk * This is ensured by tsol_receive_label. 
155045916cd2Sjpk */ 155145916cd2Sjpk if (connp != NULL && is_system_labeled() && 155245916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 155345916cd2Sjpk shared_addr, connp)) { 155445916cd2Sjpk DTRACE_PROBE3( 155545916cd2Sjpk tx__ip__log__info__classify__tcp, 155645916cd2Sjpk char *, 155745916cd2Sjpk "connp(1) could not receive mp(2)", 155845916cd2Sjpk conn_t *, connp, mblk_t *, mp); 155945916cd2Sjpk connp = NULL; 156045916cd2Sjpk } 156145916cd2Sjpk 15627c478bd9Sstevel@tonic-gate if (connp != NULL) { 156345916cd2Sjpk /* Have a listener at least */ 15647c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 15657c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15667c478bd9Sstevel@tonic-gate return (connp); 15677c478bd9Sstevel@tonic-gate } 15687c478bd9Sstevel@tonic-gate 15697c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15707c478bd9Sstevel@tonic-gate 15717c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 15727c478bd9Sstevel@tonic-gate ("ipcl_classify: couldn't classify mp = %p\n", 15737c478bd9Sstevel@tonic-gate (void *)mp)); 15747c478bd9Sstevel@tonic-gate break; 15757c478bd9Sstevel@tonic-gate 15767c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 15777c478bd9Sstevel@tonic-gate lport = up[1]; 157845916cd2Sjpk unlabeled = B_FALSE; 157945916cd2Sjpk /* Cred cannot be null on IPv4 */ 158045916cd2Sjpk if (is_system_labeled()) 158145916cd2Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 158245916cd2Sjpk TSLF_UNLABELED) != 0; 158345916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 158445916cd2Sjpk if (shared_addr) { 1585f4b3ec61Sdh /* 1586f4b3ec61Sdh * No need to handle exclusive-stack zones since 1587f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1588f4b3ec61Sdh */ 158945916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 159045916cd2Sjpk /* 159145916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 159245916cd2Sjpk * ALL_ZONES. 
In that case, we assume it's SLP, and 159345916cd2Sjpk * search for the zone based on the packet label. 159445916cd2Sjpk * 159545916cd2Sjpk * If there is such a zone, we prefer to find a 159645916cd2Sjpk * connection in it. Otherwise, we look for a 159745916cd2Sjpk * MAC-exempt connection in any zone whose label 159845916cd2Sjpk * dominates the default label on the packet. 159945916cd2Sjpk */ 160045916cd2Sjpk if (zoneid == ALL_ZONES) 160145916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 160245916cd2Sjpk else 160345916cd2Sjpk unlabeled = B_FALSE; 160445916cd2Sjpk } 16057c478bd9Sstevel@tonic-gate fport = up[0]; 16067c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1607f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 16087c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 16097c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16107c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 16117c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 16127c478bd9Sstevel@tonic-gate fport, ipha->ipha_src) && 16135d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 161445916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 16157c478bd9Sstevel@tonic-gate break; 16167c478bd9Sstevel@tonic-gate } 16177c478bd9Sstevel@tonic-gate 161845916cd2Sjpk if (connp != NULL && is_system_labeled() && 161945916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 162045916cd2Sjpk shared_addr, connp)) { 162145916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp, 162245916cd2Sjpk char *, "connp(1) could not receive mp(2)", 162345916cd2Sjpk conn_t *, connp, mblk_t *, mp); 162445916cd2Sjpk connp = NULL; 162545916cd2Sjpk } 162645916cd2Sjpk 16277c478bd9Sstevel@tonic-gate if (connp != NULL) { 16287c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 16297c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16307c478bd9Sstevel@tonic-gate return (connp); 
16317c478bd9Sstevel@tonic-gate } 16327c478bd9Sstevel@tonic-gate 16337c478bd9Sstevel@tonic-gate /* 16347c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 16357c478bd9Sstevel@tonic-gate */ 16367c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16377c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 16387c478bd9Sstevel@tonic-gate ("ipcl_classify: cant find udp conn_t for ports : %x %x", 16397c478bd9Sstevel@tonic-gate lport, fport)); 16407c478bd9Sstevel@tonic-gate break; 16417c478bd9Sstevel@tonic-gate } 16427c478bd9Sstevel@tonic-gate 16437c478bd9Sstevel@tonic-gate return (NULL); 16447c478bd9Sstevel@tonic-gate } 16457c478bd9Sstevel@tonic-gate 16467c478bd9Sstevel@tonic-gate conn_t * 1647f4b3ec61Sdh ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1648f4b3ec61Sdh ip_stack_t *ipst) 16497c478bd9Sstevel@tonic-gate { 16507c478bd9Sstevel@tonic-gate ip6_t *ip6h; 16517c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 16527c478bd9Sstevel@tonic-gate uint16_t lport; 16537c478bd9Sstevel@tonic-gate uint16_t fport; 16547c478bd9Sstevel@tonic-gate tcph_t *tcph; 16557c478bd9Sstevel@tonic-gate uint32_t ports; 16567c478bd9Sstevel@tonic-gate conn_t *connp; 16577c478bd9Sstevel@tonic-gate uint16_t *up; 165845916cd2Sjpk boolean_t shared_addr; 165945916cd2Sjpk boolean_t unlabeled; 16607c478bd9Sstevel@tonic-gate 16617c478bd9Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 16627c478bd9Sstevel@tonic-gate 16637c478bd9Sstevel@tonic-gate switch (protocol) { 16647c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 16657c478bd9Sstevel@tonic-gate tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 16667c478bd9Sstevel@tonic-gate up = (uint16_t *)tcph->th_lport; 16677c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 16687c478bd9Sstevel@tonic-gate 16697c478bd9Sstevel@tonic-gate connfp = 1670f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 1671f4b3ec61Sdh ports, ipst)]; 16727c478bd9Sstevel@tonic-gate 
mutex_enter(&connfp->connf_lock); 16737c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16747c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 16757c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(connp, protocol, 16767c478bd9Sstevel@tonic-gate ip6h->ip6_src, ip6h->ip6_dst, ports)) 16777c478bd9Sstevel@tonic-gate break; 16787c478bd9Sstevel@tonic-gate } 16797c478bd9Sstevel@tonic-gate 16807c478bd9Sstevel@tonic-gate if (connp != NULL) { 168145916cd2Sjpk /* 168245916cd2Sjpk * We have a fully-bound TCP connection. 168345916cd2Sjpk * 168445916cd2Sjpk * For labeled systems, there's no need to check the 168545916cd2Sjpk * label here. It's known to be good as we checked 168645916cd2Sjpk * before allowing the connection to become bound. 168745916cd2Sjpk */ 16887c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 16897c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16907c478bd9Sstevel@tonic-gate return (connp); 16917c478bd9Sstevel@tonic-gate } 16927c478bd9Sstevel@tonic-gate 16937c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16947c478bd9Sstevel@tonic-gate 16957c478bd9Sstevel@tonic-gate lport = up[1]; 169645916cd2Sjpk unlabeled = B_FALSE; 169745916cd2Sjpk /* Cred can be null on IPv6 */ 169845916cd2Sjpk if (is_system_labeled()) { 169945916cd2Sjpk cred_t *cr = DB_CRED(mp); 170045916cd2Sjpk 170145916cd2Sjpk unlabeled = (cr != NULL && 170245916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 170345916cd2Sjpk } 170445916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 170545916cd2Sjpk if (shared_addr) { 1706f4b3ec61Sdh /* 1707f4b3ec61Sdh * No need to handle exclusive-stack zones since 1708f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1709f4b3ec61Sdh */ 171045916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 171145916cd2Sjpk /* 171245916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 171345916cd2Sjpk * ALL_ZONES. 
In that case, we assume it's SLP, and 171445916cd2Sjpk * search for the zone based on the packet label. 171545916cd2Sjpk * 171645916cd2Sjpk * If there is such a zone, we prefer to find a 171745916cd2Sjpk * connection in it. Otherwise, we look for a 171845916cd2Sjpk * MAC-exempt connection in any zone whose label 171945916cd2Sjpk * dominates the default label on the packet. 172045916cd2Sjpk */ 172145916cd2Sjpk if (zoneid == ALL_ZONES) 172245916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 172345916cd2Sjpk else 172445916cd2Sjpk unlabeled = B_FALSE; 172545916cd2Sjpk } 172645916cd2Sjpk 1727f4b3ec61Sdh bind_connfp = 1728f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 17297c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 17307c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 17317c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 17327c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 17337c478bd9Sstevel@tonic-gate ip6h->ip6_dst, lport) && 17345d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 173545916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 17367c478bd9Sstevel@tonic-gate break; 17377c478bd9Sstevel@tonic-gate } 17387c478bd9Sstevel@tonic-gate 173945916cd2Sjpk if (connp != NULL && is_system_labeled() && 174045916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 174145916cd2Sjpk shared_addr, connp)) { 174245916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 174345916cd2Sjpk char *, "connp(1) could not receive mp(2)", 174445916cd2Sjpk conn_t *, connp, mblk_t *, mp); 174545916cd2Sjpk connp = NULL; 174645916cd2Sjpk } 174745916cd2Sjpk 17487c478bd9Sstevel@tonic-gate if (connp != NULL) { 17497c478bd9Sstevel@tonic-gate /* Have a listner at least */ 17507c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 17517c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17527c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 17537c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: 
found listner " 17547c478bd9Sstevel@tonic-gate "connp = %p\n", (void *)connp)); 17557c478bd9Sstevel@tonic-gate 17567c478bd9Sstevel@tonic-gate return (connp); 17577c478bd9Sstevel@tonic-gate } 17587c478bd9Sstevel@tonic-gate 17597c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 17607c478bd9Sstevel@tonic-gate 17617c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 17627c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: couldn't classify mp = %p\n", 17637c478bd9Sstevel@tonic-gate (void *)mp)); 17647c478bd9Sstevel@tonic-gate break; 17657c478bd9Sstevel@tonic-gate 17667c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 17677c478bd9Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len]; 17687c478bd9Sstevel@tonic-gate lport = up[1]; 176945916cd2Sjpk unlabeled = B_FALSE; 177045916cd2Sjpk /* Cred can be null on IPv6 */ 177145916cd2Sjpk if (is_system_labeled()) { 177245916cd2Sjpk cred_t *cr = DB_CRED(mp); 177345916cd2Sjpk 177445916cd2Sjpk unlabeled = (cr != NULL && 177545916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 177645916cd2Sjpk } 177745916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 177845916cd2Sjpk if (shared_addr) { 1779f4b3ec61Sdh /* 1780f4b3ec61Sdh * No need to handle exclusive-stack zones since 1781f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1782f4b3ec61Sdh */ 178345916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 178445916cd2Sjpk /* 178545916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 178645916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 178745916cd2Sjpk * search for the zone based on the packet label. 178845916cd2Sjpk * 178945916cd2Sjpk * If there is such a zone, we prefer to find a 179045916cd2Sjpk * connection in it. Otherwise, we look for a 179145916cd2Sjpk * MAC-exempt connection in any zone whose label 179245916cd2Sjpk * dominates the default label on the packet. 
179345916cd2Sjpk */ 179445916cd2Sjpk if (zoneid == ALL_ZONES) 179545916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 179645916cd2Sjpk else 179745916cd2Sjpk unlabeled = B_FALSE; 179845916cd2Sjpk } 179945916cd2Sjpk 18007c478bd9Sstevel@tonic-gate fport = up[0]; 18017c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 18027c478bd9Sstevel@tonic-gate fport)); 1803f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 18047c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 18057c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 18067c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 18077c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 18087c478bd9Sstevel@tonic-gate fport, ip6h->ip6_src) && 18095d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 181045916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 18117c478bd9Sstevel@tonic-gate break; 18127c478bd9Sstevel@tonic-gate } 18137c478bd9Sstevel@tonic-gate 181445916cd2Sjpk if (connp != NULL && is_system_labeled() && 181545916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 181645916cd2Sjpk shared_addr, connp)) { 181745916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 181845916cd2Sjpk char *, "connp(1) could not receive mp(2)", 181945916cd2Sjpk conn_t *, connp, mblk_t *, mp); 182045916cd2Sjpk connp = NULL; 182145916cd2Sjpk } 182245916cd2Sjpk 18237c478bd9Sstevel@tonic-gate if (connp != NULL) { 18247c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 18257c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18267c478bd9Sstevel@tonic-gate return (connp); 18277c478bd9Sstevel@tonic-gate } 18287c478bd9Sstevel@tonic-gate 18297c478bd9Sstevel@tonic-gate /* 18307c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 18317c478bd9Sstevel@tonic-gate */ 18327c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 18337c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 
18347c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 18357c478bd9Sstevel@tonic-gate lport, fport)); 18367c478bd9Sstevel@tonic-gate break; 18377c478bd9Sstevel@tonic-gate } 18387c478bd9Sstevel@tonic-gate 18397c478bd9Sstevel@tonic-gate return (NULL); 18407c478bd9Sstevel@tonic-gate } 18417c478bd9Sstevel@tonic-gate 18427c478bd9Sstevel@tonic-gate /* 18437c478bd9Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 18447c478bd9Sstevel@tonic-gate */ 18457c478bd9Sstevel@tonic-gate conn_t * 1846f4b3ec61Sdh ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) 18477c478bd9Sstevel@tonic-gate { 18487c478bd9Sstevel@tonic-gate uint16_t hdr_len; 18497c478bd9Sstevel@tonic-gate ipha_t *ipha; 18507c478bd9Sstevel@tonic-gate uint8_t *nexthdrp; 18517c478bd9Sstevel@tonic-gate 18527c478bd9Sstevel@tonic-gate if (MBLKL(mp) < sizeof (ipha_t)) 18537c478bd9Sstevel@tonic-gate return (NULL); 18547c478bd9Sstevel@tonic-gate 18557c478bd9Sstevel@tonic-gate switch (IPH_HDR_VERSION(mp->b_rptr)) { 18567c478bd9Sstevel@tonic-gate case IPV4_VERSION: 18577c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 18587c478bd9Sstevel@tonic-gate hdr_len = IPH_HDR_LENGTH(ipha); 18597c478bd9Sstevel@tonic-gate return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1860f4b3ec61Sdh zoneid, ipst)); 18617c478bd9Sstevel@tonic-gate case IPV6_VERSION: 18627c478bd9Sstevel@tonic-gate if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 18637c478bd9Sstevel@tonic-gate &hdr_len, &nexthdrp)) 18647c478bd9Sstevel@tonic-gate return (NULL); 18657c478bd9Sstevel@tonic-gate 1866f4b3ec61Sdh return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); 18677c478bd9Sstevel@tonic-gate } 18687c478bd9Sstevel@tonic-gate 18697c478bd9Sstevel@tonic-gate return (NULL); 18707c478bd9Sstevel@tonic-gate } 18717c478bd9Sstevel@tonic-gate 18727c478bd9Sstevel@tonic-gate conn_t * 187345916cd2Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 1874f4b3ec61Sdh uint32_t 
ports, ipha_t *hdr, ip_stack_t *ipst) 18757c478bd9Sstevel@tonic-gate { 187645916cd2Sjpk connf_t *connfp; 18777c478bd9Sstevel@tonic-gate conn_t *connp; 18787c478bd9Sstevel@tonic-gate in_port_t lport; 18797c478bd9Sstevel@tonic-gate int af; 188045916cd2Sjpk boolean_t shared_addr; 188145916cd2Sjpk boolean_t unlabeled; 188245916cd2Sjpk const void *dst; 18837c478bd9Sstevel@tonic-gate 18847c478bd9Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 188545916cd2Sjpk 188645916cd2Sjpk unlabeled = B_FALSE; 188745916cd2Sjpk /* Cred can be null on IPv6 */ 188845916cd2Sjpk if (is_system_labeled()) { 188945916cd2Sjpk cred_t *cr = DB_CRED(mp); 189045916cd2Sjpk 189145916cd2Sjpk unlabeled = (cr != NULL && 189245916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 189345916cd2Sjpk } 189445916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 189545916cd2Sjpk if (shared_addr) { 1896f4b3ec61Sdh /* 1897f4b3ec61Sdh * No need to handle exclusive-stack zones since ALL_ZONES 1898f4b3ec61Sdh * only applies to the shared stack. 1899f4b3ec61Sdh */ 190045916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 190145916cd2Sjpk /* 190245916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 190345916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and search for 190445916cd2Sjpk * the zone based on the packet label. 190545916cd2Sjpk * 190645916cd2Sjpk * If there is such a zone, we prefer to find a connection in 190745916cd2Sjpk * it. Otherwise, we look for a MAC-exempt connection in any 190845916cd2Sjpk * zone whose label dominates the default label on the packet. 190945916cd2Sjpk */ 191045916cd2Sjpk if (zoneid == ALL_ZONES) 191145916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 191245916cd2Sjpk else 191345916cd2Sjpk unlabeled = B_FALSE; 191445916cd2Sjpk } 191545916cd2Sjpk 19167c478bd9Sstevel@tonic-gate af = IPH_HDR_VERSION(hdr); 191745916cd2Sjpk dst = af == IPV4_VERSION ? 
(const void *)&hdr->ipha_dst : 191845916cd2Sjpk (const void *)&((ip6_t *)hdr)->ip6_dst; 1919f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 19207c478bd9Sstevel@tonic-gate 19217c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 19227c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 19237c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 19247c478bd9Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 192545916cd2Sjpk if (af == (connp->conn_af_isv6 ? IPV4_VERSION : 192645916cd2Sjpk IPV6_VERSION)) 19277c478bd9Sstevel@tonic-gate continue; 19287c478bd9Sstevel@tonic-gate if (connp->conn_fully_bound) { 19297c478bd9Sstevel@tonic-gate if (af == IPV4_VERSION) { 193045916cd2Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 193145916cd2Sjpk hdr->ipha_src, hdr->ipha_dst, ports)) 193245916cd2Sjpk continue; 19337c478bd9Sstevel@tonic-gate } else { 193445916cd2Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 19357c478bd9Sstevel@tonic-gate ((ip6_t *)hdr)->ip6_src, 193645916cd2Sjpk ((ip6_t *)hdr)->ip6_dst, ports)) 193745916cd2Sjpk continue; 19387c478bd9Sstevel@tonic-gate } 19397c478bd9Sstevel@tonic-gate } else { 19407c478bd9Sstevel@tonic-gate if (af == IPV4_VERSION) { 194145916cd2Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 194245916cd2Sjpk hdr->ipha_dst, lport)) 194345916cd2Sjpk continue; 19447c478bd9Sstevel@tonic-gate } else { 194545916cd2Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 194645916cd2Sjpk ((ip6_t *)hdr)->ip6_dst, lport)) 194745916cd2Sjpk continue; 19487c478bd9Sstevel@tonic-gate } 19497c478bd9Sstevel@tonic-gate } 195045916cd2Sjpk 19515d0bc3edSsommerfe if (IPCL_ZONE_MATCH(connp, zoneid) || 195245916cd2Sjpk (unlabeled && connp->conn_mac_exempt)) 195345916cd2Sjpk break; 195445916cd2Sjpk } 195545916cd2Sjpk /* 195645916cd2Sjpk * If the connection is fully-bound and connection-oriented (TCP or 195745916cd2Sjpk * SCTP), then we've already validated the remote system's label. 
195845916cd2Sjpk * There's no need to do it again for every packet. 195945916cd2Sjpk */ 196045916cd2Sjpk if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 196145916cd2Sjpk !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 196245916cd2Sjpk !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 196345916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 196445916cd2Sjpk char *, "connp(1) could not receive mp(2)", 196545916cd2Sjpk conn_t *, connp, mblk_t *, mp); 196645916cd2Sjpk connp = NULL; 19677c478bd9Sstevel@tonic-gate } 19687c0c0508Skcpoon 19697c0c0508Skcpoon if (connp != NULL) 19707c0c0508Skcpoon goto found; 19717c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 19727c0c0508Skcpoon 19737c0c0508Skcpoon /* Try to look for a wildcard match. */ 1974f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 19757c0c0508Skcpoon mutex_enter(&connfp->connf_lock); 19767c0c0508Skcpoon for (connp = connfp->connf_head; connp != NULL; 19777c0c0508Skcpoon connp = connp->conn_next) { 19787c0c0508Skcpoon /* We don't allow v4 fallback for v6 raw socket. */ 19797c0c0508Skcpoon if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 19805d0bc3edSsommerfe IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) { 19817c0c0508Skcpoon continue; 19827c0c0508Skcpoon } 19837c0c0508Skcpoon if (af == IPV4_VERSION) { 19847c0c0508Skcpoon if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 19857c0c0508Skcpoon break; 19867c0c0508Skcpoon } else { 19877c0c0508Skcpoon if (IPCL_RAW_MATCH_V6(connp, protocol, 19887c0c0508Skcpoon ((ip6_t *)hdr)->ip6_dst)) { 19897c0c0508Skcpoon break; 19907c0c0508Skcpoon } 19917c0c0508Skcpoon } 19927c478bd9Sstevel@tonic-gate } 19937c0c0508Skcpoon 19947c0c0508Skcpoon if (connp != NULL) 19957c0c0508Skcpoon goto found; 19967c0c0508Skcpoon 19977c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 19987c478bd9Sstevel@tonic-gate return (NULL); 19997c0c0508Skcpoon 20007c0c0508Skcpoon found: 20017c0c0508Skcpoon ASSERT(connp != NULL); 20027c0c0508Skcpoon CONN_INC_REF(connp); 20037c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 20047c0c0508Skcpoon return (connp); 20057c478bd9Sstevel@tonic-gate } 20067c478bd9Sstevel@tonic-gate 20077c478bd9Sstevel@tonic-gate /* ARGSUSED */ 20087c478bd9Sstevel@tonic-gate static int 2009*fc80c0dfSnordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags) 20107c478bd9Sstevel@tonic-gate { 20117c478bd9Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 20127c478bd9Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 2013*fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1]; 2014*fc80c0dfSnordmark 2015*fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2016*fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t)); 2017*fc80c0dfSnordmark 2018*fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2019*fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 20207c478bd9Sstevel@tonic-gate tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 20217c478bd9Sstevel@tonic-gate connp->conn_tcp = tcp; 20227c478bd9Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 20237c478bd9Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 
20247c478bd9Sstevel@tonic-gate tcp->tcp_connp = connp; 20257c478bd9Sstevel@tonic-gate return (0); 20267c478bd9Sstevel@tonic-gate } 20277c478bd9Sstevel@tonic-gate 20287c478bd9Sstevel@tonic-gate /* ARGSUSED */ 20297c478bd9Sstevel@tonic-gate static void 2030*fc80c0dfSnordmark tcp_conn_destructor(void *buf, void *cdrarg) 2031*fc80c0dfSnordmark { 2032*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2033*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2034*fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1]; 2035*fc80c0dfSnordmark 2036*fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_TCPCONN); 2037*fc80c0dfSnordmark ASSERT(tcp->tcp_connp == connp); 2038*fc80c0dfSnordmark ASSERT(connp->conn_tcp == tcp); 2039*fc80c0dfSnordmark tcp_timermp_free(tcp); 2040*fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2041*fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2042*fc80c0dfSnordmark } 2043*fc80c0dfSnordmark 2044*fc80c0dfSnordmark /* ARGSUSED */ 2045*fc80c0dfSnordmark static int 2046*fc80c0dfSnordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2047*fc80c0dfSnordmark { 2048*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2049*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2050*fc80c0dfSnordmark 2051*fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2052*fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2053*fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2054*fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN; 2055*fc80c0dfSnordmark 2056*fc80c0dfSnordmark return (0); 2057*fc80c0dfSnordmark } 2058*fc80c0dfSnordmark 2059*fc80c0dfSnordmark /* ARGSUSED */ 2060*fc80c0dfSnordmark static void 2061*fc80c0dfSnordmark ip_conn_destructor(void *buf, void *cdrarg) 2062*fc80c0dfSnordmark { 2063*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2064*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2065*fc80c0dfSnordmark 2066*fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_IPCCONN); 2067*fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL); 
2068*fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2069*fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2070*fc80c0dfSnordmark } 2071*fc80c0dfSnordmark 2072*fc80c0dfSnordmark /* ARGSUSED */ 2073*fc80c0dfSnordmark static int 2074*fc80c0dfSnordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags) 2075*fc80c0dfSnordmark { 2076*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2077*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2078*fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1]; 2079*fc80c0dfSnordmark 2080*fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2081*fc80c0dfSnordmark bzero(udp, sizeof (udp_t)); 2082*fc80c0dfSnordmark 2083*fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2084*fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2085*fc80c0dfSnordmark connp->conn_udp = udp; 2086*fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN; 2087*fc80c0dfSnordmark connp->conn_ulp = IPPROTO_UDP; 2088*fc80c0dfSnordmark udp->udp_connp = connp; 2089*fc80c0dfSnordmark return (0); 2090*fc80c0dfSnordmark } 2091*fc80c0dfSnordmark 2092*fc80c0dfSnordmark /* ARGSUSED */ 2093*fc80c0dfSnordmark static void 2094*fc80c0dfSnordmark udp_conn_destructor(void *buf, void *cdrarg) 2095*fc80c0dfSnordmark { 2096*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2097*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2098*fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1]; 2099*fc80c0dfSnordmark 2100*fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_UDPCONN); 2101*fc80c0dfSnordmark ASSERT(udp->udp_connp == connp); 2102*fc80c0dfSnordmark ASSERT(connp->conn_udp == udp); 2103*fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2104*fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2105*fc80c0dfSnordmark } 2106*fc80c0dfSnordmark 2107*fc80c0dfSnordmark /* ARGSUSED */ 2108*fc80c0dfSnordmark static int 2109*fc80c0dfSnordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags) 2110*fc80c0dfSnordmark { 2111*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 
2112*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2113*fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2114*fc80c0dfSnordmark 2115*fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2116*fc80c0dfSnordmark bzero(icmp, sizeof (icmp_t)); 2117*fc80c0dfSnordmark 2118*fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2119*fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2120*fc80c0dfSnordmark connp->conn_icmp = icmp; 2121*fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN; 2122*fc80c0dfSnordmark connp->conn_ulp = IPPROTO_ICMP; 2123*fc80c0dfSnordmark icmp->icmp_connp = connp; 2124*fc80c0dfSnordmark return (0); 2125*fc80c0dfSnordmark } 2126*fc80c0dfSnordmark 2127*fc80c0dfSnordmark /* ARGSUSED */ 2128*fc80c0dfSnordmark static void 2129*fc80c0dfSnordmark rawip_conn_destructor(void *buf, void *cdrarg) 2130*fc80c0dfSnordmark { 2131*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2132*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2133*fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1]; 2134*fc80c0dfSnordmark 2135*fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN); 2136*fc80c0dfSnordmark ASSERT(icmp->icmp_connp == connp); 2137*fc80c0dfSnordmark ASSERT(connp->conn_icmp == icmp); 2138*fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2139*fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2140*fc80c0dfSnordmark } 2141*fc80c0dfSnordmark 2142*fc80c0dfSnordmark /* ARGSUSED */ 2143*fc80c0dfSnordmark static int 2144*fc80c0dfSnordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags) 2145*fc80c0dfSnordmark { 2146*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2147*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2148*fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1]; 2149*fc80c0dfSnordmark 2150*fc80c0dfSnordmark bzero(connp, sizeof (conn_t)); 2151*fc80c0dfSnordmark bzero(rts, sizeof (rts_t)); 2152*fc80c0dfSnordmark 2153*fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 2154*fc80c0dfSnordmark 
cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 2155*fc80c0dfSnordmark connp->conn_rts = rts; 2156*fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN; 2157*fc80c0dfSnordmark rts->rts_connp = connp; 2158*fc80c0dfSnordmark return (0); 2159*fc80c0dfSnordmark } 2160*fc80c0dfSnordmark 2161*fc80c0dfSnordmark /* ARGSUSED */ 2162*fc80c0dfSnordmark static void 2163*fc80c0dfSnordmark rts_conn_destructor(void *buf, void *cdrarg) 21647c478bd9Sstevel@tonic-gate { 2165*fc80c0dfSnordmark itc_t *itc = (itc_t *)buf; 2166*fc80c0dfSnordmark conn_t *connp = &itc->itc_conn; 2167*fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1]; 2168*fc80c0dfSnordmark 2169*fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RTSCONN); 2170*fc80c0dfSnordmark ASSERT(rts->rts_connp == connp); 2171*fc80c0dfSnordmark ASSERT(connp->conn_rts == rts); 2172*fc80c0dfSnordmark mutex_destroy(&connp->conn_lock); 2173*fc80c0dfSnordmark cv_destroy(&connp->conn_cv); 2174*fc80c0dfSnordmark } 2175*fc80c0dfSnordmark 2176*fc80c0dfSnordmark /* 2177*fc80c0dfSnordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers 2178*fc80c0dfSnordmark * in the conn_t. 
2179*fc80c0dfSnordmark */ 2180*fc80c0dfSnordmark void 2181*fc80c0dfSnordmark ipcl_conn_cleanup(conn_t *connp) 2182*fc80c0dfSnordmark { 2183*fc80c0dfSnordmark ASSERT(connp->conn_ire_cache == NULL); 2184*fc80c0dfSnordmark ASSERT(connp->conn_latch == NULL); 2185*fc80c0dfSnordmark #ifdef notdef 2186*fc80c0dfSnordmark ASSERT(connp->conn_rq == NULL); 2187*fc80c0dfSnordmark ASSERT(connp->conn_wq == NULL); 2188*fc80c0dfSnordmark #endif 2189*fc80c0dfSnordmark ASSERT(connp->conn_cred == NULL); 2190*fc80c0dfSnordmark ASSERT(connp->conn_g_fanout == NULL); 2191*fc80c0dfSnordmark ASSERT(connp->conn_g_next == NULL); 2192*fc80c0dfSnordmark ASSERT(connp->conn_g_prev == NULL); 2193*fc80c0dfSnordmark ASSERT(connp->conn_policy == NULL); 2194*fc80c0dfSnordmark ASSERT(connp->conn_fanout == NULL); 2195*fc80c0dfSnordmark ASSERT(connp->conn_next == NULL); 2196*fc80c0dfSnordmark ASSERT(connp->conn_prev == NULL); 2197*fc80c0dfSnordmark #ifdef notdef 2198*fc80c0dfSnordmark /* 2199*fc80c0dfSnordmark * The ill and ipif pointers are not cleared before the conn_t 2200*fc80c0dfSnordmark * goes away since they do not hold a reference on the ill/ipif. 2201*fc80c0dfSnordmark * We should replace these pointers with ifindex/ipaddr_t to 2202*fc80c0dfSnordmark * make the code less complex. 
2203*fc80c0dfSnordmark */ 2204*fc80c0dfSnordmark ASSERT(connp->conn_xmit_if_ill == NULL); 2205*fc80c0dfSnordmark ASSERT(connp->conn_nofailover_ill == NULL); 2206*fc80c0dfSnordmark ASSERT(connp->conn_outgoing_ill == NULL); 2207*fc80c0dfSnordmark ASSERT(connp->conn_incoming_ill == NULL); 2208*fc80c0dfSnordmark ASSERT(connp->conn_outgoing_pill == NULL); 2209*fc80c0dfSnordmark ASSERT(connp->conn_multicast_ipif == NULL); 2210*fc80c0dfSnordmark ASSERT(connp->conn_multicast_ill == NULL); 2211*fc80c0dfSnordmark #endif 2212*fc80c0dfSnordmark ASSERT(connp->conn_oper_pending_ill == NULL); 2213*fc80c0dfSnordmark ASSERT(connp->conn_ilg == NULL); 2214*fc80c0dfSnordmark ASSERT(connp->conn_drain_next == NULL); 2215*fc80c0dfSnordmark ASSERT(connp->conn_drain_prev == NULL); 2216*fc80c0dfSnordmark ASSERT(connp->conn_idl == NULL); 2217*fc80c0dfSnordmark ASSERT(connp->conn_ipsec_opt_mp == NULL); 2218*fc80c0dfSnordmark ASSERT(connp->conn_peercred == NULL); 2219*fc80c0dfSnordmark ASSERT(connp->conn_netstack == NULL); 2220*fc80c0dfSnordmark 2221*fc80c0dfSnordmark /* Clear out the conn_t fields that are not preserved */ 2222*fc80c0dfSnordmark bzero(&connp->conn_start_clr, 2223*fc80c0dfSnordmark sizeof (conn_t) - 2224*fc80c0dfSnordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp)); 2225*fc80c0dfSnordmark 22267c478bd9Sstevel@tonic-gate } 22277c478bd9Sstevel@tonic-gate 22287c478bd9Sstevel@tonic-gate /* 22297c478bd9Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 22307c478bd9Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 22317c478bd9Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 22327c478bd9Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved 22337c478bd9Sstevel@tonic-gate * as follows. 
22347c478bd9Sstevel@tonic-gate * 22357c478bd9Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 22367c478bd9Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 22377c478bd9Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this 22387c478bd9Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 22397c478bd9Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note 22407c478bd9Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 22417c478bd9Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 22427c478bd9Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 22437c478bd9Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 22447c478bd9Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 22457c478bd9Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 22467c478bd9Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 22477c478bd9Sstevel@tonic-gate */ 22487c478bd9Sstevel@tonic-gate void 22497c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 22507c478bd9Sstevel@tonic-gate { 22517c478bd9Sstevel@tonic-gate int index; 2252f4b3ec61Sdh struct connf_s *connfp; 2253f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 22547c478bd9Sstevel@tonic-gate 22557c478bd9Sstevel@tonic-gate /* 22567c478bd9Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 22577c478bd9Sstevel@tonic-gate * in the global lists is sufficient. 
22587c478bd9Sstevel@tonic-gate */ 2259f4b3ec61Sdh ipst->ips_conn_g_index++; 2260f4b3ec61Sdh index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 22617c478bd9Sstevel@tonic-gate 22627c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 22637c478bd9Sstevel@tonic-gate /* 22647c478bd9Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 22657c478bd9Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 22667c478bd9Sstevel@tonic-gate */ 22677c478bd9Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 22687c478bd9Sstevel@tonic-gate 2269f4b3ec61Sdh connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 22707c478bd9Sstevel@tonic-gate /* Insert at the head of the list */ 2271f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 2272f4b3ec61Sdh connp->conn_g_next = connfp->connf_head; 22737c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 22747c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 2275f4b3ec61Sdh connfp->connf_head = connp; 22767c478bd9Sstevel@tonic-gate 22777c478bd9Sstevel@tonic-gate /* The fanout bucket this conn points to */ 2278f4b3ec61Sdh connp->conn_g_fanout = connfp; 22797c478bd9Sstevel@tonic-gate 2280f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 22817c478bd9Sstevel@tonic-gate } 22827c478bd9Sstevel@tonic-gate 22837c478bd9Sstevel@tonic-gate void 22847c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 22857c478bd9Sstevel@tonic-gate { 2286f4b3ec61Sdh struct connf_s *connfp; 2287f4b3ec61Sdh 22887c478bd9Sstevel@tonic-gate /* 22897c478bd9Sstevel@tonic-gate * We were never inserted in the global multi list. 22907c478bd9Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 22917c478bd9Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 
22927c478bd9Sstevel@tonic-gate */ 22937c478bd9Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 22947c478bd9Sstevel@tonic-gate return; 22957c478bd9Sstevel@tonic-gate 2296f4b3ec61Sdh connfp = connp->conn_g_fanout; 2297f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 22987c478bd9Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 22997c478bd9Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 23007c478bd9Sstevel@tonic-gate else 2301f4b3ec61Sdh connfp->connf_head = connp->conn_g_next; 23027c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 23037c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2304f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 23057c478bd9Sstevel@tonic-gate 23067c478bd9Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 23077c478bd9Sstevel@tonic-gate connp->conn_g_next = NULL; 23087c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 2309*fc80c0dfSnordmark connp->conn_g_fanout = NULL; 23107c478bd9Sstevel@tonic-gate } 23117c478bd9Sstevel@tonic-gate 23127c478bd9Sstevel@tonic-gate /* 23137c478bd9Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 23147c478bd9Sstevel@tonic-gate * with the specified argument for each. 23157c478bd9Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 23167c478bd9Sstevel@tonic-gate * 23177c478bd9Sstevel@tonic-gate * IPCs may hold pointers to ipif/ill. To guard against stale pointers 23187c478bd9Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 23197c478bd9Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking 23207c478bd9Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 23217c478bd9Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any 23227c478bd9Sstevel@tonic-gate * stale pointers. 
The CONN_CLOSING flag ensures that no new reference 23237c478bd9Sstevel@tonic-gate * is created to the struct that is going away. 23247c478bd9Sstevel@tonic-gate */ 23257c478bd9Sstevel@tonic-gate void 2326f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 23277c478bd9Sstevel@tonic-gate { 23287c478bd9Sstevel@tonic-gate int i; 23297c478bd9Sstevel@tonic-gate conn_t *connp; 23307c478bd9Sstevel@tonic-gate conn_t *prev_connp; 23317c478bd9Sstevel@tonic-gate 23327c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2333f4b3ec61Sdh mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23347c478bd9Sstevel@tonic-gate prev_connp = NULL; 2335f4b3ec61Sdh connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 23367c478bd9Sstevel@tonic-gate while (connp != NULL) { 23377c478bd9Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 23387c478bd9Sstevel@tonic-gate if (connp->conn_state_flags & 23397c478bd9Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 23407c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 23417c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 23427c478bd9Sstevel@tonic-gate continue; 23437c478bd9Sstevel@tonic-gate } 23447c478bd9Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 23457c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 2346f4b3ec61Sdh mutex_exit( 2347f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23487c478bd9Sstevel@tonic-gate (*func)(connp, arg); 23497c478bd9Sstevel@tonic-gate if (prev_connp != NULL) 23507c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 2351f4b3ec61Sdh mutex_enter( 2352f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23537c478bd9Sstevel@tonic-gate prev_connp = connp; 23547c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 23557c478bd9Sstevel@tonic-gate } 2356f4b3ec61Sdh mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 23577c478bd9Sstevel@tonic-gate if (prev_connp != NULL) 23587c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 
23597c478bd9Sstevel@tonic-gate } 23607c478bd9Sstevel@tonic-gate } 23617c478bd9Sstevel@tonic-gate 23627c478bd9Sstevel@tonic-gate /* 23637c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 23647c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 23657c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 2366d0ab37afSethindra * (peer tcp in ESTABLISHED state). 23677c478bd9Sstevel@tonic-gate */ 23687c478bd9Sstevel@tonic-gate conn_t * 2369f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, 2370f4b3ec61Sdh ip_stack_t *ipst) 23717c478bd9Sstevel@tonic-gate { 23727c478bd9Sstevel@tonic-gate uint32_t ports; 23737c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 23747c478bd9Sstevel@tonic-gate connf_t *connfp; 23757c478bd9Sstevel@tonic-gate conn_t *tconnp; 23767c478bd9Sstevel@tonic-gate boolean_t zone_chk; 23777c478bd9Sstevel@tonic-gate 23787c478bd9Sstevel@tonic-gate /* 23797c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 23807c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 23817c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 23827c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 
23837c478bd9Sstevel@tonic-gate */ 23847c478bd9Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 23857c478bd9Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 23867c478bd9Sstevel@tonic-gate 23877c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 23887c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 23897c478bd9Sstevel@tonic-gate 2390f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2391f4b3ec61Sdh ports, ipst)]; 23927c478bd9Sstevel@tonic-gate 23937c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 23947c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 23957c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 23967c478bd9Sstevel@tonic-gate 23977c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 23987c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 2399d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24007c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24017c478bd9Sstevel@tonic-gate 24027c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 24037c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 24047c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24057c478bd9Sstevel@tonic-gate return (tconnp); 24067c478bd9Sstevel@tonic-gate } 24077c478bd9Sstevel@tonic-gate } 24087c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24097c478bd9Sstevel@tonic-gate return (NULL); 24107c478bd9Sstevel@tonic-gate } 24117c478bd9Sstevel@tonic-gate 24127c478bd9Sstevel@tonic-gate /* 24137c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 24147c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 24157c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. 
Only checks for connected entries 2416d0ab37afSethindra * (peer tcp in ESTABLISHED state). 24177c478bd9Sstevel@tonic-gate */ 24187c478bd9Sstevel@tonic-gate conn_t * 2419f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, 2420f4b3ec61Sdh ip_stack_t *ipst) 24217c478bd9Sstevel@tonic-gate { 24227c478bd9Sstevel@tonic-gate uint32_t ports; 24237c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 24247c478bd9Sstevel@tonic-gate connf_t *connfp; 24257c478bd9Sstevel@tonic-gate conn_t *tconnp; 24267c478bd9Sstevel@tonic-gate boolean_t zone_chk; 24277c478bd9Sstevel@tonic-gate 24287c478bd9Sstevel@tonic-gate /* 24297c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 24307c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 24317c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 24327c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 24337c478bd9Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 24347c478bd9Sstevel@tonic-gate * current Zone implementation treats each link local address as 24357c478bd9Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
24367c478bd9Sstevel@tonic-gate */ 24377c478bd9Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 24387c478bd9Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 24397c478bd9Sstevel@tonic-gate 24407c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 24417c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 24427c478bd9Sstevel@tonic-gate 2443f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2444f4b3ec61Sdh ports, ipst)]; 24457c478bd9Sstevel@tonic-gate 24467c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24477c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24487c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24497c478bd9Sstevel@tonic-gate 24507c478bd9Sstevel@tonic-gate /* We skip tcp_bound_if check here as this is loopback tcp */ 24517c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 24527c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 2453d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 24547c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 24557c478bd9Sstevel@tonic-gate 24567c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 24577c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 24587c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24597c478bd9Sstevel@tonic-gate return (tconnp); 24607c478bd9Sstevel@tonic-gate } 24617c478bd9Sstevel@tonic-gate } 24627c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24637c478bd9Sstevel@tonic-gate return (NULL); 24647c478bd9Sstevel@tonic-gate } 24657c478bd9Sstevel@tonic-gate 24667c478bd9Sstevel@tonic-gate /* 24677c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 24687c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 
24697c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 24707c478bd9Sstevel@tonic-gate */ 24717c478bd9Sstevel@tonic-gate conn_t * 2472f4b3ec61Sdh ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 2473f4b3ec61Sdh ip_stack_t *ipst) 24747c478bd9Sstevel@tonic-gate { 24757c478bd9Sstevel@tonic-gate uint32_t ports; 24767c478bd9Sstevel@tonic-gate uint16_t *pports; 24777c478bd9Sstevel@tonic-gate connf_t *connfp; 24787c478bd9Sstevel@tonic-gate conn_t *tconnp; 24797c478bd9Sstevel@tonic-gate 24807c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 24817c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 24827c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 24837c478bd9Sstevel@tonic-gate 2484f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2485121e5416Skcpoon ports, ipst)]; 24867c478bd9Sstevel@tonic-gate 24877c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 24887c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 24897c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 24907c478bd9Sstevel@tonic-gate 24917c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 24927c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 24937c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 24947c478bd9Sstevel@tonic-gate 24957c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 24967c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 24977c478bd9Sstevel@tonic-gate return (tconnp); 24987c478bd9Sstevel@tonic-gate } 24997c478bd9Sstevel@tonic-gate } 25007c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25017c478bd9Sstevel@tonic-gate return (NULL); 25027c478bd9Sstevel@tonic-gate } 25037c478bd9Sstevel@tonic-gate 25047c478bd9Sstevel@tonic-gate /* 25057c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 
25067c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 25077c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 25087c478bd9Sstevel@tonic-gate * Match on ifindex in addition to addresses. 25097c478bd9Sstevel@tonic-gate */ 25107c478bd9Sstevel@tonic-gate conn_t * 25117c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2512f4b3ec61Sdh uint_t ifindex, ip_stack_t *ipst) 25137c478bd9Sstevel@tonic-gate { 25147c478bd9Sstevel@tonic-gate tcp_t *tcp; 25157c478bd9Sstevel@tonic-gate uint32_t ports; 25167c478bd9Sstevel@tonic-gate uint16_t *pports; 25177c478bd9Sstevel@tonic-gate connf_t *connfp; 25187c478bd9Sstevel@tonic-gate conn_t *tconnp; 25197c478bd9Sstevel@tonic-gate 25207c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 25217c478bd9Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 25227c478bd9Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 25237c478bd9Sstevel@tonic-gate 2524f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2525121e5416Skcpoon ports, ipst)]; 25267c478bd9Sstevel@tonic-gate 25277c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 25287c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 25297c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 25307c478bd9Sstevel@tonic-gate 25317c478bd9Sstevel@tonic-gate tcp = tconnp->conn_tcp; 25327c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 25337c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 25347c478bd9Sstevel@tonic-gate tcp->tcp_state >= min_state && 25357c478bd9Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 25367c478bd9Sstevel@tonic-gate tcp->tcp_bound_if == ifindex)) { 25377c478bd9Sstevel@tonic-gate 25387c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 25397c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25407c478bd9Sstevel@tonic-gate return (tconnp); 
25417c478bd9Sstevel@tonic-gate } 25427c478bd9Sstevel@tonic-gate } 25437c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 25447c478bd9Sstevel@tonic-gate return (NULL); 25457c478bd9Sstevel@tonic-gate } 25467c478bd9Sstevel@tonic-gate 25477c478bd9Sstevel@tonic-gate /* 254845916cd2Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 254945916cd2Sjpk * a listener when changing state. 25507c478bd9Sstevel@tonic-gate */ 25517c478bd9Sstevel@tonic-gate conn_t * 2552f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2553f4b3ec61Sdh ip_stack_t *ipst) 25547c478bd9Sstevel@tonic-gate { 25557c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 25567c478bd9Sstevel@tonic-gate conn_t *connp; 25577c478bd9Sstevel@tonic-gate tcp_t *tcp; 25587c478bd9Sstevel@tonic-gate 25597c478bd9Sstevel@tonic-gate /* 25607c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 25617c478bd9Sstevel@tonic-gate * all zeros. 25627c478bd9Sstevel@tonic-gate */ 25637c478bd9Sstevel@tonic-gate if (laddr == 0) 25647c478bd9Sstevel@tonic-gate return (NULL); 25657c478bd9Sstevel@tonic-gate 256645916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 256745916cd2Sjpk 2568f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 25697c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 25707c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 25717c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 25727c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 25737c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 25745d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 25757c478bd9Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 25767c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 25777c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 25787c478bd9Sstevel@tonic-gate return (connp); 25797c478bd9Sstevel@tonic-gate } 
25807c478bd9Sstevel@tonic-gate } 25817c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 25827c478bd9Sstevel@tonic-gate return (NULL); 25837c478bd9Sstevel@tonic-gate } 25847c478bd9Sstevel@tonic-gate 258545916cd2Sjpk /* 258645916cd2Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 258745916cd2Sjpk * a listener when changing state. 258845916cd2Sjpk */ 25897c478bd9Sstevel@tonic-gate conn_t * 25907c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2591f4b3ec61Sdh zoneid_t zoneid, ip_stack_t *ipst) 25927c478bd9Sstevel@tonic-gate { 25937c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 25947c478bd9Sstevel@tonic-gate conn_t *connp = NULL; 25957c478bd9Sstevel@tonic-gate tcp_t *tcp; 25967c478bd9Sstevel@tonic-gate 25977c478bd9Sstevel@tonic-gate /* 25987c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 25997c478bd9Sstevel@tonic-gate * all zeros. 26007c478bd9Sstevel@tonic-gate */ 26017c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 26027c478bd9Sstevel@tonic-gate return (NULL); 26037c478bd9Sstevel@tonic-gate 260445916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 26057c478bd9Sstevel@tonic-gate 2606f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 26077c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 26087c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 26097c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 26107c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 26117c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 26125d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 26137c478bd9Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 26147c478bd9Sstevel@tonic-gate tcp->tcp_bound_if == ifindex) && 26157c478bd9Sstevel@tonic-gate tcp->tcp_listener == NULL) { 26167c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 
26177c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26187c478bd9Sstevel@tonic-gate return (connp); 26197c478bd9Sstevel@tonic-gate } 26207c478bd9Sstevel@tonic-gate } 26217c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 26227c478bd9Sstevel@tonic-gate return (NULL); 26237c478bd9Sstevel@tonic-gate } 26247c478bd9Sstevel@tonic-gate 2625ff550d0eSmasputra /* 2626ff550d0eSmasputra * ipcl_get_next_conn 2627ff550d0eSmasputra * get the next entry in the conn global list 2628ff550d0eSmasputra * and put a reference on the next_conn. 2629ff550d0eSmasputra * decrement the reference on the current conn. 2630ff550d0eSmasputra * 2631ff550d0eSmasputra * This is an iterator based walker function that also provides for 2632ff550d0eSmasputra * some selection by the caller. It walks through the conn_hash bucket 2633ff550d0eSmasputra * searching for the next valid connp in the list, and selects connections 2634ff550d0eSmasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2635ff550d0eSmasputra * thus ensuring that the conn exists when the caller uses the conn. 2636ff550d0eSmasputra */ 2637ff550d0eSmasputra conn_t * 2638ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2639ff550d0eSmasputra { 2640ff550d0eSmasputra conn_t *next_connp; 2641ff550d0eSmasputra 2642ff550d0eSmasputra if (connfp == NULL) 2643ff550d0eSmasputra return (NULL); 2644ff550d0eSmasputra 2645ff550d0eSmasputra mutex_enter(&connfp->connf_lock); 2646ff550d0eSmasputra 2647ff550d0eSmasputra next_connp = (connp == NULL) ? 
2648ff550d0eSmasputra connfp->connf_head : connp->conn_g_next; 2649ff550d0eSmasputra 2650ff550d0eSmasputra while (next_connp != NULL) { 2651ff550d0eSmasputra mutex_enter(&next_connp->conn_lock); 2652ff550d0eSmasputra if (!(next_connp->conn_flags & conn_flags) || 2653ff550d0eSmasputra (next_connp->conn_state_flags & 2654ff550d0eSmasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2655ff550d0eSmasputra /* 2656ff550d0eSmasputra * This conn has been condemned or 2657ff550d0eSmasputra * is closing, or the flags don't match 2658ff550d0eSmasputra */ 2659ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2660ff550d0eSmasputra next_connp = next_connp->conn_g_next; 2661ff550d0eSmasputra continue; 2662ff550d0eSmasputra } 2663ff550d0eSmasputra CONN_INC_REF_LOCKED(next_connp); 2664ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2665ff550d0eSmasputra break; 2666ff550d0eSmasputra } 2667ff550d0eSmasputra 2668ff550d0eSmasputra mutex_exit(&connfp->connf_lock); 2669ff550d0eSmasputra 2670ff550d0eSmasputra if (connp != NULL) 2671ff550d0eSmasputra CONN_DEC_REF(connp); 2672ff550d0eSmasputra 2673ff550d0eSmasputra return (next_connp); 2674ff550d0eSmasputra } 2675ff550d0eSmasputra 26767c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG 26777c478bd9Sstevel@tonic-gate /* 26787c478bd9Sstevel@tonic-gate * Trace of the last NBUF refhold/refrele 26797c478bd9Sstevel@tonic-gate */ 26807c478bd9Sstevel@tonic-gate int 26817c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 26827c478bd9Sstevel@tonic-gate { 26837c478bd9Sstevel@tonic-gate int last; 26847c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 26857c478bd9Sstevel@tonic-gate 26867c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 26877c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 26887c478bd9Sstevel@tonic-gate last++; 26897c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 26907c478bd9Sstevel@tonic-gate last = 0; 26917c478bd9Sstevel@tonic-gate 26927c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 
26936a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 26947c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 26957c478bd9Sstevel@tonic-gate return (1); 26967c478bd9Sstevel@tonic-gate } 26977c478bd9Sstevel@tonic-gate 26987c478bd9Sstevel@tonic-gate int 26997c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 27007c478bd9Sstevel@tonic-gate { 27017c478bd9Sstevel@tonic-gate int last; 27027c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 27037c478bd9Sstevel@tonic-gate 27047c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 27057c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 27067c478bd9Sstevel@tonic-gate last++; 27077c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 27087c478bd9Sstevel@tonic-gate last = 0; 27097c478bd9Sstevel@tonic-gate 27107c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 27116a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH); 27127c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 27137c478bd9Sstevel@tonic-gate return (1); 27147c478bd9Sstevel@tonic-gate } 27157c478bd9Sstevel@tonic-gate #endif 2716