17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5ee4701baSericheng * Common Development and Distribution License (the "License"). 6ee4701baSericheng * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 2274e20cfeSnh * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 2845916cd2Sjpk const char ipclassifier_version[] = "@(#)ipclassifier.c %I% %E% SMI"; 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * IP PACKET CLASSIFIER 327c478bd9Sstevel@tonic-gate * 337c478bd9Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent 347c478bd9Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides 357c478bd9Sstevel@tonic-gate * interface for managing connection states. 367c478bd9Sstevel@tonic-gate * 377c478bd9Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among 387c478bd9Sstevel@tonic-gate * other things: 397c478bd9Sstevel@tonic-gate * 407c478bd9Sstevel@tonic-gate * o local/remote address and ports 417c478bd9Sstevel@tonic-gate * o Transport protocol 427c478bd9Sstevel@tonic-gate * o squeue for the connection (for TCP only) 437c478bd9Sstevel@tonic-gate * o reference counter 447c478bd9Sstevel@tonic-gate * o Connection state 457c478bd9Sstevel@tonic-gate * o hash table linkage 467c478bd9Sstevel@tonic-gate * o interface/ire information 477c478bd9Sstevel@tonic-gate * o credentials 487c478bd9Sstevel@tonic-gate * o ipsec policy 497c478bd9Sstevel@tonic-gate * o send and receive functions. 507c478bd9Sstevel@tonic-gate * o mutex lock. 517c478bd9Sstevel@tonic-gate * 527c478bd9Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the 537c478bd9Sstevel@tonic-gate * reference counter drops to zero. 
A reference is incremented when connection 547c478bd9Sstevel@tonic-gate * is placed in a list or table, when incoming packet for the connection arrives 557c478bd9Sstevel@tonic-gate * and when connection is processed via squeue (squeue processing may be 567c478bd9Sstevel@tonic-gate * asynchronous and the reference protects the connection from being destroyed 577c478bd9Sstevel@tonic-gate * before its processing is finished). 587c478bd9Sstevel@tonic-gate * 597c478bd9Sstevel@tonic-gate * send and receive functions are currently used for TCP only. The send function 607c478bd9Sstevel@tonic-gate * determines the IP entry point for the packet once it leaves TCP to be sent to 617c478bd9Sstevel@tonic-gate * the destination address. The receive function is used by IP when the packet 627c478bd9Sstevel@tonic-gate * should be passed for TCP processing. When a new connection is created these 637c478bd9Sstevel@tonic-gate * are set to ip_output() and tcp_input() respectively. During the lifetime of 647c478bd9Sstevel@tonic-gate * the connection the send and receive functions may change depending on the 657c478bd9Sstevel@tonic-gate * changes in the connection state. For example, once the connection is bound to 667c478bd9Sstevel@tonic-gate * an address, the receive function for this connection is set to 677c478bd9Sstevel@tonic-gate * tcp_conn_request(). This allows incoming SYNs to go directly into the 687c478bd9Sstevel@tonic-gate * listener SYN processing function without going to tcp_input() first. 
697c478bd9Sstevel@tonic-gate * 707c478bd9Sstevel@tonic-gate * Classifier uses several hash tables: 717c478bd9Sstevel@tonic-gate * 727c478bd9Sstevel@tonic-gate * ipcl_conn_fanout: contains all TCP connections in CONNECTED state 737c478bd9Sstevel@tonic-gate * ipcl_bind_fanout: contains all connections in BOUND state 747c478bd9Sstevel@tonic-gate * ipcl_proto_fanout: IPv4 protocol fanout 757c478bd9Sstevel@tonic-gate * ipcl_proto_fanout_v6: IPv6 protocol fanout 767c478bd9Sstevel@tonic-gate * ipcl_udp_fanout: contains all UDP connections 777c478bd9Sstevel@tonic-gate * ipcl_globalhash_fanout: contains all connections 787c478bd9Sstevel@tonic-gate * 797c478bd9Sstevel@tonic-gate * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering) 807c478bd9Sstevel@tonic-gate * which need to view all existing connections. 817c478bd9Sstevel@tonic-gate * 827c478bd9Sstevel@tonic-gate * All tables are protected by per-bucket locks. When both per-bucket lock and 837c478bd9Sstevel@tonic-gate * connection lock need to be held, the per-bucket lock should be acquired 847c478bd9Sstevel@tonic-gate * first, followed by the connection lock. 857c478bd9Sstevel@tonic-gate * 867c478bd9Sstevel@tonic-gate * All functions doing search in one of these tables increment a reference 877c478bd9Sstevel@tonic-gate * counter on the connection found (if any). This reference should be dropped 887c478bd9Sstevel@tonic-gate * when the caller has finished processing the connection. 
897c478bd9Sstevel@tonic-gate * 907c478bd9Sstevel@tonic-gate * 917c478bd9Sstevel@tonic-gate * INTERFACES: 927c478bd9Sstevel@tonic-gate * =========== 937c478bd9Sstevel@tonic-gate * 947c478bd9Sstevel@tonic-gate * Connection Lookup: 957c478bd9Sstevel@tonic-gate * ------------------ 967c478bd9Sstevel@tonic-gate * 97*f4b3ec61Sdh * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack) 98*f4b3ec61Sdh * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack) 997c478bd9Sstevel@tonic-gate * 1007c478bd9Sstevel@tonic-gate * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if 1017c478bd9Sstevel@tonic-gate * it can't find any associated connection. If the connection is found, its 1027c478bd9Sstevel@tonic-gate * reference counter is incremented. 1037c478bd9Sstevel@tonic-gate * 1047c478bd9Sstevel@tonic-gate * mp: mblock, containing packet header. The full header should fit 1057c478bd9Sstevel@tonic-gate * into a single mblock. It should also contain at least full IP 1067c478bd9Sstevel@tonic-gate * and TCP or UDP header. 1077c478bd9Sstevel@tonic-gate * 1087c478bd9Sstevel@tonic-gate * protocol: Either IPPROTO_TCP or IPPROTO_UDP. 1097c478bd9Sstevel@tonic-gate * 1107c478bd9Sstevel@tonic-gate * hdr_len: The size of IP header. It is used to find TCP or UDP header in 1117c478bd9Sstevel@tonic-gate * the packet. 1127c478bd9Sstevel@tonic-gate * 11345916cd2Sjpk * zoneid: The zone in which the returned connection must be; the zoneid 11445916cd2Sjpk * corresponding to the ire_zoneid on the IRE located for the 11545916cd2Sjpk * packet's destination address. 1167c478bd9Sstevel@tonic-gate * 1177c478bd9Sstevel@tonic-gate * For TCP connections, the lookup order is as follows: 1187c478bd9Sstevel@tonic-gate * 5-tuple {src, dst, protocol, local port, remote port} 1197c478bd9Sstevel@tonic-gate * lookup in ipcl_conn_fanout table. 1207c478bd9Sstevel@tonic-gate * 3-tuple {dst, remote port, protocol} lookup in 1217c478bd9Sstevel@tonic-gate * ipcl_bind_fanout table. 
1227c478bd9Sstevel@tonic-gate * 1237c478bd9Sstevel@tonic-gate * For UDP connections, a 5-tuple {src, dst, protocol, local port, 1247c478bd9Sstevel@tonic-gate * remote port} lookup is done on ipcl_udp_fanout. Note that, 1257c478bd9Sstevel@tonic-gate * these interfaces do not handle cases where a packet belongs 1267c478bd9Sstevel@tonic-gate * to multiple UDP clients, which is handled in IP itself. 1277c478bd9Sstevel@tonic-gate * 12845916cd2Sjpk * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must 12945916cd2Sjpk * determine which actual zone gets the segment. This is used only in a 13045916cd2Sjpk * labeled environment. The matching rules are: 13145916cd2Sjpk * 13245916cd2Sjpk * - If it's not a multilevel port, then the label on the packet selects 13345916cd2Sjpk * the zone. Unlabeled packets are delivered to the global zone. 13445916cd2Sjpk * 13545916cd2Sjpk * - If it's a multilevel port, then only the zone registered to receive 13645916cd2Sjpk * packets on that port matches. 13745916cd2Sjpk * 13845916cd2Sjpk * Also, in a labeled environment, packet labels need to be checked. For fully 13945916cd2Sjpk * bound TCP connections, we can assume that the packet label was checked 14045916cd2Sjpk * during connection establishment, and doesn't need to be checked on each 14145916cd2Sjpk * packet. For others, though, we need to check for strict equality or, for 14245916cd2Sjpk * multilevel ports, membership in the range or set. This part currently does 14345916cd2Sjpk * a tnrh lookup on each packet, but could be optimized to use cached results 14445916cd2Sjpk * if that were necessary. (SCTP doesn't come through here, but if it did, 14545916cd2Sjpk * we would apply the same rules as TCP.) 14645916cd2Sjpk * 14745916cd2Sjpk * An implication of the above is that fully-bound TCP sockets must always use 14845916cd2Sjpk * distinct 4-tuples; they can't be discriminated by label alone. 
14945916cd2Sjpk * 15045916cd2Sjpk * Note that we cannot trust labels on packets sent to fully-bound UDP sockets, 15145916cd2Sjpk * as there's no connection set-up handshake and no shared state. 15245916cd2Sjpk * 15345916cd2Sjpk * Labels on looped-back packets within a single zone do not need to be 15445916cd2Sjpk * checked, as all processes in the same zone have the same label. 15545916cd2Sjpk * 15645916cd2Sjpk * Finally, for unlabeled packets received by a labeled system, special rules 15745916cd2Sjpk * apply. We consider only the MLP if there is one. Otherwise, we prefer a 15845916cd2Sjpk * socket in the zone whose label matches the default label of the sender, if 15945916cd2Sjpk * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the 16045916cd2Sjpk * receiver's label must dominate the sender's default label. 16145916cd2Sjpk * 162*f4b3ec61Sdh * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack); 163*f4b3ec61Sdh * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t, 164*f4b3ec61Sdh * ip_stack); 1657c478bd9Sstevel@tonic-gate * 1667c478bd9Sstevel@tonic-gate * Lookup routine to find an exact match for {src, dst, local port, 1677c478bd9Sstevel@tonic-gate * remote port} for TCP connections in ipcl_conn_fanout. The addresses and 1687c478bd9Sstevel@tonic-gate * ports are read from the IP and TCP headers respectively. 1697c478bd9Sstevel@tonic-gate * 170*f4b3ec61Sdh * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol, 171*f4b3ec61Sdh * zoneid, ip_stack); 172*f4b3ec61Sdh * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex, 173*f4b3ec61Sdh * zoneid, ip_stack); 1747c478bd9Sstevel@tonic-gate * 1757c478bd9Sstevel@tonic-gate * Lookup routine to find a listener with the tuple {lport, laddr, 1767c478bd9Sstevel@tonic-gate * protocol} in the ipcl_bind_fanout table. For IPv6, an additional 1777c478bd9Sstevel@tonic-gate * parameter interface index is also compared. 
1787c478bd9Sstevel@tonic-gate * 179*f4b3ec61Sdh * void ipcl_walk(func, arg, ip_stack) 1807c478bd9Sstevel@tonic-gate * 1817c478bd9Sstevel@tonic-gate * Apply 'func' to every connection available. The 'func' is called as 1827c478bd9Sstevel@tonic-gate * (*func)(connp, arg). The walk is non-atomic so connections may be 1837c478bd9Sstevel@tonic-gate * created and destroyed during the walk. The CONN_CONDEMNED and 1847c478bd9Sstevel@tonic-gate * CONN_INCIPIENT flags ensure that connections which are newly created 1857c478bd9Sstevel@tonic-gate * or being destroyed are not selected by the walker. 1867c478bd9Sstevel@tonic-gate * 1877c478bd9Sstevel@tonic-gate * Table Updates 1887c478bd9Sstevel@tonic-gate * ------------- 1897c478bd9Sstevel@tonic-gate * 1907c478bd9Sstevel@tonic-gate * int ipcl_conn_insert(connp, protocol, src, dst, ports) 1917c478bd9Sstevel@tonic-gate * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex) 1927c478bd9Sstevel@tonic-gate * 1937c478bd9Sstevel@tonic-gate * Insert 'connp' in the ipcl_conn_fanout. 1947c478bd9Sstevel@tonic-gate * Arguments : 1957c478bd9Sstevel@tonic-gate * connp conn_t to be inserted 1967c478bd9Sstevel@tonic-gate * protocol connection protocol 1977c478bd9Sstevel@tonic-gate * src source address 1987c478bd9Sstevel@tonic-gate * dst destination address 1997c478bd9Sstevel@tonic-gate * ports local and remote port 2007c478bd9Sstevel@tonic-gate * ifindex interface index for IPv6 connections 2017c478bd9Sstevel@tonic-gate * 2027c478bd9Sstevel@tonic-gate * Return value : 2037c478bd9Sstevel@tonic-gate * 0 if connp was inserted 2047c478bd9Sstevel@tonic-gate * EADDRINUSE if the connection with the same tuple 2057c478bd9Sstevel@tonic-gate * already exists. 
2067c478bd9Sstevel@tonic-gate * 2077c478bd9Sstevel@tonic-gate * int ipcl_bind_insert(connp, protocol, src, lport); 2087c478bd9Sstevel@tonic-gate * int ipcl_bind_insert_v6(connp, protocol, src, lport); 2097c478bd9Sstevel@tonic-gate * 2107c478bd9Sstevel@tonic-gate * Insert 'connp' in ipcl_bind_fanout. 2117c478bd9Sstevel@tonic-gate * Arguments : 2127c478bd9Sstevel@tonic-gate * connp conn_t to be inserted 2137c478bd9Sstevel@tonic-gate * protocol connection protocol 2147c478bd9Sstevel@tonic-gate * src source address connection wants 2157c478bd9Sstevel@tonic-gate * to bind to 2167c478bd9Sstevel@tonic-gate * lport local port connection wants to 2177c478bd9Sstevel@tonic-gate * bind to 2187c478bd9Sstevel@tonic-gate * 2197c478bd9Sstevel@tonic-gate * 2207c478bd9Sstevel@tonic-gate * void ipcl_hash_remove(connp); 2217c478bd9Sstevel@tonic-gate * 2227c478bd9Sstevel@tonic-gate * Removes the 'connp' from the connection fanout table. 2237c478bd9Sstevel@tonic-gate * 2247c478bd9Sstevel@tonic-gate * Connection Creation/Destruction 2257c478bd9Sstevel@tonic-gate * ------------------------------- 2267c478bd9Sstevel@tonic-gate * 227*f4b3ec61Sdh * conn_t *ipcl_conn_create(type, sleep, netstack_t *) 2287c478bd9Sstevel@tonic-gate * 2297c478bd9Sstevel@tonic-gate * Creates a new conn based on the type flag, inserts it into 2307c478bd9Sstevel@tonic-gate * globalhash table. 2317c478bd9Sstevel@tonic-gate * 2327c478bd9Sstevel@tonic-gate * type: This flag determines the type of conn_t which needs to be 2337c478bd9Sstevel@tonic-gate * created. 2347c478bd9Sstevel@tonic-gate * IPCL_TCPCONN indicates a TCP connection 2357c478bd9Sstevel@tonic-gate * IPCL_IPCONN indicates all non-TCP connections. 2367c478bd9Sstevel@tonic-gate * 2377c478bd9Sstevel@tonic-gate * void ipcl_conn_destroy(connp) 2387c478bd9Sstevel@tonic-gate * 2397c478bd9Sstevel@tonic-gate * Destroys the connection state, removes it from the global 2407c478bd9Sstevel@tonic-gate * connection hash table and frees its memory. 
2417c478bd9Sstevel@tonic-gate */ 2427c478bd9Sstevel@tonic-gate 2437c478bd9Sstevel@tonic-gate #include <sys/types.h> 2447c478bd9Sstevel@tonic-gate #include <sys/stream.h> 2457c478bd9Sstevel@tonic-gate #include <sys/stropts.h> 2467c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 2477c478bd9Sstevel@tonic-gate #include <sys/strsubr.h> 2487c478bd9Sstevel@tonic-gate #include <sys/strsun.h> 2497c478bd9Sstevel@tonic-gate #define _SUN_TPI_VERSION 2 2507c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 2517c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 2527c478bd9Sstevel@tonic-gate #include <sys/debug.h> 2537c478bd9Sstevel@tonic-gate 2547c478bd9Sstevel@tonic-gate #include <sys/systm.h> 2557c478bd9Sstevel@tonic-gate #include <sys/param.h> 2567c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 2577c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h> 2587c478bd9Sstevel@tonic-gate #include <inet/common.h> 2597c478bd9Sstevel@tonic-gate #include <netinet/ip6.h> 2607c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h> 2617c478bd9Sstevel@tonic-gate 2627c478bd9Sstevel@tonic-gate #include <inet/ip.h> 2637c478bd9Sstevel@tonic-gate #include <inet/ip6.h> 2647c478bd9Sstevel@tonic-gate #include <inet/tcp.h> 2657c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h> 266ff550d0eSmasputra #include <inet/udp_impl.h> 2677c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h> 268*f4b3ec61Sdh #include <inet/sctp/sctp_impl.h> 2697c478bd9Sstevel@tonic-gate 2707c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h> 2737c478bd9Sstevel@tonic-gate #include <inet/ipsec_impl.h> 2747c478bd9Sstevel@tonic-gate 27545916cd2Sjpk #include <sys/tsol/tnet.h> 27645916cd2Sjpk 2777c478bd9Sstevel@tonic-gate #ifdef DEBUG 2787c478bd9Sstevel@tonic-gate #define IPCL_DEBUG 2797c478bd9Sstevel@tonic-gate #else 2807c478bd9Sstevel@tonic-gate #undef IPCL_DEBUG 2817c478bd9Sstevel@tonic-gate #endif 2827c478bd9Sstevel@tonic-gate 
2837c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 2847c478bd9Sstevel@tonic-gate int ipcl_debug_level = 0; 2857c478bd9Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) \ 2867c478bd9Sstevel@tonic-gate if (ipcl_debug_level & level) { printf args; } 2877c478bd9Sstevel@tonic-gate #else 2887c478bd9Sstevel@tonic-gate #define IPCL_DEBUG_LVL(level, args) {; } 2897c478bd9Sstevel@tonic-gate #endif 290*f4b3ec61Sdh /* Old value for compatibility. Setable in /etc/system */ 2917c478bd9Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0; 2927c478bd9Sstevel@tonic-gate 293*f4b3ec61Sdh /* New value. Zero means choose automatically. Setable in /etc/system */ 2947c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0; 2957c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192; 2967c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500; 2977c478bd9Sstevel@tonic-gate 2987c478bd9Sstevel@tonic-gate /* bind/udp fanout table size */ 2997c478bd9Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512; 300ee4701baSericheng uint_t ipcl_udp_fanout_size = 16384; 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate /* Raw socket fanout size. Must be a power of 2. */ 3037c478bd9Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256; 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate /* 3067c478bd9Sstevel@tonic-gate * Power of 2^N Primes useful for hashing for N of 0-28, 3077c478bd9Sstevel@tonic-gate * these primes are the nearest prime <= 2^N - 2^(N-2). 
3087c478bd9Sstevel@tonic-gate */ 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate #define P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067, \ 3117c478bd9Sstevel@tonic-gate 6143, 12281, 24571, 49139, 98299, 196597, 393209, \ 3127c478bd9Sstevel@tonic-gate 786431, 1572853, 3145721, 6291449, 12582893, 25165813, \ 3137c478bd9Sstevel@tonic-gate 50331599, 100663291, 201326557, 0} 3147c478bd9Sstevel@tonic-gate 3157c478bd9Sstevel@tonic-gate /* 3167c478bd9Sstevel@tonic-gate * wrapper structure to ensure that conn+tcpb are aligned 3177c478bd9Sstevel@tonic-gate * on cache lines. 3187c478bd9Sstevel@tonic-gate */ 3197c478bd9Sstevel@tonic-gate typedef struct itc_s { 3207c478bd9Sstevel@tonic-gate union { 3217c478bd9Sstevel@tonic-gate conn_t itcu_conn; 3227c478bd9Sstevel@tonic-gate char itcu_filler[CACHE_ALIGN(conn_s)]; 3237c478bd9Sstevel@tonic-gate } itc_u; 3247c478bd9Sstevel@tonic-gate tcp_t itc_tcp; 3257c478bd9Sstevel@tonic-gate } itc_t; 3267c478bd9Sstevel@tonic-gate 3277c478bd9Sstevel@tonic-gate #define itc_conn itc_u.itcu_conn 3287c478bd9Sstevel@tonic-gate 3297c478bd9Sstevel@tonic-gate struct kmem_cache *ipcl_tcpconn_cache; 3307c478bd9Sstevel@tonic-gate struct kmem_cache *ipcl_conn_cache; 3317c478bd9Sstevel@tonic-gate extern struct kmem_cache *sctp_conn_cache; 3327c478bd9Sstevel@tonic-gate extern struct kmem_cache *tcp_sack_info_cache; 3337c478bd9Sstevel@tonic-gate extern struct kmem_cache *tcp_iphc_cache; 3347c478bd9Sstevel@tonic-gate 3357c478bd9Sstevel@tonic-gate extern void tcp_timermp_free(tcp_t *); 3367c478bd9Sstevel@tonic-gate extern mblk_t *tcp_timermp_alloc(int); 3377c478bd9Sstevel@tonic-gate 3387c478bd9Sstevel@tonic-gate static int ipcl_tcpconn_constructor(void *, void *, int); 3397c478bd9Sstevel@tonic-gate static void ipcl_tcpconn_destructor(void *, void *); 3407c478bd9Sstevel@tonic-gate 3417c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 3427c478bd9Sstevel@tonic-gate #define INET_NTOA_BUFSIZE 18 3437c478bd9Sstevel@tonic-gate 
3447c478bd9Sstevel@tonic-gate static char * 3457c478bd9Sstevel@tonic-gate inet_ntoa_r(uint32_t in, char *b) 3467c478bd9Sstevel@tonic-gate { 3477c478bd9Sstevel@tonic-gate unsigned char *p; 3487c478bd9Sstevel@tonic-gate 3497c478bd9Sstevel@tonic-gate p = (unsigned char *)∈ 3507c478bd9Sstevel@tonic-gate (void) sprintf(b, "%d.%d.%d.%d", p[0], p[1], p[2], p[3]); 3517c478bd9Sstevel@tonic-gate return (b); 3527c478bd9Sstevel@tonic-gate } 3537c478bd9Sstevel@tonic-gate #endif 3547c478bd9Sstevel@tonic-gate 3557c478bd9Sstevel@tonic-gate /* 356*f4b3ec61Sdh * Global (for all stack instances) init routine 3577c478bd9Sstevel@tonic-gate */ 3587c478bd9Sstevel@tonic-gate void 359*f4b3ec61Sdh ipcl_g_init(void) 3607c478bd9Sstevel@tonic-gate { 3617c478bd9Sstevel@tonic-gate ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache", 3627c478bd9Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE, 363ff550d0eSmasputra NULL, NULL, NULL, NULL, NULL, 0); 3647c478bd9Sstevel@tonic-gate 3657c478bd9Sstevel@tonic-gate ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache", 3667c478bd9Sstevel@tonic-gate sizeof (itc_t), CACHE_ALIGN_SIZE, 3677c478bd9Sstevel@tonic-gate ipcl_tcpconn_constructor, ipcl_tcpconn_destructor, 3687c478bd9Sstevel@tonic-gate NULL, NULL, NULL, 0); 369*f4b3ec61Sdh } 370*f4b3ec61Sdh 371*f4b3ec61Sdh /* 372*f4b3ec61Sdh * ipclassifier intialization routine, sets up hash tables. 
373*f4b3ec61Sdh */ 374*f4b3ec61Sdh void 375*f4b3ec61Sdh ipcl_init(ip_stack_t *ipst) 376*f4b3ec61Sdh { 377*f4b3ec61Sdh int i; 378*f4b3ec61Sdh int sizes[] = P2Ps(); 3797c478bd9Sstevel@tonic-gate 3807c478bd9Sstevel@tonic-gate /* 381*f4b3ec61Sdh * Calculate size of conn fanout table from /etc/system settings 3827c478bd9Sstevel@tonic-gate */ 3837c478bd9Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) { 384*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size; 3857c478bd9Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) { 386*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size; 3877c478bd9Sstevel@tonic-gate } else { 3887c478bd9Sstevel@tonic-gate extern pgcnt_t freemem; 3897c478bd9Sstevel@tonic-gate 390*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 3917c478bd9Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor; 3927c478bd9Sstevel@tonic-gate 393*f4b3ec61Sdh if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) { 394*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = 395*f4b3ec61Sdh ipcl_conn_hash_maxsize; 396*f4b3ec61Sdh } 3977c478bd9Sstevel@tonic-gate } 3987c478bd9Sstevel@tonic-gate 3997c478bd9Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) { 400*f4b3ec61Sdh if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) { 4017c478bd9Sstevel@tonic-gate break; 4027c478bd9Sstevel@tonic-gate } 4037c478bd9Sstevel@tonic-gate } 404*f4b3ec61Sdh if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) { 4057c478bd9Sstevel@tonic-gate /* Out of range, use the 2^16 value */ 406*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = sizes[16]; 4077c478bd9Sstevel@tonic-gate } 4087c478bd9Sstevel@tonic-gate 409*f4b3ec61Sdh /* Take values from /etc/system */ 410*f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size; 411*f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size; 412*f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size; 413*f4b3ec61Sdh 414*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout 
== NULL); 415*f4b3ec61Sdh 416*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = kmem_zalloc( 417*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP); 418*f4b3ec61Sdh 419*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 420*f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL, 4217c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4227c478bd9Sstevel@tonic-gate } 4237c478bd9Sstevel@tonic-gate 424*f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = kmem_zalloc( 425*f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP); 4267c478bd9Sstevel@tonic-gate 427*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 428*f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL, 4297c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4307c478bd9Sstevel@tonic-gate } 4317c478bd9Sstevel@tonic-gate 432*f4b3ec61Sdh ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX * 433*f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 434*f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 435*f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL, 4367c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4377c478bd9Sstevel@tonic-gate } 438*f4b3ec61Sdh 439*f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX * 440*f4b3ec61Sdh sizeof (connf_t), KM_SLEEP); 441*f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 442*f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL, 4437c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4447c478bd9Sstevel@tonic-gate } 4457c478bd9Sstevel@tonic-gate 446*f4b3ec61Sdh ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP); 447*f4b3ec61Sdh mutex_init(&ipst->ips_rts_clients->connf_lock, 448*f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 4497c478bd9Sstevel@tonic-gate 450*f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = kmem_zalloc( 451*f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP); 452*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; 
i++) { 453*f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL, 4547c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4557c478bd9Sstevel@tonic-gate } 4567c478bd9Sstevel@tonic-gate 457*f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = kmem_zalloc( 458*f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP); 459*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 460*f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL, 4617c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL); 4627c478bd9Sstevel@tonic-gate } 4637c478bd9Sstevel@tonic-gate 464*f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = kmem_zalloc( 465*f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP); 4667c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 467*f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock, 468*f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL); 4697c478bd9Sstevel@tonic-gate } 4707c478bd9Sstevel@tonic-gate } 4717c478bd9Sstevel@tonic-gate 4727c478bd9Sstevel@tonic-gate void 473*f4b3ec61Sdh ipcl_g_destroy(void) 4747c478bd9Sstevel@tonic-gate { 4757c478bd9Sstevel@tonic-gate kmem_cache_destroy(ipcl_conn_cache); 4767c478bd9Sstevel@tonic-gate kmem_cache_destroy(ipcl_tcpconn_cache); 477*f4b3ec61Sdh } 478*f4b3ec61Sdh 479*f4b3ec61Sdh /* 480*f4b3ec61Sdh * All user-level and kernel use of the stack must be gone 481*f4b3ec61Sdh * by now. 
482*f4b3ec61Sdh */ 483*f4b3ec61Sdh void 484*f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst) 485*f4b3ec61Sdh { 486*f4b3ec61Sdh int i; 487*f4b3ec61Sdh 488*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) { 489*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL); 490*f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock); 491*f4b3ec61Sdh } 492*f4b3ec61Sdh kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size * 493*f4b3ec61Sdh sizeof (connf_t)); 494*f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = NULL; 495*f4b3ec61Sdh 496*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) { 497*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL); 498*f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock); 499*f4b3ec61Sdh } 500*f4b3ec61Sdh kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size * 501*f4b3ec61Sdh sizeof (connf_t)); 502*f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = NULL; 503*f4b3ec61Sdh 504*f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 505*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL); 506*f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock); 507*f4b3ec61Sdh } 508*f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t)); 509*f4b3ec61Sdh ipst->ips_ipcl_proto_fanout = NULL; 510*f4b3ec61Sdh 511*f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) { 512*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL); 513*f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock); 514*f4b3ec61Sdh } 515*f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout_v6, 516*f4b3ec61Sdh IPPROTO_MAX * sizeof (connf_t)); 517*f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = NULL; 518*f4b3ec61Sdh 519*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) { 520*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL); 521*f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock); 
522*f4b3ec61Sdh } 523*f4b3ec61Sdh kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size * 524*f4b3ec61Sdh sizeof (connf_t)); 525*f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = NULL; 526*f4b3ec61Sdh 527*f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) { 528*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL); 529*f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock); 530*f4b3ec61Sdh } 531*f4b3ec61Sdh kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size * 532*f4b3ec61Sdh sizeof (connf_t)); 533*f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = NULL; 534*f4b3ec61Sdh 535*f4b3ec61Sdh for (i = 0; i < CONN_G_HASH_SIZE; i++) { 536*f4b3ec61Sdh ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL); 537*f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 538*f4b3ec61Sdh } 539*f4b3ec61Sdh kmem_free(ipst->ips_ipcl_globalhash_fanout, 540*f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE); 541*f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = NULL; 542*f4b3ec61Sdh 543*f4b3ec61Sdh ASSERT(ipst->ips_rts_clients->connf_head == NULL); 544*f4b3ec61Sdh mutex_destroy(&ipst->ips_rts_clients->connf_lock); 545*f4b3ec61Sdh kmem_free(ipst->ips_rts_clients, sizeof (connf_t)); 546*f4b3ec61Sdh ipst->ips_rts_clients = NULL; 5477c478bd9Sstevel@tonic-gate } 5487c478bd9Sstevel@tonic-gate 5497c478bd9Sstevel@tonic-gate /* 5507c478bd9Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference 5517c478bd9Sstevel@tonic-gate * and inserts it in the global hash table. 
5527c478bd9Sstevel@tonic-gate */ 5537c478bd9Sstevel@tonic-gate conn_t * 554*f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns) 5557c478bd9Sstevel@tonic-gate { 5567c478bd9Sstevel@tonic-gate itc_t *itc; 5577c478bd9Sstevel@tonic-gate conn_t *connp; 558*f4b3ec61Sdh sctp_stack_t *sctps; 5597c478bd9Sstevel@tonic-gate 5607c478bd9Sstevel@tonic-gate switch (type) { 5617c478bd9Sstevel@tonic-gate case IPCL_TCPCONN: 5627c478bd9Sstevel@tonic-gate if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache, 5637c478bd9Sstevel@tonic-gate sleep)) == NULL) 5647c478bd9Sstevel@tonic-gate return (NULL); 5657c478bd9Sstevel@tonic-gate connp = &itc->itc_conn; 5667c478bd9Sstevel@tonic-gate connp->conn_ref = 1; 567*f4b3ec61Sdh netstack_hold(ns); 568*f4b3ec61Sdh connp->conn_netstack = ns; 5697c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(1, 5707c478bd9Sstevel@tonic-gate ("ipcl_conn_create: connp = %p tcp (%p)", 5717c478bd9Sstevel@tonic-gate (void *)connp, (void *)connp->conn_tcp)); 5727c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(connp); 5737c478bd9Sstevel@tonic-gate break; 5747c478bd9Sstevel@tonic-gate case IPCL_SCTPCONN: 5757c478bd9Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL) 5767c478bd9Sstevel@tonic-gate return (NULL); 5777c478bd9Sstevel@tonic-gate connp->conn_flags = IPCL_SCTPCONN; 578*f4b3ec61Sdh sctps = ns->netstack_sctp; 579*f4b3ec61Sdh SCTP_G_Q_REFHOLD(sctps); 580*f4b3ec61Sdh netstack_hold(ns); 581*f4b3ec61Sdh connp->conn_netstack = ns; 5827c478bd9Sstevel@tonic-gate break; 5837c478bd9Sstevel@tonic-gate case IPCL_IPCCONN: 5847c478bd9Sstevel@tonic-gate connp = kmem_cache_alloc(ipcl_conn_cache, sleep); 5857c478bd9Sstevel@tonic-gate if (connp == NULL) 586ff550d0eSmasputra return (NULL); 5877c478bd9Sstevel@tonic-gate bzero(connp, sizeof (conn_t)); 588ff550d0eSmasputra mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL); 5897c478bd9Sstevel@tonic-gate cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL); 590ff550d0eSmasputra connp->conn_flags = 
IPCL_IPCCONN; 5917c478bd9Sstevel@tonic-gate connp->conn_ref = 1; 592*f4b3ec61Sdh netstack_hold(ns); 593*f4b3ec61Sdh connp->conn_netstack = ns; 5947c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(1, 5957c478bd9Sstevel@tonic-gate ("ipcl_conn_create: connp = %p\n", (void *)connp)); 5967c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(connp); 5977c478bd9Sstevel@tonic-gate break; 598ff550d0eSmasputra default: 599ff550d0eSmasputra connp = NULL; 600ff550d0eSmasputra ASSERT(0); 6017c478bd9Sstevel@tonic-gate } 6027c478bd9Sstevel@tonic-gate 6037c478bd9Sstevel@tonic-gate return (connp); 6047c478bd9Sstevel@tonic-gate } 6057c478bd9Sstevel@tonic-gate 6067c478bd9Sstevel@tonic-gate void 6077c478bd9Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp) 6087c478bd9Sstevel@tonic-gate { 6097c478bd9Sstevel@tonic-gate mblk_t *mp; 610*f4b3ec61Sdh netstack_t *ns = connp->conn_netstack; 6117c478bd9Sstevel@tonic-gate 6127c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock)); 6137c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ref == 0); 6147c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ire_cache == NULL); 6157c478bd9Sstevel@tonic-gate 61645916cd2Sjpk if (connp->conn_peercred != NULL && 61745916cd2Sjpk connp->conn_peercred != connp->conn_cred) 61845916cd2Sjpk crfree(connp->conn_peercred); 61945916cd2Sjpk connp->conn_peercred = NULL; 62045916cd2Sjpk 62145916cd2Sjpk if (connp->conn_cred != NULL) { 62245916cd2Sjpk crfree(connp->conn_cred); 62345916cd2Sjpk connp->conn_cred = NULL; 62445916cd2Sjpk } 62545916cd2Sjpk 6267c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(connp); 6277c478bd9Sstevel@tonic-gate 6287c478bd9Sstevel@tonic-gate cv_destroy(&connp->conn_cv); 6297c478bd9Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) { 630ff550d0eSmasputra tcp_t *tcp = connp->conn_tcp; 631*f4b3ec61Sdh tcp_stack_t *tcps; 632*f4b3ec61Sdh 633*f4b3ec61Sdh ASSERT(tcp != NULL); 634*f4b3ec61Sdh tcps = tcp->tcp_tcps; 635*f4b3ec61Sdh if (tcps != NULL) { 636*f4b3ec61Sdh if (connp->conn_latch != NULL) { 
637*f4b3ec61Sdh IPLATCH_REFRELE(connp->conn_latch, ns); 638*f4b3ec61Sdh connp->conn_latch = NULL; 639*f4b3ec61Sdh } 640*f4b3ec61Sdh if (connp->conn_policy != NULL) { 641*f4b3ec61Sdh IPPH_REFRELE(connp->conn_policy, ns); 642*f4b3ec61Sdh connp->conn_policy = NULL; 643*f4b3ec61Sdh } 644*f4b3ec61Sdh tcp->tcp_tcps = NULL; 645*f4b3ec61Sdh TCPS_REFRELE(tcps); 646*f4b3ec61Sdh } 647ff550d0eSmasputra 6487c478bd9Sstevel@tonic-gate mutex_destroy(&connp->conn_lock); 6497c478bd9Sstevel@tonic-gate tcp_free(tcp); 6507c478bd9Sstevel@tonic-gate mp = tcp->tcp_timercache; 65145916cd2Sjpk tcp->tcp_cred = NULL; 6527c478bd9Sstevel@tonic-gate 6537c478bd9Sstevel@tonic-gate if (tcp->tcp_sack_info != NULL) { 6547c478bd9Sstevel@tonic-gate bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t)); 6557c478bd9Sstevel@tonic-gate kmem_cache_free(tcp_sack_info_cache, 6567c478bd9Sstevel@tonic-gate tcp->tcp_sack_info); 6577c478bd9Sstevel@tonic-gate } 6587c478bd9Sstevel@tonic-gate if (tcp->tcp_iphc != NULL) { 6597c478bd9Sstevel@tonic-gate if (tcp->tcp_hdr_grown) { 6607c478bd9Sstevel@tonic-gate kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len); 6617c478bd9Sstevel@tonic-gate } else { 6627c478bd9Sstevel@tonic-gate bzero(tcp->tcp_iphc, tcp->tcp_iphc_len); 6637c478bd9Sstevel@tonic-gate kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc); 6647c478bd9Sstevel@tonic-gate } 6657c478bd9Sstevel@tonic-gate tcp->tcp_iphc_len = 0; 6667c478bd9Sstevel@tonic-gate } 6677c478bd9Sstevel@tonic-gate ASSERT(tcp->tcp_iphc_len == 0); 6687c478bd9Sstevel@tonic-gate 669*f4b3ec61Sdh ASSERT(connp->conn_latch == NULL); 670*f4b3ec61Sdh ASSERT(connp->conn_policy == NULL); 671*f4b3ec61Sdh 6727c478bd9Sstevel@tonic-gate bzero(connp, sizeof (itc_t)); 6737c478bd9Sstevel@tonic-gate 6747c478bd9Sstevel@tonic-gate tcp->tcp_timercache = mp; 6757c478bd9Sstevel@tonic-gate connp->conn_tcp = tcp; 6767c478bd9Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 6777c478bd9Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 6787c478bd9Sstevel@tonic-gate tcp->tcp_connp 
= connp; 679*f4b3ec61Sdh if (ns != NULL) { 680*f4b3ec61Sdh ASSERT(tcp->tcp_tcps == NULL); 681*f4b3ec61Sdh connp->conn_netstack = NULL; 682*f4b3ec61Sdh netstack_rele(ns); 683*f4b3ec61Sdh } 6847c478bd9Sstevel@tonic-gate kmem_cache_free(ipcl_tcpconn_cache, connp); 6857c478bd9Sstevel@tonic-gate } else if (connp->conn_flags & IPCL_SCTPCONN) { 686*f4b3ec61Sdh ASSERT(ns != NULL); 6877c478bd9Sstevel@tonic-gate sctp_free(connp); 6887c478bd9Sstevel@tonic-gate } else { 689ff550d0eSmasputra ASSERT(connp->conn_udp == NULL); 6907c478bd9Sstevel@tonic-gate mutex_destroy(&connp->conn_lock); 691*f4b3ec61Sdh if (ns != NULL) { 692*f4b3ec61Sdh connp->conn_netstack = NULL; 693*f4b3ec61Sdh netstack_rele(ns); 694*f4b3ec61Sdh } 6957c478bd9Sstevel@tonic-gate kmem_cache_free(ipcl_conn_cache, connp); 6967c478bd9Sstevel@tonic-gate } 6977c478bd9Sstevel@tonic-gate } 6987c478bd9Sstevel@tonic-gate 6997c478bd9Sstevel@tonic-gate /* 7007c478bd9Sstevel@tonic-gate * Running in cluster mode - deregister listener information 7017c478bd9Sstevel@tonic-gate */ 7027c478bd9Sstevel@tonic-gate 7037c478bd9Sstevel@tonic-gate static void 7047c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp) 7057c478bd9Sstevel@tonic-gate { 7067c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0); 7077c478bd9Sstevel@tonic-gate ASSERT(connp->conn_lport != 0); 7087c478bd9Sstevel@tonic-gate 7097c478bd9Sstevel@tonic-gate if (cl_inet_unlisten != NULL) { 7107c478bd9Sstevel@tonic-gate sa_family_t addr_family; 7117c478bd9Sstevel@tonic-gate uint8_t *laddrp; 7127c478bd9Sstevel@tonic-gate 7137c478bd9Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 7147c478bd9Sstevel@tonic-gate addr_family = AF_INET6; 7157c478bd9Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source_v6; 7167c478bd9Sstevel@tonic-gate } else { 7177c478bd9Sstevel@tonic-gate addr_family = AF_INET; 7187c478bd9Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 7197c478bd9Sstevel@tonic-gate } 7207c478bd9Sstevel@tonic-gate 
(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp, 7217c478bd9Sstevel@tonic-gate connp->conn_lport); 7227c478bd9Sstevel@tonic-gate } 7237c478bd9Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER; 7247c478bd9Sstevel@tonic-gate } 7257c478bd9Sstevel@tonic-gate 7267c478bd9Sstevel@tonic-gate /* 7277c478bd9Sstevel@tonic-gate * We set the IPCL_REMOVED flag (instead of clearing the flag indicating 7287c478bd9Sstevel@tonic-gate * which table the conn belonged to). So for debugging we can see which hash 7297c478bd9Sstevel@tonic-gate * table this connection was in. 7307c478bd9Sstevel@tonic-gate */ 7317c478bd9Sstevel@tonic-gate #define IPCL_HASH_REMOVE(connp) { \ 7327c478bd9Sstevel@tonic-gate connf_t *connfp = (connp)->conn_fanout; \ 7337c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&((connp)->conn_lock))); \ 7347c478bd9Sstevel@tonic-gate if (connfp != NULL) { \ 7357c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p", \ 7367c478bd9Sstevel@tonic-gate (void *)(connp))); \ 7377c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); \ 7387c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) \ 7397c478bd9Sstevel@tonic-gate (connp)->conn_next->conn_prev = \ 7407c478bd9Sstevel@tonic-gate (connp)->conn_prev; \ 7417c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) \ 7427c478bd9Sstevel@tonic-gate (connp)->conn_prev->conn_next = \ 7437c478bd9Sstevel@tonic-gate (connp)->conn_next; \ 7447c478bd9Sstevel@tonic-gate else \ 7457c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; \ 7467c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; \ 7477c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; \ 7487c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; \ 7497c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; \ 7507c478bd9Sstevel@tonic-gate if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) \ 7517c478bd9Sstevel@tonic-gate ipcl_conn_unlisten((connp)); \ 7527c478bd9Sstevel@tonic-gate CONN_DEC_REF((connp)); \ 
7537c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); \ 7547c478bd9Sstevel@tonic-gate } \ 7557c478bd9Sstevel@tonic-gate } 7567c478bd9Sstevel@tonic-gate 7577c478bd9Sstevel@tonic-gate void 7587c478bd9Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp) 7597c478bd9Sstevel@tonic-gate { 7607c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 7617c478bd9Sstevel@tonic-gate } 7627c478bd9Sstevel@tonic-gate 7637c478bd9Sstevel@tonic-gate /* 7647c478bd9Sstevel@tonic-gate * The whole purpose of this function is allow removal of 7657c478bd9Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim. 7667c478bd9Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait 7677c478bd9Sstevel@tonic-gate * collector checks under fanout lock (so no one else can 7687c478bd9Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for 7697c478bd9Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count 7707c478bd9Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and 7717c478bd9Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us 7727c478bd9Sstevel@tonic-gate * improved performance. 
7737c478bd9Sstevel@tonic-gate */ 7747c478bd9Sstevel@tonic-gate void 7757c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp) 7767c478bd9Sstevel@tonic-gate { 7777c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock)); 7787c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 7797c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0); 7807c478bd9Sstevel@tonic-gate 7817c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) { 7827c478bd9Sstevel@tonic-gate (connp)->conn_next->conn_prev = 7837c478bd9Sstevel@tonic-gate (connp)->conn_prev; 7847c478bd9Sstevel@tonic-gate } 7857c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) { 7867c478bd9Sstevel@tonic-gate (connp)->conn_prev->conn_next = 7877c478bd9Sstevel@tonic-gate (connp)->conn_next; 7887c478bd9Sstevel@tonic-gate } else { 7897c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next; 7907c478bd9Sstevel@tonic-gate } 7917c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL; 7927c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL; 7937c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL; 7947c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED; 7957c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2); 7967c478bd9Sstevel@tonic-gate (connp)->conn_ref--; 7977c478bd9Sstevel@tonic-gate } 7987c478bd9Sstevel@tonic-gate 7997c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) { \ 8007c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_fanout == NULL); \ 8017c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_next == NULL); \ 8027c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_prev == NULL); \ 8037c478bd9Sstevel@tonic-gate if ((connfp)->connf_head != NULL) { \ 8047c478bd9Sstevel@tonic-gate (connfp)->connf_head->conn_prev = (connp); \ 8057c478bd9Sstevel@tonic-gate (connp)->conn_next = (connfp)->connf_head; \ 8067c478bd9Sstevel@tonic-gate } \ 8077c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 
8087c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 8097c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8107c478bd9Sstevel@tonic-gate IPCL_CONNECTED; \ 8117c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 8127c478bd9Sstevel@tonic-gate } 8137c478bd9Sstevel@tonic-gate 8147c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_CONNECTED(connfp, connp) { \ 8157c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p " \ 8167c478bd9Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 8177c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8187c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8197c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); \ 8207c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 8217c478bd9Sstevel@tonic-gate } 8227c478bd9Sstevel@tonic-gate 8237c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_BOUND(connfp, connp) { \ 8247c478bd9Sstevel@tonic-gate conn_t *pconnp = NULL, *nconnp; \ 8257c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p " \ 8267c478bd9Sstevel@tonic-gate "connp %p", (void *)connfp, (void *)(connp))); \ 8277c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8287c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8297c478bd9Sstevel@tonic-gate nconnp = (connfp)->connf_head; \ 8303d1c78fbSethindra while (nconnp != NULL && \ 8313d1c78fbSethindra !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) { \ 8323d1c78fbSethindra pconnp = nconnp; \ 8333d1c78fbSethindra nconnp = nconnp->conn_next; \ 8347c478bd9Sstevel@tonic-gate } \ 8357c478bd9Sstevel@tonic-gate if (pconnp != NULL) { \ 8367c478bd9Sstevel@tonic-gate pconnp->conn_next = (connp); \ 8377c478bd9Sstevel@tonic-gate (connp)->conn_prev = pconnp; \ 8387c478bd9Sstevel@tonic-gate } else { \ 8397c478bd9Sstevel@tonic-gate (connfp)->connf_head = (connp); \ 8407c478bd9Sstevel@tonic-gate } \ 
8417c478bd9Sstevel@tonic-gate if (nconnp != NULL) { \ 8427c478bd9Sstevel@tonic-gate (connp)->conn_next = nconnp; \ 8437c478bd9Sstevel@tonic-gate nconnp->conn_prev = (connp); \ 8447c478bd9Sstevel@tonic-gate } \ 8457c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8467c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8477c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 8487c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); \ 8497c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 8507c478bd9Sstevel@tonic-gate } 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate #define IPCL_HASH_INSERT_WILDCARD(connfp, connp) { \ 8537c478bd9Sstevel@tonic-gate conn_t **list, *prev, *next; \ 8547c478bd9Sstevel@tonic-gate boolean_t isv4mapped = \ 8557c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6); \ 8567c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p " \ 8577c478bd9Sstevel@tonic-gate "connp %p", (void *)(connfp), (void *)(connp))); \ 8587c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE((connp)); \ 8597c478bd9Sstevel@tonic-gate mutex_enter(&(connfp)->connf_lock); \ 8607c478bd9Sstevel@tonic-gate list = &(connfp)->connf_head; \ 8617c478bd9Sstevel@tonic-gate prev = NULL; \ 8627c478bd9Sstevel@tonic-gate while ((next = *list) != NULL) { \ 8637c478bd9Sstevel@tonic-gate if (isv4mapped && \ 8647c478bd9Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) && \ 8657c478bd9Sstevel@tonic-gate connp->conn_zoneid == next->conn_zoneid) { \ 8667c478bd9Sstevel@tonic-gate (connp)->conn_next = next; \ 8677c478bd9Sstevel@tonic-gate if (prev != NULL) \ 8687c478bd9Sstevel@tonic-gate prev = next->conn_prev; \ 8697c478bd9Sstevel@tonic-gate next->conn_prev = (connp); \ 8707c478bd9Sstevel@tonic-gate break; \ 8717c478bd9Sstevel@tonic-gate } \ 8727c478bd9Sstevel@tonic-gate list = &next->conn_next; \ 8737c478bd9Sstevel@tonic-gate prev = next; \ 8747c478bd9Sstevel@tonic-gate } \ 
8757c478bd9Sstevel@tonic-gate (connp)->conn_prev = prev; \ 8767c478bd9Sstevel@tonic-gate *list = (connp); \ 8777c478bd9Sstevel@tonic-gate (connp)->conn_fanout = (connfp); \ 8787c478bd9Sstevel@tonic-gate (connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) | \ 8797c478bd9Sstevel@tonic-gate IPCL_BOUND; \ 8807c478bd9Sstevel@tonic-gate CONN_INC_REF((connp)); \ 8817c478bd9Sstevel@tonic-gate mutex_exit(&(connfp)->connf_lock); \ 8827c478bd9Sstevel@tonic-gate } 8837c478bd9Sstevel@tonic-gate 8847c478bd9Sstevel@tonic-gate void 8857c478bd9Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp) 8867c478bd9Sstevel@tonic-gate { 8877c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 8887c478bd9Sstevel@tonic-gate } 8897c478bd9Sstevel@tonic-gate 8907c478bd9Sstevel@tonic-gate void 8917c478bd9Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol) 8927c478bd9Sstevel@tonic-gate { 8937c478bd9Sstevel@tonic-gate connf_t *connfp; 894*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 8957c478bd9Sstevel@tonic-gate 8967c478bd9Sstevel@tonic-gate ASSERT(connp != NULL); 89745916cd2Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 89845916cd2Sjpk protocol == IPPROTO_ESP); 8997c478bd9Sstevel@tonic-gate 9007c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 9017c478bd9Sstevel@tonic-gate 9027c478bd9Sstevel@tonic-gate /* Insert it in the protocol hash */ 903*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 9047c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9057c478bd9Sstevel@tonic-gate } 9067c478bd9Sstevel@tonic-gate 9077c478bd9Sstevel@tonic-gate void 9087c478bd9Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol) 9097c478bd9Sstevel@tonic-gate { 9107c478bd9Sstevel@tonic-gate connf_t *connfp; 911*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9127c478bd9Sstevel@tonic-gate 9137c478bd9Sstevel@tonic-gate ASSERT(connp != NULL); 
91445916cd2Sjpk ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH || 91545916cd2Sjpk protocol == IPPROTO_ESP); 9167c478bd9Sstevel@tonic-gate 9177c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 9187c478bd9Sstevel@tonic-gate 9197c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 920*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 9217c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9227c478bd9Sstevel@tonic-gate } 9237c478bd9Sstevel@tonic-gate 9247c478bd9Sstevel@tonic-gate /* 9257c478bd9Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now. 9267c478bd9Sstevel@tonic-gate * This may change later. 9277c478bd9Sstevel@tonic-gate * 9287c478bd9Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param 9297c478bd9Sstevel@tonic-gate * lport is in network byte order. 9307c478bd9Sstevel@tonic-gate */ 9317c478bd9Sstevel@tonic-gate static int 9327c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport) 9337c478bd9Sstevel@tonic-gate { 9347c478bd9Sstevel@tonic-gate connf_t *connfp; 9357c478bd9Sstevel@tonic-gate conn_t *oconnp; 936*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 9377c478bd9Sstevel@tonic-gate 938*f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 9397c478bd9Sstevel@tonic-gate 9407c478bd9Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. 
*/ 9417c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 9427c478bd9Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL; 9437c0c0508Skcpoon oconnp = oconnp->conn_next) { 9447c478bd9Sstevel@tonic-gate if (oconnp->conn_lport == lport && 9457c478bd9Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid && 9467c478bd9Sstevel@tonic-gate oconnp->conn_af_isv6 == connp->conn_af_isv6 && 9477c478bd9Sstevel@tonic-gate ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 9487c478bd9Sstevel@tonic-gate IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) || 9497c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) || 9507c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) || 9517c478bd9Sstevel@tonic-gate IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6, 9527c478bd9Sstevel@tonic-gate &connp->conn_srcv6))) { 9537c478bd9Sstevel@tonic-gate break; 9547c478bd9Sstevel@tonic-gate } 9557c478bd9Sstevel@tonic-gate } 9567c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 9577c478bd9Sstevel@tonic-gate if (oconnp != NULL) 9587c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL); 9597c478bd9Sstevel@tonic-gate 9607c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) || 9617c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) { 9627c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) || 9637c478bd9Sstevel@tonic-gate IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) { 9647c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 9657c478bd9Sstevel@tonic-gate } else { 9667c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 9677c478bd9Sstevel@tonic-gate } 9687c478bd9Sstevel@tonic-gate } else { 9697c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 9707c478bd9Sstevel@tonic-gate } 9717c478bd9Sstevel@tonic-gate return (0); 9727c478bd9Sstevel@tonic-gate } 9737c478bd9Sstevel@tonic-gate 97445916cd2Sjpk /* 97545916cd2Sjpk * Check for a MAC exemption conflict 
on a labeled system. Note that for 97645916cd2Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the 97745916cd2Sjpk * transport layer. This check is for binding all other protocols. 97845916cd2Sjpk * 97945916cd2Sjpk * Returns true if there's a conflict. 98045916cd2Sjpk */ 98145916cd2Sjpk static boolean_t 982*f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst) 98345916cd2Sjpk { 98445916cd2Sjpk connf_t *connfp; 98545916cd2Sjpk conn_t *tconn; 98645916cd2Sjpk 987*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 98845916cd2Sjpk mutex_enter(&connfp->connf_lock); 98945916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 99045916cd2Sjpk tconn = tconn->conn_next) { 99145916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */ 99245916cd2Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 99345916cd2Sjpk continue; 99445916cd2Sjpk /* If neither is exempt, then there's no conflict */ 99545916cd2Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 99645916cd2Sjpk continue; 99745916cd2Sjpk /* If both are bound to different specific addrs, ok */ 99845916cd2Sjpk if (connp->conn_src != INADDR_ANY && 99945916cd2Sjpk tconn->conn_src != INADDR_ANY && 100045916cd2Sjpk connp->conn_src != tconn->conn_src) 100145916cd2Sjpk continue; 100245916cd2Sjpk /* These two conflict; fail */ 100345916cd2Sjpk break; 100445916cd2Sjpk } 100545916cd2Sjpk mutex_exit(&connfp->connf_lock); 100645916cd2Sjpk return (tconn != NULL); 100745916cd2Sjpk } 100845916cd2Sjpk 100945916cd2Sjpk static boolean_t 1010*f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst) 101145916cd2Sjpk { 101245916cd2Sjpk connf_t *connfp; 101345916cd2Sjpk conn_t *tconn; 101445916cd2Sjpk 1015*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp]; 101645916cd2Sjpk mutex_enter(&connfp->connf_lock); 101745916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL; 101845916cd2Sjpk tconn = tconn->conn_next) { 101945916cd2Sjpk /* 
We don't allow v4 fallback for v6 raw socket */ 102045916cd2Sjpk if (connp->conn_af_isv6 != tconn->conn_af_isv6) 102145916cd2Sjpk continue; 102245916cd2Sjpk /* If neither is exempt, then there's no conflict */ 102345916cd2Sjpk if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt) 102445916cd2Sjpk continue; 102545916cd2Sjpk /* If both are bound to different addrs, ok */ 102645916cd2Sjpk if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) && 102745916cd2Sjpk !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) && 102845916cd2Sjpk !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6)) 102945916cd2Sjpk continue; 103045916cd2Sjpk /* These two conflict; fail */ 103145916cd2Sjpk break; 103245916cd2Sjpk } 103345916cd2Sjpk mutex_exit(&connfp->connf_lock); 103445916cd2Sjpk return (tconn != NULL); 103545916cd2Sjpk } 103645916cd2Sjpk 10377c478bd9Sstevel@tonic-gate /* 10387c478bd9Sstevel@tonic-gate * (v4, v6) bind hash insertion routines 10397c478bd9Sstevel@tonic-gate */ 10407c478bd9Sstevel@tonic-gate int 10417c478bd9Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport) 10427c478bd9Sstevel@tonic-gate { 10437c478bd9Sstevel@tonic-gate connf_t *connfp; 10447c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 10457c478bd9Sstevel@tonic-gate char buf[INET_NTOA_BUFSIZE]; 10467c478bd9Sstevel@tonic-gate #endif 10477c478bd9Sstevel@tonic-gate int ret = 0; 1048*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 10497c478bd9Sstevel@tonic-gate 10507c478bd9Sstevel@tonic-gate ASSERT(connp); 10517c478bd9Sstevel@tonic-gate 10527c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, " 10537c478bd9Sstevel@tonic-gate "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport)); 10547c478bd9Sstevel@tonic-gate 10557c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 10567c478bd9Sstevel@tonic-gate IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6); 10577c478bd9Sstevel@tonic-gate connp->conn_lport = lport; 
10587c478bd9Sstevel@tonic-gate 10597c478bd9Sstevel@tonic-gate switch (protocol) { 10607c478bd9Sstevel@tonic-gate default: 1061*f4b3ec61Sdh if (is_system_labeled() && 1062*f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 106345916cd2Sjpk return (EADDRINUSE); 106445916cd2Sjpk /* FALLTHROUGH */ 106545916cd2Sjpk case IPPROTO_UDP: 10667c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 10677c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 10687c478bd9Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - udp\n", 10697c478bd9Sstevel@tonic-gate (void *)connp)); 1070*f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1071*f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)]; 10727c478bd9Sstevel@tonic-gate } else { 10737c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(64, 10747c478bd9Sstevel@tonic-gate ("ipcl_bind_insert: connp %p - protocol\n", 10757c478bd9Sstevel@tonic-gate (void *)connp)); 1076*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 10777c478bd9Sstevel@tonic-gate } 10787c478bd9Sstevel@tonic-gate 10797c478bd9Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 10807c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 10817c478bd9Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 10827c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10837c478bd9Sstevel@tonic-gate } else { 10847c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10857c478bd9Sstevel@tonic-gate } 10867c478bd9Sstevel@tonic-gate break; 10877c478bd9Sstevel@tonic-gate 10887c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 10897c478bd9Sstevel@tonic-gate 10907c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 109145916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1092*f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[ 1093*f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)]; 10947c478bd9Sstevel@tonic-gate if (connp->conn_src != INADDR_ANY) { 10957c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 10967c478bd9Sstevel@tonic-gate } else { 
10977c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 10987c478bd9Sstevel@tonic-gate } 10997c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) { 11007c478bd9Sstevel@tonic-gate ASSERT(!connp->conn_pkt_isv6); 11017c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 11027c478bd9Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, AF_INET, 11037c478bd9Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source, lport); 11047c478bd9Sstevel@tonic-gate } 11057c478bd9Sstevel@tonic-gate break; 11067c478bd9Sstevel@tonic-gate 11077c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 11087c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 11097c478bd9Sstevel@tonic-gate break; 11107c478bd9Sstevel@tonic-gate } 11117c478bd9Sstevel@tonic-gate 11127c478bd9Sstevel@tonic-gate return (ret); 11137c478bd9Sstevel@tonic-gate } 11147c478bd9Sstevel@tonic-gate 11157c478bd9Sstevel@tonic-gate int 11167c478bd9Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 11177c478bd9Sstevel@tonic-gate uint16_t lport) 11187c478bd9Sstevel@tonic-gate { 11197c478bd9Sstevel@tonic-gate connf_t *connfp; 11207c478bd9Sstevel@tonic-gate int ret = 0; 1121*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 11227c478bd9Sstevel@tonic-gate 11237c478bd9Sstevel@tonic-gate ASSERT(connp); 11247c478bd9Sstevel@tonic-gate 11257c478bd9Sstevel@tonic-gate connp->conn_ulp = protocol; 11267c478bd9Sstevel@tonic-gate connp->conn_srcv6 = *src; 11277c478bd9Sstevel@tonic-gate connp->conn_lport = lport; 11287c478bd9Sstevel@tonic-gate 11297c478bd9Sstevel@tonic-gate switch (protocol) { 11307c478bd9Sstevel@tonic-gate default: 1131*f4b3ec61Sdh if (is_system_labeled() && 1132*f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst)) 113345916cd2Sjpk return (EADDRINUSE); 113445916cd2Sjpk /* FALLTHROUGH */ 113545916cd2Sjpk case IPPROTO_UDP: 11367c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 11377c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 
11387c478bd9Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - udp\n", 11397c478bd9Sstevel@tonic-gate (void *)connp)); 1140*f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1141*f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)]; 11427c478bd9Sstevel@tonic-gate } else { 11437c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(128, 11447c478bd9Sstevel@tonic-gate ("ipcl_bind_insert_v6: connp %p - protocol\n", 11457c478bd9Sstevel@tonic-gate (void *)connp)); 1146*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 11477c478bd9Sstevel@tonic-gate } 11487c478bd9Sstevel@tonic-gate 11497c478bd9Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 11507c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 11517c478bd9Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 11527c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11537c478bd9Sstevel@tonic-gate } else { 11547c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11557c478bd9Sstevel@tonic-gate } 11567c478bd9Sstevel@tonic-gate break; 11577c478bd9Sstevel@tonic-gate 11587c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 11597c478bd9Sstevel@tonic-gate /* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? 
*/ 11607c478bd9Sstevel@tonic-gate 11617c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */ 116245916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES); 1163*f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[ 1164*f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)]; 11657c478bd9Sstevel@tonic-gate if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 11667c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 11677c478bd9Sstevel@tonic-gate } else { 11687c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 11697c478bd9Sstevel@tonic-gate } 11707c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) { 11717c478bd9Sstevel@tonic-gate sa_family_t addr_family; 11727c478bd9Sstevel@tonic-gate uint8_t *laddrp; 11737c478bd9Sstevel@tonic-gate 11747c478bd9Sstevel@tonic-gate if (connp->conn_pkt_isv6) { 11757c478bd9Sstevel@tonic-gate addr_family = AF_INET6; 11767c478bd9Sstevel@tonic-gate laddrp = 11777c478bd9Sstevel@tonic-gate (uint8_t *)&connp->conn_bound_source_v6; 11787c478bd9Sstevel@tonic-gate } else { 11797c478bd9Sstevel@tonic-gate addr_family = AF_INET; 11807c478bd9Sstevel@tonic-gate laddrp = (uint8_t *)&connp->conn_bound_source; 11817c478bd9Sstevel@tonic-gate } 11827c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER; 11837c478bd9Sstevel@tonic-gate (*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp, 11847c478bd9Sstevel@tonic-gate lport); 11857c478bd9Sstevel@tonic-gate } 11867c478bd9Sstevel@tonic-gate break; 11877c478bd9Sstevel@tonic-gate 11887c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 11897c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 11907c478bd9Sstevel@tonic-gate break; 11917c478bd9Sstevel@tonic-gate } 11927c478bd9Sstevel@tonic-gate 11937c478bd9Sstevel@tonic-gate return (ret); 11947c478bd9Sstevel@tonic-gate } 11957c478bd9Sstevel@tonic-gate 11967c478bd9Sstevel@tonic-gate /* 11977c478bd9Sstevel@tonic-gate * ipcl_conn_hash insertion routines. 
11987c478bd9Sstevel@tonic-gate */ 11997c478bd9Sstevel@tonic-gate int 12007c478bd9Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, 12017c478bd9Sstevel@tonic-gate ipaddr_t rem, uint32_t ports) 12027c478bd9Sstevel@tonic-gate { 12037c478bd9Sstevel@tonic-gate connf_t *connfp; 12047c478bd9Sstevel@tonic-gate uint16_t *up; 12057c478bd9Sstevel@tonic-gate conn_t *tconnp; 12067c478bd9Sstevel@tonic-gate #ifdef IPCL_DEBUG 12077c478bd9Sstevel@tonic-gate char sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE]; 12087c478bd9Sstevel@tonic-gate #endif 12097c478bd9Sstevel@tonic-gate in_port_t lport; 12107c478bd9Sstevel@tonic-gate int ret = 0; 1211*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 12127c478bd9Sstevel@tonic-gate 12137c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, " 12147c478bd9Sstevel@tonic-gate "dst = %s, ports = %x, protocol = %x", (void *)connp, 12157c478bd9Sstevel@tonic-gate inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf), 12167c478bd9Sstevel@tonic-gate ports, protocol)); 12177c478bd9Sstevel@tonic-gate 12187c478bd9Sstevel@tonic-gate switch (protocol) { 12197c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 12207c478bd9Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 12217c478bd9Sstevel@tonic-gate /* 12227c478bd9Sstevel@tonic-gate * for a eager connection, i.e connections which 12237c478bd9Sstevel@tonic-gate * have just been created, the initialization is 12247c478bd9Sstevel@tonic-gate * already done in ip at conn_creation time, so 12257c478bd9Sstevel@tonic-gate * we can skip the checks here. 
12267c478bd9Sstevel@tonic-gate */ 12277c478bd9Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 12287c478bd9Sstevel@tonic-gate } 1229*f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1230*f4b3ec61Sdh IPCL_CONN_HASH(connp->conn_rem, 1231*f4b3ec61Sdh connp->conn_ports, ipst)]; 12327c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 12337c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 12347c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 12357c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp, 12367c478bd9Sstevel@tonic-gate connp->conn_rem, connp->conn_src, 12377c478bd9Sstevel@tonic-gate connp->conn_ports)) { 12387c478bd9Sstevel@tonic-gate 12397c478bd9Sstevel@tonic-gate /* Already have a conn. bail out */ 12407c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 12417c478bd9Sstevel@tonic-gate return (EADDRINUSE); 12427c478bd9Sstevel@tonic-gate } 12437c478bd9Sstevel@tonic-gate } 12447c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 12457c478bd9Sstevel@tonic-gate /* 12467c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 12477c478bd9Sstevel@tonic-gate * rebind. Let it happen. 12487c478bd9Sstevel@tonic-gate */ 12497c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 12507c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 12517c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 12527c478bd9Sstevel@tonic-gate } 1253866ba9ddSjprakash 1254866ba9ddSjprakash ASSERT(connp->conn_recv != NULL); 1255866ba9ddSjprakash 12567c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 12577c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 12587c478bd9Sstevel@tonic-gate break; 12597c478bd9Sstevel@tonic-gate 12607c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 12617c0c0508Skcpoon /* 12627c0c0508Skcpoon * The raw socket may have already been bound, remove it 12637c0c0508Skcpoon * from the hash first. 
12647c0c0508Skcpoon */ 12657c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 12667c0c0508Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 12677c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 12687c478bd9Sstevel@tonic-gate break; 12697c478bd9Sstevel@tonic-gate 12707c478bd9Sstevel@tonic-gate default: 127145916cd2Sjpk /* 127245916cd2Sjpk * Check for conflicts among MAC exempt bindings. For 127345916cd2Sjpk * transports with port numbers, this is done by the upper 127445916cd2Sjpk * level per-transport binding logic. For all others, it's 127545916cd2Sjpk * done here. 127645916cd2Sjpk */ 1277*f4b3ec61Sdh if (is_system_labeled() && 1278*f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst)) 127945916cd2Sjpk return (EADDRINUSE); 128045916cd2Sjpk /* FALLTHROUGH */ 128145916cd2Sjpk 128245916cd2Sjpk case IPPROTO_UDP: 12837c478bd9Sstevel@tonic-gate up = (uint16_t *)&ports; 12847c478bd9Sstevel@tonic-gate IPCL_CONN_INIT(connp, protocol, src, rem, ports); 12857c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1286*f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1287*f4b3ec61Sdh IPCL_UDP_HASH(up[1], ipst)]; 12887c478bd9Sstevel@tonic-gate } else { 1289*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout[protocol]; 12907c478bd9Sstevel@tonic-gate } 12917c478bd9Sstevel@tonic-gate 12927c478bd9Sstevel@tonic-gate if (connp->conn_rem != INADDR_ANY) { 12937c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 12947c478bd9Sstevel@tonic-gate } else if (connp->conn_src != INADDR_ANY) { 12957c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 12967c478bd9Sstevel@tonic-gate } else { 12977c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 12987c478bd9Sstevel@tonic-gate } 12997c478bd9Sstevel@tonic-gate break; 13007c478bd9Sstevel@tonic-gate } 13017c478bd9Sstevel@tonic-gate 13027c478bd9Sstevel@tonic-gate return (ret); 13037c478bd9Sstevel@tonic-gate } 13047c478bd9Sstevel@tonic-gate 13057c478bd9Sstevel@tonic-gate int 
13067c478bd9Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src, 13077c478bd9Sstevel@tonic-gate const in6_addr_t *rem, uint32_t ports, uint_t ifindex) 13087c478bd9Sstevel@tonic-gate { 13097c478bd9Sstevel@tonic-gate connf_t *connfp; 13107c478bd9Sstevel@tonic-gate uint16_t *up; 13117c478bd9Sstevel@tonic-gate conn_t *tconnp; 13127c478bd9Sstevel@tonic-gate in_port_t lport; 13137c478bd9Sstevel@tonic-gate int ret = 0; 1314*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 13157c478bd9Sstevel@tonic-gate 13167c478bd9Sstevel@tonic-gate switch (protocol) { 13177c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 13187c478bd9Sstevel@tonic-gate /* Just need to insert a conn struct */ 13197c478bd9Sstevel@tonic-gate if (!(connp->conn_flags & IPCL_EAGER)) { 13207c478bd9Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 13217c478bd9Sstevel@tonic-gate } 1322*f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[ 1323*f4b3ec61Sdh IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports, 1324*f4b3ec61Sdh ipst)]; 13257c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13267c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 13277c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 13287c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp, 13297c478bd9Sstevel@tonic-gate connp->conn_remv6, connp->conn_srcv6, 13307c478bd9Sstevel@tonic-gate connp->conn_ports) && 13317c478bd9Sstevel@tonic-gate (tconnp->conn_tcp->tcp_bound_if == 0 || 13327c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_bound_if == ifindex)) { 13337c478bd9Sstevel@tonic-gate /* Already have a conn. 
bail out */ 13347c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13357c478bd9Sstevel@tonic-gate return (EADDRINUSE); 13367c478bd9Sstevel@tonic-gate } 13377c478bd9Sstevel@tonic-gate } 13387c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) { 13397c478bd9Sstevel@tonic-gate /* 13407c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a 13417c478bd9Sstevel@tonic-gate * rebind. Let it happen. 13427c478bd9Sstevel@tonic-gate */ 13437c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13447c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp); 13457c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 13467c478bd9Sstevel@tonic-gate } 13477c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp); 13487c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 13497c478bd9Sstevel@tonic-gate break; 13507c478bd9Sstevel@tonic-gate 13517c478bd9Sstevel@tonic-gate case IPPROTO_SCTP: 13527c0c0508Skcpoon IPCL_HASH_REMOVE(connp); 13537c0c0508Skcpoon lport = htons((uint16_t)(ntohl(ports) & 0xFFFF)); 13547c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport); 13557c478bd9Sstevel@tonic-gate break; 13567c478bd9Sstevel@tonic-gate 13577c478bd9Sstevel@tonic-gate default: 1358*f4b3ec61Sdh if (is_system_labeled() && 1359*f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst)) 136045916cd2Sjpk return (EADDRINUSE); 136145916cd2Sjpk /* FALLTHROUGH */ 136245916cd2Sjpk case IPPROTO_UDP: 13637c478bd9Sstevel@tonic-gate up = (uint16_t *)&ports; 13647c478bd9Sstevel@tonic-gate IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports); 13657c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) { 1366*f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[ 1367*f4b3ec61Sdh IPCL_UDP_HASH(up[1], ipst)]; 13687c478bd9Sstevel@tonic-gate } else { 1369*f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol]; 13707c478bd9Sstevel@tonic-gate } 13717c478bd9Sstevel@tonic-gate 13727c478bd9Sstevel@tonic-gate if 
(!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) { 13737c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp); 13747c478bd9Sstevel@tonic-gate } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) { 13757c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp); 13767c478bd9Sstevel@tonic-gate } else { 13777c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp); 13787c478bd9Sstevel@tonic-gate } 13797c478bd9Sstevel@tonic-gate break; 13807c478bd9Sstevel@tonic-gate } 13817c478bd9Sstevel@tonic-gate 13827c478bd9Sstevel@tonic-gate return (ret); 13837c478bd9Sstevel@tonic-gate } 13847c478bd9Sstevel@tonic-gate 13857c478bd9Sstevel@tonic-gate /* 13867c478bd9Sstevel@tonic-gate * v4 packet classifying function. looks up the fanout table to 13877c478bd9Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with 13887c478bd9Sstevel@tonic-gate * the reference held, null otherwise. 138945916cd2Sjpk * 139045916cd2Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection 139145916cd2Sjpk * Lookup" comment block are applied. Labels are also checked as described 139245916cd2Sjpk * above. If the packet is from the inside (looped back), and is from the same 139345916cd2Sjpk * zone, then label checks are omitted. 
13947c478bd9Sstevel@tonic-gate */ 13957c478bd9Sstevel@tonic-gate conn_t * 1396*f4b3ec61Sdh ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1397*f4b3ec61Sdh ip_stack_t *ipst) 13987c478bd9Sstevel@tonic-gate { 13997c478bd9Sstevel@tonic-gate ipha_t *ipha; 14007c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 14017c478bd9Sstevel@tonic-gate uint16_t lport; 14027c478bd9Sstevel@tonic-gate uint16_t fport; 14037c478bd9Sstevel@tonic-gate uint32_t ports; 14047c478bd9Sstevel@tonic-gate conn_t *connp; 14057c478bd9Sstevel@tonic-gate uint16_t *up; 140645916cd2Sjpk boolean_t shared_addr; 140745916cd2Sjpk boolean_t unlabeled; 14087c478bd9Sstevel@tonic-gate 14097c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 14107c478bd9Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET); 14117c478bd9Sstevel@tonic-gate 14127c478bd9Sstevel@tonic-gate switch (protocol) { 14137c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 14147c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 14157c478bd9Sstevel@tonic-gate connfp = 1416*f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src, 1417*f4b3ec61Sdh ports, ipst)]; 14187c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 14197c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 14207c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 14217c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(connp, protocol, 14227c478bd9Sstevel@tonic-gate ipha->ipha_src, ipha->ipha_dst, ports)) 14237c478bd9Sstevel@tonic-gate break; 14247c478bd9Sstevel@tonic-gate } 14257c478bd9Sstevel@tonic-gate 14267c478bd9Sstevel@tonic-gate if (connp != NULL) { 142745916cd2Sjpk /* 142845916cd2Sjpk * We have a fully-bound TCP connection. 142945916cd2Sjpk * 143045916cd2Sjpk * For labeled systems, there's no need to check the 143145916cd2Sjpk * label here. It's known to be good as we checked 143245916cd2Sjpk * before allowing the connection to become bound. 
143345916cd2Sjpk */ 14347c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 14357c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14367c478bd9Sstevel@tonic-gate return (connp); 14377c478bd9Sstevel@tonic-gate } 14387c478bd9Sstevel@tonic-gate 14397c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 14407c478bd9Sstevel@tonic-gate 14417c478bd9Sstevel@tonic-gate lport = up[1]; 144245916cd2Sjpk unlabeled = B_FALSE; 144345916cd2Sjpk /* Cred cannot be null on IPv4 */ 144445916cd2Sjpk if (is_system_labeled()) 144545916cd2Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 144645916cd2Sjpk TSLF_UNLABELED) != 0; 144745916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 144845916cd2Sjpk if (shared_addr) { 1449*f4b3ec61Sdh /* 1450*f4b3ec61Sdh * No need to handle exclusive-stack zones since 1451*f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1452*f4b3ec61Sdh */ 145345916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 145445916cd2Sjpk /* 145545916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 145645916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 145745916cd2Sjpk * search for the zone based on the packet label. 145845916cd2Sjpk * 145945916cd2Sjpk * If there is such a zone, we prefer to find a 146045916cd2Sjpk * connection in it. Otherwise, we look for a 146145916cd2Sjpk * MAC-exempt connection in any zone whose label 146245916cd2Sjpk * dominates the default label on the packet. 
146345916cd2Sjpk */ 146445916cd2Sjpk if (zoneid == ALL_ZONES) 146545916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 146645916cd2Sjpk else 146745916cd2Sjpk unlabeled = B_FALSE; 146845916cd2Sjpk } 146945916cd2Sjpk 1470*f4b3ec61Sdh bind_connfp = 1471*f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 14727c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 14737c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 14747c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 147545916cd2Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst, 14765d0bc3edSsommerfe lport) && (IPCL_ZONE_MATCH(connp, zoneid) || 147745916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 14787c478bd9Sstevel@tonic-gate break; 14797c478bd9Sstevel@tonic-gate } 14807c478bd9Sstevel@tonic-gate 148145916cd2Sjpk /* 148245916cd2Sjpk * If the matching connection is SLP on a private address, then 148345916cd2Sjpk * the label on the packet must match the local zone's label. 148445916cd2Sjpk * Otherwise, it must be in the label range defined by tnrh. 148545916cd2Sjpk * This is ensured by tsol_receive_label. 
148645916cd2Sjpk */ 148745916cd2Sjpk if (connp != NULL && is_system_labeled() && 148845916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 148945916cd2Sjpk shared_addr, connp)) { 149045916cd2Sjpk DTRACE_PROBE3( 149145916cd2Sjpk tx__ip__log__info__classify__tcp, 149245916cd2Sjpk char *, 149345916cd2Sjpk "connp(1) could not receive mp(2)", 149445916cd2Sjpk conn_t *, connp, mblk_t *, mp); 149545916cd2Sjpk connp = NULL; 149645916cd2Sjpk } 149745916cd2Sjpk 14987c478bd9Sstevel@tonic-gate if (connp != NULL) { 149945916cd2Sjpk /* Have a listener at least */ 15007c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 15017c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15027c478bd9Sstevel@tonic-gate return (connp); 15037c478bd9Sstevel@tonic-gate } 15047c478bd9Sstevel@tonic-gate 15057c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 15067c478bd9Sstevel@tonic-gate 15077c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 15087c478bd9Sstevel@tonic-gate ("ipcl_classify: couldn't classify mp = %p\n", 15097c478bd9Sstevel@tonic-gate (void *)mp)); 15107c478bd9Sstevel@tonic-gate break; 15117c478bd9Sstevel@tonic-gate 15127c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 15137c478bd9Sstevel@tonic-gate lport = up[1]; 151445916cd2Sjpk unlabeled = B_FALSE; 151545916cd2Sjpk /* Cred cannot be null on IPv4 */ 151645916cd2Sjpk if (is_system_labeled()) 151745916cd2Sjpk unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags & 151845916cd2Sjpk TSLF_UNLABELED) != 0; 151945916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 152045916cd2Sjpk if (shared_addr) { 1521*f4b3ec61Sdh /* 1522*f4b3ec61Sdh * No need to handle exclusive-stack zones since 1523*f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1524*f4b3ec61Sdh */ 152545916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 152645916cd2Sjpk /* 152745916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 152845916cd2Sjpk * ALL_ZONES. 
In that case, we assume it's SLP, and 152945916cd2Sjpk * search for the zone based on the packet label. 153045916cd2Sjpk * 153145916cd2Sjpk * If there is such a zone, we prefer to find a 153245916cd2Sjpk * connection in it. Otherwise, we look for a 153345916cd2Sjpk * MAC-exempt connection in any zone whose label 153445916cd2Sjpk * dominates the default label on the packet. 153545916cd2Sjpk */ 153645916cd2Sjpk if (zoneid == ALL_ZONES) 153745916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 153845916cd2Sjpk else 153945916cd2Sjpk unlabeled = B_FALSE; 154045916cd2Sjpk } 15417c478bd9Sstevel@tonic-gate fport = up[0]; 15427c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport)); 1543*f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 15447c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 15457c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 15467c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 15477c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst, 15487c478bd9Sstevel@tonic-gate fport, ipha->ipha_src) && 15495d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 155045916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 15517c478bd9Sstevel@tonic-gate break; 15527c478bd9Sstevel@tonic-gate } 15537c478bd9Sstevel@tonic-gate 155445916cd2Sjpk if (connp != NULL && is_system_labeled() && 155545916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION, 155645916cd2Sjpk shared_addr, connp)) { 155745916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp, 155845916cd2Sjpk char *, "connp(1) could not receive mp(2)", 155945916cd2Sjpk conn_t *, connp, mblk_t *, mp); 156045916cd2Sjpk connp = NULL; 156145916cd2Sjpk } 156245916cd2Sjpk 15637c478bd9Sstevel@tonic-gate if (connp != NULL) { 15647c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 15657c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15667c478bd9Sstevel@tonic-gate return (connp); 
15677c478bd9Sstevel@tonic-gate } 15687c478bd9Sstevel@tonic-gate 15697c478bd9Sstevel@tonic-gate /* 15707c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 15717c478bd9Sstevel@tonic-gate */ 15727c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 15737c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 15747c478bd9Sstevel@tonic-gate ("ipcl_classify: cant find udp conn_t for ports : %x %x", 15757c478bd9Sstevel@tonic-gate lport, fport)); 15767c478bd9Sstevel@tonic-gate break; 15777c478bd9Sstevel@tonic-gate } 15787c478bd9Sstevel@tonic-gate 15797c478bd9Sstevel@tonic-gate return (NULL); 15807c478bd9Sstevel@tonic-gate } 15817c478bd9Sstevel@tonic-gate 15827c478bd9Sstevel@tonic-gate conn_t * 1583*f4b3ec61Sdh ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid, 1584*f4b3ec61Sdh ip_stack_t *ipst) 15857c478bd9Sstevel@tonic-gate { 15867c478bd9Sstevel@tonic-gate ip6_t *ip6h; 15877c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp; 15887c478bd9Sstevel@tonic-gate uint16_t lport; 15897c478bd9Sstevel@tonic-gate uint16_t fport; 15907c478bd9Sstevel@tonic-gate tcph_t *tcph; 15917c478bd9Sstevel@tonic-gate uint32_t ports; 15927c478bd9Sstevel@tonic-gate conn_t *connp; 15937c478bd9Sstevel@tonic-gate uint16_t *up; 159445916cd2Sjpk boolean_t shared_addr; 159545916cd2Sjpk boolean_t unlabeled; 15967c478bd9Sstevel@tonic-gate 15977c478bd9Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr; 15987c478bd9Sstevel@tonic-gate 15997c478bd9Sstevel@tonic-gate switch (protocol) { 16007c478bd9Sstevel@tonic-gate case IPPROTO_TCP: 16017c478bd9Sstevel@tonic-gate tcph = (tcph_t *)&mp->b_rptr[hdr_len]; 16027c478bd9Sstevel@tonic-gate up = (uint16_t *)tcph->th_lport; 16037c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up; 16047c478bd9Sstevel@tonic-gate 16057c478bd9Sstevel@tonic-gate connfp = 1606*f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src, 1607*f4b3ec61Sdh ports, ipst)]; 16087c478bd9Sstevel@tonic-gate 
mutex_enter(&connfp->connf_lock); 16097c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 16107c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 16117c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(connp, protocol, 16127c478bd9Sstevel@tonic-gate ip6h->ip6_src, ip6h->ip6_dst, ports)) 16137c478bd9Sstevel@tonic-gate break; 16147c478bd9Sstevel@tonic-gate } 16157c478bd9Sstevel@tonic-gate 16167c478bd9Sstevel@tonic-gate if (connp != NULL) { 161745916cd2Sjpk /* 161845916cd2Sjpk * We have a fully-bound TCP connection. 161945916cd2Sjpk * 162045916cd2Sjpk * For labeled systems, there's no need to check the 162145916cd2Sjpk * label here. It's known to be good as we checked 162245916cd2Sjpk * before allowing the connection to become bound. 162345916cd2Sjpk */ 16247c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 16257c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16267c478bd9Sstevel@tonic-gate return (connp); 16277c478bd9Sstevel@tonic-gate } 16287c478bd9Sstevel@tonic-gate 16297c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 16307c478bd9Sstevel@tonic-gate 16317c478bd9Sstevel@tonic-gate lport = up[1]; 163245916cd2Sjpk unlabeled = B_FALSE; 163345916cd2Sjpk /* Cred can be null on IPv6 */ 163445916cd2Sjpk if (is_system_labeled()) { 163545916cd2Sjpk cred_t *cr = DB_CRED(mp); 163645916cd2Sjpk 163745916cd2Sjpk unlabeled = (cr != NULL && 163845916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 163945916cd2Sjpk } 164045916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 164145916cd2Sjpk if (shared_addr) { 1642*f4b3ec61Sdh /* 1643*f4b3ec61Sdh * No need to handle exclusive-stack zones since 1644*f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1645*f4b3ec61Sdh */ 164645916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 164745916cd2Sjpk /* 164845916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 164945916cd2Sjpk * ALL_ZONES. 
In that case, we assume it's SLP, and 165045916cd2Sjpk * search for the zone based on the packet label. 165145916cd2Sjpk * 165245916cd2Sjpk * If there is such a zone, we prefer to find a 165345916cd2Sjpk * connection in it. Otherwise, we look for a 165445916cd2Sjpk * MAC-exempt connection in any zone whose label 165545916cd2Sjpk * dominates the default label on the packet. 165645916cd2Sjpk */ 165745916cd2Sjpk if (zoneid == ALL_ZONES) 165845916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 165945916cd2Sjpk else 166045916cd2Sjpk unlabeled = B_FALSE; 166145916cd2Sjpk } 166245916cd2Sjpk 1663*f4b3ec61Sdh bind_connfp = 1664*f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 16657c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 16667c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 16677c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 16687c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol, 16697c478bd9Sstevel@tonic-gate ip6h->ip6_dst, lport) && 16705d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 167145916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 16727c478bd9Sstevel@tonic-gate break; 16737c478bd9Sstevel@tonic-gate } 16747c478bd9Sstevel@tonic-gate 167545916cd2Sjpk if (connp != NULL && is_system_labeled() && 167645916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 167745916cd2Sjpk shared_addr, connp)) { 167845916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6, 167945916cd2Sjpk char *, "connp(1) could not receive mp(2)", 168045916cd2Sjpk conn_t *, connp, mblk_t *, mp); 168145916cd2Sjpk connp = NULL; 168245916cd2Sjpk } 168345916cd2Sjpk 16847c478bd9Sstevel@tonic-gate if (connp != NULL) { 16857c478bd9Sstevel@tonic-gate /* Have a listner at least */ 16867c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 16877c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16887c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 16897c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: 
found listner " 16907c478bd9Sstevel@tonic-gate "connp = %p\n", (void *)connp)); 16917c478bd9Sstevel@tonic-gate 16927c478bd9Sstevel@tonic-gate return (connp); 16937c478bd9Sstevel@tonic-gate } 16947c478bd9Sstevel@tonic-gate 16957c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 16967c478bd9Sstevel@tonic-gate 16977c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 16987c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: couldn't classify mp = %p\n", 16997c478bd9Sstevel@tonic-gate (void *)mp)); 17007c478bd9Sstevel@tonic-gate break; 17017c478bd9Sstevel@tonic-gate 17027c478bd9Sstevel@tonic-gate case IPPROTO_UDP: 17037c478bd9Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len]; 17047c478bd9Sstevel@tonic-gate lport = up[1]; 170545916cd2Sjpk unlabeled = B_FALSE; 170645916cd2Sjpk /* Cred can be null on IPv6 */ 170745916cd2Sjpk if (is_system_labeled()) { 170845916cd2Sjpk cred_t *cr = DB_CRED(mp); 170945916cd2Sjpk 171045916cd2Sjpk unlabeled = (cr != NULL && 171145916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 171245916cd2Sjpk } 171345916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 171445916cd2Sjpk if (shared_addr) { 1715*f4b3ec61Sdh /* 1716*f4b3ec61Sdh * No need to handle exclusive-stack zones since 1717*f4b3ec61Sdh * ALL_ZONES only applies to the shared stack. 1718*f4b3ec61Sdh */ 171945916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 172045916cd2Sjpk /* 172145916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 172245916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and 172345916cd2Sjpk * search for the zone based on the packet label. 172445916cd2Sjpk * 172545916cd2Sjpk * If there is such a zone, we prefer to find a 172645916cd2Sjpk * connection in it. Otherwise, we look for a 172745916cd2Sjpk * MAC-exempt connection in any zone whose label 172845916cd2Sjpk * dominates the default label on the packet. 
172945916cd2Sjpk */ 173045916cd2Sjpk if (zoneid == ALL_ZONES) 173145916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 173245916cd2Sjpk else 173345916cd2Sjpk unlabeled = B_FALSE; 173445916cd2Sjpk } 173545916cd2Sjpk 17367c478bd9Sstevel@tonic-gate fport = up[0]; 17377c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport, 17387c478bd9Sstevel@tonic-gate fport)); 1739*f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)]; 17407c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 17417c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 17427c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 17437c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst, 17447c478bd9Sstevel@tonic-gate fport, ip6h->ip6_src) && 17455d0bc3edSsommerfe (IPCL_ZONE_MATCH(connp, zoneid) || 174645916cd2Sjpk (unlabeled && connp->conn_mac_exempt))) 17477c478bd9Sstevel@tonic-gate break; 17487c478bd9Sstevel@tonic-gate } 17497c478bd9Sstevel@tonic-gate 175045916cd2Sjpk if (connp != NULL && is_system_labeled() && 175145916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION, 175245916cd2Sjpk shared_addr, connp)) { 175345916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6, 175445916cd2Sjpk char *, "connp(1) could not receive mp(2)", 175545916cd2Sjpk conn_t *, connp, mblk_t *, mp); 175645916cd2Sjpk connp = NULL; 175745916cd2Sjpk } 175845916cd2Sjpk 17597c478bd9Sstevel@tonic-gate if (connp != NULL) { 17607c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 17617c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17627c478bd9Sstevel@tonic-gate return (connp); 17637c478bd9Sstevel@tonic-gate } 17647c478bd9Sstevel@tonic-gate 17657c478bd9Sstevel@tonic-gate /* 17667c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets 17677c478bd9Sstevel@tonic-gate */ 17687c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 17697c478bd9Sstevel@tonic-gate IPCL_DEBUG_LVL(512, 
17707c478bd9Sstevel@tonic-gate ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x", 17717c478bd9Sstevel@tonic-gate lport, fport)); 17727c478bd9Sstevel@tonic-gate break; 17737c478bd9Sstevel@tonic-gate } 17747c478bd9Sstevel@tonic-gate 17757c478bd9Sstevel@tonic-gate return (NULL); 17767c478bd9Sstevel@tonic-gate } 17777c478bd9Sstevel@tonic-gate 17787c478bd9Sstevel@tonic-gate /* 17797c478bd9Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines. 17807c478bd9Sstevel@tonic-gate */ 17817c478bd9Sstevel@tonic-gate conn_t * 1782*f4b3ec61Sdh ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst) 17837c478bd9Sstevel@tonic-gate { 17847c478bd9Sstevel@tonic-gate uint16_t hdr_len; 17857c478bd9Sstevel@tonic-gate ipha_t *ipha; 17867c478bd9Sstevel@tonic-gate uint8_t *nexthdrp; 17877c478bd9Sstevel@tonic-gate 17887c478bd9Sstevel@tonic-gate if (MBLKL(mp) < sizeof (ipha_t)) 17897c478bd9Sstevel@tonic-gate return (NULL); 17907c478bd9Sstevel@tonic-gate 17917c478bd9Sstevel@tonic-gate switch (IPH_HDR_VERSION(mp->b_rptr)) { 17927c478bd9Sstevel@tonic-gate case IPV4_VERSION: 17937c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr; 17947c478bd9Sstevel@tonic-gate hdr_len = IPH_HDR_LENGTH(ipha); 17957c478bd9Sstevel@tonic-gate return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len, 1796*f4b3ec61Sdh zoneid, ipst)); 17977c478bd9Sstevel@tonic-gate case IPV6_VERSION: 17987c478bd9Sstevel@tonic-gate if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr, 17997c478bd9Sstevel@tonic-gate &hdr_len, &nexthdrp)) 18007c478bd9Sstevel@tonic-gate return (NULL); 18017c478bd9Sstevel@tonic-gate 1802*f4b3ec61Sdh return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst)); 18037c478bd9Sstevel@tonic-gate } 18047c478bd9Sstevel@tonic-gate 18057c478bd9Sstevel@tonic-gate return (NULL); 18067c478bd9Sstevel@tonic-gate } 18077c478bd9Sstevel@tonic-gate 18087c478bd9Sstevel@tonic-gate conn_t * 180945916cd2Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid, 1810*f4b3ec61Sdh 
uint32_t ports, ipha_t *hdr, ip_stack_t *ipst) 18117c478bd9Sstevel@tonic-gate { 181245916cd2Sjpk connf_t *connfp; 18137c478bd9Sstevel@tonic-gate conn_t *connp; 18147c478bd9Sstevel@tonic-gate in_port_t lport; 18157c478bd9Sstevel@tonic-gate int af; 181645916cd2Sjpk boolean_t shared_addr; 181745916cd2Sjpk boolean_t unlabeled; 181845916cd2Sjpk const void *dst; 18197c478bd9Sstevel@tonic-gate 18207c478bd9Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1]; 182145916cd2Sjpk 182245916cd2Sjpk unlabeled = B_FALSE; 182345916cd2Sjpk /* Cred can be null on IPv6 */ 182445916cd2Sjpk if (is_system_labeled()) { 182545916cd2Sjpk cred_t *cr = DB_CRED(mp); 182645916cd2Sjpk 182745916cd2Sjpk unlabeled = (cr != NULL && 182845916cd2Sjpk crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0; 182945916cd2Sjpk } 183045916cd2Sjpk shared_addr = (zoneid == ALL_ZONES); 183145916cd2Sjpk if (shared_addr) { 1832*f4b3ec61Sdh /* 1833*f4b3ec61Sdh * No need to handle exclusive-stack zones since ALL_ZONES 1834*f4b3ec61Sdh * only applies to the shared stack. 1835*f4b3ec61Sdh */ 183645916cd2Sjpk zoneid = tsol_mlp_findzone(protocol, lport); 183745916cd2Sjpk /* 183845916cd2Sjpk * If no shared MLP is found, tsol_mlp_findzone returns 183945916cd2Sjpk * ALL_ZONES. In that case, we assume it's SLP, and search for 184045916cd2Sjpk * the zone based on the packet label. 184145916cd2Sjpk * 184245916cd2Sjpk * If there is such a zone, we prefer to find a connection in 184345916cd2Sjpk * it. Otherwise, we look for a MAC-exempt connection in any 184445916cd2Sjpk * zone whose label dominates the default label on the packet. 184545916cd2Sjpk */ 184645916cd2Sjpk if (zoneid == ALL_ZONES) 184745916cd2Sjpk zoneid = tsol_packet_to_zoneid(mp); 184845916cd2Sjpk else 184945916cd2Sjpk unlabeled = B_FALSE; 185045916cd2Sjpk } 185145916cd2Sjpk 18527c478bd9Sstevel@tonic-gate af = IPH_HDR_VERSION(hdr); 185345916cd2Sjpk dst = af == IPV4_VERSION ? 
(const void *)&hdr->ipha_dst : 185445916cd2Sjpk (const void *)&((ip6_t *)hdr)->ip6_dst; 1855*f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)]; 18567c478bd9Sstevel@tonic-gate 18577c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 18587c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL; 18597c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 18607c478bd9Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */ 186145916cd2Sjpk if (af == (connp->conn_af_isv6 ? IPV4_VERSION : 186245916cd2Sjpk IPV6_VERSION)) 18637c478bd9Sstevel@tonic-gate continue; 18647c478bd9Sstevel@tonic-gate if (connp->conn_fully_bound) { 18657c478bd9Sstevel@tonic-gate if (af == IPV4_VERSION) { 186645916cd2Sjpk if (!IPCL_CONN_MATCH(connp, protocol, 186745916cd2Sjpk hdr->ipha_src, hdr->ipha_dst, ports)) 186845916cd2Sjpk continue; 18697c478bd9Sstevel@tonic-gate } else { 187045916cd2Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol, 18717c478bd9Sstevel@tonic-gate ((ip6_t *)hdr)->ip6_src, 187245916cd2Sjpk ((ip6_t *)hdr)->ip6_dst, ports)) 187345916cd2Sjpk continue; 18747c478bd9Sstevel@tonic-gate } 18757c478bd9Sstevel@tonic-gate } else { 18767c478bd9Sstevel@tonic-gate if (af == IPV4_VERSION) { 187745916cd2Sjpk if (!IPCL_BIND_MATCH(connp, protocol, 187845916cd2Sjpk hdr->ipha_dst, lport)) 187945916cd2Sjpk continue; 18807c478bd9Sstevel@tonic-gate } else { 188145916cd2Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol, 188245916cd2Sjpk ((ip6_t *)hdr)->ip6_dst, lport)) 188345916cd2Sjpk continue; 18847c478bd9Sstevel@tonic-gate } 18857c478bd9Sstevel@tonic-gate } 188645916cd2Sjpk 18875d0bc3edSsommerfe if (IPCL_ZONE_MATCH(connp, zoneid) || 188845916cd2Sjpk (unlabeled && connp->conn_mac_exempt)) 188945916cd2Sjpk break; 189045916cd2Sjpk } 189145916cd2Sjpk /* 189245916cd2Sjpk * If the connection is fully-bound and connection-oriented (TCP or 189345916cd2Sjpk * SCTP), then we've already validated the remote system's label. 
189445916cd2Sjpk * There's no need to do it again for every packet. 189545916cd2Sjpk */ 189645916cd2Sjpk if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound || 189745916cd2Sjpk !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) && 189845916cd2Sjpk !tsol_receive_local(mp, dst, af, shared_addr, connp)) { 189945916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip, 190045916cd2Sjpk char *, "connp(1) could not receive mp(2)", 190145916cd2Sjpk conn_t *, connp, mblk_t *, mp); 190245916cd2Sjpk connp = NULL; 19037c478bd9Sstevel@tonic-gate } 19047c0c0508Skcpoon 19057c0c0508Skcpoon if (connp != NULL) 19067c0c0508Skcpoon goto found; 19077c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 19087c0c0508Skcpoon 19097c0c0508Skcpoon /* Try to look for a wildcard match. */ 1910*f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)]; 19117c0c0508Skcpoon mutex_enter(&connfp->connf_lock); 19127c0c0508Skcpoon for (connp = connfp->connf_head; connp != NULL; 19137c0c0508Skcpoon connp = connp->conn_next) { 19147c0c0508Skcpoon /* We don't allow v4 fallback for v6 raw socket. */ 19157c0c0508Skcpoon if ((af == (connp->conn_af_isv6 ? 
IPV4_VERSION : 19165d0bc3edSsommerfe IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) { 19177c0c0508Skcpoon continue; 19187c0c0508Skcpoon } 19197c0c0508Skcpoon if (af == IPV4_VERSION) { 19207c0c0508Skcpoon if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst)) 19217c0c0508Skcpoon break; 19227c0c0508Skcpoon } else { 19237c0c0508Skcpoon if (IPCL_RAW_MATCH_V6(connp, protocol, 19247c0c0508Skcpoon ((ip6_t *)hdr)->ip6_dst)) { 19257c0c0508Skcpoon break; 19267c0c0508Skcpoon } 19277c0c0508Skcpoon } 19287c478bd9Sstevel@tonic-gate } 19297c0c0508Skcpoon 19307c0c0508Skcpoon if (connp != NULL) 19317c0c0508Skcpoon goto found; 19327c0c0508Skcpoon 19337c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 19347c478bd9Sstevel@tonic-gate return (NULL); 19357c0c0508Skcpoon 19367c0c0508Skcpoon found: 19377c0c0508Skcpoon ASSERT(connp != NULL); 19387c0c0508Skcpoon CONN_INC_REF(connp); 19397c0c0508Skcpoon mutex_exit(&connfp->connf_lock); 19407c0c0508Skcpoon return (connp); 19417c478bd9Sstevel@tonic-gate } 19427c478bd9Sstevel@tonic-gate 19437c478bd9Sstevel@tonic-gate /* ARGSUSED */ 19447c478bd9Sstevel@tonic-gate static int 19457c478bd9Sstevel@tonic-gate ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags) 19467c478bd9Sstevel@tonic-gate { 19477c478bd9Sstevel@tonic-gate itc_t *itc = (itc_t *)buf; 19487c478bd9Sstevel@tonic-gate conn_t *connp = &itc->itc_conn; 19497c478bd9Sstevel@tonic-gate tcp_t *tcp = &itc->itc_tcp; 19507c478bd9Sstevel@tonic-gate bzero(itc, sizeof (itc_t)); 19517c478bd9Sstevel@tonic-gate tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP); 19527c478bd9Sstevel@tonic-gate connp->conn_tcp = tcp; 19537c478bd9Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN; 19547c478bd9Sstevel@tonic-gate connp->conn_ulp = IPPROTO_TCP; 19557c478bd9Sstevel@tonic-gate tcp->tcp_connp = connp; 19567c478bd9Sstevel@tonic-gate return (0); 19577c478bd9Sstevel@tonic-gate } 19587c478bd9Sstevel@tonic-gate 19597c478bd9Sstevel@tonic-gate /* ARGSUSED */ 19607c478bd9Sstevel@tonic-gate static 
void 19617c478bd9Sstevel@tonic-gate ipcl_tcpconn_destructor(void *buf, void *cdrarg) 19627c478bd9Sstevel@tonic-gate { 19637c478bd9Sstevel@tonic-gate tcp_timermp_free(((conn_t *)buf)->conn_tcp); 19647c478bd9Sstevel@tonic-gate } 19657c478bd9Sstevel@tonic-gate 19667c478bd9Sstevel@tonic-gate /* 19677c478bd9Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of 19687c478bd9Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time 19697c478bd9Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to 19707c478bd9Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved 19717c478bd9Sstevel@tonic-gate * as follows. 19727c478bd9Sstevel@tonic-gate * 19737c478bd9Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that 19747c478bd9Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion 19757c478bd9Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this 19767c478bd9Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt 19777c478bd9Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note 19787c478bd9Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for 19797c478bd9Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated 19807c478bd9Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at 19817c478bd9Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible. 19827c478bd9Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the 19837c478bd9Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible 19847c478bd9Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus. 
19857c478bd9Sstevel@tonic-gate */ 19867c478bd9Sstevel@tonic-gate void 19877c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp) 19887c478bd9Sstevel@tonic-gate { 19897c478bd9Sstevel@tonic-gate int index; 1990*f4b3ec61Sdh struct connf_s *connfp; 1991*f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 19927c478bd9Sstevel@tonic-gate 19937c478bd9Sstevel@tonic-gate /* 19947c478bd9Sstevel@tonic-gate * No need for atomic here. Approximate even distribution 19957c478bd9Sstevel@tonic-gate * in the global lists is sufficient. 19967c478bd9Sstevel@tonic-gate */ 1997*f4b3ec61Sdh ipst->ips_conn_g_index++; 1998*f4b3ec61Sdh index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1); 19997c478bd9Sstevel@tonic-gate 20007c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 20017c478bd9Sstevel@tonic-gate /* 20027c478bd9Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this 20037c478bd9Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally. 20047c478bd9Sstevel@tonic-gate */ 20057c478bd9Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT; 20067c478bd9Sstevel@tonic-gate 2007*f4b3ec61Sdh connfp = &ipst->ips_ipcl_globalhash_fanout[index]; 20087c478bd9Sstevel@tonic-gate /* Insert at the head of the list */ 2009*f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 2010*f4b3ec61Sdh connp->conn_g_next = connfp->connf_head; 20117c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 20127c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp; 2013*f4b3ec61Sdh connfp->connf_head = connp; 20147c478bd9Sstevel@tonic-gate 20157c478bd9Sstevel@tonic-gate /* The fanout bucket this conn points to */ 2016*f4b3ec61Sdh connp->conn_g_fanout = connfp; 20177c478bd9Sstevel@tonic-gate 2018*f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 20197c478bd9Sstevel@tonic-gate } 20207c478bd9Sstevel@tonic-gate 20217c478bd9Sstevel@tonic-gate void 20227c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp) 20237c478bd9Sstevel@tonic-gate { 
2024*f4b3ec61Sdh struct connf_s *connfp; 2025*f4b3ec61Sdh 20267c478bd9Sstevel@tonic-gate /* 20277c478bd9Sstevel@tonic-gate * We were never inserted in the global multi list. 20287c478bd9Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist 20297c478bd9Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient. 20307c478bd9Sstevel@tonic-gate */ 20317c478bd9Sstevel@tonic-gate if (connp->conn_g_fanout == NULL) 20327c478bd9Sstevel@tonic-gate return; 20337c478bd9Sstevel@tonic-gate 2034*f4b3ec61Sdh connfp = connp->conn_g_fanout; 2035*f4b3ec61Sdh mutex_enter(&connfp->connf_lock); 20367c478bd9Sstevel@tonic-gate if (connp->conn_g_prev != NULL) 20377c478bd9Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next; 20387c478bd9Sstevel@tonic-gate else 2039*f4b3ec61Sdh connfp->connf_head = connp->conn_g_next; 20407c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL) 20417c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev; 2042*f4b3ec61Sdh mutex_exit(&connfp->connf_lock); 20437c478bd9Sstevel@tonic-gate 20447c478bd9Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */ 20457c478bd9Sstevel@tonic-gate connp->conn_g_next = NULL; 20467c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL; 20477c478bd9Sstevel@tonic-gate } 20487c478bd9Sstevel@tonic-gate 20497c478bd9Sstevel@tonic-gate /* 20507c478bd9Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided 20517c478bd9Sstevel@tonic-gate * with the specified argument for each. 20527c478bd9Sstevel@tonic-gate * Applies to both IPv4 and IPv6. 20537c478bd9Sstevel@tonic-gate * 20547c478bd9Sstevel@tonic-gate * IPCs may hold pointers to ipif/ill. To guard against stale pointers 20557c478bd9Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is 20567c478bd9Sstevel@tonic-gate * unplumbed or removed. 
New conn_t's that are created while we are walking 20577c478bd9Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted 20587c478bd9Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any 20597c478bd9Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference 20607c478bd9Sstevel@tonic-gate * is created to the struct that is going away. 20617c478bd9Sstevel@tonic-gate */ 20627c478bd9Sstevel@tonic-gate void 2063*f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst) 20647c478bd9Sstevel@tonic-gate { 20657c478bd9Sstevel@tonic-gate int i; 20667c478bd9Sstevel@tonic-gate conn_t *connp; 20677c478bd9Sstevel@tonic-gate conn_t *prev_connp; 20687c478bd9Sstevel@tonic-gate 20697c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2070*f4b3ec61Sdh mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 20717c478bd9Sstevel@tonic-gate prev_connp = NULL; 2072*f4b3ec61Sdh connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head; 20737c478bd9Sstevel@tonic-gate while (connp != NULL) { 20747c478bd9Sstevel@tonic-gate mutex_enter(&connp->conn_lock); 20757c478bd9Sstevel@tonic-gate if (connp->conn_state_flags & 20767c478bd9Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) { 20777c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 20787c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 20797c478bd9Sstevel@tonic-gate continue; 20807c478bd9Sstevel@tonic-gate } 20817c478bd9Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp); 20827c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock); 2083*f4b3ec61Sdh mutex_exit( 2084*f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 20857c478bd9Sstevel@tonic-gate (*func)(connp, arg); 20867c478bd9Sstevel@tonic-gate if (prev_connp != NULL) 20877c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 2088*f4b3ec61Sdh mutex_enter( 2089*f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 20907c478bd9Sstevel@tonic-gate 
prev_connp = connp; 20917c478bd9Sstevel@tonic-gate connp = connp->conn_g_next; 20927c478bd9Sstevel@tonic-gate } 2093*f4b3ec61Sdh mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock); 20947c478bd9Sstevel@tonic-gate if (prev_connp != NULL) 20957c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp); 20967c478bd9Sstevel@tonic-gate } 20977c478bd9Sstevel@tonic-gate } 20987c478bd9Sstevel@tonic-gate 20997c478bd9Sstevel@tonic-gate /* 21007c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on 21017c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 21027c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries 2103d0ab37afSethindra * (peer tcp in ESTABLISHED state). 21047c478bd9Sstevel@tonic-gate */ 21057c478bd9Sstevel@tonic-gate conn_t * 2106*f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph, 2107*f4b3ec61Sdh ip_stack_t *ipst) 21087c478bd9Sstevel@tonic-gate { 21097c478bd9Sstevel@tonic-gate uint32_t ports; 21107c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 21117c478bd9Sstevel@tonic-gate connf_t *connfp; 21127c478bd9Sstevel@tonic-gate conn_t *tconnp; 21137c478bd9Sstevel@tonic-gate boolean_t zone_chk; 21147c478bd9Sstevel@tonic-gate 21157c478bd9Sstevel@tonic-gate /* 21167c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 21177c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 21187c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 21197c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. 
21207c478bd9Sstevel@tonic-gate */ 21217c478bd9Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) || 21227c478bd9Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK)); 21237c478bd9Sstevel@tonic-gate 21247c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 21257c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 21267c478bd9Sstevel@tonic-gate 2127*f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2128*f4b3ec61Sdh ports, ipst)]; 21297c478bd9Sstevel@tonic-gate 21307c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 21317c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 21327c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 21337c478bd9Sstevel@tonic-gate 21347c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 21357c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 2136d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 21377c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 21387c478bd9Sstevel@tonic-gate 21397c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 21407c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 21417c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 21427c478bd9Sstevel@tonic-gate return (tconnp); 21437c478bd9Sstevel@tonic-gate } 21447c478bd9Sstevel@tonic-gate } 21457c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 21467c478bd9Sstevel@tonic-gate return (NULL); 21477c478bd9Sstevel@tonic-gate } 21487c478bd9Sstevel@tonic-gate 21497c478bd9Sstevel@tonic-gate /* 21507c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on 21517c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference 21527c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. 
Only checks for connected entries 2153d0ab37afSethindra * (peer tcp in ESTABLISHED state). 21547c478bd9Sstevel@tonic-gate */ 21557c478bd9Sstevel@tonic-gate conn_t * 2156*f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph, 2157*f4b3ec61Sdh ip_stack_t *ipst) 21587c478bd9Sstevel@tonic-gate { 21597c478bd9Sstevel@tonic-gate uint32_t ports; 21607c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports; 21617c478bd9Sstevel@tonic-gate connf_t *connfp; 21627c478bd9Sstevel@tonic-gate conn_t *tconnp; 21637c478bd9Sstevel@tonic-gate boolean_t zone_chk; 21647c478bd9Sstevel@tonic-gate 21657c478bd9Sstevel@tonic-gate /* 21667c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then 21677c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of 21687c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED 21697c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We 21707c478bd9Sstevel@tonic-gate * don't do Zone check for link local address(es) because the 21717c478bd9Sstevel@tonic-gate * current Zone implementation treats each link local address as 21727c478bd9Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone. 
21737c478bd9Sstevel@tonic-gate */ 21747c478bd9Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) || 21757c478bd9Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst)); 21767c478bd9Sstevel@tonic-gate 21777c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 21787c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 21797c478bd9Sstevel@tonic-gate 2180*f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2181*f4b3ec61Sdh ports, ipst)]; 21827c478bd9Sstevel@tonic-gate 21837c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 21847c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 21857c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 21867c478bd9Sstevel@tonic-gate 21877c478bd9Sstevel@tonic-gate /* We skip tcp_bound_if check here as this is loopback tcp */ 21887c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 21897c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 2190d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED && 21917c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) { 21927c478bd9Sstevel@tonic-gate 21937c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp); 21947c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 21957c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 21967c478bd9Sstevel@tonic-gate return (tconnp); 21977c478bd9Sstevel@tonic-gate } 21987c478bd9Sstevel@tonic-gate } 21997c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 22007c478bd9Sstevel@tonic-gate return (NULL); 22017c478bd9Sstevel@tonic-gate } 22027c478bd9Sstevel@tonic-gate 22037c478bd9Sstevel@tonic-gate /* 22047c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 22057c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 
22067c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 22077c478bd9Sstevel@tonic-gate */ 22087c478bd9Sstevel@tonic-gate conn_t * 2209*f4b3ec61Sdh ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state, 2210*f4b3ec61Sdh ip_stack_t *ipst) 22117c478bd9Sstevel@tonic-gate { 22127c478bd9Sstevel@tonic-gate uint32_t ports; 22137c478bd9Sstevel@tonic-gate uint16_t *pports; 22147c478bd9Sstevel@tonic-gate connf_t *connfp; 22157c478bd9Sstevel@tonic-gate conn_t *tconnp; 22167c478bd9Sstevel@tonic-gate 22177c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 22187c478bd9Sstevel@tonic-gate bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t)); 22197c478bd9Sstevel@tonic-gate bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t)); 22207c478bd9Sstevel@tonic-gate 2221*f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst, 2222*f4b3ec61Sdh ports, ipst)]; 22237c478bd9Sstevel@tonic-gate 22247c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 22257c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 22267c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 22277c478bd9Sstevel@tonic-gate 22287c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP, 22297c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) && 22307c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) { 22317c478bd9Sstevel@tonic-gate 22327c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 22337c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 22347c478bd9Sstevel@tonic-gate return (tconnp); 22357c478bd9Sstevel@tonic-gate } 22367c478bd9Sstevel@tonic-gate } 22377c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 22387c478bd9Sstevel@tonic-gate return (NULL); 22397c478bd9Sstevel@tonic-gate } 22407c478bd9Sstevel@tonic-gate 22417c478bd9Sstevel@tonic-gate /* 22427c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram. 
22437c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF. 22447c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks. 22457c478bd9Sstevel@tonic-gate * Match on ifindex in addition to addresses. 22467c478bd9Sstevel@tonic-gate */ 22477c478bd9Sstevel@tonic-gate conn_t * 22487c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state, 2249*f4b3ec61Sdh uint_t ifindex, ip_stack_t *ipst) 22507c478bd9Sstevel@tonic-gate { 22517c478bd9Sstevel@tonic-gate tcp_t *tcp; 22527c478bd9Sstevel@tonic-gate uint32_t ports; 22537c478bd9Sstevel@tonic-gate uint16_t *pports; 22547c478bd9Sstevel@tonic-gate connf_t *connfp; 22557c478bd9Sstevel@tonic-gate conn_t *tconnp; 22567c478bd9Sstevel@tonic-gate 22577c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports; 22587c478bd9Sstevel@tonic-gate pports[0] = tcpha->tha_fport; 22597c478bd9Sstevel@tonic-gate pports[1] = tcpha->tha_lport; 22607c478bd9Sstevel@tonic-gate 2261*f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst, 2262*f4b3ec61Sdh ports, ipst)]; 22637c478bd9Sstevel@tonic-gate 22647c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock); 22657c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL; 22667c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) { 22677c478bd9Sstevel@tonic-gate 22687c478bd9Sstevel@tonic-gate tcp = tconnp->conn_tcp; 22697c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP, 22707c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) && 22717c478bd9Sstevel@tonic-gate tcp->tcp_state >= min_state && 22727c478bd9Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 22737c478bd9Sstevel@tonic-gate tcp->tcp_bound_if == ifindex)) { 22747c478bd9Sstevel@tonic-gate 22757c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp); 22767c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 22777c478bd9Sstevel@tonic-gate return (tconnp); 
22787c478bd9Sstevel@tonic-gate } 22797c478bd9Sstevel@tonic-gate } 22807c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock); 22817c478bd9Sstevel@tonic-gate return (NULL); 22827c478bd9Sstevel@tonic-gate } 22837c478bd9Sstevel@tonic-gate 22847c478bd9Sstevel@tonic-gate /* 228545916cd2Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate 228645916cd2Sjpk * a listener when changing state. 22877c478bd9Sstevel@tonic-gate */ 22887c478bd9Sstevel@tonic-gate conn_t * 2289*f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid, 2290*f4b3ec61Sdh ip_stack_t *ipst) 22917c478bd9Sstevel@tonic-gate { 22927c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 22937c478bd9Sstevel@tonic-gate conn_t *connp; 22947c478bd9Sstevel@tonic-gate tcp_t *tcp; 22957c478bd9Sstevel@tonic-gate 22967c478bd9Sstevel@tonic-gate /* 22977c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 22987c478bd9Sstevel@tonic-gate * all zeros. 22997c478bd9Sstevel@tonic-gate */ 23007c478bd9Sstevel@tonic-gate if (laddr == 0) 23017c478bd9Sstevel@tonic-gate return (NULL); 23027c478bd9Sstevel@tonic-gate 230345916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 230445916cd2Sjpk 2305*f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 23067c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 23077c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 23087c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 23097c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 23107c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) && 23115d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 23127c478bd9Sstevel@tonic-gate (tcp->tcp_listener == NULL)) { 23137c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 23147c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 23157c478bd9Sstevel@tonic-gate return (connp); 23167c478bd9Sstevel@tonic-gate } 
23177c478bd9Sstevel@tonic-gate } 23187c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 23197c478bd9Sstevel@tonic-gate return (NULL); 23207c478bd9Sstevel@tonic-gate } 23217c478bd9Sstevel@tonic-gate 232245916cd2Sjpk /* 232345916cd2Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate 232445916cd2Sjpk * a listener when changing state. 232545916cd2Sjpk */ 23267c478bd9Sstevel@tonic-gate conn_t * 23277c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex, 2328*f4b3ec61Sdh zoneid_t zoneid, ip_stack_t *ipst) 23297c478bd9Sstevel@tonic-gate { 23307c478bd9Sstevel@tonic-gate connf_t *bind_connfp; 23317c478bd9Sstevel@tonic-gate conn_t *connp = NULL; 23327c478bd9Sstevel@tonic-gate tcp_t *tcp; 23337c478bd9Sstevel@tonic-gate 23347c478bd9Sstevel@tonic-gate /* 23357c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of 23367c478bd9Sstevel@tonic-gate * all zeros. 23377c478bd9Sstevel@tonic-gate */ 23387c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 23397c478bd9Sstevel@tonic-gate return (NULL); 23407c478bd9Sstevel@tonic-gate 234145916cd2Sjpk ASSERT(zoneid != ALL_ZONES); 23427c478bd9Sstevel@tonic-gate 2343*f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)]; 23447c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock); 23457c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL; 23467c478bd9Sstevel@tonic-gate connp = connp->conn_next) { 23477c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp; 23487c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) && 23495d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) && 23507c478bd9Sstevel@tonic-gate (tcp->tcp_bound_if == 0 || 23517c478bd9Sstevel@tonic-gate tcp->tcp_bound_if == ifindex) && 23527c478bd9Sstevel@tonic-gate tcp->tcp_listener == NULL) { 23537c478bd9Sstevel@tonic-gate CONN_INC_REF(connp); 
23547c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 23557c478bd9Sstevel@tonic-gate return (connp); 23567c478bd9Sstevel@tonic-gate } 23577c478bd9Sstevel@tonic-gate } 23587c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock); 23597c478bd9Sstevel@tonic-gate return (NULL); 23607c478bd9Sstevel@tonic-gate } 23617c478bd9Sstevel@tonic-gate 2362ff550d0eSmasputra /* 2363ff550d0eSmasputra * ipcl_get_next_conn 2364ff550d0eSmasputra * get the next entry in the conn global list 2365ff550d0eSmasputra * and put a reference on the next_conn. 2366ff550d0eSmasputra * decrement the reference on the current conn. 2367ff550d0eSmasputra * 2368ff550d0eSmasputra * This is an iterator based walker function that also provides for 2369ff550d0eSmasputra * some selection by the caller. It walks through the conn_hash bucket 2370ff550d0eSmasputra * searching for the next valid connp in the list, and selects connections 2371ff550d0eSmasputra * that are neither closed nor condemned. It also REFHOLDS the conn 2372ff550d0eSmasputra * thus ensuring that the conn exists when the caller uses the conn. 2373ff550d0eSmasputra */ 2374ff550d0eSmasputra conn_t * 2375ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags) 2376ff550d0eSmasputra { 2377ff550d0eSmasputra conn_t *next_connp; 2378ff550d0eSmasputra 2379ff550d0eSmasputra if (connfp == NULL) 2380ff550d0eSmasputra return (NULL); 2381ff550d0eSmasputra 2382ff550d0eSmasputra mutex_enter(&connfp->connf_lock); 2383ff550d0eSmasputra 2384ff550d0eSmasputra next_connp = (connp == NULL) ? 
2385ff550d0eSmasputra connfp->connf_head : connp->conn_g_next; 2386ff550d0eSmasputra 2387ff550d0eSmasputra while (next_connp != NULL) { 2388ff550d0eSmasputra mutex_enter(&next_connp->conn_lock); 2389ff550d0eSmasputra if (!(next_connp->conn_flags & conn_flags) || 2390ff550d0eSmasputra (next_connp->conn_state_flags & 2391ff550d0eSmasputra (CONN_CONDEMNED | CONN_INCIPIENT))) { 2392ff550d0eSmasputra /* 2393ff550d0eSmasputra * This conn has been condemned or 2394ff550d0eSmasputra * is closing, or the flags don't match 2395ff550d0eSmasputra */ 2396ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2397ff550d0eSmasputra next_connp = next_connp->conn_g_next; 2398ff550d0eSmasputra continue; 2399ff550d0eSmasputra } 2400ff550d0eSmasputra CONN_INC_REF_LOCKED(next_connp); 2401ff550d0eSmasputra mutex_exit(&next_connp->conn_lock); 2402ff550d0eSmasputra break; 2403ff550d0eSmasputra } 2404ff550d0eSmasputra 2405ff550d0eSmasputra mutex_exit(&connfp->connf_lock); 2406ff550d0eSmasputra 2407ff550d0eSmasputra if (connp != NULL) 2408ff550d0eSmasputra CONN_DEC_REF(connp); 2409ff550d0eSmasputra 2410ff550d0eSmasputra return (next_connp); 2411ff550d0eSmasputra } 2412ff550d0eSmasputra 24137c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG 24147c478bd9Sstevel@tonic-gate /* 24157c478bd9Sstevel@tonic-gate * Trace of the last NBUF refhold/refrele 24167c478bd9Sstevel@tonic-gate */ 24177c478bd9Sstevel@tonic-gate int 24187c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp) 24197c478bd9Sstevel@tonic-gate { 24207c478bd9Sstevel@tonic-gate int last; 24217c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 24227c478bd9Sstevel@tonic-gate 24237c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 24247c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 24257c478bd9Sstevel@tonic-gate last++; 24267c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 24277c478bd9Sstevel@tonic-gate last = 0; 24287c478bd9Sstevel@tonic-gate 24297c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 
24307c478bd9Sstevel@tonic-gate ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 24317c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 24327c478bd9Sstevel@tonic-gate return (1); 24337c478bd9Sstevel@tonic-gate } 24347c478bd9Sstevel@tonic-gate 24357c478bd9Sstevel@tonic-gate int 24367c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp) 24377c478bd9Sstevel@tonic-gate { 24387c478bd9Sstevel@tonic-gate int last; 24397c478bd9Sstevel@tonic-gate conn_trace_t *ctb; 24407c478bd9Sstevel@tonic-gate 24417c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock)); 24427c478bd9Sstevel@tonic-gate last = connp->conn_trace_last; 24437c478bd9Sstevel@tonic-gate last++; 24447c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX) 24457c478bd9Sstevel@tonic-gate last = 0; 24467c478bd9Sstevel@tonic-gate 24477c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last]; 24487c478bd9Sstevel@tonic-gate ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH); 24497c478bd9Sstevel@tonic-gate connp->conn_trace_last = last; 24507c478bd9Sstevel@tonic-gate return (1); 24517c478bd9Sstevel@tonic-gate } 24527c478bd9Sstevel@tonic-gate #endif 2453