17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5ee4701baSericheng * Common Development and Distribution License (the "License").
6ee4701baSericheng * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate /*
2266cd0f60SKacheong Poon * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
2378a2e113SAndy Fiddaman * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
24*041297c2SDan McDonald * Copyright 2022 Joyent, Inc.
257c478bd9Sstevel@tonic-gate */
267c478bd9Sstevel@tonic-gate
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate * IP PACKET CLASSIFIER
297c478bd9Sstevel@tonic-gate *
307c478bd9Sstevel@tonic-gate * The IP packet classifier provides mapping between IP packets and persistent
317c478bd9Sstevel@tonic-gate * connection state for connection-oriented protocols. It also provides
327c478bd9Sstevel@tonic-gate * interface for managing connection states.
337c478bd9Sstevel@tonic-gate *
347c478bd9Sstevel@tonic-gate * The connection state is kept in conn_t data structure and contains, among
357c478bd9Sstevel@tonic-gate * other things:
367c478bd9Sstevel@tonic-gate *
377c478bd9Sstevel@tonic-gate * o local/remote address and ports
387c478bd9Sstevel@tonic-gate * o Transport protocol
397c478bd9Sstevel@tonic-gate * o squeue for the connection (for TCP only)
407c478bd9Sstevel@tonic-gate * o reference counter
417c478bd9Sstevel@tonic-gate * o Connection state
427c478bd9Sstevel@tonic-gate * o hash table linkage
437c478bd9Sstevel@tonic-gate * o interface/ire information
447c478bd9Sstevel@tonic-gate * o credentials
457c478bd9Sstevel@tonic-gate * o ipsec policy
467c478bd9Sstevel@tonic-gate * o send and receive functions.
477c478bd9Sstevel@tonic-gate * o mutex lock.
487c478bd9Sstevel@tonic-gate *
497c478bd9Sstevel@tonic-gate * Connections use a reference counting scheme. They are freed when the
507c478bd9Sstevel@tonic-gate * reference counter drops to zero. A reference is incremented when connection
517c478bd9Sstevel@tonic-gate * is placed in a list or table, when incoming packet for the connection arrives
527c478bd9Sstevel@tonic-gate * and when connection is processed via squeue (squeue processing may be
537c478bd9Sstevel@tonic-gate * asynchronous and the reference protects the connection from being destroyed
547c478bd9Sstevel@tonic-gate * before its processing is finished).
557c478bd9Sstevel@tonic-gate *
56bd670b35SErik Nordmark * conn_recv is used to pass up packets to the ULP.
57bd670b35SErik Nordmark * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
58bd670b35SErik Nordmark * a listener, and changes to tcp_input_listener as the listener has picked a
59bd670b35SErik Nordmark * good squeue. For other cases it is set to tcp_input_data.
60bd670b35SErik Nordmark *
61bd670b35SErik Nordmark * conn_recvicmp is used to pass up ICMP errors to the ULP.
627c478bd9Sstevel@tonic-gate *
637c478bd9Sstevel@tonic-gate * Classifier uses several hash tables:
647c478bd9Sstevel@tonic-gate *
6578a2e113SAndy Fiddaman * ipcl_conn_fanout: contains all TCP connections in CONNECTED state
667c478bd9Sstevel@tonic-gate * ipcl_bind_fanout: contains all connections in BOUND state
677c478bd9Sstevel@tonic-gate * ipcl_proto_fanout: IPv4 protocol fanout
687c478bd9Sstevel@tonic-gate * ipcl_proto_fanout_v6: IPv6 protocol fanout
697c478bd9Sstevel@tonic-gate * ipcl_udp_fanout: contains all UDP connections
702b24ab6bSSebastien Roy * ipcl_iptun_fanout: contains all IP tunnel connections
717c478bd9Sstevel@tonic-gate * ipcl_globalhash_fanout: contains all connections
727c478bd9Sstevel@tonic-gate *
737c478bd9Sstevel@tonic-gate * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
747c478bd9Sstevel@tonic-gate * which need to view all existing connections.
757c478bd9Sstevel@tonic-gate *
767c478bd9Sstevel@tonic-gate * All tables are protected by per-bucket locks. When both per-bucket lock and
777c478bd9Sstevel@tonic-gate * connection lock need to be held, the per-bucket lock should be acquired
787c478bd9Sstevel@tonic-gate * first, followed by the connection lock.
797c478bd9Sstevel@tonic-gate *
807c478bd9Sstevel@tonic-gate * All functions doing search in one of these tables increment a reference
817c478bd9Sstevel@tonic-gate * counter on the connection found (if any). This reference should be dropped
827c478bd9Sstevel@tonic-gate * when the caller has finished processing the connection.
837c478bd9Sstevel@tonic-gate *
847c478bd9Sstevel@tonic-gate *
857c478bd9Sstevel@tonic-gate * INTERFACES:
867c478bd9Sstevel@tonic-gate * ===========
877c478bd9Sstevel@tonic-gate *
887c478bd9Sstevel@tonic-gate * Connection Lookup:
897c478bd9Sstevel@tonic-gate * ------------------
907c478bd9Sstevel@tonic-gate *
91bd670b35SErik Nordmark * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
92bd670b35SErik Nordmark * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
937c478bd9Sstevel@tonic-gate *
947c478bd9Sstevel@tonic-gate * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
957c478bd9Sstevel@tonic-gate * it can't find any associated connection. If the connection is found, its
967c478bd9Sstevel@tonic-gate * reference counter is incremented.
977c478bd9Sstevel@tonic-gate *
987c478bd9Sstevel@tonic-gate * mp: mblock, containing packet header. The full header should fit
997c478bd9Sstevel@tonic-gate * into a single mblock. It should also contain at least full IP
1007c478bd9Sstevel@tonic-gate * and TCP or UDP header.
1017c478bd9Sstevel@tonic-gate *
1027c478bd9Sstevel@tonic-gate * protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1037c478bd9Sstevel@tonic-gate *
1047c478bd9Sstevel@tonic-gate * hdr_len: The size of IP header. It is used to find TCP or UDP header in
1057c478bd9Sstevel@tonic-gate * the packet.
1067c478bd9Sstevel@tonic-gate *
10778a2e113SAndy Fiddaman * ira->ira_zoneid: The zone in which the returned connection must be; the
108bd670b35SErik Nordmark * zoneid corresponding to the ire_zoneid on the IRE located for
109bd670b35SErik Nordmark * the packet's destination address.
110bd670b35SErik Nordmark *
111bd670b35SErik Nordmark * ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
112bd670b35SErik Nordmark * IRAF_TX_SHARED_ADDR flags
1137c478bd9Sstevel@tonic-gate *
1147c478bd9Sstevel@tonic-gate * For TCP connections, the lookup order is as follows:
1157c478bd9Sstevel@tonic-gate * 5-tuple {src, dst, protocol, local port, remote port}
1167c478bd9Sstevel@tonic-gate * lookup in ipcl_conn_fanout table.
1177c478bd9Sstevel@tonic-gate * 3-tuple {dst, remote port, protocol} lookup in
1187c478bd9Sstevel@tonic-gate * ipcl_bind_fanout table.
1197c478bd9Sstevel@tonic-gate *
 * For UDP connections, a 5-tuple {src, dst, protocol, local port,
 * remote port} lookup is done on ipcl_udp_fanout. Note that
 * these interfaces do not handle cases where a packet belongs
 * to multiple UDP clients, which is handled in IP itself.
1247c478bd9Sstevel@tonic-gate *
12545916cd2Sjpk * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
12645916cd2Sjpk * determine which actual zone gets the segment. This is used only in a
12745916cd2Sjpk * labeled environment. The matching rules are:
12845916cd2Sjpk *
12945916cd2Sjpk * - If it's not a multilevel port, then the label on the packet selects
13045916cd2Sjpk * the zone. Unlabeled packets are delivered to the global zone.
13145916cd2Sjpk *
13245916cd2Sjpk * - If it's a multilevel port, then only the zone registered to receive
13345916cd2Sjpk * packets on that port matches.
13445916cd2Sjpk *
13545916cd2Sjpk * Also, in a labeled environment, packet labels need to be checked. For fully
13645916cd2Sjpk * bound TCP connections, we can assume that the packet label was checked
13745916cd2Sjpk * during connection establishment, and doesn't need to be checked on each
13845916cd2Sjpk * packet. For others, though, we need to check for strict equality or, for
13945916cd2Sjpk * multilevel ports, membership in the range or set. This part currently does
14045916cd2Sjpk * a tnrh lookup on each packet, but could be optimized to use cached results
14145916cd2Sjpk * if that were necessary. (SCTP doesn't come through here, but if it did,
14245916cd2Sjpk * we would apply the same rules as TCP.)
14345916cd2Sjpk *
14445916cd2Sjpk * An implication of the above is that fully-bound TCP sockets must always use
14545916cd2Sjpk * distinct 4-tuples; they can't be discriminated by label alone.
14645916cd2Sjpk *
14745916cd2Sjpk * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
14845916cd2Sjpk * as there's no connection set-up handshake and no shared state.
14945916cd2Sjpk *
15045916cd2Sjpk * Labels on looped-back packets within a single zone do not need to be
15145916cd2Sjpk * checked, as all processes in the same zone have the same label.
15245916cd2Sjpk *
15345916cd2Sjpk * Finally, for unlabeled packets received by a labeled system, special rules
15445916cd2Sjpk * apply. We consider only the MLP if there is one. Otherwise, we prefer a
15545916cd2Sjpk * socket in the zone whose label matches the default label of the sender, if
15645916cd2Sjpk * any. In any event, the receiving socket must have SO_MAC_EXEMPT set and the
15745916cd2Sjpk * receiver's label must dominate the sender's default label.
15845916cd2Sjpk *
159bd670b35SErik Nordmark * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
160f4b3ec61Sdh * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
161f4b3ec61Sdh * ip_stack);
1627c478bd9Sstevel@tonic-gate *
 * Lookup routine to find an exact match for {src, dst, local port,
 * remote port} for TCP connections in ipcl_conn_fanout. The address and
 * ports are read from the IP and TCP header respectively.
1667c478bd9Sstevel@tonic-gate *
167f4b3ec61Sdh * conn_t *ipcl_lookup_listener_v4(lport, laddr, protocol,
168f4b3ec61Sdh * zoneid, ip_stack);
169f4b3ec61Sdh * conn_t *ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
170f4b3ec61Sdh * zoneid, ip_stack);
1717c478bd9Sstevel@tonic-gate *
17278a2e113SAndy Fiddaman * Lookup routine to find a listener with the tuple {lport, laddr,
17378a2e113SAndy Fiddaman * protocol} in the ipcl_bind_fanout table. For IPv6, an additional
17478a2e113SAndy Fiddaman * parameter interface index is also compared.
1757c478bd9Sstevel@tonic-gate *
176f4b3ec61Sdh * void ipcl_walk(func, arg, ip_stack)
1777c478bd9Sstevel@tonic-gate *
17878a2e113SAndy Fiddaman * Apply 'func' to every connection available. The 'func' is called as
1797c478bd9Sstevel@tonic-gate * (*func)(connp, arg). The walk is non-atomic so connections may be
1807c478bd9Sstevel@tonic-gate * created and destroyed during the walk. The CONN_CONDEMNED and
1817c478bd9Sstevel@tonic-gate * CONN_INCIPIENT flags ensure that connections which are newly created
1827c478bd9Sstevel@tonic-gate * or being destroyed are not selected by the walker.
1837c478bd9Sstevel@tonic-gate *
1847c478bd9Sstevel@tonic-gate * Table Updates
1857c478bd9Sstevel@tonic-gate * -------------
1867c478bd9Sstevel@tonic-gate *
187bd670b35SErik Nordmark * int ipcl_conn_insert(connp);
188bd670b35SErik Nordmark * int ipcl_conn_insert_v4(connp);
189bd670b35SErik Nordmark * int ipcl_conn_insert_v6(connp);
1907c478bd9Sstevel@tonic-gate *
1917c478bd9Sstevel@tonic-gate * Insert 'connp' in the ipcl_conn_fanout.
19278a2e113SAndy Fiddaman * Arguments :
1937c478bd9Sstevel@tonic-gate * connp conn_t to be inserted
1947c478bd9Sstevel@tonic-gate *
1957c478bd9Sstevel@tonic-gate * Return value :
1967c478bd9Sstevel@tonic-gate * 0 if connp was inserted
1977c478bd9Sstevel@tonic-gate * EADDRINUSE if the connection with the same tuple
1987c478bd9Sstevel@tonic-gate * already exists.
1997c478bd9Sstevel@tonic-gate *
200bd670b35SErik Nordmark * int ipcl_bind_insert(connp);
201bd670b35SErik Nordmark * int ipcl_bind_insert_v4(connp);
202bd670b35SErik Nordmark * int ipcl_bind_insert_v6(connp);
2037c478bd9Sstevel@tonic-gate *
20478a2e113SAndy Fiddaman * Insert 'connp' in ipcl_bind_fanout.
20578a2e113SAndy Fiddaman * Arguments :
20678a2e113SAndy Fiddaman * connp conn_t to be inserted
2077c478bd9Sstevel@tonic-gate *
2087c478bd9Sstevel@tonic-gate *
2097c478bd9Sstevel@tonic-gate * void ipcl_hash_remove(connp);
2107c478bd9Sstevel@tonic-gate *
21178a2e113SAndy Fiddaman * Removes the 'connp' from the connection fanout table.
2127c478bd9Sstevel@tonic-gate *
2137c478bd9Sstevel@tonic-gate * Connection Creation/Destruction
2147c478bd9Sstevel@tonic-gate * -------------------------------
2157c478bd9Sstevel@tonic-gate *
216f4b3ec61Sdh * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
2177c478bd9Sstevel@tonic-gate *
21878a2e113SAndy Fiddaman * Creates a new conn based on the type flag, inserts it into
21978a2e113SAndy Fiddaman * globalhash table.
2207c478bd9Sstevel@tonic-gate *
2217c478bd9Sstevel@tonic-gate * type: This flag determines the type of conn_t which needs to be
222fc80c0dfSnordmark * created i.e., which kmem_cache it comes from.
2237c478bd9Sstevel@tonic-gate * IPCL_TCPCONN indicates a TCP connection
224fc80c0dfSnordmark * IPCL_SCTPCONN indicates a SCTP connection
225fc80c0dfSnordmark * IPCL_UDPCONN indicates a UDP conn_t.
226fc80c0dfSnordmark * IPCL_RAWIPCONN indicates a RAWIP/ICMP conn_t.
227fc80c0dfSnordmark * IPCL_RTSCONN indicates a RTS conn_t.
228fc80c0dfSnordmark * IPCL_IPCCONN indicates all other connections.
2297c478bd9Sstevel@tonic-gate *
2307c478bd9Sstevel@tonic-gate * void ipcl_conn_destroy(connp)
2317c478bd9Sstevel@tonic-gate *
23278a2e113SAndy Fiddaman * Destroys the connection state, removes it from the global
23378a2e113SAndy Fiddaman * connection hash table and frees its memory.
2347c478bd9Sstevel@tonic-gate */
2357c478bd9Sstevel@tonic-gate
2367c478bd9Sstevel@tonic-gate #include <sys/types.h>
2377c478bd9Sstevel@tonic-gate #include <sys/stream.h>
2387c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
2397c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
2407c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
2417c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
2427c478bd9Sstevel@tonic-gate #define _SUN_TPI_VERSION 2
2437c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
2447c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
2457c478bd9Sstevel@tonic-gate #include <sys/debug.h>
2467c478bd9Sstevel@tonic-gate
2477c478bd9Sstevel@tonic-gate #include <sys/systm.h>
2487c478bd9Sstevel@tonic-gate #include <sys/param.h>
2497c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
2507c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
2517c478bd9Sstevel@tonic-gate #include <inet/common.h>
2527c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
2537c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h>
2547c478bd9Sstevel@tonic-gate
2557c478bd9Sstevel@tonic-gate #include <inet/ip.h>
256bd670b35SErik Nordmark #include <inet/ip_if.h>
257bd670b35SErik Nordmark #include <inet/ip_ire.h>
2587c478bd9Sstevel@tonic-gate #include <inet/ip6.h>
2597c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h>
2600f1702c5SYu Xiangning #include <inet/ip_impl.h>
261ff550d0eSmasputra #include <inet/udp_impl.h>
2627c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h>
263f4b3ec61Sdh #include <inet/sctp/sctp_impl.h>
264fc80c0dfSnordmark #include <inet/rawip_impl.h>
265fc80c0dfSnordmark #include <inet/rts_impl.h>
2662b24ab6bSSebastien Roy #include <inet/iptun/iptun_impl.h>
2677c478bd9Sstevel@tonic-gate
2687c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
2697c478bd9Sstevel@tonic-gate
2707c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h>
2710f1702c5SYu Xiangning #include <inet/tcp.h>
2727c478bd9Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2737c478bd9Sstevel@tonic-gate
27445916cd2Sjpk #include <sys/tsol/tnet.h>
2750f1702c5SYu Xiangning #include <sys/sockio.h>
27645916cd2Sjpk
277f4b3ec61Sdh /* Old value for compatibility. Setable in /etc/system */
2787c478bd9Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0;
2797c478bd9Sstevel@tonic-gate
280f4b3ec61Sdh /* New value. Zero means choose automatically. Setable in /etc/system */
2817c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0;
2827c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192;
2837c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500;
2847c478bd9Sstevel@tonic-gate
2857c478bd9Sstevel@tonic-gate /* bind/udp fanout table size */
2867c478bd9Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512;
287ee4701baSericheng uint_t ipcl_udp_fanout_size = 16384;
2887c478bd9Sstevel@tonic-gate
2897c478bd9Sstevel@tonic-gate /* Raw socket fanout size. Must be a power of 2. */
2907c478bd9Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256;
2917c478bd9Sstevel@tonic-gate
2922b24ab6bSSebastien Roy /*
2932b24ab6bSSebastien Roy * The IPCL_IPTUN_HASH() function works best with a prime table size. We
2942b24ab6bSSebastien Roy * expect that most large deployments would have hundreds of tunnels, and
2952b24ab6bSSebastien Roy * thousands in the extreme case.
2962b24ab6bSSebastien Roy */
2972b24ab6bSSebastien Roy uint_t ipcl_iptun_fanout_size = 6143;
2982b24ab6bSSebastien Roy
/*
 * Primes useful for sizing hash tables, indexed by N for N of 0-28.  Each
 * non-zero entry is the nearest prime <= 2^N - 2^(N-2).  The entries for
 * N < 3 are 0, and the list ends with a 0 that ipcl_init() treats as
 * "out of range".
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89,				\
		191, 383, 761, 1531, 3067, 6143, 12281, 24571,		\
		49139, 98299, 196597, 393209, 786431, 1572853,		\
		3145721, 6291449, 12582893, 25165813, 50331599,		\
		100663291, 201326557, 0}
3087c478bd9Sstevel@tonic-gate
3097c478bd9Sstevel@tonic-gate /*
310fc80c0dfSnordmark * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
311fc80c0dfSnordmark * are aligned on cache lines.
3127c478bd9Sstevel@tonic-gate */
313fc80c0dfSnordmark typedef union itc_s {
314fc80c0dfSnordmark conn_t itc_conn;
315fc80c0dfSnordmark char itcu_filler[CACHE_ALIGN(conn_s)];
3167c478bd9Sstevel@tonic-gate } itc_t;
3177c478bd9Sstevel@tonic-gate
318fc80c0dfSnordmark struct kmem_cache *tcp_conn_cache;
319fc80c0dfSnordmark struct kmem_cache *ip_conn_cache;
3207c478bd9Sstevel@tonic-gate extern struct kmem_cache *sctp_conn_cache;
321fc80c0dfSnordmark struct kmem_cache *udp_conn_cache;
322fc80c0dfSnordmark struct kmem_cache *rawip_conn_cache;
323fc80c0dfSnordmark struct kmem_cache *rts_conn_cache;
3247c478bd9Sstevel@tonic-gate
3257c478bd9Sstevel@tonic-gate extern void tcp_timermp_free(tcp_t *);
3267c478bd9Sstevel@tonic-gate extern mblk_t *tcp_timermp_alloc(int);
3277c478bd9Sstevel@tonic-gate
328fc80c0dfSnordmark static int ip_conn_constructor(void *, void *, int);
329fc80c0dfSnordmark static void ip_conn_destructor(void *, void *);
330fc80c0dfSnordmark
331fc80c0dfSnordmark static int tcp_conn_constructor(void *, void *, int);
332fc80c0dfSnordmark static void tcp_conn_destructor(void *, void *);
333fc80c0dfSnordmark
334fc80c0dfSnordmark static int udp_conn_constructor(void *, void *, int);
335fc80c0dfSnordmark static void udp_conn_destructor(void *, void *);
336fc80c0dfSnordmark
337fc80c0dfSnordmark static int rawip_conn_constructor(void *, void *, int);
338fc80c0dfSnordmark static void rawip_conn_destructor(void *, void *);
339fc80c0dfSnordmark
340fc80c0dfSnordmark static int rts_conn_constructor(void *, void *, int);
341fc80c0dfSnordmark static void rts_conn_destructor(void *, void *);
3427c478bd9Sstevel@tonic-gate
3437c478bd9Sstevel@tonic-gate /*
344f4b3ec61Sdh * Global (for all stack instances) init routine
3457c478bd9Sstevel@tonic-gate */
3467c478bd9Sstevel@tonic-gate void
ipcl_g_init(void)347f4b3ec61Sdh ipcl_g_init(void)
3487c478bd9Sstevel@tonic-gate {
349fc80c0dfSnordmark ip_conn_cache = kmem_cache_create("ip_conn_cache",
3507c478bd9Sstevel@tonic-gate sizeof (conn_t), CACHE_ALIGN_SIZE,
351fc80c0dfSnordmark ip_conn_constructor, ip_conn_destructor,
352fc80c0dfSnordmark NULL, NULL, NULL, 0);
353fc80c0dfSnordmark
354fc80c0dfSnordmark tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
355fc80c0dfSnordmark sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
356fc80c0dfSnordmark tcp_conn_constructor, tcp_conn_destructor,
35793fcb0b9SKacheong Poon tcp_conn_reclaim, NULL, NULL, 0);
358fc80c0dfSnordmark
359fc80c0dfSnordmark udp_conn_cache = kmem_cache_create("udp_conn_cache",
360fc80c0dfSnordmark sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
361fc80c0dfSnordmark udp_conn_constructor, udp_conn_destructor,
362fc80c0dfSnordmark NULL, NULL, NULL, 0);
3637c478bd9Sstevel@tonic-gate
364fc80c0dfSnordmark rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
365fc80c0dfSnordmark sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
366fc80c0dfSnordmark rawip_conn_constructor, rawip_conn_destructor,
367fc80c0dfSnordmark NULL, NULL, NULL, 0);
368fc80c0dfSnordmark
369fc80c0dfSnordmark rts_conn_cache = kmem_cache_create("rts_conn_cache",
370fc80c0dfSnordmark sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
371fc80c0dfSnordmark rts_conn_constructor, rts_conn_destructor,
3727c478bd9Sstevel@tonic-gate NULL, NULL, NULL, 0);
373f4b3ec61Sdh }
374f4b3ec61Sdh
375f4b3ec61Sdh /*
376f4b3ec61Sdh * ipclassifier intialization routine, sets up hash tables.
377f4b3ec61Sdh */
378f4b3ec61Sdh void
ipcl_init(ip_stack_t * ipst)379f4b3ec61Sdh ipcl_init(ip_stack_t *ipst)
380f4b3ec61Sdh {
381f4b3ec61Sdh int i;
382f4b3ec61Sdh int sizes[] = P2Ps();
3837c478bd9Sstevel@tonic-gate
3847c478bd9Sstevel@tonic-gate /*
385f4b3ec61Sdh * Calculate size of conn fanout table from /etc/system settings
3867c478bd9Sstevel@tonic-gate */
3877c478bd9Sstevel@tonic-gate if (ipcl_conn_hash_size != 0) {
388f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
3897c478bd9Sstevel@tonic-gate } else if (tcp_conn_hash_size != 0) {
390f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
3917c478bd9Sstevel@tonic-gate } else {
3927c478bd9Sstevel@tonic-gate extern pgcnt_t freemem;
3937c478bd9Sstevel@tonic-gate
394f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size =
3957c478bd9Sstevel@tonic-gate (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
3967c478bd9Sstevel@tonic-gate
397f4b3ec61Sdh if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
398f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size =
399f4b3ec61Sdh ipcl_conn_hash_maxsize;
400f4b3ec61Sdh }
4017c478bd9Sstevel@tonic-gate }
4027c478bd9Sstevel@tonic-gate
4037c478bd9Sstevel@tonic-gate for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
404f4b3ec61Sdh if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
4057c478bd9Sstevel@tonic-gate break;
4067c478bd9Sstevel@tonic-gate }
4077c478bd9Sstevel@tonic-gate }
408f4b3ec61Sdh if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
4097c478bd9Sstevel@tonic-gate /* Out of range, use the 2^16 value */
410f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size = sizes[16];
4117c478bd9Sstevel@tonic-gate }
4127c478bd9Sstevel@tonic-gate
413f4b3ec61Sdh /* Take values from /etc/system */
414f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
415f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
416f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
4172b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
418f4b3ec61Sdh
419f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
420f4b3ec61Sdh
421f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = kmem_zalloc(
422f4b3ec61Sdh ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
423f4b3ec61Sdh
424f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
425f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
4267c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4277c478bd9Sstevel@tonic-gate }
4287c478bd9Sstevel@tonic-gate
429f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = kmem_zalloc(
430f4b3ec61Sdh ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
4317c478bd9Sstevel@tonic-gate
432f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
433f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
4347c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4357c478bd9Sstevel@tonic-gate }
4367c478bd9Sstevel@tonic-gate
437bd670b35SErik Nordmark ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
438f4b3ec61Sdh sizeof (connf_t), KM_SLEEP);
439f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) {
440bd670b35SErik Nordmark mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
4417c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4427c478bd9Sstevel@tonic-gate }
443f4b3ec61Sdh
444f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
445f4b3ec61Sdh sizeof (connf_t), KM_SLEEP);
446f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) {
447f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
4487c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4497c478bd9Sstevel@tonic-gate }
4507c478bd9Sstevel@tonic-gate
451f4b3ec61Sdh ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
452f4b3ec61Sdh mutex_init(&ipst->ips_rts_clients->connf_lock,
453f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL);
4547c478bd9Sstevel@tonic-gate
455f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = kmem_zalloc(
456f4b3ec61Sdh ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
457f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
458f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
4597c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4607c478bd9Sstevel@tonic-gate }
4617c478bd9Sstevel@tonic-gate
4622b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
4632b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
4642b24ab6bSSebastien Roy for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
4652b24ab6bSSebastien Roy mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
4662b24ab6bSSebastien Roy MUTEX_DEFAULT, NULL);
4672b24ab6bSSebastien Roy }
4682b24ab6bSSebastien Roy
469f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = kmem_zalloc(
470f4b3ec61Sdh ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
471f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
472f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
4737c478bd9Sstevel@tonic-gate MUTEX_DEFAULT, NULL);
4747c478bd9Sstevel@tonic-gate }
4757c478bd9Sstevel@tonic-gate
476f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
477f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
4787c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) {
479f4b3ec61Sdh mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
480f4b3ec61Sdh NULL, MUTEX_DEFAULT, NULL);
4817c478bd9Sstevel@tonic-gate }
4827c478bd9Sstevel@tonic-gate }
4837c478bd9Sstevel@tonic-gate
4847c478bd9Sstevel@tonic-gate void
ipcl_g_destroy(void)485f4b3ec61Sdh ipcl_g_destroy(void)
4867c478bd9Sstevel@tonic-gate {
487fc80c0dfSnordmark kmem_cache_destroy(ip_conn_cache);
488fc80c0dfSnordmark kmem_cache_destroy(tcp_conn_cache);
489fc80c0dfSnordmark kmem_cache_destroy(udp_conn_cache);
490fc80c0dfSnordmark kmem_cache_destroy(rawip_conn_cache);
491fc80c0dfSnordmark kmem_cache_destroy(rts_conn_cache);
492f4b3ec61Sdh }
493f4b3ec61Sdh
494f4b3ec61Sdh /*
495f4b3ec61Sdh * All user-level and kernel use of the stack must be gone
496f4b3ec61Sdh * by now.
497f4b3ec61Sdh */
498f4b3ec61Sdh void
ipcl_destroy(ip_stack_t * ipst)499f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst)
500f4b3ec61Sdh {
501f4b3ec61Sdh int i;
502f4b3ec61Sdh
503f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
504f4b3ec61Sdh ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
505f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
506f4b3ec61Sdh }
507f4b3ec61Sdh kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
508f4b3ec61Sdh sizeof (connf_t));
509f4b3ec61Sdh ipst->ips_ipcl_conn_fanout = NULL;
510f4b3ec61Sdh
511f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
512f4b3ec61Sdh ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
513f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
514f4b3ec61Sdh }
515f4b3ec61Sdh kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
516f4b3ec61Sdh sizeof (connf_t));
517f4b3ec61Sdh ipst->ips_ipcl_bind_fanout = NULL;
518f4b3ec61Sdh
519f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) {
520bd670b35SErik Nordmark ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
521bd670b35SErik Nordmark mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
522f4b3ec61Sdh }
523bd670b35SErik Nordmark kmem_free(ipst->ips_ipcl_proto_fanout_v4,
524bd670b35SErik Nordmark IPPROTO_MAX * sizeof (connf_t));
525bd670b35SErik Nordmark ipst->ips_ipcl_proto_fanout_v4 = NULL;
526f4b3ec61Sdh
527f4b3ec61Sdh for (i = 0; i < IPPROTO_MAX; i++) {
528f4b3ec61Sdh ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
529f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
530f4b3ec61Sdh }
531f4b3ec61Sdh kmem_free(ipst->ips_ipcl_proto_fanout_v6,
532f4b3ec61Sdh IPPROTO_MAX * sizeof (connf_t));
533f4b3ec61Sdh ipst->ips_ipcl_proto_fanout_v6 = NULL;
534f4b3ec61Sdh
535f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
536f4b3ec61Sdh ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
537f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
538f4b3ec61Sdh }
539f4b3ec61Sdh kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
540f4b3ec61Sdh sizeof (connf_t));
541f4b3ec61Sdh ipst->ips_ipcl_udp_fanout = NULL;
542f4b3ec61Sdh
5432b24ab6bSSebastien Roy for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
5442b24ab6bSSebastien Roy ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
5452b24ab6bSSebastien Roy mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
5462b24ab6bSSebastien Roy }
5472b24ab6bSSebastien Roy kmem_free(ipst->ips_ipcl_iptun_fanout,
5482b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
5492b24ab6bSSebastien Roy ipst->ips_ipcl_iptun_fanout = NULL;
5502b24ab6bSSebastien Roy
551f4b3ec61Sdh for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
552f4b3ec61Sdh ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
553f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
554f4b3ec61Sdh }
555f4b3ec61Sdh kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
556f4b3ec61Sdh sizeof (connf_t));
557f4b3ec61Sdh ipst->ips_ipcl_raw_fanout = NULL;
558f4b3ec61Sdh
559f4b3ec61Sdh for (i = 0; i < CONN_G_HASH_SIZE; i++) {
560f4b3ec61Sdh ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
561f4b3ec61Sdh mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
562f4b3ec61Sdh }
563f4b3ec61Sdh kmem_free(ipst->ips_ipcl_globalhash_fanout,
564f4b3ec61Sdh sizeof (connf_t) * CONN_G_HASH_SIZE);
565f4b3ec61Sdh ipst->ips_ipcl_globalhash_fanout = NULL;
566f4b3ec61Sdh
567f4b3ec61Sdh ASSERT(ipst->ips_rts_clients->connf_head == NULL);
568f4b3ec61Sdh mutex_destroy(&ipst->ips_rts_clients->connf_lock);
569f4b3ec61Sdh kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
570f4b3ec61Sdh ipst->ips_rts_clients = NULL;
5717c478bd9Sstevel@tonic-gate }
5727c478bd9Sstevel@tonic-gate
5737c478bd9Sstevel@tonic-gate /*
5747c478bd9Sstevel@tonic-gate * conn creation routine. initialize the conn, sets the reference
5757c478bd9Sstevel@tonic-gate * and inserts it in the global hash table.
5767c478bd9Sstevel@tonic-gate */
5777c478bd9Sstevel@tonic-gate conn_t *
ipcl_conn_create(uint32_t type,int sleep,netstack_t * ns)578f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
5797c478bd9Sstevel@tonic-gate {
5807c478bd9Sstevel@tonic-gate conn_t *connp;
581fc80c0dfSnordmark struct kmem_cache *conn_cache;
5827c478bd9Sstevel@tonic-gate
5837c478bd9Sstevel@tonic-gate switch (type) {
5847c478bd9Sstevel@tonic-gate case IPCL_SCTPCONN:
5857c478bd9Sstevel@tonic-gate if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
5867c478bd9Sstevel@tonic-gate return (NULL);
587121e5416Skcpoon sctp_conn_init(connp);
588f4b3ec61Sdh netstack_hold(ns);
589f4b3ec61Sdh connp->conn_netstack = ns;
590bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = ns->netstack_ip;
5919cd928feSAlan Maguire connp->conn_ixa->ixa_conn_id = (long)connp;
592bd670b35SErik Nordmark ipcl_globalhash_insert(connp);
593fc80c0dfSnordmark return (connp);
594fc80c0dfSnordmark
595fc80c0dfSnordmark case IPCL_TCPCONN:
596fc80c0dfSnordmark conn_cache = tcp_conn_cache;
5977c478bd9Sstevel@tonic-gate break;
598fc80c0dfSnordmark
599fc80c0dfSnordmark case IPCL_UDPCONN:
600fc80c0dfSnordmark conn_cache = udp_conn_cache;
601fc80c0dfSnordmark break;
602fc80c0dfSnordmark
603fc80c0dfSnordmark case IPCL_RAWIPCONN:
604fc80c0dfSnordmark conn_cache = rawip_conn_cache;
605fc80c0dfSnordmark break;
606fc80c0dfSnordmark
607fc80c0dfSnordmark case IPCL_RTSCONN:
608fc80c0dfSnordmark conn_cache = rts_conn_cache;
609fc80c0dfSnordmark break;
610fc80c0dfSnordmark
6117c478bd9Sstevel@tonic-gate case IPCL_IPCCONN:
612fc80c0dfSnordmark conn_cache = ip_conn_cache;
6137c478bd9Sstevel@tonic-gate break;
614fc80c0dfSnordmark
615ff550d0eSmasputra default:
616ab82c29bSToomas Soome conn_cache = NULL;
617ff550d0eSmasputra connp = NULL;
618ff550d0eSmasputra ASSERT(0);
6197c478bd9Sstevel@tonic-gate }
6207c478bd9Sstevel@tonic-gate
621fc80c0dfSnordmark if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
622fc80c0dfSnordmark return (NULL);
623fc80c0dfSnordmark
624fc80c0dfSnordmark connp->conn_ref = 1;
625fc80c0dfSnordmark netstack_hold(ns);
626fc80c0dfSnordmark connp->conn_netstack = ns;
627bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = ns->netstack_ip;
6289cd928feSAlan Maguire connp->conn_ixa->ixa_conn_id = (long)connp;
629fc80c0dfSnordmark ipcl_globalhash_insert(connp);
6307c478bd9Sstevel@tonic-gate return (connp);
6317c478bd9Sstevel@tonic-gate }
6327c478bd9Sstevel@tonic-gate
6337c478bd9Sstevel@tonic-gate void
ipcl_conn_destroy(conn_t * connp)6347c478bd9Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
6357c478bd9Sstevel@tonic-gate {
6367c478bd9Sstevel@tonic-gate mblk_t *mp;
637f4b3ec61Sdh netstack_t *ns = connp->conn_netstack;
6387c478bd9Sstevel@tonic-gate
6397c478bd9Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&connp->conn_lock));
6407c478bd9Sstevel@tonic-gate ASSERT(connp->conn_ref == 0);
641a5628610SRamesh Kumar Katla ASSERT(connp->conn_ioctlref == 0);
6427c478bd9Sstevel@tonic-gate
643fab254e2SAruna Ramakrishna DTRACE_PROBE1(conn__destroy, conn_t *, connp);
644fab254e2SAruna Ramakrishna
64545916cd2Sjpk if (connp->conn_cred != NULL) {
64645916cd2Sjpk crfree(connp->conn_cred);
64745916cd2Sjpk connp->conn_cred = NULL;
64876a1033eSErik Nordmark /* ixa_cred done in ipcl_conn_cleanup below */
64945916cd2Sjpk }
65045916cd2Sjpk
651bd670b35SErik Nordmark if (connp->conn_ht_iphc != NULL) {
652bd670b35SErik Nordmark kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
653bd670b35SErik Nordmark connp->conn_ht_iphc = NULL;
654bd670b35SErik Nordmark connp->conn_ht_iphc_allocated = 0;
655bd670b35SErik Nordmark connp->conn_ht_iphc_len = 0;
656bd670b35SErik Nordmark connp->conn_ht_ulp = NULL;
657bd670b35SErik Nordmark connp->conn_ht_ulp_len = 0;
658bd670b35SErik Nordmark }
659bd670b35SErik Nordmark ip_pkt_free(&connp->conn_xmit_ipp);
660bd670b35SErik Nordmark
6617c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(connp);
6627c478bd9Sstevel@tonic-gate
663bd670b35SErik Nordmark if (connp->conn_latch != NULL) {
664bd670b35SErik Nordmark IPLATCH_REFRELE(connp->conn_latch);
665bd670b35SErik Nordmark connp->conn_latch = NULL;
666bd670b35SErik Nordmark }
667bd670b35SErik Nordmark if (connp->conn_latch_in_policy != NULL) {
668bd670b35SErik Nordmark IPPOL_REFRELE(connp->conn_latch_in_policy);
669bd670b35SErik Nordmark connp->conn_latch_in_policy = NULL;
670bd670b35SErik Nordmark }
671bd670b35SErik Nordmark if (connp->conn_latch_in_action != NULL) {
672bd670b35SErik Nordmark IPACT_REFRELE(connp->conn_latch_in_action);
673bd670b35SErik Nordmark connp->conn_latch_in_action = NULL;
674bd670b35SErik Nordmark }
675bd670b35SErik Nordmark if (connp->conn_policy != NULL) {
676bd670b35SErik Nordmark IPPH_REFRELE(connp->conn_policy, ns);
677bd670b35SErik Nordmark connp->conn_policy = NULL;
678bd670b35SErik Nordmark }
679bd670b35SErik Nordmark
680bd670b35SErik Nordmark if (connp->conn_ipsec_opt_mp != NULL) {
681bd670b35SErik Nordmark freemsg(connp->conn_ipsec_opt_mp);
682bd670b35SErik Nordmark connp->conn_ipsec_opt_mp = NULL;
683bd670b35SErik Nordmark }
684bd670b35SErik Nordmark
6857c478bd9Sstevel@tonic-gate if (connp->conn_flags & IPCL_TCPCONN) {
686bd670b35SErik Nordmark tcp_t *tcp = connp->conn_tcp;
687ff550d0eSmasputra
6887c478bd9Sstevel@tonic-gate tcp_free(tcp);
6897c478bd9Sstevel@tonic-gate mp = tcp->tcp_timercache;
690bd670b35SErik Nordmark
691bd670b35SErik Nordmark tcp->tcp_tcps = NULL;
6927c478bd9Sstevel@tonic-gate
693f7f8e53dSKacheong Poon /*
694f7f8e53dSKacheong Poon * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
695f7f8e53dSKacheong Poon * the mblk.
696f7f8e53dSKacheong Poon */
697f7f8e53dSKacheong Poon if (tcp->tcp_rsrv_mp != NULL) {
698f7f8e53dSKacheong Poon freeb(tcp->tcp_rsrv_mp);
699f7f8e53dSKacheong Poon tcp->tcp_rsrv_mp = NULL;
700f7f8e53dSKacheong Poon mutex_destroy(&tcp->tcp_rsrv_mp_lock);
701f7f8e53dSKacheong Poon }
702f7f8e53dSKacheong Poon
703bd670b35SErik Nordmark ipcl_conn_cleanup(connp);
704bd670b35SErik Nordmark connp->conn_flags = IPCL_TCPCONN;
705f4b3ec61Sdh if (ns != NULL) {
706f4b3ec61Sdh ASSERT(tcp->tcp_tcps == NULL);
707f4b3ec61Sdh connp->conn_netstack = NULL;
708bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = NULL;
709f4b3ec61Sdh netstack_rele(ns);
710f4b3ec61Sdh }
711fc80c0dfSnordmark
712fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t));
713fc80c0dfSnordmark
714fc80c0dfSnordmark tcp->tcp_timercache = mp;
715fc80c0dfSnordmark tcp->tcp_connp = connp;
716fc80c0dfSnordmark kmem_cache_free(tcp_conn_cache, connp);
717fc80c0dfSnordmark return;
718fc80c0dfSnordmark }
719fc80c0dfSnordmark
720fc80c0dfSnordmark if (connp->conn_flags & IPCL_SCTPCONN) {
721f4b3ec61Sdh ASSERT(ns != NULL);
7227c478bd9Sstevel@tonic-gate sctp_free(connp);
723fc80c0dfSnordmark return;
724fc80c0dfSnordmark }
725fc80c0dfSnordmark
726bd670b35SErik Nordmark ipcl_conn_cleanup(connp);
727fc80c0dfSnordmark if (ns != NULL) {
728fc80c0dfSnordmark connp->conn_netstack = NULL;
729bd670b35SErik Nordmark connp->conn_ixa->ixa_ipst = NULL;
730fc80c0dfSnordmark netstack_rele(ns);
731fc80c0dfSnordmark }
7320f1702c5SYu Xiangning
733fc80c0dfSnordmark /* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
734fc80c0dfSnordmark if (connp->conn_flags & IPCL_UDPCONN) {
735fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN;
736fc80c0dfSnordmark kmem_cache_free(udp_conn_cache, connp);
737fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RAWIPCONN) {
738fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN;
739bd670b35SErik Nordmark connp->conn_proto = IPPROTO_ICMP;
740bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto;
741fc80c0dfSnordmark kmem_cache_free(rawip_conn_cache, connp);
742fc80c0dfSnordmark } else if (connp->conn_flags & IPCL_RTSCONN) {
743fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN;
744fc80c0dfSnordmark kmem_cache_free(rts_conn_cache, connp);
7457c478bd9Sstevel@tonic-gate } else {
746fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN;
747fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_IPCCONN);
748fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL);
749fc80c0dfSnordmark kmem_cache_free(ip_conn_cache, connp);
7507c478bd9Sstevel@tonic-gate }
7517c478bd9Sstevel@tonic-gate }
7527c478bd9Sstevel@tonic-gate
7537c478bd9Sstevel@tonic-gate /*
7547c478bd9Sstevel@tonic-gate * Running in cluster mode - deregister listener information
7557c478bd9Sstevel@tonic-gate */
7567c478bd9Sstevel@tonic-gate static void
ipcl_conn_unlisten(conn_t * connp)7577c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
7587c478bd9Sstevel@tonic-gate {
7597c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
7607c478bd9Sstevel@tonic-gate ASSERT(connp->conn_lport != 0);
7617c478bd9Sstevel@tonic-gate
7627c478bd9Sstevel@tonic-gate if (cl_inet_unlisten != NULL) {
7637c478bd9Sstevel@tonic-gate sa_family_t addr_family;
7647c478bd9Sstevel@tonic-gate uint8_t *laddrp;
7657c478bd9Sstevel@tonic-gate
766bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION) {
7677c478bd9Sstevel@tonic-gate addr_family = AF_INET6;
768bd670b35SErik Nordmark laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
7697c478bd9Sstevel@tonic-gate } else {
7707c478bd9Sstevel@tonic-gate addr_family = AF_INET;
771bd670b35SErik Nordmark laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
7727c478bd9Sstevel@tonic-gate }
7738e4b770fSLu Huafeng (*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
7748e4b770fSLu Huafeng IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
7757c478bd9Sstevel@tonic-gate }
7767c478bd9Sstevel@tonic-gate connp->conn_flags &= ~IPCL_CL_LISTENER;
7777c478bd9Sstevel@tonic-gate }
7787c478bd9Sstevel@tonic-gate
/*
 * Unlink connp from the fanout list it is currently on (if any), taking and
 * dropping that fanout's connf_lock, and release the reference the list
 * held on the conn.  The caller must not hold conn_lock.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		if ((connp)->conn_prev != NULL) {			\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		} else {						\
			connfp->connf_head = (connp)->conn_next;	\
		}							\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
8077c478bd9Sstevel@tonic-gate
8087c478bd9Sstevel@tonic-gate void
ipcl_hash_remove(conn_t * connp)8097c478bd9Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
8107c478bd9Sstevel@tonic-gate {
811bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto;
812bd670b35SErik Nordmark
8137c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp);
814bd670b35SErik Nordmark if (protocol == IPPROTO_RSVP)
815bd670b35SErik Nordmark ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
8167c478bd9Sstevel@tonic-gate }
8177c478bd9Sstevel@tonic-gate
8187c478bd9Sstevel@tonic-gate /*
8197c478bd9Sstevel@tonic-gate * The whole purpose of this function is allow removal of
8207c478bd9Sstevel@tonic-gate * a conn_t from the connected hash for timewait reclaim.
8217c478bd9Sstevel@tonic-gate * This is essentially a TW reclaim fastpath where timewait
8227c478bd9Sstevel@tonic-gate * collector checks under fanout lock (so no one else can
8237c478bd9Sstevel@tonic-gate * get access to the conn_t) that refcnt is 2 i.e. one for
8247c478bd9Sstevel@tonic-gate * TCP and one for the classifier hash list. If ref count
8257c478bd9Sstevel@tonic-gate * is indeed 2, we can just remove the conn under lock and
8267c478bd9Sstevel@tonic-gate * avoid cleaning up the conn under squeue. This gives us
8277c478bd9Sstevel@tonic-gate * improved performance.
8287c478bd9Sstevel@tonic-gate */
8297c478bd9Sstevel@tonic-gate void
ipcl_hash_remove_locked(conn_t * connp,connf_t * connfp)8307c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp)
8317c478bd9Sstevel@tonic-gate {
8327c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connfp->connf_lock));
8337c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock));
8347c478bd9Sstevel@tonic-gate ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
8357c478bd9Sstevel@tonic-gate
8367c478bd9Sstevel@tonic-gate if ((connp)->conn_next != NULL) {
837121e5416Skcpoon (connp)->conn_next->conn_prev = (connp)->conn_prev;
8387c478bd9Sstevel@tonic-gate }
8397c478bd9Sstevel@tonic-gate if ((connp)->conn_prev != NULL) {
840121e5416Skcpoon (connp)->conn_prev->conn_next = (connp)->conn_next;
8417c478bd9Sstevel@tonic-gate } else {
8427c478bd9Sstevel@tonic-gate connfp->connf_head = (connp)->conn_next;
8437c478bd9Sstevel@tonic-gate }
8447c478bd9Sstevel@tonic-gate (connp)->conn_fanout = NULL;
8457c478bd9Sstevel@tonic-gate (connp)->conn_next = NULL;
8467c478bd9Sstevel@tonic-gate (connp)->conn_prev = NULL;
8477c478bd9Sstevel@tonic-gate (connp)->conn_flags |= IPCL_REMOVED;
8487c478bd9Sstevel@tonic-gate ASSERT((connp)->conn_ref == 2);
8497c478bd9Sstevel@tonic-gate (connp)->conn_ref--;
8507c478bd9Sstevel@tonic-gate }
8517c478bd9Sstevel@tonic-gate
/*
 * Push connp onto the head of connfp's list, mark it IPCL_CONNECTED (and
 * no longer IPCL_REMOVED), and take a reference on behalf of the list.
 * The conn must not currently be on any fanout list.  No locking is done
 * here; the users of this macro hold connfp->connf_lock around it.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		/* link in front of the current head */			\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}
8667c478bd9Sstevel@tonic-gate
/*
 * Move connp to the head of connfp's list as a connected conn: first take
 * it off whatever fanout list it is on, then insert it under
 * connfp->connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
8737c478bd9Sstevel@tonic-gate
/*
 * Move connp onto connfp's list as a bound conn.  The conn is first taken
 * off whatever fanout list it is on.  It is then linked in immediately
 * before the first entry whose conn_laddr_v6 satisfies _IPCL_V4_MATCH_ANY(),
 * or at the tail if there is no such entry.  The conn is marked IPCL_BOUND
 * (and no longer IPCL_REMOVED) and the list takes a reference on it.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	for (nconnp = (connfp)->connf_head;				\
	    nconnp != NULL &&						\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6);			\
	    nconnp = nconnp->conn_next) {				\
		pconnp = nconnp;					\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		/* no predecessor: connp becomes the list head */	\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9007c478bd9Sstevel@tonic-gate
/*
 * Move connp onto connfp's list as a wildcard-bound conn.  The conn is
 * first taken off whatever fanout list it is on.
 *
 * If connp's local address is IPv4-mapped, it is linked in immediately
 * before the first entry in the same zone whose local address is the
 * unspecified address; in every other case it is appended at the tail.
 * The conn is marked IPCL_BOUND (and no longer IPCL_REMOVED) and the list
 * takes a reference on it.
 *
 * `list' always points at the link (connf_head or some conn_next) that
 * will be made to point at connp; `prev' is the conn owning that link, or
 * NULL when the link is connf_head.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			/* NOTE(review): looks like a no-op on a */	\
			/* consistently linked list - confirm. */	\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9307c478bd9Sstevel@tonic-gate
9317c478bd9Sstevel@tonic-gate void
ipcl_hash_insert_wildcard(connf_t * connfp,conn_t * connp)9327c478bd9Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
9337c478bd9Sstevel@tonic-gate {
9347c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9357c478bd9Sstevel@tonic-gate }
9367c478bd9Sstevel@tonic-gate
9372b24ab6bSSebastien Roy /*
9382b24ab6bSSebastien Roy * Because the classifier is used to classify inbound packets, the destination
9392b24ab6bSSebastien Roy * address is meant to be our local tunnel address (tunnel source), and the
9402b24ab6bSSebastien Roy * source the remote tunnel address (tunnel destination).
941bd670b35SErik Nordmark *
942bd670b35SErik Nordmark * Note that conn_proto can't be used for fanout since the upper protocol
943bd670b35SErik Nordmark * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
9442b24ab6bSSebastien Roy */
9452b24ab6bSSebastien Roy conn_t *
ipcl_iptun_classify_v4(ipaddr_t * src,ipaddr_t * dst,ip_stack_t * ipst)9462b24ab6bSSebastien Roy ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
9472b24ab6bSSebastien Roy {
9482b24ab6bSSebastien Roy connf_t *connfp;
9492b24ab6bSSebastien Roy conn_t *connp;
9502b24ab6bSSebastien Roy
9512b24ab6bSSebastien Roy /* first look for IPv4 tunnel links */
9522b24ab6bSSebastien Roy connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
9532b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock);
9542b24ab6bSSebastien Roy for (connp = connfp->connf_head; connp != NULL;
9552b24ab6bSSebastien Roy connp = connp->conn_next) {
9562b24ab6bSSebastien Roy if (IPCL_IPTUN_MATCH(connp, *dst, *src))
9572b24ab6bSSebastien Roy break;
9582b24ab6bSSebastien Roy }
9592b24ab6bSSebastien Roy if (connp != NULL)
9602b24ab6bSSebastien Roy goto done;
9612b24ab6bSSebastien Roy
9622b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock);
9632b24ab6bSSebastien Roy
9642b24ab6bSSebastien Roy /* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
9652b24ab6bSSebastien Roy connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
9662b24ab6bSSebastien Roy INADDR_ANY)];
9672b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock);
9682b24ab6bSSebastien Roy for (connp = connfp->connf_head; connp != NULL;
9692b24ab6bSSebastien Roy connp = connp->conn_next) {
9702b24ab6bSSebastien Roy if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
9712b24ab6bSSebastien Roy break;
9722b24ab6bSSebastien Roy }
9732b24ab6bSSebastien Roy done:
9742b24ab6bSSebastien Roy if (connp != NULL)
9752b24ab6bSSebastien Roy CONN_INC_REF(connp);
9762b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock);
9772b24ab6bSSebastien Roy return (connp);
9782b24ab6bSSebastien Roy }
9792b24ab6bSSebastien Roy
9802b24ab6bSSebastien Roy conn_t *
ipcl_iptun_classify_v6(in6_addr_t * src,in6_addr_t * dst,ip_stack_t * ipst)9812b24ab6bSSebastien Roy ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
9822b24ab6bSSebastien Roy {
9832b24ab6bSSebastien Roy connf_t *connfp;
9842b24ab6bSSebastien Roy conn_t *connp;
9852b24ab6bSSebastien Roy
9862b24ab6bSSebastien Roy /* Look for an IPv6 tunnel link */
9872b24ab6bSSebastien Roy connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
9882b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock);
9892b24ab6bSSebastien Roy for (connp = connfp->connf_head; connp != NULL;
9902b24ab6bSSebastien Roy connp = connp->conn_next) {
9912b24ab6bSSebastien Roy if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
9922b24ab6bSSebastien Roy CONN_INC_REF(connp);
9932b24ab6bSSebastien Roy break;
9942b24ab6bSSebastien Roy }
9952b24ab6bSSebastien Roy }
9962b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock);
9972b24ab6bSSebastien Roy return (connp);
9982b24ab6bSSebastien Roy }
9992b24ab6bSSebastien Roy
10007c478bd9Sstevel@tonic-gate /*
10017c478bd9Sstevel@tonic-gate * This function is used only for inserting SCTP raw socket now.
10027c478bd9Sstevel@tonic-gate * This may change later.
10037c478bd9Sstevel@tonic-gate *
10047c478bd9Sstevel@tonic-gate * Note that only one raw socket can be bound to a port. The param
10057c478bd9Sstevel@tonic-gate * lport is in network byte order.
10067c478bd9Sstevel@tonic-gate */
10077c478bd9Sstevel@tonic-gate static int
ipcl_sctp_hash_insert(conn_t * connp,in_port_t lport)10087c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
10097c478bd9Sstevel@tonic-gate {
10107c478bd9Sstevel@tonic-gate connf_t *connfp;
10117c478bd9Sstevel@tonic-gate conn_t *oconnp;
1012f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
10137c478bd9Sstevel@tonic-gate
1014f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
10157c478bd9Sstevel@tonic-gate
10167c478bd9Sstevel@tonic-gate /* Check for existing raw socket already bound to the port. */
10177c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
10187c478bd9Sstevel@tonic-gate for (oconnp = connfp->connf_head; oconnp != NULL;
10197c0c0508Skcpoon oconnp = oconnp->conn_next) {
10207c478bd9Sstevel@tonic-gate if (oconnp->conn_lport == lport &&
10217c478bd9Sstevel@tonic-gate oconnp->conn_zoneid == connp->conn_zoneid &&
1022bd670b35SErik Nordmark oconnp->conn_family == connp->conn_family &&
1023bd670b35SErik Nordmark ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1024bd670b35SErik Nordmark IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1025bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1026bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1027bd670b35SErik Nordmark IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1028bd670b35SErik Nordmark &connp->conn_laddr_v6))) {
10297c478bd9Sstevel@tonic-gate break;
10307c478bd9Sstevel@tonic-gate }
10317c478bd9Sstevel@tonic-gate }
10327c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
10337c478bd9Sstevel@tonic-gate if (oconnp != NULL)
10347c478bd9Sstevel@tonic-gate return (EADDRNOTAVAIL);
10357c478bd9Sstevel@tonic-gate
1036bd670b35SErik Nordmark if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1037bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1038bd670b35SErik Nordmark if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1039bd670b35SErik Nordmark IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
10407c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10417c478bd9Sstevel@tonic-gate } else {
10427c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
10437c478bd9Sstevel@tonic-gate }
10447c478bd9Sstevel@tonic-gate } else {
10457c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10467c478bd9Sstevel@tonic-gate }
10477c478bd9Sstevel@tonic-gate return (0);
10487c478bd9Sstevel@tonic-gate }
10497c478bd9Sstevel@tonic-gate
10502b24ab6bSSebastien Roy static int
ipcl_iptun_hash_insert(conn_t * connp,ip_stack_t * ipst)1051bd670b35SErik Nordmark ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
10522b24ab6bSSebastien Roy {
10532b24ab6bSSebastien Roy connf_t *connfp;
10542b24ab6bSSebastien Roy conn_t *tconnp;
1055bd670b35SErik Nordmark ipaddr_t laddr = connp->conn_laddr_v4;
1056bd670b35SErik Nordmark ipaddr_t faddr = connp->conn_faddr_v4;
10572b24ab6bSSebastien Roy
1058bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
10592b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock);
10602b24ab6bSSebastien Roy for (tconnp = connfp->connf_head; tconnp != NULL;
10612b24ab6bSSebastien Roy tconnp = tconnp->conn_next) {
1062bd670b35SErik Nordmark if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
10632b24ab6bSSebastien Roy /* A tunnel is already bound to these addresses. */
10642b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock);
10652b24ab6bSSebastien Roy return (EADDRINUSE);
10662b24ab6bSSebastien Roy }
10672b24ab6bSSebastien Roy }
10682b24ab6bSSebastien Roy IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
10692b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock);
10702b24ab6bSSebastien Roy return (0);
10712b24ab6bSSebastien Roy }
10722b24ab6bSSebastien Roy
10732b24ab6bSSebastien Roy static int
ipcl_iptun_hash_insert_v6(conn_t * connp,ip_stack_t * ipst)1074bd670b35SErik Nordmark ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
10752b24ab6bSSebastien Roy {
10762b24ab6bSSebastien Roy connf_t *connfp;
10772b24ab6bSSebastien Roy conn_t *tconnp;
1078bd670b35SErik Nordmark in6_addr_t *laddr = &connp->conn_laddr_v6;
1079bd670b35SErik Nordmark in6_addr_t *faddr = &connp->conn_faddr_v6;
10802b24ab6bSSebastien Roy
1081bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
10822b24ab6bSSebastien Roy mutex_enter(&connfp->connf_lock);
10832b24ab6bSSebastien Roy for (tconnp = connfp->connf_head; tconnp != NULL;
10842b24ab6bSSebastien Roy tconnp = tconnp->conn_next) {
1085bd670b35SErik Nordmark if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
10862b24ab6bSSebastien Roy /* A tunnel is already bound to these addresses. */
10872b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock);
10882b24ab6bSSebastien Roy return (EADDRINUSE);
10892b24ab6bSSebastien Roy }
10902b24ab6bSSebastien Roy }
10912b24ab6bSSebastien Roy IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
10922b24ab6bSSebastien Roy mutex_exit(&connfp->connf_lock);
10932b24ab6bSSebastien Roy return (0);
10942b24ab6bSSebastien Roy }
10952b24ab6bSSebastien Roy
109645916cd2Sjpk /*
109745916cd2Sjpk * Check for a MAC exemption conflict on a labeled system. Note that for
109845916cd2Sjpk * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
109945916cd2Sjpk * transport layer. This check is for binding all other protocols.
110045916cd2Sjpk *
110145916cd2Sjpk * Returns true if there's a conflict.
110245916cd2Sjpk */
110345916cd2Sjpk static boolean_t
check_exempt_conflict_v4(conn_t * connp,ip_stack_t * ipst)1104f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
110545916cd2Sjpk {
110645916cd2Sjpk connf_t *connfp;
110745916cd2Sjpk conn_t *tconn;
110845916cd2Sjpk
1109bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
111045916cd2Sjpk mutex_enter(&connfp->connf_lock);
111145916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL;
111245916cd2Sjpk tconn = tconn->conn_next) {
111345916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */
1114bd670b35SErik Nordmark if (connp->conn_family != tconn->conn_family)
111545916cd2Sjpk continue;
111645916cd2Sjpk /* If neither is exempt, then there's no conflict */
11175d3b8cb7SBill Sommerfeld if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
11185d3b8cb7SBill Sommerfeld (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
111945916cd2Sjpk continue;
11205f9878b0Sken Powell - Sun Microsystem /* We are only concerned about sockets for a different zone */
11215f9878b0Sken Powell - Sun Microsystem if (connp->conn_zoneid == tconn->conn_zoneid)
11225f9878b0Sken Powell - Sun Microsystem continue;
112345916cd2Sjpk /* If both are bound to different specific addrs, ok */
1124bd670b35SErik Nordmark if (connp->conn_laddr_v4 != INADDR_ANY &&
1125bd670b35SErik Nordmark tconn->conn_laddr_v4 != INADDR_ANY &&
1126bd670b35SErik Nordmark connp->conn_laddr_v4 != tconn->conn_laddr_v4)
112745916cd2Sjpk continue;
112845916cd2Sjpk /* These two conflict; fail */
112945916cd2Sjpk break;
113045916cd2Sjpk }
113145916cd2Sjpk mutex_exit(&connfp->connf_lock);
113245916cd2Sjpk return (tconn != NULL);
113345916cd2Sjpk }
113445916cd2Sjpk
113545916cd2Sjpk static boolean_t
check_exempt_conflict_v6(conn_t * connp,ip_stack_t * ipst)1136f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
113745916cd2Sjpk {
113845916cd2Sjpk connf_t *connfp;
113945916cd2Sjpk conn_t *tconn;
114045916cd2Sjpk
1141bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
114245916cd2Sjpk mutex_enter(&connfp->connf_lock);
114345916cd2Sjpk for (tconn = connfp->connf_head; tconn != NULL;
114445916cd2Sjpk tconn = tconn->conn_next) {
114545916cd2Sjpk /* We don't allow v4 fallback for v6 raw socket */
1146bd670b35SErik Nordmark if (connp->conn_family != tconn->conn_family)
114745916cd2Sjpk continue;
114845916cd2Sjpk /* If neither is exempt, then there's no conflict */
11495d3b8cb7SBill Sommerfeld if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
11505d3b8cb7SBill Sommerfeld (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
115145916cd2Sjpk continue;
11525f9878b0Sken Powell - Sun Microsystem /* We are only concerned about sockets for a different zone */
11535f9878b0Sken Powell - Sun Microsystem if (connp->conn_zoneid == tconn->conn_zoneid)
11545f9878b0Sken Powell - Sun Microsystem continue;
115545916cd2Sjpk /* If both are bound to different addrs, ok */
1156bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1157bd670b35SErik Nordmark !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1158bd670b35SErik Nordmark !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1159bd670b35SErik Nordmark &tconn->conn_laddr_v6))
116045916cd2Sjpk continue;
116145916cd2Sjpk /* These two conflict; fail */
116245916cd2Sjpk break;
116345916cd2Sjpk }
116445916cd2Sjpk mutex_exit(&connfp->connf_lock);
116545916cd2Sjpk return (tconn != NULL);
116645916cd2Sjpk }
116745916cd2Sjpk
11687c478bd9Sstevel@tonic-gate /*
11697c478bd9Sstevel@tonic-gate * (v4, v6) bind hash insertion routines
1170bd670b35SErik Nordmark * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
11717c478bd9Sstevel@tonic-gate */
1172bd670b35SErik Nordmark
11737c478bd9Sstevel@tonic-gate int
ipcl_bind_insert(conn_t * connp)1174bd670b35SErik Nordmark ipcl_bind_insert(conn_t *connp)
1175bd670b35SErik Nordmark {
1176bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION)
1177bd670b35SErik Nordmark return (ipcl_bind_insert_v6(connp));
1178bd670b35SErik Nordmark else
1179bd670b35SErik Nordmark return (ipcl_bind_insert_v4(connp));
1180bd670b35SErik Nordmark }
1181bd670b35SErik Nordmark
1182bd670b35SErik Nordmark int
ipcl_bind_insert_v4(conn_t * connp)1183bd670b35SErik Nordmark ipcl_bind_insert_v4(conn_t *connp)
11847c478bd9Sstevel@tonic-gate {
11857c478bd9Sstevel@tonic-gate connf_t *connfp;
11867c478bd9Sstevel@tonic-gate int ret = 0;
1187f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1188bd670b35SErik Nordmark uint16_t lport = connp->conn_lport;
1189bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto;
11907c478bd9Sstevel@tonic-gate
11912b24ab6bSSebastien Roy if (IPCL_IS_IPTUN(connp))
1192bd670b35SErik Nordmark return (ipcl_iptun_hash_insert(connp, ipst));
11932b24ab6bSSebastien Roy
11947c478bd9Sstevel@tonic-gate switch (protocol) {
11957c478bd9Sstevel@tonic-gate default:
1196f4b3ec61Sdh if (is_system_labeled() &&
1197f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst))
119845916cd2Sjpk return (EADDRINUSE);
119945916cd2Sjpk /* FALLTHROUGH */
120045916cd2Sjpk case IPPROTO_UDP:
12017c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
1202f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[
1203f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)];
12047c478bd9Sstevel@tonic-gate } else {
1205bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
12067c478bd9Sstevel@tonic-gate }
12077c478bd9Sstevel@tonic-gate
1208bd670b35SErik Nordmark if (connp->conn_faddr_v4 != INADDR_ANY) {
12097c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1210bd670b35SErik Nordmark } else if (connp->conn_laddr_v4 != INADDR_ANY) {
12117c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12127c478bd9Sstevel@tonic-gate } else {
12137c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12147c478bd9Sstevel@tonic-gate }
1215bd670b35SErik Nordmark if (protocol == IPPROTO_RSVP)
1216bd670b35SErik Nordmark ill_set_inputfn_all(ipst);
12177c478bd9Sstevel@tonic-gate break;
12187c478bd9Sstevel@tonic-gate
12197c478bd9Sstevel@tonic-gate case IPPROTO_TCP:
12207c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */
122145916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES);
1222f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[
1223f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)];
1224bd670b35SErik Nordmark if (connp->conn_laddr_v4 != INADDR_ANY) {
12257c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12267c478bd9Sstevel@tonic-gate } else {
12277c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12287c478bd9Sstevel@tonic-gate }
12297c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) {
1230bd670b35SErik Nordmark ASSERT(connp->conn_ipversion == IPV4_VERSION);
12317c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER;
12328e4b770fSLu Huafeng (*cl_inet_listen)(
12338e4b770fSLu Huafeng connp->conn_netstack->netstack_stackid,
12348e4b770fSLu Huafeng IPPROTO_TCP, AF_INET,
1235bd670b35SErik Nordmark (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
12367c478bd9Sstevel@tonic-gate }
12377c478bd9Sstevel@tonic-gate break;
12387c478bd9Sstevel@tonic-gate
12397c478bd9Sstevel@tonic-gate case IPPROTO_SCTP:
12407c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
12417c478bd9Sstevel@tonic-gate break;
12427c478bd9Sstevel@tonic-gate }
12437c478bd9Sstevel@tonic-gate
12447c478bd9Sstevel@tonic-gate return (ret);
12457c478bd9Sstevel@tonic-gate }
12467c478bd9Sstevel@tonic-gate
12477c478bd9Sstevel@tonic-gate int
ipcl_bind_insert_v6(conn_t * connp)1248bd670b35SErik Nordmark ipcl_bind_insert_v6(conn_t *connp)
12497c478bd9Sstevel@tonic-gate {
12502b24ab6bSSebastien Roy connf_t *connfp;
12512b24ab6bSSebastien Roy int ret = 0;
1252f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1253bd670b35SErik Nordmark uint16_t lport = connp->conn_lport;
1254bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto;
12557c478bd9Sstevel@tonic-gate
12562b24ab6bSSebastien Roy if (IPCL_IS_IPTUN(connp)) {
1257bd670b35SErik Nordmark return (ipcl_iptun_hash_insert_v6(connp, ipst));
12582b24ab6bSSebastien Roy }
12592b24ab6bSSebastien Roy
12607c478bd9Sstevel@tonic-gate switch (protocol) {
12617c478bd9Sstevel@tonic-gate default:
1262f4b3ec61Sdh if (is_system_labeled() &&
1263f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst))
126445916cd2Sjpk return (EADDRINUSE);
126545916cd2Sjpk /* FALLTHROUGH */
126645916cd2Sjpk case IPPROTO_UDP:
12677c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
1268f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[
1269f4b3ec61Sdh IPCL_UDP_HASH(lport, ipst)];
12707c478bd9Sstevel@tonic-gate } else {
1271f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
12727c478bd9Sstevel@tonic-gate }
12737c478bd9Sstevel@tonic-gate
1274bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
12757c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1276bd670b35SErik Nordmark } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12777c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12787c478bd9Sstevel@tonic-gate } else {
12797c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12807c478bd9Sstevel@tonic-gate }
12817c478bd9Sstevel@tonic-gate break;
12827c478bd9Sstevel@tonic-gate
12837c478bd9Sstevel@tonic-gate case IPPROTO_TCP:
12847c478bd9Sstevel@tonic-gate /* Insert it in the Bind Hash */
128545916cd2Sjpk ASSERT(connp->conn_zoneid != ALL_ZONES);
1286f4b3ec61Sdh connfp = &ipst->ips_ipcl_bind_fanout[
1287f4b3ec61Sdh IPCL_BIND_HASH(lport, ipst)];
1288bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12897c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
12907c478bd9Sstevel@tonic-gate } else {
12917c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12927c478bd9Sstevel@tonic-gate }
12937c478bd9Sstevel@tonic-gate if (cl_inet_listen != NULL) {
12947c478bd9Sstevel@tonic-gate sa_family_t addr_family;
12957c478bd9Sstevel@tonic-gate uint8_t *laddrp;
12967c478bd9Sstevel@tonic-gate
1297bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION) {
12987c478bd9Sstevel@tonic-gate addr_family = AF_INET6;
12997c478bd9Sstevel@tonic-gate laddrp =
1300bd670b35SErik Nordmark (uint8_t *)&connp->conn_bound_addr_v6;
13017c478bd9Sstevel@tonic-gate } else {
13027c478bd9Sstevel@tonic-gate addr_family = AF_INET;
1303bd670b35SErik Nordmark laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
13047c478bd9Sstevel@tonic-gate }
13057c478bd9Sstevel@tonic-gate connp->conn_flags |= IPCL_CL_LISTENER;
13068e4b770fSLu Huafeng (*cl_inet_listen)(
13078e4b770fSLu Huafeng connp->conn_netstack->netstack_stackid,
13088e4b770fSLu Huafeng IPPROTO_TCP, addr_family, laddrp, lport, NULL);
13097c478bd9Sstevel@tonic-gate }
13107c478bd9Sstevel@tonic-gate break;
13117c478bd9Sstevel@tonic-gate
13127c478bd9Sstevel@tonic-gate case IPPROTO_SCTP:
13137c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
13147c478bd9Sstevel@tonic-gate break;
13157c478bd9Sstevel@tonic-gate }
13167c478bd9Sstevel@tonic-gate
13177c478bd9Sstevel@tonic-gate return (ret);
13187c478bd9Sstevel@tonic-gate }
13197c478bd9Sstevel@tonic-gate
13207c478bd9Sstevel@tonic-gate /*
13217c478bd9Sstevel@tonic-gate * ipcl_conn_hash insertion routines.
1322bd670b35SErik Nordmark * The caller has already set conn_proto and the addresses/ports in the conn_t.
13237c478bd9Sstevel@tonic-gate */
1324bd670b35SErik Nordmark
1325bd670b35SErik Nordmark int
ipcl_conn_insert(conn_t * connp)1326bd670b35SErik Nordmark ipcl_conn_insert(conn_t *connp)
1327bd670b35SErik Nordmark {
1328bd670b35SErik Nordmark if (connp->conn_ipversion == IPV6_VERSION)
1329bd670b35SErik Nordmark return (ipcl_conn_insert_v6(connp));
1330bd670b35SErik Nordmark else
1331bd670b35SErik Nordmark return (ipcl_conn_insert_v4(connp));
1332bd670b35SErik Nordmark }
1333bd670b35SErik Nordmark
13347c478bd9Sstevel@tonic-gate int
ipcl_conn_insert_v4(conn_t * connp)1335bd670b35SErik Nordmark ipcl_conn_insert_v4(conn_t *connp)
13367c478bd9Sstevel@tonic-gate {
13377c478bd9Sstevel@tonic-gate connf_t *connfp;
13387c478bd9Sstevel@tonic-gate conn_t *tconnp;
13397c478bd9Sstevel@tonic-gate int ret = 0;
1340f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1341bd670b35SErik Nordmark uint16_t lport = connp->conn_lport;
1342bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto;
13437c478bd9Sstevel@tonic-gate
13442b24ab6bSSebastien Roy if (IPCL_IS_IPTUN(connp))
1345bd670b35SErik Nordmark return (ipcl_iptun_hash_insert(connp, ipst));
13462b24ab6bSSebastien Roy
13477c478bd9Sstevel@tonic-gate switch (protocol) {
13487c478bd9Sstevel@tonic-gate case IPPROTO_TCP:
1349a12220b3SJon Anderson /*
1350bd670b35SErik Nordmark * For TCP, we check whether the connection tuple already
1351a12220b3SJon Anderson * exists before allowing the connection to proceed. We
1352a12220b3SJon Anderson * also allow indexing on the zoneid. This is to allow
1353a12220b3SJon Anderson * multiple shared stack zones to have the same tcp
1354a12220b3SJon Anderson * connection tuple. In practice this only happens for
1355a12220b3SJon Anderson * INADDR_LOOPBACK as it's the only local address which
1356a12220b3SJon Anderson * doesn't have to be unique.
1357a12220b3SJon Anderson */
1358f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[
1359bd670b35SErik Nordmark IPCL_CONN_HASH(connp->conn_faddr_v4,
1360f4b3ec61Sdh connp->conn_ports, ipst)];
13617c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
13627c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
13637c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) {
1364bd670b35SErik Nordmark if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1365bd670b35SErik Nordmark connp->conn_faddr_v4, connp->conn_laddr_v4,
1366bd670b35SErik Nordmark connp->conn_ports) &&
1367bd670b35SErik Nordmark IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
13687c478bd9Sstevel@tonic-gate /* Already have a conn. bail out */
13697c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
13707c478bd9Sstevel@tonic-gate return (EADDRINUSE);
13717c478bd9Sstevel@tonic-gate }
13727c478bd9Sstevel@tonic-gate }
13737c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) {
13747c478bd9Sstevel@tonic-gate /*
13757c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a
13767c478bd9Sstevel@tonic-gate * rebind. Let it happen.
13777c478bd9Sstevel@tonic-gate */
13787c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
13797c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp);
13807c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
13817c478bd9Sstevel@tonic-gate }
1382866ba9ddSjprakash
1383866ba9ddSjprakash ASSERT(connp->conn_recv != NULL);
1384bd670b35SErik Nordmark ASSERT(connp->conn_recvicmp != NULL);
1385866ba9ddSjprakash
13867c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
13877c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
13887c478bd9Sstevel@tonic-gate break;
13897c478bd9Sstevel@tonic-gate
13907c478bd9Sstevel@tonic-gate case IPPROTO_SCTP:
13917c0c0508Skcpoon /*
13927c0c0508Skcpoon * The raw socket may have already been bound, remove it
13937c0c0508Skcpoon * from the hash first.
13947c0c0508Skcpoon */
13957c0c0508Skcpoon IPCL_HASH_REMOVE(connp);
13967c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
13977c478bd9Sstevel@tonic-gate break;
13987c478bd9Sstevel@tonic-gate
13997c478bd9Sstevel@tonic-gate default:
140045916cd2Sjpk /*
140145916cd2Sjpk * Check for conflicts among MAC exempt bindings. For
140245916cd2Sjpk * transports with port numbers, this is done by the upper
140345916cd2Sjpk * level per-transport binding logic. For all others, it's
140445916cd2Sjpk * done here.
140545916cd2Sjpk */
1406f4b3ec61Sdh if (is_system_labeled() &&
1407f4b3ec61Sdh check_exempt_conflict_v4(connp, ipst))
140845916cd2Sjpk return (EADDRINUSE);
140945916cd2Sjpk /* FALLTHROUGH */
141045916cd2Sjpk
141145916cd2Sjpk case IPPROTO_UDP:
14127c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
1413f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[
1414bd670b35SErik Nordmark IPCL_UDP_HASH(lport, ipst)];
14157c478bd9Sstevel@tonic-gate } else {
1416bd670b35SErik Nordmark connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
14177c478bd9Sstevel@tonic-gate }
14187c478bd9Sstevel@tonic-gate
1419bd670b35SErik Nordmark if (connp->conn_faddr_v4 != INADDR_ANY) {
14207c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1421bd670b35SErik Nordmark } else if (connp->conn_laddr_v4 != INADDR_ANY) {
14227c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
14237c478bd9Sstevel@tonic-gate } else {
14247c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
14257c478bd9Sstevel@tonic-gate }
14267c478bd9Sstevel@tonic-gate break;
14277c478bd9Sstevel@tonic-gate }
14287c478bd9Sstevel@tonic-gate
14297c478bd9Sstevel@tonic-gate return (ret);
14307c478bd9Sstevel@tonic-gate }
14317c478bd9Sstevel@tonic-gate
14327c478bd9Sstevel@tonic-gate int
ipcl_conn_insert_v6(conn_t * connp)1433bd670b35SErik Nordmark ipcl_conn_insert_v6(conn_t *connp)
14347c478bd9Sstevel@tonic-gate {
14357c478bd9Sstevel@tonic-gate connf_t *connfp;
14367c478bd9Sstevel@tonic-gate conn_t *tconnp;
14377c478bd9Sstevel@tonic-gate int ret = 0;
1438f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1439bd670b35SErik Nordmark uint16_t lport = connp->conn_lport;
1440bd670b35SErik Nordmark uint8_t protocol = connp->conn_proto;
1441bd670b35SErik Nordmark uint_t ifindex = connp->conn_bound_if;
14427c478bd9Sstevel@tonic-gate
14432b24ab6bSSebastien Roy if (IPCL_IS_IPTUN(connp))
1444bd670b35SErik Nordmark return (ipcl_iptun_hash_insert_v6(connp, ipst));
14452b24ab6bSSebastien Roy
14467c478bd9Sstevel@tonic-gate switch (protocol) {
14477c478bd9Sstevel@tonic-gate case IPPROTO_TCP:
1448a12220b3SJon Anderson
1449a12220b3SJon Anderson /*
1450a12220b3SJon Anderson * For tcp, we check whether the connection tuple already
1451a12220b3SJon Anderson * exists before allowing the connection to proceed. We
1452a12220b3SJon Anderson * also allow indexing on the zoneid. This is to allow
1453a12220b3SJon Anderson * multiple shared stack zones to have the same tcp
1454a12220b3SJon Anderson * connection tuple. In practice this only happens for
1455a12220b3SJon Anderson * ipv6_loopback as it's the only local address which
1456a12220b3SJon Anderson * doesn't have to be unique.
1457a12220b3SJon Anderson */
1458f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[
1459bd670b35SErik Nordmark IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1460f4b3ec61Sdh ipst)];
14617c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
14627c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
14637c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) {
1464bd670b35SErik Nordmark /* NOTE: need to match zoneid. Bug in onnv-gate */
1465bd670b35SErik Nordmark if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1466bd670b35SErik Nordmark connp->conn_faddr_v6, connp->conn_laddr_v6,
14677c478bd9Sstevel@tonic-gate connp->conn_ports) &&
1468bd670b35SErik Nordmark (tconnp->conn_bound_if == 0 ||
1469bd670b35SErik Nordmark tconnp->conn_bound_if == ifindex) &&
1470bd670b35SErik Nordmark IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
14717c478bd9Sstevel@tonic-gate /* Already have a conn. bail out */
14727c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
14737c478bd9Sstevel@tonic-gate return (EADDRINUSE);
14747c478bd9Sstevel@tonic-gate }
14757c478bd9Sstevel@tonic-gate }
14767c478bd9Sstevel@tonic-gate if (connp->conn_fanout != NULL) {
14777c478bd9Sstevel@tonic-gate /*
14787c478bd9Sstevel@tonic-gate * Probably a XTI/TLI application trying to do a
14797c478bd9Sstevel@tonic-gate * rebind. Let it happen.
14807c478bd9Sstevel@tonic-gate */
14817c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
14827c478bd9Sstevel@tonic-gate IPCL_HASH_REMOVE(connp);
14837c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
14847c478bd9Sstevel@tonic-gate }
14857c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
14867c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
14877c478bd9Sstevel@tonic-gate break;
14887c478bd9Sstevel@tonic-gate
14897c478bd9Sstevel@tonic-gate case IPPROTO_SCTP:
14907c0c0508Skcpoon IPCL_HASH_REMOVE(connp);
14917c478bd9Sstevel@tonic-gate ret = ipcl_sctp_hash_insert(connp, lport);
14927c478bd9Sstevel@tonic-gate break;
14937c478bd9Sstevel@tonic-gate
14947c478bd9Sstevel@tonic-gate default:
1495f4b3ec61Sdh if (is_system_labeled() &&
1496f4b3ec61Sdh check_exempt_conflict_v6(connp, ipst))
149745916cd2Sjpk return (EADDRINUSE);
149845916cd2Sjpk /* FALLTHROUGH */
149945916cd2Sjpk case IPPROTO_UDP:
15007c478bd9Sstevel@tonic-gate if (protocol == IPPROTO_UDP) {
1501f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[
1502bd670b35SErik Nordmark IPCL_UDP_HASH(lport, ipst)];
15037c478bd9Sstevel@tonic-gate } else {
1504f4b3ec61Sdh connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
15057c478bd9Sstevel@tonic-gate }
15067c478bd9Sstevel@tonic-gate
1507bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
15087c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1509bd670b35SErik Nordmark } else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
15107c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_BOUND(connfp, connp);
15117c478bd9Sstevel@tonic-gate } else {
15127c478bd9Sstevel@tonic-gate IPCL_HASH_INSERT_WILDCARD(connfp, connp);
15137c478bd9Sstevel@tonic-gate }
15147c478bd9Sstevel@tonic-gate break;
15157c478bd9Sstevel@tonic-gate }
15167c478bd9Sstevel@tonic-gate
15177c478bd9Sstevel@tonic-gate return (ret);
15187c478bd9Sstevel@tonic-gate }
15197c478bd9Sstevel@tonic-gate
15207c478bd9Sstevel@tonic-gate /*
15217c478bd9Sstevel@tonic-gate * v4 packet classifying function. looks up the fanout table to
15227c478bd9Sstevel@tonic-gate * find the conn, the packet belongs to. returns the conn with
15237c478bd9Sstevel@tonic-gate * the reference held, null otherwise.
152445916cd2Sjpk *
152545916cd2Sjpk * If zoneid is ALL_ZONES, then the search rules described in the "Connection
152645916cd2Sjpk * Lookup" comment block are applied. Labels are also checked as described
152745916cd2Sjpk * above. If the packet is from the inside (looped back), and is from the same
152845916cd2Sjpk * zone, then label checks are omitted.
15297c478bd9Sstevel@tonic-gate */
15307c478bd9Sstevel@tonic-gate conn_t *
ipcl_classify_v4(mblk_t * mp,uint8_t protocol,uint_t hdr_len,ip_recv_attr_t * ira,ip_stack_t * ipst)1531bd670b35SErik Nordmark ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1532bd670b35SErik Nordmark ip_recv_attr_t *ira, ip_stack_t *ipst)
15337c478bd9Sstevel@tonic-gate {
15347c478bd9Sstevel@tonic-gate ipha_t *ipha;
15357c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp;
15367c478bd9Sstevel@tonic-gate uint16_t lport;
15377c478bd9Sstevel@tonic-gate uint16_t fport;
15387c478bd9Sstevel@tonic-gate uint32_t ports;
15397c478bd9Sstevel@tonic-gate conn_t *connp;
15407c478bd9Sstevel@tonic-gate uint16_t *up;
1541bd670b35SErik Nordmark zoneid_t zoneid = ira->ira_zoneid;
15427c478bd9Sstevel@tonic-gate
15437c478bd9Sstevel@tonic-gate ipha = (ipha_t *)mp->b_rptr;
15447c478bd9Sstevel@tonic-gate up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
15457c478bd9Sstevel@tonic-gate
15467c478bd9Sstevel@tonic-gate switch (protocol) {
15477c478bd9Sstevel@tonic-gate case IPPROTO_TCP:
15487c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up;
15497c478bd9Sstevel@tonic-gate connfp =
1550f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1551f4b3ec61Sdh ports, ipst)];
15527c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
15537c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
15547c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
1555bd670b35SErik Nordmark if (IPCL_CONN_MATCH(connp, protocol,
1556bd670b35SErik Nordmark ipha->ipha_src, ipha->ipha_dst, ports) &&
1557bd670b35SErik Nordmark (connp->conn_zoneid == zoneid ||
1558bd670b35SErik Nordmark connp->conn_allzones ||
1559bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1560bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1561bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15627c478bd9Sstevel@tonic-gate break;
15637c478bd9Sstevel@tonic-gate }
15647c478bd9Sstevel@tonic-gate
15657c478bd9Sstevel@tonic-gate if (connp != NULL) {
156645916cd2Sjpk /*
156745916cd2Sjpk * We have a fully-bound TCP connection.
156845916cd2Sjpk *
156945916cd2Sjpk * For labeled systems, there's no need to check the
157045916cd2Sjpk * label here. It's known to be good as we checked
157145916cd2Sjpk * before allowing the connection to become bound.
157245916cd2Sjpk */
15737c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
15747c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
15757c478bd9Sstevel@tonic-gate return (connp);
15767c478bd9Sstevel@tonic-gate }
15777c478bd9Sstevel@tonic-gate
15787c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
15797c478bd9Sstevel@tonic-gate lport = up[1];
1580f4b3ec61Sdh bind_connfp =
1581f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
15827c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
15837c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
15847c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
158545916cd2Sjpk if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1586bd670b35SErik Nordmark lport) &&
1587bd670b35SErik Nordmark (connp->conn_zoneid == zoneid ||
1588bd670b35SErik Nordmark connp->conn_allzones ||
1589bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1590bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1591bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15927c478bd9Sstevel@tonic-gate break;
15937c478bd9Sstevel@tonic-gate }
15947c478bd9Sstevel@tonic-gate
159545916cd2Sjpk /*
159645916cd2Sjpk * If the matching connection is SLP on a private address, then
159745916cd2Sjpk * the label on the packet must match the local zone's label.
159845916cd2Sjpk * Otherwise, it must be in the label range defined by tnrh.
1599bd670b35SErik Nordmark * This is ensured by tsol_receive_local.
1600bd670b35SErik Nordmark *
1601bd670b35SErik Nordmark * Note that we don't check tsol_receive_local for
1602bd670b35SErik Nordmark * the connected case.
160345916cd2Sjpk */
1604bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
160545916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1606bd670b35SErik Nordmark ira, connp)) {
1607bd670b35SErik Nordmark DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1608bd670b35SErik Nordmark char *, "connp(1) could not receive mp(2)",
1609bd670b35SErik Nordmark conn_t *, connp, mblk_t *, mp);
161045916cd2Sjpk connp = NULL;
161145916cd2Sjpk }
161245916cd2Sjpk
16137c478bd9Sstevel@tonic-gate if (connp != NULL) {
161445916cd2Sjpk /* Have a listener at least */
16157c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
16167c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
16177c478bd9Sstevel@tonic-gate return (connp);
16187c478bd9Sstevel@tonic-gate }
16197c478bd9Sstevel@tonic-gate
16207c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
16217c478bd9Sstevel@tonic-gate break;
16227c478bd9Sstevel@tonic-gate
16237c478bd9Sstevel@tonic-gate case IPPROTO_UDP:
16247c478bd9Sstevel@tonic-gate lport = up[1];
16257c478bd9Sstevel@tonic-gate fport = up[0];
1626f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
16277c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
16287c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
16297c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
16307c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
16317c478bd9Sstevel@tonic-gate fport, ipha->ipha_src) &&
1632bd670b35SErik Nordmark (connp->conn_zoneid == zoneid ||
1633bd670b35SErik Nordmark connp->conn_allzones ||
1634bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1635bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
16367c478bd9Sstevel@tonic-gate break;
16377c478bd9Sstevel@tonic-gate }
16387c478bd9Sstevel@tonic-gate
1639bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
164045916cd2Sjpk !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1641bd670b35SErik Nordmark ira, connp)) {
164245916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp,
164345916cd2Sjpk char *, "connp(1) could not receive mp(2)",
164445916cd2Sjpk conn_t *, connp, mblk_t *, mp);
164545916cd2Sjpk connp = NULL;
164645916cd2Sjpk }
164745916cd2Sjpk
16487c478bd9Sstevel@tonic-gate if (connp != NULL) {
16497c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
16507c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
16517c478bd9Sstevel@tonic-gate return (connp);
16527c478bd9Sstevel@tonic-gate }
16537c478bd9Sstevel@tonic-gate
16547c478bd9Sstevel@tonic-gate /*
16557c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets
16567c478bd9Sstevel@tonic-gate */
16577c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
1658bd670b35SErik Nordmark
16597c478bd9Sstevel@tonic-gate break;
16602b24ab6bSSebastien Roy
16612b24ab6bSSebastien Roy case IPPROTO_ENCAP:
16622b24ab6bSSebastien Roy case IPPROTO_IPV6:
16632b24ab6bSSebastien Roy return (ipcl_iptun_classify_v4(&ipha->ipha_src,
16642b24ab6bSSebastien Roy &ipha->ipha_dst, ipst));
16657c478bd9Sstevel@tonic-gate }
16667c478bd9Sstevel@tonic-gate
16677c478bd9Sstevel@tonic-gate return (NULL);
16687c478bd9Sstevel@tonic-gate }
16697c478bd9Sstevel@tonic-gate
16707c478bd9Sstevel@tonic-gate conn_t *
ipcl_classify_v6(mblk_t * mp,uint8_t protocol,uint_t hdr_len,ip_recv_attr_t * ira,ip_stack_t * ipst)1671bd670b35SErik Nordmark ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1672bd670b35SErik Nordmark ip_recv_attr_t *ira, ip_stack_t *ipst)
16737c478bd9Sstevel@tonic-gate {
16747c478bd9Sstevel@tonic-gate ip6_t *ip6h;
16757c478bd9Sstevel@tonic-gate connf_t *connfp, *bind_connfp;
16767c478bd9Sstevel@tonic-gate uint16_t lport;
16777c478bd9Sstevel@tonic-gate uint16_t fport;
1678bd670b35SErik Nordmark tcpha_t *tcpha;
16797c478bd9Sstevel@tonic-gate uint32_t ports;
16807c478bd9Sstevel@tonic-gate conn_t *connp;
16817c478bd9Sstevel@tonic-gate uint16_t *up;
1682bd670b35SErik Nordmark zoneid_t zoneid = ira->ira_zoneid;
16837c478bd9Sstevel@tonic-gate
16847c478bd9Sstevel@tonic-gate ip6h = (ip6_t *)mp->b_rptr;
16857c478bd9Sstevel@tonic-gate
16867c478bd9Sstevel@tonic-gate switch (protocol) {
16877c478bd9Sstevel@tonic-gate case IPPROTO_TCP:
1688bd670b35SErik Nordmark tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1689bd670b35SErik Nordmark up = &tcpha->tha_lport;
16907c478bd9Sstevel@tonic-gate ports = *(uint32_t *)up;
16917c478bd9Sstevel@tonic-gate
16927c478bd9Sstevel@tonic-gate connfp =
1693f4b3ec61Sdh &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1694f4b3ec61Sdh ports, ipst)];
16957c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
16967c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
16977c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
1698bd670b35SErik Nordmark if (IPCL_CONN_MATCH_V6(connp, protocol,
1699bd670b35SErik Nordmark ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1700bd670b35SErik Nordmark (connp->conn_zoneid == zoneid ||
1701bd670b35SErik Nordmark connp->conn_allzones ||
1702bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1703bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1704bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17057c478bd9Sstevel@tonic-gate break;
17067c478bd9Sstevel@tonic-gate }
17077c478bd9Sstevel@tonic-gate
17087c478bd9Sstevel@tonic-gate if (connp != NULL) {
170945916cd2Sjpk /*
171045916cd2Sjpk * We have a fully-bound TCP connection.
171145916cd2Sjpk *
171245916cd2Sjpk * For labeled systems, there's no need to check the
171345916cd2Sjpk * label here. It's known to be good as we checked
171445916cd2Sjpk * before allowing the connection to become bound.
171545916cd2Sjpk */
17167c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
17177c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17187c478bd9Sstevel@tonic-gate return (connp);
17197c478bd9Sstevel@tonic-gate }
17207c478bd9Sstevel@tonic-gate
17217c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17227c478bd9Sstevel@tonic-gate
17237c478bd9Sstevel@tonic-gate lport = up[1];
1724f4b3ec61Sdh bind_connfp =
1725f4b3ec61Sdh &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
17267c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
17277c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
17287c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
17297c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, protocol,
17307c478bd9Sstevel@tonic-gate ip6h->ip6_dst, lport) &&
1731bd670b35SErik Nordmark (connp->conn_zoneid == zoneid ||
1732bd670b35SErik Nordmark connp->conn_allzones ||
1733bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1734bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1735bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17367c478bd9Sstevel@tonic-gate break;
17377c478bd9Sstevel@tonic-gate }
17387c478bd9Sstevel@tonic-gate
1739bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
174045916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1741bd670b35SErik Nordmark ira, connp)) {
174245916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
174345916cd2Sjpk char *, "connp(1) could not receive mp(2)",
174445916cd2Sjpk conn_t *, connp, mblk_t *, mp);
174545916cd2Sjpk connp = NULL;
174645916cd2Sjpk }
174745916cd2Sjpk
17487c478bd9Sstevel@tonic-gate if (connp != NULL) {
17497c478bd9Sstevel@tonic-gate /* Have a listner at least */
17507c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
17517c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
17527c478bd9Sstevel@tonic-gate return (connp);
17537c478bd9Sstevel@tonic-gate }
17547c478bd9Sstevel@tonic-gate
17557c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
17567c478bd9Sstevel@tonic-gate break;
17577c478bd9Sstevel@tonic-gate
17587c478bd9Sstevel@tonic-gate case IPPROTO_UDP:
17597c478bd9Sstevel@tonic-gate up = (uint16_t *)&mp->b_rptr[hdr_len];
17607c478bd9Sstevel@tonic-gate lport = up[1];
17617c478bd9Sstevel@tonic-gate fport = up[0];
1762f4b3ec61Sdh connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
17637c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
17647c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
17657c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
17667c478bd9Sstevel@tonic-gate if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
17677c478bd9Sstevel@tonic-gate fport, ip6h->ip6_src) &&
1768bd670b35SErik Nordmark (connp->conn_zoneid == zoneid ||
1769bd670b35SErik Nordmark connp->conn_allzones ||
1770bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1771bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1772bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17737c478bd9Sstevel@tonic-gate break;
17747c478bd9Sstevel@tonic-gate }
17757c478bd9Sstevel@tonic-gate
1776bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
177745916cd2Sjpk !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1778bd670b35SErik Nordmark ira, connp)) {
177945916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
178045916cd2Sjpk char *, "connp(1) could not receive mp(2)",
178145916cd2Sjpk conn_t *, connp, mblk_t *, mp);
178245916cd2Sjpk connp = NULL;
178345916cd2Sjpk }
178445916cd2Sjpk
17857c478bd9Sstevel@tonic-gate if (connp != NULL) {
17867c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
17877c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17887c478bd9Sstevel@tonic-gate return (connp);
17897c478bd9Sstevel@tonic-gate }
17907c478bd9Sstevel@tonic-gate
17917c478bd9Sstevel@tonic-gate /*
17927c478bd9Sstevel@tonic-gate * We shouldn't come here for multicast/broadcast packets
17937c478bd9Sstevel@tonic-gate */
17947c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
17957c478bd9Sstevel@tonic-gate break;
17962b24ab6bSSebastien Roy case IPPROTO_ENCAP:
17972b24ab6bSSebastien Roy case IPPROTO_IPV6:
17982b24ab6bSSebastien Roy return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
17992b24ab6bSSebastien Roy &ip6h->ip6_dst, ipst));
18007c478bd9Sstevel@tonic-gate }
18017c478bd9Sstevel@tonic-gate
18027c478bd9Sstevel@tonic-gate return (NULL);
18037c478bd9Sstevel@tonic-gate }
18047c478bd9Sstevel@tonic-gate
18057c478bd9Sstevel@tonic-gate /*
18067c478bd9Sstevel@tonic-gate * wrapper around ipcl_classify_(v4,v6) routines.
18077c478bd9Sstevel@tonic-gate */
18087c478bd9Sstevel@tonic-gate conn_t *
ipcl_classify(mblk_t * mp,ip_recv_attr_t * ira,ip_stack_t * ipst)1809bd670b35SErik Nordmark ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
18107c478bd9Sstevel@tonic-gate {
1811bd670b35SErik Nordmark if (ira->ira_flags & IRAF_IS_IPV4) {
1812bd670b35SErik Nordmark return (ipcl_classify_v4(mp, ira->ira_protocol,
1813bd670b35SErik Nordmark ira->ira_ip_hdr_length, ira, ipst));
1814bd670b35SErik Nordmark } else {
1815bd670b35SErik Nordmark return (ipcl_classify_v6(mp, ira->ira_protocol,
1816bd670b35SErik Nordmark ira->ira_ip_hdr_length, ira, ipst));
18177c478bd9Sstevel@tonic-gate }
18187c478bd9Sstevel@tonic-gate }
18197c478bd9Sstevel@tonic-gate
1820bd670b35SErik Nordmark /*
1821bd670b35SErik Nordmark * Only used to classify SCTP RAW sockets
1822bd670b35SErik Nordmark */
18237c478bd9Sstevel@tonic-gate conn_t *
ipcl_classify_raw(mblk_t * mp,uint8_t protocol,uint32_t ports,ipha_t * ipha,ip6_t * ip6h,ip_recv_attr_t * ira,ip_stack_t * ipst)1824bd670b35SErik Nordmark ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1825bd670b35SErik Nordmark ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
18267c478bd9Sstevel@tonic-gate {
182745916cd2Sjpk connf_t *connfp;
18287c478bd9Sstevel@tonic-gate conn_t *connp;
18297c478bd9Sstevel@tonic-gate in_port_t lport;
1830bd670b35SErik Nordmark int ipversion;
183145916cd2Sjpk const void *dst;
1832bd670b35SErik Nordmark zoneid_t zoneid = ira->ira_zoneid;
18337c478bd9Sstevel@tonic-gate
18347c478bd9Sstevel@tonic-gate lport = ((uint16_t *)&ports)[1];
1835bd670b35SErik Nordmark if (ira->ira_flags & IRAF_IS_IPV4) {
1836bd670b35SErik Nordmark dst = (const void *)&ipha->ipha_dst;
1837bd670b35SErik Nordmark ipversion = IPV4_VERSION;
1838bd670b35SErik Nordmark } else {
1839bd670b35SErik Nordmark dst = (const void *)&ip6h->ip6_dst;
1840bd670b35SErik Nordmark ipversion = IPV6_VERSION;
184145916cd2Sjpk }
184245916cd2Sjpk
1843f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
18447c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
18457c478bd9Sstevel@tonic-gate for (connp = connfp->connf_head; connp != NULL;
18467c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
18477c478bd9Sstevel@tonic-gate /* We don't allow v4 fallback for v6 raw socket. */
1848bd670b35SErik Nordmark if (ipversion != connp->conn_ipversion)
18497c478bd9Sstevel@tonic-gate continue;
1850bd670b35SErik Nordmark if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1851bd670b35SErik Nordmark !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1852bd670b35SErik Nordmark if (ipversion == IPV4_VERSION) {
185345916cd2Sjpk if (!IPCL_CONN_MATCH(connp, protocol,
1854bd670b35SErik Nordmark ipha->ipha_src, ipha->ipha_dst, ports))
185545916cd2Sjpk continue;
18567c478bd9Sstevel@tonic-gate } else {
185745916cd2Sjpk if (!IPCL_CONN_MATCH_V6(connp, protocol,
1858bd670b35SErik Nordmark ip6h->ip6_src, ip6h->ip6_dst, ports))
185945916cd2Sjpk continue;
18607c478bd9Sstevel@tonic-gate }
18617c478bd9Sstevel@tonic-gate } else {
1862bd670b35SErik Nordmark if (ipversion == IPV4_VERSION) {
186345916cd2Sjpk if (!IPCL_BIND_MATCH(connp, protocol,
1864bd670b35SErik Nordmark ipha->ipha_dst, lport))
186545916cd2Sjpk continue;
18667c478bd9Sstevel@tonic-gate } else {
186745916cd2Sjpk if (!IPCL_BIND_MATCH_V6(connp, protocol,
1868bd670b35SErik Nordmark ip6h->ip6_dst, lport))
186945916cd2Sjpk continue;
18707c478bd9Sstevel@tonic-gate }
18717c478bd9Sstevel@tonic-gate }
187245916cd2Sjpk
1873bd670b35SErik Nordmark if (connp->conn_zoneid == zoneid ||
1874bd670b35SErik Nordmark connp->conn_allzones ||
1875bd670b35SErik Nordmark ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1876bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1877bd670b35SErik Nordmark (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
187845916cd2Sjpk break;
187945916cd2Sjpk }
1880bd670b35SErik Nordmark
1881bd670b35SErik Nordmark if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1882bd670b35SErik Nordmark !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
188345916cd2Sjpk DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
188445916cd2Sjpk char *, "connp(1) could not receive mp(2)",
188545916cd2Sjpk conn_t *, connp, mblk_t *, mp);
188645916cd2Sjpk connp = NULL;
18877c478bd9Sstevel@tonic-gate }
18887c0c0508Skcpoon
18897c0c0508Skcpoon if (connp != NULL)
18907c0c0508Skcpoon goto found;
18917c0c0508Skcpoon mutex_exit(&connfp->connf_lock);
18927c0c0508Skcpoon
1893bd670b35SErik Nordmark /* Try to look for a wildcard SCTP RAW socket match. */
1894f4b3ec61Sdh connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
18957c0c0508Skcpoon mutex_enter(&connfp->connf_lock);
18967c0c0508Skcpoon for (connp = connfp->connf_head; connp != NULL;
18977c0c0508Skcpoon connp = connp->conn_next) {
18987c0c0508Skcpoon /* We don't allow v4 fallback for v6 raw socket. */
1899bd670b35SErik Nordmark if (ipversion != connp->conn_ipversion)
19007c0c0508Skcpoon continue;
1901bd670b35SErik Nordmark if (!IPCL_ZONE_MATCH(connp, zoneid))
1902bd670b35SErik Nordmark continue;
1903bd670b35SErik Nordmark
1904bd670b35SErik Nordmark if (ipversion == IPV4_VERSION) {
1905bd670b35SErik Nordmark if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
19067c0c0508Skcpoon break;
19077c0c0508Skcpoon } else {
1908bd670b35SErik Nordmark if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
19097c0c0508Skcpoon break;
19107c0c0508Skcpoon }
19117c0c0508Skcpoon }
19127c478bd9Sstevel@tonic-gate }
19137c0c0508Skcpoon
19147c0c0508Skcpoon if (connp != NULL)
19157c0c0508Skcpoon goto found;
19167c0c0508Skcpoon
19177c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
19187c478bd9Sstevel@tonic-gate return (NULL);
19197c0c0508Skcpoon
19207c0c0508Skcpoon found:
19217c0c0508Skcpoon ASSERT(connp != NULL);
19227c0c0508Skcpoon CONN_INC_REF(connp);
19237c0c0508Skcpoon mutex_exit(&connfp->connf_lock);
19247c0c0508Skcpoon return (connp);
19257c478bd9Sstevel@tonic-gate }
19267c478bd9Sstevel@tonic-gate
19277c478bd9Sstevel@tonic-gate /* ARGSUSED */
19287c478bd9Sstevel@tonic-gate static int
tcp_conn_constructor(void * buf,void * cdrarg,int kmflags)1929fc80c0dfSnordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
19307c478bd9Sstevel@tonic-gate {
19317c478bd9Sstevel@tonic-gate itc_t *itc = (itc_t *)buf;
193278a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
1933fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1];
1934fc80c0dfSnordmark
1935fc80c0dfSnordmark bzero(connp, sizeof (conn_t));
1936fc80c0dfSnordmark bzero(tcp, sizeof (tcp_t));
1937fc80c0dfSnordmark
1938fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1939fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
19400f1702c5SYu Xiangning cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1941bd670b35SErik Nordmark tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1942bd670b35SErik Nordmark if (tcp->tcp_timercache == NULL)
1943bd670b35SErik Nordmark return (ENOMEM);
19447c478bd9Sstevel@tonic-gate connp->conn_tcp = tcp;
19457c478bd9Sstevel@tonic-gate connp->conn_flags = IPCL_TCPCONN;
1946bd670b35SErik Nordmark connp->conn_proto = IPPROTO_TCP;
19477c478bd9Sstevel@tonic-gate tcp->tcp_connp = connp;
1948bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1949bd670b35SErik Nordmark
1950bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1951bd670b35SErik Nordmark if (connp->conn_ixa == NULL) {
1952bd670b35SErik Nordmark tcp_timermp_free(tcp);
1953bd670b35SErik Nordmark return (ENOMEM);
1954bd670b35SErik Nordmark }
1955bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1;
1956bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto;
1957bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
19587c478bd9Sstevel@tonic-gate return (0);
19597c478bd9Sstevel@tonic-gate }
19607c478bd9Sstevel@tonic-gate
19617c478bd9Sstevel@tonic-gate /* ARGSUSED */
19627c478bd9Sstevel@tonic-gate static void
tcp_conn_destructor(void * buf,void * cdrarg)1963fc80c0dfSnordmark tcp_conn_destructor(void *buf, void *cdrarg)
1964fc80c0dfSnordmark {
1965fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
196678a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
1967fc80c0dfSnordmark tcp_t *tcp = (tcp_t *)&itc[1];
1968fc80c0dfSnordmark
1969fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_TCPCONN);
1970fc80c0dfSnordmark ASSERT(tcp->tcp_connp == connp);
1971fc80c0dfSnordmark ASSERT(connp->conn_tcp == tcp);
1972fc80c0dfSnordmark tcp_timermp_free(tcp);
1973fc80c0dfSnordmark mutex_destroy(&connp->conn_lock);
1974fc80c0dfSnordmark cv_destroy(&connp->conn_cv);
19750f1702c5SYu Xiangning cv_destroy(&connp->conn_sq_cv);
1976bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock);
1977bd670b35SErik Nordmark
1978bd670b35SErik Nordmark /* Can be NULL if constructor failed */
1979bd670b35SErik Nordmark if (connp->conn_ixa != NULL) {
1980bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1981bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL);
1982bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL);
1983bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa);
1984bd670b35SErik Nordmark }
1985fc80c0dfSnordmark }
1986fc80c0dfSnordmark
1987fc80c0dfSnordmark /* ARGSUSED */
1988fc80c0dfSnordmark static int
ip_conn_constructor(void * buf,void * cdrarg,int kmflags)1989fc80c0dfSnordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1990fc80c0dfSnordmark {
1991fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
199278a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
1993fc80c0dfSnordmark
1994fc80c0dfSnordmark bzero(connp, sizeof (conn_t));
1995fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1996fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1997fc80c0dfSnordmark connp->conn_flags = IPCL_IPCCONN;
1998bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1999fc80c0dfSnordmark
2000bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2001bd670b35SErik Nordmark if (connp->conn_ixa == NULL)
2002bd670b35SErik Nordmark return (ENOMEM);
2003bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1;
2004bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2005fc80c0dfSnordmark return (0);
2006fc80c0dfSnordmark }
2007fc80c0dfSnordmark
2008fc80c0dfSnordmark /* ARGSUSED */
2009fc80c0dfSnordmark static void
ip_conn_destructor(void * buf,void * cdrarg)2010fc80c0dfSnordmark ip_conn_destructor(void *buf, void *cdrarg)
2011fc80c0dfSnordmark {
2012fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
201378a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
2014fc80c0dfSnordmark
2015fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_IPCCONN);
2016fc80c0dfSnordmark ASSERT(connp->conn_priv == NULL);
2017fc80c0dfSnordmark mutex_destroy(&connp->conn_lock);
2018fc80c0dfSnordmark cv_destroy(&connp->conn_cv);
2019bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock);
2020bd670b35SErik Nordmark
2021bd670b35SErik Nordmark /* Can be NULL if constructor failed */
2022bd670b35SErik Nordmark if (connp->conn_ixa != NULL) {
2023bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2024bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL);
2025bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL);
2026bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa);
2027bd670b35SErik Nordmark }
2028fc80c0dfSnordmark }
2029fc80c0dfSnordmark
2030fc80c0dfSnordmark /* ARGSUSED */
2031fc80c0dfSnordmark static int
udp_conn_constructor(void * buf,void * cdrarg,int kmflags)2032fc80c0dfSnordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2033fc80c0dfSnordmark {
2034fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
203578a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
2036fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1];
2037fc80c0dfSnordmark
2038fc80c0dfSnordmark bzero(connp, sizeof (conn_t));
2039fc80c0dfSnordmark bzero(udp, sizeof (udp_t));
2040fc80c0dfSnordmark
2041fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2042fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2043fc80c0dfSnordmark connp->conn_udp = udp;
2044fc80c0dfSnordmark connp->conn_flags = IPCL_UDPCONN;
2045bd670b35SErik Nordmark connp->conn_proto = IPPROTO_UDP;
2046fc80c0dfSnordmark udp->udp_connp = connp;
2047bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2048bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2049bd670b35SErik Nordmark if (connp->conn_ixa == NULL)
2050bd670b35SErik Nordmark return (ENOMEM);
2051bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1;
2052bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto;
2053bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2054fc80c0dfSnordmark return (0);
2055fc80c0dfSnordmark }
2056fc80c0dfSnordmark
2057fc80c0dfSnordmark /* ARGSUSED */
2058fc80c0dfSnordmark static void
udp_conn_destructor(void * buf,void * cdrarg)2059fc80c0dfSnordmark udp_conn_destructor(void *buf, void *cdrarg)
2060fc80c0dfSnordmark {
2061fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
206278a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
2063fc80c0dfSnordmark udp_t *udp = (udp_t *)&itc[1];
2064fc80c0dfSnordmark
2065fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_UDPCONN);
2066fc80c0dfSnordmark ASSERT(udp->udp_connp == connp);
2067fc80c0dfSnordmark ASSERT(connp->conn_udp == udp);
2068fc80c0dfSnordmark mutex_destroy(&connp->conn_lock);
2069fc80c0dfSnordmark cv_destroy(&connp->conn_cv);
2070bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock);
2071bd670b35SErik Nordmark
2072bd670b35SErik Nordmark /* Can be NULL if constructor failed */
2073bd670b35SErik Nordmark if (connp->conn_ixa != NULL) {
2074bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2075bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL);
2076bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL);
2077bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa);
2078bd670b35SErik Nordmark }
2079fc80c0dfSnordmark }
2080fc80c0dfSnordmark
2081fc80c0dfSnordmark /* ARGSUSED */
2082fc80c0dfSnordmark static int
rawip_conn_constructor(void * buf,void * cdrarg,int kmflags)2083fc80c0dfSnordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2084fc80c0dfSnordmark {
2085fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
208678a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
2087fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1];
2088fc80c0dfSnordmark
2089fc80c0dfSnordmark bzero(connp, sizeof (conn_t));
2090fc80c0dfSnordmark bzero(icmp, sizeof (icmp_t));
2091fc80c0dfSnordmark
2092fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2093fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2094fc80c0dfSnordmark connp->conn_icmp = icmp;
2095fc80c0dfSnordmark connp->conn_flags = IPCL_RAWIPCONN;
2096bd670b35SErik Nordmark connp->conn_proto = IPPROTO_ICMP;
2097fc80c0dfSnordmark icmp->icmp_connp = connp;
2098bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2099bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2100bd670b35SErik Nordmark if (connp->conn_ixa == NULL)
2101bd670b35SErik Nordmark return (ENOMEM);
2102bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1;
2103bd670b35SErik Nordmark connp->conn_ixa->ixa_protocol = connp->conn_proto;
2104bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2105fc80c0dfSnordmark return (0);
2106fc80c0dfSnordmark }
2107fc80c0dfSnordmark
2108fc80c0dfSnordmark /* ARGSUSED */
2109fc80c0dfSnordmark static void
rawip_conn_destructor(void * buf,void * cdrarg)2110fc80c0dfSnordmark rawip_conn_destructor(void *buf, void *cdrarg)
2111fc80c0dfSnordmark {
2112fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
211378a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
2114fc80c0dfSnordmark icmp_t *icmp = (icmp_t *)&itc[1];
2115fc80c0dfSnordmark
2116fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2117fc80c0dfSnordmark ASSERT(icmp->icmp_connp == connp);
2118fc80c0dfSnordmark ASSERT(connp->conn_icmp == icmp);
2119fc80c0dfSnordmark mutex_destroy(&connp->conn_lock);
2120fc80c0dfSnordmark cv_destroy(&connp->conn_cv);
2121bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock);
2122bd670b35SErik Nordmark
2123bd670b35SErik Nordmark /* Can be NULL if constructor failed */
2124bd670b35SErik Nordmark if (connp->conn_ixa != NULL) {
2125bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2126bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL);
2127bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL);
2128bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa);
2129bd670b35SErik Nordmark }
2130fc80c0dfSnordmark }
2131fc80c0dfSnordmark
2132fc80c0dfSnordmark /* ARGSUSED */
2133fc80c0dfSnordmark static int
rts_conn_constructor(void * buf,void * cdrarg,int kmflags)2134fc80c0dfSnordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2135fc80c0dfSnordmark {
2136fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
213778a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
2138fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1];
2139fc80c0dfSnordmark
2140fc80c0dfSnordmark bzero(connp, sizeof (conn_t));
2141fc80c0dfSnordmark bzero(rts, sizeof (rts_t));
2142fc80c0dfSnordmark
2143fc80c0dfSnordmark mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2144fc80c0dfSnordmark cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2145fc80c0dfSnordmark connp->conn_rts = rts;
2146fc80c0dfSnordmark connp->conn_flags = IPCL_RTSCONN;
2147fc80c0dfSnordmark rts->rts_connp = connp;
2148bd670b35SErik Nordmark rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2149bd670b35SErik Nordmark connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2150bd670b35SErik Nordmark if (connp->conn_ixa == NULL)
2151bd670b35SErik Nordmark return (ENOMEM);
2152bd670b35SErik Nordmark connp->conn_ixa->ixa_refcnt = 1;
2153bd670b35SErik Nordmark connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2154fc80c0dfSnordmark return (0);
2155fc80c0dfSnordmark }
2156fc80c0dfSnordmark
2157fc80c0dfSnordmark /* ARGSUSED */
2158fc80c0dfSnordmark static void
rts_conn_destructor(void * buf,void * cdrarg)2159fc80c0dfSnordmark rts_conn_destructor(void *buf, void *cdrarg)
21607c478bd9Sstevel@tonic-gate {
2161fc80c0dfSnordmark itc_t *itc = (itc_t *)buf;
216278a2e113SAndy Fiddaman conn_t *connp = &itc->itc_conn;
2163fc80c0dfSnordmark rts_t *rts = (rts_t *)&itc[1];
2164fc80c0dfSnordmark
2165fc80c0dfSnordmark ASSERT(connp->conn_flags & IPCL_RTSCONN);
2166fc80c0dfSnordmark ASSERT(rts->rts_connp == connp);
2167fc80c0dfSnordmark ASSERT(connp->conn_rts == rts);
2168fc80c0dfSnordmark mutex_destroy(&connp->conn_lock);
2169fc80c0dfSnordmark cv_destroy(&connp->conn_cv);
2170bd670b35SErik Nordmark rw_destroy(&connp->conn_ilg_lock);
2171bd670b35SErik Nordmark
2172bd670b35SErik Nordmark /* Can be NULL if constructor failed */
2173bd670b35SErik Nordmark if (connp->conn_ixa != NULL) {
2174bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2175bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_ire == NULL);
2176bd670b35SErik Nordmark ASSERT(connp->conn_ixa->ixa_nce == NULL);
2177bd670b35SErik Nordmark ixa_refrele(connp->conn_ixa);
21780f1702c5SYu Xiangning }
21790f1702c5SYu Xiangning }
21800f1702c5SYu Xiangning
2181fc80c0dfSnordmark /*
2182fc80c0dfSnordmark * Called as part of ipcl_conn_destroy to assert and clear any pointers
2183fc80c0dfSnordmark * in the conn_t.
2184bd670b35SErik Nordmark *
2185bd670b35SErik Nordmark * Below we list all the pointers in the conn_t as a documentation aid.
2186bd670b35SErik Nordmark * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2187bd670b35SErik Nordmark * If you add any pointers to the conn_t please add an ASSERT here
2188bd670b35SErik Nordmark * and #ifdef it out if it can't be actually asserted to be NULL.
2189bd670b35SErik Nordmark * In any case, we bzero most of the conn_t at the end of the function.
2190fc80c0dfSnordmark */
2191fc80c0dfSnordmark void
ipcl_conn_cleanup(conn_t * connp)2192fc80c0dfSnordmark ipcl_conn_cleanup(conn_t *connp)
2193fc80c0dfSnordmark {
2194bd670b35SErik Nordmark ip_xmit_attr_t *ixa;
2195bd670b35SErik Nordmark
2196fc80c0dfSnordmark ASSERT(connp->conn_latch == NULL);
2197bd670b35SErik Nordmark ASSERT(connp->conn_latch_in_policy == NULL);
2198bd670b35SErik Nordmark ASSERT(connp->conn_latch_in_action == NULL);
2199fc80c0dfSnordmark #ifdef notdef
2200fc80c0dfSnordmark ASSERT(connp->conn_rq == NULL);
2201fc80c0dfSnordmark ASSERT(connp->conn_wq == NULL);
2202fc80c0dfSnordmark #endif
2203fc80c0dfSnordmark ASSERT(connp->conn_cred == NULL);
2204fc80c0dfSnordmark ASSERT(connp->conn_g_fanout == NULL);
2205fc80c0dfSnordmark ASSERT(connp->conn_g_next == NULL);
2206fc80c0dfSnordmark ASSERT(connp->conn_g_prev == NULL);
2207fc80c0dfSnordmark ASSERT(connp->conn_policy == NULL);
2208fc80c0dfSnordmark ASSERT(connp->conn_fanout == NULL);
2209fc80c0dfSnordmark ASSERT(connp->conn_next == NULL);
2210fc80c0dfSnordmark ASSERT(connp->conn_prev == NULL);
2211fc80c0dfSnordmark ASSERT(connp->conn_oper_pending_ill == NULL);
2212fc80c0dfSnordmark ASSERT(connp->conn_ilg == NULL);
2213fc80c0dfSnordmark ASSERT(connp->conn_drain_next == NULL);
2214fc80c0dfSnordmark ASSERT(connp->conn_drain_prev == NULL);
2215a9737be2Snordmark #ifdef notdef
2216a9737be2Snordmark /* conn_idl is not cleared when removed from idl list */
2217fc80c0dfSnordmark ASSERT(connp->conn_idl == NULL);
2218a9737be2Snordmark #endif
2219fc80c0dfSnordmark ASSERT(connp->conn_ipsec_opt_mp == NULL);
2220bd670b35SErik Nordmark #ifdef notdef
2221bd670b35SErik Nordmark /* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2222fc80c0dfSnordmark ASSERT(connp->conn_netstack == NULL);
2223bd670b35SErik Nordmark #endif
2224fc80c0dfSnordmark
22250f1702c5SYu Xiangning ASSERT(connp->conn_helper_info == NULL);
2226bd670b35SErik Nordmark ASSERT(connp->conn_ixa != NULL);
2227bd670b35SErik Nordmark ixa = connp->conn_ixa;
2228bd670b35SErik Nordmark ASSERT(ixa->ixa_refcnt == 1);
2229bd670b35SErik Nordmark /* Need to preserve ixa_protocol */
2230bd670b35SErik Nordmark ixa_cleanup(ixa);
2231bd670b35SErik Nordmark ixa->ixa_flags = 0;
2232bd670b35SErik Nordmark
2233fc80c0dfSnordmark /* Clear out the conn_t fields that are not preserved */
2234fc80c0dfSnordmark bzero(&connp->conn_start_clr,
2235fc80c0dfSnordmark sizeof (conn_t) -
2236fc80c0dfSnordmark ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
22377c478bd9Sstevel@tonic-gate }
22387c478bd9Sstevel@tonic-gate
22397c478bd9Sstevel@tonic-gate /*
22407c478bd9Sstevel@tonic-gate * All conns are inserted in a global multi-list for the benefit of
22417c478bd9Sstevel@tonic-gate * walkers. The walk is guaranteed to walk all open conns at the time
22427c478bd9Sstevel@tonic-gate * of the start of the walk exactly once. This property is needed to
22437c478bd9Sstevel@tonic-gate * achieve some cleanups during unplumb of interfaces. This is achieved
22447c478bd9Sstevel@tonic-gate * as follows.
22457c478bd9Sstevel@tonic-gate *
22467c478bd9Sstevel@tonic-gate * ipcl_conn_create and ipcl_conn_destroy are the only functions that
22477c478bd9Sstevel@tonic-gate * call the insert and delete functions below at creation and deletion
22487c478bd9Sstevel@tonic-gate * time respectively. The conn never moves or changes its position in this
22497c478bd9Sstevel@tonic-gate * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
22507c478bd9Sstevel@tonic-gate * won't increase due to walkers, once the conn deletion has started. Note
22517c478bd9Sstevel@tonic-gate * that we can't remove the conn from the global list and then wait for
22527c478bd9Sstevel@tonic-gate * the refcnt to drop to zero, since walkers would then see a truncated
22537c478bd9Sstevel@tonic-gate * list. CONN_INCIPIENT ensures that walkers don't start looking at
22547c478bd9Sstevel@tonic-gate * conns until ip_open is ready to make them globally visible.
22557c478bd9Sstevel@tonic-gate * The global round robin multi-list locks are held only to get the
22567c478bd9Sstevel@tonic-gate * next member/insertion/deletion and contention should be negligible
22577c478bd9Sstevel@tonic-gate * if the multi-list is much greater than the number of cpus.
22587c478bd9Sstevel@tonic-gate */
22597c478bd9Sstevel@tonic-gate void
ipcl_globalhash_insert(conn_t * connp)22607c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
22617c478bd9Sstevel@tonic-gate {
22627c478bd9Sstevel@tonic-gate int index;
2263f4b3ec61Sdh struct connf_s *connfp;
2264f4b3ec61Sdh ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
22657c478bd9Sstevel@tonic-gate
22667c478bd9Sstevel@tonic-gate /*
22677c478bd9Sstevel@tonic-gate * No need for atomic here. Approximate even distribution
22687c478bd9Sstevel@tonic-gate * in the global lists is sufficient.
22697c478bd9Sstevel@tonic-gate */
2270f4b3ec61Sdh ipst->ips_conn_g_index++;
2271f4b3ec61Sdh index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
22727c478bd9Sstevel@tonic-gate
22737c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL;
22747c478bd9Sstevel@tonic-gate /*
22757c478bd9Sstevel@tonic-gate * Mark as INCIPIENT, so that walkers will ignore this
22767c478bd9Sstevel@tonic-gate * for now, till ip_open is ready to make it visible globally.
22777c478bd9Sstevel@tonic-gate */
22787c478bd9Sstevel@tonic-gate connp->conn_state_flags |= CONN_INCIPIENT;
22797c478bd9Sstevel@tonic-gate
2280f4b3ec61Sdh connfp = &ipst->ips_ipcl_globalhash_fanout[index];
22817c478bd9Sstevel@tonic-gate /* Insert at the head of the list */
2282f4b3ec61Sdh mutex_enter(&connfp->connf_lock);
2283f4b3ec61Sdh connp->conn_g_next = connfp->connf_head;
22847c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL)
22857c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp;
2286f4b3ec61Sdh connfp->connf_head = connp;
22877c478bd9Sstevel@tonic-gate
22887c478bd9Sstevel@tonic-gate /* The fanout bucket this conn points to */
2289f4b3ec61Sdh connp->conn_g_fanout = connfp;
22907c478bd9Sstevel@tonic-gate
2291f4b3ec61Sdh mutex_exit(&connfp->connf_lock);
22927c478bd9Sstevel@tonic-gate }
22937c478bd9Sstevel@tonic-gate
22947c478bd9Sstevel@tonic-gate void
ipcl_globalhash_remove(conn_t * connp)22957c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
22967c478bd9Sstevel@tonic-gate {
2297f4b3ec61Sdh struct connf_s *connfp;
2298f4b3ec61Sdh
22997c478bd9Sstevel@tonic-gate /*
23007c478bd9Sstevel@tonic-gate * We were never inserted in the global multi list.
23017c478bd9Sstevel@tonic-gate * IPCL_NONE variety is never inserted in the global multilist
23027c478bd9Sstevel@tonic-gate * since it is presumed to not need any cleanup and is transient.
23037c478bd9Sstevel@tonic-gate */
23047c478bd9Sstevel@tonic-gate if (connp->conn_g_fanout == NULL)
23057c478bd9Sstevel@tonic-gate return;
23067c478bd9Sstevel@tonic-gate
2307f4b3ec61Sdh connfp = connp->conn_g_fanout;
2308f4b3ec61Sdh mutex_enter(&connfp->connf_lock);
23097c478bd9Sstevel@tonic-gate if (connp->conn_g_prev != NULL)
23107c478bd9Sstevel@tonic-gate connp->conn_g_prev->conn_g_next = connp->conn_g_next;
23117c478bd9Sstevel@tonic-gate else
2312f4b3ec61Sdh connfp->connf_head = connp->conn_g_next;
23137c478bd9Sstevel@tonic-gate if (connp->conn_g_next != NULL)
23147c478bd9Sstevel@tonic-gate connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2315f4b3ec61Sdh mutex_exit(&connfp->connf_lock);
23167c478bd9Sstevel@tonic-gate
23177c478bd9Sstevel@tonic-gate /* Better to stumble on a null pointer than to corrupt memory */
23187c478bd9Sstevel@tonic-gate connp->conn_g_next = NULL;
23197c478bd9Sstevel@tonic-gate connp->conn_g_prev = NULL;
2320fc80c0dfSnordmark connp->conn_g_fanout = NULL;
23217c478bd9Sstevel@tonic-gate }
23227c478bd9Sstevel@tonic-gate
23237c478bd9Sstevel@tonic-gate /*
23247c478bd9Sstevel@tonic-gate * Walk the list of all conn_t's in the system, calling the function provided
2325bd670b35SErik Nordmark * With the specified argument for each.
23267c478bd9Sstevel@tonic-gate * Applies to both IPv4 and IPv6.
23277c478bd9Sstevel@tonic-gate *
2328bd670b35SErik Nordmark * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2329bd670b35SErik Nordmark * conn_oper_pending_ill). To guard against stale pointers
23307c478bd9Sstevel@tonic-gate * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
23317c478bd9Sstevel@tonic-gate * unplumbed or removed. New conn_t's that are created while we are walking
23327c478bd9Sstevel@tonic-gate * may be missed by this walk, because they are not necessarily inserted
23337c478bd9Sstevel@tonic-gate * at the tail of the list. They are new conn_t's and thus don't have any
23347c478bd9Sstevel@tonic-gate * stale pointers. The CONN_CLOSING flag ensures that no new reference
23357c478bd9Sstevel@tonic-gate * is created to the struct that is going away.
23367c478bd9Sstevel@tonic-gate */
23377c478bd9Sstevel@tonic-gate void
ipcl_walk(pfv_t func,void * arg,ip_stack_t * ipst)2338f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
23397c478bd9Sstevel@tonic-gate {
23407c478bd9Sstevel@tonic-gate int i;
23417c478bd9Sstevel@tonic-gate conn_t *connp;
23427c478bd9Sstevel@tonic-gate conn_t *prev_connp;
23437c478bd9Sstevel@tonic-gate
23447c478bd9Sstevel@tonic-gate for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2345f4b3ec61Sdh mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23467c478bd9Sstevel@tonic-gate prev_connp = NULL;
2347f4b3ec61Sdh connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
23487c478bd9Sstevel@tonic-gate while (connp != NULL) {
23497c478bd9Sstevel@tonic-gate mutex_enter(&connp->conn_lock);
23507c478bd9Sstevel@tonic-gate if (connp->conn_state_flags &
23517c478bd9Sstevel@tonic-gate (CONN_CONDEMNED | CONN_INCIPIENT)) {
23527c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock);
23537c478bd9Sstevel@tonic-gate connp = connp->conn_g_next;
23547c478bd9Sstevel@tonic-gate continue;
23557c478bd9Sstevel@tonic-gate }
23567c478bd9Sstevel@tonic-gate CONN_INC_REF_LOCKED(connp);
23577c478bd9Sstevel@tonic-gate mutex_exit(&connp->conn_lock);
2358f4b3ec61Sdh mutex_exit(
2359f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23607c478bd9Sstevel@tonic-gate (*func)(connp, arg);
23617c478bd9Sstevel@tonic-gate if (prev_connp != NULL)
23627c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp);
2363f4b3ec61Sdh mutex_enter(
2364f4b3ec61Sdh &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23657c478bd9Sstevel@tonic-gate prev_connp = connp;
23667c478bd9Sstevel@tonic-gate connp = connp->conn_g_next;
23677c478bd9Sstevel@tonic-gate }
2368f4b3ec61Sdh mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23697c478bd9Sstevel@tonic-gate if (prev_connp != NULL)
23707c478bd9Sstevel@tonic-gate CONN_DEC_REF(prev_connp);
23717c478bd9Sstevel@tonic-gate }
23727c478bd9Sstevel@tonic-gate }
23737c478bd9Sstevel@tonic-gate
23747c478bd9Sstevel@tonic-gate /*
23757c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
23767c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference
23777c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries
2378d0ab37afSethindra * (peer tcp in ESTABLISHED state).
23797c478bd9Sstevel@tonic-gate */
23807c478bd9Sstevel@tonic-gate conn_t *
ipcl_conn_tcp_lookup_reversed_ipv4(conn_t * connp,ipha_t * ipha,tcpha_t * tcpha,ip_stack_t * ipst)2381bd670b35SErik Nordmark ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2382f4b3ec61Sdh ip_stack_t *ipst)
23837c478bd9Sstevel@tonic-gate {
23847c478bd9Sstevel@tonic-gate uint32_t ports;
23857c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports;
23867c478bd9Sstevel@tonic-gate connf_t *connfp;
23877c478bd9Sstevel@tonic-gate conn_t *tconnp;
23887c478bd9Sstevel@tonic-gate boolean_t zone_chk;
23897c478bd9Sstevel@tonic-gate
23907c478bd9Sstevel@tonic-gate /*
23917c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then
23927c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of
23937c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED
23947c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones.
23957c478bd9Sstevel@tonic-gate */
23967c478bd9Sstevel@tonic-gate zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
23977c478bd9Sstevel@tonic-gate ipha->ipha_dst == htonl(INADDR_LOOPBACK));
23987c478bd9Sstevel@tonic-gate
2399bd670b35SErik Nordmark pports[0] = tcpha->tha_fport;
2400bd670b35SErik Nordmark pports[1] = tcpha->tha_lport;
24017c478bd9Sstevel@tonic-gate
2402f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2403f4b3ec61Sdh ports, ipst)];
24047c478bd9Sstevel@tonic-gate
24057c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
24067c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
24077c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) {
24087c478bd9Sstevel@tonic-gate
24097c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
24107c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) &&
2411d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24127c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24137c478bd9Sstevel@tonic-gate
24147c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp);
24157c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp);
24167c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24177c478bd9Sstevel@tonic-gate return (tconnp);
24187c478bd9Sstevel@tonic-gate }
24197c478bd9Sstevel@tonic-gate }
24207c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24217c478bd9Sstevel@tonic-gate return (NULL);
24227c478bd9Sstevel@tonic-gate }
24237c478bd9Sstevel@tonic-gate
24247c478bd9Sstevel@tonic-gate /*
24257c478bd9Sstevel@tonic-gate * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
24267c478bd9Sstevel@tonic-gate * the {src, dst, lport, fport} quadruplet. Returns with conn reference
24277c478bd9Sstevel@tonic-gate * held; caller must call CONN_DEC_REF. Only checks for connected entries
2428d0ab37afSethindra * (peer tcp in ESTABLISHED state).
24297c478bd9Sstevel@tonic-gate */
24307c478bd9Sstevel@tonic-gate conn_t *
ipcl_conn_tcp_lookup_reversed_ipv6(conn_t * connp,ip6_t * ip6h,tcpha_t * tcpha,ip_stack_t * ipst)2431bd670b35SErik Nordmark ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2432f4b3ec61Sdh ip_stack_t *ipst)
24337c478bd9Sstevel@tonic-gate {
24347c478bd9Sstevel@tonic-gate uint32_t ports;
24357c478bd9Sstevel@tonic-gate uint16_t *pports = (uint16_t *)&ports;
24367c478bd9Sstevel@tonic-gate connf_t *connfp;
24377c478bd9Sstevel@tonic-gate conn_t *tconnp;
24387c478bd9Sstevel@tonic-gate boolean_t zone_chk;
24397c478bd9Sstevel@tonic-gate
24407c478bd9Sstevel@tonic-gate /*
24417c478bd9Sstevel@tonic-gate * If either the source of destination address is loopback, then
24427c478bd9Sstevel@tonic-gate * both endpoints must be in the same Zone. Otherwise, both of
24437c478bd9Sstevel@tonic-gate * the addresses are system-wide unique (tcp is in ESTABLISHED
24447c478bd9Sstevel@tonic-gate * state) and the endpoints may reside in different Zones. We
24457c478bd9Sstevel@tonic-gate * don't do Zone check for link local address(es) because the
24467c478bd9Sstevel@tonic-gate * current Zone implementation treats each link local address as
24477c478bd9Sstevel@tonic-gate * being unique per system node, i.e. they belong to global Zone.
24487c478bd9Sstevel@tonic-gate */
24497c478bd9Sstevel@tonic-gate zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
24507c478bd9Sstevel@tonic-gate IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
24517c478bd9Sstevel@tonic-gate
2452bd670b35SErik Nordmark pports[0] = tcpha->tha_fport;
2453bd670b35SErik Nordmark pports[1] = tcpha->tha_lport;
24547c478bd9Sstevel@tonic-gate
2455f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2456f4b3ec61Sdh ports, ipst)];
24577c478bd9Sstevel@tonic-gate
24587c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
24597c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
24607c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) {
24617c478bd9Sstevel@tonic-gate
2462bd670b35SErik Nordmark /* We skip conn_bound_if check here as this is loopback tcp */
24637c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
24647c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2465d0ab37afSethindra tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24667c478bd9Sstevel@tonic-gate (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24677c478bd9Sstevel@tonic-gate
24687c478bd9Sstevel@tonic-gate ASSERT(tconnp != connp);
24697c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp);
24707c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24717c478bd9Sstevel@tonic-gate return (tconnp);
24727c478bd9Sstevel@tonic-gate }
24737c478bd9Sstevel@tonic-gate }
24747c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
24757c478bd9Sstevel@tonic-gate return (NULL);
24767c478bd9Sstevel@tonic-gate }
24777c478bd9Sstevel@tonic-gate
24787c478bd9Sstevel@tonic-gate /*
24797c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram.
24807c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF.
24817c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks.
24827c478bd9Sstevel@tonic-gate */
24837c478bd9Sstevel@tonic-gate conn_t *
ipcl_tcp_lookup_reversed_ipv4(ipha_t * ipha,tcpha_t * tcpha,int min_state,ip_stack_t * ipst)2484bd670b35SErik Nordmark ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2485f4b3ec61Sdh ip_stack_t *ipst)
24867c478bd9Sstevel@tonic-gate {
24877c478bd9Sstevel@tonic-gate uint32_t ports;
24887c478bd9Sstevel@tonic-gate uint16_t *pports;
24897c478bd9Sstevel@tonic-gate connf_t *connfp;
24907c478bd9Sstevel@tonic-gate conn_t *tconnp;
24917c478bd9Sstevel@tonic-gate
24927c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports;
2493bd670b35SErik Nordmark pports[0] = tcpha->tha_fport;
2494bd670b35SErik Nordmark pports[1] = tcpha->tha_lport;
24957c478bd9Sstevel@tonic-gate
2496f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2497121e5416Skcpoon ports, ipst)];
24987c478bd9Sstevel@tonic-gate
24997c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
25007c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
25017c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) {
25027c478bd9Sstevel@tonic-gate
25037c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
25047c478bd9Sstevel@tonic-gate ipha->ipha_dst, ipha->ipha_src, ports) &&
25057c478bd9Sstevel@tonic-gate tconnp->conn_tcp->tcp_state >= min_state) {
25067c478bd9Sstevel@tonic-gate
25077c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp);
25087c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25097c478bd9Sstevel@tonic-gate return (tconnp);
25107c478bd9Sstevel@tonic-gate }
25117c478bd9Sstevel@tonic-gate }
25127c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25137c478bd9Sstevel@tonic-gate return (NULL);
25147c478bd9Sstevel@tonic-gate }
25157c478bd9Sstevel@tonic-gate
25167c478bd9Sstevel@tonic-gate /*
25177c478bd9Sstevel@tonic-gate * Find an exact {src, dst, lport, fport} match for a bounced datagram.
25187c478bd9Sstevel@tonic-gate * Returns with conn reference held. Caller must call CONN_DEC_REF.
25197c478bd9Sstevel@tonic-gate * Only checks for connected entries i.e. no INADDR_ANY checks.
25207c478bd9Sstevel@tonic-gate * Match on ifindex in addition to addresses.
25217c478bd9Sstevel@tonic-gate */
25227c478bd9Sstevel@tonic-gate conn_t *
ipcl_tcp_lookup_reversed_ipv6(ip6_t * ip6h,tcpha_t * tcpha,int min_state,uint_t ifindex,ip_stack_t * ipst)25237c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2524f4b3ec61Sdh uint_t ifindex, ip_stack_t *ipst)
25257c478bd9Sstevel@tonic-gate {
25267c478bd9Sstevel@tonic-gate tcp_t *tcp;
25277c478bd9Sstevel@tonic-gate uint32_t ports;
25287c478bd9Sstevel@tonic-gate uint16_t *pports;
25297c478bd9Sstevel@tonic-gate connf_t *connfp;
25307c478bd9Sstevel@tonic-gate conn_t *tconnp;
25317c478bd9Sstevel@tonic-gate
25327c478bd9Sstevel@tonic-gate pports = (uint16_t *)&ports;
25337c478bd9Sstevel@tonic-gate pports[0] = tcpha->tha_fport;
25347c478bd9Sstevel@tonic-gate pports[1] = tcpha->tha_lport;
25357c478bd9Sstevel@tonic-gate
2536f4b3ec61Sdh connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2537121e5416Skcpoon ports, ipst)];
25387c478bd9Sstevel@tonic-gate
25397c478bd9Sstevel@tonic-gate mutex_enter(&connfp->connf_lock);
25407c478bd9Sstevel@tonic-gate for (tconnp = connfp->connf_head; tconnp != NULL;
25417c478bd9Sstevel@tonic-gate tconnp = tconnp->conn_next) {
25427c478bd9Sstevel@tonic-gate
25437c478bd9Sstevel@tonic-gate tcp = tconnp->conn_tcp;
25447c478bd9Sstevel@tonic-gate if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
25457c478bd9Sstevel@tonic-gate ip6h->ip6_dst, ip6h->ip6_src, ports) &&
25467c478bd9Sstevel@tonic-gate tcp->tcp_state >= min_state &&
2547bd670b35SErik Nordmark (tconnp->conn_bound_if == 0 ||
2548bd670b35SErik Nordmark tconnp->conn_bound_if == ifindex)) {
25497c478bd9Sstevel@tonic-gate
25507c478bd9Sstevel@tonic-gate CONN_INC_REF(tconnp);
25517c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25527c478bd9Sstevel@tonic-gate return (tconnp);
25537c478bd9Sstevel@tonic-gate }
25547c478bd9Sstevel@tonic-gate }
25557c478bd9Sstevel@tonic-gate mutex_exit(&connfp->connf_lock);
25567c478bd9Sstevel@tonic-gate return (NULL);
25577c478bd9Sstevel@tonic-gate }
25587c478bd9Sstevel@tonic-gate
25597c478bd9Sstevel@tonic-gate /*
256045916cd2Sjpk * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
256145916cd2Sjpk * a listener when changing state.
25627c478bd9Sstevel@tonic-gate */
25637c478bd9Sstevel@tonic-gate conn_t *
ipcl_lookup_listener_v4(uint16_t lport,ipaddr_t laddr,zoneid_t zoneid,ip_stack_t * ipst)2564f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2565f4b3ec61Sdh ip_stack_t *ipst)
25667c478bd9Sstevel@tonic-gate {
25677c478bd9Sstevel@tonic-gate connf_t *bind_connfp;
25687c478bd9Sstevel@tonic-gate conn_t *connp;
25697c478bd9Sstevel@tonic-gate tcp_t *tcp;
25707c478bd9Sstevel@tonic-gate
25717c478bd9Sstevel@tonic-gate /*
25727c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of
25737c478bd9Sstevel@tonic-gate * all zeros.
25747c478bd9Sstevel@tonic-gate */
25757c478bd9Sstevel@tonic-gate if (laddr == 0)
25767c478bd9Sstevel@tonic-gate return (NULL);
25777c478bd9Sstevel@tonic-gate
257845916cd2Sjpk ASSERT(zoneid != ALL_ZONES);
257945916cd2Sjpk
2580f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
25817c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
25827c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
25837c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
25847c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp;
25857c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
25865d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) &&
25877c478bd9Sstevel@tonic-gate (tcp->tcp_listener == NULL)) {
25887c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
25897c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
25907c478bd9Sstevel@tonic-gate return (connp);
25917c478bd9Sstevel@tonic-gate }
25927c478bd9Sstevel@tonic-gate }
25937c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
25947c478bd9Sstevel@tonic-gate return (NULL);
25957c478bd9Sstevel@tonic-gate }
25967c478bd9Sstevel@tonic-gate
259745916cd2Sjpk /*
259845916cd2Sjpk * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
259945916cd2Sjpk * a listener when changing state.
260045916cd2Sjpk */
26017c478bd9Sstevel@tonic-gate conn_t *
ipcl_lookup_listener_v6(uint16_t lport,in6_addr_t * laddr,uint_t ifindex,zoneid_t zoneid,ip_stack_t * ipst)26027c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2603f4b3ec61Sdh zoneid_t zoneid, ip_stack_t *ipst)
26047c478bd9Sstevel@tonic-gate {
26057c478bd9Sstevel@tonic-gate connf_t *bind_connfp;
26067c478bd9Sstevel@tonic-gate conn_t *connp = NULL;
26077c478bd9Sstevel@tonic-gate tcp_t *tcp;
26087c478bd9Sstevel@tonic-gate
26097c478bd9Sstevel@tonic-gate /*
26107c478bd9Sstevel@tonic-gate * Avoid false matches for packets sent to an IP destination of
26117c478bd9Sstevel@tonic-gate * all zeros.
26127c478bd9Sstevel@tonic-gate */
26137c478bd9Sstevel@tonic-gate if (IN6_IS_ADDR_UNSPECIFIED(laddr))
26147c478bd9Sstevel@tonic-gate return (NULL);
26157c478bd9Sstevel@tonic-gate
261645916cd2Sjpk ASSERT(zoneid != ALL_ZONES);
26177c478bd9Sstevel@tonic-gate
2618f4b3ec61Sdh bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
26197c478bd9Sstevel@tonic-gate mutex_enter(&bind_connfp->connf_lock);
26207c478bd9Sstevel@tonic-gate for (connp = bind_connfp->connf_head; connp != NULL;
26217c478bd9Sstevel@tonic-gate connp = connp->conn_next) {
26227c478bd9Sstevel@tonic-gate tcp = connp->conn_tcp;
26237c478bd9Sstevel@tonic-gate if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
26245d0bc3edSsommerfe IPCL_ZONE_MATCH(connp, zoneid) &&
2625bd670b35SErik Nordmark (connp->conn_bound_if == 0 ||
2626bd670b35SErik Nordmark connp->conn_bound_if == ifindex) &&
26277c478bd9Sstevel@tonic-gate tcp->tcp_listener == NULL) {
26287c478bd9Sstevel@tonic-gate CONN_INC_REF(connp);
26297c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
26307c478bd9Sstevel@tonic-gate return (connp);
26317c478bd9Sstevel@tonic-gate }
26327c478bd9Sstevel@tonic-gate }
26337c478bd9Sstevel@tonic-gate mutex_exit(&bind_connfp->connf_lock);
26347c478bd9Sstevel@tonic-gate return (NULL);
26357c478bd9Sstevel@tonic-gate }
26367c478bd9Sstevel@tonic-gate
2637ff550d0eSmasputra /*
2638ff550d0eSmasputra * ipcl_get_next_conn
2639ff550d0eSmasputra * get the next entry in the conn global list
2640ff550d0eSmasputra * and put a reference on the next_conn.
2641ff550d0eSmasputra * decrement the reference on the current conn.
2642ff550d0eSmasputra *
2643ff550d0eSmasputra * This is an iterator based walker function that also provides for
2644ff550d0eSmasputra * some selection by the caller. It walks through the conn_hash bucket
2645ff550d0eSmasputra * searching for the next valid connp in the list, and selects connections
2646ff550d0eSmasputra * that are neither closed nor condemned. It also REFHOLDS the conn
2647ff550d0eSmasputra * thus ensuring that the conn exists when the caller uses the conn.
2648ff550d0eSmasputra */
2649ff550d0eSmasputra conn_t *
ipcl_get_next_conn(connf_t * connfp,conn_t * connp,uint32_t conn_flags)2650ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2651ff550d0eSmasputra {
2652ff550d0eSmasputra conn_t *next_connp;
2653ff550d0eSmasputra
2654ff550d0eSmasputra if (connfp == NULL)
2655ff550d0eSmasputra return (NULL);
2656ff550d0eSmasputra
2657ff550d0eSmasputra mutex_enter(&connfp->connf_lock);
2658ff550d0eSmasputra
2659ff550d0eSmasputra next_connp = (connp == NULL) ?
2660ff550d0eSmasputra connfp->connf_head : connp->conn_g_next;
2661ff550d0eSmasputra
2662ff550d0eSmasputra while (next_connp != NULL) {
2663ff550d0eSmasputra mutex_enter(&next_connp->conn_lock);
2664ff550d0eSmasputra if (!(next_connp->conn_flags & conn_flags) ||
2665ff550d0eSmasputra (next_connp->conn_state_flags &
2666ff550d0eSmasputra (CONN_CONDEMNED | CONN_INCIPIENT))) {
2667ff550d0eSmasputra /*
2668ff550d0eSmasputra * This conn has been condemned or
2669ff550d0eSmasputra * is closing, or the flags don't match
2670ff550d0eSmasputra */
2671ff550d0eSmasputra mutex_exit(&next_connp->conn_lock);
2672ff550d0eSmasputra next_connp = next_connp->conn_g_next;
2673ff550d0eSmasputra continue;
2674ff550d0eSmasputra }
2675ff550d0eSmasputra CONN_INC_REF_LOCKED(next_connp);
2676ff550d0eSmasputra mutex_exit(&next_connp->conn_lock);
2677ff550d0eSmasputra break;
2678ff550d0eSmasputra }
2679ff550d0eSmasputra
2680ff550d0eSmasputra mutex_exit(&connfp->connf_lock);
2681ff550d0eSmasputra
2682ff550d0eSmasputra if (connp != NULL)
2683ff550d0eSmasputra CONN_DEC_REF(connp);
2684ff550d0eSmasputra
2685ff550d0eSmasputra return (next_connp);
2686ff550d0eSmasputra }
2687ff550d0eSmasputra
26887c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG
26897c478bd9Sstevel@tonic-gate /*
26907c478bd9Sstevel@tonic-gate * Trace of the last NBUF refhold/refrele
26917c478bd9Sstevel@tonic-gate */
26927c478bd9Sstevel@tonic-gate int
conn_trace_ref(conn_t * connp)26937c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
26947c478bd9Sstevel@tonic-gate {
26957c478bd9Sstevel@tonic-gate int last;
26967c478bd9Sstevel@tonic-gate conn_trace_t *ctb;
26977c478bd9Sstevel@tonic-gate
26987c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock));
26997c478bd9Sstevel@tonic-gate last = connp->conn_trace_last;
27007c478bd9Sstevel@tonic-gate last++;
27017c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX)
27027c478bd9Sstevel@tonic-gate last = 0;
27037c478bd9Sstevel@tonic-gate
27047c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last];
27056a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27067c478bd9Sstevel@tonic-gate connp->conn_trace_last = last;
27077c478bd9Sstevel@tonic-gate return (1);
27087c478bd9Sstevel@tonic-gate }
27097c478bd9Sstevel@tonic-gate
27107c478bd9Sstevel@tonic-gate int
conn_untrace_ref(conn_t * connp)27117c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
27127c478bd9Sstevel@tonic-gate {
27137c478bd9Sstevel@tonic-gate int last;
27147c478bd9Sstevel@tonic-gate conn_trace_t *ctb;
27157c478bd9Sstevel@tonic-gate
27167c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&connp->conn_lock));
27177c478bd9Sstevel@tonic-gate last = connp->conn_trace_last;
27187c478bd9Sstevel@tonic-gate last++;
27197c478bd9Sstevel@tonic-gate if (last == CONN_TRACE_MAX)
27207c478bd9Sstevel@tonic-gate last = 0;
27217c478bd9Sstevel@tonic-gate
27227c478bd9Sstevel@tonic-gate ctb = &connp->conn_trace_buf[last];
27236a8288c7Scarlsonj ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27247c478bd9Sstevel@tonic-gate connp->conn_trace_last = last;
27257c478bd9Sstevel@tonic-gate return (1);
27267c478bd9Sstevel@tonic-gate }
27277c478bd9Sstevel@tonic-gate #endif
272878a2e113SAndy Fiddaman
272978a2e113SAndy Fiddaman mib2_socketInfoEntry_t *
conn_get_socket_info(conn_t * connp,mib2_socketInfoEntry_t * sie)273078a2e113SAndy Fiddaman conn_get_socket_info(conn_t *connp, mib2_socketInfoEntry_t *sie)
273178a2e113SAndy Fiddaman {
273278a2e113SAndy Fiddaman vnode_t *vn = NULL;
273378a2e113SAndy Fiddaman vattr_t attr;
273478a2e113SAndy Fiddaman uint64_t flags = 0;
2735*041297c2SDan McDonald sock_upcalls_t *upcalls;
2736*041297c2SDan McDonald sock_upper_handle_t upper_handle;
273778a2e113SAndy Fiddaman
273878a2e113SAndy Fiddaman /*
273978a2e113SAndy Fiddaman * If the connection is closing, it is not safe to make an upcall or
274078a2e113SAndy Fiddaman * access the stream associated with the connection.
274178a2e113SAndy Fiddaman * The callers of this function have a reference on connp itself
274278a2e113SAndy Fiddaman * so, as long as it is not closing, it's safe to continue.
274378a2e113SAndy Fiddaman */
274478a2e113SAndy Fiddaman mutex_enter(&connp->conn_lock);
274578a2e113SAndy Fiddaman
274678a2e113SAndy Fiddaman if ((connp->conn_state_flags & CONN_CLOSING)) {
274778a2e113SAndy Fiddaman mutex_exit(&connp->conn_lock);
274878a2e113SAndy Fiddaman return (NULL);
274978a2e113SAndy Fiddaman }
275078a2e113SAndy Fiddaman
27512ad53042SDan McDonald /*
27522ad53042SDan McDonald * Continue to hold conn_lock because we don't want to race with an
27532ad53042SDan McDonald * in-progress close, which will have set-to-NULL (and destroyed
27542ad53042SDan McDonald * upper_handle, aka sonode (and vnode)) BEFORE setting CONN_CLOSING.
2755*041297c2SDan McDonald *
2756*041297c2SDan McDonald * There is still a race with an in-progress OPEN, however, where
2757*041297c2SDan McDonald * conn_upper_handle and conn_upcalls are being assigned (in multiple
2758*041297c2SDan McDonald * codepaths) WITHOUT conn_lock being held. We address that race
2759*041297c2SDan McDonald * HERE, however, given that both are going from NULL to non-NULL,
2760*041297c2SDan McDonald * if we lose the race, we don't get any data for the in-progress-OPEN
2761*041297c2SDan McDonald * socket.
27622ad53042SDan McDonald */
276378a2e113SAndy Fiddaman
2764*041297c2SDan McDonald upcalls = connp->conn_upcalls;
2765*041297c2SDan McDonald upper_handle = connp->conn_upper_handle;
2766*041297c2SDan McDonald /* Check BOTH for non-NULL before attempting an upcall. */
2767*041297c2SDan McDonald if (upper_handle != NULL && upcalls != NULL) {
2768*041297c2SDan McDonald /* su_get_vnode() returns one with VN_HOLD() already done. */
2769*041297c2SDan McDonald vn = upcalls->su_get_vnode(upper_handle);
277078a2e113SAndy Fiddaman } else if (!IPCL_IS_NONSTR(connp) && connp->conn_rq != NULL) {
277178a2e113SAndy Fiddaman vn = STREAM(connp->conn_rq)->sd_pvnode;
277278a2e113SAndy Fiddaman if (vn != NULL)
277378a2e113SAndy Fiddaman VN_HOLD(vn);
277478a2e113SAndy Fiddaman flags |= MIB2_SOCKINFO_STREAM;
277578a2e113SAndy Fiddaman }
277678a2e113SAndy Fiddaman
27772ad53042SDan McDonald mutex_exit(&connp->conn_lock);
27782ad53042SDan McDonald
277978a2e113SAndy Fiddaman if (vn == NULL || VOP_GETATTR(vn, &attr, 0, CRED(), NULL) != 0) {
278078a2e113SAndy Fiddaman if (vn != NULL)
278178a2e113SAndy Fiddaman VN_RELE(vn);
278278a2e113SAndy Fiddaman return (NULL);
278378a2e113SAndy Fiddaman }
278478a2e113SAndy Fiddaman
278578a2e113SAndy Fiddaman VN_RELE(vn);
278678a2e113SAndy Fiddaman
278778a2e113SAndy Fiddaman bzero(sie, sizeof (*sie));
278878a2e113SAndy Fiddaman
278978a2e113SAndy Fiddaman sie->sie_flags = flags;
279078a2e113SAndy Fiddaman sie->sie_inode = attr.va_nodeid;
279178a2e113SAndy Fiddaman sie->sie_dev = attr.va_rdev;
279278a2e113SAndy Fiddaman
279378a2e113SAndy Fiddaman return (sie);
279478a2e113SAndy Fiddaman }
2795