17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ee4701baSericheng  * Common Development and Distribution License (the "License").
6ee4701baSericheng  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22e11c3f44Smeem  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * IP PACKET CLASSIFIER
287c478bd9Sstevel@tonic-gate  *
297c478bd9Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
307c478bd9Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
317c478bd9Sstevel@tonic-gate  * interface for managing connection states.
327c478bd9Sstevel@tonic-gate  *
337c478bd9Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
347c478bd9Sstevel@tonic-gate  * other things:
357c478bd9Sstevel@tonic-gate  *
367c478bd9Sstevel@tonic-gate  *	o local/remote address and ports
377c478bd9Sstevel@tonic-gate  *	o Transport protocol
387c478bd9Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
397c478bd9Sstevel@tonic-gate  *	o reference counter
407c478bd9Sstevel@tonic-gate  *	o Connection state
417c478bd9Sstevel@tonic-gate  *	o hash table linkage
427c478bd9Sstevel@tonic-gate  *	o interface/ire information
437c478bd9Sstevel@tonic-gate  *	o credentials
447c478bd9Sstevel@tonic-gate  *	o ipsec policy
457c478bd9Sstevel@tonic-gate  *	o send and receive functions.
467c478bd9Sstevel@tonic-gate  *	o mutex lock.
477c478bd9Sstevel@tonic-gate  *
487c478bd9Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
497c478bd9Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
507c478bd9Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
517c478bd9Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
527c478bd9Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
537c478bd9Sstevel@tonic-gate  * before its processing is finished).
547c478bd9Sstevel@tonic-gate  *
55*bd670b35SErik Nordmark  * conn_recv is used to pass up packets to the ULP.
56*bd670b35SErik Nordmark  * For TCP conn_recv changes. It is tcp_input_listener_unbound initially for
57*bd670b35SErik Nordmark  * a listener, and changes to tcp_input_listener as the listener has picked a
58*bd670b35SErik Nordmark  * good squeue. For other cases it is set to tcp_input_data.
59*bd670b35SErik Nordmark  *
60*bd670b35SErik Nordmark  * conn_recvicmp is used to pass up ICMP errors to the ULP.
617c478bd9Sstevel@tonic-gate  *
627c478bd9Sstevel@tonic-gate  * Classifier uses several hash tables:
637c478bd9Sstevel@tonic-gate  *
647c478bd9Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
657c478bd9Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
667c478bd9Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
677c478bd9Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
687c478bd9Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
692b24ab6bSSebastien Roy  *	ipcl_iptun_fanout:	contains all IP tunnel connections
707c478bd9Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
717c478bd9Sstevel@tonic-gate  *
727c478bd9Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
737c478bd9Sstevel@tonic-gate  * which need to view all existing connections.
747c478bd9Sstevel@tonic-gate  *
757c478bd9Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
767c478bd9Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
777c478bd9Sstevel@tonic-gate  * first, followed by the connection lock.
787c478bd9Sstevel@tonic-gate  *
797c478bd9Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
807c478bd9Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
817c478bd9Sstevel@tonic-gate  * when the caller has finished processing the connection.
827c478bd9Sstevel@tonic-gate  *
837c478bd9Sstevel@tonic-gate  *
847c478bd9Sstevel@tonic-gate  * INTERFACES:
857c478bd9Sstevel@tonic-gate  * ===========
867c478bd9Sstevel@tonic-gate  *
877c478bd9Sstevel@tonic-gate  * Connection Lookup:
887c478bd9Sstevel@tonic-gate  * ------------------
897c478bd9Sstevel@tonic-gate  *
90*bd670b35SErik Nordmark  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, ira, ip_stack)
91*bd670b35SErik Nordmark  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, ira, ip_stack)
927c478bd9Sstevel@tonic-gate  *
937c478bd9Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
947c478bd9Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
957c478bd9Sstevel@tonic-gate  * reference counter is incremented.
967c478bd9Sstevel@tonic-gate  *
977c478bd9Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
987c478bd9Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
997c478bd9Sstevel@tonic-gate  *		and TCP or UDP header.
1007c478bd9Sstevel@tonic-gate  *
1017c478bd9Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1027c478bd9Sstevel@tonic-gate  *
1037c478bd9Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1047c478bd9Sstevel@tonic-gate  *		 the packet.
1057c478bd9Sstevel@tonic-gate  *
106*bd670b35SErik Nordmark  * 	ira->ira_zoneid: The zone in which the returned connection must be; the
107*bd670b35SErik Nordmark  *		zoneid corresponding to the ire_zoneid on the IRE located for
108*bd670b35SErik Nordmark  *		the packet's destination address.
109*bd670b35SErik Nordmark  *
110*bd670b35SErik Nordmark  *	ira->ira_flags: Contains the IRAF_TX_MAC_EXEMPTABLE and
111*bd670b35SErik Nordmark  *		IRAF_TX_SHARED_ADDR flags
1127c478bd9Sstevel@tonic-gate  *
1137c478bd9Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1147c478bd9Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1157c478bd9Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
1167c478bd9Sstevel@tonic-gate  *		3-tuple {dst, remote port, protocol} lookup in
1177c478bd9Sstevel@tonic-gate  *			ipcl_bind_fanout table.
1187c478bd9Sstevel@tonic-gate  *
1197c478bd9Sstevel@tonic-gate  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
1207c478bd9Sstevel@tonic-gate  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
1227c478bd9Sstevel@tonic-gate  *	to multiple UDP clients, which is handled in IP itself.
1237c478bd9Sstevel@tonic-gate  *
12445916cd2Sjpk  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
12545916cd2Sjpk  * determine which actual zone gets the segment.  This is used only in a
12645916cd2Sjpk  * labeled environment.  The matching rules are:
12745916cd2Sjpk  *
12845916cd2Sjpk  *	- If it's not a multilevel port, then the label on the packet selects
12945916cd2Sjpk  *	  the zone.  Unlabeled packets are delivered to the global zone.
13045916cd2Sjpk  *
13145916cd2Sjpk  *	- If it's a multilevel port, then only the zone registered to receive
13245916cd2Sjpk  *	  packets on that port matches.
13345916cd2Sjpk  *
13445916cd2Sjpk  * Also, in a labeled environment, packet labels need to be checked.  For fully
13545916cd2Sjpk  * bound TCP connections, we can assume that the packet label was checked
13645916cd2Sjpk  * during connection establishment, and doesn't need to be checked on each
13745916cd2Sjpk  * packet.  For others, though, we need to check for strict equality or, for
13845916cd2Sjpk  * multilevel ports, membership in the range or set.  This part currently does
13945916cd2Sjpk  * a tnrh lookup on each packet, but could be optimized to use cached results
14045916cd2Sjpk  * if that were necessary.  (SCTP doesn't come through here, but if it did,
14145916cd2Sjpk  * we would apply the same rules as TCP.)
14245916cd2Sjpk  *
14345916cd2Sjpk  * An implication of the above is that fully-bound TCP sockets must always use
14445916cd2Sjpk  * distinct 4-tuples; they can't be discriminated by label alone.
14545916cd2Sjpk  *
14645916cd2Sjpk  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
14745916cd2Sjpk  * as there's no connection set-up handshake and no shared state.
14845916cd2Sjpk  *
14945916cd2Sjpk  * Labels on looped-back packets within a single zone do not need to be
15045916cd2Sjpk  * checked, as all processes in the same zone have the same label.
15145916cd2Sjpk  *
15245916cd2Sjpk  * Finally, for unlabeled packets received by a labeled system, special rules
15345916cd2Sjpk  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
15445916cd2Sjpk  * socket in the zone whose label matches the default label of the sender, if
15545916cd2Sjpk  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
15645916cd2Sjpk  * receiver's label must dominate the sender's default label.
15745916cd2Sjpk  *
158*bd670b35SErik Nordmark  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack);
159f4b3ec61Sdh  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
160f4b3ec61Sdh  *					 ip_stack);
1617c478bd9Sstevel@tonic-gate  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
1647c478bd9Sstevel@tonic-gate  *	ports are read from the IP and TCP header respectively.
1657c478bd9Sstevel@tonic-gate  *
166f4b3ec61Sdh  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
167f4b3ec61Sdh  *					 zoneid, ip_stack);
168f4b3ec61Sdh  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
169f4b3ec61Sdh  *					 zoneid, ip_stack);
1707c478bd9Sstevel@tonic-gate  *
1717c478bd9Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1727c478bd9Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1737c478bd9Sstevel@tonic-gate  * 	parameter interface index is also compared.
1747c478bd9Sstevel@tonic-gate  *
175f4b3ec61Sdh  * void ipcl_walk(func, arg, ip_stack)
1767c478bd9Sstevel@tonic-gate  *
1777c478bd9Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1787c478bd9Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1797c478bd9Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1807c478bd9Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1817c478bd9Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1827c478bd9Sstevel@tonic-gate  *
1837c478bd9Sstevel@tonic-gate  * Table Updates
1847c478bd9Sstevel@tonic-gate  * -------------
1857c478bd9Sstevel@tonic-gate  *
186*bd670b35SErik Nordmark  * int ipcl_conn_insert(connp);
187*bd670b35SErik Nordmark  * int ipcl_conn_insert_v4(connp);
188*bd670b35SErik Nordmark  * int ipcl_conn_insert_v6(connp);
1897c478bd9Sstevel@tonic-gate  *
1907c478bd9Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
1927c478bd9Sstevel@tonic-gate  *		connp		conn_t to be inserted
1937c478bd9Sstevel@tonic-gate  *
1947c478bd9Sstevel@tonic-gate  *	Return value :
1957c478bd9Sstevel@tonic-gate  *		0		if connp was inserted
1967c478bd9Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
1977c478bd9Sstevel@tonic-gate  *				already exists.
1987c478bd9Sstevel@tonic-gate  *
199*bd670b35SErik Nordmark  * int ipcl_bind_insert(connp);
200*bd670b35SErik Nordmark  * int ipcl_bind_insert_v4(connp);
201*bd670b35SErik Nordmark  * int ipcl_bind_insert_v6(connp);
2027c478bd9Sstevel@tonic-gate  *
2037c478bd9Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
 * 	Arguments :
2057c478bd9Sstevel@tonic-gate  * 		connp		conn_t to be inserted
2067c478bd9Sstevel@tonic-gate  *
2077c478bd9Sstevel@tonic-gate  *
2087c478bd9Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
2097c478bd9Sstevel@tonic-gate  *
2107c478bd9Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
2117c478bd9Sstevel@tonic-gate  *
2127c478bd9Sstevel@tonic-gate  * Connection Creation/Destruction
2137c478bd9Sstevel@tonic-gate  * -------------------------------
2147c478bd9Sstevel@tonic-gate  *
215f4b3ec61Sdh  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
2167c478bd9Sstevel@tonic-gate  *
2177c478bd9Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
2187c478bd9Sstevel@tonic-gate  * 	globalhash table.
2197c478bd9Sstevel@tonic-gate  *
2207c478bd9Sstevel@tonic-gate  *	type:	This flag determines the type of conn_t which needs to be
221fc80c0dfSnordmark  *		created i.e., which kmem_cache it comes from.
2227c478bd9Sstevel@tonic-gate  *		IPCL_TCPCONN	indicates a TCP connection
223fc80c0dfSnordmark  *		IPCL_SCTPCONN	indicates a SCTP connection
224fc80c0dfSnordmark  *		IPCL_UDPCONN	indicates a UDP conn_t.
225fc80c0dfSnordmark  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
226fc80c0dfSnordmark  *		IPCL_RTSCONN	indicates a RTS conn_t.
227fc80c0dfSnordmark  *		IPCL_IPCCONN	indicates all other connections.
2287c478bd9Sstevel@tonic-gate  *
2297c478bd9Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2307c478bd9Sstevel@tonic-gate  *
2317c478bd9Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2327c478bd9Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2337c478bd9Sstevel@tonic-gate  */
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate #include <sys/types.h>
2367c478bd9Sstevel@tonic-gate #include <sys/stream.h>
2377c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
2387c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
2397c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
2407c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
2417c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2427c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
2437c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
2447c478bd9Sstevel@tonic-gate #include <sys/debug.h>
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate #include <sys/systm.h>
2477c478bd9Sstevel@tonic-gate #include <sys/param.h>
2487c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
2497c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
2507c478bd9Sstevel@tonic-gate #include <inet/common.h>
2517c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
2527c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h>
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate #include <inet/ip.h>
255*bd670b35SErik Nordmark #include <inet/ip_if.h>
256*bd670b35SErik Nordmark #include <inet/ip_ire.h>
2577c478bd9Sstevel@tonic-gate #include <inet/ip6.h>
2587c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h>
2590f1702c5SYu Xiangning #include <inet/ip_impl.h>
260ff550d0eSmasputra #include <inet/udp_impl.h>
2617c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h>
262f4b3ec61Sdh #include <inet/sctp/sctp_impl.h>
263fc80c0dfSnordmark #include <inet/rawip_impl.h>
264fc80c0dfSnordmark #include <inet/rts_impl.h>
2652b24ab6bSSebastien Roy #include <inet/iptun/iptun_impl.h>
2667c478bd9Sstevel@tonic-gate 
2677c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
2687c478bd9Sstevel@tonic-gate 
2697c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h>
2700f1702c5SYu Xiangning #include <inet/tcp.h>
2717c478bd9Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2727c478bd9Sstevel@tonic-gate 
27345916cd2Sjpk #include <sys/tsol/tnet.h>
2740f1702c5SYu Xiangning #include <sys/sockio.h>
27545916cd2Sjpk 
276f4b3ec61Sdh /* Old value for compatibility. Setable in /etc/system */
2777c478bd9Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0;
2787c478bd9Sstevel@tonic-gate 
279f4b3ec61Sdh /* New value. Zero means choose automatically.  Setable in /etc/system */
2807c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0;
2817c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192;
2827c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500;
2837c478bd9Sstevel@tonic-gate 
2847c478bd9Sstevel@tonic-gate /* bind/udp fanout table size */
2857c478bd9Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512;
286ee4701baSericheng uint_t ipcl_udp_fanout_size = 16384;
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate /* Raw socket fanout size.  Must be a power of 2. */
2897c478bd9Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256;
2907c478bd9Sstevel@tonic-gate 
2912b24ab6bSSebastien Roy /*
2922b24ab6bSSebastien Roy  * The IPCL_IPTUN_HASH() function works best with a prime table size.  We
2932b24ab6bSSebastien Roy  * expect that most large deployments would have hundreds of tunnels, and
2942b24ab6bSSebastien Roy  * thousands in the extreme case.
2952b24ab6bSSebastien Roy  */
2962b24ab6bSSebastien Roy uint_t ipcl_iptun_fanout_size = 6143;
2972b24ab6bSSebastien Roy 
/*
 * Primes useful for sizing hash tables, indexed by N for N of 0-28.
 * Entry N is the nearest prime <= 2^N - 2^(N-2).  Entries for N of 0-2
 * are 0, and a final 0 follows the N = 28 entry.
 */
#define	P2Ps() {							\
	0, 0, 0, 5, 11, 23, 47, 89,		/* N = 0-7 */		\
	191, 383, 761, 1531, 3067, 6143, 12281, 24571,	/* N = 8-15 */	\
	49139, 98299, 196597, 393209,		/* N = 16-19 */		\
	786431, 1572853, 3145721, 6291449,	/* N = 20-23 */		\
	12582893, 25165813, 50331599,		/* N = 24-26 */		\
	100663291, 201326557,			/* N = 27-28 */		\
	0								\
}
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate /*
309fc80c0dfSnordmark  * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
310fc80c0dfSnordmark  * are aligned on cache lines.
3117c478bd9Sstevel@tonic-gate  */
312fc80c0dfSnordmark typedef union itc_s {
313fc80c0dfSnordmark 	conn_t	itc_conn;
314fc80c0dfSnordmark 	char	itcu_filler[CACHE_ALIGN(conn_s)];
3157c478bd9Sstevel@tonic-gate } itc_t;
3167c478bd9Sstevel@tonic-gate 
317fc80c0dfSnordmark struct kmem_cache  *tcp_conn_cache;
318fc80c0dfSnordmark struct kmem_cache  *ip_conn_cache;
3197c478bd9Sstevel@tonic-gate extern struct kmem_cache  *sctp_conn_cache;
3207c478bd9Sstevel@tonic-gate extern struct kmem_cache  *tcp_sack_info_cache;
321fc80c0dfSnordmark struct kmem_cache  *udp_conn_cache;
322fc80c0dfSnordmark struct kmem_cache  *rawip_conn_cache;
323fc80c0dfSnordmark struct kmem_cache  *rts_conn_cache;
3247c478bd9Sstevel@tonic-gate 
3257c478bd9Sstevel@tonic-gate extern void	tcp_timermp_free(tcp_t *);
3267c478bd9Sstevel@tonic-gate extern mblk_t	*tcp_timermp_alloc(int);
3277c478bd9Sstevel@tonic-gate 
328fc80c0dfSnordmark static int	ip_conn_constructor(void *, void *, int);
329fc80c0dfSnordmark static void	ip_conn_destructor(void *, void *);
330fc80c0dfSnordmark 
331fc80c0dfSnordmark static int	tcp_conn_constructor(void *, void *, int);
332fc80c0dfSnordmark static void	tcp_conn_destructor(void *, void *);
333fc80c0dfSnordmark 
334fc80c0dfSnordmark static int	udp_conn_constructor(void *, void *, int);
335fc80c0dfSnordmark static void	udp_conn_destructor(void *, void *);
336fc80c0dfSnordmark 
337fc80c0dfSnordmark static int	rawip_conn_constructor(void *, void *, int);
338fc80c0dfSnordmark static void	rawip_conn_destructor(void *, void *);
339fc80c0dfSnordmark 
340fc80c0dfSnordmark static int	rts_conn_constructor(void *, void *, int);
341fc80c0dfSnordmark static void	rts_conn_destructor(void *, void *);
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate /*
344f4b3ec61Sdh  * Global (for all stack instances) init routine
3457c478bd9Sstevel@tonic-gate  */
3467c478bd9Sstevel@tonic-gate void
347f4b3ec61Sdh ipcl_g_init(void)
3487c478bd9Sstevel@tonic-gate {
349fc80c0dfSnordmark 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
3507c478bd9Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
351fc80c0dfSnordmark 	    ip_conn_constructor, ip_conn_destructor,
352fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
353fc80c0dfSnordmark 
354fc80c0dfSnordmark 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
355fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
356fc80c0dfSnordmark 	    tcp_conn_constructor, tcp_conn_destructor,
357fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
358fc80c0dfSnordmark 
359fc80c0dfSnordmark 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
360fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
361fc80c0dfSnordmark 	    udp_conn_constructor, udp_conn_destructor,
362fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
3637c478bd9Sstevel@tonic-gate 
364fc80c0dfSnordmark 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
365fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
366fc80c0dfSnordmark 	    rawip_conn_constructor, rawip_conn_destructor,
367fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
368fc80c0dfSnordmark 
369fc80c0dfSnordmark 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
370fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
371fc80c0dfSnordmark 	    rts_conn_constructor, rts_conn_destructor,
3727c478bd9Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
373f4b3ec61Sdh }
374f4b3ec61Sdh 
375f4b3ec61Sdh /*
376f4b3ec61Sdh  * ipclassifier intialization routine, sets up hash tables.
377f4b3ec61Sdh  */
378f4b3ec61Sdh void
379f4b3ec61Sdh ipcl_init(ip_stack_t *ipst)
380f4b3ec61Sdh {
381f4b3ec61Sdh 	int i;
382f4b3ec61Sdh 	int sizes[] = P2Ps();
3837c478bd9Sstevel@tonic-gate 
3847c478bd9Sstevel@tonic-gate 	/*
385f4b3ec61Sdh 	 * Calculate size of conn fanout table from /etc/system settings
3867c478bd9Sstevel@tonic-gate 	 */
3877c478bd9Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
388f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
3897c478bd9Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
390f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
3917c478bd9Sstevel@tonic-gate 	} else {
3927c478bd9Sstevel@tonic-gate 		extern pgcnt_t freemem;
3937c478bd9Sstevel@tonic-gate 
394f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size =
3957c478bd9Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
3967c478bd9Sstevel@tonic-gate 
397f4b3ec61Sdh 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
398f4b3ec61Sdh 			ipst->ips_ipcl_conn_fanout_size =
399f4b3ec61Sdh 			    ipcl_conn_hash_maxsize;
400f4b3ec61Sdh 		}
4017c478bd9Sstevel@tonic-gate 	}
4027c478bd9Sstevel@tonic-gate 
4037c478bd9Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
404f4b3ec61Sdh 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
4057c478bd9Sstevel@tonic-gate 			break;
4067c478bd9Sstevel@tonic-gate 		}
4077c478bd9Sstevel@tonic-gate 	}
408f4b3ec61Sdh 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
4097c478bd9Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
410f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
4117c478bd9Sstevel@tonic-gate 	}
4127c478bd9Sstevel@tonic-gate 
413f4b3ec61Sdh 	/* Take values from /etc/system */
414f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
415f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
416f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
4172b24ab6bSSebastien Roy 	ipst->ips_ipcl_iptun_fanout_size = ipcl_iptun_fanout_size;
418f4b3ec61Sdh 
419f4b3ec61Sdh 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
420f4b3ec61Sdh 
421f4b3ec61Sdh 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
422f4b3ec61Sdh 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
423f4b3ec61Sdh 
424f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
425f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
4267c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4277c478bd9Sstevel@tonic-gate 	}
4287c478bd9Sstevel@tonic-gate 
429f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
430f4b3ec61Sdh 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
4317c478bd9Sstevel@tonic-gate 
432f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
433f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
4347c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4357c478bd9Sstevel@tonic-gate 	}
4367c478bd9Sstevel@tonic-gate 
437*bd670b35SErik Nordmark 	ipst->ips_ipcl_proto_fanout_v4 = kmem_zalloc(IPPROTO_MAX *
438f4b3ec61Sdh 	    sizeof (connf_t), KM_SLEEP);
439f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
440*bd670b35SErik Nordmark 		mutex_init(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock, NULL,
4417c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4427c478bd9Sstevel@tonic-gate 	}
443f4b3ec61Sdh 
444f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
445f4b3ec61Sdh 	    sizeof (connf_t), KM_SLEEP);
446f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
447f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
4487c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4497c478bd9Sstevel@tonic-gate 	}
4507c478bd9Sstevel@tonic-gate 
451f4b3ec61Sdh 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
452f4b3ec61Sdh 	mutex_init(&ipst->ips_rts_clients->connf_lock,
453f4b3ec61Sdh 	    NULL, MUTEX_DEFAULT, NULL);
4547c478bd9Sstevel@tonic-gate 
455f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
456f4b3ec61Sdh 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
457f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
458f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
4597c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4607c478bd9Sstevel@tonic-gate 	}
4617c478bd9Sstevel@tonic-gate 
4622b24ab6bSSebastien Roy 	ipst->ips_ipcl_iptun_fanout = kmem_zalloc(
4632b24ab6bSSebastien Roy 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t), KM_SLEEP);
4642b24ab6bSSebastien Roy 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
4652b24ab6bSSebastien Roy 		mutex_init(&ipst->ips_ipcl_iptun_fanout[i].connf_lock, NULL,
4662b24ab6bSSebastien Roy 		    MUTEX_DEFAULT, NULL);
4672b24ab6bSSebastien Roy 	}
4682b24ab6bSSebastien Roy 
469f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
470f4b3ec61Sdh 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
471f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
472f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
4737c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4747c478bd9Sstevel@tonic-gate 	}
4757c478bd9Sstevel@tonic-gate 
476f4b3ec61Sdh 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
477f4b3ec61Sdh 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
4787c478bd9Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
479f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
480f4b3ec61Sdh 		    NULL, MUTEX_DEFAULT, NULL);
4817c478bd9Sstevel@tonic-gate 	}
4827c478bd9Sstevel@tonic-gate }
4837c478bd9Sstevel@tonic-gate 
4847c478bd9Sstevel@tonic-gate void
485f4b3ec61Sdh ipcl_g_destroy(void)
4867c478bd9Sstevel@tonic-gate {
487fc80c0dfSnordmark 	kmem_cache_destroy(ip_conn_cache);
488fc80c0dfSnordmark 	kmem_cache_destroy(tcp_conn_cache);
489fc80c0dfSnordmark 	kmem_cache_destroy(udp_conn_cache);
490fc80c0dfSnordmark 	kmem_cache_destroy(rawip_conn_cache);
491fc80c0dfSnordmark 	kmem_cache_destroy(rts_conn_cache);
492f4b3ec61Sdh }
493f4b3ec61Sdh 
494f4b3ec61Sdh /*
495f4b3ec61Sdh  * All user-level and kernel use of the stack must be gone
496f4b3ec61Sdh  * by now.
497f4b3ec61Sdh  */
498f4b3ec61Sdh void
499f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst)
500f4b3ec61Sdh {
501f4b3ec61Sdh 	int i;
502f4b3ec61Sdh 
503f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
504f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
505f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
506f4b3ec61Sdh 	}
507f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
508f4b3ec61Sdh 	    sizeof (connf_t));
509f4b3ec61Sdh 	ipst->ips_ipcl_conn_fanout = NULL;
510f4b3ec61Sdh 
511f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
512f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
513f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
514f4b3ec61Sdh 	}
515f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
516f4b3ec61Sdh 	    sizeof (connf_t));
517f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout = NULL;
518f4b3ec61Sdh 
519f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
520*bd670b35SErik Nordmark 		ASSERT(ipst->ips_ipcl_proto_fanout_v4[i].connf_head == NULL);
521*bd670b35SErik Nordmark 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v4[i].connf_lock);
522f4b3ec61Sdh 	}
523*bd670b35SErik Nordmark 	kmem_free(ipst->ips_ipcl_proto_fanout_v4,
524*bd670b35SErik Nordmark 	    IPPROTO_MAX * sizeof (connf_t));
525*bd670b35SErik Nordmark 	ipst->ips_ipcl_proto_fanout_v4 = NULL;
526f4b3ec61Sdh 
527f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
528f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
529f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
530f4b3ec61Sdh 	}
531f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
532f4b3ec61Sdh 	    IPPROTO_MAX * sizeof (connf_t));
533f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
534f4b3ec61Sdh 
535f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
536f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
537f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
538f4b3ec61Sdh 	}
539f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
540f4b3ec61Sdh 	    sizeof (connf_t));
541f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout = NULL;
542f4b3ec61Sdh 
5432b24ab6bSSebastien Roy 	for (i = 0; i < ipst->ips_ipcl_iptun_fanout_size; i++) {
5442b24ab6bSSebastien Roy 		ASSERT(ipst->ips_ipcl_iptun_fanout[i].connf_head == NULL);
5452b24ab6bSSebastien Roy 		mutex_destroy(&ipst->ips_ipcl_iptun_fanout[i].connf_lock);
5462b24ab6bSSebastien Roy 	}
5472b24ab6bSSebastien Roy 	kmem_free(ipst->ips_ipcl_iptun_fanout,
5482b24ab6bSSebastien Roy 	    ipst->ips_ipcl_iptun_fanout_size * sizeof (connf_t));
5492b24ab6bSSebastien Roy 	ipst->ips_ipcl_iptun_fanout = NULL;
5502b24ab6bSSebastien Roy 
551f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
552f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
553f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
554f4b3ec61Sdh 	}
555f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
556f4b3ec61Sdh 	    sizeof (connf_t));
557f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout = NULL;
558f4b3ec61Sdh 
559f4b3ec61Sdh 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
560f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
561f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
562f4b3ec61Sdh 	}
563f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
564f4b3ec61Sdh 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
565f4b3ec61Sdh 	ipst->ips_ipcl_globalhash_fanout = NULL;
566f4b3ec61Sdh 
567f4b3ec61Sdh 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
568f4b3ec61Sdh 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
569f4b3ec61Sdh 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
570f4b3ec61Sdh 	ipst->ips_rts_clients = NULL;
5717c478bd9Sstevel@tonic-gate }
5727c478bd9Sstevel@tonic-gate 
5737c478bd9Sstevel@tonic-gate /*
5747c478bd9Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
5757c478bd9Sstevel@tonic-gate  * and inserts it in the global hash table.
5767c478bd9Sstevel@tonic-gate  */
5777c478bd9Sstevel@tonic-gate conn_t *
578f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
5797c478bd9Sstevel@tonic-gate {
5807c478bd9Sstevel@tonic-gate 	conn_t	*connp;
581fc80c0dfSnordmark 	struct kmem_cache *conn_cache;
5827c478bd9Sstevel@tonic-gate 
5837c478bd9Sstevel@tonic-gate 	switch (type) {
5847c478bd9Sstevel@tonic-gate 	case IPCL_SCTPCONN:
5857c478bd9Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
5867c478bd9Sstevel@tonic-gate 			return (NULL);
587121e5416Skcpoon 		sctp_conn_init(connp);
588f4b3ec61Sdh 		netstack_hold(ns);
589f4b3ec61Sdh 		connp->conn_netstack = ns;
590*bd670b35SErik Nordmark 		connp->conn_ixa->ixa_ipst = ns->netstack_ip;
591*bd670b35SErik Nordmark 		ipcl_globalhash_insert(connp);
592fc80c0dfSnordmark 		return (connp);
593fc80c0dfSnordmark 
594fc80c0dfSnordmark 	case IPCL_TCPCONN:
595fc80c0dfSnordmark 		conn_cache = tcp_conn_cache;
5967c478bd9Sstevel@tonic-gate 		break;
597fc80c0dfSnordmark 
598fc80c0dfSnordmark 	case IPCL_UDPCONN:
599fc80c0dfSnordmark 		conn_cache = udp_conn_cache;
600fc80c0dfSnordmark 		break;
601fc80c0dfSnordmark 
602fc80c0dfSnordmark 	case IPCL_RAWIPCONN:
603fc80c0dfSnordmark 		conn_cache = rawip_conn_cache;
604fc80c0dfSnordmark 		break;
605fc80c0dfSnordmark 
606fc80c0dfSnordmark 	case IPCL_RTSCONN:
607fc80c0dfSnordmark 		conn_cache = rts_conn_cache;
608fc80c0dfSnordmark 		break;
609fc80c0dfSnordmark 
6107c478bd9Sstevel@tonic-gate 	case IPCL_IPCCONN:
611fc80c0dfSnordmark 		conn_cache = ip_conn_cache;
6127c478bd9Sstevel@tonic-gate 		break;
613fc80c0dfSnordmark 
614ff550d0eSmasputra 	default:
615ff550d0eSmasputra 		connp = NULL;
616ff550d0eSmasputra 		ASSERT(0);
6177c478bd9Sstevel@tonic-gate 	}
6187c478bd9Sstevel@tonic-gate 
619fc80c0dfSnordmark 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
620fc80c0dfSnordmark 		return (NULL);
621fc80c0dfSnordmark 
622fc80c0dfSnordmark 	connp->conn_ref = 1;
623fc80c0dfSnordmark 	netstack_hold(ns);
624fc80c0dfSnordmark 	connp->conn_netstack = ns;
625*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_ipst = ns->netstack_ip;
626fc80c0dfSnordmark 	ipcl_globalhash_insert(connp);
6277c478bd9Sstevel@tonic-gate 	return (connp);
6287c478bd9Sstevel@tonic-gate }
6297c478bd9Sstevel@tonic-gate 
6307c478bd9Sstevel@tonic-gate void
6317c478bd9Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
6327c478bd9Sstevel@tonic-gate {
6337c478bd9Sstevel@tonic-gate 	mblk_t	*mp;
634f4b3ec61Sdh 	netstack_t	*ns = connp->conn_netstack;
6357c478bd9Sstevel@tonic-gate 
6367c478bd9Sstevel@tonic-gate 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
6377c478bd9Sstevel@tonic-gate 	ASSERT(connp->conn_ref == 0);
6387c478bd9Sstevel@tonic-gate 
639fab254e2SAruna Ramakrishna 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
640fab254e2SAruna Ramakrishna 
64145916cd2Sjpk 	if (connp->conn_cred != NULL) {
64245916cd2Sjpk 		crfree(connp->conn_cred);
64345916cd2Sjpk 		connp->conn_cred = NULL;
64445916cd2Sjpk 	}
64545916cd2Sjpk 
646*bd670b35SErik Nordmark 	if (connp->conn_ht_iphc != NULL) {
647*bd670b35SErik Nordmark 		kmem_free(connp->conn_ht_iphc, connp->conn_ht_iphc_allocated);
648*bd670b35SErik Nordmark 		connp->conn_ht_iphc = NULL;
649*bd670b35SErik Nordmark 		connp->conn_ht_iphc_allocated = 0;
650*bd670b35SErik Nordmark 		connp->conn_ht_iphc_len = 0;
651*bd670b35SErik Nordmark 		connp->conn_ht_ulp = NULL;
652*bd670b35SErik Nordmark 		connp->conn_ht_ulp_len = 0;
653*bd670b35SErik Nordmark 	}
654*bd670b35SErik Nordmark 	ip_pkt_free(&connp->conn_xmit_ipp);
655*bd670b35SErik Nordmark 
6567c478bd9Sstevel@tonic-gate 	ipcl_globalhash_remove(connp);
6577c478bd9Sstevel@tonic-gate 
658*bd670b35SErik Nordmark 	if (connp->conn_latch != NULL) {
659*bd670b35SErik Nordmark 		IPLATCH_REFRELE(connp->conn_latch);
660*bd670b35SErik Nordmark 		connp->conn_latch = NULL;
661*bd670b35SErik Nordmark 	}
662*bd670b35SErik Nordmark 	if (connp->conn_latch_in_policy != NULL) {
663*bd670b35SErik Nordmark 		IPPOL_REFRELE(connp->conn_latch_in_policy);
664*bd670b35SErik Nordmark 		connp->conn_latch_in_policy = NULL;
665*bd670b35SErik Nordmark 	}
666*bd670b35SErik Nordmark 	if (connp->conn_latch_in_action != NULL) {
667*bd670b35SErik Nordmark 		IPACT_REFRELE(connp->conn_latch_in_action);
668*bd670b35SErik Nordmark 		connp->conn_latch_in_action = NULL;
669*bd670b35SErik Nordmark 	}
670*bd670b35SErik Nordmark 	if (connp->conn_policy != NULL) {
671*bd670b35SErik Nordmark 		IPPH_REFRELE(connp->conn_policy, ns);
672*bd670b35SErik Nordmark 		connp->conn_policy = NULL;
673*bd670b35SErik Nordmark 	}
674*bd670b35SErik Nordmark 
675*bd670b35SErik Nordmark 	if (connp->conn_ipsec_opt_mp != NULL) {
676*bd670b35SErik Nordmark 		freemsg(connp->conn_ipsec_opt_mp);
677*bd670b35SErik Nordmark 		connp->conn_ipsec_opt_mp = NULL;
678*bd670b35SErik Nordmark 	}
679*bd670b35SErik Nordmark 
6807c478bd9Sstevel@tonic-gate 	if (connp->conn_flags & IPCL_TCPCONN) {
681*bd670b35SErik Nordmark 		tcp_t *tcp = connp->conn_tcp;
682ff550d0eSmasputra 
6837c478bd9Sstevel@tonic-gate 		tcp_free(tcp);
6847c478bd9Sstevel@tonic-gate 		mp = tcp->tcp_timercache;
685*bd670b35SErik Nordmark 
686*bd670b35SErik Nordmark 		tcp->tcp_tcps = NULL;
6877c478bd9Sstevel@tonic-gate 
6887c478bd9Sstevel@tonic-gate 		if (tcp->tcp_sack_info != NULL) {
6897c478bd9Sstevel@tonic-gate 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
6907c478bd9Sstevel@tonic-gate 			kmem_cache_free(tcp_sack_info_cache,
6917c478bd9Sstevel@tonic-gate 			    tcp->tcp_sack_info);
6927c478bd9Sstevel@tonic-gate 		}
6937c478bd9Sstevel@tonic-gate 
694f7f8e53dSKacheong Poon 		/*
695f7f8e53dSKacheong Poon 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
696f7f8e53dSKacheong Poon 		 * the mblk.
697f7f8e53dSKacheong Poon 		 */
698f7f8e53dSKacheong Poon 		if (tcp->tcp_rsrv_mp != NULL) {
699f7f8e53dSKacheong Poon 			freeb(tcp->tcp_rsrv_mp);
700f7f8e53dSKacheong Poon 			tcp->tcp_rsrv_mp = NULL;
701f7f8e53dSKacheong Poon 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
702f7f8e53dSKacheong Poon 		}
703f7f8e53dSKacheong Poon 
704*bd670b35SErik Nordmark 		ipcl_conn_cleanup(connp);
705*bd670b35SErik Nordmark 		connp->conn_flags = IPCL_TCPCONN;
706f4b3ec61Sdh 		if (ns != NULL) {
707f4b3ec61Sdh 			ASSERT(tcp->tcp_tcps == NULL);
708f4b3ec61Sdh 			connp->conn_netstack = NULL;
709*bd670b35SErik Nordmark 			connp->conn_ixa->ixa_ipst = NULL;
710f4b3ec61Sdh 			netstack_rele(ns);
711f4b3ec61Sdh 		}
712fc80c0dfSnordmark 
713fc80c0dfSnordmark 		bzero(tcp, sizeof (tcp_t));
714fc80c0dfSnordmark 
715fc80c0dfSnordmark 		tcp->tcp_timercache = mp;
716fc80c0dfSnordmark 		tcp->tcp_connp = connp;
717fc80c0dfSnordmark 		kmem_cache_free(tcp_conn_cache, connp);
718fc80c0dfSnordmark 		return;
719fc80c0dfSnordmark 	}
720fc80c0dfSnordmark 
721fc80c0dfSnordmark 	if (connp->conn_flags & IPCL_SCTPCONN) {
722f4b3ec61Sdh 		ASSERT(ns != NULL);
7237c478bd9Sstevel@tonic-gate 		sctp_free(connp);
724fc80c0dfSnordmark 		return;
725fc80c0dfSnordmark 	}
726fc80c0dfSnordmark 
727*bd670b35SErik Nordmark 	ipcl_conn_cleanup(connp);
728fc80c0dfSnordmark 	if (ns != NULL) {
729fc80c0dfSnordmark 		connp->conn_netstack = NULL;
730*bd670b35SErik Nordmark 		connp->conn_ixa->ixa_ipst = NULL;
731fc80c0dfSnordmark 		netstack_rele(ns);
732fc80c0dfSnordmark 	}
7330f1702c5SYu Xiangning 
734fc80c0dfSnordmark 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
735fc80c0dfSnordmark 	if (connp->conn_flags & IPCL_UDPCONN) {
736fc80c0dfSnordmark 		connp->conn_flags = IPCL_UDPCONN;
737fc80c0dfSnordmark 		kmem_cache_free(udp_conn_cache, connp);
738fc80c0dfSnordmark 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
739fc80c0dfSnordmark 		connp->conn_flags = IPCL_RAWIPCONN;
740*bd670b35SErik Nordmark 		connp->conn_proto = IPPROTO_ICMP;
741*bd670b35SErik Nordmark 		connp->conn_ixa->ixa_protocol = connp->conn_proto;
742fc80c0dfSnordmark 		kmem_cache_free(rawip_conn_cache, connp);
743fc80c0dfSnordmark 	} else if (connp->conn_flags & IPCL_RTSCONN) {
744fc80c0dfSnordmark 		connp->conn_flags = IPCL_RTSCONN;
745fc80c0dfSnordmark 		kmem_cache_free(rts_conn_cache, connp);
7467c478bd9Sstevel@tonic-gate 	} else {
747fc80c0dfSnordmark 		connp->conn_flags = IPCL_IPCCONN;
748fc80c0dfSnordmark 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
749fc80c0dfSnordmark 		ASSERT(connp->conn_priv == NULL);
750fc80c0dfSnordmark 		kmem_cache_free(ip_conn_cache, connp);
7517c478bd9Sstevel@tonic-gate 	}
7527c478bd9Sstevel@tonic-gate }
7537c478bd9Sstevel@tonic-gate 
7547c478bd9Sstevel@tonic-gate /*
7557c478bd9Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
7567c478bd9Sstevel@tonic-gate  */
7577c478bd9Sstevel@tonic-gate static void
7587c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
7597c478bd9Sstevel@tonic-gate {
7607c478bd9Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
7617c478bd9Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
7647c478bd9Sstevel@tonic-gate 		sa_family_t	addr_family;
7657c478bd9Sstevel@tonic-gate 		uint8_t		*laddrp;
7667c478bd9Sstevel@tonic-gate 
767*bd670b35SErik Nordmark 		if (connp->conn_ipversion == IPV6_VERSION) {
7687c478bd9Sstevel@tonic-gate 			addr_family = AF_INET6;
769*bd670b35SErik Nordmark 			laddrp = (uint8_t *)&connp->conn_bound_addr_v6;
7707c478bd9Sstevel@tonic-gate 		} else {
7717c478bd9Sstevel@tonic-gate 			addr_family = AF_INET;
772*bd670b35SErik Nordmark 			laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
7737c478bd9Sstevel@tonic-gate 		}
7748e4b770fSLu Huafeng 		(*cl_inet_unlisten)(connp->conn_netstack->netstack_stackid,
7758e4b770fSLu Huafeng 		    IPPROTO_TCP, addr_family, laddrp, connp->conn_lport, NULL);
7767c478bd9Sstevel@tonic-gate 	}
7777c478bd9Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
7787c478bd9Sstevel@tonic-gate }
7797c478bd9Sstevel@tonic-gate 
/*
 * Unlink connp from the fanout bucket recorded in conn_fanout, if any,
 * while holding that bucket's connf_lock.  The caller must not hold
 * conn_lock.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to), so that when debugging we can still
 * see which hash table this connection was in.  A conn flagged
 * IPCL_CL_LISTENER is deregistered through ipcl_conn_unlisten(), and one
 * conn reference is dropped with CONN_DEC_REF() before the bucket lock
 * is released.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		if ((connp)->conn_prev == NULL) {			\
			connfp->connf_head = (connp)->conn_next;	\
		} else {						\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		}							\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
8087c478bd9Sstevel@tonic-gate 
8097c478bd9Sstevel@tonic-gate void
8107c478bd9Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
8117c478bd9Sstevel@tonic-gate {
812*bd670b35SErik Nordmark 	uint8_t		protocol = connp->conn_proto;
813*bd670b35SErik Nordmark 
8147c478bd9Sstevel@tonic-gate 	IPCL_HASH_REMOVE(connp);
815*bd670b35SErik Nordmark 	if (protocol == IPPROTO_RSVP)
816*bd670b35SErik Nordmark 		ill_set_inputfn_all(connp->conn_netstack->netstack_ip);
8177c478bd9Sstevel@tonic-gate }
8187c478bd9Sstevel@tonic-gate 
8197c478bd9Sstevel@tonic-gate /*
8207c478bd9Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
8217c478bd9Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
8227c478bd9Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
8237c478bd9Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
8247c478bd9Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
8257c478bd9Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
8267c478bd9Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
8277c478bd9Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
8287c478bd9Sstevel@tonic-gate  * improved performance.
8297c478bd9Sstevel@tonic-gate  */
8307c478bd9Sstevel@tonic-gate void
8317c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
8327c478bd9Sstevel@tonic-gate {
8337c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
8347c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
8357c478bd9Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
8367c478bd9Sstevel@tonic-gate 
8377c478bd9Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
838121e5416Skcpoon 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
8397c478bd9Sstevel@tonic-gate 	}
8407c478bd9Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
841121e5416Skcpoon 		(connp)->conn_prev->conn_next = (connp)->conn_next;
8427c478bd9Sstevel@tonic-gate 	} else {
8437c478bd9Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
8447c478bd9Sstevel@tonic-gate 	}
8457c478bd9Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
8467c478bd9Sstevel@tonic-gate 	(connp)->conn_next = NULL;
8477c478bd9Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
8487c478bd9Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
8497c478bd9Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
8507c478bd9Sstevel@tonic-gate 	(connp)->conn_ref--;
8517c478bd9Sstevel@tonic-gate }
8527c478bd9Sstevel@tonic-gate 
/*
 * Link connp at the head of bucket connfp, mark it IPCL_CONNECTED (and
 * clear IPCL_REMOVED), and take a conn reference with CONN_INC_REF().
 * connp must not be on any fanout list.  Nothing here acquires connfp's
 * connf_lock; the callers visible in this file hold it around the macro.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	ASSERT((connp)->conn_fanout == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connp)->conn_next = (connfp)->connf_head;		\
		(connfp)->connf_head->conn_prev = (connp);		\
	}								\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = IPCL_CONNECTED |				\
	    ((connp)->conn_flags & ~IPCL_REMOVED);			\
	CONN_INC_REF(connp);						\
}
8677c478bd9Sstevel@tonic-gate 
/*
 * Move connp to the head of bucket connfp as a connected entry: remove it
 * from its current fanout list (if any), then insert it under connfp's
 * connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED((connfp), (connp));		\
	mutex_exit(&(connfp)->connf_lock);				\
}
8747c478bd9Sstevel@tonic-gate 
/*
 * Move connp into bucket connfp as a bound entry.  connp is first removed
 * from its current fanout list (if any).  It is then linked immediately
 * before the first entry whose conn_laddr_v6 satisfies
 * _IPCL_V4_MATCH_ANY(), or at the tail when no entry does.  The conn is
 * marked IPCL_BOUND (IPCL_REMOVED is cleared) and a reference is taken.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	for (nconnp = (connfp)->connf_head;				\
	    nconnp != NULL &&						\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_laddr_v6);			\
	    nconnp = nconnp->conn_next) {				\
		pconnp = nconnp;					\
	}								\
	if (pconnp == NULL) {						\
		(connfp)->connf_head = (connp);				\
	} else {							\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	}								\
	if (nconnp != NULL) {						\
		nconnp->conn_prev = (connp);				\
		(connp)->conn_next = nconnp;				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = IPCL_BOUND |				\
	    ((connp)->conn_flags & ~IPCL_REMOVED);			\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9017c478bd9Sstevel@tonic-gate 
/*
 * Move connp into bucket connfp as a wildcard-bound entry.  connp is
 * first removed from its current fanout list (if any).
 *
 * If connp's local address is v4-mapped, it is linked in front of the
 * first entry in the same zone whose local address is unspecified.
 * Otherwise, or if there is no such entry, it is appended at the tail.
 * The conn is marked IPCL_BOUND (IPCL_REMOVED is cleared) and a
 * reference is taken.
 *
 * Every use of the connp parameter is parenthesized so that the macro
 * also expands correctly for arguments that are not plain identifiers.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_laddr_v6);		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_laddr_v6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9317c478bd9Sstevel@tonic-gate 
9327c478bd9Sstevel@tonic-gate void
9337c478bd9Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
9347c478bd9Sstevel@tonic-gate {
9357c478bd9Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate 
9382b24ab6bSSebastien Roy /*
9392b24ab6bSSebastien Roy  * Because the classifier is used to classify inbound packets, the destination
9402b24ab6bSSebastien Roy  * address is meant to be our local tunnel address (tunnel source), and the
9412b24ab6bSSebastien Roy  * source the remote tunnel address (tunnel destination).
942*bd670b35SErik Nordmark  *
943*bd670b35SErik Nordmark  * Note that conn_proto can't be used for fanout since the upper protocol
944*bd670b35SErik Nordmark  * can be both 41 and 4 when IPv6 and IPv4 are over the same tunnel.
9452b24ab6bSSebastien Roy  */
9462b24ab6bSSebastien Roy conn_t *
9472b24ab6bSSebastien Roy ipcl_iptun_classify_v4(ipaddr_t *src, ipaddr_t *dst, ip_stack_t *ipst)
9482b24ab6bSSebastien Roy {
9492b24ab6bSSebastien Roy 	connf_t	*connfp;
9502b24ab6bSSebastien Roy 	conn_t	*connp;
9512b24ab6bSSebastien Roy 
9522b24ab6bSSebastien Roy 	/* first look for IPv4 tunnel links */
9532b24ab6bSSebastien Roy 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst, *src)];
9542b24ab6bSSebastien Roy 	mutex_enter(&connfp->connf_lock);
9552b24ab6bSSebastien Roy 	for (connp = connfp->connf_head; connp != NULL;
9562b24ab6bSSebastien Roy 	    connp = connp->conn_next) {
9572b24ab6bSSebastien Roy 		if (IPCL_IPTUN_MATCH(connp, *dst, *src))
9582b24ab6bSSebastien Roy 			break;
9592b24ab6bSSebastien Roy 	}
9602b24ab6bSSebastien Roy 	if (connp != NULL)
9612b24ab6bSSebastien Roy 		goto done;
9622b24ab6bSSebastien Roy 
9632b24ab6bSSebastien Roy 	mutex_exit(&connfp->connf_lock);
9642b24ab6bSSebastien Roy 
9652b24ab6bSSebastien Roy 	/* We didn't find an IPv4 tunnel, try a 6to4 tunnel */
9662b24ab6bSSebastien Roy 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(*dst,
9672b24ab6bSSebastien Roy 	    INADDR_ANY)];
9682b24ab6bSSebastien Roy 	mutex_enter(&connfp->connf_lock);
9692b24ab6bSSebastien Roy 	for (connp = connfp->connf_head; connp != NULL;
9702b24ab6bSSebastien Roy 	    connp = connp->conn_next) {
9712b24ab6bSSebastien Roy 		if (IPCL_IPTUN_MATCH(connp, *dst, INADDR_ANY))
9722b24ab6bSSebastien Roy 			break;
9732b24ab6bSSebastien Roy 	}
9742b24ab6bSSebastien Roy done:
9752b24ab6bSSebastien Roy 	if (connp != NULL)
9762b24ab6bSSebastien Roy 		CONN_INC_REF(connp);
9772b24ab6bSSebastien Roy 	mutex_exit(&connfp->connf_lock);
9782b24ab6bSSebastien Roy 	return (connp);
9792b24ab6bSSebastien Roy }
9802b24ab6bSSebastien Roy 
9812b24ab6bSSebastien Roy conn_t *
9822b24ab6bSSebastien Roy ipcl_iptun_classify_v6(in6_addr_t *src, in6_addr_t *dst, ip_stack_t *ipst)
9832b24ab6bSSebastien Roy {
9842b24ab6bSSebastien Roy 	connf_t	*connfp;
9852b24ab6bSSebastien Roy 	conn_t	*connp;
9862b24ab6bSSebastien Roy 
9872b24ab6bSSebastien Roy 	/* Look for an IPv6 tunnel link */
9882b24ab6bSSebastien Roy 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(dst, src)];
9892b24ab6bSSebastien Roy 	mutex_enter(&connfp->connf_lock);
9902b24ab6bSSebastien Roy 	for (connp = connfp->connf_head; connp != NULL;
9912b24ab6bSSebastien Roy 	    connp = connp->conn_next) {
9922b24ab6bSSebastien Roy 		if (IPCL_IPTUN_MATCH_V6(connp, dst, src)) {
9932b24ab6bSSebastien Roy 			CONN_INC_REF(connp);
9942b24ab6bSSebastien Roy 			break;
9952b24ab6bSSebastien Roy 		}
9962b24ab6bSSebastien Roy 	}
9972b24ab6bSSebastien Roy 	mutex_exit(&connfp->connf_lock);
9982b24ab6bSSebastien Roy 	return (connp);
9992b24ab6bSSebastien Roy }
10002b24ab6bSSebastien Roy 
10017c478bd9Sstevel@tonic-gate /*
10027c478bd9Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
10037c478bd9Sstevel@tonic-gate  * This may change later.
10047c478bd9Sstevel@tonic-gate  *
10057c478bd9Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
10067c478bd9Sstevel@tonic-gate  * lport is in network byte order.
10077c478bd9Sstevel@tonic-gate  */
10087c478bd9Sstevel@tonic-gate static int
10097c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
10107c478bd9Sstevel@tonic-gate {
10117c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
10127c478bd9Sstevel@tonic-gate 	conn_t	*oconnp;
1013f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
10147c478bd9Sstevel@tonic-gate 
1015f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
10187c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
10197c478bd9Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
10207c0c0508Skcpoon 	    oconnp = oconnp->conn_next) {
10217c478bd9Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
10227c478bd9Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
1023*bd670b35SErik Nordmark 		    oconnp->conn_family == connp->conn_family &&
1024*bd670b35SErik Nordmark 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1025*bd670b35SErik Nordmark 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_laddr_v6) ||
1026*bd670b35SErik Nordmark 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6) ||
1027*bd670b35SErik Nordmark 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_laddr_v6)) ||
1028*bd670b35SErik Nordmark 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_laddr_v6,
1029*bd670b35SErik Nordmark 		    &connp->conn_laddr_v6))) {
10307c478bd9Sstevel@tonic-gate 			break;
10317c478bd9Sstevel@tonic-gate 		}
10327c478bd9Sstevel@tonic-gate 	}
10337c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
10347c478bd9Sstevel@tonic-gate 	if (oconnp != NULL)
10357c478bd9Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
10367c478bd9Sstevel@tonic-gate 
1037*bd670b35SErik Nordmark 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
1038*bd670b35SErik Nordmark 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1039*bd670b35SErik Nordmark 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
1040*bd670b35SErik Nordmark 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_laddr_v6)) {
10417c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10427c478bd9Sstevel@tonic-gate 		} else {
10437c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10447c478bd9Sstevel@tonic-gate 		}
10457c478bd9Sstevel@tonic-gate 	} else {
10467c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10477c478bd9Sstevel@tonic-gate 	}
10487c478bd9Sstevel@tonic-gate 	return (0);
10497c478bd9Sstevel@tonic-gate }
10507c478bd9Sstevel@tonic-gate 
10512b24ab6bSSebastien Roy static int
1052*bd670b35SErik Nordmark ipcl_iptun_hash_insert(conn_t *connp, ip_stack_t *ipst)
10532b24ab6bSSebastien Roy {
10542b24ab6bSSebastien Roy 	connf_t	*connfp;
10552b24ab6bSSebastien Roy 	conn_t	*tconnp;
1056*bd670b35SErik Nordmark 	ipaddr_t laddr = connp->conn_laddr_v4;
1057*bd670b35SErik Nordmark 	ipaddr_t faddr = connp->conn_faddr_v4;
10582b24ab6bSSebastien Roy 
1059*bd670b35SErik Nordmark 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH(laddr, faddr)];
10602b24ab6bSSebastien Roy 	mutex_enter(&connfp->connf_lock);
10612b24ab6bSSebastien Roy 	for (tconnp = connfp->connf_head; tconnp != NULL;
10622b24ab6bSSebastien Roy 	    tconnp = tconnp->conn_next) {
1063*bd670b35SErik Nordmark 		if (IPCL_IPTUN_MATCH(tconnp, laddr, faddr)) {
10642b24ab6bSSebastien Roy 			/* A tunnel is already bound to these addresses. */
10652b24ab6bSSebastien Roy 			mutex_exit(&connfp->connf_lock);
10662b24ab6bSSebastien Roy 			return (EADDRINUSE);
10672b24ab6bSSebastien Roy 		}
10682b24ab6bSSebastien Roy 	}
10692b24ab6bSSebastien Roy 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
10702b24ab6bSSebastien Roy 	mutex_exit(&connfp->connf_lock);
10712b24ab6bSSebastien Roy 	return (0);
10722b24ab6bSSebastien Roy }
10732b24ab6bSSebastien Roy 
10742b24ab6bSSebastien Roy static int
1075*bd670b35SErik Nordmark ipcl_iptun_hash_insert_v6(conn_t *connp, ip_stack_t *ipst)
10762b24ab6bSSebastien Roy {
10772b24ab6bSSebastien Roy 	connf_t	*connfp;
10782b24ab6bSSebastien Roy 	conn_t	*tconnp;
1079*bd670b35SErik Nordmark 	in6_addr_t *laddr = &connp->conn_laddr_v6;
1080*bd670b35SErik Nordmark 	in6_addr_t *faddr = &connp->conn_faddr_v6;
10812b24ab6bSSebastien Roy 
1082*bd670b35SErik Nordmark 	connfp = &ipst->ips_ipcl_iptun_fanout[IPCL_IPTUN_HASH_V6(laddr, faddr)];
10832b24ab6bSSebastien Roy 	mutex_enter(&connfp->connf_lock);
10842b24ab6bSSebastien Roy 	for (tconnp = connfp->connf_head; tconnp != NULL;
10852b24ab6bSSebastien Roy 	    tconnp = tconnp->conn_next) {
1086*bd670b35SErik Nordmark 		if (IPCL_IPTUN_MATCH_V6(tconnp, laddr, faddr)) {
10872b24ab6bSSebastien Roy 			/* A tunnel is already bound to these addresses. */
10882b24ab6bSSebastien Roy 			mutex_exit(&connfp->connf_lock);
10892b24ab6bSSebastien Roy 			return (EADDRINUSE);
10902b24ab6bSSebastien Roy 		}
10912b24ab6bSSebastien Roy 	}
10922b24ab6bSSebastien Roy 	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
10932b24ab6bSSebastien Roy 	mutex_exit(&connfp->connf_lock);
10942b24ab6bSSebastien Roy 	return (0);
10952b24ab6bSSebastien Roy }
10962b24ab6bSSebastien Roy 
109745916cd2Sjpk /*
109845916cd2Sjpk  * Check for a MAC exemption conflict on a labeled system.  Note that for
109945916cd2Sjpk  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
110045916cd2Sjpk  * transport layer.  This check is for binding all other protocols.
110145916cd2Sjpk  *
110245916cd2Sjpk  * Returns true if there's a conflict.
110345916cd2Sjpk  */
110445916cd2Sjpk static boolean_t
1105f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
110645916cd2Sjpk {
110745916cd2Sjpk 	connf_t	*connfp;
110845916cd2Sjpk 	conn_t *tconn;
110945916cd2Sjpk 
1110*bd670b35SErik Nordmark 	connfp = &ipst->ips_ipcl_proto_fanout_v4[connp->conn_proto];
111145916cd2Sjpk 	mutex_enter(&connfp->connf_lock);
111245916cd2Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
111345916cd2Sjpk 	    tconn = tconn->conn_next) {
111445916cd2Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
1115*bd670b35SErik Nordmark 		if (connp->conn_family != tconn->conn_family)
111645916cd2Sjpk 			continue;
111745916cd2Sjpk 		/* If neither is exempt, then there's no conflict */
11185d3b8cb7SBill Sommerfeld 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
11195d3b8cb7SBill Sommerfeld 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
112045916cd2Sjpk 			continue;
11215f9878b0Sken Powell - Sun Microsystem 		/* We are only concerned about sockets for a different zone */
11225f9878b0Sken Powell - Sun Microsystem 		if (connp->conn_zoneid == tconn->conn_zoneid)
11235f9878b0Sken Powell - Sun Microsystem 			continue;
112445916cd2Sjpk 		/* If both are bound to different specific addrs, ok */
1125*bd670b35SErik Nordmark 		if (connp->conn_laddr_v4 != INADDR_ANY &&
1126*bd670b35SErik Nordmark 		    tconn->conn_laddr_v4 != INADDR_ANY &&
1127*bd670b35SErik Nordmark 		    connp->conn_laddr_v4 != tconn->conn_laddr_v4)
112845916cd2Sjpk 			continue;
112945916cd2Sjpk 		/* These two conflict; fail */
113045916cd2Sjpk 		break;
113145916cd2Sjpk 	}
113245916cd2Sjpk 	mutex_exit(&connfp->connf_lock);
113345916cd2Sjpk 	return (tconn != NULL);
113445916cd2Sjpk }
113545916cd2Sjpk 
113645916cd2Sjpk static boolean_t
1137f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
113845916cd2Sjpk {
113945916cd2Sjpk 	connf_t	*connfp;
114045916cd2Sjpk 	conn_t *tconn;
114145916cd2Sjpk 
1142*bd670b35SErik Nordmark 	connfp = &ipst->ips_ipcl_proto_fanout_v6[connp->conn_proto];
114345916cd2Sjpk 	mutex_enter(&connfp->connf_lock);
114445916cd2Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
114545916cd2Sjpk 	    tconn = tconn->conn_next) {
114645916cd2Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
1147*bd670b35SErik Nordmark 		if (connp->conn_family != tconn->conn_family)
114845916cd2Sjpk 			continue;
114945916cd2Sjpk 		/* If neither is exempt, then there's no conflict */
11505d3b8cb7SBill Sommerfeld 		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) &&
11515d3b8cb7SBill Sommerfeld 		    (tconn->conn_mac_mode == CONN_MAC_DEFAULT))
115245916cd2Sjpk 			continue;
11535f9878b0Sken Powell - Sun Microsystem 		/* We are only concerned about sockets for a different zone */
11545f9878b0Sken Powell - Sun Microsystem 		if (connp->conn_zoneid == tconn->conn_zoneid)
11555f9878b0Sken Powell - Sun Microsystem 			continue;
115645916cd2Sjpk 		/* If both are bound to different addrs, ok */
1157*bd670b35SErik Nordmark 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) &&
1158*bd670b35SErik Nordmark 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_laddr_v6) &&
1159*bd670b35SErik Nordmark 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
1160*bd670b35SErik Nordmark 		    &tconn->conn_laddr_v6))
116145916cd2Sjpk 			continue;
116245916cd2Sjpk 		/* These two conflict; fail */
116345916cd2Sjpk 		break;
116445916cd2Sjpk 	}
116545916cd2Sjpk 	mutex_exit(&connfp->connf_lock);
116645916cd2Sjpk 	return (tconn != NULL);
116745916cd2Sjpk }
116845916cd2Sjpk 
11697c478bd9Sstevel@tonic-gate /*
11707c478bd9Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
1171*bd670b35SErik Nordmark  * The caller has already setup the conn (conn_proto, conn_laddr_v6, conn_lport)
11727c478bd9Sstevel@tonic-gate  */
1173*bd670b35SErik Nordmark 
11747c478bd9Sstevel@tonic-gate int
1175*bd670b35SErik Nordmark ipcl_bind_insert(conn_t *connp)
1176*bd670b35SErik Nordmark {
1177*bd670b35SErik Nordmark 	if (connp->conn_ipversion == IPV6_VERSION)
1178*bd670b35SErik Nordmark 		return (ipcl_bind_insert_v6(connp));
1179*bd670b35SErik Nordmark 	else
1180*bd670b35SErik Nordmark 		return (ipcl_bind_insert_v4(connp));
1181*bd670b35SErik Nordmark }
1182*bd670b35SErik Nordmark 
1183*bd670b35SErik Nordmark int
1184*bd670b35SErik Nordmark ipcl_bind_insert_v4(conn_t *connp)
11857c478bd9Sstevel@tonic-gate {
11867c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
11877c478bd9Sstevel@tonic-gate 	int	ret = 0;
1188f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1189*bd670b35SErik Nordmark 	uint16_t	lport = connp->conn_lport;
1190*bd670b35SErik Nordmark 	uint8_t		protocol = connp->conn_proto;
11917c478bd9Sstevel@tonic-gate 
11922b24ab6bSSebastien Roy 	if (IPCL_IS_IPTUN(connp))
1193*bd670b35SErik Nordmark 		return (ipcl_iptun_hash_insert(connp, ipst));
11942b24ab6bSSebastien Roy 
11957c478bd9Sstevel@tonic-gate 	switch (protocol) {
11967c478bd9Sstevel@tonic-gate 	default:
1197f4b3ec61Sdh 		if (is_system_labeled() &&
1198f4b3ec61Sdh 		    check_exempt_conflict_v4(connp, ipst))
119945916cd2Sjpk 			return (EADDRINUSE);
120045916cd2Sjpk 		/* FALLTHROUGH */
120145916cd2Sjpk 	case IPPROTO_UDP:
12027c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1203f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1204f4b3ec61Sdh 			    IPCL_UDP_HASH(lport, ipst)];
12057c478bd9Sstevel@tonic-gate 		} else {
1206*bd670b35SErik Nordmark 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
12077c478bd9Sstevel@tonic-gate 		}
12087c478bd9Sstevel@tonic-gate 
1209*bd670b35SErik Nordmark 		if (connp->conn_faddr_v4 != INADDR_ANY) {
12107c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1211*bd670b35SErik Nordmark 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
12127c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12137c478bd9Sstevel@tonic-gate 		} else {
12147c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12157c478bd9Sstevel@tonic-gate 		}
1216*bd670b35SErik Nordmark 		if (protocol == IPPROTO_RSVP)
1217*bd670b35SErik Nordmark 			ill_set_inputfn_all(ipst);
12187c478bd9Sstevel@tonic-gate 		break;
12197c478bd9Sstevel@tonic-gate 
12207c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
12217c478bd9Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
122245916cd2Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1223f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_bind_fanout[
1224f4b3ec61Sdh 		    IPCL_BIND_HASH(lport, ipst)];
1225*bd670b35SErik Nordmark 		if (connp->conn_laddr_v4 != INADDR_ANY) {
12267c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12277c478bd9Sstevel@tonic-gate 		} else {
12287c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12297c478bd9Sstevel@tonic-gate 		}
12307c478bd9Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
1231*bd670b35SErik Nordmark 			ASSERT(connp->conn_ipversion == IPV4_VERSION);
12327c478bd9Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
12338e4b770fSLu Huafeng 			(*cl_inet_listen)(
12348e4b770fSLu Huafeng 			    connp->conn_netstack->netstack_stackid,
12358e4b770fSLu Huafeng 			    IPPROTO_TCP, AF_INET,
1236*bd670b35SErik Nordmark 			    (uint8_t *)&connp->conn_bound_addr_v4, lport, NULL);
12377c478bd9Sstevel@tonic-gate 		}
12387c478bd9Sstevel@tonic-gate 		break;
12397c478bd9Sstevel@tonic-gate 
12407c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
12417c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12427c478bd9Sstevel@tonic-gate 		break;
12437c478bd9Sstevel@tonic-gate 	}
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate 	return (ret);
12467c478bd9Sstevel@tonic-gate }
12477c478bd9Sstevel@tonic-gate 
12487c478bd9Sstevel@tonic-gate int
1249*bd670b35SErik Nordmark ipcl_bind_insert_v6(conn_t *connp)
12507c478bd9Sstevel@tonic-gate {
12512b24ab6bSSebastien Roy 	connf_t		*connfp;
12522b24ab6bSSebastien Roy 	int		ret = 0;
1253f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1254*bd670b35SErik Nordmark 	uint16_t	lport = connp->conn_lport;
1255*bd670b35SErik Nordmark 	uint8_t		protocol = connp->conn_proto;
12567c478bd9Sstevel@tonic-gate 
12572b24ab6bSSebastien Roy 	if (IPCL_IS_IPTUN(connp)) {
1258*bd670b35SErik Nordmark 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
12592b24ab6bSSebastien Roy 	}
12602b24ab6bSSebastien Roy 
12617c478bd9Sstevel@tonic-gate 	switch (protocol) {
12627c478bd9Sstevel@tonic-gate 	default:
1263f4b3ec61Sdh 		if (is_system_labeled() &&
1264f4b3ec61Sdh 		    check_exempt_conflict_v6(connp, ipst))
126545916cd2Sjpk 			return (EADDRINUSE);
126645916cd2Sjpk 		/* FALLTHROUGH */
126745916cd2Sjpk 	case IPPROTO_UDP:
12687c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1269f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1270f4b3ec61Sdh 			    IPCL_UDP_HASH(lport, ipst)];
12717c478bd9Sstevel@tonic-gate 		} else {
1272f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
12737c478bd9Sstevel@tonic-gate 		}
12747c478bd9Sstevel@tonic-gate 
1275*bd670b35SErik Nordmark 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
12767c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1277*bd670b35SErik Nordmark 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12787c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12797c478bd9Sstevel@tonic-gate 		} else {
12807c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12817c478bd9Sstevel@tonic-gate 		}
12827c478bd9Sstevel@tonic-gate 		break;
12837c478bd9Sstevel@tonic-gate 
12847c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
12857c478bd9Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
128645916cd2Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1287f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_bind_fanout[
1288f4b3ec61Sdh 		    IPCL_BIND_HASH(lport, ipst)];
1289*bd670b35SErik Nordmark 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
12907c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12917c478bd9Sstevel@tonic-gate 		} else {
12927c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12937c478bd9Sstevel@tonic-gate 		}
12947c478bd9Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
12957c478bd9Sstevel@tonic-gate 			sa_family_t	addr_family;
12967c478bd9Sstevel@tonic-gate 			uint8_t		*laddrp;
12977c478bd9Sstevel@tonic-gate 
1298*bd670b35SErik Nordmark 			if (connp->conn_ipversion == IPV6_VERSION) {
12997c478bd9Sstevel@tonic-gate 				addr_family = AF_INET6;
13007c478bd9Sstevel@tonic-gate 				laddrp =
1301*bd670b35SErik Nordmark 				    (uint8_t *)&connp->conn_bound_addr_v6;
13027c478bd9Sstevel@tonic-gate 			} else {
13037c478bd9Sstevel@tonic-gate 				addr_family = AF_INET;
1304*bd670b35SErik Nordmark 				laddrp = (uint8_t *)&connp->conn_bound_addr_v4;
13057c478bd9Sstevel@tonic-gate 			}
13067c478bd9Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
13078e4b770fSLu Huafeng 			(*cl_inet_listen)(
13088e4b770fSLu Huafeng 			    connp->conn_netstack->netstack_stackid,
13098e4b770fSLu Huafeng 			    IPPROTO_TCP, addr_family, laddrp, lport, NULL);
13107c478bd9Sstevel@tonic-gate 		}
13117c478bd9Sstevel@tonic-gate 		break;
13127c478bd9Sstevel@tonic-gate 
13137c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
13147c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
13157c478bd9Sstevel@tonic-gate 		break;
13167c478bd9Sstevel@tonic-gate 	}
13177c478bd9Sstevel@tonic-gate 
13187c478bd9Sstevel@tonic-gate 	return (ret);
13197c478bd9Sstevel@tonic-gate }
13207c478bd9Sstevel@tonic-gate 
13217c478bd9Sstevel@tonic-gate /*
13227c478bd9Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
1323*bd670b35SErik Nordmark  * The caller has already set conn_proto and the addresses/ports in the conn_t.
13247c478bd9Sstevel@tonic-gate  */
1325*bd670b35SErik Nordmark 
1326*bd670b35SErik Nordmark int
1327*bd670b35SErik Nordmark ipcl_conn_insert(conn_t *connp)
1328*bd670b35SErik Nordmark {
1329*bd670b35SErik Nordmark 	if (connp->conn_ipversion == IPV6_VERSION)
1330*bd670b35SErik Nordmark 		return (ipcl_conn_insert_v6(connp));
1331*bd670b35SErik Nordmark 	else
1332*bd670b35SErik Nordmark 		return (ipcl_conn_insert_v4(connp));
1333*bd670b35SErik Nordmark }
1334*bd670b35SErik Nordmark 
13357c478bd9Sstevel@tonic-gate int
1336*bd670b35SErik Nordmark ipcl_conn_insert_v4(conn_t *connp)
13377c478bd9Sstevel@tonic-gate {
13387c478bd9Sstevel@tonic-gate 	connf_t		*connfp;
13397c478bd9Sstevel@tonic-gate 	conn_t		*tconnp;
13407c478bd9Sstevel@tonic-gate 	int		ret = 0;
1341f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1342*bd670b35SErik Nordmark 	uint16_t	lport = connp->conn_lport;
1343*bd670b35SErik Nordmark 	uint8_t		protocol = connp->conn_proto;
13447c478bd9Sstevel@tonic-gate 
13452b24ab6bSSebastien Roy 	if (IPCL_IS_IPTUN(connp))
1346*bd670b35SErik Nordmark 		return (ipcl_iptun_hash_insert(connp, ipst));
13472b24ab6bSSebastien Roy 
13487c478bd9Sstevel@tonic-gate 	switch (protocol) {
13497c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
1350a12220b3SJon Anderson 		/*
1351*bd670b35SErik Nordmark 		 * For TCP, we check whether the connection tuple already
1352a12220b3SJon Anderson 		 * exists before allowing the connection to proceed.  We
1353a12220b3SJon Anderson 		 * also allow indexing on the zoneid. This is to allow
1354a12220b3SJon Anderson 		 * multiple shared stack zones to have the same tcp
1355a12220b3SJon Anderson 		 * connection tuple. In practice this only happens for
1356a12220b3SJon Anderson 		 * INADDR_LOOPBACK as it's the only local address which
1357a12220b3SJon Anderson 		 * doesn't have to be unique.
1358a12220b3SJon Anderson 		 */
1359f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_conn_fanout[
1360*bd670b35SErik Nordmark 		    IPCL_CONN_HASH(connp->conn_faddr_v4,
1361f4b3ec61Sdh 		    connp->conn_ports, ipst)];
13627c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13637c478bd9Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
13647c478bd9Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
1365*bd670b35SErik Nordmark 			if (IPCL_CONN_MATCH(tconnp, connp->conn_proto,
1366*bd670b35SErik Nordmark 			    connp->conn_faddr_v4, connp->conn_laddr_v4,
1367*bd670b35SErik Nordmark 			    connp->conn_ports) &&
1368*bd670b35SErik Nordmark 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
13697c478bd9Sstevel@tonic-gate 				/* Already have a conn. bail out */
13707c478bd9Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
13717c478bd9Sstevel@tonic-gate 				return (EADDRINUSE);
13727c478bd9Sstevel@tonic-gate 			}
13737c478bd9Sstevel@tonic-gate 		}
13747c478bd9Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
13757c478bd9Sstevel@tonic-gate 			/*
13767c478bd9Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
13777c478bd9Sstevel@tonic-gate 			 * rebind. Let it happen.
13787c478bd9Sstevel@tonic-gate 			 */
13797c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13807c478bd9Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
13817c478bd9Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
13827c478bd9Sstevel@tonic-gate 		}
1383866ba9ddSjprakash 
1384866ba9ddSjprakash 		ASSERT(connp->conn_recv != NULL);
1385*bd670b35SErik Nordmark 		ASSERT(connp->conn_recvicmp != NULL);
1386866ba9ddSjprakash 
13877c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
13887c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13897c478bd9Sstevel@tonic-gate 		break;
13907c478bd9Sstevel@tonic-gate 
13917c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
13927c0c0508Skcpoon 		/*
13937c0c0508Skcpoon 		 * The raw socket may have already been bound, remove it
13947c0c0508Skcpoon 		 * from the hash first.
13957c0c0508Skcpoon 		 */
13967c0c0508Skcpoon 		IPCL_HASH_REMOVE(connp);
13977c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
13987c478bd9Sstevel@tonic-gate 		break;
13997c478bd9Sstevel@tonic-gate 
14007c478bd9Sstevel@tonic-gate 	default:
140145916cd2Sjpk 		/*
140245916cd2Sjpk 		 * Check for conflicts among MAC exempt bindings.  For
140345916cd2Sjpk 		 * transports with port numbers, this is done by the upper
140445916cd2Sjpk 		 * level per-transport binding logic.  For all others, it's
140545916cd2Sjpk 		 * done here.
140645916cd2Sjpk 		 */
1407f4b3ec61Sdh 		if (is_system_labeled() &&
1408f4b3ec61Sdh 		    check_exempt_conflict_v4(connp, ipst))
140945916cd2Sjpk 			return (EADDRINUSE);
141045916cd2Sjpk 		/* FALLTHROUGH */
141145916cd2Sjpk 
141245916cd2Sjpk 	case IPPROTO_UDP:
14137c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1414f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1415*bd670b35SErik Nordmark 			    IPCL_UDP_HASH(lport, ipst)];
14167c478bd9Sstevel@tonic-gate 		} else {
1417*bd670b35SErik Nordmark 			connfp = &ipst->ips_ipcl_proto_fanout_v4[protocol];
14187c478bd9Sstevel@tonic-gate 		}
14197c478bd9Sstevel@tonic-gate 
1420*bd670b35SErik Nordmark 		if (connp->conn_faddr_v4 != INADDR_ANY) {
14217c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1422*bd670b35SErik Nordmark 		} else if (connp->conn_laddr_v4 != INADDR_ANY) {
14237c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
14247c478bd9Sstevel@tonic-gate 		} else {
14257c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
14267c478bd9Sstevel@tonic-gate 		}
14277c478bd9Sstevel@tonic-gate 		break;
14287c478bd9Sstevel@tonic-gate 	}
14297c478bd9Sstevel@tonic-gate 
14307c478bd9Sstevel@tonic-gate 	return (ret);
14317c478bd9Sstevel@tonic-gate }
14327c478bd9Sstevel@tonic-gate 
14337c478bd9Sstevel@tonic-gate int
1434*bd670b35SErik Nordmark ipcl_conn_insert_v6(conn_t *connp)
14357c478bd9Sstevel@tonic-gate {
14367c478bd9Sstevel@tonic-gate 	connf_t		*connfp;
14377c478bd9Sstevel@tonic-gate 	conn_t		*tconnp;
14387c478bd9Sstevel@tonic-gate 	int		ret = 0;
1439f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
1440*bd670b35SErik Nordmark 	uint16_t	lport = connp->conn_lport;
1441*bd670b35SErik Nordmark 	uint8_t		protocol = connp->conn_proto;
1442*bd670b35SErik Nordmark 	uint_t		ifindex = connp->conn_bound_if;
14437c478bd9Sstevel@tonic-gate 
14442b24ab6bSSebastien Roy 	if (IPCL_IS_IPTUN(connp))
1445*bd670b35SErik Nordmark 		return (ipcl_iptun_hash_insert_v6(connp, ipst));
14462b24ab6bSSebastien Roy 
14477c478bd9Sstevel@tonic-gate 	switch (protocol) {
14487c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
1449a12220b3SJon Anderson 
1450a12220b3SJon Anderson 		/*
1451a12220b3SJon Anderson 		 * For tcp, we check whether the connection tuple already
1452a12220b3SJon Anderson 		 * exists before allowing the connection to proceed.  We
1453a12220b3SJon Anderson 		 * also allow indexing on the zoneid. This is to allow
1454a12220b3SJon Anderson 		 * multiple shared stack zones to have the same tcp
1455a12220b3SJon Anderson 		 * connection tuple. In practice this only happens for
1456a12220b3SJon Anderson 		 * ipv6_loopback as it's the only local address which
1457a12220b3SJon Anderson 		 * doesn't have to be unique.
1458a12220b3SJon Anderson 		 */
1459f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_conn_fanout[
1460*bd670b35SErik Nordmark 		    IPCL_CONN_HASH_V6(connp->conn_faddr_v6, connp->conn_ports,
1461f4b3ec61Sdh 		    ipst)];
14627c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
14637c478bd9Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
14647c478bd9Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
1465*bd670b35SErik Nordmark 			/* NOTE: need to match zoneid. Bug in onnv-gate */
1466*bd670b35SErik Nordmark 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_proto,
1467*bd670b35SErik Nordmark 			    connp->conn_faddr_v6, connp->conn_laddr_v6,
14687c478bd9Sstevel@tonic-gate 			    connp->conn_ports) &&
1469*bd670b35SErik Nordmark 			    (tconnp->conn_bound_if == 0 ||
1470*bd670b35SErik Nordmark 			    tconnp->conn_bound_if == ifindex) &&
1471*bd670b35SErik Nordmark 			    IPCL_ZONE_MATCH(tconnp, connp->conn_zoneid)) {
14727c478bd9Sstevel@tonic-gate 				/* Already have a conn. bail out */
14737c478bd9Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
14747c478bd9Sstevel@tonic-gate 				return (EADDRINUSE);
14757c478bd9Sstevel@tonic-gate 			}
14767c478bd9Sstevel@tonic-gate 		}
14777c478bd9Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
14787c478bd9Sstevel@tonic-gate 			/*
14797c478bd9Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
14807c478bd9Sstevel@tonic-gate 			 * rebind. Let it happen.
14817c478bd9Sstevel@tonic-gate 			 */
14827c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
14837c478bd9Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
14847c478bd9Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
14857c478bd9Sstevel@tonic-gate 		}
14867c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
14877c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
14887c478bd9Sstevel@tonic-gate 		break;
14897c478bd9Sstevel@tonic-gate 
14907c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
14917c0c0508Skcpoon 		IPCL_HASH_REMOVE(connp);
14927c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
14937c478bd9Sstevel@tonic-gate 		break;
14947c478bd9Sstevel@tonic-gate 
14957c478bd9Sstevel@tonic-gate 	default:
1496f4b3ec61Sdh 		if (is_system_labeled() &&
1497f4b3ec61Sdh 		    check_exempt_conflict_v6(connp, ipst))
149845916cd2Sjpk 			return (EADDRINUSE);
149945916cd2Sjpk 		/* FALLTHROUGH */
150045916cd2Sjpk 	case IPPROTO_UDP:
15017c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1502f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1503*bd670b35SErik Nordmark 			    IPCL_UDP_HASH(lport, ipst)];
15047c478bd9Sstevel@tonic-gate 		} else {
1505f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
15067c478bd9Sstevel@tonic-gate 		}
15077c478bd9Sstevel@tonic-gate 
1508*bd670b35SErik Nordmark 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6)) {
15097c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
1510*bd670b35SErik Nordmark 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6)) {
15117c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
15127c478bd9Sstevel@tonic-gate 		} else {
15137c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
15147c478bd9Sstevel@tonic-gate 		}
15157c478bd9Sstevel@tonic-gate 		break;
15167c478bd9Sstevel@tonic-gate 	}
15177c478bd9Sstevel@tonic-gate 
15187c478bd9Sstevel@tonic-gate 	return (ret);
15197c478bd9Sstevel@tonic-gate }
15207c478bd9Sstevel@tonic-gate 
15217c478bd9Sstevel@tonic-gate /*
15227c478bd9Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
15237c478bd9Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
15247c478bd9Sstevel@tonic-gate  * the reference held, null otherwise.
152545916cd2Sjpk  *
152645916cd2Sjpk  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
152745916cd2Sjpk  * Lookup" comment block are applied.  Labels are also checked as described
152845916cd2Sjpk  * above.  If the packet is from the inside (looped back), and is from the same
152945916cd2Sjpk  * zone, then label checks are omitted.
15307c478bd9Sstevel@tonic-gate  */
15317c478bd9Sstevel@tonic-gate conn_t *
1532*bd670b35SErik Nordmark ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1533*bd670b35SErik Nordmark     ip_recv_attr_t *ira, ip_stack_t *ipst)
15347c478bd9Sstevel@tonic-gate {
15357c478bd9Sstevel@tonic-gate 	ipha_t	*ipha;
15367c478bd9Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
15377c478bd9Sstevel@tonic-gate 	uint16_t lport;
15387c478bd9Sstevel@tonic-gate 	uint16_t fport;
15397c478bd9Sstevel@tonic-gate 	uint32_t ports;
15407c478bd9Sstevel@tonic-gate 	conn_t	*connp;
15417c478bd9Sstevel@tonic-gate 	uint16_t  *up;
1542*bd670b35SErik Nordmark 	zoneid_t	zoneid = ira->ira_zoneid;
15437c478bd9Sstevel@tonic-gate 
15447c478bd9Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
15457c478bd9Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
15467c478bd9Sstevel@tonic-gate 
15477c478bd9Sstevel@tonic-gate 	switch (protocol) {
15487c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
15497c478bd9Sstevel@tonic-gate 		ports = *(uint32_t *)up;
15507c478bd9Sstevel@tonic-gate 		connfp =
1551f4b3ec61Sdh 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1552f4b3ec61Sdh 		    ports, ipst)];
15537c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
15547c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
15557c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
1556*bd670b35SErik Nordmark 			if (IPCL_CONN_MATCH(connp, protocol,
1557*bd670b35SErik Nordmark 			    ipha->ipha_src, ipha->ipha_dst, ports) &&
1558*bd670b35SErik Nordmark 			    (connp->conn_zoneid == zoneid ||
1559*bd670b35SErik Nordmark 			    connp->conn_allzones ||
1560*bd670b35SErik Nordmark 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1561*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1562*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15637c478bd9Sstevel@tonic-gate 				break;
15647c478bd9Sstevel@tonic-gate 		}
15657c478bd9Sstevel@tonic-gate 
15667c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
156745916cd2Sjpk 			/*
156845916cd2Sjpk 			 * We have a fully-bound TCP connection.
156945916cd2Sjpk 			 *
157045916cd2Sjpk 			 * For labeled systems, there's no need to check the
157145916cd2Sjpk 			 * label here.  It's known to be good as we checked
157245916cd2Sjpk 			 * before allowing the connection to become bound.
157345916cd2Sjpk 			 */
15747c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
15757c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
15767c478bd9Sstevel@tonic-gate 			return (connp);
15777c478bd9Sstevel@tonic-gate 		}
15787c478bd9Sstevel@tonic-gate 
15797c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
15807c478bd9Sstevel@tonic-gate 		lport = up[1];
1581f4b3ec61Sdh 		bind_connfp =
1582f4b3ec61Sdh 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
15837c478bd9Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
15847c478bd9Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
15857c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
158645916cd2Sjpk 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
1587*bd670b35SErik Nordmark 			    lport) &&
1588*bd670b35SErik Nordmark 			    (connp->conn_zoneid == zoneid ||
1589*bd670b35SErik Nordmark 			    connp->conn_allzones ||
1590*bd670b35SErik Nordmark 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1591*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1592*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
15937c478bd9Sstevel@tonic-gate 				break;
15947c478bd9Sstevel@tonic-gate 		}
15957c478bd9Sstevel@tonic-gate 
159645916cd2Sjpk 		/*
159745916cd2Sjpk 		 * If the matching connection is SLP on a private address, then
159845916cd2Sjpk 		 * the label on the packet must match the local zone's label.
159945916cd2Sjpk 		 * Otherwise, it must be in the label range defined by tnrh.
1600*bd670b35SErik Nordmark 		 * This is ensured by tsol_receive_local.
1601*bd670b35SErik Nordmark 		 *
1602*bd670b35SErik Nordmark 		 * Note that we don't check tsol_receive_local for
1603*bd670b35SErik Nordmark 		 * the connected case.
160445916cd2Sjpk 		 */
1605*bd670b35SErik Nordmark 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
160645916cd2Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1607*bd670b35SErik Nordmark 		    ira, connp)) {
1608*bd670b35SErik Nordmark 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp,
1609*bd670b35SErik Nordmark 			    char *, "connp(1) could not receive mp(2)",
1610*bd670b35SErik Nordmark 			    conn_t *, connp, mblk_t *, mp);
161145916cd2Sjpk 			connp = NULL;
161245916cd2Sjpk 		}
161345916cd2Sjpk 
16147c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
161545916cd2Sjpk 			/* Have a listener at least */
16167c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
16177c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
16187c478bd9Sstevel@tonic-gate 			return (connp);
16197c478bd9Sstevel@tonic-gate 		}
16207c478bd9Sstevel@tonic-gate 
16217c478bd9Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
16227c478bd9Sstevel@tonic-gate 		break;
16237c478bd9Sstevel@tonic-gate 
16247c478bd9Sstevel@tonic-gate 	case IPPROTO_UDP:
16257c478bd9Sstevel@tonic-gate 		lport = up[1];
16267c478bd9Sstevel@tonic-gate 		fport = up[0];
1627f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
16287c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16297c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16307c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
16317c478bd9Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
16327c478bd9Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
1633*bd670b35SErik Nordmark 			    (connp->conn_zoneid == zoneid ||
1634*bd670b35SErik Nordmark 			    connp->conn_allzones ||
1635*bd670b35SErik Nordmark 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1636*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE))))
16377c478bd9Sstevel@tonic-gate 				break;
16387c478bd9Sstevel@tonic-gate 		}
16397c478bd9Sstevel@tonic-gate 
1640*bd670b35SErik Nordmark 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
164145916cd2Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
1642*bd670b35SErik Nordmark 		    ira, connp)) {
164345916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
164445916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
164545916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
164645916cd2Sjpk 			connp = NULL;
164745916cd2Sjpk 		}
164845916cd2Sjpk 
16497c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
16507c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
16517c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16527c478bd9Sstevel@tonic-gate 			return (connp);
16537c478bd9Sstevel@tonic-gate 		}
16547c478bd9Sstevel@tonic-gate 
16557c478bd9Sstevel@tonic-gate 		/*
16567c478bd9Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
16577c478bd9Sstevel@tonic-gate 		 */
16587c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
1659*bd670b35SErik Nordmark 
16607c478bd9Sstevel@tonic-gate 		break;
16612b24ab6bSSebastien Roy 
16622b24ab6bSSebastien Roy 	case IPPROTO_ENCAP:
16632b24ab6bSSebastien Roy 	case IPPROTO_IPV6:
16642b24ab6bSSebastien Roy 		return (ipcl_iptun_classify_v4(&ipha->ipha_src,
16652b24ab6bSSebastien Roy 		    &ipha->ipha_dst, ipst));
16667c478bd9Sstevel@tonic-gate 	}
16677c478bd9Sstevel@tonic-gate 
16687c478bd9Sstevel@tonic-gate 	return (NULL);
16697c478bd9Sstevel@tonic-gate }
16707c478bd9Sstevel@tonic-gate 
16717c478bd9Sstevel@tonic-gate conn_t *
1672*bd670b35SErik Nordmark ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len,
1673*bd670b35SErik Nordmark     ip_recv_attr_t *ira, ip_stack_t *ipst)
16747c478bd9Sstevel@tonic-gate {
16757c478bd9Sstevel@tonic-gate 	ip6_t		*ip6h;
16767c478bd9Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
16777c478bd9Sstevel@tonic-gate 	uint16_t	lport;
16787c478bd9Sstevel@tonic-gate 	uint16_t	fport;
1679*bd670b35SErik Nordmark 	tcpha_t		*tcpha;
16807c478bd9Sstevel@tonic-gate 	uint32_t	ports;
16817c478bd9Sstevel@tonic-gate 	conn_t		*connp;
16827c478bd9Sstevel@tonic-gate 	uint16_t	*up;
1683*bd670b35SErik Nordmark 	zoneid_t	zoneid = ira->ira_zoneid;
16847c478bd9Sstevel@tonic-gate 
16857c478bd9Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
16867c478bd9Sstevel@tonic-gate 
16877c478bd9Sstevel@tonic-gate 	switch (protocol) {
16887c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
1689*bd670b35SErik Nordmark 		tcpha = (tcpha_t *)&mp->b_rptr[hdr_len];
1690*bd670b35SErik Nordmark 		up = &tcpha->tha_lport;
16917c478bd9Sstevel@tonic-gate 		ports = *(uint32_t *)up;
16927c478bd9Sstevel@tonic-gate 
16937c478bd9Sstevel@tonic-gate 		connfp =
1694f4b3ec61Sdh 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1695f4b3ec61Sdh 		    ports, ipst)];
16967c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16977c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16987c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
1699*bd670b35SErik Nordmark 			if (IPCL_CONN_MATCH_V6(connp, protocol,
1700*bd670b35SErik Nordmark 			    ip6h->ip6_src, ip6h->ip6_dst, ports) &&
1701*bd670b35SErik Nordmark 			    (connp->conn_zoneid == zoneid ||
1702*bd670b35SErik Nordmark 			    connp->conn_allzones ||
1703*bd670b35SErik Nordmark 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1704*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1705*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17067c478bd9Sstevel@tonic-gate 				break;
17077c478bd9Sstevel@tonic-gate 		}
17087c478bd9Sstevel@tonic-gate 
17097c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
171045916cd2Sjpk 			/*
171145916cd2Sjpk 			 * We have a fully-bound TCP connection.
171245916cd2Sjpk 			 *
171345916cd2Sjpk 			 * For labeled systems, there's no need to check the
171445916cd2Sjpk 			 * label here.  It's known to be good as we checked
171545916cd2Sjpk 			 * before allowing the connection to become bound.
171645916cd2Sjpk 			 */
17177c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
17187c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17197c478bd9Sstevel@tonic-gate 			return (connp);
17207c478bd9Sstevel@tonic-gate 		}
17217c478bd9Sstevel@tonic-gate 
17227c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17237c478bd9Sstevel@tonic-gate 
17247c478bd9Sstevel@tonic-gate 		lport = up[1];
1725f4b3ec61Sdh 		bind_connfp =
1726f4b3ec61Sdh 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
17277c478bd9Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
17287c478bd9Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
17297c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
17307c478bd9Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
17317c478bd9Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
1732*bd670b35SErik Nordmark 			    (connp->conn_zoneid == zoneid ||
1733*bd670b35SErik Nordmark 			    connp->conn_allzones ||
1734*bd670b35SErik Nordmark 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1735*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1736*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17377c478bd9Sstevel@tonic-gate 				break;
17387c478bd9Sstevel@tonic-gate 		}
17397c478bd9Sstevel@tonic-gate 
1740*bd670b35SErik Nordmark 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
174145916cd2Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1742*bd670b35SErik Nordmark 		    ira, connp)) {
174345916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
174445916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
174545916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
174645916cd2Sjpk 			connp = NULL;
174745916cd2Sjpk 		}
174845916cd2Sjpk 
17497c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
17507c478bd9Sstevel@tonic-gate 			/* Have a listner at least */
17517c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
17527c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
17537c478bd9Sstevel@tonic-gate 			return (connp);
17547c478bd9Sstevel@tonic-gate 		}
17557c478bd9Sstevel@tonic-gate 
17567c478bd9Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
17577c478bd9Sstevel@tonic-gate 		break;
17587c478bd9Sstevel@tonic-gate 
17597c478bd9Sstevel@tonic-gate 	case IPPROTO_UDP:
17607c478bd9Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
17617c478bd9Sstevel@tonic-gate 		lport = up[1];
17627c478bd9Sstevel@tonic-gate 		fport = up[0];
1763f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
17647c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
17657c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
17667c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
17677c478bd9Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
17687c478bd9Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
1769*bd670b35SErik Nordmark 			    (connp->conn_zoneid == zoneid ||
1770*bd670b35SErik Nordmark 			    connp->conn_allzones ||
1771*bd670b35SErik Nordmark 			    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1772*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1773*bd670b35SErik Nordmark 			    (ira->ira_flags & IRAF_TX_SHARED_ADDR))))
17747c478bd9Sstevel@tonic-gate 				break;
17757c478bd9Sstevel@tonic-gate 		}
17767c478bd9Sstevel@tonic-gate 
1777*bd670b35SErik Nordmark 		if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
177845916cd2Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
1779*bd670b35SErik Nordmark 		    ira, connp)) {
178045916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
178145916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
178245916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
178345916cd2Sjpk 			connp = NULL;
178445916cd2Sjpk 		}
178545916cd2Sjpk 
17867c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
17877c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
17887c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17897c478bd9Sstevel@tonic-gate 			return (connp);
17907c478bd9Sstevel@tonic-gate 		}
17917c478bd9Sstevel@tonic-gate 
17927c478bd9Sstevel@tonic-gate 		/*
17937c478bd9Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
17947c478bd9Sstevel@tonic-gate 		 */
17957c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17967c478bd9Sstevel@tonic-gate 		break;
17972b24ab6bSSebastien Roy 	case IPPROTO_ENCAP:
17982b24ab6bSSebastien Roy 	case IPPROTO_IPV6:
17992b24ab6bSSebastien Roy 		return (ipcl_iptun_classify_v6(&ip6h->ip6_src,
18002b24ab6bSSebastien Roy 		    &ip6h->ip6_dst, ipst));
18017c478bd9Sstevel@tonic-gate 	}
18027c478bd9Sstevel@tonic-gate 
18037c478bd9Sstevel@tonic-gate 	return (NULL);
18047c478bd9Sstevel@tonic-gate }
18057c478bd9Sstevel@tonic-gate 
18067c478bd9Sstevel@tonic-gate /*
18077c478bd9Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
18087c478bd9Sstevel@tonic-gate  */
18097c478bd9Sstevel@tonic-gate conn_t *
1810*bd670b35SErik Nordmark ipcl_classify(mblk_t *mp, ip_recv_attr_t *ira, ip_stack_t *ipst)
18117c478bd9Sstevel@tonic-gate {
1812*bd670b35SErik Nordmark 	if (ira->ira_flags & IRAF_IS_IPV4) {
1813*bd670b35SErik Nordmark 		return (ipcl_classify_v4(mp, ira->ira_protocol,
1814*bd670b35SErik Nordmark 		    ira->ira_ip_hdr_length, ira, ipst));
1815*bd670b35SErik Nordmark 	} else {
1816*bd670b35SErik Nordmark 		return (ipcl_classify_v6(mp, ira->ira_protocol,
1817*bd670b35SErik Nordmark 		    ira->ira_ip_hdr_length, ira, ipst));
18187c478bd9Sstevel@tonic-gate 	}
18197c478bd9Sstevel@tonic-gate }
18207c478bd9Sstevel@tonic-gate 
1821*bd670b35SErik Nordmark /*
1822*bd670b35SErik Nordmark  * Only used to classify SCTP RAW sockets
1823*bd670b35SErik Nordmark  */
18247c478bd9Sstevel@tonic-gate conn_t *
1825*bd670b35SErik Nordmark ipcl_classify_raw(mblk_t *mp, uint8_t protocol, uint32_t ports,
1826*bd670b35SErik Nordmark     ipha_t *ipha, ip6_t *ip6h, ip_recv_attr_t *ira, ip_stack_t *ipst)
18277c478bd9Sstevel@tonic-gate {
182845916cd2Sjpk 	connf_t		*connfp;
18297c478bd9Sstevel@tonic-gate 	conn_t		*connp;
18307c478bd9Sstevel@tonic-gate 	in_port_t	lport;
1831*bd670b35SErik Nordmark 	int		ipversion;
183245916cd2Sjpk 	const void	*dst;
1833*bd670b35SErik Nordmark 	zoneid_t	zoneid = ira->ira_zoneid;
18347c478bd9Sstevel@tonic-gate 
18357c478bd9Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
1836*bd670b35SErik Nordmark 	if (ira->ira_flags & IRAF_IS_IPV4) {
1837*bd670b35SErik Nordmark 		dst = (const void *)&ipha->ipha_dst;
1838*bd670b35SErik Nordmark 		ipversion = IPV4_VERSION;
1839*bd670b35SErik Nordmark 	} else {
1840*bd670b35SErik Nordmark 		dst = (const void *)&ip6h->ip6_dst;
1841*bd670b35SErik Nordmark 		ipversion = IPV6_VERSION;
184245916cd2Sjpk 	}
184345916cd2Sjpk 
1844f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
18457c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
18467c478bd9Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
18477c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
18487c478bd9Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
1849*bd670b35SErik Nordmark 		if (ipversion != connp->conn_ipversion)
18507c478bd9Sstevel@tonic-gate 			continue;
1851*bd670b35SErik Nordmark 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
1852*bd670b35SErik Nordmark 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
1853*bd670b35SErik Nordmark 			if (ipversion == IPV4_VERSION) {
185445916cd2Sjpk 				if (!IPCL_CONN_MATCH(connp, protocol,
1855*bd670b35SErik Nordmark 				    ipha->ipha_src, ipha->ipha_dst, ports))
185645916cd2Sjpk 					continue;
18577c478bd9Sstevel@tonic-gate 			} else {
185845916cd2Sjpk 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
1859*bd670b35SErik Nordmark 				    ip6h->ip6_src, ip6h->ip6_dst, ports))
186045916cd2Sjpk 					continue;
18617c478bd9Sstevel@tonic-gate 			}
18627c478bd9Sstevel@tonic-gate 		} else {
1863*bd670b35SErik Nordmark 			if (ipversion == IPV4_VERSION) {
186445916cd2Sjpk 				if (!IPCL_BIND_MATCH(connp, protocol,
1865*bd670b35SErik Nordmark 				    ipha->ipha_dst, lport))
186645916cd2Sjpk 					continue;
18677c478bd9Sstevel@tonic-gate 			} else {
186845916cd2Sjpk 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
1869*bd670b35SErik Nordmark 				    ip6h->ip6_dst, lport))
187045916cd2Sjpk 					continue;
18717c478bd9Sstevel@tonic-gate 			}
18727c478bd9Sstevel@tonic-gate 		}
187345916cd2Sjpk 
1874*bd670b35SErik Nordmark 		if (connp->conn_zoneid == zoneid ||
1875*bd670b35SErik Nordmark 		    connp->conn_allzones ||
1876*bd670b35SErik Nordmark 		    ((connp->conn_mac_mode != CONN_MAC_DEFAULT) &&
1877*bd670b35SErik Nordmark 		    (ira->ira_flags & IRAF_TX_MAC_EXEMPTABLE) &&
1878*bd670b35SErik Nordmark 		    (ira->ira_flags & IRAF_TX_SHARED_ADDR)))
187945916cd2Sjpk 			break;
188045916cd2Sjpk 	}
1881*bd670b35SErik Nordmark 
1882*bd670b35SErik Nordmark 	if (connp != NULL && (ira->ira_flags & IRAF_SYSTEM_LABELED) &&
1883*bd670b35SErik Nordmark 	    !tsol_receive_local(mp, dst, ipversion, ira, connp)) {
188445916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
188545916cd2Sjpk 		    char *, "connp(1) could not receive mp(2)",
188645916cd2Sjpk 		    conn_t *, connp, mblk_t *, mp);
188745916cd2Sjpk 		connp = NULL;
18887c478bd9Sstevel@tonic-gate 	}
18897c0c0508Skcpoon 
18907c0c0508Skcpoon 	if (connp != NULL)
18917c0c0508Skcpoon 		goto found;
18927c0c0508Skcpoon 	mutex_exit(&connfp->connf_lock);
18937c0c0508Skcpoon 
1894*bd670b35SErik Nordmark 	/* Try to look for a wildcard SCTP RAW socket match. */
1895f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
18967c0c0508Skcpoon 	mutex_enter(&connfp->connf_lock);
18977c0c0508Skcpoon 	for (connp = connfp->connf_head; connp != NULL;
18987c0c0508Skcpoon 	    connp = connp->conn_next) {
18997c0c0508Skcpoon 		/* We don't allow v4 fallback for v6 raw socket. */
1900*bd670b35SErik Nordmark 		if (ipversion != connp->conn_ipversion)
19017c0c0508Skcpoon 			continue;
1902*bd670b35SErik Nordmark 		if (!IPCL_ZONE_MATCH(connp, zoneid))
1903*bd670b35SErik Nordmark 			continue;
1904*bd670b35SErik Nordmark 
1905*bd670b35SErik Nordmark 		if (ipversion == IPV4_VERSION) {
1906*bd670b35SErik Nordmark 			if (IPCL_RAW_MATCH(connp, protocol, ipha->ipha_dst))
19077c0c0508Skcpoon 				break;
19087c0c0508Skcpoon 		} else {
1909*bd670b35SErik Nordmark 			if (IPCL_RAW_MATCH_V6(connp, protocol, ip6h->ip6_dst)) {
19107c0c0508Skcpoon 				break;
19117c0c0508Skcpoon 			}
19127c0c0508Skcpoon 		}
19137c478bd9Sstevel@tonic-gate 	}
19147c0c0508Skcpoon 
19157c0c0508Skcpoon 	if (connp != NULL)
19167c0c0508Skcpoon 		goto found;
19177c0c0508Skcpoon 
19187c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
19197c478bd9Sstevel@tonic-gate 	return (NULL);
19207c0c0508Skcpoon 
19217c0c0508Skcpoon found:
19227c0c0508Skcpoon 	ASSERT(connp != NULL);
19237c0c0508Skcpoon 	CONN_INC_REF(connp);
19247c0c0508Skcpoon 	mutex_exit(&connfp->connf_lock);
19257c0c0508Skcpoon 	return (connp);
19267c478bd9Sstevel@tonic-gate }
19277c478bd9Sstevel@tonic-gate 
19287c478bd9Sstevel@tonic-gate /* ARGSUSED */
19297c478bd9Sstevel@tonic-gate static int
1930fc80c0dfSnordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
19317c478bd9Sstevel@tonic-gate {
19327c478bd9Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
19337c478bd9Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
1934fc80c0dfSnordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
1935fc80c0dfSnordmark 
1936fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
1937fc80c0dfSnordmark 	bzero(tcp, sizeof (tcp_t));
1938fc80c0dfSnordmark 
1939fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1940fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
19410f1702c5SYu Xiangning 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
1942*bd670b35SErik Nordmark 	tcp->tcp_timercache = tcp_timermp_alloc(kmflags);
1943*bd670b35SErik Nordmark 	if (tcp->tcp_timercache == NULL)
1944*bd670b35SErik Nordmark 		return (ENOMEM);
19457c478bd9Sstevel@tonic-gate 	connp->conn_tcp = tcp;
19467c478bd9Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
1947*bd670b35SErik Nordmark 	connp->conn_proto = IPPROTO_TCP;
19487c478bd9Sstevel@tonic-gate 	tcp->tcp_connp = connp;
1949*bd670b35SErik Nordmark 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
1950*bd670b35SErik Nordmark 
1951*bd670b35SErik Nordmark 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
1952*bd670b35SErik Nordmark 	if (connp->conn_ixa == NULL) {
1953*bd670b35SErik Nordmark 		tcp_timermp_free(tcp);
1954*bd670b35SErik Nordmark 		return (ENOMEM);
1955*bd670b35SErik Nordmark 	}
1956*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_refcnt = 1;
1957*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
1958*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
19597c478bd9Sstevel@tonic-gate 	return (0);
19607c478bd9Sstevel@tonic-gate }
19617c478bd9Sstevel@tonic-gate 
19627c478bd9Sstevel@tonic-gate /* ARGSUSED */
19637c478bd9Sstevel@tonic-gate static void
1964fc80c0dfSnordmark tcp_conn_destructor(void *buf, void *cdrarg)
1965fc80c0dfSnordmark {
1966fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
1967fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
1968fc80c0dfSnordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
1969fc80c0dfSnordmark 
1970fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
1971fc80c0dfSnordmark 	ASSERT(tcp->tcp_connp == connp);
1972fc80c0dfSnordmark 	ASSERT(connp->conn_tcp == tcp);
1973fc80c0dfSnordmark 	tcp_timermp_free(tcp);
1974fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
1975fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
19760f1702c5SYu Xiangning 	cv_destroy(&connp->conn_sq_cv);
1977*bd670b35SErik Nordmark 	rw_destroy(&connp->conn_ilg_lock);
1978*bd670b35SErik Nordmark 
1979*bd670b35SErik Nordmark 	/* Can be NULL if constructor failed */
1980*bd670b35SErik Nordmark 	if (connp->conn_ixa != NULL) {
1981*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
1982*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
1983*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
1984*bd670b35SErik Nordmark 		ixa_refrele(connp->conn_ixa);
1985*bd670b35SErik Nordmark 	}
1986fc80c0dfSnordmark }
1987fc80c0dfSnordmark 
1988fc80c0dfSnordmark /* ARGSUSED */
1989fc80c0dfSnordmark static int
1990fc80c0dfSnordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
1991fc80c0dfSnordmark {
1992fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
1993fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
1994fc80c0dfSnordmark 
1995fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
1996fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
1997fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
1998fc80c0dfSnordmark 	connp->conn_flags = IPCL_IPCCONN;
1999*bd670b35SErik Nordmark 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2000fc80c0dfSnordmark 
2001*bd670b35SErik Nordmark 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2002*bd670b35SErik Nordmark 	if (connp->conn_ixa == NULL)
2003*bd670b35SErik Nordmark 		return (ENOMEM);
2004*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_refcnt = 1;
2005*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2006fc80c0dfSnordmark 	return (0);
2007fc80c0dfSnordmark }
2008fc80c0dfSnordmark 
2009fc80c0dfSnordmark /* ARGSUSED */
2010fc80c0dfSnordmark static void
2011fc80c0dfSnordmark ip_conn_destructor(void *buf, void *cdrarg)
2012fc80c0dfSnordmark {
2013fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2014fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2015fc80c0dfSnordmark 
2016fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
2017fc80c0dfSnordmark 	ASSERT(connp->conn_priv == NULL);
2018fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2019fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2020*bd670b35SErik Nordmark 	rw_destroy(&connp->conn_ilg_lock);
2021*bd670b35SErik Nordmark 
2022*bd670b35SErik Nordmark 	/* Can be NULL if constructor failed */
2023*bd670b35SErik Nordmark 	if (connp->conn_ixa != NULL) {
2024*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2025*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2026*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2027*bd670b35SErik Nordmark 		ixa_refrele(connp->conn_ixa);
2028*bd670b35SErik Nordmark 	}
2029fc80c0dfSnordmark }
2030fc80c0dfSnordmark 
2031fc80c0dfSnordmark /* ARGSUSED */
2032fc80c0dfSnordmark static int
2033fc80c0dfSnordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2034fc80c0dfSnordmark {
2035fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2036fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2037fc80c0dfSnordmark 	udp_t	*udp = (udp_t *)&itc[1];
2038fc80c0dfSnordmark 
2039fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2040fc80c0dfSnordmark 	bzero(udp, sizeof (udp_t));
2041fc80c0dfSnordmark 
2042fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2043fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2044fc80c0dfSnordmark 	connp->conn_udp = udp;
2045fc80c0dfSnordmark 	connp->conn_flags = IPCL_UDPCONN;
2046*bd670b35SErik Nordmark 	connp->conn_proto = IPPROTO_UDP;
2047fc80c0dfSnordmark 	udp->udp_connp = connp;
2048*bd670b35SErik Nordmark 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2049*bd670b35SErik Nordmark 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2050*bd670b35SErik Nordmark 	if (connp->conn_ixa == NULL)
2051*bd670b35SErik Nordmark 		return (ENOMEM);
2052*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_refcnt = 1;
2053*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
2054*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2055fc80c0dfSnordmark 	return (0);
2056fc80c0dfSnordmark }
2057fc80c0dfSnordmark 
2058fc80c0dfSnordmark /* ARGSUSED */
2059fc80c0dfSnordmark static void
2060fc80c0dfSnordmark udp_conn_destructor(void *buf, void *cdrarg)
2061fc80c0dfSnordmark {
2062fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2063fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2064fc80c0dfSnordmark 	udp_t	*udp = (udp_t *)&itc[1];
2065fc80c0dfSnordmark 
2066fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2067fc80c0dfSnordmark 	ASSERT(udp->udp_connp == connp);
2068fc80c0dfSnordmark 	ASSERT(connp->conn_udp == udp);
2069fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2070fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2071*bd670b35SErik Nordmark 	rw_destroy(&connp->conn_ilg_lock);
2072*bd670b35SErik Nordmark 
2073*bd670b35SErik Nordmark 	/* Can be NULL if constructor failed */
2074*bd670b35SErik Nordmark 	if (connp->conn_ixa != NULL) {
2075*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2076*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2077*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2078*bd670b35SErik Nordmark 		ixa_refrele(connp->conn_ixa);
2079*bd670b35SErik Nordmark 	}
2080fc80c0dfSnordmark }
2081fc80c0dfSnordmark 
2082fc80c0dfSnordmark /* ARGSUSED */
2083fc80c0dfSnordmark static int
2084fc80c0dfSnordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2085fc80c0dfSnordmark {
2086fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2087fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2088fc80c0dfSnordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
2089fc80c0dfSnordmark 
2090fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2091fc80c0dfSnordmark 	bzero(icmp, sizeof (icmp_t));
2092fc80c0dfSnordmark 
2093fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2094fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2095fc80c0dfSnordmark 	connp->conn_icmp = icmp;
2096fc80c0dfSnordmark 	connp->conn_flags = IPCL_RAWIPCONN;
2097*bd670b35SErik Nordmark 	connp->conn_proto = IPPROTO_ICMP;
2098fc80c0dfSnordmark 	icmp->icmp_connp = connp;
2099*bd670b35SErik Nordmark 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2100*bd670b35SErik Nordmark 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2101*bd670b35SErik Nordmark 	if (connp->conn_ixa == NULL)
2102*bd670b35SErik Nordmark 		return (ENOMEM);
2103*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_refcnt = 1;
2104*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_protocol = connp->conn_proto;
2105*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2106fc80c0dfSnordmark 	return (0);
2107fc80c0dfSnordmark }
2108fc80c0dfSnordmark 
2109fc80c0dfSnordmark /* ARGSUSED */
2110fc80c0dfSnordmark static void
2111fc80c0dfSnordmark rawip_conn_destructor(void *buf, void *cdrarg)
2112fc80c0dfSnordmark {
2113fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2114fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2115fc80c0dfSnordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
2116fc80c0dfSnordmark 
2117fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2118fc80c0dfSnordmark 	ASSERT(icmp->icmp_connp == connp);
2119fc80c0dfSnordmark 	ASSERT(connp->conn_icmp == icmp);
2120fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2121fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2122*bd670b35SErik Nordmark 	rw_destroy(&connp->conn_ilg_lock);
2123*bd670b35SErik Nordmark 
2124*bd670b35SErik Nordmark 	/* Can be NULL if constructor failed */
2125*bd670b35SErik Nordmark 	if (connp->conn_ixa != NULL) {
2126*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2127*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2128*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2129*bd670b35SErik Nordmark 		ixa_refrele(connp->conn_ixa);
2130*bd670b35SErik Nordmark 	}
2131fc80c0dfSnordmark }
2132fc80c0dfSnordmark 
2133fc80c0dfSnordmark /* ARGSUSED */
2134fc80c0dfSnordmark static int
2135fc80c0dfSnordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2136fc80c0dfSnordmark {
2137fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2138fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2139fc80c0dfSnordmark 	rts_t	*rts = (rts_t *)&itc[1];
2140fc80c0dfSnordmark 
2141fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2142fc80c0dfSnordmark 	bzero(rts, sizeof (rts_t));
2143fc80c0dfSnordmark 
2144fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2145fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2146fc80c0dfSnordmark 	connp->conn_rts = rts;
2147fc80c0dfSnordmark 	connp->conn_flags = IPCL_RTSCONN;
2148fc80c0dfSnordmark 	rts->rts_connp = connp;
2149*bd670b35SErik Nordmark 	rw_init(&connp->conn_ilg_lock, NULL, RW_DEFAULT, NULL);
2150*bd670b35SErik Nordmark 	connp->conn_ixa = kmem_zalloc(sizeof (ip_xmit_attr_t), kmflags);
2151*bd670b35SErik Nordmark 	if (connp->conn_ixa == NULL)
2152*bd670b35SErik Nordmark 		return (ENOMEM);
2153*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_refcnt = 1;
2154*bd670b35SErik Nordmark 	connp->conn_ixa->ixa_xmit_hint = CONN_TO_XMIT_HINT(connp);
2155fc80c0dfSnordmark 	return (0);
2156fc80c0dfSnordmark }
2157fc80c0dfSnordmark 
2158fc80c0dfSnordmark /* ARGSUSED */
2159fc80c0dfSnordmark static void
2160fc80c0dfSnordmark rts_conn_destructor(void *buf, void *cdrarg)
21617c478bd9Sstevel@tonic-gate {
2162fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2163fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2164fc80c0dfSnordmark 	rts_t	*rts = (rts_t *)&itc[1];
2165fc80c0dfSnordmark 
2166fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
2167fc80c0dfSnordmark 	ASSERT(rts->rts_connp == connp);
2168fc80c0dfSnordmark 	ASSERT(connp->conn_rts == rts);
2169fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2170fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2171*bd670b35SErik Nordmark 	rw_destroy(&connp->conn_ilg_lock);
2172*bd670b35SErik Nordmark 
2173*bd670b35SErik Nordmark 	/* Can be NULL if constructor failed */
2174*bd670b35SErik Nordmark 	if (connp->conn_ixa != NULL) {
2175*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_refcnt == 1);
2176*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_ire == NULL);
2177*bd670b35SErik Nordmark 		ASSERT(connp->conn_ixa->ixa_nce == NULL);
2178*bd670b35SErik Nordmark 		ixa_refrele(connp->conn_ixa);
21790f1702c5SYu Xiangning 	}
21800f1702c5SYu Xiangning }
21810f1702c5SYu Xiangning 
2182fc80c0dfSnordmark /*
2183fc80c0dfSnordmark  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2184fc80c0dfSnordmark  * in the conn_t.
2185*bd670b35SErik Nordmark  *
2186*bd670b35SErik Nordmark  * Below we list all the pointers in the conn_t as a documentation aid.
2187*bd670b35SErik Nordmark  * The ones that we can not ASSERT to be NULL are #ifdef'ed out.
2188*bd670b35SErik Nordmark  * If you add any pointers to the conn_t please add an ASSERT here
2189*bd670b35SErik Nordmark  * and #ifdef it out if it can't be actually asserted to be NULL.
2190*bd670b35SErik Nordmark  * In any case, we bzero most of the conn_t at the end of the function.
2191fc80c0dfSnordmark  */
2192fc80c0dfSnordmark void
2193fc80c0dfSnordmark ipcl_conn_cleanup(conn_t *connp)
2194fc80c0dfSnordmark {
2195*bd670b35SErik Nordmark 	ip_xmit_attr_t	*ixa;
2196*bd670b35SErik Nordmark 
2197fc80c0dfSnordmark 	ASSERT(connp->conn_latch == NULL);
2198*bd670b35SErik Nordmark 	ASSERT(connp->conn_latch_in_policy == NULL);
2199*bd670b35SErik Nordmark 	ASSERT(connp->conn_latch_in_action == NULL);
2200fc80c0dfSnordmark #ifdef notdef
2201fc80c0dfSnordmark 	ASSERT(connp->conn_rq == NULL);
2202fc80c0dfSnordmark 	ASSERT(connp->conn_wq == NULL);
2203fc80c0dfSnordmark #endif
2204fc80c0dfSnordmark 	ASSERT(connp->conn_cred == NULL);
2205fc80c0dfSnordmark 	ASSERT(connp->conn_g_fanout == NULL);
2206fc80c0dfSnordmark 	ASSERT(connp->conn_g_next == NULL);
2207fc80c0dfSnordmark 	ASSERT(connp->conn_g_prev == NULL);
2208fc80c0dfSnordmark 	ASSERT(connp->conn_policy == NULL);
2209fc80c0dfSnordmark 	ASSERT(connp->conn_fanout == NULL);
2210fc80c0dfSnordmark 	ASSERT(connp->conn_next == NULL);
2211fc80c0dfSnordmark 	ASSERT(connp->conn_prev == NULL);
2212fc80c0dfSnordmark 	ASSERT(connp->conn_oper_pending_ill == NULL);
2213fc80c0dfSnordmark 	ASSERT(connp->conn_ilg == NULL);
2214fc80c0dfSnordmark 	ASSERT(connp->conn_drain_next == NULL);
2215fc80c0dfSnordmark 	ASSERT(connp->conn_drain_prev == NULL);
2216a9737be2Snordmark #ifdef notdef
2217a9737be2Snordmark 	/* conn_idl is not cleared when removed from idl list */
2218fc80c0dfSnordmark 	ASSERT(connp->conn_idl == NULL);
2219a9737be2Snordmark #endif
2220fc80c0dfSnordmark 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
2221*bd670b35SErik Nordmark #ifdef notdef
2222*bd670b35SErik Nordmark 	/* conn_netstack is cleared by the caller; needed by ixa_cleanup */
2223fc80c0dfSnordmark 	ASSERT(connp->conn_netstack == NULL);
2224*bd670b35SErik Nordmark #endif
2225fc80c0dfSnordmark 
22260f1702c5SYu Xiangning 	ASSERT(connp->conn_helper_info == NULL);
2227*bd670b35SErik Nordmark 	ASSERT(connp->conn_ixa != NULL);
2228*bd670b35SErik Nordmark 	ixa = connp->conn_ixa;
2229*bd670b35SErik Nordmark 	ASSERT(ixa->ixa_refcnt == 1);
2230*bd670b35SErik Nordmark 	/* Need to preserve ixa_protocol */
2231*bd670b35SErik Nordmark 	ixa_cleanup(ixa);
2232*bd670b35SErik Nordmark 	ixa->ixa_flags = 0;
2233*bd670b35SErik Nordmark 
2234fc80c0dfSnordmark 	/* Clear out the conn_t fields that are not preserved */
2235fc80c0dfSnordmark 	bzero(&connp->conn_start_clr,
2236fc80c0dfSnordmark 	    sizeof (conn_t) -
2237fc80c0dfSnordmark 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
22387c478bd9Sstevel@tonic-gate }
22397c478bd9Sstevel@tonic-gate 
22407c478bd9Sstevel@tonic-gate /*
22417c478bd9Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
22427c478bd9Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
22437c478bd9Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
22447c478bd9Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
22457c478bd9Sstevel@tonic-gate  * as follows.
22467c478bd9Sstevel@tonic-gate  *
22477c478bd9Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
22487c478bd9Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
22497c478bd9Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
22507c478bd9Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
22517c478bd9Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
22527c478bd9Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
22537c478bd9Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
22547c478bd9Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
22557c478bd9Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
22567c478bd9Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
22577c478bd9Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
22587c478bd9Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
22597c478bd9Sstevel@tonic-gate  */
22607c478bd9Sstevel@tonic-gate void
22617c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
22627c478bd9Sstevel@tonic-gate {
22637c478bd9Sstevel@tonic-gate 	int	index;
2264f4b3ec61Sdh 	struct connf_s	*connfp;
2265f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
22667c478bd9Sstevel@tonic-gate 
22677c478bd9Sstevel@tonic-gate 	/*
22687c478bd9Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
22697c478bd9Sstevel@tonic-gate 	 * in the global lists is sufficient.
22707c478bd9Sstevel@tonic-gate 	 */
2271f4b3ec61Sdh 	ipst->ips_conn_g_index++;
2272f4b3ec61Sdh 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
22737c478bd9Sstevel@tonic-gate 
22747c478bd9Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
22757c478bd9Sstevel@tonic-gate 	/*
22767c478bd9Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
22777c478bd9Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
22787c478bd9Sstevel@tonic-gate 	 */
22797c478bd9Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
22807c478bd9Sstevel@tonic-gate 
2281f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
22827c478bd9Sstevel@tonic-gate 	/* Insert at the head of the list */
2283f4b3ec61Sdh 	mutex_enter(&connfp->connf_lock);
2284f4b3ec61Sdh 	connp->conn_g_next = connfp->connf_head;
22857c478bd9Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
22867c478bd9Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
2287f4b3ec61Sdh 	connfp->connf_head = connp;
22887c478bd9Sstevel@tonic-gate 
22897c478bd9Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
2290f4b3ec61Sdh 	connp->conn_g_fanout = connfp;
22917c478bd9Sstevel@tonic-gate 
2292f4b3ec61Sdh 	mutex_exit(&connfp->connf_lock);
22937c478bd9Sstevel@tonic-gate }
22947c478bd9Sstevel@tonic-gate 
22957c478bd9Sstevel@tonic-gate void
22967c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
22977c478bd9Sstevel@tonic-gate {
2298f4b3ec61Sdh 	struct connf_s	*connfp;
2299f4b3ec61Sdh 
23007c478bd9Sstevel@tonic-gate 	/*
23017c478bd9Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
23027c478bd9Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
23037c478bd9Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
23047c478bd9Sstevel@tonic-gate 	 */
23057c478bd9Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
23067c478bd9Sstevel@tonic-gate 		return;
23077c478bd9Sstevel@tonic-gate 
2308f4b3ec61Sdh 	connfp = connp->conn_g_fanout;
2309f4b3ec61Sdh 	mutex_enter(&connfp->connf_lock);
23107c478bd9Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
23117c478bd9Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
23127c478bd9Sstevel@tonic-gate 	else
2313f4b3ec61Sdh 		connfp->connf_head = connp->conn_g_next;
23147c478bd9Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
23157c478bd9Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2316f4b3ec61Sdh 	mutex_exit(&connfp->connf_lock);
23177c478bd9Sstevel@tonic-gate 
23187c478bd9Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
23197c478bd9Sstevel@tonic-gate 	connp->conn_g_next = NULL;
23207c478bd9Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
2321fc80c0dfSnordmark 	connp->conn_g_fanout = NULL;
23227c478bd9Sstevel@tonic-gate }
23237c478bd9Sstevel@tonic-gate 
23247c478bd9Sstevel@tonic-gate /*
23257c478bd9Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
2326*bd670b35SErik Nordmark  * With the specified argument for each.
23277c478bd9Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
23287c478bd9Sstevel@tonic-gate  *
2329*bd670b35SErik Nordmark  * CONNs may hold pointers to ills (conn_dhcpinit_ill and
2330*bd670b35SErik Nordmark  * conn_oper_pending_ill). To guard against stale pointers
23317c478bd9Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
23327c478bd9Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
23337c478bd9Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
23347c478bd9Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
23357c478bd9Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
23367c478bd9Sstevel@tonic-gate  * is created to the struct that is going away.
23377c478bd9Sstevel@tonic-gate  */
23387c478bd9Sstevel@tonic-gate void
2339f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
23407c478bd9Sstevel@tonic-gate {
23417c478bd9Sstevel@tonic-gate 	int	i;
23427c478bd9Sstevel@tonic-gate 	conn_t	*connp;
23437c478bd9Sstevel@tonic-gate 	conn_t	*prev_connp;
23447c478bd9Sstevel@tonic-gate 
23457c478bd9Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2346f4b3ec61Sdh 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23477c478bd9Sstevel@tonic-gate 		prev_connp = NULL;
2348f4b3ec61Sdh 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
23497c478bd9Sstevel@tonic-gate 		while (connp != NULL) {
23507c478bd9Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
23517c478bd9Sstevel@tonic-gate 			if (connp->conn_state_flags &
23527c478bd9Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
23537c478bd9Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
23547c478bd9Sstevel@tonic-gate 				connp = connp->conn_g_next;
23557c478bd9Sstevel@tonic-gate 				continue;
23567c478bd9Sstevel@tonic-gate 			}
23577c478bd9Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
23587c478bd9Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
2359f4b3ec61Sdh 			mutex_exit(
2360f4b3ec61Sdh 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23617c478bd9Sstevel@tonic-gate 			(*func)(connp, arg);
23627c478bd9Sstevel@tonic-gate 			if (prev_connp != NULL)
23637c478bd9Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
2364f4b3ec61Sdh 			mutex_enter(
2365f4b3ec61Sdh 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23667c478bd9Sstevel@tonic-gate 			prev_connp = connp;
23677c478bd9Sstevel@tonic-gate 			connp = connp->conn_g_next;
23687c478bd9Sstevel@tonic-gate 		}
2369f4b3ec61Sdh 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
23707c478bd9Sstevel@tonic-gate 		if (prev_connp != NULL)
23717c478bd9Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
23727c478bd9Sstevel@tonic-gate 	}
23737c478bd9Sstevel@tonic-gate }
23747c478bd9Sstevel@tonic-gate 
23757c478bd9Sstevel@tonic-gate /*
23767c478bd9Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
23777c478bd9Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
23787c478bd9Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2379d0ab37afSethindra  * (peer tcp in ESTABLISHED state).
23807c478bd9Sstevel@tonic-gate  */
23817c478bd9Sstevel@tonic-gate conn_t *
2382*bd670b35SErik Nordmark ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcpha_t *tcpha,
2383f4b3ec61Sdh     ip_stack_t *ipst)
23847c478bd9Sstevel@tonic-gate {
23857c478bd9Sstevel@tonic-gate 	uint32_t ports;
23867c478bd9Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
23877c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
23887c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
23897c478bd9Sstevel@tonic-gate 	boolean_t zone_chk;
23907c478bd9Sstevel@tonic-gate 
23917c478bd9Sstevel@tonic-gate 	/*
23927c478bd9Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
23937c478bd9Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
23947c478bd9Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
23957c478bd9Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
23967c478bd9Sstevel@tonic-gate 	 */
23977c478bd9Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
23987c478bd9Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
23997c478bd9Sstevel@tonic-gate 
2400*bd670b35SErik Nordmark 	pports[0] = tcpha->tha_fport;
2401*bd670b35SErik Nordmark 	pports[1] = tcpha->tha_lport;
24027c478bd9Sstevel@tonic-gate 
2403f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2404f4b3ec61Sdh 	    ports, ipst)];
24057c478bd9Sstevel@tonic-gate 
24067c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
24077c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
24087c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
24097c478bd9Sstevel@tonic-gate 
24107c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
24117c478bd9Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2412d0ab37afSethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24137c478bd9Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24147c478bd9Sstevel@tonic-gate 
24157c478bd9Sstevel@tonic-gate 			ASSERT(tconnp != connp);
24167c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
24177c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
24187c478bd9Sstevel@tonic-gate 			return (tconnp);
24197c478bd9Sstevel@tonic-gate 		}
24207c478bd9Sstevel@tonic-gate 	}
24217c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
24227c478bd9Sstevel@tonic-gate 	return (NULL);
24237c478bd9Sstevel@tonic-gate }
24247c478bd9Sstevel@tonic-gate 
24257c478bd9Sstevel@tonic-gate /*
24267c478bd9Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
24277c478bd9Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
24287c478bd9Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2429d0ab37afSethindra  * (peer tcp in ESTABLISHED state).
24307c478bd9Sstevel@tonic-gate  */
24317c478bd9Sstevel@tonic-gate conn_t *
2432*bd670b35SErik Nordmark ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcpha_t *tcpha,
2433f4b3ec61Sdh     ip_stack_t *ipst)
24347c478bd9Sstevel@tonic-gate {
24357c478bd9Sstevel@tonic-gate 	uint32_t ports;
24367c478bd9Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
24377c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
24387c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
24397c478bd9Sstevel@tonic-gate 	boolean_t zone_chk;
24407c478bd9Sstevel@tonic-gate 
24417c478bd9Sstevel@tonic-gate 	/*
24427c478bd9Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
24437c478bd9Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
24447c478bd9Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
24457c478bd9Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
24467c478bd9Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
24477c478bd9Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
24487c478bd9Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
24497c478bd9Sstevel@tonic-gate 	 */
24507c478bd9Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
24517c478bd9Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
24527c478bd9Sstevel@tonic-gate 
2453*bd670b35SErik Nordmark 	pports[0] = tcpha->tha_fport;
2454*bd670b35SErik Nordmark 	pports[1] = tcpha->tha_lport;
24557c478bd9Sstevel@tonic-gate 
2456f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2457f4b3ec61Sdh 	    ports, ipst)];
24587c478bd9Sstevel@tonic-gate 
24597c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
24607c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
24617c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
24627c478bd9Sstevel@tonic-gate 
2463*bd670b35SErik Nordmark 		/* We skip conn_bound_if check here as this is loopback tcp */
24647c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
24657c478bd9Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2466d0ab37afSethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24677c478bd9Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24687c478bd9Sstevel@tonic-gate 
24697c478bd9Sstevel@tonic-gate 			ASSERT(tconnp != connp);
24707c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
24717c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
24727c478bd9Sstevel@tonic-gate 			return (tconnp);
24737c478bd9Sstevel@tonic-gate 		}
24747c478bd9Sstevel@tonic-gate 	}
24757c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
24767c478bd9Sstevel@tonic-gate 	return (NULL);
24777c478bd9Sstevel@tonic-gate }
24787c478bd9Sstevel@tonic-gate 
24797c478bd9Sstevel@tonic-gate /*
24807c478bd9Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
24817c478bd9Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
24827c478bd9Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
24837c478bd9Sstevel@tonic-gate  */
24847c478bd9Sstevel@tonic-gate conn_t *
2485*bd670b35SErik Nordmark ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcpha_t *tcpha, int min_state,
2486f4b3ec61Sdh     ip_stack_t *ipst)
24877c478bd9Sstevel@tonic-gate {
24887c478bd9Sstevel@tonic-gate 	uint32_t ports;
24897c478bd9Sstevel@tonic-gate 	uint16_t *pports;
24907c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
24917c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
24927c478bd9Sstevel@tonic-gate 
24937c478bd9Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
2494*bd670b35SErik Nordmark 	pports[0] = tcpha->tha_fport;
2495*bd670b35SErik Nordmark 	pports[1] = tcpha->tha_lport;
24967c478bd9Sstevel@tonic-gate 
2497f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2498121e5416Skcpoon 	    ports, ipst)];
24997c478bd9Sstevel@tonic-gate 
25007c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25017c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25027c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25037c478bd9Sstevel@tonic-gate 
25047c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
25057c478bd9Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
25067c478bd9Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
25077c478bd9Sstevel@tonic-gate 
25087c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25097c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25107c478bd9Sstevel@tonic-gate 			return (tconnp);
25117c478bd9Sstevel@tonic-gate 		}
25127c478bd9Sstevel@tonic-gate 	}
25137c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25147c478bd9Sstevel@tonic-gate 	return (NULL);
25157c478bd9Sstevel@tonic-gate }
25167c478bd9Sstevel@tonic-gate 
25177c478bd9Sstevel@tonic-gate /*
25187c478bd9Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
25197c478bd9Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
25207c478bd9Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
25217c478bd9Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
25227c478bd9Sstevel@tonic-gate  */
25237c478bd9Sstevel@tonic-gate conn_t *
25247c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2525f4b3ec61Sdh     uint_t ifindex, ip_stack_t *ipst)
25267c478bd9Sstevel@tonic-gate {
25277c478bd9Sstevel@tonic-gate 	tcp_t	*tcp;
25287c478bd9Sstevel@tonic-gate 	uint32_t ports;
25297c478bd9Sstevel@tonic-gate 	uint16_t *pports;
25307c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
25317c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
25327c478bd9Sstevel@tonic-gate 
25337c478bd9Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
25347c478bd9Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
25357c478bd9Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
25367c478bd9Sstevel@tonic-gate 
2537f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2538121e5416Skcpoon 	    ports, ipst)];
25397c478bd9Sstevel@tonic-gate 
25407c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25417c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25427c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25437c478bd9Sstevel@tonic-gate 
25447c478bd9Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
25457c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
25467c478bd9Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
25477c478bd9Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
2548*bd670b35SErik Nordmark 		    (tconnp->conn_bound_if == 0 ||
2549*bd670b35SErik Nordmark 		    tconnp->conn_bound_if == ifindex)) {
25507c478bd9Sstevel@tonic-gate 
25517c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25527c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25537c478bd9Sstevel@tonic-gate 			return (tconnp);
25547c478bd9Sstevel@tonic-gate 		}
25557c478bd9Sstevel@tonic-gate 	}
25567c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25577c478bd9Sstevel@tonic-gate 	return (NULL);
25587c478bd9Sstevel@tonic-gate }
25597c478bd9Sstevel@tonic-gate 
25607c478bd9Sstevel@tonic-gate /*
256145916cd2Sjpk  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
256245916cd2Sjpk  * a listener when changing state.
25637c478bd9Sstevel@tonic-gate  */
25647c478bd9Sstevel@tonic-gate conn_t *
2565f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2566f4b3ec61Sdh     ip_stack_t *ipst)
25677c478bd9Sstevel@tonic-gate {
25687c478bd9Sstevel@tonic-gate 	connf_t		*bind_connfp;
25697c478bd9Sstevel@tonic-gate 	conn_t		*connp;
25707c478bd9Sstevel@tonic-gate 	tcp_t		*tcp;
25717c478bd9Sstevel@tonic-gate 
25727c478bd9Sstevel@tonic-gate 	/*
25737c478bd9Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
25747c478bd9Sstevel@tonic-gate 	 * all zeros.
25757c478bd9Sstevel@tonic-gate 	 */
25767c478bd9Sstevel@tonic-gate 	if (laddr == 0)
25777c478bd9Sstevel@tonic-gate 		return (NULL);
25787c478bd9Sstevel@tonic-gate 
257945916cd2Sjpk 	ASSERT(zoneid != ALL_ZONES);
258045916cd2Sjpk 
2581f4b3ec61Sdh 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
25827c478bd9Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
25837c478bd9Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
25847c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
25857c478bd9Sstevel@tonic-gate 		tcp = connp->conn_tcp;
25867c478bd9Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
25875d0bc3edSsommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
25887c478bd9Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
25897c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
25907c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
25917c478bd9Sstevel@tonic-gate 			return (connp);
25927c478bd9Sstevel@tonic-gate 		}
25937c478bd9Sstevel@tonic-gate 	}
25947c478bd9Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
25957c478bd9Sstevel@tonic-gate 	return (NULL);
25967c478bd9Sstevel@tonic-gate }
25977c478bd9Sstevel@tonic-gate 
259845916cd2Sjpk /*
259945916cd2Sjpk  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
260045916cd2Sjpk  * a listener when changing state.
260145916cd2Sjpk  */
26027c478bd9Sstevel@tonic-gate conn_t *
26037c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2604f4b3ec61Sdh     zoneid_t zoneid, ip_stack_t *ipst)
26057c478bd9Sstevel@tonic-gate {
26067c478bd9Sstevel@tonic-gate 	connf_t		*bind_connfp;
26077c478bd9Sstevel@tonic-gate 	conn_t		*connp = NULL;
26087c478bd9Sstevel@tonic-gate 	tcp_t		*tcp;
26097c478bd9Sstevel@tonic-gate 
26107c478bd9Sstevel@tonic-gate 	/*
26117c478bd9Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
26127c478bd9Sstevel@tonic-gate 	 * all zeros.
26137c478bd9Sstevel@tonic-gate 	 */
26147c478bd9Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
26157c478bd9Sstevel@tonic-gate 		return (NULL);
26167c478bd9Sstevel@tonic-gate 
261745916cd2Sjpk 	ASSERT(zoneid != ALL_ZONES);
26187c478bd9Sstevel@tonic-gate 
2619f4b3ec61Sdh 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
26207c478bd9Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
26217c478bd9Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
26227c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
26237c478bd9Sstevel@tonic-gate 		tcp = connp->conn_tcp;
26247c478bd9Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
26255d0bc3edSsommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
2626*bd670b35SErik Nordmark 		    (connp->conn_bound_if == 0 ||
2627*bd670b35SErik Nordmark 		    connp->conn_bound_if == ifindex) &&
26287c478bd9Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
26297c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
26307c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
26317c478bd9Sstevel@tonic-gate 			return (connp);
26327c478bd9Sstevel@tonic-gate 		}
26337c478bd9Sstevel@tonic-gate 	}
26347c478bd9Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
26357c478bd9Sstevel@tonic-gate 	return (NULL);
26367c478bd9Sstevel@tonic-gate }
26377c478bd9Sstevel@tonic-gate 
2638ff550d0eSmasputra /*
2639ff550d0eSmasputra  * ipcl_get_next_conn
2640ff550d0eSmasputra  *	get the next entry in the conn global list
2641ff550d0eSmasputra  *	and put a reference on the next_conn.
2642ff550d0eSmasputra  *	decrement the reference on the current conn.
2643ff550d0eSmasputra  *
2644ff550d0eSmasputra  * This is an iterator based walker function that also provides for
2645ff550d0eSmasputra  * some selection by the caller. It walks through the conn_hash bucket
2646ff550d0eSmasputra  * searching for the next valid connp in the list, and selects connections
2647ff550d0eSmasputra  * that are neither closed nor condemned. It also REFHOLDS the conn
2648ff550d0eSmasputra  * thus ensuring that the conn exists when the caller uses the conn.
2649ff550d0eSmasputra  */
2650ff550d0eSmasputra conn_t *
2651ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2652ff550d0eSmasputra {
2653ff550d0eSmasputra 	conn_t	*next_connp;
2654ff550d0eSmasputra 
2655ff550d0eSmasputra 	if (connfp == NULL)
2656ff550d0eSmasputra 		return (NULL);
2657ff550d0eSmasputra 
2658ff550d0eSmasputra 	mutex_enter(&connfp->connf_lock);
2659ff550d0eSmasputra 
2660ff550d0eSmasputra 	next_connp = (connp == NULL) ?
2661ff550d0eSmasputra 	    connfp->connf_head : connp->conn_g_next;
2662ff550d0eSmasputra 
2663ff550d0eSmasputra 	while (next_connp != NULL) {
2664ff550d0eSmasputra 		mutex_enter(&next_connp->conn_lock);
2665ff550d0eSmasputra 		if (!(next_connp->conn_flags & conn_flags) ||
2666ff550d0eSmasputra 		    (next_connp->conn_state_flags &
2667ff550d0eSmasputra 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2668ff550d0eSmasputra 			/*
2669ff550d0eSmasputra 			 * This conn has been condemned or
2670ff550d0eSmasputra 			 * is closing, or the flags don't match
2671ff550d0eSmasputra 			 */
2672ff550d0eSmasputra 			mutex_exit(&next_connp->conn_lock);
2673ff550d0eSmasputra 			next_connp = next_connp->conn_g_next;
2674ff550d0eSmasputra 			continue;
2675ff550d0eSmasputra 		}
2676ff550d0eSmasputra 		CONN_INC_REF_LOCKED(next_connp);
2677ff550d0eSmasputra 		mutex_exit(&next_connp->conn_lock);
2678ff550d0eSmasputra 		break;
2679ff550d0eSmasputra 	}
2680ff550d0eSmasputra 
2681ff550d0eSmasputra 	mutex_exit(&connfp->connf_lock);
2682ff550d0eSmasputra 
2683ff550d0eSmasputra 	if (connp != NULL)
2684ff550d0eSmasputra 		CONN_DEC_REF(connp);
2685ff550d0eSmasputra 
2686ff550d0eSmasputra 	return (next_connp);
2687ff550d0eSmasputra }
2688ff550d0eSmasputra 
26897c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG
26907c478bd9Sstevel@tonic-gate /*
26917c478bd9Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
26927c478bd9Sstevel@tonic-gate  */
26937c478bd9Sstevel@tonic-gate int
26947c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
26957c478bd9Sstevel@tonic-gate {
26967c478bd9Sstevel@tonic-gate 	int	last;
26977c478bd9Sstevel@tonic-gate 	conn_trace_t	*ctb;
26987c478bd9Sstevel@tonic-gate 
26997c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
27007c478bd9Sstevel@tonic-gate 	last = connp->conn_trace_last;
27017c478bd9Sstevel@tonic-gate 	last++;
27027c478bd9Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
27037c478bd9Sstevel@tonic-gate 		last = 0;
27047c478bd9Sstevel@tonic-gate 
27057c478bd9Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
27066a8288c7Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27077c478bd9Sstevel@tonic-gate 	connp->conn_trace_last = last;
27087c478bd9Sstevel@tonic-gate 	return (1);
27097c478bd9Sstevel@tonic-gate }
27107c478bd9Sstevel@tonic-gate 
27117c478bd9Sstevel@tonic-gate int
27127c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
27137c478bd9Sstevel@tonic-gate {
27147c478bd9Sstevel@tonic-gate 	int	last;
27157c478bd9Sstevel@tonic-gate 	conn_trace_t	*ctb;
27167c478bd9Sstevel@tonic-gate 
27177c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
27187c478bd9Sstevel@tonic-gate 	last = connp->conn_trace_last;
27197c478bd9Sstevel@tonic-gate 	last++;
27207c478bd9Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
27217c478bd9Sstevel@tonic-gate 		last = 0;
27227c478bd9Sstevel@tonic-gate 
27237c478bd9Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
27246a8288c7Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27257c478bd9Sstevel@tonic-gate 	connp->conn_trace_last = last;
27267c478bd9Sstevel@tonic-gate 	return (1);
27277c478bd9Sstevel@tonic-gate }
27287c478bd9Sstevel@tonic-gate #endif
2729