17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ee4701baSericheng  * Common Development and Distribution License (the "License").
6ee4701baSericheng  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2274e20cfeSnh  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
277c478bd9Sstevel@tonic-gate 
2845916cd2Sjpk const char ipclassifier_version[] = "@(#)ipclassifier.c	%I%	%E% SMI";
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate /*
317c478bd9Sstevel@tonic-gate  * IP PACKET CLASSIFIER
327c478bd9Sstevel@tonic-gate  *
337c478bd9Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
347c478bd9Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
357c478bd9Sstevel@tonic-gate  * interface for managing connection states.
367c478bd9Sstevel@tonic-gate  *
377c478bd9Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
387c478bd9Sstevel@tonic-gate  * other things:
397c478bd9Sstevel@tonic-gate  *
407c478bd9Sstevel@tonic-gate  *	o local/remote address and ports
417c478bd9Sstevel@tonic-gate  *	o Transport protocol
427c478bd9Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
437c478bd9Sstevel@tonic-gate  *	o reference counter
447c478bd9Sstevel@tonic-gate  *	o Connection state
457c478bd9Sstevel@tonic-gate  *	o hash table linkage
467c478bd9Sstevel@tonic-gate  *	o interface/ire information
477c478bd9Sstevel@tonic-gate  *	o credentials
487c478bd9Sstevel@tonic-gate  *	o ipsec policy
497c478bd9Sstevel@tonic-gate  *	o send and receive functions.
507c478bd9Sstevel@tonic-gate  *	o mutex lock.
517c478bd9Sstevel@tonic-gate  *
527c478bd9Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
537c478bd9Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
547c478bd9Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
557c478bd9Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
567c478bd9Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
577c478bd9Sstevel@tonic-gate  * before its processing is finished).
587c478bd9Sstevel@tonic-gate  *
597c478bd9Sstevel@tonic-gate  * send and receive functions are currently used for TCP only. The send function
607c478bd9Sstevel@tonic-gate  * determines the IP entry point for the packet once it leaves TCP to be sent to
617c478bd9Sstevel@tonic-gate  * the destination address. The receive function is used by IP when the packet
627c478bd9Sstevel@tonic-gate  * should be passed for TCP processing. When a new connection is created these
637c478bd9Sstevel@tonic-gate  * are set to ip_output() and tcp_input() respectively. During the lifetime of
647c478bd9Sstevel@tonic-gate  * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
677c478bd9Sstevel@tonic-gate  * tcp_conn_request().  This allows incoming SYNs to go directly into the
687c478bd9Sstevel@tonic-gate  * listener SYN processing function without going to tcp_input() first.
697c478bd9Sstevel@tonic-gate  *
707c478bd9Sstevel@tonic-gate  * Classifier uses several hash tables:
717c478bd9Sstevel@tonic-gate  *
727c478bd9Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
737c478bd9Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
747c478bd9Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
757c478bd9Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
767c478bd9Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
777c478bd9Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
787c478bd9Sstevel@tonic-gate  *
797c478bd9Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
807c478bd9Sstevel@tonic-gate  * which need to view all existing connections.
817c478bd9Sstevel@tonic-gate  *
827c478bd9Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
837c478bd9Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
847c478bd9Sstevel@tonic-gate  * first, followed by the connection lock.
857c478bd9Sstevel@tonic-gate  *
867c478bd9Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
877c478bd9Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
887c478bd9Sstevel@tonic-gate  * when the caller has finished processing the connection.
897c478bd9Sstevel@tonic-gate  *
907c478bd9Sstevel@tonic-gate  *
917c478bd9Sstevel@tonic-gate  * INTERFACES:
927c478bd9Sstevel@tonic-gate  * ===========
937c478bd9Sstevel@tonic-gate  *
947c478bd9Sstevel@tonic-gate  * Connection Lookup:
957c478bd9Sstevel@tonic-gate  * ------------------
967c478bd9Sstevel@tonic-gate  *
97*f4b3ec61Sdh  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
98*f4b3ec61Sdh  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
997c478bd9Sstevel@tonic-gate  *
1007c478bd9Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
1017c478bd9Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
1027c478bd9Sstevel@tonic-gate  * reference counter is incremented.
1037c478bd9Sstevel@tonic-gate  *
1047c478bd9Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
1057c478bd9Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
1067c478bd9Sstevel@tonic-gate  *		and TCP or UDP header.
1077c478bd9Sstevel@tonic-gate  *
1087c478bd9Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1097c478bd9Sstevel@tonic-gate  *
1107c478bd9Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1117c478bd9Sstevel@tonic-gate  *		 the packet.
1127c478bd9Sstevel@tonic-gate  *
11345916cd2Sjpk  * 	zoneid: The zone in which the returned connection must be; the zoneid
11445916cd2Sjpk  *		corresponding to the ire_zoneid on the IRE located for the
11545916cd2Sjpk  *		packet's destination address.
1167c478bd9Sstevel@tonic-gate  *
1177c478bd9Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1187c478bd9Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1197c478bd9Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
1207c478bd9Sstevel@tonic-gate  *		3-tuple {dst, remote port, protocol} lookup in
1217c478bd9Sstevel@tonic-gate  *			ipcl_bind_fanout table.
1227c478bd9Sstevel@tonic-gate  *
1237c478bd9Sstevel@tonic-gate  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
1247c478bd9Sstevel@tonic-gate  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
1267c478bd9Sstevel@tonic-gate  *	to multiple UDP clients, which is handled in IP itself.
1277c478bd9Sstevel@tonic-gate  *
12845916cd2Sjpk  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
12945916cd2Sjpk  * determine which actual zone gets the segment.  This is used only in a
13045916cd2Sjpk  * labeled environment.  The matching rules are:
13145916cd2Sjpk  *
13245916cd2Sjpk  *	- If it's not a multilevel port, then the label on the packet selects
13345916cd2Sjpk  *	  the zone.  Unlabeled packets are delivered to the global zone.
13445916cd2Sjpk  *
13545916cd2Sjpk  *	- If it's a multilevel port, then only the zone registered to receive
13645916cd2Sjpk  *	  packets on that port matches.
13745916cd2Sjpk  *
13845916cd2Sjpk  * Also, in a labeled environment, packet labels need to be checked.  For fully
13945916cd2Sjpk  * bound TCP connections, we can assume that the packet label was checked
14045916cd2Sjpk  * during connection establishment, and doesn't need to be checked on each
14145916cd2Sjpk  * packet.  For others, though, we need to check for strict equality or, for
14245916cd2Sjpk  * multilevel ports, membership in the range or set.  This part currently does
14345916cd2Sjpk  * a tnrh lookup on each packet, but could be optimized to use cached results
14445916cd2Sjpk  * if that were necessary.  (SCTP doesn't come through here, but if it did,
14545916cd2Sjpk  * we would apply the same rules as TCP.)
14645916cd2Sjpk  *
14745916cd2Sjpk  * An implication of the above is that fully-bound TCP sockets must always use
14845916cd2Sjpk  * distinct 4-tuples; they can't be discriminated by label alone.
14945916cd2Sjpk  *
15045916cd2Sjpk  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
15145916cd2Sjpk  * as there's no connection set-up handshake and no shared state.
15245916cd2Sjpk  *
15345916cd2Sjpk  * Labels on looped-back packets within a single zone do not need to be
15445916cd2Sjpk  * checked, as all processes in the same zone have the same label.
15545916cd2Sjpk  *
15645916cd2Sjpk  * Finally, for unlabeled packets received by a labeled system, special rules
15745916cd2Sjpk  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
15845916cd2Sjpk  * socket in the zone whose label matches the default label of the sender, if
15945916cd2Sjpk  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
16045916cd2Sjpk  * receiver's label must dominate the sender's default label.
16145916cd2Sjpk  *
162*f4b3ec61Sdh  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
163*f4b3ec61Sdh  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
164*f4b3ec61Sdh  *					 ip_stack);
1657c478bd9Sstevel@tonic-gate  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
1687c478bd9Sstevel@tonic-gate  *	ports are read from the IP and TCP header respectively.
1697c478bd9Sstevel@tonic-gate  *
170*f4b3ec61Sdh  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
171*f4b3ec61Sdh  *					 zoneid, ip_stack);
172*f4b3ec61Sdh  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
173*f4b3ec61Sdh  *					 zoneid, ip_stack);
1747c478bd9Sstevel@tonic-gate  *
1757c478bd9Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1767c478bd9Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1777c478bd9Sstevel@tonic-gate  * 	parameter interface index is also compared.
1787c478bd9Sstevel@tonic-gate  *
179*f4b3ec61Sdh  * void ipcl_walk(func, arg, ip_stack)
1807c478bd9Sstevel@tonic-gate  *
1817c478bd9Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1827c478bd9Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1837c478bd9Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1847c478bd9Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1857c478bd9Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1867c478bd9Sstevel@tonic-gate  *
1877c478bd9Sstevel@tonic-gate  * Table Updates
1887c478bd9Sstevel@tonic-gate  * -------------
1897c478bd9Sstevel@tonic-gate  *
1907c478bd9Sstevel@tonic-gate  * int ipcl_conn_insert(connp, protocol, src, dst, ports)
1917c478bd9Sstevel@tonic-gate  * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
1927c478bd9Sstevel@tonic-gate  *
1937c478bd9Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
1957c478bd9Sstevel@tonic-gate  *		connp		conn_t to be inserted
1967c478bd9Sstevel@tonic-gate  *		protocol	connection protocol
1977c478bd9Sstevel@tonic-gate  *		src		source address
1987c478bd9Sstevel@tonic-gate  *		dst		destination address
1997c478bd9Sstevel@tonic-gate  *		ports		local and remote port
2007c478bd9Sstevel@tonic-gate  *		ifindex		interface index for IPv6 connections
2017c478bd9Sstevel@tonic-gate  *
2027c478bd9Sstevel@tonic-gate  *	Return value :
2037c478bd9Sstevel@tonic-gate  *		0		if connp was inserted
2047c478bd9Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
2057c478bd9Sstevel@tonic-gate  *				already exists.
2067c478bd9Sstevel@tonic-gate  *
2077c478bd9Sstevel@tonic-gate  * int ipcl_bind_insert(connp, protocol, src, lport);
2087c478bd9Sstevel@tonic-gate  * int ipcl_bind_insert_v6(connp, protocol, src, lport);
2097c478bd9Sstevel@tonic-gate  *
2107c478bd9Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
 * 	Arguments :
2127c478bd9Sstevel@tonic-gate  * 		connp		conn_t to be inserted
2137c478bd9Sstevel@tonic-gate  * 		protocol	connection protocol
2147c478bd9Sstevel@tonic-gate  * 		src		source address connection wants
2157c478bd9Sstevel@tonic-gate  * 				to bind to
2167c478bd9Sstevel@tonic-gate  * 		lport		local port connection wants to
2177c478bd9Sstevel@tonic-gate  * 				bind to
2187c478bd9Sstevel@tonic-gate  *
2197c478bd9Sstevel@tonic-gate  *
2207c478bd9Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
2217c478bd9Sstevel@tonic-gate  *
2227c478bd9Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
2237c478bd9Sstevel@tonic-gate  *
2247c478bd9Sstevel@tonic-gate  * Connection Creation/Destruction
2257c478bd9Sstevel@tonic-gate  * -------------------------------
2267c478bd9Sstevel@tonic-gate  *
227*f4b3ec61Sdh  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
2287c478bd9Sstevel@tonic-gate  *
2297c478bd9Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
2307c478bd9Sstevel@tonic-gate  * 	globalhash table.
2317c478bd9Sstevel@tonic-gate  *
2327c478bd9Sstevel@tonic-gate  *	type:	This flag determines the type of conn_t which needs to be
2337c478bd9Sstevel@tonic-gate  *		created.
2347c478bd9Sstevel@tonic-gate  *		IPCL_TCPCONN	indicates a TCP connection
 *		IPCL_IPCCONN	indicates all non-TCP connections.
2367c478bd9Sstevel@tonic-gate  *
2377c478bd9Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2387c478bd9Sstevel@tonic-gate  *
2397c478bd9Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2407c478bd9Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2417c478bd9Sstevel@tonic-gate  */
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate #include <sys/types.h>
2447c478bd9Sstevel@tonic-gate #include <sys/stream.h>
2457c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
2467c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
2477c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
2487c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
2497c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2507c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
2517c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
2527c478bd9Sstevel@tonic-gate #include <sys/debug.h>
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate #include <sys/systm.h>
2557c478bd9Sstevel@tonic-gate #include <sys/param.h>
2567c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
2577c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
2587c478bd9Sstevel@tonic-gate #include <inet/common.h>
2597c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
2607c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h>
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate #include <inet/ip.h>
2637c478bd9Sstevel@tonic-gate #include <inet/ip6.h>
2647c478bd9Sstevel@tonic-gate #include <inet/tcp.h>
2657c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h>
266ff550d0eSmasputra #include <inet/udp_impl.h>
2677c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h>
268*f4b3ec61Sdh #include <inet/sctp/sctp_impl.h>
2697c478bd9Sstevel@tonic-gate 
2707c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h>
2737c478bd9Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2747c478bd9Sstevel@tonic-gate 
27545916cd2Sjpk #include <sys/tsol/tnet.h>
27645916cd2Sjpk 
/*
 * Classifier debug tracing is compiled in only when the kernel is built
 * with DEBUG.
 */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

/*
 * IPCL_DEBUG_LVL(level, args)
 *
 *	level:	bit mask tested against ipcl_debug_level.
 *	args:	a fully parenthesised printf argument list, for example
 *		("fmt %d", val).
 *
 * When IPCL_DEBUG is defined, 'args' is printed if any bit of 'level' is
 * set in ipcl_debug_level; otherwise the macro expands to nothing useful.
 * Both forms are wrapped in do { } while (0) so that the macro behaves as
 * exactly one statement: it must be followed by a semicolon and can be used
 * safely as the unbraced body of an if/else.
 */
#ifdef	IPCL_DEBUG
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)					\
	do {								\
		if (ipcl_debug_level & (level)) {			\
			printf args;					\
		}							\
	} while (0)
#else
#define	IPCL_DEBUG_LVL(level, args)	do { } while (0)
#endif
/*
 * Old value for compatibility. Settable in /etc/system.  Only consulted by
 * ipcl_init() when ipcl_conn_hash_size is zero.
 */
uint_t tcp_conn_hash_size = 0;

/* New value. Zero means choose automatically.  Settable in /etc/system */
uint_t ipcl_conn_hash_size = 0;
/*
 * Used only for the automatic choice: the conn fanout gets one bucket per
 * ipcl_conn_hash_memfactor bytes of free memory ...
 */
uint_t ipcl_conn_hash_memfactor = 8192;
/* ... but no more than ipcl_conn_hash_maxsize buckets are requested. */
uint_t ipcl_conn_hash_maxsize = 82500;

/* bind/udp fanout table size */
uint_t ipcl_bind_fanout_size = 512;
uint_t ipcl_udp_fanout_size = 16384;

/* Raw socket fanout size.  Must be a power of 2. */
uint_t ipcl_raw_fanout_size = 256;
3047c478bd9Sstevel@tonic-gate 
/*
 * Power of 2^N Primes useful for hashing for N of 0-28,
 * these primes are the nearest prime <= 2^N - 2^(N-2).
 *
 * The table is indexed by N.  The entries for N of 0-2 and the final,
 * terminating entry are 0; ipcl_init() treats a selected value of 0 as
 * "out of range" and substitutes the N = 16 entry.
 */

#define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
		50331599, 100663291, 201326557, 0}
3147c478bd9Sstevel@tonic-gate 
/*
 * wrapper structure to ensure that conn+tcpb are aligned
 * on cache lines.
 *
 * An itc_t holds a conn_t and a tcp_t in a single buffer; it is the object
 * type of ipcl_tcpconn_cache.  The conn_t shares a union with a filler
 * array of CACHE_ALIGN(conn_s) bytes, so the tcp_t that follows does not
 * begin until after that many bytes (CACHE_ALIGN is defined elsewhere;
 * presumably it rounds the conn size up to a cache line multiple).
 */
typedef struct itc_s {
	union {
		conn_t	itcu_conn;
		char	itcu_filler[CACHE_ALIGN(conn_s)];
	}	itc_u;
	tcp_t	itc_tcp;
} itc_t;

/* Shorthand for reaching the conn_t embedded in an itc_t. */
#define	itc_conn	itc_u.itcu_conn
3287c478bd9Sstevel@tonic-gate 
/*
 * kmem caches created by ipcl_g_init(): ipcl_tcpconn_cache holds itc_t
 * buffers (conn_t plus tcp_t), ipcl_conn_cache holds plain conn_t buffers.
 */
struct kmem_cache  *ipcl_tcpconn_cache;
struct kmem_cache  *ipcl_conn_cache;
/* Caches defined outside this file. */
extern struct kmem_cache  *sctp_conn_cache;
extern struct kmem_cache  *tcp_sack_info_cache;
extern struct kmem_cache  *tcp_iphc_cache;

/* TCP timer mblk helpers defined outside this file. */
extern void	tcp_timermp_free(tcp_t *);
extern mblk_t	*tcp_timermp_alloc(int);

/* Constructor/destructor callbacks registered for ipcl_tcpconn_cache. */
static int	ipcl_tcpconn_constructor(void *, void *, int);
static void	ipcl_tcpconn_destructor(void *, void *);
3407c478bd9Sstevel@tonic-gate 
3417c478bd9Sstevel@tonic-gate #ifdef	IPCL_DEBUG
3427c478bd9Sstevel@tonic-gate #define	INET_NTOA_BUFSIZE	18
3437c478bd9Sstevel@tonic-gate 
/*
 * Format the IPv4 address 'in' as dotted-decimal text into the caller's
 * buffer 'b' and return 'b'.  The four bytes of 'in' are printed in the
 * order they are stored in memory.  The longest possible result,
 * "255.255.255.255" plus the terminating NUL, needs 16 bytes.
 */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	const unsigned char	*octet = (const unsigned char *)&in;

	(void) sprintf(b, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);

	return (b);
}
3537c478bd9Sstevel@tonic-gate #endif
3547c478bd9Sstevel@tonic-gate 
3557c478bd9Sstevel@tonic-gate /*
356*f4b3ec61Sdh  * Global (for all stack instances) init routine
3577c478bd9Sstevel@tonic-gate  */
3587c478bd9Sstevel@tonic-gate void
359*f4b3ec61Sdh ipcl_g_init(void)
3607c478bd9Sstevel@tonic-gate {
3617c478bd9Sstevel@tonic-gate 	ipcl_conn_cache = kmem_cache_create("ipcl_conn_cache",
3627c478bd9Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
363ff550d0eSmasputra 	    NULL, NULL, NULL, NULL, NULL, 0);
3647c478bd9Sstevel@tonic-gate 
3657c478bd9Sstevel@tonic-gate 	ipcl_tcpconn_cache = kmem_cache_create("ipcl_tcpconn_cache",
3667c478bd9Sstevel@tonic-gate 	    sizeof (itc_t), CACHE_ALIGN_SIZE,
3677c478bd9Sstevel@tonic-gate 	    ipcl_tcpconn_constructor, ipcl_tcpconn_destructor,
3687c478bd9Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
369*f4b3ec61Sdh }
370*f4b3ec61Sdh 
371*f4b3ec61Sdh /*
372*f4b3ec61Sdh  * ipclassifier intialization routine, sets up hash tables.
373*f4b3ec61Sdh  */
374*f4b3ec61Sdh void
375*f4b3ec61Sdh ipcl_init(ip_stack_t *ipst)
376*f4b3ec61Sdh {
377*f4b3ec61Sdh 	int i;
378*f4b3ec61Sdh 	int sizes[] = P2Ps();
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 	/*
381*f4b3ec61Sdh 	 * Calculate size of conn fanout table from /etc/system settings
3827c478bd9Sstevel@tonic-gate 	 */
3837c478bd9Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
384*f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
3857c478bd9Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
386*f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
3877c478bd9Sstevel@tonic-gate 	} else {
3887c478bd9Sstevel@tonic-gate 		extern pgcnt_t freemem;
3897c478bd9Sstevel@tonic-gate 
390*f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size =
3917c478bd9Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
3927c478bd9Sstevel@tonic-gate 
393*f4b3ec61Sdh 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
394*f4b3ec61Sdh 			ipst->ips_ipcl_conn_fanout_size =
395*f4b3ec61Sdh 			    ipcl_conn_hash_maxsize;
396*f4b3ec61Sdh 		}
3977c478bd9Sstevel@tonic-gate 	}
3987c478bd9Sstevel@tonic-gate 
3997c478bd9Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
400*f4b3ec61Sdh 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
4017c478bd9Sstevel@tonic-gate 			break;
4027c478bd9Sstevel@tonic-gate 		}
4037c478bd9Sstevel@tonic-gate 	}
404*f4b3ec61Sdh 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
4057c478bd9Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
406*f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
4077c478bd9Sstevel@tonic-gate 	}
4087c478bd9Sstevel@tonic-gate 
409*f4b3ec61Sdh 	/* Take values from /etc/system */
410*f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
411*f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
412*f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
413*f4b3ec61Sdh 
414*f4b3ec61Sdh 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
415*f4b3ec61Sdh 
416*f4b3ec61Sdh 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
417*f4b3ec61Sdh 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
418*f4b3ec61Sdh 
419*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
420*f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
4217c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4227c478bd9Sstevel@tonic-gate 	}
4237c478bd9Sstevel@tonic-gate 
424*f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
425*f4b3ec61Sdh 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
4267c478bd9Sstevel@tonic-gate 
427*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
428*f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
4297c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4307c478bd9Sstevel@tonic-gate 	}
4317c478bd9Sstevel@tonic-gate 
432*f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX *
433*f4b3ec61Sdh 	    sizeof (connf_t), KM_SLEEP);
434*f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
435*f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL,
4367c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4377c478bd9Sstevel@tonic-gate 	}
438*f4b3ec61Sdh 
439*f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
440*f4b3ec61Sdh 	    sizeof (connf_t), KM_SLEEP);
441*f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
442*f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
4437c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4447c478bd9Sstevel@tonic-gate 	}
4457c478bd9Sstevel@tonic-gate 
446*f4b3ec61Sdh 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
447*f4b3ec61Sdh 	mutex_init(&ipst->ips_rts_clients->connf_lock,
448*f4b3ec61Sdh 	    NULL, MUTEX_DEFAULT, NULL);
4497c478bd9Sstevel@tonic-gate 
450*f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
451*f4b3ec61Sdh 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
452*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
453*f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
4547c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4557c478bd9Sstevel@tonic-gate 	}
4567c478bd9Sstevel@tonic-gate 
457*f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
458*f4b3ec61Sdh 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
459*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
460*f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
4617c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4627c478bd9Sstevel@tonic-gate 	}
4637c478bd9Sstevel@tonic-gate 
464*f4b3ec61Sdh 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
465*f4b3ec61Sdh 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
4667c478bd9Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
467*f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
468*f4b3ec61Sdh 		    NULL, MUTEX_DEFAULT, NULL);
4697c478bd9Sstevel@tonic-gate 	}
4707c478bd9Sstevel@tonic-gate }
4717c478bd9Sstevel@tonic-gate 
/*
 * Global (for all stack instances) teardown routine: destroys the two kmem
 * caches that ipcl_g_init() created.
 */
void
ipcl_g_destroy(void)
{
	kmem_cache_destroy(ipcl_conn_cache);
	kmem_cache_destroy(ipcl_tcpconn_cache);
}
478*f4b3ec61Sdh 
479*f4b3ec61Sdh /*
480*f4b3ec61Sdh  * All user-level and kernel use of the stack must be gone
481*f4b3ec61Sdh  * by now.
482*f4b3ec61Sdh  */
483*f4b3ec61Sdh void
484*f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst)
485*f4b3ec61Sdh {
486*f4b3ec61Sdh 	int i;
487*f4b3ec61Sdh 
488*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
489*f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
490*f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
491*f4b3ec61Sdh 	}
492*f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
493*f4b3ec61Sdh 	    sizeof (connf_t));
494*f4b3ec61Sdh 	ipst->ips_ipcl_conn_fanout = NULL;
495*f4b3ec61Sdh 
496*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
497*f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
498*f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
499*f4b3ec61Sdh 	}
500*f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
501*f4b3ec61Sdh 	    sizeof (connf_t));
502*f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout = NULL;
503*f4b3ec61Sdh 
504*f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
505*f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL);
506*f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock);
507*f4b3ec61Sdh 	}
508*f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t));
509*f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout = NULL;
510*f4b3ec61Sdh 
511*f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
512*f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
513*f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
514*f4b3ec61Sdh 	}
515*f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
516*f4b3ec61Sdh 	    IPPROTO_MAX * sizeof (connf_t));
517*f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
518*f4b3ec61Sdh 
519*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
520*f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
521*f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
522*f4b3ec61Sdh 	}
523*f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
524*f4b3ec61Sdh 	    sizeof (connf_t));
525*f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout = NULL;
526*f4b3ec61Sdh 
527*f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
528*f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
529*f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
530*f4b3ec61Sdh 	}
531*f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
532*f4b3ec61Sdh 	    sizeof (connf_t));
533*f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout = NULL;
534*f4b3ec61Sdh 
535*f4b3ec61Sdh 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
536*f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
537*f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
538*f4b3ec61Sdh 	}
539*f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
540*f4b3ec61Sdh 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
541*f4b3ec61Sdh 	ipst->ips_ipcl_globalhash_fanout = NULL;
542*f4b3ec61Sdh 
543*f4b3ec61Sdh 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
544*f4b3ec61Sdh 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
545*f4b3ec61Sdh 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
546*f4b3ec61Sdh 	ipst->ips_rts_clients = NULL;
5477c478bd9Sstevel@tonic-gate }
5487c478bd9Sstevel@tonic-gate 
5497c478bd9Sstevel@tonic-gate /*
5507c478bd9Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
5517c478bd9Sstevel@tonic-gate  * and inserts it in the global hash table.
5527c478bd9Sstevel@tonic-gate  */
5537c478bd9Sstevel@tonic-gate conn_t *
554*f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
5557c478bd9Sstevel@tonic-gate {
5567c478bd9Sstevel@tonic-gate 	itc_t	*itc;
5577c478bd9Sstevel@tonic-gate 	conn_t	*connp;
558*f4b3ec61Sdh 	sctp_stack_t *sctps;
5597c478bd9Sstevel@tonic-gate 
5607c478bd9Sstevel@tonic-gate 	switch (type) {
5617c478bd9Sstevel@tonic-gate 	case IPCL_TCPCONN:
5627c478bd9Sstevel@tonic-gate 		if ((itc = kmem_cache_alloc(ipcl_tcpconn_cache,
5637c478bd9Sstevel@tonic-gate 		    sleep)) == NULL)
5647c478bd9Sstevel@tonic-gate 			return (NULL);
5657c478bd9Sstevel@tonic-gate 		connp = &itc->itc_conn;
5667c478bd9Sstevel@tonic-gate 		connp->conn_ref = 1;
567*f4b3ec61Sdh 		netstack_hold(ns);
568*f4b3ec61Sdh 		connp->conn_netstack = ns;
5697c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
5707c478bd9Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p tcp (%p)",
5717c478bd9Sstevel@tonic-gate 		    (void *)connp, (void *)connp->conn_tcp));
5727c478bd9Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
5737c478bd9Sstevel@tonic-gate 		break;
5747c478bd9Sstevel@tonic-gate 	case IPCL_SCTPCONN:
5757c478bd9Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
5767c478bd9Sstevel@tonic-gate 			return (NULL);
5777c478bd9Sstevel@tonic-gate 		connp->conn_flags = IPCL_SCTPCONN;
578*f4b3ec61Sdh 		sctps = ns->netstack_sctp;
579*f4b3ec61Sdh 		SCTP_G_Q_REFHOLD(sctps);
580*f4b3ec61Sdh 		netstack_hold(ns);
581*f4b3ec61Sdh 		connp->conn_netstack = ns;
5827c478bd9Sstevel@tonic-gate 		break;
5837c478bd9Sstevel@tonic-gate 	case IPCL_IPCCONN:
5847c478bd9Sstevel@tonic-gate 		connp = kmem_cache_alloc(ipcl_conn_cache, sleep);
5857c478bd9Sstevel@tonic-gate 		if (connp == NULL)
586ff550d0eSmasputra 			return (NULL);
5877c478bd9Sstevel@tonic-gate 		bzero(connp, sizeof (conn_t));
588ff550d0eSmasputra 		mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
5897c478bd9Sstevel@tonic-gate 		cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
590ff550d0eSmasputra 		connp->conn_flags = IPCL_IPCCONN;
5917c478bd9Sstevel@tonic-gate 		connp->conn_ref = 1;
592*f4b3ec61Sdh 		netstack_hold(ns);
593*f4b3ec61Sdh 		connp->conn_netstack = ns;
5947c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(1,
5957c478bd9Sstevel@tonic-gate 		    ("ipcl_conn_create: connp = %p\n", (void *)connp));
5967c478bd9Sstevel@tonic-gate 		ipcl_globalhash_insert(connp);
5977c478bd9Sstevel@tonic-gate 		break;
598ff550d0eSmasputra 	default:
599ff550d0eSmasputra 		connp = NULL;
600ff550d0eSmasputra 		ASSERT(0);
6017c478bd9Sstevel@tonic-gate 	}
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 	return (connp);
6047c478bd9Sstevel@tonic-gate }
6057c478bd9Sstevel@tonic-gate 
/*
 * Final teardown of a conn_t.
 *
 * The caller must not hold conn_lock, and the conn must have conn_ref == 0
 * and no cached ire (all three are ASSERTed).  The conn's creds are
 * released, it is taken out of the global hash, its cv is destroyed, and
 * then, depending on conn_flags:
 *
 *  - IPCL_TCPCONN: the latch/policy/tcp_stack_t references are dropped, the
 *    tcp state is freed, the itc_t is wiped and partly re-initialized, the
 *    netstack reference is released and the object is returned to
 *    ipcl_tcpconn_cache.
 *  - IPCL_SCTPCONN: the conn is handed to sctp_free().
 *  - otherwise: the netstack reference is released and the conn is returned
 *    to ipcl_conn_cache.
 */
void
ipcl_conn_destroy(conn_t *connp)
{
	mblk_t	*mp;
	netstack_t	*ns = connp->conn_netstack;

	ASSERT(!MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ref == 0);
	ASSERT(connp->conn_ire_cache == NULL);

	/*
	 * conn_peercred is released on its own only when it is a different
	 * cred from conn_cred; conn_cred itself is released just below.
	 */
	if (connp->conn_peercred != NULL &&
	    connp->conn_peercred != connp->conn_cred)
		crfree(connp->conn_peercred);
	connp->conn_peercred = NULL;

	if (connp->conn_cred != NULL) {
		crfree(connp->conn_cred);
		connp->conn_cred = NULL;
	}

	ipcl_globalhash_remove(connp);

	cv_destroy(&connp->conn_cv);
	if (connp->conn_flags & IPCL_TCPCONN) {
		tcp_t	*tcp = connp->conn_tcp;
		tcp_stack_t *tcps;

		ASSERT(tcp != NULL);
		tcps = tcp->tcp_tcps;
		/*
		 * While the tcp still points at its tcp_stack_t, drop the
		 * latch and policy references and then the tcp_stack_t
		 * reference itself.
		 */
		if (tcps != NULL) {
			if (connp->conn_latch != NULL) {
				IPLATCH_REFRELE(connp->conn_latch, ns);
				connp->conn_latch = NULL;
			}
			if (connp->conn_policy != NULL) {
				IPPH_REFRELE(connp->conn_policy, ns);
				connp->conn_policy = NULL;
			}
			tcp->tcp_tcps = NULL;
			TCPS_REFRELE(tcps);
		}

		mutex_destroy(&connp->conn_lock);
		tcp_free(tcp);
		/* Remember tcp_timercache; it is put back after the bzero(). */
		mp = tcp->tcp_timercache;
		tcp->tcp_cred = NULL;

		if (tcp->tcp_sack_info != NULL) {
			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
			kmem_cache_free(tcp_sack_info_cache,
			    tcp->tcp_sack_info);
		}
		/*
		 * A header buffer that was grown is freed with kmem_free();
		 * otherwise it is zeroed and returned to tcp_iphc_cache.
		 */
		if (tcp->tcp_iphc != NULL) {
			if (tcp->tcp_hdr_grown) {
				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
			} else {
				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
			}
			tcp->tcp_iphc_len = 0;
		}
		ASSERT(tcp->tcp_iphc_len == 0);

		ASSERT(connp->conn_latch == NULL);
		ASSERT(connp->conn_policy == NULL);

		/*
		 * Wipe the whole itc_t, then restore the handful of fields
		 * set below before the object goes back to the cache.
		 * NOTE(review): presumably the tcp_t lives inside the itc_t
		 * and these are the fields the cache expects to remain valid
		 * on a free object -- confirm against the cache constructor.
		 */
		bzero(connp, sizeof (itc_t));

		tcp->tcp_timercache = mp;
		connp->conn_tcp = tcp;
		connp->conn_flags = IPCL_TCPCONN;
		connp->conn_ulp = IPPROTO_TCP;
		tcp->tcp_connp = connp;
		/* ns was read before the bzero(); drop the hold it names. */
		if (ns != NULL) {
			ASSERT(tcp->tcp_tcps == NULL);
			connp->conn_netstack = NULL;
			netstack_rele(ns);
		}
		kmem_cache_free(ipcl_tcpconn_cache, connp);
	} else if (connp->conn_flags & IPCL_SCTPCONN) {
		/*
		 * Nothing else is released here for SCTP.
		 * NOTE(review): presumably sctp_free() drops the netstack
		 * hold and returns the conn to its cache -- confirm.
		 */
		ASSERT(ns != NULL);
		sctp_free(connp);
	} else {
		ASSERT(connp->conn_udp == NULL);
		mutex_destroy(&connp->conn_lock);
		if (ns != NULL) {
			connp->conn_netstack = NULL;
			netstack_rele(ns);
		}
		kmem_cache_free(ipcl_conn_cache, connp);
	}
}
6987c478bd9Sstevel@tonic-gate 
6997c478bd9Sstevel@tonic-gate /*
7007c478bd9Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
7017c478bd9Sstevel@tonic-gate  */
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate static void
7047c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
7057c478bd9Sstevel@tonic-gate {
7067c478bd9Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
7077c478bd9Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
7087c478bd9Sstevel@tonic-gate 
7097c478bd9Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
7107c478bd9Sstevel@tonic-gate 		sa_family_t	addr_family;
7117c478bd9Sstevel@tonic-gate 		uint8_t		*laddrp;
7127c478bd9Sstevel@tonic-gate 
7137c478bd9Sstevel@tonic-gate 		if (connp->conn_pkt_isv6) {
7147c478bd9Sstevel@tonic-gate 			addr_family = AF_INET6;
7157c478bd9Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
7167c478bd9Sstevel@tonic-gate 		} else {
7177c478bd9Sstevel@tonic-gate 			addr_family = AF_INET;
7187c478bd9Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source;
7197c478bd9Sstevel@tonic-gate 		}
7207c478bd9Sstevel@tonic-gate 		(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp,
7217c478bd9Sstevel@tonic-gate 		    connp->conn_lport);
7227c478bd9Sstevel@tonic-gate 	}
7237c478bd9Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
7247c478bd9Sstevel@tonic-gate }
7257c478bd9Sstevel@tonic-gate 
/*
 * Take connp off whatever fanout list it is currently on.
 *
 * The list is found through conn_fanout; if that is NULL the conn is on no
 * list and nothing is done.  Otherwise, under the bucket's connf_lock, the
 * conn is unlinked, its conn_fanout/conn_next/conn_prev are cleared, the
 * cluster listener registration (if any) is withdrawn through
 * ipcl_conn_unlisten(), and the reference the list held is dropped with
 * CONN_DEC_REF().  The caller must not hold conn_lock.
 *
 * We set the IPCL_REMOVED flag (instead of clearing the flag indicating
 * which table the conn belonged to). So for debugging we can see which hash
 * table this connection was in.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*connfp = (connp)->conn_fanout;				\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (connfp != NULL) {						\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&connfp->connf_lock);			\
		if ((connp)->conn_next != NULL)				\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		if ((connp)->conn_prev != NULL)				\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		else							\
			connfp->connf_head = (connp)->conn_next;	\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0)	\
			ipcl_conn_unlisten((connp));			\
		CONN_DEC_REF((connp));					\
		mutex_exit(&connfp->connf_lock);			\
	}								\
}
7567c478bd9Sstevel@tonic-gate 
/*
 * Function form of IPCL_HASH_REMOVE(): remove connp from the fanout list it
 * is on, if any.  The caller must not hold conn_lock (ASSERTed by the macro).
 */
void
ipcl_hash_remove(conn_t *connp)
{
	IPCL_HASH_REMOVE(connp);
}
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate /*
7647c478bd9Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
7657c478bd9Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
7667c478bd9Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
7677c478bd9Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
7687c478bd9Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
7697c478bd9Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
7707c478bd9Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
7717c478bd9Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
7727c478bd9Sstevel@tonic-gate  * improved performance.
7737c478bd9Sstevel@tonic-gate  */
7747c478bd9Sstevel@tonic-gate void
7757c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
7767c478bd9Sstevel@tonic-gate {
7777c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
7787c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
7797c478bd9Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
7807c478bd9Sstevel@tonic-gate 
7817c478bd9Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
7827c478bd9Sstevel@tonic-gate 		(connp)->conn_next->conn_prev =
7837c478bd9Sstevel@tonic-gate 			(connp)->conn_prev;
7847c478bd9Sstevel@tonic-gate 	}
7857c478bd9Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
7867c478bd9Sstevel@tonic-gate 		(connp)->conn_prev->conn_next =
7877c478bd9Sstevel@tonic-gate 			(connp)->conn_next;
7887c478bd9Sstevel@tonic-gate 	} else {
7897c478bd9Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
7907c478bd9Sstevel@tonic-gate 	}
7917c478bd9Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
7927c478bd9Sstevel@tonic-gate 	(connp)->conn_next = NULL;
7937c478bd9Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
7947c478bd9Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
7957c478bd9Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
7967c478bd9Sstevel@tonic-gate 	(connp)->conn_ref--;
7977c478bd9Sstevel@tonic-gate }
7987c478bd9Sstevel@tonic-gate 
/*
 * Push connp onto the head of the list in connfp and mark it
 * IPCL_CONNECTED (clearing IPCL_REMOVED).  The list takes a reference on
 * the conn through CONN_INC_REF().
 *
 * The conn must not be on any list (ASSERTed).  No lock is taken here;
 * IPCL_HASH_INSERT_CONNECTED() wraps this macro in connfp->connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_CONNECTED;						\
	CONN_INC_REF(connp);						\
}
8137c478bd9Sstevel@tonic-gate 
/*
 * Move connp to the head of the list in connfp as a connected entry:
 * first remove it from whatever list it is on (IPCL_HASH_REMOVE()), then
 * insert it under connfp->connf_lock.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
8227c478bd9Sstevel@tonic-gate 
/*
 * Move connp into the list in connfp as a bound entry.
 *
 * The conn is first removed from whatever list it is on
 * (IPCL_HASH_REMOVE()).  Then, under connfp->connf_lock, the list is walked
 * from the head up to the first entry whose conn_srcv6 satisfies
 * _IPCL_V4_MATCH_ANY(), and connp is linked in immediately before that
 * entry (or at the tail if there is none).  The conn is marked IPCL_BOUND
 * (clearing IPCL_REMOVED) and the list takes a reference on it through
 * CONN_INC_REF().
 *
 * Every use of the macro arguments is parenthesized, including the ones in
 * the debug message, so any pointer expression may be passed.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) {			\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
8517c478bd9Sstevel@tonic-gate 
/*
 * Move connp into the list in connfp as a wildcard entry.
 *
 * The conn is first removed from whatever list it is on
 * (IPCL_HASH_REMOVE()).  Then, under connfp->connf_lock, it is appended at
 * the tail of the list, with one exception: when connp's conn_srcv6 is a
 * v4-mapped address it is linked in immediately before the first entry in
 * the same zone whose conn_srcv6 is the unspecified address.  The conn is
 * marked IPCL_BOUND (clearing IPCL_REMOVED) and the list takes a reference
 * on it through CONN_INC_REF().
 *
 * While walking, prev always trails next by one node, so it is already the
 * correct predecessor when the walk stops early; it needs no adjustment.
 * Every use of connp is parenthesized so any pointer expression (for
 * example &obj) may be passed.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
8837c478bd9Sstevel@tonic-gate 
/*
 * Function form of IPCL_HASH_INSERT_WILDCARD(): move connp into the list in
 * connfp as a wildcard entry.
 */
void
ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
{
	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
}
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate void
8917c478bd9Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol)
8927c478bd9Sstevel@tonic-gate {
8937c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
894*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
8957c478bd9Sstevel@tonic-gate 
8967c478bd9Sstevel@tonic-gate 	ASSERT(connp != NULL);
89745916cd2Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
89845916cd2Sjpk 	    protocol == IPPROTO_ESP);
8997c478bd9Sstevel@tonic-gate 
9007c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
9017c478bd9Sstevel@tonic-gate 
9027c478bd9Sstevel@tonic-gate 	/* Insert it in the protocol hash */
903*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout[protocol];
9047c478bd9Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9057c478bd9Sstevel@tonic-gate }
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate void
9087c478bd9Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
9097c478bd9Sstevel@tonic-gate {
9107c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
911*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
9127c478bd9Sstevel@tonic-gate 
9137c478bd9Sstevel@tonic-gate 	ASSERT(connp != NULL);
91445916cd2Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
91545916cd2Sjpk 	    protocol == IPPROTO_ESP);
9167c478bd9Sstevel@tonic-gate 
9177c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
9187c478bd9Sstevel@tonic-gate 
9197c478bd9Sstevel@tonic-gate 	/* Insert it in the Bind Hash */
920*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
9217c478bd9Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9227c478bd9Sstevel@tonic-gate }
9237c478bd9Sstevel@tonic-gate 
9247c478bd9Sstevel@tonic-gate /*
9257c478bd9Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
9267c478bd9Sstevel@tonic-gate  * This may change later.
9277c478bd9Sstevel@tonic-gate  *
9287c478bd9Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
9297c478bd9Sstevel@tonic-gate  * lport is in network byte order.
9307c478bd9Sstevel@tonic-gate  */
9317c478bd9Sstevel@tonic-gate static int
9327c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
9337c478bd9Sstevel@tonic-gate {
9347c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
9357c478bd9Sstevel@tonic-gate 	conn_t	*oconnp;
936*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
9377c478bd9Sstevel@tonic-gate 
938*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
9417c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
9427c478bd9Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
9437c0c0508Skcpoon 	    oconnp = oconnp->conn_next) {
9447c478bd9Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
9457c478bd9Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
9467c478bd9Sstevel@tonic-gate 		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
9477c478bd9Sstevel@tonic-gate 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
9487c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
9497c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
9507c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
9517c478bd9Sstevel@tonic-gate 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
9527c478bd9Sstevel@tonic-gate 		    &connp->conn_srcv6))) {
9537c478bd9Sstevel@tonic-gate 			break;
9547c478bd9Sstevel@tonic-gate 		}
9557c478bd9Sstevel@tonic-gate 	}
9567c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
9577c478bd9Sstevel@tonic-gate 	if (oconnp != NULL)
9587c478bd9Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
9617c478bd9Sstevel@tonic-gate 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
9627c478bd9Sstevel@tonic-gate 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
9637c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
9647c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9657c478bd9Sstevel@tonic-gate 		} else {
9667c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
9677c478bd9Sstevel@tonic-gate 		}
9687c478bd9Sstevel@tonic-gate 	} else {
9697c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
9707c478bd9Sstevel@tonic-gate 	}
9717c478bd9Sstevel@tonic-gate 	return (0);
9727c478bd9Sstevel@tonic-gate }
9737c478bd9Sstevel@tonic-gate 
97445916cd2Sjpk /*
97545916cd2Sjpk  * Check for a MAC exemption conflict on a labeled system.  Note that for
97645916cd2Sjpk  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
97745916cd2Sjpk  * transport layer.  This check is for binding all other protocols.
97845916cd2Sjpk  *
97945916cd2Sjpk  * Returns true if there's a conflict.
98045916cd2Sjpk  */
98145916cd2Sjpk static boolean_t
982*f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
98345916cd2Sjpk {
98445916cd2Sjpk 	connf_t	*connfp;
98545916cd2Sjpk 	conn_t *tconn;
98645916cd2Sjpk 
987*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
98845916cd2Sjpk 	mutex_enter(&connfp->connf_lock);
98945916cd2Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
99045916cd2Sjpk 	    tconn = tconn->conn_next) {
99145916cd2Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
99245916cd2Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
99345916cd2Sjpk 			continue;
99445916cd2Sjpk 		/* If neither is exempt, then there's no conflict */
99545916cd2Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
99645916cd2Sjpk 			continue;
99745916cd2Sjpk 		/* If both are bound to different specific addrs, ok */
99845916cd2Sjpk 		if (connp->conn_src != INADDR_ANY &&
99945916cd2Sjpk 		    tconn->conn_src != INADDR_ANY &&
100045916cd2Sjpk 		    connp->conn_src != tconn->conn_src)
100145916cd2Sjpk 			continue;
100245916cd2Sjpk 		/* These two conflict; fail */
100345916cd2Sjpk 		break;
100445916cd2Sjpk 	}
100545916cd2Sjpk 	mutex_exit(&connfp->connf_lock);
100645916cd2Sjpk 	return (tconn != NULL);
100745916cd2Sjpk }
100845916cd2Sjpk 
100945916cd2Sjpk static boolean_t
1010*f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
101145916cd2Sjpk {
101245916cd2Sjpk 	connf_t	*connfp;
101345916cd2Sjpk 	conn_t *tconn;
101445916cd2Sjpk 
1015*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
101645916cd2Sjpk 	mutex_enter(&connfp->connf_lock);
101745916cd2Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
101845916cd2Sjpk 	    tconn = tconn->conn_next) {
101945916cd2Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
102045916cd2Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
102145916cd2Sjpk 			continue;
102245916cd2Sjpk 		/* If neither is exempt, then there's no conflict */
102345916cd2Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
102445916cd2Sjpk 			continue;
102545916cd2Sjpk 		/* If both are bound to different addrs, ok */
102645916cd2Sjpk 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) &&
102745916cd2Sjpk 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) &&
102845916cd2Sjpk 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6))
102945916cd2Sjpk 			continue;
103045916cd2Sjpk 		/* These two conflict; fail */
103145916cd2Sjpk 		break;
103245916cd2Sjpk 	}
103345916cd2Sjpk 	mutex_exit(&connfp->connf_lock);
103445916cd2Sjpk 	return (tconn != NULL);
103545916cd2Sjpk }
103645916cd2Sjpk 
10377c478bd9Sstevel@tonic-gate /*
10387c478bd9Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
10397c478bd9Sstevel@tonic-gate  */
10407c478bd9Sstevel@tonic-gate int
10417c478bd9Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
10427c478bd9Sstevel@tonic-gate {
10437c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
10447c478bd9Sstevel@tonic-gate #ifdef	IPCL_DEBUG
10457c478bd9Sstevel@tonic-gate 	char	buf[INET_NTOA_BUFSIZE];
10467c478bd9Sstevel@tonic-gate #endif
10477c478bd9Sstevel@tonic-gate 	int	ret = 0;
1048*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
10497c478bd9Sstevel@tonic-gate 
10507c478bd9Sstevel@tonic-gate 	ASSERT(connp);
10517c478bd9Sstevel@tonic-gate 
10527c478bd9Sstevel@tonic-gate 	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
10537c478bd9Sstevel@tonic-gate 	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));
10547c478bd9Sstevel@tonic-gate 
10557c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
10567c478bd9Sstevel@tonic-gate 	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
10577c478bd9Sstevel@tonic-gate 	connp->conn_lport = lport;
10587c478bd9Sstevel@tonic-gate 
10597c478bd9Sstevel@tonic-gate 	switch (protocol) {
10607c478bd9Sstevel@tonic-gate 	default:
1061*f4b3ec61Sdh 		if (is_system_labeled() &&
1062*f4b3ec61Sdh 		    check_exempt_conflict_v4(connp, ipst))
106345916cd2Sjpk 			return (EADDRINUSE);
106445916cd2Sjpk 		/* FALLTHROUGH */
106545916cd2Sjpk 	case IPPROTO_UDP:
10667c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
10677c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
10687c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - udp\n",
10697c478bd9Sstevel@tonic-gate 			    (void *)connp));
1070*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1071*f4b3ec61Sdh 			    IPCL_UDP_HASH(lport, ipst)];
10727c478bd9Sstevel@tonic-gate 		} else {
10737c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
10747c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - protocol\n",
10757c478bd9Sstevel@tonic-gate 			    (void *)connp));
1076*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
10777c478bd9Sstevel@tonic-gate 		}
10787c478bd9Sstevel@tonic-gate 
10797c478bd9Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
10807c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10817c478bd9Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
10827c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10837c478bd9Sstevel@tonic-gate 		} else {
10847c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10857c478bd9Sstevel@tonic-gate 		}
10867c478bd9Sstevel@tonic-gate 		break;
10877c478bd9Sstevel@tonic-gate 
10887c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
10897c478bd9Sstevel@tonic-gate 
10907c478bd9Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
109145916cd2Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1092*f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_bind_fanout[
1093*f4b3ec61Sdh 		    IPCL_BIND_HASH(lport, ipst)];
10947c478bd9Sstevel@tonic-gate 		if (connp->conn_src != INADDR_ANY) {
10957c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10967c478bd9Sstevel@tonic-gate 		} else {
10977c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10987c478bd9Sstevel@tonic-gate 		}
10997c478bd9Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
11007c478bd9Sstevel@tonic-gate 			ASSERT(!connp->conn_pkt_isv6);
11017c478bd9Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
11027c478bd9Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, AF_INET,
11037c478bd9Sstevel@tonic-gate 			    (uint8_t *)&connp->conn_bound_source, lport);
11047c478bd9Sstevel@tonic-gate 		}
11057c478bd9Sstevel@tonic-gate 		break;
11067c478bd9Sstevel@tonic-gate 
11077c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
11087c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
11097c478bd9Sstevel@tonic-gate 		break;
11107c478bd9Sstevel@tonic-gate 	}
11117c478bd9Sstevel@tonic-gate 
11127c478bd9Sstevel@tonic-gate 	return (ret);
11137c478bd9Sstevel@tonic-gate }
11147c478bd9Sstevel@tonic-gate 
11157c478bd9Sstevel@tonic-gate int
11167c478bd9Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
11177c478bd9Sstevel@tonic-gate     uint16_t lport)
11187c478bd9Sstevel@tonic-gate {
11197c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
11207c478bd9Sstevel@tonic-gate 	int	ret = 0;
1121*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
11227c478bd9Sstevel@tonic-gate 
11237c478bd9Sstevel@tonic-gate 	ASSERT(connp);
11247c478bd9Sstevel@tonic-gate 
11257c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
11267c478bd9Sstevel@tonic-gate 	connp->conn_srcv6 = *src;
11277c478bd9Sstevel@tonic-gate 	connp->conn_lport = lport;
11287c478bd9Sstevel@tonic-gate 
11297c478bd9Sstevel@tonic-gate 	switch (protocol) {
11307c478bd9Sstevel@tonic-gate 	default:
1131*f4b3ec61Sdh 		if (is_system_labeled() &&
1132*f4b3ec61Sdh 		    check_exempt_conflict_v6(connp, ipst))
113345916cd2Sjpk 			return (EADDRINUSE);
113445916cd2Sjpk 		/* FALLTHROUGH */
113545916cd2Sjpk 	case IPPROTO_UDP:
11367c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
11377c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
11387c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - udp\n",
11397c478bd9Sstevel@tonic-gate 			    (void *)connp));
1140*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1141*f4b3ec61Sdh 			    IPCL_UDP_HASH(lport, ipst)];
11427c478bd9Sstevel@tonic-gate 		} else {
11437c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
11447c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
11457c478bd9Sstevel@tonic-gate 			    (void *)connp));
1146*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
11477c478bd9Sstevel@tonic-gate 		}
11487c478bd9Sstevel@tonic-gate 
11497c478bd9Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
11507c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
11517c478bd9Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
11527c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11537c478bd9Sstevel@tonic-gate 		} else {
11547c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11557c478bd9Sstevel@tonic-gate 		}
11567c478bd9Sstevel@tonic-gate 		break;
11577c478bd9Sstevel@tonic-gate 
11587c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
11597c478bd9Sstevel@tonic-gate 		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */
11607c478bd9Sstevel@tonic-gate 
11617c478bd9Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
116245916cd2Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1163*f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_bind_fanout[
1164*f4b3ec61Sdh 		    IPCL_BIND_HASH(lport, ipst)];
11657c478bd9Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
11667c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11677c478bd9Sstevel@tonic-gate 		} else {
11687c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11697c478bd9Sstevel@tonic-gate 		}
11707c478bd9Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
11717c478bd9Sstevel@tonic-gate 			sa_family_t	addr_family;
11727c478bd9Sstevel@tonic-gate 			uint8_t		*laddrp;
11737c478bd9Sstevel@tonic-gate 
11747c478bd9Sstevel@tonic-gate 			if (connp->conn_pkt_isv6) {
11757c478bd9Sstevel@tonic-gate 				addr_family = AF_INET6;
11767c478bd9Sstevel@tonic-gate 				laddrp =
11777c478bd9Sstevel@tonic-gate 				    (uint8_t *)&connp->conn_bound_source_v6;
11787c478bd9Sstevel@tonic-gate 			} else {
11797c478bd9Sstevel@tonic-gate 				addr_family = AF_INET;
11807c478bd9Sstevel@tonic-gate 				laddrp = (uint8_t *)&connp->conn_bound_source;
11817c478bd9Sstevel@tonic-gate 			}
11827c478bd9Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
11837c478bd9Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp,
11847c478bd9Sstevel@tonic-gate 			    lport);
11857c478bd9Sstevel@tonic-gate 		}
11867c478bd9Sstevel@tonic-gate 		break;
11877c478bd9Sstevel@tonic-gate 
11887c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
11897c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
11907c478bd9Sstevel@tonic-gate 		break;
11917c478bd9Sstevel@tonic-gate 	}
11927c478bd9Sstevel@tonic-gate 
11937c478bd9Sstevel@tonic-gate 	return (ret);
11947c478bd9Sstevel@tonic-gate }
11957c478bd9Sstevel@tonic-gate 
11967c478bd9Sstevel@tonic-gate /*
11977c478bd9Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
11987c478bd9Sstevel@tonic-gate  */
11997c478bd9Sstevel@tonic-gate int
12007c478bd9Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
12017c478bd9Sstevel@tonic-gate     ipaddr_t rem, uint32_t ports)
12027c478bd9Sstevel@tonic-gate {
12037c478bd9Sstevel@tonic-gate 	connf_t		*connfp;
12047c478bd9Sstevel@tonic-gate 	uint16_t	*up;
12057c478bd9Sstevel@tonic-gate 	conn_t		*tconnp;
12067c478bd9Sstevel@tonic-gate #ifdef	IPCL_DEBUG
12077c478bd9Sstevel@tonic-gate 	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
12087c478bd9Sstevel@tonic-gate #endif
12097c478bd9Sstevel@tonic-gate 	in_port_t	lport;
12107c478bd9Sstevel@tonic-gate 	int		ret = 0;
1211*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
12127c478bd9Sstevel@tonic-gate 
12137c478bd9Sstevel@tonic-gate 	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
12147c478bd9Sstevel@tonic-gate 	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
12157c478bd9Sstevel@tonic-gate 	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
12167c478bd9Sstevel@tonic-gate 	    ports, protocol));
12177c478bd9Sstevel@tonic-gate 
12187c478bd9Sstevel@tonic-gate 	switch (protocol) {
12197c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
12207c478bd9Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
12217c478bd9Sstevel@tonic-gate 			/*
12227c478bd9Sstevel@tonic-gate 			 * for a eager connection, i.e connections which
12237c478bd9Sstevel@tonic-gate 			 * have just been created, the initialization is
12247c478bd9Sstevel@tonic-gate 			 * already done in ip at conn_creation time, so
12257c478bd9Sstevel@tonic-gate 			 * we can skip the checks here.
12267c478bd9Sstevel@tonic-gate 			 */
12277c478bd9Sstevel@tonic-gate 			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
12287c478bd9Sstevel@tonic-gate 		}
1229*f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_conn_fanout[
1230*f4b3ec61Sdh 		    IPCL_CONN_HASH(connp->conn_rem,
1231*f4b3ec61Sdh 		    connp->conn_ports, ipst)];
12327c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
12337c478bd9Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
12347c478bd9Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
12357c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
12367c478bd9Sstevel@tonic-gate 			    connp->conn_rem, connp->conn_src,
12377c478bd9Sstevel@tonic-gate 			    connp->conn_ports)) {
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 				/* Already have a conn. bail out */
12407c478bd9Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
12417c478bd9Sstevel@tonic-gate 				return (EADDRINUSE);
12427c478bd9Sstevel@tonic-gate 			}
12437c478bd9Sstevel@tonic-gate 		}
12447c478bd9Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
12457c478bd9Sstevel@tonic-gate 			/*
12467c478bd9Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
12477c478bd9Sstevel@tonic-gate 			 * rebind. Let it happen.
12487c478bd9Sstevel@tonic-gate 			 */
12497c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
12507c478bd9Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
12517c478bd9Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
12527c478bd9Sstevel@tonic-gate 		}
1253866ba9ddSjprakash 
1254866ba9ddSjprakash 		ASSERT(connp->conn_recv != NULL);
1255866ba9ddSjprakash 
12567c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
12577c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
12587c478bd9Sstevel@tonic-gate 		break;
12597c478bd9Sstevel@tonic-gate 
12607c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
12617c0c0508Skcpoon 		/*
12627c0c0508Skcpoon 		 * The raw socket may have already been bound, remove it
12637c0c0508Skcpoon 		 * from the hash first.
12647c0c0508Skcpoon 		 */
12657c0c0508Skcpoon 		IPCL_HASH_REMOVE(connp);
12667c0c0508Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
12677c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12687c478bd9Sstevel@tonic-gate 		break;
12697c478bd9Sstevel@tonic-gate 
12707c478bd9Sstevel@tonic-gate 	default:
127145916cd2Sjpk 		/*
127245916cd2Sjpk 		 * Check for conflicts among MAC exempt bindings.  For
127345916cd2Sjpk 		 * transports with port numbers, this is done by the upper
127445916cd2Sjpk 		 * level per-transport binding logic.  For all others, it's
127545916cd2Sjpk 		 * done here.
127645916cd2Sjpk 		 */
1277*f4b3ec61Sdh 		if (is_system_labeled() &&
1278*f4b3ec61Sdh 		    check_exempt_conflict_v4(connp, ipst))
127945916cd2Sjpk 			return (EADDRINUSE);
128045916cd2Sjpk 		/* FALLTHROUGH */
128145916cd2Sjpk 
128245916cd2Sjpk 	case IPPROTO_UDP:
12837c478bd9Sstevel@tonic-gate 		up = (uint16_t *)&ports;
12847c478bd9Sstevel@tonic-gate 		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
12857c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1286*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1287*f4b3ec61Sdh 			    IPCL_UDP_HASH(up[1], ipst)];
12887c478bd9Sstevel@tonic-gate 		} else {
1289*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
12907c478bd9Sstevel@tonic-gate 		}
12917c478bd9Sstevel@tonic-gate 
12927c478bd9Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
12937c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
12947c478bd9Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
12957c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12967c478bd9Sstevel@tonic-gate 		} else {
12977c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12987c478bd9Sstevel@tonic-gate 		}
12997c478bd9Sstevel@tonic-gate 		break;
13007c478bd9Sstevel@tonic-gate 	}
13017c478bd9Sstevel@tonic-gate 
13027c478bd9Sstevel@tonic-gate 	return (ret);
13037c478bd9Sstevel@tonic-gate }
13047c478bd9Sstevel@tonic-gate 
13057c478bd9Sstevel@tonic-gate int
13067c478bd9Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
13077c478bd9Sstevel@tonic-gate     const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
13087c478bd9Sstevel@tonic-gate {
13097c478bd9Sstevel@tonic-gate 	connf_t		*connfp;
13107c478bd9Sstevel@tonic-gate 	uint16_t	*up;
13117c478bd9Sstevel@tonic-gate 	conn_t		*tconnp;
13127c478bd9Sstevel@tonic-gate 	in_port_t	lport;
13137c478bd9Sstevel@tonic-gate 	int		ret = 0;
1314*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
13157c478bd9Sstevel@tonic-gate 
13167c478bd9Sstevel@tonic-gate 	switch (protocol) {
13177c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
13187c478bd9Sstevel@tonic-gate 		/* Just need to insert a conn struct */
13197c478bd9Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
13207c478bd9Sstevel@tonic-gate 			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
13217c478bd9Sstevel@tonic-gate 		}
1322*f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_conn_fanout[
1323*f4b3ec61Sdh 		    IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports,
1324*f4b3ec61Sdh 		    ipst)];
13257c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13267c478bd9Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
13277c478bd9Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
13287c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
13297c478bd9Sstevel@tonic-gate 			    connp->conn_remv6, connp->conn_srcv6,
13307c478bd9Sstevel@tonic-gate 			    connp->conn_ports) &&
13317c478bd9Sstevel@tonic-gate 			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
13327c478bd9Sstevel@tonic-gate 			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
13337c478bd9Sstevel@tonic-gate 				/* Already have a conn. bail out */
13347c478bd9Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
13357c478bd9Sstevel@tonic-gate 				return (EADDRINUSE);
13367c478bd9Sstevel@tonic-gate 			}
13377c478bd9Sstevel@tonic-gate 		}
13387c478bd9Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
13397c478bd9Sstevel@tonic-gate 			/*
13407c478bd9Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
13417c478bd9Sstevel@tonic-gate 			 * rebind. Let it happen.
13427c478bd9Sstevel@tonic-gate 			 */
13437c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13447c478bd9Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
13457c478bd9Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
13467c478bd9Sstevel@tonic-gate 		}
13477c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
13487c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13497c478bd9Sstevel@tonic-gate 		break;
13507c478bd9Sstevel@tonic-gate 
13517c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
13527c0c0508Skcpoon 		IPCL_HASH_REMOVE(connp);
13537c0c0508Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
13547c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
13557c478bd9Sstevel@tonic-gate 		break;
13567c478bd9Sstevel@tonic-gate 
13577c478bd9Sstevel@tonic-gate 	default:
1358*f4b3ec61Sdh 		if (is_system_labeled() &&
1359*f4b3ec61Sdh 		    check_exempt_conflict_v6(connp, ipst))
136045916cd2Sjpk 			return (EADDRINUSE);
136145916cd2Sjpk 		/* FALLTHROUGH */
136245916cd2Sjpk 	case IPPROTO_UDP:
13637c478bd9Sstevel@tonic-gate 		up = (uint16_t *)&ports;
13647c478bd9Sstevel@tonic-gate 		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
13657c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1366*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1367*f4b3ec61Sdh 			    IPCL_UDP_HASH(up[1], ipst)];
13687c478bd9Sstevel@tonic-gate 		} else {
1369*f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
13707c478bd9Sstevel@tonic-gate 		}
13717c478bd9Sstevel@tonic-gate 
13727c478bd9Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
13737c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
13747c478bd9Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
13757c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
13767c478bd9Sstevel@tonic-gate 		} else {
13777c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
13787c478bd9Sstevel@tonic-gate 		}
13797c478bd9Sstevel@tonic-gate 		break;
13807c478bd9Sstevel@tonic-gate 	}
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate 	return (ret);
13837c478bd9Sstevel@tonic-gate }
13847c478bd9Sstevel@tonic-gate 
13857c478bd9Sstevel@tonic-gate /*
13867c478bd9Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
13877c478bd9Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
13887c478bd9Sstevel@tonic-gate  * the reference held, null otherwise.
138945916cd2Sjpk  *
139045916cd2Sjpk  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
139145916cd2Sjpk  * Lookup" comment block are applied.  Labels are also checked as described
139245916cd2Sjpk  * above.  If the packet is from the inside (looped back), and is from the same
139345916cd2Sjpk  * zone, then label checks are omitted.
13947c478bd9Sstevel@tonic-gate  */
13957c478bd9Sstevel@tonic-gate conn_t *
1396*f4b3ec61Sdh ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
1397*f4b3ec61Sdh     ip_stack_t *ipst)
13987c478bd9Sstevel@tonic-gate {
13997c478bd9Sstevel@tonic-gate 	ipha_t	*ipha;
14007c478bd9Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
14017c478bd9Sstevel@tonic-gate 	uint16_t lport;
14027c478bd9Sstevel@tonic-gate 	uint16_t fport;
14037c478bd9Sstevel@tonic-gate 	uint32_t ports;
14047c478bd9Sstevel@tonic-gate 	conn_t	*connp;
14057c478bd9Sstevel@tonic-gate 	uint16_t  *up;
140645916cd2Sjpk 	boolean_t shared_addr;
140745916cd2Sjpk 	boolean_t unlabeled;
14087c478bd9Sstevel@tonic-gate 
14097c478bd9Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
14107c478bd9Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
14117c478bd9Sstevel@tonic-gate 
14127c478bd9Sstevel@tonic-gate 	switch (protocol) {
14137c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
14147c478bd9Sstevel@tonic-gate 		ports = *(uint32_t *)up;
14157c478bd9Sstevel@tonic-gate 		connfp =
1416*f4b3ec61Sdh 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1417*f4b3ec61Sdh 		    ports, ipst)];
14187c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
14197c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
14207c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
14217c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(connp, protocol,
14227c478bd9Sstevel@tonic-gate 			    ipha->ipha_src, ipha->ipha_dst, ports))
14237c478bd9Sstevel@tonic-gate 				break;
14247c478bd9Sstevel@tonic-gate 		}
14257c478bd9Sstevel@tonic-gate 
14267c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
142745916cd2Sjpk 			/*
142845916cd2Sjpk 			 * We have a fully-bound TCP connection.
142945916cd2Sjpk 			 *
143045916cd2Sjpk 			 * For labeled systems, there's no need to check the
143145916cd2Sjpk 			 * label here.  It's known to be good as we checked
143245916cd2Sjpk 			 * before allowing the connection to become bound.
143345916cd2Sjpk 			 */
14347c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
14357c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
14367c478bd9Sstevel@tonic-gate 			return (connp);
14377c478bd9Sstevel@tonic-gate 		}
14387c478bd9Sstevel@tonic-gate 
14397c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
14407c478bd9Sstevel@tonic-gate 
14417c478bd9Sstevel@tonic-gate 		lport = up[1];
144245916cd2Sjpk 		unlabeled = B_FALSE;
144345916cd2Sjpk 		/* Cred cannot be null on IPv4 */
144445916cd2Sjpk 		if (is_system_labeled())
144545916cd2Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
144645916cd2Sjpk 			    TSLF_UNLABELED) != 0;
144745916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
144845916cd2Sjpk 		if (shared_addr) {
1449*f4b3ec61Sdh 			/*
1450*f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1451*f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1452*f4b3ec61Sdh 			 */
145345916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
145445916cd2Sjpk 			/*
145545916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
145645916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
145745916cd2Sjpk 			 * search for the zone based on the packet label.
145845916cd2Sjpk 			 *
145945916cd2Sjpk 			 * If there is such a zone, we prefer to find a
146045916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
146145916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
146245916cd2Sjpk 			 * dominates the default label on the packet.
146345916cd2Sjpk 			 */
146445916cd2Sjpk 			if (zoneid == ALL_ZONES)
146545916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
146645916cd2Sjpk 			else
146745916cd2Sjpk 				unlabeled = B_FALSE;
146845916cd2Sjpk 		}
146945916cd2Sjpk 
1470*f4b3ec61Sdh 		bind_connfp =
1471*f4b3ec61Sdh 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
14727c478bd9Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
14737c478bd9Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
14747c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
147545916cd2Sjpk 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
14765d0bc3edSsommerfe 			    lport) && (IPCL_ZONE_MATCH(connp, zoneid) ||
147745916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
14787c478bd9Sstevel@tonic-gate 				break;
14797c478bd9Sstevel@tonic-gate 		}
14807c478bd9Sstevel@tonic-gate 
148145916cd2Sjpk 		/*
148245916cd2Sjpk 		 * If the matching connection is SLP on a private address, then
148345916cd2Sjpk 		 * the label on the packet must match the local zone's label.
148445916cd2Sjpk 		 * Otherwise, it must be in the label range defined by tnrh.
148545916cd2Sjpk 		 * This is ensured by tsol_receive_label.
148645916cd2Sjpk 		 */
148745916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
148845916cd2Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
148945916cd2Sjpk 		    shared_addr, connp)) {
149045916cd2Sjpk 				DTRACE_PROBE3(
149145916cd2Sjpk 				    tx__ip__log__info__classify__tcp,
149245916cd2Sjpk 				    char *,
149345916cd2Sjpk 				    "connp(1) could not receive mp(2)",
149445916cd2Sjpk 				    conn_t *, connp, mblk_t *, mp);
149545916cd2Sjpk 			connp = NULL;
149645916cd2Sjpk 		}
149745916cd2Sjpk 
14987c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
149945916cd2Sjpk 			/* Have a listener at least */
15007c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
15017c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
15027c478bd9Sstevel@tonic-gate 			return (connp);
15037c478bd9Sstevel@tonic-gate 		}
15047c478bd9Sstevel@tonic-gate 
15057c478bd9Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
15067c478bd9Sstevel@tonic-gate 
15077c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
15087c478bd9Sstevel@tonic-gate 		    ("ipcl_classify: couldn't classify mp = %p\n",
15097c478bd9Sstevel@tonic-gate 		    (void *)mp));
15107c478bd9Sstevel@tonic-gate 		break;
15117c478bd9Sstevel@tonic-gate 
15127c478bd9Sstevel@tonic-gate 	case IPPROTO_UDP:
15137c478bd9Sstevel@tonic-gate 		lport = up[1];
151445916cd2Sjpk 		unlabeled = B_FALSE;
151545916cd2Sjpk 		/* Cred cannot be null on IPv4 */
151645916cd2Sjpk 		if (is_system_labeled())
151745916cd2Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
151845916cd2Sjpk 			    TSLF_UNLABELED) != 0;
151945916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
152045916cd2Sjpk 		if (shared_addr) {
1521*f4b3ec61Sdh 			/*
1522*f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1523*f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1524*f4b3ec61Sdh 			 */
152545916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
152645916cd2Sjpk 			/*
152745916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
152845916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
152945916cd2Sjpk 			 * search for the zone based on the packet label.
153045916cd2Sjpk 			 *
153145916cd2Sjpk 			 * If there is such a zone, we prefer to find a
153245916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
153345916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
153445916cd2Sjpk 			 * dominates the default label on the packet.
153545916cd2Sjpk 			 */
153645916cd2Sjpk 			if (zoneid == ALL_ZONES)
153745916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
153845916cd2Sjpk 			else
153945916cd2Sjpk 				unlabeled = B_FALSE;
154045916cd2Sjpk 		}
15417c478bd9Sstevel@tonic-gate 		fport = up[0];
15427c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
1543*f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
15447c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
15457c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
15467c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
15477c478bd9Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
15487c478bd9Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
15495d0bc3edSsommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
155045916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
15517c478bd9Sstevel@tonic-gate 				break;
15527c478bd9Sstevel@tonic-gate 		}
15537c478bd9Sstevel@tonic-gate 
155445916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
155545916cd2Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
155645916cd2Sjpk 		    shared_addr, connp)) {
155745916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
155845916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
155945916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
156045916cd2Sjpk 			connp = NULL;
156145916cd2Sjpk 		}
156245916cd2Sjpk 
15637c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
15647c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
15657c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
15667c478bd9Sstevel@tonic-gate 			return (connp);
15677c478bd9Sstevel@tonic-gate 		}
15687c478bd9Sstevel@tonic-gate 
15697c478bd9Sstevel@tonic-gate 		/*
15707c478bd9Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
15717c478bd9Sstevel@tonic-gate 		 */
15727c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
15737c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
15747c478bd9Sstevel@tonic-gate 		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
15757c478bd9Sstevel@tonic-gate 		    lport, fport));
15767c478bd9Sstevel@tonic-gate 		break;
15777c478bd9Sstevel@tonic-gate 	}
15787c478bd9Sstevel@tonic-gate 
15797c478bd9Sstevel@tonic-gate 	return (NULL);
15807c478bd9Sstevel@tonic-gate }
15817c478bd9Sstevel@tonic-gate 
15827c478bd9Sstevel@tonic-gate conn_t *
1583*f4b3ec61Sdh ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
1584*f4b3ec61Sdh     ip_stack_t *ipst)
15857c478bd9Sstevel@tonic-gate {
15867c478bd9Sstevel@tonic-gate 	ip6_t		*ip6h;
15877c478bd9Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
15887c478bd9Sstevel@tonic-gate 	uint16_t	lport;
15897c478bd9Sstevel@tonic-gate 	uint16_t	fport;
15907c478bd9Sstevel@tonic-gate 	tcph_t		*tcph;
15917c478bd9Sstevel@tonic-gate 	uint32_t	ports;
15927c478bd9Sstevel@tonic-gate 	conn_t		*connp;
15937c478bd9Sstevel@tonic-gate 	uint16_t	*up;
159445916cd2Sjpk 	boolean_t	shared_addr;
159545916cd2Sjpk 	boolean_t	unlabeled;
15967c478bd9Sstevel@tonic-gate 
15977c478bd9Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
15987c478bd9Sstevel@tonic-gate 
15997c478bd9Sstevel@tonic-gate 	switch (protocol) {
16007c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
16017c478bd9Sstevel@tonic-gate 		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
16027c478bd9Sstevel@tonic-gate 		up = (uint16_t *)tcph->th_lport;
16037c478bd9Sstevel@tonic-gate 		ports = *(uint32_t *)up;
16047c478bd9Sstevel@tonic-gate 
16057c478bd9Sstevel@tonic-gate 		connfp =
1606*f4b3ec61Sdh 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1607*f4b3ec61Sdh 		    ports, ipst)];
16087c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16097c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16107c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
16117c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(connp, protocol,
16127c478bd9Sstevel@tonic-gate 			    ip6h->ip6_src, ip6h->ip6_dst, ports))
16137c478bd9Sstevel@tonic-gate 				break;
16147c478bd9Sstevel@tonic-gate 		}
16157c478bd9Sstevel@tonic-gate 
16167c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
161745916cd2Sjpk 			/*
161845916cd2Sjpk 			 * We have a fully-bound TCP connection.
161945916cd2Sjpk 			 *
162045916cd2Sjpk 			 * For labeled systems, there's no need to check the
162145916cd2Sjpk 			 * label here.  It's known to be good as we checked
162245916cd2Sjpk 			 * before allowing the connection to become bound.
162345916cd2Sjpk 			 */
16247c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
16257c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16267c478bd9Sstevel@tonic-gate 			return (connp);
16277c478bd9Sstevel@tonic-gate 		}
16287c478bd9Sstevel@tonic-gate 
16297c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
16307c478bd9Sstevel@tonic-gate 
16317c478bd9Sstevel@tonic-gate 		lport = up[1];
163245916cd2Sjpk 		unlabeled = B_FALSE;
163345916cd2Sjpk 		/* Cred can be null on IPv6 */
163445916cd2Sjpk 		if (is_system_labeled()) {
163545916cd2Sjpk 			cred_t *cr = DB_CRED(mp);
163645916cd2Sjpk 
163745916cd2Sjpk 			unlabeled = (cr != NULL &&
163845916cd2Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
163945916cd2Sjpk 		}
164045916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
164145916cd2Sjpk 		if (shared_addr) {
1642*f4b3ec61Sdh 			/*
1643*f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1644*f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1645*f4b3ec61Sdh 			 */
164645916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
164745916cd2Sjpk 			/*
164845916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
164945916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
165045916cd2Sjpk 			 * search for the zone based on the packet label.
165145916cd2Sjpk 			 *
165245916cd2Sjpk 			 * If there is such a zone, we prefer to find a
165345916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
165445916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
165545916cd2Sjpk 			 * dominates the default label on the packet.
165645916cd2Sjpk 			 */
165745916cd2Sjpk 			if (zoneid == ALL_ZONES)
165845916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
165945916cd2Sjpk 			else
166045916cd2Sjpk 				unlabeled = B_FALSE;
166145916cd2Sjpk 		}
166245916cd2Sjpk 
1663*f4b3ec61Sdh 		bind_connfp =
1664*f4b3ec61Sdh 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
16657c478bd9Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
16667c478bd9Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
16677c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
16687c478bd9Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
16697c478bd9Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
16705d0bc3edSsommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
167145916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
16727c478bd9Sstevel@tonic-gate 				break;
16737c478bd9Sstevel@tonic-gate 		}
16747c478bd9Sstevel@tonic-gate 
167545916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
167645916cd2Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
167745916cd2Sjpk 		    shared_addr, connp)) {
167845916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
167945916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
168045916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
168145916cd2Sjpk 			connp = NULL;
168245916cd2Sjpk 		}
168345916cd2Sjpk 
16847c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
16857c478bd9Sstevel@tonic-gate 			/* Have a listner at least */
16867c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
16877c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
16887c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(512,
16897c478bd9Sstevel@tonic-gate 			    ("ipcl_classify_v6: found listner "
16907c478bd9Sstevel@tonic-gate 			    "connp = %p\n", (void *)connp));
16917c478bd9Sstevel@tonic-gate 
16927c478bd9Sstevel@tonic-gate 			return (connp);
16937c478bd9Sstevel@tonic-gate 		}
16947c478bd9Sstevel@tonic-gate 
16957c478bd9Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
16967c478bd9Sstevel@tonic-gate 
16977c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
16987c478bd9Sstevel@tonic-gate 		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
16997c478bd9Sstevel@tonic-gate 		    (void *)mp));
17007c478bd9Sstevel@tonic-gate 		break;
17017c478bd9Sstevel@tonic-gate 
17027c478bd9Sstevel@tonic-gate 	case IPPROTO_UDP:
17037c478bd9Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
17047c478bd9Sstevel@tonic-gate 		lport = up[1];
170545916cd2Sjpk 		unlabeled = B_FALSE;
170645916cd2Sjpk 		/* Cred can be null on IPv6 */
170745916cd2Sjpk 		if (is_system_labeled()) {
170845916cd2Sjpk 			cred_t *cr = DB_CRED(mp);
170945916cd2Sjpk 
171045916cd2Sjpk 			unlabeled = (cr != NULL &&
171145916cd2Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
171245916cd2Sjpk 		}
171345916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
171445916cd2Sjpk 		if (shared_addr) {
1715*f4b3ec61Sdh 			/*
1716*f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1717*f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1718*f4b3ec61Sdh 			 */
171945916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
172045916cd2Sjpk 			/*
172145916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
172245916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
172345916cd2Sjpk 			 * search for the zone based on the packet label.
172445916cd2Sjpk 			 *
172545916cd2Sjpk 			 * If there is such a zone, we prefer to find a
172645916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
172745916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
172845916cd2Sjpk 			 * dominates the default label on the packet.
172945916cd2Sjpk 			 */
173045916cd2Sjpk 			if (zoneid == ALL_ZONES)
173145916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
173245916cd2Sjpk 			else
173345916cd2Sjpk 				unlabeled = B_FALSE;
173445916cd2Sjpk 		}
173545916cd2Sjpk 
17367c478bd9Sstevel@tonic-gate 		fport = up[0];
17377c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
17387c478bd9Sstevel@tonic-gate 		    fport));
1739*f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
17407c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
17417c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
17427c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
17437c478bd9Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
17447c478bd9Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
17455d0bc3edSsommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
174645916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
17477c478bd9Sstevel@tonic-gate 				break;
17487c478bd9Sstevel@tonic-gate 		}
17497c478bd9Sstevel@tonic-gate 
175045916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
175145916cd2Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
175245916cd2Sjpk 		    shared_addr, connp)) {
175345916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
175445916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
175545916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
175645916cd2Sjpk 			connp = NULL;
175745916cd2Sjpk 		}
175845916cd2Sjpk 
17597c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
17607c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
17617c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17627c478bd9Sstevel@tonic-gate 			return (connp);
17637c478bd9Sstevel@tonic-gate 		}
17647c478bd9Sstevel@tonic-gate 
17657c478bd9Sstevel@tonic-gate 		/*
17667c478bd9Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
17677c478bd9Sstevel@tonic-gate 		 */
17687c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17697c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
17707c478bd9Sstevel@tonic-gate 		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
17717c478bd9Sstevel@tonic-gate 		    lport, fport));
17727c478bd9Sstevel@tonic-gate 		break;
17737c478bd9Sstevel@tonic-gate 	}
17747c478bd9Sstevel@tonic-gate 
17757c478bd9Sstevel@tonic-gate 	return (NULL);
17767c478bd9Sstevel@tonic-gate }
17777c478bd9Sstevel@tonic-gate 
17787c478bd9Sstevel@tonic-gate /*
17797c478bd9Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
17807c478bd9Sstevel@tonic-gate  */
17817c478bd9Sstevel@tonic-gate conn_t *
1782*f4b3ec61Sdh ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst)
17837c478bd9Sstevel@tonic-gate {
17847c478bd9Sstevel@tonic-gate 	uint16_t	hdr_len;
17857c478bd9Sstevel@tonic-gate 	ipha_t		*ipha;
17867c478bd9Sstevel@tonic-gate 	uint8_t		*nexthdrp;
17877c478bd9Sstevel@tonic-gate 
17887c478bd9Sstevel@tonic-gate 	if (MBLKL(mp) < sizeof (ipha_t))
17897c478bd9Sstevel@tonic-gate 		return (NULL);
17907c478bd9Sstevel@tonic-gate 
17917c478bd9Sstevel@tonic-gate 	switch (IPH_HDR_VERSION(mp->b_rptr)) {
17927c478bd9Sstevel@tonic-gate 	case IPV4_VERSION:
17937c478bd9Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
17947c478bd9Sstevel@tonic-gate 		hdr_len = IPH_HDR_LENGTH(ipha);
17957c478bd9Sstevel@tonic-gate 		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
1796*f4b3ec61Sdh 		    zoneid, ipst));
17977c478bd9Sstevel@tonic-gate 	case IPV6_VERSION:
17987c478bd9Sstevel@tonic-gate 		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
17997c478bd9Sstevel@tonic-gate 		    &hdr_len, &nexthdrp))
18007c478bd9Sstevel@tonic-gate 			return (NULL);
18017c478bd9Sstevel@tonic-gate 
1802*f4b3ec61Sdh 		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst));
18037c478bd9Sstevel@tonic-gate 	}
18047c478bd9Sstevel@tonic-gate 
18057c478bd9Sstevel@tonic-gate 	return (NULL);
18067c478bd9Sstevel@tonic-gate }
18077c478bd9Sstevel@tonic-gate 
18087c478bd9Sstevel@tonic-gate conn_t *
180945916cd2Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid,
1810*f4b3ec61Sdh     uint32_t ports, ipha_t *hdr, ip_stack_t *ipst)
18117c478bd9Sstevel@tonic-gate {
181245916cd2Sjpk 	connf_t		*connfp;
18137c478bd9Sstevel@tonic-gate 	conn_t		*connp;
18147c478bd9Sstevel@tonic-gate 	in_port_t	lport;
18157c478bd9Sstevel@tonic-gate 	int		af;
181645916cd2Sjpk 	boolean_t	shared_addr;
181745916cd2Sjpk 	boolean_t	unlabeled;
181845916cd2Sjpk 	const void	*dst;
18197c478bd9Sstevel@tonic-gate 
18207c478bd9Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
182145916cd2Sjpk 
182245916cd2Sjpk 	unlabeled = B_FALSE;
182345916cd2Sjpk 	/* Cred can be null on IPv6 */
182445916cd2Sjpk 	if (is_system_labeled()) {
182545916cd2Sjpk 		cred_t *cr = DB_CRED(mp);
182645916cd2Sjpk 
182745916cd2Sjpk 		unlabeled = (cr != NULL &&
182845916cd2Sjpk 		    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
182945916cd2Sjpk 	}
183045916cd2Sjpk 	shared_addr = (zoneid == ALL_ZONES);
183145916cd2Sjpk 	if (shared_addr) {
1832*f4b3ec61Sdh 		/*
1833*f4b3ec61Sdh 		 * No need to handle exclusive-stack zones since ALL_ZONES
1834*f4b3ec61Sdh 		 * only applies to the shared stack.
1835*f4b3ec61Sdh 		 */
183645916cd2Sjpk 		zoneid = tsol_mlp_findzone(protocol, lport);
183745916cd2Sjpk 		/*
183845916cd2Sjpk 		 * If no shared MLP is found, tsol_mlp_findzone returns
183945916cd2Sjpk 		 * ALL_ZONES.  In that case, we assume it's SLP, and search for
184045916cd2Sjpk 		 * the zone based on the packet label.
184145916cd2Sjpk 		 *
184245916cd2Sjpk 		 * If there is such a zone, we prefer to find a connection in
184345916cd2Sjpk 		 * it.  Otherwise, we look for a MAC-exempt connection in any
184445916cd2Sjpk 		 * zone whose label dominates the default label on the packet.
184545916cd2Sjpk 		 */
184645916cd2Sjpk 		if (zoneid == ALL_ZONES)
184745916cd2Sjpk 			zoneid = tsol_packet_to_zoneid(mp);
184845916cd2Sjpk 		else
184945916cd2Sjpk 			unlabeled = B_FALSE;
185045916cd2Sjpk 	}
185145916cd2Sjpk 
18527c478bd9Sstevel@tonic-gate 	af = IPH_HDR_VERSION(hdr);
185345916cd2Sjpk 	dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst :
185445916cd2Sjpk 	    (const void *)&((ip6_t *)hdr)->ip6_dst;
1855*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
18567c478bd9Sstevel@tonic-gate 
18577c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
18587c478bd9Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
18597c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
18607c478bd9Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
186145916cd2Sjpk 		if (af == (connp->conn_af_isv6 ? IPV4_VERSION :
186245916cd2Sjpk 		    IPV6_VERSION))
18637c478bd9Sstevel@tonic-gate 			continue;
18647c478bd9Sstevel@tonic-gate 		if (connp->conn_fully_bound) {
18657c478bd9Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
186645916cd2Sjpk 				if (!IPCL_CONN_MATCH(connp, protocol,
186745916cd2Sjpk 				    hdr->ipha_src, hdr->ipha_dst, ports))
186845916cd2Sjpk 					continue;
18697c478bd9Sstevel@tonic-gate 			} else {
187045916cd2Sjpk 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
18717c478bd9Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_src,
187245916cd2Sjpk 				    ((ip6_t *)hdr)->ip6_dst, ports))
187345916cd2Sjpk 					continue;
18747c478bd9Sstevel@tonic-gate 			}
18757c478bd9Sstevel@tonic-gate 		} else {
18767c478bd9Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
187745916cd2Sjpk 				if (!IPCL_BIND_MATCH(connp, protocol,
187845916cd2Sjpk 				    hdr->ipha_dst, lport))
187945916cd2Sjpk 					continue;
18807c478bd9Sstevel@tonic-gate 			} else {
188145916cd2Sjpk 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
188245916cd2Sjpk 				    ((ip6_t *)hdr)->ip6_dst, lport))
188345916cd2Sjpk 					continue;
18847c478bd9Sstevel@tonic-gate 			}
18857c478bd9Sstevel@tonic-gate 		}
188645916cd2Sjpk 
18875d0bc3edSsommerfe 		if (IPCL_ZONE_MATCH(connp, zoneid) ||
188845916cd2Sjpk 		    (unlabeled && connp->conn_mac_exempt))
188945916cd2Sjpk 			break;
189045916cd2Sjpk 	}
189145916cd2Sjpk 	/*
189245916cd2Sjpk 	 * If the connection is fully-bound and connection-oriented (TCP or
189345916cd2Sjpk 	 * SCTP), then we've already validated the remote system's label.
189445916cd2Sjpk 	 * There's no need to do it again for every packet.
189545916cd2Sjpk 	 */
189645916cd2Sjpk 	if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound ||
189745916cd2Sjpk 	    !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) &&
189845916cd2Sjpk 	    !tsol_receive_local(mp, dst, af, shared_addr, connp)) {
189945916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
190045916cd2Sjpk 		    char *, "connp(1) could not receive mp(2)",
190145916cd2Sjpk 		    conn_t *, connp, mblk_t *, mp);
190245916cd2Sjpk 		connp = NULL;
19037c478bd9Sstevel@tonic-gate 	}
19047c0c0508Skcpoon 
19057c0c0508Skcpoon 	if (connp != NULL)
19067c0c0508Skcpoon 		goto found;
19077c0c0508Skcpoon 	mutex_exit(&connfp->connf_lock);
19087c0c0508Skcpoon 
19097c0c0508Skcpoon 	/* Try to look for a wildcard match. */
1910*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
19117c0c0508Skcpoon 	mutex_enter(&connfp->connf_lock);
19127c0c0508Skcpoon 	for (connp = connfp->connf_head; connp != NULL;
19137c0c0508Skcpoon 	    connp = connp->conn_next) {
19147c0c0508Skcpoon 		/* We don't allow v4 fallback for v6 raw socket. */
19157c0c0508Skcpoon 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
19165d0bc3edSsommerfe 		    IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) {
19177c0c0508Skcpoon 			continue;
19187c0c0508Skcpoon 		}
19197c0c0508Skcpoon 		if (af == IPV4_VERSION) {
19207c0c0508Skcpoon 			if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst))
19217c0c0508Skcpoon 				break;
19227c0c0508Skcpoon 		} else {
19237c0c0508Skcpoon 			if (IPCL_RAW_MATCH_V6(connp, protocol,
19247c0c0508Skcpoon 			    ((ip6_t *)hdr)->ip6_dst)) {
19257c0c0508Skcpoon 				break;
19267c0c0508Skcpoon 			}
19277c0c0508Skcpoon 		}
19287c478bd9Sstevel@tonic-gate 	}
19297c0c0508Skcpoon 
19307c0c0508Skcpoon 	if (connp != NULL)
19317c0c0508Skcpoon 		goto found;
19327c0c0508Skcpoon 
19337c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
19347c478bd9Sstevel@tonic-gate 	return (NULL);
19357c0c0508Skcpoon 
19367c0c0508Skcpoon found:
19377c0c0508Skcpoon 	ASSERT(connp != NULL);
19387c0c0508Skcpoon 	CONN_INC_REF(connp);
19397c0c0508Skcpoon 	mutex_exit(&connfp->connf_lock);
19407c0c0508Skcpoon 	return (connp);
19417c478bd9Sstevel@tonic-gate }
19427c478bd9Sstevel@tonic-gate 
19437c478bd9Sstevel@tonic-gate /* ARGSUSED */
19447c478bd9Sstevel@tonic-gate static int
19457c478bd9Sstevel@tonic-gate ipcl_tcpconn_constructor(void *buf, void *cdrarg, int kmflags)
19467c478bd9Sstevel@tonic-gate {
19477c478bd9Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
19487c478bd9Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
19497c478bd9Sstevel@tonic-gate 	tcp_t	*tcp = &itc->itc_tcp;
19507c478bd9Sstevel@tonic-gate 	bzero(itc, sizeof (itc_t));
19517c478bd9Sstevel@tonic-gate 	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
19527c478bd9Sstevel@tonic-gate 	connp->conn_tcp = tcp;
19537c478bd9Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
19547c478bd9Sstevel@tonic-gate 	connp->conn_ulp = IPPROTO_TCP;
19557c478bd9Sstevel@tonic-gate 	tcp->tcp_connp = connp;
19567c478bd9Sstevel@tonic-gate 	return (0);
19577c478bd9Sstevel@tonic-gate }
19587c478bd9Sstevel@tonic-gate 
19597c478bd9Sstevel@tonic-gate /* ARGSUSED */
19607c478bd9Sstevel@tonic-gate static void
19617c478bd9Sstevel@tonic-gate ipcl_tcpconn_destructor(void *buf, void *cdrarg)
19627c478bd9Sstevel@tonic-gate {
19637c478bd9Sstevel@tonic-gate 	tcp_timermp_free(((conn_t *)buf)->conn_tcp);
19647c478bd9Sstevel@tonic-gate }
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate /*
19677c478bd9Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
19687c478bd9Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
19697c478bd9Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
19707c478bd9Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
19717c478bd9Sstevel@tonic-gate  * as follows.
19727c478bd9Sstevel@tonic-gate  *
19737c478bd9Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
19747c478bd9Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
19757c478bd9Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
19767c478bd9Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
19777c478bd9Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
19787c478bd9Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
19797c478bd9Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
19807c478bd9Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
19817c478bd9Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
19827c478bd9Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
19837c478bd9Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
19847c478bd9Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
19857c478bd9Sstevel@tonic-gate  */
19867c478bd9Sstevel@tonic-gate void
19877c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
19887c478bd9Sstevel@tonic-gate {
19897c478bd9Sstevel@tonic-gate 	int	index;
1990*f4b3ec61Sdh 	struct connf_s	*connfp;
1991*f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
19927c478bd9Sstevel@tonic-gate 
19937c478bd9Sstevel@tonic-gate 	/*
19947c478bd9Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
19957c478bd9Sstevel@tonic-gate 	 * in the global lists is sufficient.
19967c478bd9Sstevel@tonic-gate 	 */
1997*f4b3ec61Sdh 	ipst->ips_conn_g_index++;
1998*f4b3ec61Sdh 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
19997c478bd9Sstevel@tonic-gate 
20007c478bd9Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
20017c478bd9Sstevel@tonic-gate 	/*
20027c478bd9Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
20037c478bd9Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
20047c478bd9Sstevel@tonic-gate 	 */
20057c478bd9Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
20067c478bd9Sstevel@tonic-gate 
2007*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
20087c478bd9Sstevel@tonic-gate 	/* Insert at the head of the list */
2009*f4b3ec61Sdh 	mutex_enter(&connfp->connf_lock);
2010*f4b3ec61Sdh 	connp->conn_g_next = connfp->connf_head;
20117c478bd9Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
20127c478bd9Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
2013*f4b3ec61Sdh 	connfp->connf_head = connp;
20147c478bd9Sstevel@tonic-gate 
20157c478bd9Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
2016*f4b3ec61Sdh 	connp->conn_g_fanout = connfp;
20177c478bd9Sstevel@tonic-gate 
2018*f4b3ec61Sdh 	mutex_exit(&connfp->connf_lock);
20197c478bd9Sstevel@tonic-gate }
20207c478bd9Sstevel@tonic-gate 
20217c478bd9Sstevel@tonic-gate void
20227c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
20237c478bd9Sstevel@tonic-gate {
2024*f4b3ec61Sdh 	struct connf_s	*connfp;
2025*f4b3ec61Sdh 
20267c478bd9Sstevel@tonic-gate 	/*
20277c478bd9Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
20287c478bd9Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
20297c478bd9Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
20307c478bd9Sstevel@tonic-gate 	 */
20317c478bd9Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
20327c478bd9Sstevel@tonic-gate 		return;
20337c478bd9Sstevel@tonic-gate 
2034*f4b3ec61Sdh 	connfp = connp->conn_g_fanout;
2035*f4b3ec61Sdh 	mutex_enter(&connfp->connf_lock);
20367c478bd9Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
20377c478bd9Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
20387c478bd9Sstevel@tonic-gate 	else
2039*f4b3ec61Sdh 		connfp->connf_head = connp->conn_g_next;
20407c478bd9Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
20417c478bd9Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2042*f4b3ec61Sdh 	mutex_exit(&connfp->connf_lock);
20437c478bd9Sstevel@tonic-gate 
20447c478bd9Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
20457c478bd9Sstevel@tonic-gate 	connp->conn_g_next = NULL;
20467c478bd9Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
20477c478bd9Sstevel@tonic-gate }
20487c478bd9Sstevel@tonic-gate 
20497c478bd9Sstevel@tonic-gate /*
20507c478bd9Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
20517c478bd9Sstevel@tonic-gate  * with the specified argument for each.
20527c478bd9Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
20537c478bd9Sstevel@tonic-gate  *
20547c478bd9Sstevel@tonic-gate  * IPCs may hold pointers to ipif/ill. To guard against stale pointers
20557c478bd9Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
20567c478bd9Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
20577c478bd9Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
20587c478bd9Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
20597c478bd9Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
20607c478bd9Sstevel@tonic-gate  * is created to the struct that is going away.
20617c478bd9Sstevel@tonic-gate  */
20627c478bd9Sstevel@tonic-gate void
2063*f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
20647c478bd9Sstevel@tonic-gate {
20657c478bd9Sstevel@tonic-gate 	int	i;
20667c478bd9Sstevel@tonic-gate 	conn_t	*connp;
20677c478bd9Sstevel@tonic-gate 	conn_t	*prev_connp;
20687c478bd9Sstevel@tonic-gate 
20697c478bd9Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2070*f4b3ec61Sdh 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
20717c478bd9Sstevel@tonic-gate 		prev_connp = NULL;
2072*f4b3ec61Sdh 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
20737c478bd9Sstevel@tonic-gate 		while (connp != NULL) {
20747c478bd9Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
20757c478bd9Sstevel@tonic-gate 			if (connp->conn_state_flags &
20767c478bd9Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
20777c478bd9Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
20787c478bd9Sstevel@tonic-gate 				connp = connp->conn_g_next;
20797c478bd9Sstevel@tonic-gate 				continue;
20807c478bd9Sstevel@tonic-gate 			}
20817c478bd9Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
20827c478bd9Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
2083*f4b3ec61Sdh 			mutex_exit(
2084*f4b3ec61Sdh 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
20857c478bd9Sstevel@tonic-gate 			(*func)(connp, arg);
20867c478bd9Sstevel@tonic-gate 			if (prev_connp != NULL)
20877c478bd9Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
2088*f4b3ec61Sdh 			mutex_enter(
2089*f4b3ec61Sdh 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
20907c478bd9Sstevel@tonic-gate 			prev_connp = connp;
20917c478bd9Sstevel@tonic-gate 			connp = connp->conn_g_next;
20927c478bd9Sstevel@tonic-gate 		}
2093*f4b3ec61Sdh 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
20947c478bd9Sstevel@tonic-gate 		if (prev_connp != NULL)
20957c478bd9Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
20967c478bd9Sstevel@tonic-gate 	}
20977c478bd9Sstevel@tonic-gate }
20987c478bd9Sstevel@tonic-gate 
20997c478bd9Sstevel@tonic-gate /*
21007c478bd9Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
21017c478bd9Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
21027c478bd9Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2103d0ab37afSethindra  * (peer tcp in ESTABLISHED state).
21047c478bd9Sstevel@tonic-gate  */
21057c478bd9Sstevel@tonic-gate conn_t *
2106*f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph,
2107*f4b3ec61Sdh     ip_stack_t *ipst)
21087c478bd9Sstevel@tonic-gate {
21097c478bd9Sstevel@tonic-gate 	uint32_t ports;
21107c478bd9Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
21117c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
21127c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
21137c478bd9Sstevel@tonic-gate 	boolean_t zone_chk;
21147c478bd9Sstevel@tonic-gate 
21157c478bd9Sstevel@tonic-gate 	/*
21167c478bd9Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
21177c478bd9Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
21187c478bd9Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
21197c478bd9Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
21207c478bd9Sstevel@tonic-gate 	 */
21217c478bd9Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
21227c478bd9Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
21237c478bd9Sstevel@tonic-gate 
21247c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
21257c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
21267c478bd9Sstevel@tonic-gate 
2127*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2128*f4b3ec61Sdh 	    ports, ipst)];
21297c478bd9Sstevel@tonic-gate 
21307c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
21317c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
21327c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
21337c478bd9Sstevel@tonic-gate 
21347c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
21357c478bd9Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2136d0ab37afSethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
21377c478bd9Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
21387c478bd9Sstevel@tonic-gate 
21397c478bd9Sstevel@tonic-gate 			ASSERT(tconnp != connp);
21407c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
21417c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
21427c478bd9Sstevel@tonic-gate 			return (tconnp);
21437c478bd9Sstevel@tonic-gate 		}
21447c478bd9Sstevel@tonic-gate 	}
21457c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
21467c478bd9Sstevel@tonic-gate 	return (NULL);
21477c478bd9Sstevel@tonic-gate }
21487c478bd9Sstevel@tonic-gate 
21497c478bd9Sstevel@tonic-gate /*
21507c478bd9Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
21517c478bd9Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
21527c478bd9Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2153d0ab37afSethindra  * (peer tcp in ESTABLISHED state).
21547c478bd9Sstevel@tonic-gate  */
21557c478bd9Sstevel@tonic-gate conn_t *
2156*f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph,
2157*f4b3ec61Sdh     ip_stack_t *ipst)
21587c478bd9Sstevel@tonic-gate {
21597c478bd9Sstevel@tonic-gate 	uint32_t ports;
21607c478bd9Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
21617c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
21627c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
21637c478bd9Sstevel@tonic-gate 	boolean_t zone_chk;
21647c478bd9Sstevel@tonic-gate 
21657c478bd9Sstevel@tonic-gate 	/*
21667c478bd9Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
21677c478bd9Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
21687c478bd9Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
21697c478bd9Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
21707c478bd9Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
21717c478bd9Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
21727c478bd9Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
21737c478bd9Sstevel@tonic-gate 	 */
21747c478bd9Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
21757c478bd9Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
21767c478bd9Sstevel@tonic-gate 
21777c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
21787c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
21797c478bd9Sstevel@tonic-gate 
2180*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2181*f4b3ec61Sdh 	    ports, ipst)];
21827c478bd9Sstevel@tonic-gate 
21837c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
21847c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
21857c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
21867c478bd9Sstevel@tonic-gate 
21877c478bd9Sstevel@tonic-gate 		/* We skip tcp_bound_if check here as this is loopback tcp */
21887c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
21897c478bd9Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2190d0ab37afSethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
21917c478bd9Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
21927c478bd9Sstevel@tonic-gate 
21937c478bd9Sstevel@tonic-gate 			ASSERT(tconnp != connp);
21947c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
21957c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
21967c478bd9Sstevel@tonic-gate 			return (tconnp);
21977c478bd9Sstevel@tonic-gate 		}
21987c478bd9Sstevel@tonic-gate 	}
21997c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
22007c478bd9Sstevel@tonic-gate 	return (NULL);
22017c478bd9Sstevel@tonic-gate }
22027c478bd9Sstevel@tonic-gate 
22037c478bd9Sstevel@tonic-gate /*
22047c478bd9Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
22057c478bd9Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
22067c478bd9Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
22077c478bd9Sstevel@tonic-gate  */
22087c478bd9Sstevel@tonic-gate conn_t *
2209*f4b3ec61Sdh ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state,
2210*f4b3ec61Sdh     ip_stack_t *ipst)
22117c478bd9Sstevel@tonic-gate {
22127c478bd9Sstevel@tonic-gate 	uint32_t ports;
22137c478bd9Sstevel@tonic-gate 	uint16_t *pports;
22147c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
22157c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
22167c478bd9Sstevel@tonic-gate 
22177c478bd9Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
22187c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
22197c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
22207c478bd9Sstevel@tonic-gate 
2221*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2222*f4b3ec61Sdh 					    ports, ipst)];
22237c478bd9Sstevel@tonic-gate 
22247c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
22257c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
22267c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
22277c478bd9Sstevel@tonic-gate 
22287c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
22297c478bd9Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
22307c478bd9Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
22317c478bd9Sstevel@tonic-gate 
22327c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
22337c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
22347c478bd9Sstevel@tonic-gate 			return (tconnp);
22357c478bd9Sstevel@tonic-gate 		}
22367c478bd9Sstevel@tonic-gate 	}
22377c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
22387c478bd9Sstevel@tonic-gate 	return (NULL);
22397c478bd9Sstevel@tonic-gate }
22407c478bd9Sstevel@tonic-gate 
22417c478bd9Sstevel@tonic-gate /*
22427c478bd9Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
22437c478bd9Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
22447c478bd9Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
22457c478bd9Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
22467c478bd9Sstevel@tonic-gate  */
22477c478bd9Sstevel@tonic-gate conn_t *
22487c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2249*f4b3ec61Sdh     uint_t ifindex, ip_stack_t *ipst)
22507c478bd9Sstevel@tonic-gate {
22517c478bd9Sstevel@tonic-gate 	tcp_t	*tcp;
22527c478bd9Sstevel@tonic-gate 	uint32_t ports;
22537c478bd9Sstevel@tonic-gate 	uint16_t *pports;
22547c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
22557c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
22567c478bd9Sstevel@tonic-gate 
22577c478bd9Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
22587c478bd9Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
22597c478bd9Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
22607c478bd9Sstevel@tonic-gate 
2261*f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2262*f4b3ec61Sdh 					    ports, ipst)];
22637c478bd9Sstevel@tonic-gate 
22647c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
22657c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
22667c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
22677c478bd9Sstevel@tonic-gate 
22687c478bd9Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
22697c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
22707c478bd9Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
22717c478bd9Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
22727c478bd9Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
22737c478bd9Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex)) {
22747c478bd9Sstevel@tonic-gate 
22757c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
22767c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
22777c478bd9Sstevel@tonic-gate 			return (tconnp);
22787c478bd9Sstevel@tonic-gate 		}
22797c478bd9Sstevel@tonic-gate 	}
22807c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
22817c478bd9Sstevel@tonic-gate 	return (NULL);
22827c478bd9Sstevel@tonic-gate }
22837c478bd9Sstevel@tonic-gate 
22847c478bd9Sstevel@tonic-gate /*
228545916cd2Sjpk  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
228645916cd2Sjpk  * a listener when changing state.
22877c478bd9Sstevel@tonic-gate  */
22887c478bd9Sstevel@tonic-gate conn_t *
2289*f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2290*f4b3ec61Sdh     ip_stack_t *ipst)
22917c478bd9Sstevel@tonic-gate {
22927c478bd9Sstevel@tonic-gate 	connf_t		*bind_connfp;
22937c478bd9Sstevel@tonic-gate 	conn_t		*connp;
22947c478bd9Sstevel@tonic-gate 	tcp_t		*tcp;
22957c478bd9Sstevel@tonic-gate 
22967c478bd9Sstevel@tonic-gate 	/*
22977c478bd9Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
22987c478bd9Sstevel@tonic-gate 	 * all zeros.
22997c478bd9Sstevel@tonic-gate 	 */
23007c478bd9Sstevel@tonic-gate 	if (laddr == 0)
23017c478bd9Sstevel@tonic-gate 		return (NULL);
23027c478bd9Sstevel@tonic-gate 
230345916cd2Sjpk 	ASSERT(zoneid != ALL_ZONES);
230445916cd2Sjpk 
2305*f4b3ec61Sdh 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
23067c478bd9Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
23077c478bd9Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
23087c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
23097c478bd9Sstevel@tonic-gate 		tcp = connp->conn_tcp;
23107c478bd9Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
23115d0bc3edSsommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
23127c478bd9Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
23137c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
23147c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
23157c478bd9Sstevel@tonic-gate 			return (connp);
23167c478bd9Sstevel@tonic-gate 		}
23177c478bd9Sstevel@tonic-gate 	}
23187c478bd9Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
23197c478bd9Sstevel@tonic-gate 	return (NULL);
23207c478bd9Sstevel@tonic-gate }
23217c478bd9Sstevel@tonic-gate 
232245916cd2Sjpk /*
232345916cd2Sjpk  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
232445916cd2Sjpk  * a listener when changing state.
232545916cd2Sjpk  */
23267c478bd9Sstevel@tonic-gate conn_t *
23277c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2328*f4b3ec61Sdh     zoneid_t zoneid, ip_stack_t *ipst)
23297c478bd9Sstevel@tonic-gate {
23307c478bd9Sstevel@tonic-gate 	connf_t		*bind_connfp;
23317c478bd9Sstevel@tonic-gate 	conn_t		*connp = NULL;
23327c478bd9Sstevel@tonic-gate 	tcp_t		*tcp;
23337c478bd9Sstevel@tonic-gate 
23347c478bd9Sstevel@tonic-gate 	/*
23357c478bd9Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
23367c478bd9Sstevel@tonic-gate 	 * all zeros.
23377c478bd9Sstevel@tonic-gate 	 */
23387c478bd9Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
23397c478bd9Sstevel@tonic-gate 		return (NULL);
23407c478bd9Sstevel@tonic-gate 
234145916cd2Sjpk 	ASSERT(zoneid != ALL_ZONES);
23427c478bd9Sstevel@tonic-gate 
2343*f4b3ec61Sdh 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
23447c478bd9Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
23457c478bd9Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
23467c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
23477c478bd9Sstevel@tonic-gate 		tcp = connp->conn_tcp;
23487c478bd9Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
23495d0bc3edSsommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
23507c478bd9Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
23517c478bd9Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex) &&
23527c478bd9Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
23537c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
23547c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
23557c478bd9Sstevel@tonic-gate 			return (connp);
23567c478bd9Sstevel@tonic-gate 		}
23577c478bd9Sstevel@tonic-gate 	}
23587c478bd9Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
23597c478bd9Sstevel@tonic-gate 	return (NULL);
23607c478bd9Sstevel@tonic-gate }
23617c478bd9Sstevel@tonic-gate 
2362ff550d0eSmasputra /*
2363ff550d0eSmasputra  * ipcl_get_next_conn
2364ff550d0eSmasputra  *	get the next entry in the conn global list
2365ff550d0eSmasputra  *	and put a reference on the next_conn.
2366ff550d0eSmasputra  *	decrement the reference on the current conn.
2367ff550d0eSmasputra  *
2368ff550d0eSmasputra  * This is an iterator based walker function that also provides for
2369ff550d0eSmasputra  * some selection by the caller. It walks through the conn_hash bucket
2370ff550d0eSmasputra  * searching for the next valid connp in the list, and selects connections
2371ff550d0eSmasputra  * that are neither closed nor condemned. It also REFHOLDS the conn
2372ff550d0eSmasputra  * thus ensuring that the conn exists when the caller uses the conn.
2373ff550d0eSmasputra  */
2374ff550d0eSmasputra conn_t *
2375ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2376ff550d0eSmasputra {
2377ff550d0eSmasputra 	conn_t	*next_connp;
2378ff550d0eSmasputra 
2379ff550d0eSmasputra 	if (connfp == NULL)
2380ff550d0eSmasputra 		return (NULL);
2381ff550d0eSmasputra 
2382ff550d0eSmasputra 	mutex_enter(&connfp->connf_lock);
2383ff550d0eSmasputra 
2384ff550d0eSmasputra 	next_connp = (connp == NULL) ?
2385ff550d0eSmasputra 	    connfp->connf_head : connp->conn_g_next;
2386ff550d0eSmasputra 
2387ff550d0eSmasputra 	while (next_connp != NULL) {
2388ff550d0eSmasputra 		mutex_enter(&next_connp->conn_lock);
2389ff550d0eSmasputra 		if (!(next_connp->conn_flags & conn_flags) ||
2390ff550d0eSmasputra 		    (next_connp->conn_state_flags &
2391ff550d0eSmasputra 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2392ff550d0eSmasputra 			/*
2393ff550d0eSmasputra 			 * This conn has been condemned or
2394ff550d0eSmasputra 			 * is closing, or the flags don't match
2395ff550d0eSmasputra 			 */
2396ff550d0eSmasputra 			mutex_exit(&next_connp->conn_lock);
2397ff550d0eSmasputra 			next_connp = next_connp->conn_g_next;
2398ff550d0eSmasputra 			continue;
2399ff550d0eSmasputra 		}
2400ff550d0eSmasputra 		CONN_INC_REF_LOCKED(next_connp);
2401ff550d0eSmasputra 		mutex_exit(&next_connp->conn_lock);
2402ff550d0eSmasputra 		break;
2403ff550d0eSmasputra 	}
2404ff550d0eSmasputra 
2405ff550d0eSmasputra 	mutex_exit(&connfp->connf_lock);
2406ff550d0eSmasputra 
2407ff550d0eSmasputra 	if (connp != NULL)
2408ff550d0eSmasputra 		CONN_DEC_REF(connp);
2409ff550d0eSmasputra 
2410ff550d0eSmasputra 	return (next_connp);
2411ff550d0eSmasputra }
2412ff550d0eSmasputra 
24137c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG
24147c478bd9Sstevel@tonic-gate /*
24157c478bd9Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
24167c478bd9Sstevel@tonic-gate  */
24177c478bd9Sstevel@tonic-gate int
24187c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
24197c478bd9Sstevel@tonic-gate {
24207c478bd9Sstevel@tonic-gate 	int	last;
24217c478bd9Sstevel@tonic-gate 	conn_trace_t	*ctb;
24227c478bd9Sstevel@tonic-gate 
24237c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
24247c478bd9Sstevel@tonic-gate 	last = connp->conn_trace_last;
24257c478bd9Sstevel@tonic-gate 	last++;
24267c478bd9Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
24277c478bd9Sstevel@tonic-gate 		last = 0;
24287c478bd9Sstevel@tonic-gate 
24297c478bd9Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
24307c478bd9Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
24317c478bd9Sstevel@tonic-gate 	connp->conn_trace_last = last;
24327c478bd9Sstevel@tonic-gate 	return (1);
24337c478bd9Sstevel@tonic-gate }
24347c478bd9Sstevel@tonic-gate 
24357c478bd9Sstevel@tonic-gate int
24367c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
24377c478bd9Sstevel@tonic-gate {
24387c478bd9Sstevel@tonic-gate 	int	last;
24397c478bd9Sstevel@tonic-gate 	conn_trace_t	*ctb;
24407c478bd9Sstevel@tonic-gate 
24417c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
24427c478bd9Sstevel@tonic-gate 	last = connp->conn_trace_last;
24437c478bd9Sstevel@tonic-gate 	last++;
24447c478bd9Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
24457c478bd9Sstevel@tonic-gate 		last = 0;
24467c478bd9Sstevel@tonic-gate 
24477c478bd9Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
24487c478bd9Sstevel@tonic-gate 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, IP_STACK_DEPTH);
24497c478bd9Sstevel@tonic-gate 	connp->conn_trace_last = last;
24507c478bd9Sstevel@tonic-gate 	return (1);
24517c478bd9Sstevel@tonic-gate }
24527c478bd9Sstevel@tonic-gate #endif
2453