17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ee4701baSericheng  * Common Development and Distribution License (the "License").
6ee4701baSericheng  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22fab254e2SAruna Ramakrishna  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * IP PACKET CLASSIFIER
287c478bd9Sstevel@tonic-gate  *
297c478bd9Sstevel@tonic-gate  * The IP packet classifier provides mapping between IP packets and persistent
307c478bd9Sstevel@tonic-gate  * connection state for connection-oriented protocols. It also provides
317c478bd9Sstevel@tonic-gate  * interface for managing connection states.
327c478bd9Sstevel@tonic-gate  *
337c478bd9Sstevel@tonic-gate  * The connection state is kept in conn_t data structure and contains, among
347c478bd9Sstevel@tonic-gate  * other things:
357c478bd9Sstevel@tonic-gate  *
367c478bd9Sstevel@tonic-gate  *	o local/remote address and ports
377c478bd9Sstevel@tonic-gate  *	o Transport protocol
387c478bd9Sstevel@tonic-gate  *	o squeue for the connection (for TCP only)
397c478bd9Sstevel@tonic-gate  *	o reference counter
407c478bd9Sstevel@tonic-gate  *	o Connection state
417c478bd9Sstevel@tonic-gate  *	o hash table linkage
427c478bd9Sstevel@tonic-gate  *	o interface/ire information
437c478bd9Sstevel@tonic-gate  *	o credentials
447c478bd9Sstevel@tonic-gate  *	o ipsec policy
457c478bd9Sstevel@tonic-gate  *	o send and receive functions.
467c478bd9Sstevel@tonic-gate  *	o mutex lock.
477c478bd9Sstevel@tonic-gate  *
487c478bd9Sstevel@tonic-gate  * Connections use a reference counting scheme. They are freed when the
497c478bd9Sstevel@tonic-gate  * reference counter drops to zero. A reference is incremented when connection
507c478bd9Sstevel@tonic-gate  * is placed in a list or table, when incoming packet for the connection arrives
517c478bd9Sstevel@tonic-gate  * and when connection is processed via squeue (squeue processing may be
527c478bd9Sstevel@tonic-gate  * asynchronous and the reference protects the connection from being destroyed
537c478bd9Sstevel@tonic-gate  * before its processing is finished).
547c478bd9Sstevel@tonic-gate  *
557c478bd9Sstevel@tonic-gate  * send and receive functions are currently used for TCP only. The send function
567c478bd9Sstevel@tonic-gate  * determines the IP entry point for the packet once it leaves TCP to be sent to
577c478bd9Sstevel@tonic-gate  * the destination address. The receive function is used by IP when the packet
587c478bd9Sstevel@tonic-gate  * should be passed for TCP processing. When a new connection is created these
597c478bd9Sstevel@tonic-gate  * are set to ip_output() and tcp_input() respectively. During the lifetime of
607c478bd9Sstevel@tonic-gate  * the connection the send and receive functions may change depending on the
 * changes in the connection state. For example, once the connection is bound to
 * an address, the receive function for this connection is set to
637c478bd9Sstevel@tonic-gate  * tcp_conn_request().  This allows incoming SYNs to go directly into the
647c478bd9Sstevel@tonic-gate  * listener SYN processing function without going to tcp_input() first.
657c478bd9Sstevel@tonic-gate  *
667c478bd9Sstevel@tonic-gate  * Classifier uses several hash tables:
677c478bd9Sstevel@tonic-gate  *
687c478bd9Sstevel@tonic-gate  * 	ipcl_conn_fanout:	contains all TCP connections in CONNECTED state
697c478bd9Sstevel@tonic-gate  *	ipcl_bind_fanout:	contains all connections in BOUND state
707c478bd9Sstevel@tonic-gate  *	ipcl_proto_fanout:	IPv4 protocol fanout
717c478bd9Sstevel@tonic-gate  *	ipcl_proto_fanout_v6:	IPv6 protocol fanout
727c478bd9Sstevel@tonic-gate  *	ipcl_udp_fanout:	contains all UDP connections
737c478bd9Sstevel@tonic-gate  *	ipcl_globalhash_fanout:	contains all connections
747c478bd9Sstevel@tonic-gate  *
757c478bd9Sstevel@tonic-gate  * The ipcl_globalhash_fanout is used for any walkers (like snmp and Clustering)
767c478bd9Sstevel@tonic-gate  * which need to view all existing connections.
777c478bd9Sstevel@tonic-gate  *
787c478bd9Sstevel@tonic-gate  * All tables are protected by per-bucket locks. When both per-bucket lock and
797c478bd9Sstevel@tonic-gate  * connection lock need to be held, the per-bucket lock should be acquired
807c478bd9Sstevel@tonic-gate  * first, followed by the connection lock.
817c478bd9Sstevel@tonic-gate  *
827c478bd9Sstevel@tonic-gate  * All functions doing search in one of these tables increment a reference
837c478bd9Sstevel@tonic-gate  * counter on the connection found (if any). This reference should be dropped
847c478bd9Sstevel@tonic-gate  * when the caller has finished processing the connection.
857c478bd9Sstevel@tonic-gate  *
867c478bd9Sstevel@tonic-gate  *
877c478bd9Sstevel@tonic-gate  * INTERFACES:
887c478bd9Sstevel@tonic-gate  * ===========
897c478bd9Sstevel@tonic-gate  *
907c478bd9Sstevel@tonic-gate  * Connection Lookup:
917c478bd9Sstevel@tonic-gate  * ------------------
927c478bd9Sstevel@tonic-gate  *
93f4b3ec61Sdh  * conn_t *ipcl_classify_v4(mp, protocol, hdr_len, zoneid, ip_stack)
94f4b3ec61Sdh  * conn_t *ipcl_classify_v6(mp, protocol, hdr_len, zoneid, ip_stack)
957c478bd9Sstevel@tonic-gate  *
967c478bd9Sstevel@tonic-gate  * Finds connection for an incoming IPv4 or IPv6 packet. Returns NULL if
977c478bd9Sstevel@tonic-gate  * it can't find any associated connection. If the connection is found, its
987c478bd9Sstevel@tonic-gate  * reference counter is incremented.
997c478bd9Sstevel@tonic-gate  *
1007c478bd9Sstevel@tonic-gate  *	mp:	mblock, containing packet header. The full header should fit
1017c478bd9Sstevel@tonic-gate  *		into a single mblock. It should also contain at least full IP
1027c478bd9Sstevel@tonic-gate  *		and TCP or UDP header.
1037c478bd9Sstevel@tonic-gate  *
1047c478bd9Sstevel@tonic-gate  *	protocol: Either IPPROTO_TCP or IPPROTO_UDP.
1057c478bd9Sstevel@tonic-gate  *
1067c478bd9Sstevel@tonic-gate  *	hdr_len: The size of IP header. It is used to find TCP or UDP header in
1077c478bd9Sstevel@tonic-gate  *		 the packet.
1087c478bd9Sstevel@tonic-gate  *
10945916cd2Sjpk  * 	zoneid: The zone in which the returned connection must be; the zoneid
11045916cd2Sjpk  *		corresponding to the ire_zoneid on the IRE located for the
11145916cd2Sjpk  *		packet's destination address.
1127c478bd9Sstevel@tonic-gate  *
1137c478bd9Sstevel@tonic-gate  *	For TCP connections, the lookup order is as follows:
1147c478bd9Sstevel@tonic-gate  *		5-tuple {src, dst, protocol, local port, remote port}
1157c478bd9Sstevel@tonic-gate  *			lookup in ipcl_conn_fanout table.
1167c478bd9Sstevel@tonic-gate  *		3-tuple {dst, remote port, protocol} lookup in
1177c478bd9Sstevel@tonic-gate  *			ipcl_bind_fanout table.
1187c478bd9Sstevel@tonic-gate  *
1197c478bd9Sstevel@tonic-gate  *	For UDP connections, a 5-tuple {src, dst, protocol, local port,
1207c478bd9Sstevel@tonic-gate  *	remote port} lookup is done on ipcl_udp_fanout. Note that,
 *	these interfaces do not handle cases where a packet belongs
1227c478bd9Sstevel@tonic-gate  *	to multiple UDP clients, which is handled in IP itself.
1237c478bd9Sstevel@tonic-gate  *
12445916cd2Sjpk  * If the destination IRE is ALL_ZONES (indicated by zoneid), then we must
12545916cd2Sjpk  * determine which actual zone gets the segment.  This is used only in a
12645916cd2Sjpk  * labeled environment.  The matching rules are:
12745916cd2Sjpk  *
12845916cd2Sjpk  *	- If it's not a multilevel port, then the label on the packet selects
12945916cd2Sjpk  *	  the zone.  Unlabeled packets are delivered to the global zone.
13045916cd2Sjpk  *
13145916cd2Sjpk  *	- If it's a multilevel port, then only the zone registered to receive
13245916cd2Sjpk  *	  packets on that port matches.
13345916cd2Sjpk  *
13445916cd2Sjpk  * Also, in a labeled environment, packet labels need to be checked.  For fully
13545916cd2Sjpk  * bound TCP connections, we can assume that the packet label was checked
13645916cd2Sjpk  * during connection establishment, and doesn't need to be checked on each
13745916cd2Sjpk  * packet.  For others, though, we need to check for strict equality or, for
13845916cd2Sjpk  * multilevel ports, membership in the range or set.  This part currently does
13945916cd2Sjpk  * a tnrh lookup on each packet, but could be optimized to use cached results
14045916cd2Sjpk  * if that were necessary.  (SCTP doesn't come through here, but if it did,
14145916cd2Sjpk  * we would apply the same rules as TCP.)
14245916cd2Sjpk  *
14345916cd2Sjpk  * An implication of the above is that fully-bound TCP sockets must always use
14445916cd2Sjpk  * distinct 4-tuples; they can't be discriminated by label alone.
14545916cd2Sjpk  *
14645916cd2Sjpk  * Note that we cannot trust labels on packets sent to fully-bound UDP sockets,
14745916cd2Sjpk  * as there's no connection set-up handshake and no shared state.
14845916cd2Sjpk  *
14945916cd2Sjpk  * Labels on looped-back packets within a single zone do not need to be
15045916cd2Sjpk  * checked, as all processes in the same zone have the same label.
15145916cd2Sjpk  *
15245916cd2Sjpk  * Finally, for unlabeled packets received by a labeled system, special rules
15345916cd2Sjpk  * apply.  We consider only the MLP if there is one.  Otherwise, we prefer a
15445916cd2Sjpk  * socket in the zone whose label matches the default label of the sender, if
15545916cd2Sjpk  * any.  In any event, the receiving socket must have SO_MAC_EXEMPT set and the
15645916cd2Sjpk  * receiver's label must dominate the sender's default label.
15745916cd2Sjpk  *
158f4b3ec61Sdh  * conn_t *ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcph_t *, int, ip_stack);
159f4b3ec61Sdh  * conn_t *ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
160f4b3ec61Sdh  *					 ip_stack);
1617c478bd9Sstevel@tonic-gate  *
 *	Lookup routine to find an exact match for {src, dst, local port,
 *	remote port} for TCP connections in ipcl_conn_fanout. The address and
1647c478bd9Sstevel@tonic-gate  *	ports are read from the IP and TCP header respectively.
1657c478bd9Sstevel@tonic-gate  *
166f4b3ec61Sdh  * conn_t	*ipcl_lookup_listener_v4(lport, laddr, protocol,
167f4b3ec61Sdh  *					 zoneid, ip_stack);
168f4b3ec61Sdh  * conn_t	*ipcl_lookup_listener_v6(lport, laddr, protocol, ifindex,
169f4b3ec61Sdh  *					 zoneid, ip_stack);
1707c478bd9Sstevel@tonic-gate  *
1717c478bd9Sstevel@tonic-gate  * 	Lookup routine to find a listener with the tuple {lport, laddr,
1727c478bd9Sstevel@tonic-gate  * 	protocol} in the ipcl_bind_fanout table. For IPv6, an additional
1737c478bd9Sstevel@tonic-gate  * 	parameter interface index is also compared.
1747c478bd9Sstevel@tonic-gate  *
175f4b3ec61Sdh  * void ipcl_walk(func, arg, ip_stack)
1767c478bd9Sstevel@tonic-gate  *
1777c478bd9Sstevel@tonic-gate  * 	Apply 'func' to every connection available. The 'func' is called as
1787c478bd9Sstevel@tonic-gate  *	(*func)(connp, arg). The walk is non-atomic so connections may be
1797c478bd9Sstevel@tonic-gate  *	created and destroyed during the walk. The CONN_CONDEMNED and
1807c478bd9Sstevel@tonic-gate  *	CONN_INCIPIENT flags ensure that connections which are newly created
1817c478bd9Sstevel@tonic-gate  *	or being destroyed are not selected by the walker.
1827c478bd9Sstevel@tonic-gate  *
1837c478bd9Sstevel@tonic-gate  * Table Updates
1847c478bd9Sstevel@tonic-gate  * -------------
1857c478bd9Sstevel@tonic-gate  *
1867c478bd9Sstevel@tonic-gate  * int ipcl_conn_insert(connp, protocol, src, dst, ports)
1877c478bd9Sstevel@tonic-gate  * int ipcl_conn_insert_v6(connp, protocol, src, dst, ports, ifindex)
1887c478bd9Sstevel@tonic-gate  *
1897c478bd9Sstevel@tonic-gate  *	Insert 'connp' in the ipcl_conn_fanout.
 *	Arguments :
1917c478bd9Sstevel@tonic-gate  *		connp		conn_t to be inserted
1927c478bd9Sstevel@tonic-gate  *		protocol	connection protocol
1937c478bd9Sstevel@tonic-gate  *		src		source address
1947c478bd9Sstevel@tonic-gate  *		dst		destination address
1957c478bd9Sstevel@tonic-gate  *		ports		local and remote port
1967c478bd9Sstevel@tonic-gate  *		ifindex		interface index for IPv6 connections
1977c478bd9Sstevel@tonic-gate  *
1987c478bd9Sstevel@tonic-gate  *	Return value :
1997c478bd9Sstevel@tonic-gate  *		0		if connp was inserted
2007c478bd9Sstevel@tonic-gate  *		EADDRINUSE	if the connection with the same tuple
2017c478bd9Sstevel@tonic-gate  *				already exists.
2027c478bd9Sstevel@tonic-gate  *
2037c478bd9Sstevel@tonic-gate  * int ipcl_bind_insert(connp, protocol, src, lport);
2047c478bd9Sstevel@tonic-gate  * int ipcl_bind_insert_v6(connp, protocol, src, lport);
2057c478bd9Sstevel@tonic-gate  *
2067c478bd9Sstevel@tonic-gate  * 	Insert 'connp' in ipcl_bind_fanout.
 *	Arguments :
2087c478bd9Sstevel@tonic-gate  * 		connp		conn_t to be inserted
2097c478bd9Sstevel@tonic-gate  * 		protocol	connection protocol
2107c478bd9Sstevel@tonic-gate  * 		src		source address connection wants
2117c478bd9Sstevel@tonic-gate  * 				to bind to
2127c478bd9Sstevel@tonic-gate  * 		lport		local port connection wants to
2137c478bd9Sstevel@tonic-gate  * 				bind to
2147c478bd9Sstevel@tonic-gate  *
2157c478bd9Sstevel@tonic-gate  *
2167c478bd9Sstevel@tonic-gate  * void ipcl_hash_remove(connp);
2177c478bd9Sstevel@tonic-gate  *
2187c478bd9Sstevel@tonic-gate  * 	Removes the 'connp' from the connection fanout table.
2197c478bd9Sstevel@tonic-gate  *
2207c478bd9Sstevel@tonic-gate  * Connection Creation/Destruction
2217c478bd9Sstevel@tonic-gate  * -------------------------------
2227c478bd9Sstevel@tonic-gate  *
223f4b3ec61Sdh  * conn_t *ipcl_conn_create(type, sleep, netstack_t *)
2247c478bd9Sstevel@tonic-gate  *
2257c478bd9Sstevel@tonic-gate  * 	Creates a new conn based on the type flag, inserts it into
2267c478bd9Sstevel@tonic-gate  * 	globalhash table.
2277c478bd9Sstevel@tonic-gate  *
2287c478bd9Sstevel@tonic-gate  *	type:	This flag determines the type of conn_t which needs to be
229fc80c0dfSnordmark  *		created i.e., which kmem_cache it comes from.
2307c478bd9Sstevel@tonic-gate  *		IPCL_TCPCONN	indicates a TCP connection
231fc80c0dfSnordmark  *		IPCL_SCTPCONN	indicates a SCTP connection
232fc80c0dfSnordmark  *		IPCL_UDPCONN	indicates a UDP conn_t.
233fc80c0dfSnordmark  *		IPCL_RAWIPCONN	indicates a RAWIP/ICMP conn_t.
234fc80c0dfSnordmark  *		IPCL_RTSCONN	indicates a RTS conn_t.
235fc80c0dfSnordmark  *		IPCL_IPCCONN	indicates all other connections.
2367c478bd9Sstevel@tonic-gate  *
2377c478bd9Sstevel@tonic-gate  * void ipcl_conn_destroy(connp)
2387c478bd9Sstevel@tonic-gate  *
2397c478bd9Sstevel@tonic-gate  * 	Destroys the connection state, removes it from the global
2407c478bd9Sstevel@tonic-gate  * 	connection hash table and frees its memory.
2417c478bd9Sstevel@tonic-gate  */
2427c478bd9Sstevel@tonic-gate 
2437c478bd9Sstevel@tonic-gate #include <sys/types.h>
2447c478bd9Sstevel@tonic-gate #include <sys/stream.h>
2457c478bd9Sstevel@tonic-gate #include <sys/stropts.h>
2467c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
2477c478bd9Sstevel@tonic-gate #include <sys/strsubr.h>
2487c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
2497c478bd9Sstevel@tonic-gate #define	_SUN_TPI_VERSION 2
2507c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
2517c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
2527c478bd9Sstevel@tonic-gate #include <sys/debug.h>
2537c478bd9Sstevel@tonic-gate 
2547c478bd9Sstevel@tonic-gate #include <sys/systm.h>
2557c478bd9Sstevel@tonic-gate #include <sys/param.h>
2567c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
2577c478bd9Sstevel@tonic-gate #include <sys/isa_defs.h>
2587c478bd9Sstevel@tonic-gate #include <inet/common.h>
2597c478bd9Sstevel@tonic-gate #include <netinet/ip6.h>
2607c478bd9Sstevel@tonic-gate #include <netinet/icmp6.h>
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate #include <inet/ip.h>
2637c478bd9Sstevel@tonic-gate #include <inet/ip6.h>
2647c478bd9Sstevel@tonic-gate #include <inet/ip_ndp.h>
265*0f1702c5SYu Xiangning #include <inet/ip_impl.h>
266ff550d0eSmasputra #include <inet/udp_impl.h>
2677c478bd9Sstevel@tonic-gate #include <inet/sctp_ip.h>
268f4b3ec61Sdh #include <inet/sctp/sctp_impl.h>
269fc80c0dfSnordmark #include <inet/rawip_impl.h>
270fc80c0dfSnordmark #include <inet/rts_impl.h>
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
2737c478bd9Sstevel@tonic-gate 
2747c478bd9Sstevel@tonic-gate #include <inet/ipclassifier.h>
275*0f1702c5SYu Xiangning #include <inet/tcp.h>
2767c478bd9Sstevel@tonic-gate #include <inet/ipsec_impl.h>
2777c478bd9Sstevel@tonic-gate 
27845916cd2Sjpk #include <sys/tsol/tnet.h>
279*0f1702c5SYu Xiangning #include <sys/sockio.h>
28045916cd2Sjpk 
/*
 * Classifier debug tracing.
 *
 * IPCL_DEBUG tracks DEBUG.  When it is defined, IPCL_DEBUG_LVL(level, args)
 * calls printf with the parenthesized argument list 'args' if any bit of
 * 'level' is set in ipcl_debug_level; otherwise it expands to an empty
 * block and 'args' is never evaluated.
 *
 * 'level' is parenthesized so that an expression such as (a | b) is masked
 * as a whole, and the debug expansion is enclosed in braces so that it is
 * a complete statement: an 'else' following the macro can never attach to
 * the macro's own 'if', and both variants parse the same way.
 */
#ifdef DEBUG
#define	IPCL_DEBUG
#else
#undef	IPCL_DEBUG
#endif

#ifdef	IPCL_DEBUG
int	ipcl_debug_level = 0;
#define	IPCL_DEBUG_LVL(level, args)	\
	{ if (ipcl_debug_level & (level)) { printf args; } }
#else
#define	IPCL_DEBUG_LVL(level, args) {; }
#endif
294f4b3ec61Sdh /* Old value for compatibility. Setable in /etc/system */
2957c478bd9Sstevel@tonic-gate uint_t tcp_conn_hash_size = 0;
2967c478bd9Sstevel@tonic-gate 
297f4b3ec61Sdh /* New value. Zero means choose automatically.  Setable in /etc/system */
2987c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_size = 0;
2997c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_memfactor = 8192;
3007c478bd9Sstevel@tonic-gate uint_t ipcl_conn_hash_maxsize = 82500;
3017c478bd9Sstevel@tonic-gate 
3027c478bd9Sstevel@tonic-gate /* bind/udp fanout table size */
3037c478bd9Sstevel@tonic-gate uint_t ipcl_bind_fanout_size = 512;
304ee4701baSericheng uint_t ipcl_udp_fanout_size = 16384;
3057c478bd9Sstevel@tonic-gate 
3067c478bd9Sstevel@tonic-gate /* Raw socket fanout size.  Must be a power of 2. */
3077c478bd9Sstevel@tonic-gate uint_t ipcl_raw_fanout_size = 256;
3087c478bd9Sstevel@tonic-gate 
/*
 * Initializer for a table of primes suited to hash table sizing.  The
 * entry at index N (N = 0 through 28) is the largest prime that does not
 * exceed 2^N - 2^(N-2); indices 0 through 2 are zero, and a trailing zero
 * follows the entry for N = 28.
 */

#define	P2Ps() {							\
		0, 0, 0, 5, 11, 23, 47, 89,				\
		191, 383, 761, 1531, 3067, 6143, 12281, 24571,		\
		49139, 98299, 196597, 393209, 786431, 1572853,		\
		3145721, 6291449, 12582893, 25165813, 50331599,		\
		100663291, 201326557,					\
		0}
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate /*
320fc80c0dfSnordmark  * wrapper structure to ensure that conn and what follows it (tcp_t, etc)
321fc80c0dfSnordmark  * are aligned on cache lines.
3227c478bd9Sstevel@tonic-gate  */
323fc80c0dfSnordmark typedef union itc_s {
324fc80c0dfSnordmark 	conn_t	itc_conn;
325fc80c0dfSnordmark 	char	itcu_filler[CACHE_ALIGN(conn_s)];
3267c478bd9Sstevel@tonic-gate } itc_t;
3277c478bd9Sstevel@tonic-gate 
328fc80c0dfSnordmark struct kmem_cache  *tcp_conn_cache;
329fc80c0dfSnordmark struct kmem_cache  *ip_conn_cache;
330*0f1702c5SYu Xiangning struct kmem_cache  *ip_helper_stream_cache;
3317c478bd9Sstevel@tonic-gate extern struct kmem_cache  *sctp_conn_cache;
3327c478bd9Sstevel@tonic-gate extern struct kmem_cache  *tcp_sack_info_cache;
3337c478bd9Sstevel@tonic-gate extern struct kmem_cache  *tcp_iphc_cache;
334fc80c0dfSnordmark struct kmem_cache  *udp_conn_cache;
335fc80c0dfSnordmark struct kmem_cache  *rawip_conn_cache;
336fc80c0dfSnordmark struct kmem_cache  *rts_conn_cache;
3377c478bd9Sstevel@tonic-gate 
3387c478bd9Sstevel@tonic-gate extern void	tcp_timermp_free(tcp_t *);
3397c478bd9Sstevel@tonic-gate extern mblk_t	*tcp_timermp_alloc(int);
3407c478bd9Sstevel@tonic-gate 
341fc80c0dfSnordmark static int	ip_conn_constructor(void *, void *, int);
342fc80c0dfSnordmark static void	ip_conn_destructor(void *, void *);
343fc80c0dfSnordmark 
344fc80c0dfSnordmark static int	tcp_conn_constructor(void *, void *, int);
345fc80c0dfSnordmark static void	tcp_conn_destructor(void *, void *);
346fc80c0dfSnordmark 
347fc80c0dfSnordmark static int	udp_conn_constructor(void *, void *, int);
348fc80c0dfSnordmark static void	udp_conn_destructor(void *, void *);
349fc80c0dfSnordmark 
350fc80c0dfSnordmark static int	rawip_conn_constructor(void *, void *, int);
351fc80c0dfSnordmark static void	rawip_conn_destructor(void *, void *);
352fc80c0dfSnordmark 
353fc80c0dfSnordmark static int	rts_conn_constructor(void *, void *, int);
354fc80c0dfSnordmark static void	rts_conn_destructor(void *, void *);
3557c478bd9Sstevel@tonic-gate 
356*0f1702c5SYu Xiangning static int	ip_helper_stream_constructor(void *, void *, int);
357*0f1702c5SYu Xiangning static void	ip_helper_stream_destructor(void *, void *);
358*0f1702c5SYu Xiangning 
359*0f1702c5SYu Xiangning boolean_t	ip_use_helper_cache = B_TRUE;
360*0f1702c5SYu Xiangning 
#ifdef	IPCL_DEBUG
#define	INET_NTOA_BUFSIZE	18

/*
 * Debug helper: write the IPv4 address 'in' into 'b' as dotted-decimal
 * text and return 'b'.
 *
 * The four bytes of 'in' are printed in the order they sit in memory, so
 * an address held in network byte order prints most significant octet
 * first.  The longest result is 15 characters plus the terminating NUL;
 * a buffer of INET_NTOA_BUFSIZE bytes is large enough.
 */
static char *
inet_ntoa_r(uint32_t in, char *b)
{
	const unsigned char	*octet = (const unsigned char *)&in;

	(void) sprintf(b, "%d.%d.%d.%d",
	    octet[0], octet[1], octet[2], octet[3]);
	return (b);
}
#endif
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate /*
376f4b3ec61Sdh  * Global (for all stack instances) init routine
3777c478bd9Sstevel@tonic-gate  */
3787c478bd9Sstevel@tonic-gate void
379f4b3ec61Sdh ipcl_g_init(void)
3807c478bd9Sstevel@tonic-gate {
381fc80c0dfSnordmark 	ip_conn_cache = kmem_cache_create("ip_conn_cache",
3827c478bd9Sstevel@tonic-gate 	    sizeof (conn_t), CACHE_ALIGN_SIZE,
383fc80c0dfSnordmark 	    ip_conn_constructor, ip_conn_destructor,
384fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
385fc80c0dfSnordmark 
386fc80c0dfSnordmark 	tcp_conn_cache = kmem_cache_create("tcp_conn_cache",
387fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (tcp_t), CACHE_ALIGN_SIZE,
388fc80c0dfSnordmark 	    tcp_conn_constructor, tcp_conn_destructor,
389fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
390fc80c0dfSnordmark 
391fc80c0dfSnordmark 	udp_conn_cache = kmem_cache_create("udp_conn_cache",
392fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (udp_t), CACHE_ALIGN_SIZE,
393fc80c0dfSnordmark 	    udp_conn_constructor, udp_conn_destructor,
394fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
3957c478bd9Sstevel@tonic-gate 
396fc80c0dfSnordmark 	rawip_conn_cache = kmem_cache_create("rawip_conn_cache",
397fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (icmp_t), CACHE_ALIGN_SIZE,
398fc80c0dfSnordmark 	    rawip_conn_constructor, rawip_conn_destructor,
399fc80c0dfSnordmark 	    NULL, NULL, NULL, 0);
400fc80c0dfSnordmark 
401fc80c0dfSnordmark 	rts_conn_cache = kmem_cache_create("rts_conn_cache",
402fc80c0dfSnordmark 	    sizeof (itc_t) + sizeof (rts_t), CACHE_ALIGN_SIZE,
403fc80c0dfSnordmark 	    rts_conn_constructor, rts_conn_destructor,
4047c478bd9Sstevel@tonic-gate 	    NULL, NULL, NULL, 0);
405*0f1702c5SYu Xiangning 
406*0f1702c5SYu Xiangning 	if (ip_use_helper_cache) {
407*0f1702c5SYu Xiangning 		ip_helper_stream_cache = kmem_cache_create
408*0f1702c5SYu Xiangning 		    ("ip_helper_stream_cache", sizeof (ip_helper_stream_info_t),
409*0f1702c5SYu Xiangning 		    CACHE_ALIGN_SIZE, ip_helper_stream_constructor,
410*0f1702c5SYu Xiangning 		    ip_helper_stream_destructor, NULL, NULL, NULL, 0);
411*0f1702c5SYu Xiangning 	} else {
412*0f1702c5SYu Xiangning 		ip_helper_stream_cache = NULL;
413*0f1702c5SYu Xiangning 	}
414f4b3ec61Sdh }
415f4b3ec61Sdh 
416f4b3ec61Sdh /*
417f4b3ec61Sdh  * ipclassifier intialization routine, sets up hash tables.
418f4b3ec61Sdh  */
419f4b3ec61Sdh void
420f4b3ec61Sdh ipcl_init(ip_stack_t *ipst)
421f4b3ec61Sdh {
422f4b3ec61Sdh 	int i;
423f4b3ec61Sdh 	int sizes[] = P2Ps();
4247c478bd9Sstevel@tonic-gate 
4257c478bd9Sstevel@tonic-gate 	/*
426f4b3ec61Sdh 	 * Calculate size of conn fanout table from /etc/system settings
4277c478bd9Sstevel@tonic-gate 	 */
4287c478bd9Sstevel@tonic-gate 	if (ipcl_conn_hash_size != 0) {
429f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = ipcl_conn_hash_size;
4307c478bd9Sstevel@tonic-gate 	} else if (tcp_conn_hash_size != 0) {
431f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = tcp_conn_hash_size;
4327c478bd9Sstevel@tonic-gate 	} else {
4337c478bd9Sstevel@tonic-gate 		extern pgcnt_t freemem;
4347c478bd9Sstevel@tonic-gate 
435f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size =
4367c478bd9Sstevel@tonic-gate 		    (freemem * PAGESIZE) / ipcl_conn_hash_memfactor;
4377c478bd9Sstevel@tonic-gate 
438f4b3ec61Sdh 		if (ipst->ips_ipcl_conn_fanout_size > ipcl_conn_hash_maxsize) {
439f4b3ec61Sdh 			ipst->ips_ipcl_conn_fanout_size =
440f4b3ec61Sdh 			    ipcl_conn_hash_maxsize;
441f4b3ec61Sdh 		}
4427c478bd9Sstevel@tonic-gate 	}
4437c478bd9Sstevel@tonic-gate 
4447c478bd9Sstevel@tonic-gate 	for (i = 9; i < sizeof (sizes) / sizeof (*sizes) - 1; i++) {
445f4b3ec61Sdh 		if (sizes[i] >= ipst->ips_ipcl_conn_fanout_size) {
4467c478bd9Sstevel@tonic-gate 			break;
4477c478bd9Sstevel@tonic-gate 		}
4487c478bd9Sstevel@tonic-gate 	}
449f4b3ec61Sdh 	if ((ipst->ips_ipcl_conn_fanout_size = sizes[i]) == 0) {
4507c478bd9Sstevel@tonic-gate 		/* Out of range, use the 2^16 value */
451f4b3ec61Sdh 		ipst->ips_ipcl_conn_fanout_size = sizes[16];
4527c478bd9Sstevel@tonic-gate 	}
4537c478bd9Sstevel@tonic-gate 
454f4b3ec61Sdh 	/* Take values from /etc/system */
455f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout_size = ipcl_bind_fanout_size;
456f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout_size = ipcl_udp_fanout_size;
457f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout_size = ipcl_raw_fanout_size;
458f4b3ec61Sdh 
459f4b3ec61Sdh 	ASSERT(ipst->ips_ipcl_conn_fanout == NULL);
460f4b3ec61Sdh 
461f4b3ec61Sdh 	ipst->ips_ipcl_conn_fanout = kmem_zalloc(
462f4b3ec61Sdh 	    ipst->ips_ipcl_conn_fanout_size * sizeof (connf_t), KM_SLEEP);
463f4b3ec61Sdh 
464f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
465f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_conn_fanout[i].connf_lock, NULL,
4667c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4677c478bd9Sstevel@tonic-gate 	}
4687c478bd9Sstevel@tonic-gate 
469f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout = kmem_zalloc(
470f4b3ec61Sdh 	    ipst->ips_ipcl_bind_fanout_size * sizeof (connf_t), KM_SLEEP);
4717c478bd9Sstevel@tonic-gate 
472f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
473f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_bind_fanout[i].connf_lock, NULL,
4747c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4757c478bd9Sstevel@tonic-gate 	}
4767c478bd9Sstevel@tonic-gate 
477f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout = kmem_zalloc(IPPROTO_MAX *
478f4b3ec61Sdh 	    sizeof (connf_t), KM_SLEEP);
479f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
480f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_proto_fanout[i].connf_lock, NULL,
4817c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4827c478bd9Sstevel@tonic-gate 	}
483f4b3ec61Sdh 
484f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout_v6 = kmem_zalloc(IPPROTO_MAX *
485f4b3ec61Sdh 	    sizeof (connf_t), KM_SLEEP);
486f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
487f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock, NULL,
4887c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
4897c478bd9Sstevel@tonic-gate 	}
4907c478bd9Sstevel@tonic-gate 
491f4b3ec61Sdh 	ipst->ips_rts_clients = kmem_zalloc(sizeof (connf_t), KM_SLEEP);
492f4b3ec61Sdh 	mutex_init(&ipst->ips_rts_clients->connf_lock,
493f4b3ec61Sdh 	    NULL, MUTEX_DEFAULT, NULL);
4947c478bd9Sstevel@tonic-gate 
495f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout = kmem_zalloc(
496f4b3ec61Sdh 	    ipst->ips_ipcl_udp_fanout_size * sizeof (connf_t), KM_SLEEP);
497f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
498f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_udp_fanout[i].connf_lock, NULL,
4997c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
5007c478bd9Sstevel@tonic-gate 	}
5017c478bd9Sstevel@tonic-gate 
502f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout = kmem_zalloc(
503f4b3ec61Sdh 	    ipst->ips_ipcl_raw_fanout_size * sizeof (connf_t), KM_SLEEP);
504f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
505f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_raw_fanout[i].connf_lock, NULL,
5067c478bd9Sstevel@tonic-gate 		    MUTEX_DEFAULT, NULL);
5077c478bd9Sstevel@tonic-gate 	}
5087c478bd9Sstevel@tonic-gate 
509f4b3ec61Sdh 	ipst->ips_ipcl_globalhash_fanout = kmem_zalloc(
510f4b3ec61Sdh 	    sizeof (connf_t) * CONN_G_HASH_SIZE, KM_SLEEP);
5117c478bd9Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
512f4b3ec61Sdh 		mutex_init(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock,
513f4b3ec61Sdh 		    NULL, MUTEX_DEFAULT, NULL);
5147c478bd9Sstevel@tonic-gate 	}
5157c478bd9Sstevel@tonic-gate }
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate void
518f4b3ec61Sdh ipcl_g_destroy(void)
5197c478bd9Sstevel@tonic-gate {
520fc80c0dfSnordmark 	kmem_cache_destroy(ip_conn_cache);
521fc80c0dfSnordmark 	kmem_cache_destroy(tcp_conn_cache);
522fc80c0dfSnordmark 	kmem_cache_destroy(udp_conn_cache);
523fc80c0dfSnordmark 	kmem_cache_destroy(rawip_conn_cache);
524fc80c0dfSnordmark 	kmem_cache_destroy(rts_conn_cache);
525f4b3ec61Sdh }
526f4b3ec61Sdh 
527f4b3ec61Sdh /*
528f4b3ec61Sdh  * All user-level and kernel use of the stack must be gone
529f4b3ec61Sdh  * by now.
530f4b3ec61Sdh  */
531f4b3ec61Sdh void
532f4b3ec61Sdh ipcl_destroy(ip_stack_t *ipst)
533f4b3ec61Sdh {
534f4b3ec61Sdh 	int i;
535f4b3ec61Sdh 
536f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_conn_fanout_size; i++) {
537f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_conn_fanout[i].connf_head == NULL);
538f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_conn_fanout[i].connf_lock);
539f4b3ec61Sdh 	}
540f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_conn_fanout, ipst->ips_ipcl_conn_fanout_size *
541f4b3ec61Sdh 	    sizeof (connf_t));
542f4b3ec61Sdh 	ipst->ips_ipcl_conn_fanout = NULL;
543f4b3ec61Sdh 
544f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_bind_fanout_size; i++) {
545f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_bind_fanout[i].connf_head == NULL);
546f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_bind_fanout[i].connf_lock);
547f4b3ec61Sdh 	}
548f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_bind_fanout, ipst->ips_ipcl_bind_fanout_size *
549f4b3ec61Sdh 	    sizeof (connf_t));
550f4b3ec61Sdh 	ipst->ips_ipcl_bind_fanout = NULL;
551f4b3ec61Sdh 
552f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
553f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_proto_fanout[i].connf_head == NULL);
554f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_proto_fanout[i].connf_lock);
555f4b3ec61Sdh 	}
556f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_proto_fanout, IPPROTO_MAX * sizeof (connf_t));
557f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout = NULL;
558f4b3ec61Sdh 
559f4b3ec61Sdh 	for (i = 0; i < IPPROTO_MAX; i++) {
560f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_proto_fanout_v6[i].connf_head == NULL);
561f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_proto_fanout_v6[i].connf_lock);
562f4b3ec61Sdh 	}
563f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_proto_fanout_v6,
564f4b3ec61Sdh 	    IPPROTO_MAX * sizeof (connf_t));
565f4b3ec61Sdh 	ipst->ips_ipcl_proto_fanout_v6 = NULL;
566f4b3ec61Sdh 
567f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_udp_fanout_size; i++) {
568f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_udp_fanout[i].connf_head == NULL);
569f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_udp_fanout[i].connf_lock);
570f4b3ec61Sdh 	}
571f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_udp_fanout, ipst->ips_ipcl_udp_fanout_size *
572f4b3ec61Sdh 	    sizeof (connf_t));
573f4b3ec61Sdh 	ipst->ips_ipcl_udp_fanout = NULL;
574f4b3ec61Sdh 
575f4b3ec61Sdh 	for (i = 0; i < ipst->ips_ipcl_raw_fanout_size; i++) {
576f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_raw_fanout[i].connf_head == NULL);
577f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_raw_fanout[i].connf_lock);
578f4b3ec61Sdh 	}
579f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_raw_fanout, ipst->ips_ipcl_raw_fanout_size *
580f4b3ec61Sdh 	    sizeof (connf_t));
581f4b3ec61Sdh 	ipst->ips_ipcl_raw_fanout = NULL;
582f4b3ec61Sdh 
583f4b3ec61Sdh 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
584f4b3ec61Sdh 		ASSERT(ipst->ips_ipcl_globalhash_fanout[i].connf_head == NULL);
585f4b3ec61Sdh 		mutex_destroy(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
586f4b3ec61Sdh 	}
587f4b3ec61Sdh 	kmem_free(ipst->ips_ipcl_globalhash_fanout,
588f4b3ec61Sdh 	    sizeof (connf_t) * CONN_G_HASH_SIZE);
589f4b3ec61Sdh 	ipst->ips_ipcl_globalhash_fanout = NULL;
590f4b3ec61Sdh 
591f4b3ec61Sdh 	ASSERT(ipst->ips_rts_clients->connf_head == NULL);
592f4b3ec61Sdh 	mutex_destroy(&ipst->ips_rts_clients->connf_lock);
593f4b3ec61Sdh 	kmem_free(ipst->ips_rts_clients, sizeof (connf_t));
594f4b3ec61Sdh 	ipst->ips_rts_clients = NULL;
5957c478bd9Sstevel@tonic-gate }
5967c478bd9Sstevel@tonic-gate 
5977c478bd9Sstevel@tonic-gate /*
5987c478bd9Sstevel@tonic-gate  * conn creation routine. initialize the conn, sets the reference
5997c478bd9Sstevel@tonic-gate  * and inserts it in the global hash table.
6007c478bd9Sstevel@tonic-gate  */
6017c478bd9Sstevel@tonic-gate conn_t *
602f4b3ec61Sdh ipcl_conn_create(uint32_t type, int sleep, netstack_t *ns)
6037c478bd9Sstevel@tonic-gate {
6047c478bd9Sstevel@tonic-gate 	conn_t	*connp;
605f4b3ec61Sdh 	sctp_stack_t *sctps;
606fc80c0dfSnordmark 	struct kmem_cache *conn_cache;
6077c478bd9Sstevel@tonic-gate 
6087c478bd9Sstevel@tonic-gate 	switch (type) {
6097c478bd9Sstevel@tonic-gate 	case IPCL_SCTPCONN:
6107c478bd9Sstevel@tonic-gate 		if ((connp = kmem_cache_alloc(sctp_conn_cache, sleep)) == NULL)
6117c478bd9Sstevel@tonic-gate 			return (NULL);
612121e5416Skcpoon 		sctp_conn_init(connp);
613f4b3ec61Sdh 		sctps = ns->netstack_sctp;
614f4b3ec61Sdh 		SCTP_G_Q_REFHOLD(sctps);
615f4b3ec61Sdh 		netstack_hold(ns);
616f4b3ec61Sdh 		connp->conn_netstack = ns;
617fc80c0dfSnordmark 		return (connp);
618fc80c0dfSnordmark 
619fc80c0dfSnordmark 	case IPCL_TCPCONN:
620fc80c0dfSnordmark 		conn_cache = tcp_conn_cache;
6217c478bd9Sstevel@tonic-gate 		break;
622fc80c0dfSnordmark 
623fc80c0dfSnordmark 	case IPCL_UDPCONN:
624fc80c0dfSnordmark 		conn_cache = udp_conn_cache;
625fc80c0dfSnordmark 		break;
626fc80c0dfSnordmark 
627fc80c0dfSnordmark 	case IPCL_RAWIPCONN:
628fc80c0dfSnordmark 		conn_cache = rawip_conn_cache;
629fc80c0dfSnordmark 		break;
630fc80c0dfSnordmark 
631fc80c0dfSnordmark 	case IPCL_RTSCONN:
632fc80c0dfSnordmark 		conn_cache = rts_conn_cache;
633fc80c0dfSnordmark 		break;
634fc80c0dfSnordmark 
6357c478bd9Sstevel@tonic-gate 	case IPCL_IPCCONN:
636fc80c0dfSnordmark 		conn_cache = ip_conn_cache;
6377c478bd9Sstevel@tonic-gate 		break;
638fc80c0dfSnordmark 
639ff550d0eSmasputra 	default:
640ff550d0eSmasputra 		connp = NULL;
641ff550d0eSmasputra 		ASSERT(0);
6427c478bd9Sstevel@tonic-gate 	}
6437c478bd9Sstevel@tonic-gate 
644fc80c0dfSnordmark 	if ((connp = kmem_cache_alloc(conn_cache, sleep)) == NULL)
645fc80c0dfSnordmark 		return (NULL);
646fc80c0dfSnordmark 
647fc80c0dfSnordmark 	connp->conn_ref = 1;
648fc80c0dfSnordmark 	netstack_hold(ns);
649fc80c0dfSnordmark 	connp->conn_netstack = ns;
650fc80c0dfSnordmark 	ipcl_globalhash_insert(connp);
6517c478bd9Sstevel@tonic-gate 	return (connp);
6527c478bd9Sstevel@tonic-gate }
6537c478bd9Sstevel@tonic-gate 
6547c478bd9Sstevel@tonic-gate void
6557c478bd9Sstevel@tonic-gate ipcl_conn_destroy(conn_t *connp)
6567c478bd9Sstevel@tonic-gate {
6577c478bd9Sstevel@tonic-gate 	mblk_t	*mp;
658f4b3ec61Sdh 	netstack_t	*ns = connp->conn_netstack;
6597c478bd9Sstevel@tonic-gate 
6607c478bd9Sstevel@tonic-gate 	ASSERT(!MUTEX_HELD(&connp->conn_lock));
6617c478bd9Sstevel@tonic-gate 	ASSERT(connp->conn_ref == 0);
6627c478bd9Sstevel@tonic-gate 	ASSERT(connp->conn_ire_cache == NULL);
6637c478bd9Sstevel@tonic-gate 
664fab254e2SAruna Ramakrishna 	DTRACE_PROBE1(conn__destroy, conn_t *, connp);
665fab254e2SAruna Ramakrishna 
66645916cd2Sjpk 	if (connp->conn_peercred != NULL &&
66745916cd2Sjpk 	    connp->conn_peercred != connp->conn_cred)
66845916cd2Sjpk 		crfree(connp->conn_peercred);
66945916cd2Sjpk 	connp->conn_peercred = NULL;
67045916cd2Sjpk 
67145916cd2Sjpk 	if (connp->conn_cred != NULL) {
67245916cd2Sjpk 		crfree(connp->conn_cred);
67345916cd2Sjpk 		connp->conn_cred = NULL;
67445916cd2Sjpk 	}
67545916cd2Sjpk 
6767c478bd9Sstevel@tonic-gate 	ipcl_globalhash_remove(connp);
6777c478bd9Sstevel@tonic-gate 
678fc80c0dfSnordmark 	/* FIXME: add separate tcp_conn_free()? */
6797c478bd9Sstevel@tonic-gate 	if (connp->conn_flags & IPCL_TCPCONN) {
680ff550d0eSmasputra 		tcp_t	*tcp = connp->conn_tcp;
681f4b3ec61Sdh 		tcp_stack_t *tcps;
682f4b3ec61Sdh 
683f4b3ec61Sdh 		ASSERT(tcp != NULL);
684f4b3ec61Sdh 		tcps = tcp->tcp_tcps;
685f4b3ec61Sdh 		if (tcps != NULL) {
686f4b3ec61Sdh 			if (connp->conn_latch != NULL) {
687f4b3ec61Sdh 				IPLATCH_REFRELE(connp->conn_latch, ns);
688f4b3ec61Sdh 				connp->conn_latch = NULL;
689f4b3ec61Sdh 			}
690f4b3ec61Sdh 			if (connp->conn_policy != NULL) {
691f4b3ec61Sdh 				IPPH_REFRELE(connp->conn_policy, ns);
692f4b3ec61Sdh 				connp->conn_policy = NULL;
693f4b3ec61Sdh 			}
694f4b3ec61Sdh 			tcp->tcp_tcps = NULL;
695f4b3ec61Sdh 			TCPS_REFRELE(tcps);
696f4b3ec61Sdh 		}
697ff550d0eSmasputra 
6987c478bd9Sstevel@tonic-gate 		tcp_free(tcp);
6997c478bd9Sstevel@tonic-gate 		mp = tcp->tcp_timercache;
70045916cd2Sjpk 		tcp->tcp_cred = NULL;
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 		if (tcp->tcp_sack_info != NULL) {
7037c478bd9Sstevel@tonic-gate 			bzero(tcp->tcp_sack_info, sizeof (tcp_sack_info_t));
7047c478bd9Sstevel@tonic-gate 			kmem_cache_free(tcp_sack_info_cache,
7057c478bd9Sstevel@tonic-gate 			    tcp->tcp_sack_info);
7067c478bd9Sstevel@tonic-gate 		}
7077c478bd9Sstevel@tonic-gate 		if (tcp->tcp_iphc != NULL) {
7087c478bd9Sstevel@tonic-gate 			if (tcp->tcp_hdr_grown) {
7097c478bd9Sstevel@tonic-gate 				kmem_free(tcp->tcp_iphc, tcp->tcp_iphc_len);
7107c478bd9Sstevel@tonic-gate 			} else {
7117c478bd9Sstevel@tonic-gate 				bzero(tcp->tcp_iphc, tcp->tcp_iphc_len);
7127c478bd9Sstevel@tonic-gate 				kmem_cache_free(tcp_iphc_cache, tcp->tcp_iphc);
7137c478bd9Sstevel@tonic-gate 			}
7147c478bd9Sstevel@tonic-gate 			tcp->tcp_iphc_len = 0;
7157c478bd9Sstevel@tonic-gate 		}
7167c478bd9Sstevel@tonic-gate 		ASSERT(tcp->tcp_iphc_len == 0);
7177c478bd9Sstevel@tonic-gate 
718f7f8e53dSKacheong Poon 		/*
719f7f8e53dSKacheong Poon 		 * tcp_rsrv_mp can be NULL if tcp_get_conn() fails to allocate
720f7f8e53dSKacheong Poon 		 * the mblk.
721f7f8e53dSKacheong Poon 		 */
722f7f8e53dSKacheong Poon 		if (tcp->tcp_rsrv_mp != NULL) {
723f7f8e53dSKacheong Poon 			freeb(tcp->tcp_rsrv_mp);
724f7f8e53dSKacheong Poon 			tcp->tcp_rsrv_mp = NULL;
725f7f8e53dSKacheong Poon 			mutex_destroy(&tcp->tcp_rsrv_mp_lock);
726f7f8e53dSKacheong Poon 		}
727f7f8e53dSKacheong Poon 
728f4b3ec61Sdh 		ASSERT(connp->conn_latch == NULL);
729f4b3ec61Sdh 		ASSERT(connp->conn_policy == NULL);
730f4b3ec61Sdh 
731f4b3ec61Sdh 		if (ns != NULL) {
732f4b3ec61Sdh 			ASSERT(tcp->tcp_tcps == NULL);
733f4b3ec61Sdh 			connp->conn_netstack = NULL;
734f4b3ec61Sdh 			netstack_rele(ns);
735f4b3ec61Sdh 		}
736fc80c0dfSnordmark 
737fc80c0dfSnordmark 		ipcl_conn_cleanup(connp);
738fc80c0dfSnordmark 		connp->conn_flags = IPCL_TCPCONN;
739fc80c0dfSnordmark 		bzero(tcp, sizeof (tcp_t));
740fc80c0dfSnordmark 
741fc80c0dfSnordmark 		tcp->tcp_timercache = mp;
742fc80c0dfSnordmark 		tcp->tcp_connp = connp;
743fc80c0dfSnordmark 		kmem_cache_free(tcp_conn_cache, connp);
744fc80c0dfSnordmark 		return;
745fc80c0dfSnordmark 	}
746fc80c0dfSnordmark 	if (connp->conn_latch != NULL) {
747fc80c0dfSnordmark 		IPLATCH_REFRELE(connp->conn_latch, connp->conn_netstack);
748fc80c0dfSnordmark 		connp->conn_latch = NULL;
749fc80c0dfSnordmark 	}
750fc80c0dfSnordmark 	if (connp->conn_policy != NULL) {
751fc80c0dfSnordmark 		IPPH_REFRELE(connp->conn_policy, connp->conn_netstack);
752fc80c0dfSnordmark 		connp->conn_policy = NULL;
753fc80c0dfSnordmark 	}
754fc80c0dfSnordmark 	if (connp->conn_ipsec_opt_mp != NULL) {
755fc80c0dfSnordmark 		freemsg(connp->conn_ipsec_opt_mp);
756fc80c0dfSnordmark 		connp->conn_ipsec_opt_mp = NULL;
757fc80c0dfSnordmark 	}
758fc80c0dfSnordmark 
759fc80c0dfSnordmark 	if (connp->conn_flags & IPCL_SCTPCONN) {
760f4b3ec61Sdh 		ASSERT(ns != NULL);
7617c478bd9Sstevel@tonic-gate 		sctp_free(connp);
762fc80c0dfSnordmark 		return;
763fc80c0dfSnordmark 	}
764fc80c0dfSnordmark 
765fc80c0dfSnordmark 	if (ns != NULL) {
766fc80c0dfSnordmark 		connp->conn_netstack = NULL;
767fc80c0dfSnordmark 		netstack_rele(ns);
768fc80c0dfSnordmark 	}
769*0f1702c5SYu Xiangning 
770fc80c0dfSnordmark 	ipcl_conn_cleanup(connp);
771fc80c0dfSnordmark 
772fc80c0dfSnordmark 	/* leave conn_priv aka conn_udp, conn_icmp, etc in place. */
773fc80c0dfSnordmark 	if (connp->conn_flags & IPCL_UDPCONN) {
774fc80c0dfSnordmark 		connp->conn_flags = IPCL_UDPCONN;
775fc80c0dfSnordmark 		kmem_cache_free(udp_conn_cache, connp);
776fc80c0dfSnordmark 	} else if (connp->conn_flags & IPCL_RAWIPCONN) {
777*0f1702c5SYu Xiangning 
778fc80c0dfSnordmark 		connp->conn_flags = IPCL_RAWIPCONN;
779fc80c0dfSnordmark 		connp->conn_ulp = IPPROTO_ICMP;
780fc80c0dfSnordmark 		kmem_cache_free(rawip_conn_cache, connp);
781fc80c0dfSnordmark 	} else if (connp->conn_flags & IPCL_RTSCONN) {
782fc80c0dfSnordmark 		connp->conn_flags = IPCL_RTSCONN;
783fc80c0dfSnordmark 		kmem_cache_free(rts_conn_cache, connp);
7847c478bd9Sstevel@tonic-gate 	} else {
785fc80c0dfSnordmark 		connp->conn_flags = IPCL_IPCCONN;
786fc80c0dfSnordmark 		ASSERT(connp->conn_flags & IPCL_IPCCONN);
787fc80c0dfSnordmark 		ASSERT(connp->conn_priv == NULL);
788fc80c0dfSnordmark 		kmem_cache_free(ip_conn_cache, connp);
7897c478bd9Sstevel@tonic-gate 	}
7907c478bd9Sstevel@tonic-gate }
7917c478bd9Sstevel@tonic-gate 
7927c478bd9Sstevel@tonic-gate /*
7937c478bd9Sstevel@tonic-gate  * Running in cluster mode - deregister listener information
7947c478bd9Sstevel@tonic-gate  */
7957c478bd9Sstevel@tonic-gate 
7967c478bd9Sstevel@tonic-gate static void
7977c478bd9Sstevel@tonic-gate ipcl_conn_unlisten(conn_t *connp)
7987c478bd9Sstevel@tonic-gate {
7997c478bd9Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) != 0);
8007c478bd9Sstevel@tonic-gate 	ASSERT(connp->conn_lport != 0);
8017c478bd9Sstevel@tonic-gate 
8027c478bd9Sstevel@tonic-gate 	if (cl_inet_unlisten != NULL) {
8037c478bd9Sstevel@tonic-gate 		sa_family_t	addr_family;
8047c478bd9Sstevel@tonic-gate 		uint8_t		*laddrp;
8057c478bd9Sstevel@tonic-gate 
8067c478bd9Sstevel@tonic-gate 		if (connp->conn_pkt_isv6) {
8077c478bd9Sstevel@tonic-gate 			addr_family = AF_INET6;
8087c478bd9Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source_v6;
8097c478bd9Sstevel@tonic-gate 		} else {
8107c478bd9Sstevel@tonic-gate 			addr_family = AF_INET;
8117c478bd9Sstevel@tonic-gate 			laddrp = (uint8_t *)&connp->conn_bound_source;
8127c478bd9Sstevel@tonic-gate 		}
8137c478bd9Sstevel@tonic-gate 		(*cl_inet_unlisten)(IPPROTO_TCP, addr_family, laddrp,
8147c478bd9Sstevel@tonic-gate 		    connp->conn_lport);
8157c478bd9Sstevel@tonic-gate 	}
8167c478bd9Sstevel@tonic-gate 	connp->conn_flags &= ~IPCL_CL_LISTENER;
8177c478bd9Sstevel@tonic-gate }
8187c478bd9Sstevel@tonic-gate 
/*
 * Unlink `connp' from whatever fanout bucket it is currently on (a no-op if
 * conn_fanout is NULL).  Under the bucket lock the conn is unlinked, the
 * listener registration is dropped for IPCL_CL_LISTENER conns, and the
 * reference held by the hash list is released with CONN_DEC_REF.
 *
 * The flag naming the table the conn belonged to is left set and
 * IPCL_REMOVED is added instead, so that when debugging one can still see
 * which hash table this connection was in.
 */
#define	IPCL_HASH_REMOVE(connp)	{					\
	connf_t	*rm_connfp = (connp)->conn_fanout;			\
	ASSERT(!MUTEX_HELD(&((connp)->conn_lock)));			\
	if (rm_connfp != NULL) {					\
		IPCL_DEBUG_LVL(4, ("IPCL_HASH_REMOVE: connp %p",	\
		    (void *)(connp)));					\
		mutex_enter(&rm_connfp->connf_lock);			\
		if ((connp)->conn_next != NULL) {			\
			(connp)->conn_next->conn_prev =			\
			    (connp)->conn_prev;				\
		}							\
		if ((connp)->conn_prev != NULL)	{			\
			(connp)->conn_prev->conn_next =			\
			    (connp)->conn_next;				\
		} else {						\
			rm_connfp->connf_head = (connp)->conn_next;	\
		}							\
		(connp)->conn_fanout = NULL;				\
		(connp)->conn_next = NULL;				\
		(connp)->conn_prev = NULL;				\
		(connp)->conn_flags |= IPCL_REMOVED;			\
		if (((connp)->conn_flags & IPCL_CL_LISTENER) != 0) {	\
			ipcl_conn_unlisten((connp));			\
		}							\
		CONN_DEC_REF((connp));					\
		mutex_exit(&rm_connfp->connf_lock);			\
	}								\
}
8497c478bd9Sstevel@tonic-gate 
8507c478bd9Sstevel@tonic-gate void
8517c478bd9Sstevel@tonic-gate ipcl_hash_remove(conn_t *connp)
8527c478bd9Sstevel@tonic-gate {
8537c478bd9Sstevel@tonic-gate 	IPCL_HASH_REMOVE(connp);
8547c478bd9Sstevel@tonic-gate }
8557c478bd9Sstevel@tonic-gate 
8567c478bd9Sstevel@tonic-gate /*
8577c478bd9Sstevel@tonic-gate  * The whole purpose of this function is allow removal of
8587c478bd9Sstevel@tonic-gate  * a conn_t from the connected hash for timewait reclaim.
8597c478bd9Sstevel@tonic-gate  * This is essentially a TW reclaim fastpath where timewait
8607c478bd9Sstevel@tonic-gate  * collector checks under fanout lock (so no one else can
8617c478bd9Sstevel@tonic-gate  * get access to the conn_t) that refcnt is 2 i.e. one for
8627c478bd9Sstevel@tonic-gate  * TCP and one for the classifier hash list. If ref count
8637c478bd9Sstevel@tonic-gate  * is indeed 2, we can just remove the conn under lock and
8647c478bd9Sstevel@tonic-gate  * avoid cleaning up the conn under squeue. This gives us
8657c478bd9Sstevel@tonic-gate  * improved performance.
8667c478bd9Sstevel@tonic-gate  */
8677c478bd9Sstevel@tonic-gate void
8687c478bd9Sstevel@tonic-gate ipcl_hash_remove_locked(conn_t *connp, connf_t	*connfp)
8697c478bd9Sstevel@tonic-gate {
8707c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connfp->connf_lock));
8717c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
8727c478bd9Sstevel@tonic-gate 	ASSERT((connp->conn_flags & IPCL_CL_LISTENER) == 0);
8737c478bd9Sstevel@tonic-gate 
8747c478bd9Sstevel@tonic-gate 	if ((connp)->conn_next != NULL) {
875121e5416Skcpoon 		(connp)->conn_next->conn_prev = (connp)->conn_prev;
8767c478bd9Sstevel@tonic-gate 	}
8777c478bd9Sstevel@tonic-gate 	if ((connp)->conn_prev != NULL) {
878121e5416Skcpoon 		(connp)->conn_prev->conn_next = (connp)->conn_next;
8797c478bd9Sstevel@tonic-gate 	} else {
8807c478bd9Sstevel@tonic-gate 		connfp->connf_head = (connp)->conn_next;
8817c478bd9Sstevel@tonic-gate 	}
8827c478bd9Sstevel@tonic-gate 	(connp)->conn_fanout = NULL;
8837c478bd9Sstevel@tonic-gate 	(connp)->conn_next = NULL;
8847c478bd9Sstevel@tonic-gate 	(connp)->conn_prev = NULL;
8857c478bd9Sstevel@tonic-gate 	(connp)->conn_flags |= IPCL_REMOVED;
8867c478bd9Sstevel@tonic-gate 	ASSERT((connp)->conn_ref == 2);
8877c478bd9Sstevel@tonic-gate 	(connp)->conn_ref--;
8887c478bd9Sstevel@tonic-gate }
8897c478bd9Sstevel@tonic-gate 
/*
 * Push `connp' onto the head of bucket `connfp'.  The conn must not be on
 * any fanout list.  IPCL_REMOVED is cleared, IPCL_CONNECTED is set, and a
 * reference is taken on behalf of the hash list.  This macro does not take
 * the bucket lock itself.
 */
#define	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp) {		\
	ASSERT((connp)->conn_fanout == NULL);				\
	ASSERT((connp)->conn_next == NULL);				\
	ASSERT((connp)->conn_prev == NULL);				\
	if ((connfp)->connf_head != NULL) {				\
		(connfp)->connf_head->conn_prev = (connp);		\
		(connp)->conn_next = (connfp)->connf_head;		\
	}								\
	(connfp)->connf_head = (connp);					\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags =						\
	    ((connp)->conn_flags & ~IPCL_REMOVED) | IPCL_CONNECTED;	\
	CONN_INC_REF(connp);						\
}
9047c478bd9Sstevel@tonic-gate 
/*
 * Move `connp' to the head of bucket `connfp' as a connected entry: unhash
 * it from its current bucket (if any), then take the new bucket's lock
 * around the actual insertion.
 */
#define	IPCL_HASH_INSERT_CONNECTED(connfp, connp) {			\
	IPCL_DEBUG_LVL(8, ("IPCL_HASH_INSERT_CONNECTED: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);		\
	mutex_exit(&(connfp)->connf_lock);				\
}
9137c478bd9Sstevel@tonic-gate 
/*
 * Insert `connp' into bucket `connfp' as a bound entry.  The conn is first
 * unhashed from its current bucket (if any).  Under the new bucket's lock it
 * is linked in ahead of the first entry for which
 * _IPCL_V4_MATCH_ANY(conn_srcv6) is true, or at the tail when no entry
 * matches.  IPCL_REMOVED is cleared, IPCL_BOUND is set, and a reference is
 * taken on behalf of the hash list.
 */
#define	IPCL_HASH_INSERT_BOUND(connfp, connp) {				\
	conn_t *pconnp = NULL, *nconnp;					\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_BOUND: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	/* Find the insertion point: pconnp precedes it, nconnp follows. */ \
	nconnp = (connfp)->connf_head;					\
	while (nconnp != NULL &&					\
	    !_IPCL_V4_MATCH_ANY(nconnp->conn_srcv6)) {			\
		pconnp = nconnp;					\
		nconnp = nconnp->conn_next;				\
	}								\
	if (pconnp != NULL) {						\
		pconnp->conn_next = (connp);				\
		(connp)->conn_prev = pconnp;				\
	} else {							\
		(connfp)->connf_head = (connp);				\
	}								\
	if (nconnp != NULL) {						\
		(connp)->conn_next = nconnp;				\
		nconnp->conn_prev = (connp);				\
	}								\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF(connp);						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9427c478bd9Sstevel@tonic-gate 
/*
 * Insert `connp' into bucket `connfp' as a wildcard entry.  The conn is
 * first unhashed from its current bucket (if any).  Under the new bucket's
 * lock:
 *  - if connp's source address is v4-mapped, it is linked in ahead of the
 *    first entry in the same zone whose source address is the unspecified
 *    IPv6 address;
 *  - otherwise, or when no such entry exists, it is appended at the tail.
 * IPCL_REMOVED is cleared, IPCL_BOUND is set, and a reference is taken on
 * behalf of the hash list.
 */
#define	IPCL_HASH_INSERT_WILDCARD(connfp, connp) {			\
	conn_t **list, *prev, *next;					\
	boolean_t isv4mapped =						\
	    IN6_IS_ADDR_V4MAPPED(&(connp)->conn_srcv6);			\
	IPCL_DEBUG_LVL(32, ("IPCL_HASH_INSERT_WILDCARD: connfp %p "	\
	    "connp %p", (void *)(connfp), (void *)(connp)));		\
	IPCL_HASH_REMOVE((connp));					\
	mutex_enter(&(connfp)->connf_lock);				\
	/* `list' is the link to rewrite; `prev' is the node owning it. */ \
	list = &(connfp)->connf_head;					\
	prev = NULL;							\
	while ((next = *list) != NULL) {				\
		if (isv4mapped &&					\
		    IN6_IS_ADDR_UNSPECIFIED(&next->conn_srcv6) &&	\
		    (connp)->conn_zoneid == next->conn_zoneid) {	\
			(connp)->conn_next = next;			\
			/* No-op on a consistent list; kept as is. */	\
			if (prev != NULL)				\
				prev = next->conn_prev;			\
			next->conn_prev = (connp);			\
			break;						\
		}							\
		list = &next->conn_next;				\
		prev = next;						\
	}								\
	(connp)->conn_prev = prev;					\
	*list = (connp);						\
	(connp)->conn_fanout = (connfp);				\
	(connp)->conn_flags = ((connp)->conn_flags & ~IPCL_REMOVED) |	\
	    IPCL_BOUND;							\
	CONN_INC_REF((connp));						\
	mutex_exit(&(connfp)->connf_lock);				\
}
9747c478bd9Sstevel@tonic-gate 
9757c478bd9Sstevel@tonic-gate void
9767c478bd9Sstevel@tonic-gate ipcl_hash_insert_wildcard(connf_t *connfp, conn_t *connp)
9777c478bd9Sstevel@tonic-gate {
9787c478bd9Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9797c478bd9Sstevel@tonic-gate }
9807c478bd9Sstevel@tonic-gate 
9817c478bd9Sstevel@tonic-gate void
9827c478bd9Sstevel@tonic-gate ipcl_proto_insert(conn_t *connp, uint8_t protocol)
9837c478bd9Sstevel@tonic-gate {
9847c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
985f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate 	ASSERT(connp != NULL);
98845916cd2Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
98945916cd2Sjpk 	    protocol == IPPROTO_ESP);
9907c478bd9Sstevel@tonic-gate 
9917c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
9927c478bd9Sstevel@tonic-gate 
9937c478bd9Sstevel@tonic-gate 	/* Insert it in the protocol hash */
994f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout[protocol];
9957c478bd9Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
9967c478bd9Sstevel@tonic-gate }
9977c478bd9Sstevel@tonic-gate 
9987c478bd9Sstevel@tonic-gate void
9997c478bd9Sstevel@tonic-gate ipcl_proto_insert_v6(conn_t *connp, uint8_t protocol)
10007c478bd9Sstevel@tonic-gate {
10017c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
1002f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
10037c478bd9Sstevel@tonic-gate 
10047c478bd9Sstevel@tonic-gate 	ASSERT(connp != NULL);
100545916cd2Sjpk 	ASSERT(!connp->conn_mac_exempt || protocol == IPPROTO_AH ||
100645916cd2Sjpk 	    protocol == IPPROTO_ESP);
10077c478bd9Sstevel@tonic-gate 
10087c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
10097c478bd9Sstevel@tonic-gate 
10107c478bd9Sstevel@tonic-gate 	/* Insert it in the Bind Hash */
1011f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
10127c478bd9Sstevel@tonic-gate 	IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10137c478bd9Sstevel@tonic-gate }
10147c478bd9Sstevel@tonic-gate 
10157c478bd9Sstevel@tonic-gate /*
10167c478bd9Sstevel@tonic-gate  * This function is used only for inserting SCTP raw socket now.
10177c478bd9Sstevel@tonic-gate  * This may change later.
10187c478bd9Sstevel@tonic-gate  *
10197c478bd9Sstevel@tonic-gate  * Note that only one raw socket can be bound to a port.  The param
10207c478bd9Sstevel@tonic-gate  * lport is in network byte order.
10217c478bd9Sstevel@tonic-gate  */
10227c478bd9Sstevel@tonic-gate static int
10237c478bd9Sstevel@tonic-gate ipcl_sctp_hash_insert(conn_t *connp, in_port_t lport)
10247c478bd9Sstevel@tonic-gate {
10257c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
10267c478bd9Sstevel@tonic-gate 	conn_t	*oconnp;
1027f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
10287c478bd9Sstevel@tonic-gate 
1029f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
10307c478bd9Sstevel@tonic-gate 
10317c478bd9Sstevel@tonic-gate 	/* Check for existing raw socket already bound to the port. */
10327c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
10337c478bd9Sstevel@tonic-gate 	for (oconnp = connfp->connf_head; oconnp != NULL;
10347c0c0508Skcpoon 	    oconnp = oconnp->conn_next) {
10357c478bd9Sstevel@tonic-gate 		if (oconnp->conn_lport == lport &&
10367c478bd9Sstevel@tonic-gate 		    oconnp->conn_zoneid == connp->conn_zoneid &&
10377c478bd9Sstevel@tonic-gate 		    oconnp->conn_af_isv6 == connp->conn_af_isv6 &&
10387c478bd9Sstevel@tonic-gate 		    ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
10397c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_UNSPECIFIED(&oconnp->conn_srcv6) ||
10407c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6) ||
10417c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&oconnp->conn_srcv6)) ||
10427c478bd9Sstevel@tonic-gate 		    IN6_ARE_ADDR_EQUAL(&oconnp->conn_srcv6,
10437c478bd9Sstevel@tonic-gate 		    &connp->conn_srcv6))) {
10447c478bd9Sstevel@tonic-gate 			break;
10457c478bd9Sstevel@tonic-gate 		}
10467c478bd9Sstevel@tonic-gate 	}
10477c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
10487c478bd9Sstevel@tonic-gate 	if (oconnp != NULL)
10497c478bd9Sstevel@tonic-gate 		return (EADDRNOTAVAIL);
10507c478bd9Sstevel@tonic-gate 
10517c478bd9Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6) ||
10527c478bd9Sstevel@tonic-gate 	    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_remv6)) {
10537c478bd9Sstevel@tonic-gate 		if (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) ||
10547c478bd9Sstevel@tonic-gate 		    IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_srcv6)) {
10557c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
10567c478bd9Sstevel@tonic-gate 		} else {
10577c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
10587c478bd9Sstevel@tonic-gate 		}
10597c478bd9Sstevel@tonic-gate 	} else {
10607c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED(connfp, connp);
10617c478bd9Sstevel@tonic-gate 	}
10627c478bd9Sstevel@tonic-gate 	return (0);
10637c478bd9Sstevel@tonic-gate }
10647c478bd9Sstevel@tonic-gate 
106545916cd2Sjpk /*
106645916cd2Sjpk  * Check for a MAC exemption conflict on a labeled system.  Note that for
106745916cd2Sjpk  * protocols that use port numbers (UDP, TCP, SCTP), we do this check up in the
106845916cd2Sjpk  * transport layer.  This check is for binding all other protocols.
106945916cd2Sjpk  *
107045916cd2Sjpk  * Returns true if there's a conflict.
107145916cd2Sjpk  */
107245916cd2Sjpk static boolean_t
1073f4b3ec61Sdh check_exempt_conflict_v4(conn_t *connp, ip_stack_t *ipst)
107445916cd2Sjpk {
107545916cd2Sjpk 	connf_t	*connfp;
107645916cd2Sjpk 	conn_t *tconn;
107745916cd2Sjpk 
1078f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
107945916cd2Sjpk 	mutex_enter(&connfp->connf_lock);
108045916cd2Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
108145916cd2Sjpk 	    tconn = tconn->conn_next) {
108245916cd2Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
108345916cd2Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
108445916cd2Sjpk 			continue;
108545916cd2Sjpk 		/* If neither is exempt, then there's no conflict */
108645916cd2Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
108745916cd2Sjpk 			continue;
108845916cd2Sjpk 		/* If both are bound to different specific addrs, ok */
108945916cd2Sjpk 		if (connp->conn_src != INADDR_ANY &&
109045916cd2Sjpk 		    tconn->conn_src != INADDR_ANY &&
109145916cd2Sjpk 		    connp->conn_src != tconn->conn_src)
109245916cd2Sjpk 			continue;
109345916cd2Sjpk 		/* These two conflict; fail */
109445916cd2Sjpk 		break;
109545916cd2Sjpk 	}
109645916cd2Sjpk 	mutex_exit(&connfp->connf_lock);
109745916cd2Sjpk 	return (tconn != NULL);
109845916cd2Sjpk }
109945916cd2Sjpk 
110045916cd2Sjpk static boolean_t
1101f4b3ec61Sdh check_exempt_conflict_v6(conn_t *connp, ip_stack_t *ipst)
110245916cd2Sjpk {
110345916cd2Sjpk 	connf_t	*connfp;
110445916cd2Sjpk 	conn_t *tconn;
110545916cd2Sjpk 
1106f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_proto_fanout[connp->conn_ulp];
110745916cd2Sjpk 	mutex_enter(&connfp->connf_lock);
110845916cd2Sjpk 	for (tconn = connfp->connf_head; tconn != NULL;
110945916cd2Sjpk 	    tconn = tconn->conn_next) {
111045916cd2Sjpk 		/* We don't allow v4 fallback for v6 raw socket */
111145916cd2Sjpk 		if (connp->conn_af_isv6 != tconn->conn_af_isv6)
111245916cd2Sjpk 			continue;
111345916cd2Sjpk 		/* If neither is exempt, then there's no conflict */
111445916cd2Sjpk 		if (!connp->conn_mac_exempt && !tconn->conn_mac_exempt)
111545916cd2Sjpk 			continue;
111645916cd2Sjpk 		/* If both are bound to different addrs, ok */
111745916cd2Sjpk 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6) &&
111845916cd2Sjpk 		    !IN6_IS_ADDR_UNSPECIFIED(&tconn->conn_srcv6) &&
111945916cd2Sjpk 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_srcv6, &tconn->conn_srcv6))
112045916cd2Sjpk 			continue;
112145916cd2Sjpk 		/* These two conflict; fail */
112245916cd2Sjpk 		break;
112345916cd2Sjpk 	}
112445916cd2Sjpk 	mutex_exit(&connfp->connf_lock);
112545916cd2Sjpk 	return (tconn != NULL);
112645916cd2Sjpk }
112745916cd2Sjpk 
11287c478bd9Sstevel@tonic-gate /*
11297c478bd9Sstevel@tonic-gate  * (v4, v6) bind hash insertion routines
11307c478bd9Sstevel@tonic-gate  */
11317c478bd9Sstevel@tonic-gate int
11327c478bd9Sstevel@tonic-gate ipcl_bind_insert(conn_t *connp, uint8_t protocol, ipaddr_t src, uint16_t lport)
11337c478bd9Sstevel@tonic-gate {
11347c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
11357c478bd9Sstevel@tonic-gate #ifdef	IPCL_DEBUG
11367c478bd9Sstevel@tonic-gate 	char	buf[INET_NTOA_BUFSIZE];
11377c478bd9Sstevel@tonic-gate #endif
11387c478bd9Sstevel@tonic-gate 	int	ret = 0;
1139f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
11407c478bd9Sstevel@tonic-gate 
11417c478bd9Sstevel@tonic-gate 	ASSERT(connp);
11427c478bd9Sstevel@tonic-gate 
11437c478bd9Sstevel@tonic-gate 	IPCL_DEBUG_LVL(64, ("ipcl_bind_insert: connp %p, src = %s, "
11447c478bd9Sstevel@tonic-gate 	    "port = %d\n", (void *)connp, inet_ntoa_r(src, buf), lport));
11457c478bd9Sstevel@tonic-gate 
11467c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
11477c478bd9Sstevel@tonic-gate 	IN6_IPADDR_TO_V4MAPPED(src, &connp->conn_srcv6);
11487c478bd9Sstevel@tonic-gate 	connp->conn_lport = lport;
11497c478bd9Sstevel@tonic-gate 
11507c478bd9Sstevel@tonic-gate 	switch (protocol) {
11517c478bd9Sstevel@tonic-gate 	default:
1152f4b3ec61Sdh 		if (is_system_labeled() &&
1153f4b3ec61Sdh 		    check_exempt_conflict_v4(connp, ipst))
115445916cd2Sjpk 			return (EADDRINUSE);
115545916cd2Sjpk 		/* FALLTHROUGH */
115645916cd2Sjpk 	case IPPROTO_UDP:
11577c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
11587c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
11597c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - udp\n",
11607c478bd9Sstevel@tonic-gate 			    (void *)connp));
1161f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1162f4b3ec61Sdh 			    IPCL_UDP_HASH(lport, ipst)];
11637c478bd9Sstevel@tonic-gate 		} else {
11647c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(64,
11657c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert: connp %p - protocol\n",
11667c478bd9Sstevel@tonic-gate 			    (void *)connp));
1167f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
11687c478bd9Sstevel@tonic-gate 		}
11697c478bd9Sstevel@tonic-gate 
11707c478bd9Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
11717c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
11727c478bd9Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
11737c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11747c478bd9Sstevel@tonic-gate 		} else {
11757c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11767c478bd9Sstevel@tonic-gate 		}
11777c478bd9Sstevel@tonic-gate 		break;
11787c478bd9Sstevel@tonic-gate 
11797c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
11807c478bd9Sstevel@tonic-gate 
11817c478bd9Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
118245916cd2Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1183f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_bind_fanout[
1184f4b3ec61Sdh 		    IPCL_BIND_HASH(lport, ipst)];
11857c478bd9Sstevel@tonic-gate 		if (connp->conn_src != INADDR_ANY) {
11867c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
11877c478bd9Sstevel@tonic-gate 		} else {
11887c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
11897c478bd9Sstevel@tonic-gate 		}
11907c478bd9Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
11917c478bd9Sstevel@tonic-gate 			ASSERT(!connp->conn_pkt_isv6);
11927c478bd9Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
11937c478bd9Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, AF_INET,
11947c478bd9Sstevel@tonic-gate 			    (uint8_t *)&connp->conn_bound_source, lport);
11957c478bd9Sstevel@tonic-gate 		}
11967c478bd9Sstevel@tonic-gate 		break;
11977c478bd9Sstevel@tonic-gate 
11987c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
11997c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12007c478bd9Sstevel@tonic-gate 		break;
12017c478bd9Sstevel@tonic-gate 	}
12027c478bd9Sstevel@tonic-gate 
12037c478bd9Sstevel@tonic-gate 	return (ret);
12047c478bd9Sstevel@tonic-gate }
12057c478bd9Sstevel@tonic-gate 
12067c478bd9Sstevel@tonic-gate int
12077c478bd9Sstevel@tonic-gate ipcl_bind_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
12087c478bd9Sstevel@tonic-gate     uint16_t lport)
12097c478bd9Sstevel@tonic-gate {
12107c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
12117c478bd9Sstevel@tonic-gate 	int	ret = 0;
1212f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
12137c478bd9Sstevel@tonic-gate 
12147c478bd9Sstevel@tonic-gate 	ASSERT(connp);
12157c478bd9Sstevel@tonic-gate 
12167c478bd9Sstevel@tonic-gate 	connp->conn_ulp = protocol;
12177c478bd9Sstevel@tonic-gate 	connp->conn_srcv6 = *src;
12187c478bd9Sstevel@tonic-gate 	connp->conn_lport = lport;
12197c478bd9Sstevel@tonic-gate 
12207c478bd9Sstevel@tonic-gate 	switch (protocol) {
12217c478bd9Sstevel@tonic-gate 	default:
1222f4b3ec61Sdh 		if (is_system_labeled() &&
1223f4b3ec61Sdh 		    check_exempt_conflict_v6(connp, ipst))
122445916cd2Sjpk 			return (EADDRINUSE);
122545916cd2Sjpk 		/* FALLTHROUGH */
122645916cd2Sjpk 	case IPPROTO_UDP:
12277c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
12287c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
12297c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - udp\n",
12307c478bd9Sstevel@tonic-gate 			    (void *)connp));
1231f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1232f4b3ec61Sdh 			    IPCL_UDP_HASH(lport, ipst)];
12337c478bd9Sstevel@tonic-gate 		} else {
12347c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(128,
12357c478bd9Sstevel@tonic-gate 			    ("ipcl_bind_insert_v6: connp %p - protocol\n",
12367c478bd9Sstevel@tonic-gate 			    (void *)connp));
1237f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
12387c478bd9Sstevel@tonic-gate 		}
12397c478bd9Sstevel@tonic-gate 
12407c478bd9Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
12417c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
12427c478bd9Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
12437c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12447c478bd9Sstevel@tonic-gate 		} else {
12457c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12467c478bd9Sstevel@tonic-gate 		}
12477c478bd9Sstevel@tonic-gate 		break;
12487c478bd9Sstevel@tonic-gate 
12497c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
12507c478bd9Sstevel@tonic-gate 		/* XXX - Need a separate table for IN6_IS_ADDR_UNSPECIFIED? */
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate 		/* Insert it in the Bind Hash */
125345916cd2Sjpk 		ASSERT(connp->conn_zoneid != ALL_ZONES);
1254f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_bind_fanout[
1255f4b3ec61Sdh 		    IPCL_BIND_HASH(lport, ipst)];
12567c478bd9Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
12577c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
12587c478bd9Sstevel@tonic-gate 		} else {
12597c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
12607c478bd9Sstevel@tonic-gate 		}
12617c478bd9Sstevel@tonic-gate 		if (cl_inet_listen != NULL) {
12627c478bd9Sstevel@tonic-gate 			sa_family_t	addr_family;
12637c478bd9Sstevel@tonic-gate 			uint8_t		*laddrp;
12647c478bd9Sstevel@tonic-gate 
12657c478bd9Sstevel@tonic-gate 			if (connp->conn_pkt_isv6) {
12667c478bd9Sstevel@tonic-gate 				addr_family = AF_INET6;
12677c478bd9Sstevel@tonic-gate 				laddrp =
12687c478bd9Sstevel@tonic-gate 				    (uint8_t *)&connp->conn_bound_source_v6;
12697c478bd9Sstevel@tonic-gate 			} else {
12707c478bd9Sstevel@tonic-gate 				addr_family = AF_INET;
12717c478bd9Sstevel@tonic-gate 				laddrp = (uint8_t *)&connp->conn_bound_source;
12727c478bd9Sstevel@tonic-gate 			}
12737c478bd9Sstevel@tonic-gate 			connp->conn_flags |= IPCL_CL_LISTENER;
12747c478bd9Sstevel@tonic-gate 			(*cl_inet_listen)(IPPROTO_TCP, addr_family, laddrp,
12757c478bd9Sstevel@tonic-gate 			    lport);
12767c478bd9Sstevel@tonic-gate 		}
12777c478bd9Sstevel@tonic-gate 		break;
12787c478bd9Sstevel@tonic-gate 
12797c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
12807c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
12817c478bd9Sstevel@tonic-gate 		break;
12827c478bd9Sstevel@tonic-gate 	}
12837c478bd9Sstevel@tonic-gate 
12847c478bd9Sstevel@tonic-gate 	return (ret);
12857c478bd9Sstevel@tonic-gate }
12867c478bd9Sstevel@tonic-gate 
12877c478bd9Sstevel@tonic-gate /*
12887c478bd9Sstevel@tonic-gate  * ipcl_conn_hash insertion routines.
12897c478bd9Sstevel@tonic-gate  */
12907c478bd9Sstevel@tonic-gate int
12917c478bd9Sstevel@tonic-gate ipcl_conn_insert(conn_t *connp, uint8_t protocol, ipaddr_t src,
12927c478bd9Sstevel@tonic-gate     ipaddr_t rem, uint32_t ports)
12937c478bd9Sstevel@tonic-gate {
12947c478bd9Sstevel@tonic-gate 	connf_t		*connfp;
12957c478bd9Sstevel@tonic-gate 	uint16_t	*up;
12967c478bd9Sstevel@tonic-gate 	conn_t		*tconnp;
12977c478bd9Sstevel@tonic-gate #ifdef	IPCL_DEBUG
12987c478bd9Sstevel@tonic-gate 	char	sbuf[INET_NTOA_BUFSIZE], rbuf[INET_NTOA_BUFSIZE];
12997c478bd9Sstevel@tonic-gate #endif
13007c478bd9Sstevel@tonic-gate 	in_port_t	lport;
13017c478bd9Sstevel@tonic-gate 	int		ret = 0;
1302f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
13037c478bd9Sstevel@tonic-gate 
13047c478bd9Sstevel@tonic-gate 	IPCL_DEBUG_LVL(256, ("ipcl_conn_insert: connp %p, src = %s, "
13057c478bd9Sstevel@tonic-gate 	    "dst = %s, ports = %x, protocol = %x", (void *)connp,
13067c478bd9Sstevel@tonic-gate 	    inet_ntoa_r(src, sbuf), inet_ntoa_r(rem, rbuf),
13077c478bd9Sstevel@tonic-gate 	    ports, protocol));
13087c478bd9Sstevel@tonic-gate 
13097c478bd9Sstevel@tonic-gate 	switch (protocol) {
13107c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
13117c478bd9Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
13127c478bd9Sstevel@tonic-gate 			/*
13137c478bd9Sstevel@tonic-gate 			 * for a eager connection, i.e connections which
13147c478bd9Sstevel@tonic-gate 			 * have just been created, the initialization is
13157c478bd9Sstevel@tonic-gate 			 * already done in ip at conn_creation time, so
13167c478bd9Sstevel@tonic-gate 			 * we can skip the checks here.
13177c478bd9Sstevel@tonic-gate 			 */
13187c478bd9Sstevel@tonic-gate 			IPCL_CONN_INIT(connp, protocol, src, rem, ports);
13197c478bd9Sstevel@tonic-gate 		}
1320f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_conn_fanout[
1321f4b3ec61Sdh 		    IPCL_CONN_HASH(connp->conn_rem,
1322f4b3ec61Sdh 		    connp->conn_ports, ipst)];
13237c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
13247c478bd9Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
13257c478bd9Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
13267c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(tconnp, connp->conn_ulp,
13277c478bd9Sstevel@tonic-gate 			    connp->conn_rem, connp->conn_src,
13287c478bd9Sstevel@tonic-gate 			    connp->conn_ports)) {
13297c478bd9Sstevel@tonic-gate 
13307c478bd9Sstevel@tonic-gate 				/* Already have a conn. bail out */
13317c478bd9Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
13327c478bd9Sstevel@tonic-gate 				return (EADDRINUSE);
13337c478bd9Sstevel@tonic-gate 			}
13347c478bd9Sstevel@tonic-gate 		}
13357c478bd9Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
13367c478bd9Sstevel@tonic-gate 			/*
13377c478bd9Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
13387c478bd9Sstevel@tonic-gate 			 * rebind. Let it happen.
13397c478bd9Sstevel@tonic-gate 			 */
13407c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
13417c478bd9Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
13427c478bd9Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
13437c478bd9Sstevel@tonic-gate 		}
1344866ba9ddSjprakash 
1345866ba9ddSjprakash 		ASSERT(connp->conn_recv != NULL);
1346866ba9ddSjprakash 
13477c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
13487c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
13497c478bd9Sstevel@tonic-gate 		break;
13507c478bd9Sstevel@tonic-gate 
13517c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
13527c0c0508Skcpoon 		/*
13537c0c0508Skcpoon 		 * The raw socket may have already been bound, remove it
13547c0c0508Skcpoon 		 * from the hash first.
13557c0c0508Skcpoon 		 */
13567c0c0508Skcpoon 		IPCL_HASH_REMOVE(connp);
13577c0c0508Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
13587c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
13597c478bd9Sstevel@tonic-gate 		break;
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate 	default:
136245916cd2Sjpk 		/*
136345916cd2Sjpk 		 * Check for conflicts among MAC exempt bindings.  For
136445916cd2Sjpk 		 * transports with port numbers, this is done by the upper
136545916cd2Sjpk 		 * level per-transport binding logic.  For all others, it's
136645916cd2Sjpk 		 * done here.
136745916cd2Sjpk 		 */
1368f4b3ec61Sdh 		if (is_system_labeled() &&
1369f4b3ec61Sdh 		    check_exempt_conflict_v4(connp, ipst))
137045916cd2Sjpk 			return (EADDRINUSE);
137145916cd2Sjpk 		/* FALLTHROUGH */
137245916cd2Sjpk 
137345916cd2Sjpk 	case IPPROTO_UDP:
13747c478bd9Sstevel@tonic-gate 		up = (uint16_t *)&ports;
13757c478bd9Sstevel@tonic-gate 		IPCL_CONN_INIT(connp, protocol, src, rem, ports);
13767c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1377f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1378f4b3ec61Sdh 			    IPCL_UDP_HASH(up[1], ipst)];
13797c478bd9Sstevel@tonic-gate 		} else {
1380f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout[protocol];
13817c478bd9Sstevel@tonic-gate 		}
13827c478bd9Sstevel@tonic-gate 
13837c478bd9Sstevel@tonic-gate 		if (connp->conn_rem != INADDR_ANY) {
13847c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
13857c478bd9Sstevel@tonic-gate 		} else if (connp->conn_src != INADDR_ANY) {
13867c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
13877c478bd9Sstevel@tonic-gate 		} else {
13887c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
13897c478bd9Sstevel@tonic-gate 		}
13907c478bd9Sstevel@tonic-gate 		break;
13917c478bd9Sstevel@tonic-gate 	}
13927c478bd9Sstevel@tonic-gate 
13937c478bd9Sstevel@tonic-gate 	return (ret);
13947c478bd9Sstevel@tonic-gate }
13957c478bd9Sstevel@tonic-gate 
13967c478bd9Sstevel@tonic-gate int
13977c478bd9Sstevel@tonic-gate ipcl_conn_insert_v6(conn_t *connp, uint8_t protocol, const in6_addr_t *src,
13987c478bd9Sstevel@tonic-gate     const in6_addr_t *rem, uint32_t ports, uint_t ifindex)
13997c478bd9Sstevel@tonic-gate {
14007c478bd9Sstevel@tonic-gate 	connf_t		*connfp;
14017c478bd9Sstevel@tonic-gate 	uint16_t	*up;
14027c478bd9Sstevel@tonic-gate 	conn_t		*tconnp;
14037c478bd9Sstevel@tonic-gate 	in_port_t	lport;
14047c478bd9Sstevel@tonic-gate 	int		ret = 0;
1405f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
14067c478bd9Sstevel@tonic-gate 
14077c478bd9Sstevel@tonic-gate 	switch (protocol) {
14087c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
14097c478bd9Sstevel@tonic-gate 		/* Just need to insert a conn struct */
14107c478bd9Sstevel@tonic-gate 		if (!(connp->conn_flags & IPCL_EAGER)) {
14117c478bd9Sstevel@tonic-gate 			IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
14127c478bd9Sstevel@tonic-gate 		}
1413f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_conn_fanout[
1414f4b3ec61Sdh 		    IPCL_CONN_HASH_V6(connp->conn_remv6, connp->conn_ports,
1415f4b3ec61Sdh 		    ipst)];
14167c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
14177c478bd9Sstevel@tonic-gate 		for (tconnp = connfp->connf_head; tconnp != NULL;
14187c478bd9Sstevel@tonic-gate 		    tconnp = tconnp->conn_next) {
14197c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(tconnp, connp->conn_ulp,
14207c478bd9Sstevel@tonic-gate 			    connp->conn_remv6, connp->conn_srcv6,
14217c478bd9Sstevel@tonic-gate 			    connp->conn_ports) &&
14227c478bd9Sstevel@tonic-gate 			    (tconnp->conn_tcp->tcp_bound_if == 0 ||
14237c478bd9Sstevel@tonic-gate 			    tconnp->conn_tcp->tcp_bound_if == ifindex)) {
14247c478bd9Sstevel@tonic-gate 				/* Already have a conn. bail out */
14257c478bd9Sstevel@tonic-gate 				mutex_exit(&connfp->connf_lock);
14267c478bd9Sstevel@tonic-gate 				return (EADDRINUSE);
14277c478bd9Sstevel@tonic-gate 			}
14287c478bd9Sstevel@tonic-gate 		}
14297c478bd9Sstevel@tonic-gate 		if (connp->conn_fanout != NULL) {
14307c478bd9Sstevel@tonic-gate 			/*
14317c478bd9Sstevel@tonic-gate 			 * Probably a XTI/TLI application trying to do a
14327c478bd9Sstevel@tonic-gate 			 * rebind. Let it happen.
14337c478bd9Sstevel@tonic-gate 			 */
14347c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
14357c478bd9Sstevel@tonic-gate 			IPCL_HASH_REMOVE(connp);
14367c478bd9Sstevel@tonic-gate 			mutex_enter(&connfp->connf_lock);
14377c478bd9Sstevel@tonic-gate 		}
14387c478bd9Sstevel@tonic-gate 		IPCL_HASH_INSERT_CONNECTED_LOCKED(connfp, connp);
14397c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
14407c478bd9Sstevel@tonic-gate 		break;
14417c478bd9Sstevel@tonic-gate 
14427c478bd9Sstevel@tonic-gate 	case IPPROTO_SCTP:
14437c0c0508Skcpoon 		IPCL_HASH_REMOVE(connp);
14447c0c0508Skcpoon 		lport = htons((uint16_t)(ntohl(ports) & 0xFFFF));
14457c478bd9Sstevel@tonic-gate 		ret = ipcl_sctp_hash_insert(connp, lport);
14467c478bd9Sstevel@tonic-gate 		break;
14477c478bd9Sstevel@tonic-gate 
14487c478bd9Sstevel@tonic-gate 	default:
1449f4b3ec61Sdh 		if (is_system_labeled() &&
1450f4b3ec61Sdh 		    check_exempt_conflict_v6(connp, ipst))
145145916cd2Sjpk 			return (EADDRINUSE);
145245916cd2Sjpk 		/* FALLTHROUGH */
145345916cd2Sjpk 	case IPPROTO_UDP:
14547c478bd9Sstevel@tonic-gate 		up = (uint16_t *)&ports;
14557c478bd9Sstevel@tonic-gate 		IPCL_CONN_INIT_V6(connp, protocol, *src, *rem, ports);
14567c478bd9Sstevel@tonic-gate 		if (protocol == IPPROTO_UDP) {
1457f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_udp_fanout[
1458f4b3ec61Sdh 			    IPCL_UDP_HASH(up[1], ipst)];
14597c478bd9Sstevel@tonic-gate 		} else {
1460f4b3ec61Sdh 			connfp = &ipst->ips_ipcl_proto_fanout_v6[protocol];
14617c478bd9Sstevel@tonic-gate 		}
14627c478bd9Sstevel@tonic-gate 
14637c478bd9Sstevel@tonic-gate 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_remv6)) {
14647c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_CONNECTED(connfp, connp);
14657c478bd9Sstevel@tonic-gate 		} else if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_srcv6)) {
14667c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_BOUND(connfp, connp);
14677c478bd9Sstevel@tonic-gate 		} else {
14687c478bd9Sstevel@tonic-gate 			IPCL_HASH_INSERT_WILDCARD(connfp, connp);
14697c478bd9Sstevel@tonic-gate 		}
14707c478bd9Sstevel@tonic-gate 		break;
14717c478bd9Sstevel@tonic-gate 	}
14727c478bd9Sstevel@tonic-gate 
14737c478bd9Sstevel@tonic-gate 	return (ret);
14747c478bd9Sstevel@tonic-gate }
14757c478bd9Sstevel@tonic-gate 
14767c478bd9Sstevel@tonic-gate /*
14777c478bd9Sstevel@tonic-gate  * v4 packet classifying function. looks up the fanout table to
14787c478bd9Sstevel@tonic-gate  * find the conn, the packet belongs to. returns the conn with
14797c478bd9Sstevel@tonic-gate  * the reference held, null otherwise.
148045916cd2Sjpk  *
148145916cd2Sjpk  * If zoneid is ALL_ZONES, then the search rules described in the "Connection
148245916cd2Sjpk  * Lookup" comment block are applied.  Labels are also checked as described
148345916cd2Sjpk  * above.  If the packet is from the inside (looped back), and is from the same
148445916cd2Sjpk  * zone, then label checks are omitted.
14857c478bd9Sstevel@tonic-gate  */
14867c478bd9Sstevel@tonic-gate conn_t *
1487f4b3ec61Sdh ipcl_classify_v4(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
1488f4b3ec61Sdh     ip_stack_t *ipst)
14897c478bd9Sstevel@tonic-gate {
14907c478bd9Sstevel@tonic-gate 	ipha_t	*ipha;
14917c478bd9Sstevel@tonic-gate 	connf_t	*connfp, *bind_connfp;
14927c478bd9Sstevel@tonic-gate 	uint16_t lport;
14937c478bd9Sstevel@tonic-gate 	uint16_t fport;
14947c478bd9Sstevel@tonic-gate 	uint32_t ports;
14957c478bd9Sstevel@tonic-gate 	conn_t	*connp;
14967c478bd9Sstevel@tonic-gate 	uint16_t  *up;
149745916cd2Sjpk 	boolean_t shared_addr;
149845916cd2Sjpk 	boolean_t unlabeled;
14997c478bd9Sstevel@tonic-gate 
15007c478bd9Sstevel@tonic-gate 	ipha = (ipha_t *)mp->b_rptr;
15017c478bd9Sstevel@tonic-gate 	up = (uint16_t *)((uchar_t *)ipha + hdr_len + TCP_PORTS_OFFSET);
15027c478bd9Sstevel@tonic-gate 
15037c478bd9Sstevel@tonic-gate 	switch (protocol) {
15047c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
15057c478bd9Sstevel@tonic-gate 		ports = *(uint32_t *)up;
15067c478bd9Sstevel@tonic-gate 		connfp =
1507f4b3ec61Sdh 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_src,
1508f4b3ec61Sdh 		    ports, ipst)];
15097c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
15107c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
15117c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
15127c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH(connp, protocol,
15137c478bd9Sstevel@tonic-gate 			    ipha->ipha_src, ipha->ipha_dst, ports))
15147c478bd9Sstevel@tonic-gate 				break;
15157c478bd9Sstevel@tonic-gate 		}
15167c478bd9Sstevel@tonic-gate 
15177c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
151845916cd2Sjpk 			/*
151945916cd2Sjpk 			 * We have a fully-bound TCP connection.
152045916cd2Sjpk 			 *
152145916cd2Sjpk 			 * For labeled systems, there's no need to check the
152245916cd2Sjpk 			 * label here.  It's known to be good as we checked
152345916cd2Sjpk 			 * before allowing the connection to become bound.
152445916cd2Sjpk 			 */
15257c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
15267c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
15277c478bd9Sstevel@tonic-gate 			return (connp);
15287c478bd9Sstevel@tonic-gate 		}
15297c478bd9Sstevel@tonic-gate 
15307c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
15317c478bd9Sstevel@tonic-gate 
15327c478bd9Sstevel@tonic-gate 		lport = up[1];
153345916cd2Sjpk 		unlabeled = B_FALSE;
153445916cd2Sjpk 		/* Cred cannot be null on IPv4 */
153545916cd2Sjpk 		if (is_system_labeled())
153645916cd2Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
153745916cd2Sjpk 			    TSLF_UNLABELED) != 0;
153845916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
153945916cd2Sjpk 		if (shared_addr) {
1540f4b3ec61Sdh 			/*
1541f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1542f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1543f4b3ec61Sdh 			 */
154445916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
154545916cd2Sjpk 			/*
154645916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
154745916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
154845916cd2Sjpk 			 * search for the zone based on the packet label.
154945916cd2Sjpk 			 *
155045916cd2Sjpk 			 * If there is such a zone, we prefer to find a
155145916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
155245916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
155345916cd2Sjpk 			 * dominates the default label on the packet.
155445916cd2Sjpk 			 */
155545916cd2Sjpk 			if (zoneid == ALL_ZONES)
155645916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
155745916cd2Sjpk 			else
155845916cd2Sjpk 				unlabeled = B_FALSE;
155945916cd2Sjpk 		}
156045916cd2Sjpk 
1561f4b3ec61Sdh 		bind_connfp =
1562f4b3ec61Sdh 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
15637c478bd9Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
15647c478bd9Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
15657c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
156645916cd2Sjpk 			if (IPCL_BIND_MATCH(connp, protocol, ipha->ipha_dst,
15675d0bc3edSsommerfe 			    lport) && (IPCL_ZONE_MATCH(connp, zoneid) ||
156845916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
15697c478bd9Sstevel@tonic-gate 				break;
15707c478bd9Sstevel@tonic-gate 		}
15717c478bd9Sstevel@tonic-gate 
157245916cd2Sjpk 		/*
157345916cd2Sjpk 		 * If the matching connection is SLP on a private address, then
157445916cd2Sjpk 		 * the label on the packet must match the local zone's label.
157545916cd2Sjpk 		 * Otherwise, it must be in the label range defined by tnrh.
157645916cd2Sjpk 		 * This is ensured by tsol_receive_label.
157745916cd2Sjpk 		 */
157845916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
157945916cd2Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
158045916cd2Sjpk 		    shared_addr, connp)) {
158145916cd2Sjpk 				DTRACE_PROBE3(
158245916cd2Sjpk 				    tx__ip__log__info__classify__tcp,
158345916cd2Sjpk 				    char *,
158445916cd2Sjpk 				    "connp(1) could not receive mp(2)",
158545916cd2Sjpk 				    conn_t *, connp, mblk_t *, mp);
158645916cd2Sjpk 			connp = NULL;
158745916cd2Sjpk 		}
158845916cd2Sjpk 
15897c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
159045916cd2Sjpk 			/* Have a listener at least */
15917c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
15927c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
15937c478bd9Sstevel@tonic-gate 			return (connp);
15947c478bd9Sstevel@tonic-gate 		}
15957c478bd9Sstevel@tonic-gate 
15967c478bd9Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
15997c478bd9Sstevel@tonic-gate 		    ("ipcl_classify: couldn't classify mp = %p\n",
16007c478bd9Sstevel@tonic-gate 		    (void *)mp));
16017c478bd9Sstevel@tonic-gate 		break;
16027c478bd9Sstevel@tonic-gate 
16037c478bd9Sstevel@tonic-gate 	case IPPROTO_UDP:
16047c478bd9Sstevel@tonic-gate 		lport = up[1];
160545916cd2Sjpk 		unlabeled = B_FALSE;
160645916cd2Sjpk 		/* Cred cannot be null on IPv4 */
160745916cd2Sjpk 		if (is_system_labeled())
160845916cd2Sjpk 			unlabeled = (crgetlabel(DB_CRED(mp))->tsl_flags &
160945916cd2Sjpk 			    TSLF_UNLABELED) != 0;
161045916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
161145916cd2Sjpk 		if (shared_addr) {
1612f4b3ec61Sdh 			/*
1613f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1614f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1615f4b3ec61Sdh 			 */
161645916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
161745916cd2Sjpk 			/*
161845916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
161945916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
162045916cd2Sjpk 			 * search for the zone based on the packet label.
162145916cd2Sjpk 			 *
162245916cd2Sjpk 			 * If there is such a zone, we prefer to find a
162345916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
162445916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
162545916cd2Sjpk 			 * dominates the default label on the packet.
162645916cd2Sjpk 			 */
162745916cd2Sjpk 			if (zoneid == ALL_ZONES)
162845916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
162945916cd2Sjpk 			else
163045916cd2Sjpk 				unlabeled = B_FALSE;
163145916cd2Sjpk 		}
16327c478bd9Sstevel@tonic-gate 		fport = up[0];
16337c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify %x %x", lport, fport));
1634f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
16357c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
16367c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
16377c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
16387c478bd9Sstevel@tonic-gate 			if (IPCL_UDP_MATCH(connp, lport, ipha->ipha_dst,
16397c478bd9Sstevel@tonic-gate 			    fport, ipha->ipha_src) &&
16405d0bc3edSsommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
164145916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
16427c478bd9Sstevel@tonic-gate 				break;
16437c478bd9Sstevel@tonic-gate 		}
16447c478bd9Sstevel@tonic-gate 
164545916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
164645916cd2Sjpk 		    !tsol_receive_local(mp, &ipha->ipha_dst, IPV4_VERSION,
164745916cd2Sjpk 		    shared_addr, connp)) {
164845916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp,
164945916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
165045916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
165145916cd2Sjpk 			connp = NULL;
165245916cd2Sjpk 		}
165345916cd2Sjpk 
16547c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
16557c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
16567c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
16577c478bd9Sstevel@tonic-gate 			return (connp);
16587c478bd9Sstevel@tonic-gate 		}
16597c478bd9Sstevel@tonic-gate 
16607c478bd9Sstevel@tonic-gate 		/*
16617c478bd9Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
16627c478bd9Sstevel@tonic-gate 		 */
16637c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
16647c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
16657c478bd9Sstevel@tonic-gate 		    ("ipcl_classify: cant find udp conn_t for ports : %x %x",
16667c478bd9Sstevel@tonic-gate 		    lport, fport));
16677c478bd9Sstevel@tonic-gate 		break;
16687c478bd9Sstevel@tonic-gate 	}
16697c478bd9Sstevel@tonic-gate 
16707c478bd9Sstevel@tonic-gate 	return (NULL);
16717c478bd9Sstevel@tonic-gate }
16727c478bd9Sstevel@tonic-gate 
16737c478bd9Sstevel@tonic-gate conn_t *
1674f4b3ec61Sdh ipcl_classify_v6(mblk_t *mp, uint8_t protocol, uint_t hdr_len, zoneid_t zoneid,
1675f4b3ec61Sdh     ip_stack_t *ipst)
16767c478bd9Sstevel@tonic-gate {
16777c478bd9Sstevel@tonic-gate 	ip6_t		*ip6h;
16787c478bd9Sstevel@tonic-gate 	connf_t		*connfp, *bind_connfp;
16797c478bd9Sstevel@tonic-gate 	uint16_t	lport;
16807c478bd9Sstevel@tonic-gate 	uint16_t	fport;
16817c478bd9Sstevel@tonic-gate 	tcph_t		*tcph;
16827c478bd9Sstevel@tonic-gate 	uint32_t	ports;
16837c478bd9Sstevel@tonic-gate 	conn_t		*connp;
16847c478bd9Sstevel@tonic-gate 	uint16_t	*up;
168545916cd2Sjpk 	boolean_t	shared_addr;
168645916cd2Sjpk 	boolean_t	unlabeled;
16877c478bd9Sstevel@tonic-gate 
16887c478bd9Sstevel@tonic-gate 	ip6h = (ip6_t *)mp->b_rptr;
16897c478bd9Sstevel@tonic-gate 
16907c478bd9Sstevel@tonic-gate 	switch (protocol) {
16917c478bd9Sstevel@tonic-gate 	case IPPROTO_TCP:
16927c478bd9Sstevel@tonic-gate 		tcph = (tcph_t *)&mp->b_rptr[hdr_len];
16937c478bd9Sstevel@tonic-gate 		up = (uint16_t *)tcph->th_lport;
16947c478bd9Sstevel@tonic-gate 		ports = *(uint32_t *)up;
16957c478bd9Sstevel@tonic-gate 
16967c478bd9Sstevel@tonic-gate 		connfp =
1697f4b3ec61Sdh 		    &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_src,
1698f4b3ec61Sdh 		    ports, ipst)];
16997c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
17007c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
17017c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
17027c478bd9Sstevel@tonic-gate 			if (IPCL_CONN_MATCH_V6(connp, protocol,
17037c478bd9Sstevel@tonic-gate 			    ip6h->ip6_src, ip6h->ip6_dst, ports))
17047c478bd9Sstevel@tonic-gate 				break;
17057c478bd9Sstevel@tonic-gate 		}
17067c478bd9Sstevel@tonic-gate 
17077c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
170845916cd2Sjpk 			/*
170945916cd2Sjpk 			 * We have a fully-bound TCP connection.
171045916cd2Sjpk 			 *
171145916cd2Sjpk 			 * For labeled systems, there's no need to check the
171245916cd2Sjpk 			 * label here.  It's known to be good as we checked
171345916cd2Sjpk 			 * before allowing the connection to become bound.
171445916cd2Sjpk 			 */
17157c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
17167c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
17177c478bd9Sstevel@tonic-gate 			return (connp);
17187c478bd9Sstevel@tonic-gate 		}
17197c478bd9Sstevel@tonic-gate 
17207c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
17217c478bd9Sstevel@tonic-gate 
17227c478bd9Sstevel@tonic-gate 		lport = up[1];
172345916cd2Sjpk 		unlabeled = B_FALSE;
172445916cd2Sjpk 		/* Cred can be null on IPv6 */
172545916cd2Sjpk 		if (is_system_labeled()) {
172645916cd2Sjpk 			cred_t *cr = DB_CRED(mp);
172745916cd2Sjpk 
172845916cd2Sjpk 			unlabeled = (cr != NULL &&
172945916cd2Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
173045916cd2Sjpk 		}
173145916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
173245916cd2Sjpk 		if (shared_addr) {
1733f4b3ec61Sdh 			/*
1734f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1735f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1736f4b3ec61Sdh 			 */
173745916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
173845916cd2Sjpk 			/*
173945916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
174045916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
174145916cd2Sjpk 			 * search for the zone based on the packet label.
174245916cd2Sjpk 			 *
174345916cd2Sjpk 			 * If there is such a zone, we prefer to find a
174445916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
174545916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
174645916cd2Sjpk 			 * dominates the default label on the packet.
174745916cd2Sjpk 			 */
174845916cd2Sjpk 			if (zoneid == ALL_ZONES)
174945916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
175045916cd2Sjpk 			else
175145916cd2Sjpk 				unlabeled = B_FALSE;
175245916cd2Sjpk 		}
175345916cd2Sjpk 
1754f4b3ec61Sdh 		bind_connfp =
1755f4b3ec61Sdh 		    &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
17567c478bd9Sstevel@tonic-gate 		mutex_enter(&bind_connfp->connf_lock);
17577c478bd9Sstevel@tonic-gate 		for (connp = bind_connfp->connf_head; connp != NULL;
17587c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
17597c478bd9Sstevel@tonic-gate 			if (IPCL_BIND_MATCH_V6(connp, protocol,
17607c478bd9Sstevel@tonic-gate 			    ip6h->ip6_dst, lport) &&
17615d0bc3edSsommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
176245916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
17637c478bd9Sstevel@tonic-gate 				break;
17647c478bd9Sstevel@tonic-gate 		}
17657c478bd9Sstevel@tonic-gate 
176645916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
176745916cd2Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
176845916cd2Sjpk 		    shared_addr, connp)) {
176945916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__tcp6,
177045916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
177145916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
177245916cd2Sjpk 			connp = NULL;
177345916cd2Sjpk 		}
177445916cd2Sjpk 
17757c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
17767c478bd9Sstevel@tonic-gate 			/* Have a listner at least */
17777c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
17787c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
17797c478bd9Sstevel@tonic-gate 			IPCL_DEBUG_LVL(512,
17807c478bd9Sstevel@tonic-gate 			    ("ipcl_classify_v6: found listner "
17817c478bd9Sstevel@tonic-gate 			    "connp = %p\n", (void *)connp));
17827c478bd9Sstevel@tonic-gate 
17837c478bd9Sstevel@tonic-gate 			return (connp);
17847c478bd9Sstevel@tonic-gate 		}
17857c478bd9Sstevel@tonic-gate 
17867c478bd9Sstevel@tonic-gate 		mutex_exit(&bind_connfp->connf_lock);
17877c478bd9Sstevel@tonic-gate 
17887c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
17897c478bd9Sstevel@tonic-gate 		    ("ipcl_classify_v6: couldn't classify mp = %p\n",
17907c478bd9Sstevel@tonic-gate 		    (void *)mp));
17917c478bd9Sstevel@tonic-gate 		break;
17927c478bd9Sstevel@tonic-gate 
17937c478bd9Sstevel@tonic-gate 	case IPPROTO_UDP:
17947c478bd9Sstevel@tonic-gate 		up = (uint16_t *)&mp->b_rptr[hdr_len];
17957c478bd9Sstevel@tonic-gate 		lport = up[1];
179645916cd2Sjpk 		unlabeled = B_FALSE;
179745916cd2Sjpk 		/* Cred can be null on IPv6 */
179845916cd2Sjpk 		if (is_system_labeled()) {
179945916cd2Sjpk 			cred_t *cr = DB_CRED(mp);
180045916cd2Sjpk 
180145916cd2Sjpk 			unlabeled = (cr != NULL &&
180245916cd2Sjpk 			    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
180345916cd2Sjpk 		}
180445916cd2Sjpk 		shared_addr = (zoneid == ALL_ZONES);
180545916cd2Sjpk 		if (shared_addr) {
1806f4b3ec61Sdh 			/*
1807f4b3ec61Sdh 			 * No need to handle exclusive-stack zones since
1808f4b3ec61Sdh 			 * ALL_ZONES only applies to the shared stack.
1809f4b3ec61Sdh 			 */
181045916cd2Sjpk 			zoneid = tsol_mlp_findzone(protocol, lport);
181145916cd2Sjpk 			/*
181245916cd2Sjpk 			 * If no shared MLP is found, tsol_mlp_findzone returns
181345916cd2Sjpk 			 * ALL_ZONES.  In that case, we assume it's SLP, and
181445916cd2Sjpk 			 * search for the zone based on the packet label.
181545916cd2Sjpk 			 *
181645916cd2Sjpk 			 * If there is such a zone, we prefer to find a
181745916cd2Sjpk 			 * connection in it.  Otherwise, we look for a
181845916cd2Sjpk 			 * MAC-exempt connection in any zone whose label
181945916cd2Sjpk 			 * dominates the default label on the packet.
182045916cd2Sjpk 			 */
182145916cd2Sjpk 			if (zoneid == ALL_ZONES)
182245916cd2Sjpk 				zoneid = tsol_packet_to_zoneid(mp);
182345916cd2Sjpk 			else
182445916cd2Sjpk 				unlabeled = B_FALSE;
182545916cd2Sjpk 		}
182645916cd2Sjpk 
18277c478bd9Sstevel@tonic-gate 		fport = up[0];
18287c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512, ("ipcl_udp_classify_v6 %x %x", lport,
18297c478bd9Sstevel@tonic-gate 		    fport));
1830f4b3ec61Sdh 		connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(lport, ipst)];
18317c478bd9Sstevel@tonic-gate 		mutex_enter(&connfp->connf_lock);
18327c478bd9Sstevel@tonic-gate 		for (connp = connfp->connf_head; connp != NULL;
18337c478bd9Sstevel@tonic-gate 		    connp = connp->conn_next) {
18347c478bd9Sstevel@tonic-gate 			if (IPCL_UDP_MATCH_V6(connp, lport, ip6h->ip6_dst,
18357c478bd9Sstevel@tonic-gate 			    fport, ip6h->ip6_src) &&
18365d0bc3edSsommerfe 			    (IPCL_ZONE_MATCH(connp, zoneid) ||
183745916cd2Sjpk 			    (unlabeled && connp->conn_mac_exempt)))
18387c478bd9Sstevel@tonic-gate 				break;
18397c478bd9Sstevel@tonic-gate 		}
18407c478bd9Sstevel@tonic-gate 
184145916cd2Sjpk 		if (connp != NULL && is_system_labeled() &&
184245916cd2Sjpk 		    !tsol_receive_local(mp, &ip6h->ip6_dst, IPV6_VERSION,
184345916cd2Sjpk 		    shared_addr, connp)) {
184445916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__classify__udp6,
184545916cd2Sjpk 			    char *, "connp(1) could not receive mp(2)",
184645916cd2Sjpk 			    conn_t *, connp, mblk_t *, mp);
184745916cd2Sjpk 			connp = NULL;
184845916cd2Sjpk 		}
184945916cd2Sjpk 
18507c478bd9Sstevel@tonic-gate 		if (connp != NULL) {
18517c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
18527c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
18537c478bd9Sstevel@tonic-gate 			return (connp);
18547c478bd9Sstevel@tonic-gate 		}
18557c478bd9Sstevel@tonic-gate 
18567c478bd9Sstevel@tonic-gate 		/*
18577c478bd9Sstevel@tonic-gate 		 * We shouldn't come here for multicast/broadcast packets
18587c478bd9Sstevel@tonic-gate 		 */
18597c478bd9Sstevel@tonic-gate 		mutex_exit(&connfp->connf_lock);
18607c478bd9Sstevel@tonic-gate 		IPCL_DEBUG_LVL(512,
18617c478bd9Sstevel@tonic-gate 		    ("ipcl_classify_v6: cant find udp conn_t for ports : %x %x",
18627c478bd9Sstevel@tonic-gate 		    lport, fport));
18637c478bd9Sstevel@tonic-gate 		break;
18647c478bd9Sstevel@tonic-gate 	}
18657c478bd9Sstevel@tonic-gate 
18667c478bd9Sstevel@tonic-gate 	return (NULL);
18677c478bd9Sstevel@tonic-gate }
18687c478bd9Sstevel@tonic-gate 
18697c478bd9Sstevel@tonic-gate /*
18707c478bd9Sstevel@tonic-gate  * wrapper around ipcl_classify_(v4,v6) routines.
18717c478bd9Sstevel@tonic-gate  */
18727c478bd9Sstevel@tonic-gate conn_t *
1873f4b3ec61Sdh ipcl_classify(mblk_t *mp, zoneid_t zoneid, ip_stack_t *ipst)
18747c478bd9Sstevel@tonic-gate {
18757c478bd9Sstevel@tonic-gate 	uint16_t	hdr_len;
18767c478bd9Sstevel@tonic-gate 	ipha_t		*ipha;
18777c478bd9Sstevel@tonic-gate 	uint8_t		*nexthdrp;
18787c478bd9Sstevel@tonic-gate 
18797c478bd9Sstevel@tonic-gate 	if (MBLKL(mp) < sizeof (ipha_t))
18807c478bd9Sstevel@tonic-gate 		return (NULL);
18817c478bd9Sstevel@tonic-gate 
18827c478bd9Sstevel@tonic-gate 	switch (IPH_HDR_VERSION(mp->b_rptr)) {
18837c478bd9Sstevel@tonic-gate 	case IPV4_VERSION:
18847c478bd9Sstevel@tonic-gate 		ipha = (ipha_t *)mp->b_rptr;
18857c478bd9Sstevel@tonic-gate 		hdr_len = IPH_HDR_LENGTH(ipha);
18867c478bd9Sstevel@tonic-gate 		return (ipcl_classify_v4(mp, ipha->ipha_protocol, hdr_len,
1887f4b3ec61Sdh 		    zoneid, ipst));
18887c478bd9Sstevel@tonic-gate 	case IPV6_VERSION:
18897c478bd9Sstevel@tonic-gate 		if (!ip_hdr_length_nexthdr_v6(mp, (ip6_t *)mp->b_rptr,
18907c478bd9Sstevel@tonic-gate 		    &hdr_len, &nexthdrp))
18917c478bd9Sstevel@tonic-gate 			return (NULL);
18927c478bd9Sstevel@tonic-gate 
1893f4b3ec61Sdh 		return (ipcl_classify_v6(mp, *nexthdrp, hdr_len, zoneid, ipst));
18947c478bd9Sstevel@tonic-gate 	}
18957c478bd9Sstevel@tonic-gate 
18967c478bd9Sstevel@tonic-gate 	return (NULL);
18977c478bd9Sstevel@tonic-gate }
18987c478bd9Sstevel@tonic-gate 
18997c478bd9Sstevel@tonic-gate conn_t *
190045916cd2Sjpk ipcl_classify_raw(mblk_t *mp, uint8_t protocol, zoneid_t zoneid,
1901f4b3ec61Sdh     uint32_t ports, ipha_t *hdr, ip_stack_t *ipst)
19027c478bd9Sstevel@tonic-gate {
190345916cd2Sjpk 	connf_t		*connfp;
19047c478bd9Sstevel@tonic-gate 	conn_t		*connp;
19057c478bd9Sstevel@tonic-gate 	in_port_t	lport;
19067c478bd9Sstevel@tonic-gate 	int		af;
190745916cd2Sjpk 	boolean_t	shared_addr;
190845916cd2Sjpk 	boolean_t	unlabeled;
190945916cd2Sjpk 	const void	*dst;
19107c478bd9Sstevel@tonic-gate 
19117c478bd9Sstevel@tonic-gate 	lport = ((uint16_t *)&ports)[1];
191245916cd2Sjpk 
191345916cd2Sjpk 	unlabeled = B_FALSE;
191445916cd2Sjpk 	/* Cred can be null on IPv6 */
191545916cd2Sjpk 	if (is_system_labeled()) {
191645916cd2Sjpk 		cred_t *cr = DB_CRED(mp);
191745916cd2Sjpk 
191845916cd2Sjpk 		unlabeled = (cr != NULL &&
191945916cd2Sjpk 		    crgetlabel(cr)->tsl_flags & TSLF_UNLABELED) != 0;
192045916cd2Sjpk 	}
192145916cd2Sjpk 	shared_addr = (zoneid == ALL_ZONES);
192245916cd2Sjpk 	if (shared_addr) {
1923f4b3ec61Sdh 		/*
1924f4b3ec61Sdh 		 * No need to handle exclusive-stack zones since ALL_ZONES
1925f4b3ec61Sdh 		 * only applies to the shared stack.
1926f4b3ec61Sdh 		 */
192745916cd2Sjpk 		zoneid = tsol_mlp_findzone(protocol, lport);
192845916cd2Sjpk 		/*
192945916cd2Sjpk 		 * If no shared MLP is found, tsol_mlp_findzone returns
193045916cd2Sjpk 		 * ALL_ZONES.  In that case, we assume it's SLP, and search for
193145916cd2Sjpk 		 * the zone based on the packet label.
193245916cd2Sjpk 		 *
193345916cd2Sjpk 		 * If there is such a zone, we prefer to find a connection in
193445916cd2Sjpk 		 * it.  Otherwise, we look for a MAC-exempt connection in any
193545916cd2Sjpk 		 * zone whose label dominates the default label on the packet.
193645916cd2Sjpk 		 */
193745916cd2Sjpk 		if (zoneid == ALL_ZONES)
193845916cd2Sjpk 			zoneid = tsol_packet_to_zoneid(mp);
193945916cd2Sjpk 		else
194045916cd2Sjpk 			unlabeled = B_FALSE;
194145916cd2Sjpk 	}
194245916cd2Sjpk 
19437c478bd9Sstevel@tonic-gate 	af = IPH_HDR_VERSION(hdr);
194445916cd2Sjpk 	dst = af == IPV4_VERSION ? (const void *)&hdr->ipha_dst :
194545916cd2Sjpk 	    (const void *)&((ip6_t *)hdr)->ip6_dst;
1946f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(ntohs(lport), ipst)];
19477c478bd9Sstevel@tonic-gate 
19487c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
19497c478bd9Sstevel@tonic-gate 	for (connp = connfp->connf_head; connp != NULL;
19507c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
19517c478bd9Sstevel@tonic-gate 		/* We don't allow v4 fallback for v6 raw socket. */
195245916cd2Sjpk 		if (af == (connp->conn_af_isv6 ? IPV4_VERSION :
195345916cd2Sjpk 		    IPV6_VERSION))
19547c478bd9Sstevel@tonic-gate 			continue;
19557c478bd9Sstevel@tonic-gate 		if (connp->conn_fully_bound) {
19567c478bd9Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
195745916cd2Sjpk 				if (!IPCL_CONN_MATCH(connp, protocol,
195845916cd2Sjpk 				    hdr->ipha_src, hdr->ipha_dst, ports))
195945916cd2Sjpk 					continue;
19607c478bd9Sstevel@tonic-gate 			} else {
196145916cd2Sjpk 				if (!IPCL_CONN_MATCH_V6(connp, protocol,
19627c478bd9Sstevel@tonic-gate 				    ((ip6_t *)hdr)->ip6_src,
196345916cd2Sjpk 				    ((ip6_t *)hdr)->ip6_dst, ports))
196445916cd2Sjpk 					continue;
19657c478bd9Sstevel@tonic-gate 			}
19667c478bd9Sstevel@tonic-gate 		} else {
19677c478bd9Sstevel@tonic-gate 			if (af == IPV4_VERSION) {
196845916cd2Sjpk 				if (!IPCL_BIND_MATCH(connp, protocol,
196945916cd2Sjpk 				    hdr->ipha_dst, lport))
197045916cd2Sjpk 					continue;
19717c478bd9Sstevel@tonic-gate 			} else {
197245916cd2Sjpk 				if (!IPCL_BIND_MATCH_V6(connp, protocol,
197345916cd2Sjpk 				    ((ip6_t *)hdr)->ip6_dst, lport))
197445916cd2Sjpk 					continue;
19757c478bd9Sstevel@tonic-gate 			}
19767c478bd9Sstevel@tonic-gate 		}
197745916cd2Sjpk 
19785d0bc3edSsommerfe 		if (IPCL_ZONE_MATCH(connp, zoneid) ||
197945916cd2Sjpk 		    (unlabeled && connp->conn_mac_exempt))
198045916cd2Sjpk 			break;
198145916cd2Sjpk 	}
198245916cd2Sjpk 	/*
198345916cd2Sjpk 	 * If the connection is fully-bound and connection-oriented (TCP or
198445916cd2Sjpk 	 * SCTP), then we've already validated the remote system's label.
198545916cd2Sjpk 	 * There's no need to do it again for every packet.
198645916cd2Sjpk 	 */
198745916cd2Sjpk 	if (connp != NULL && is_system_labeled() && (!connp->conn_fully_bound ||
198845916cd2Sjpk 	    !(connp->conn_flags & (IPCL_TCP|IPCL_SCTPCONN))) &&
198945916cd2Sjpk 	    !tsol_receive_local(mp, dst, af, shared_addr, connp)) {
199045916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__info__classify__rawip,
199145916cd2Sjpk 		    char *, "connp(1) could not receive mp(2)",
199245916cd2Sjpk 		    conn_t *, connp, mblk_t *, mp);
199345916cd2Sjpk 		connp = NULL;
19947c478bd9Sstevel@tonic-gate 	}
19957c0c0508Skcpoon 
19967c0c0508Skcpoon 	if (connp != NULL)
19977c0c0508Skcpoon 		goto found;
19987c0c0508Skcpoon 	mutex_exit(&connfp->connf_lock);
19997c0c0508Skcpoon 
20007c0c0508Skcpoon 	/* Try to look for a wildcard match. */
2001f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_raw_fanout[IPCL_RAW_HASH(0, ipst)];
20027c0c0508Skcpoon 	mutex_enter(&connfp->connf_lock);
20037c0c0508Skcpoon 	for (connp = connfp->connf_head; connp != NULL;
20047c0c0508Skcpoon 	    connp = connp->conn_next) {
20057c0c0508Skcpoon 		/* We don't allow v4 fallback for v6 raw socket. */
20067c0c0508Skcpoon 		if ((af == (connp->conn_af_isv6 ? IPV4_VERSION :
20075d0bc3edSsommerfe 		    IPV6_VERSION)) || !IPCL_ZONE_MATCH(connp, zoneid)) {
20087c0c0508Skcpoon 			continue;
20097c0c0508Skcpoon 		}
20107c0c0508Skcpoon 		if (af == IPV4_VERSION) {
20117c0c0508Skcpoon 			if (IPCL_RAW_MATCH(connp, protocol, hdr->ipha_dst))
20127c0c0508Skcpoon 				break;
20137c0c0508Skcpoon 		} else {
20147c0c0508Skcpoon 			if (IPCL_RAW_MATCH_V6(connp, protocol,
20157c0c0508Skcpoon 			    ((ip6_t *)hdr)->ip6_dst)) {
20167c0c0508Skcpoon 				break;
20177c0c0508Skcpoon 			}
20187c0c0508Skcpoon 		}
20197c478bd9Sstevel@tonic-gate 	}
20207c0c0508Skcpoon 
20217c0c0508Skcpoon 	if (connp != NULL)
20227c0c0508Skcpoon 		goto found;
20237c0c0508Skcpoon 
20247c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
20257c478bd9Sstevel@tonic-gate 	return (NULL);
20267c0c0508Skcpoon 
20277c0c0508Skcpoon found:
20287c0c0508Skcpoon 	ASSERT(connp != NULL);
20297c0c0508Skcpoon 	CONN_INC_REF(connp);
20307c0c0508Skcpoon 	mutex_exit(&connfp->connf_lock);
20317c0c0508Skcpoon 	return (connp);
20327c478bd9Sstevel@tonic-gate }
20337c478bd9Sstevel@tonic-gate 
20347c478bd9Sstevel@tonic-gate /* ARGSUSED */
20357c478bd9Sstevel@tonic-gate static int
2036fc80c0dfSnordmark tcp_conn_constructor(void *buf, void *cdrarg, int kmflags)
20377c478bd9Sstevel@tonic-gate {
20387c478bd9Sstevel@tonic-gate 	itc_t	*itc = (itc_t *)buf;
20397c478bd9Sstevel@tonic-gate 	conn_t 	*connp = &itc->itc_conn;
2040fc80c0dfSnordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
2041fc80c0dfSnordmark 
2042fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2043fc80c0dfSnordmark 	bzero(tcp, sizeof (tcp_t));
2044fc80c0dfSnordmark 
2045fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2046fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2047*0f1702c5SYu Xiangning 	cv_init(&connp->conn_sq_cv, NULL, CV_DEFAULT, NULL);
20487c478bd9Sstevel@tonic-gate 	tcp->tcp_timercache = tcp_timermp_alloc(KM_NOSLEEP);
20497c478bd9Sstevel@tonic-gate 	connp->conn_tcp = tcp;
20507c478bd9Sstevel@tonic-gate 	connp->conn_flags = IPCL_TCPCONN;
20517c478bd9Sstevel@tonic-gate 	connp->conn_ulp = IPPROTO_TCP;
20527c478bd9Sstevel@tonic-gate 	tcp->tcp_connp = connp;
20537c478bd9Sstevel@tonic-gate 	return (0);
20547c478bd9Sstevel@tonic-gate }
20557c478bd9Sstevel@tonic-gate 
20567c478bd9Sstevel@tonic-gate /* ARGSUSED */
20577c478bd9Sstevel@tonic-gate static void
2058fc80c0dfSnordmark tcp_conn_destructor(void *buf, void *cdrarg)
2059fc80c0dfSnordmark {
2060fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2061fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2062fc80c0dfSnordmark 	tcp_t	*tcp = (tcp_t *)&itc[1];
2063fc80c0dfSnordmark 
2064fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_TCPCONN);
2065fc80c0dfSnordmark 	ASSERT(tcp->tcp_connp == connp);
2066fc80c0dfSnordmark 	ASSERT(connp->conn_tcp == tcp);
2067fc80c0dfSnordmark 	tcp_timermp_free(tcp);
2068fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2069fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2070*0f1702c5SYu Xiangning 	cv_destroy(&connp->conn_sq_cv);
2071fc80c0dfSnordmark }
2072fc80c0dfSnordmark 
2073fc80c0dfSnordmark /* ARGSUSED */
2074fc80c0dfSnordmark static int
2075fc80c0dfSnordmark ip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2076fc80c0dfSnordmark {
2077fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2078fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2079fc80c0dfSnordmark 
2080fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2081fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2082fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2083fc80c0dfSnordmark 	connp->conn_flags = IPCL_IPCCONN;
2084fc80c0dfSnordmark 
2085fc80c0dfSnordmark 	return (0);
2086fc80c0dfSnordmark }
2087fc80c0dfSnordmark 
2088fc80c0dfSnordmark /* ARGSUSED */
2089fc80c0dfSnordmark static void
2090fc80c0dfSnordmark ip_conn_destructor(void *buf, void *cdrarg)
2091fc80c0dfSnordmark {
2092fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2093fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2094fc80c0dfSnordmark 
2095fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_IPCCONN);
2096fc80c0dfSnordmark 	ASSERT(connp->conn_priv == NULL);
2097fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2098fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2099fc80c0dfSnordmark }
2100fc80c0dfSnordmark 
2101fc80c0dfSnordmark /* ARGSUSED */
2102fc80c0dfSnordmark static int
2103fc80c0dfSnordmark udp_conn_constructor(void *buf, void *cdrarg, int kmflags)
2104fc80c0dfSnordmark {
2105fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2106fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2107fc80c0dfSnordmark 	udp_t	*udp = (udp_t *)&itc[1];
2108fc80c0dfSnordmark 
2109fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2110fc80c0dfSnordmark 	bzero(udp, sizeof (udp_t));
2111fc80c0dfSnordmark 
2112fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2113fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2114fc80c0dfSnordmark 	connp->conn_udp = udp;
2115fc80c0dfSnordmark 	connp->conn_flags = IPCL_UDPCONN;
2116fc80c0dfSnordmark 	connp->conn_ulp = IPPROTO_UDP;
2117fc80c0dfSnordmark 	udp->udp_connp = connp;
2118fc80c0dfSnordmark 	return (0);
2119fc80c0dfSnordmark }
2120fc80c0dfSnordmark 
2121fc80c0dfSnordmark /* ARGSUSED */
2122fc80c0dfSnordmark static void
2123fc80c0dfSnordmark udp_conn_destructor(void *buf, void *cdrarg)
2124fc80c0dfSnordmark {
2125fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2126fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2127fc80c0dfSnordmark 	udp_t	*udp = (udp_t *)&itc[1];
2128fc80c0dfSnordmark 
2129fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
2130fc80c0dfSnordmark 	ASSERT(udp->udp_connp == connp);
2131fc80c0dfSnordmark 	ASSERT(connp->conn_udp == udp);
2132fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2133fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2134fc80c0dfSnordmark }
2135fc80c0dfSnordmark 
2136fc80c0dfSnordmark /* ARGSUSED */
2137fc80c0dfSnordmark static int
2138fc80c0dfSnordmark rawip_conn_constructor(void *buf, void *cdrarg, int kmflags)
2139fc80c0dfSnordmark {
2140fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2141fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2142fc80c0dfSnordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
2143fc80c0dfSnordmark 
2144fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2145fc80c0dfSnordmark 	bzero(icmp, sizeof (icmp_t));
2146fc80c0dfSnordmark 
2147fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2148fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2149fc80c0dfSnordmark 	connp->conn_icmp = icmp;
2150fc80c0dfSnordmark 	connp->conn_flags = IPCL_RAWIPCONN;
2151fc80c0dfSnordmark 	connp->conn_ulp = IPPROTO_ICMP;
2152fc80c0dfSnordmark 	icmp->icmp_connp = connp;
2153fc80c0dfSnordmark 	return (0);
2154fc80c0dfSnordmark }
2155fc80c0dfSnordmark 
2156fc80c0dfSnordmark /* ARGSUSED */
2157fc80c0dfSnordmark static void
2158fc80c0dfSnordmark rawip_conn_destructor(void *buf, void *cdrarg)
2159fc80c0dfSnordmark {
2160fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2161fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2162fc80c0dfSnordmark 	icmp_t	*icmp = (icmp_t *)&itc[1];
2163fc80c0dfSnordmark 
2164fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_RAWIPCONN);
2165fc80c0dfSnordmark 	ASSERT(icmp->icmp_connp == connp);
2166fc80c0dfSnordmark 	ASSERT(connp->conn_icmp == icmp);
2167fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2168fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2169fc80c0dfSnordmark }
2170fc80c0dfSnordmark 
2171fc80c0dfSnordmark /* ARGSUSED */
2172fc80c0dfSnordmark static int
2173fc80c0dfSnordmark rts_conn_constructor(void *buf, void *cdrarg, int kmflags)
2174fc80c0dfSnordmark {
2175fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2176fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2177fc80c0dfSnordmark 	rts_t	*rts = (rts_t *)&itc[1];
2178fc80c0dfSnordmark 
2179fc80c0dfSnordmark 	bzero(connp, sizeof (conn_t));
2180fc80c0dfSnordmark 	bzero(rts, sizeof (rts_t));
2181fc80c0dfSnordmark 
2182fc80c0dfSnordmark 	mutex_init(&connp->conn_lock, NULL, MUTEX_DEFAULT, NULL);
2183fc80c0dfSnordmark 	cv_init(&connp->conn_cv, NULL, CV_DEFAULT, NULL);
2184fc80c0dfSnordmark 	connp->conn_rts = rts;
2185fc80c0dfSnordmark 	connp->conn_flags = IPCL_RTSCONN;
2186fc80c0dfSnordmark 	rts->rts_connp = connp;
2187fc80c0dfSnordmark 	return (0);
2188fc80c0dfSnordmark }
2189fc80c0dfSnordmark 
2190fc80c0dfSnordmark /* ARGSUSED */
2191fc80c0dfSnordmark static void
2192fc80c0dfSnordmark rts_conn_destructor(void *buf, void *cdrarg)
21937c478bd9Sstevel@tonic-gate {
2194fc80c0dfSnordmark 	itc_t	*itc = (itc_t *)buf;
2195fc80c0dfSnordmark 	conn_t 	*connp = &itc->itc_conn;
2196fc80c0dfSnordmark 	rts_t	*rts = (rts_t *)&itc[1];
2197fc80c0dfSnordmark 
2198fc80c0dfSnordmark 	ASSERT(connp->conn_flags & IPCL_RTSCONN);
2199fc80c0dfSnordmark 	ASSERT(rts->rts_connp == connp);
2200fc80c0dfSnordmark 	ASSERT(connp->conn_rts == rts);
2201fc80c0dfSnordmark 	mutex_destroy(&connp->conn_lock);
2202fc80c0dfSnordmark 	cv_destroy(&connp->conn_cv);
2203fc80c0dfSnordmark }
2204fc80c0dfSnordmark 
2205*0f1702c5SYu Xiangning /* ARGSUSED */
2206*0f1702c5SYu Xiangning int
2207*0f1702c5SYu Xiangning ip_helper_stream_constructor(void *buf, void *cdrarg, int kmflags)
2208*0f1702c5SYu Xiangning {
2209*0f1702c5SYu Xiangning 	int error;
2210*0f1702c5SYu Xiangning 	netstack_t	*ns;
2211*0f1702c5SYu Xiangning 	int		ret;
2212*0f1702c5SYu Xiangning 	tcp_stack_t	*tcps;
2213*0f1702c5SYu Xiangning 	ip_helper_stream_info_t	*ip_helper_str;
2214*0f1702c5SYu Xiangning 	ip_stack_t	*ipst;
2215*0f1702c5SYu Xiangning 
2216*0f1702c5SYu Xiangning 	ns = netstack_find_by_cred(kcred);
2217*0f1702c5SYu Xiangning 	ASSERT(ns != NULL);
2218*0f1702c5SYu Xiangning 	tcps = ns->netstack_tcp;
2219*0f1702c5SYu Xiangning 	ipst = ns->netstack_ip;
2220*0f1702c5SYu Xiangning 	ASSERT(tcps != NULL);
2221*0f1702c5SYu Xiangning 	ip_helper_str = (ip_helper_stream_info_t *)buf;
2222*0f1702c5SYu Xiangning 
2223*0f1702c5SYu Xiangning 	error = ldi_open_by_name(DEV_IP, IP_HELPER_STR, kcred,
2224*0f1702c5SYu Xiangning 	    &ip_helper_str->ip_helper_stream_handle, ipst->ips_ldi_ident);
2225*0f1702c5SYu Xiangning 	if (error != 0) {
2226*0f1702c5SYu Xiangning 		goto done;
2227*0f1702c5SYu Xiangning 	}
2228*0f1702c5SYu Xiangning 	error = ldi_ioctl(ip_helper_str->ip_helper_stream_handle,
2229*0f1702c5SYu Xiangning 	    SIOCSQPTR, (intptr_t)buf, FKIOCTL, kcred, &ret);
2230*0f1702c5SYu Xiangning 	if (error != 0) {
2231*0f1702c5SYu Xiangning 		(void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0,
2232*0f1702c5SYu Xiangning 		    kcred);
2233*0f1702c5SYu Xiangning 	}
2234*0f1702c5SYu Xiangning done:
2235*0f1702c5SYu Xiangning 	netstack_rele(ipst->ips_netstack);
2236*0f1702c5SYu Xiangning 	return (error);
2237*0f1702c5SYu Xiangning }
2238*0f1702c5SYu Xiangning 
2239*0f1702c5SYu Xiangning /* ARGSUSED */
2240*0f1702c5SYu Xiangning static void
2241*0f1702c5SYu Xiangning ip_helper_stream_destructor(void *buf, void *cdrarg)
2242*0f1702c5SYu Xiangning {
2243*0f1702c5SYu Xiangning 	ip_helper_stream_info_t *ip_helper_str = (ip_helper_stream_info_t *)buf;
2244*0f1702c5SYu Xiangning 
2245*0f1702c5SYu Xiangning 	ip_helper_str->ip_helper_stream_rq->q_ptr =
2246*0f1702c5SYu Xiangning 	    ip_helper_str->ip_helper_stream_wq->q_ptr =
2247*0f1702c5SYu Xiangning 	    ip_helper_str->ip_helper_stream_minfo;
2248*0f1702c5SYu Xiangning 	(void) ldi_close(ip_helper_str->ip_helper_stream_handle, 0, kcred);
2249*0f1702c5SYu Xiangning }
2250*0f1702c5SYu Xiangning 
2251*0f1702c5SYu Xiangning 
2252fc80c0dfSnordmark /*
2253fc80c0dfSnordmark  * Called as part of ipcl_conn_destroy to assert and clear any pointers
2254fc80c0dfSnordmark  * in the conn_t.
2255fc80c0dfSnordmark  */
2256fc80c0dfSnordmark void
2257fc80c0dfSnordmark ipcl_conn_cleanup(conn_t *connp)
2258fc80c0dfSnordmark {
2259fc80c0dfSnordmark 	ASSERT(connp->conn_ire_cache == NULL);
2260fc80c0dfSnordmark 	ASSERT(connp->conn_latch == NULL);
2261fc80c0dfSnordmark #ifdef notdef
2262fc80c0dfSnordmark 	ASSERT(connp->conn_rq == NULL);
2263fc80c0dfSnordmark 	ASSERT(connp->conn_wq == NULL);
2264fc80c0dfSnordmark #endif
2265fc80c0dfSnordmark 	ASSERT(connp->conn_cred == NULL);
2266fc80c0dfSnordmark 	ASSERT(connp->conn_g_fanout == NULL);
2267fc80c0dfSnordmark 	ASSERT(connp->conn_g_next == NULL);
2268fc80c0dfSnordmark 	ASSERT(connp->conn_g_prev == NULL);
2269fc80c0dfSnordmark 	ASSERT(connp->conn_policy == NULL);
2270fc80c0dfSnordmark 	ASSERT(connp->conn_fanout == NULL);
2271fc80c0dfSnordmark 	ASSERT(connp->conn_next == NULL);
2272fc80c0dfSnordmark 	ASSERT(connp->conn_prev == NULL);
2273fc80c0dfSnordmark #ifdef notdef
2274fc80c0dfSnordmark 	/*
2275fc80c0dfSnordmark 	 * The ill and ipif pointers are not cleared before the conn_t
2276fc80c0dfSnordmark 	 * goes away since they do not hold a reference on the ill/ipif.
2277fc80c0dfSnordmark 	 * We should replace these pointers with ifindex/ipaddr_t to
2278fc80c0dfSnordmark 	 * make the code less complex.
2279fc80c0dfSnordmark 	 */
2280fc80c0dfSnordmark 	ASSERT(connp->conn_xmit_if_ill == NULL);
2281fc80c0dfSnordmark 	ASSERT(connp->conn_nofailover_ill == NULL);
2282fc80c0dfSnordmark 	ASSERT(connp->conn_outgoing_ill == NULL);
2283fc80c0dfSnordmark 	ASSERT(connp->conn_incoming_ill == NULL);
2284fc80c0dfSnordmark 	ASSERT(connp->conn_outgoing_pill == NULL);
2285fc80c0dfSnordmark 	ASSERT(connp->conn_multicast_ipif == NULL);
2286fc80c0dfSnordmark 	ASSERT(connp->conn_multicast_ill == NULL);
2287fc80c0dfSnordmark #endif
2288fc80c0dfSnordmark 	ASSERT(connp->conn_oper_pending_ill == NULL);
2289fc80c0dfSnordmark 	ASSERT(connp->conn_ilg == NULL);
2290fc80c0dfSnordmark 	ASSERT(connp->conn_drain_next == NULL);
2291fc80c0dfSnordmark 	ASSERT(connp->conn_drain_prev == NULL);
2292a9737be2Snordmark #ifdef notdef
2293a9737be2Snordmark 	/* conn_idl is not cleared when removed from idl list */
2294fc80c0dfSnordmark 	ASSERT(connp->conn_idl == NULL);
2295a9737be2Snordmark #endif
2296fc80c0dfSnordmark 	ASSERT(connp->conn_ipsec_opt_mp == NULL);
2297fc80c0dfSnordmark 	ASSERT(connp->conn_peercred == NULL);
2298fc80c0dfSnordmark 	ASSERT(connp->conn_netstack == NULL);
2299fc80c0dfSnordmark 
2300*0f1702c5SYu Xiangning 	ASSERT(connp->conn_helper_info == NULL);
2301fc80c0dfSnordmark 	/* Clear out the conn_t fields that are not preserved */
2302fc80c0dfSnordmark 	bzero(&connp->conn_start_clr,
2303fc80c0dfSnordmark 	    sizeof (conn_t) -
2304fc80c0dfSnordmark 	    ((uchar_t *)&connp->conn_start_clr - (uchar_t *)connp));
23057c478bd9Sstevel@tonic-gate }
23067c478bd9Sstevel@tonic-gate 
23077c478bd9Sstevel@tonic-gate /*
23087c478bd9Sstevel@tonic-gate  * All conns are inserted in a global multi-list for the benefit of
23097c478bd9Sstevel@tonic-gate  * walkers. The walk is guaranteed to walk all open conns at the time
23107c478bd9Sstevel@tonic-gate  * of the start of the walk exactly once. This property is needed to
23117c478bd9Sstevel@tonic-gate  * achieve some cleanups during unplumb of interfaces. This is achieved
23127c478bd9Sstevel@tonic-gate  * as follows.
23137c478bd9Sstevel@tonic-gate  *
23147c478bd9Sstevel@tonic-gate  * ipcl_conn_create and ipcl_conn_destroy are the only functions that
23157c478bd9Sstevel@tonic-gate  * call the insert and delete functions below at creation and deletion
23167c478bd9Sstevel@tonic-gate  * time respectively. The conn never moves or changes its position in this
23177c478bd9Sstevel@tonic-gate  * multi-list during its lifetime. CONN_CONDEMNED ensures that the refcnt
23187c478bd9Sstevel@tonic-gate  * won't increase due to walkers, once the conn deletion has started. Note
23197c478bd9Sstevel@tonic-gate  * that we can't remove the conn from the global list and then wait for
23207c478bd9Sstevel@tonic-gate  * the refcnt to drop to zero, since walkers would then see a truncated
23217c478bd9Sstevel@tonic-gate  * list. CONN_INCIPIENT ensures that walkers don't start looking at
23227c478bd9Sstevel@tonic-gate  * conns until ip_open is ready to make them globally visible.
23237c478bd9Sstevel@tonic-gate  * The global round robin multi-list locks are held only to get the
23247c478bd9Sstevel@tonic-gate  * next member/insertion/deletion and contention should be negligible
23257c478bd9Sstevel@tonic-gate  * if the multi-list is much greater than the number of cpus.
23267c478bd9Sstevel@tonic-gate  */
23277c478bd9Sstevel@tonic-gate void
23287c478bd9Sstevel@tonic-gate ipcl_globalhash_insert(conn_t *connp)
23297c478bd9Sstevel@tonic-gate {
23307c478bd9Sstevel@tonic-gate 	int	index;
2331f4b3ec61Sdh 	struct connf_s	*connfp;
2332f4b3ec61Sdh 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
23337c478bd9Sstevel@tonic-gate 
23347c478bd9Sstevel@tonic-gate 	/*
23357c478bd9Sstevel@tonic-gate 	 * No need for atomic here. Approximate even distribution
23367c478bd9Sstevel@tonic-gate 	 * in the global lists is sufficient.
23377c478bd9Sstevel@tonic-gate 	 */
2338f4b3ec61Sdh 	ipst->ips_conn_g_index++;
2339f4b3ec61Sdh 	index = ipst->ips_conn_g_index & (CONN_G_HASH_SIZE - 1);
23407c478bd9Sstevel@tonic-gate 
23417c478bd9Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
23427c478bd9Sstevel@tonic-gate 	/*
23437c478bd9Sstevel@tonic-gate 	 * Mark as INCIPIENT, so that walkers will ignore this
23447c478bd9Sstevel@tonic-gate 	 * for now, till ip_open is ready to make it visible globally.
23457c478bd9Sstevel@tonic-gate 	 */
23467c478bd9Sstevel@tonic-gate 	connp->conn_state_flags |= CONN_INCIPIENT;
23477c478bd9Sstevel@tonic-gate 
2348f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_globalhash_fanout[index];
23497c478bd9Sstevel@tonic-gate 	/* Insert at the head of the list */
2350f4b3ec61Sdh 	mutex_enter(&connfp->connf_lock);
2351f4b3ec61Sdh 	connp->conn_g_next = connfp->connf_head;
23527c478bd9Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
23537c478bd9Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp;
2354f4b3ec61Sdh 	connfp->connf_head = connp;
23557c478bd9Sstevel@tonic-gate 
23567c478bd9Sstevel@tonic-gate 	/* The fanout bucket this conn points to */
2357f4b3ec61Sdh 	connp->conn_g_fanout = connfp;
23587c478bd9Sstevel@tonic-gate 
2359f4b3ec61Sdh 	mutex_exit(&connfp->connf_lock);
23607c478bd9Sstevel@tonic-gate }
23617c478bd9Sstevel@tonic-gate 
23627c478bd9Sstevel@tonic-gate void
23637c478bd9Sstevel@tonic-gate ipcl_globalhash_remove(conn_t *connp)
23647c478bd9Sstevel@tonic-gate {
2365f4b3ec61Sdh 	struct connf_s	*connfp;
2366f4b3ec61Sdh 
23677c478bd9Sstevel@tonic-gate 	/*
23687c478bd9Sstevel@tonic-gate 	 * We were never inserted in the global multi list.
23697c478bd9Sstevel@tonic-gate 	 * IPCL_NONE variety is never inserted in the global multilist
23707c478bd9Sstevel@tonic-gate 	 * since it is presumed to not need any cleanup and is transient.
23717c478bd9Sstevel@tonic-gate 	 */
23727c478bd9Sstevel@tonic-gate 	if (connp->conn_g_fanout == NULL)
23737c478bd9Sstevel@tonic-gate 		return;
23747c478bd9Sstevel@tonic-gate 
2375f4b3ec61Sdh 	connfp = connp->conn_g_fanout;
2376f4b3ec61Sdh 	mutex_enter(&connfp->connf_lock);
23777c478bd9Sstevel@tonic-gate 	if (connp->conn_g_prev != NULL)
23787c478bd9Sstevel@tonic-gate 		connp->conn_g_prev->conn_g_next = connp->conn_g_next;
23797c478bd9Sstevel@tonic-gate 	else
2380f4b3ec61Sdh 		connfp->connf_head = connp->conn_g_next;
23817c478bd9Sstevel@tonic-gate 	if (connp->conn_g_next != NULL)
23827c478bd9Sstevel@tonic-gate 		connp->conn_g_next->conn_g_prev = connp->conn_g_prev;
2383f4b3ec61Sdh 	mutex_exit(&connfp->connf_lock);
23847c478bd9Sstevel@tonic-gate 
23857c478bd9Sstevel@tonic-gate 	/* Better to stumble on a null pointer than to corrupt memory */
23867c478bd9Sstevel@tonic-gate 	connp->conn_g_next = NULL;
23877c478bd9Sstevel@tonic-gate 	connp->conn_g_prev = NULL;
2388fc80c0dfSnordmark 	connp->conn_g_fanout = NULL;
23897c478bd9Sstevel@tonic-gate }
23907c478bd9Sstevel@tonic-gate 
23917c478bd9Sstevel@tonic-gate /*
23927c478bd9Sstevel@tonic-gate  * Walk the list of all conn_t's in the system, calling the function provided
23937c478bd9Sstevel@tonic-gate  * with the specified argument for each.
23947c478bd9Sstevel@tonic-gate  * Applies to both IPv4 and IPv6.
23957c478bd9Sstevel@tonic-gate  *
23967c478bd9Sstevel@tonic-gate  * IPCs may hold pointers to ipif/ill. To guard against stale pointers
23977c478bd9Sstevel@tonic-gate  * ipcl_walk() is called to cleanup the conn_t's, typically when an interface is
23987c478bd9Sstevel@tonic-gate  * unplumbed or removed. New conn_t's that are created while we are walking
23997c478bd9Sstevel@tonic-gate  * may be missed by this walk, because they are not necessarily inserted
24007c478bd9Sstevel@tonic-gate  * at the tail of the list. They are new conn_t's and thus don't have any
24017c478bd9Sstevel@tonic-gate  * stale pointers. The CONN_CLOSING flag ensures that no new reference
24027c478bd9Sstevel@tonic-gate  * is created to the struct that is going away.
24037c478bd9Sstevel@tonic-gate  */
24047c478bd9Sstevel@tonic-gate void
2405f4b3ec61Sdh ipcl_walk(pfv_t func, void *arg, ip_stack_t *ipst)
24067c478bd9Sstevel@tonic-gate {
24077c478bd9Sstevel@tonic-gate 	int	i;
24087c478bd9Sstevel@tonic-gate 	conn_t	*connp;
24097c478bd9Sstevel@tonic-gate 	conn_t	*prev_connp;
24107c478bd9Sstevel@tonic-gate 
24117c478bd9Sstevel@tonic-gate 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
2412f4b3ec61Sdh 		mutex_enter(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24137c478bd9Sstevel@tonic-gate 		prev_connp = NULL;
2414f4b3ec61Sdh 		connp = ipst->ips_ipcl_globalhash_fanout[i].connf_head;
24157c478bd9Sstevel@tonic-gate 		while (connp != NULL) {
24167c478bd9Sstevel@tonic-gate 			mutex_enter(&connp->conn_lock);
24177c478bd9Sstevel@tonic-gate 			if (connp->conn_state_flags &
24187c478bd9Sstevel@tonic-gate 			    (CONN_CONDEMNED | CONN_INCIPIENT)) {
24197c478bd9Sstevel@tonic-gate 				mutex_exit(&connp->conn_lock);
24207c478bd9Sstevel@tonic-gate 				connp = connp->conn_g_next;
24217c478bd9Sstevel@tonic-gate 				continue;
24227c478bd9Sstevel@tonic-gate 			}
24237c478bd9Sstevel@tonic-gate 			CONN_INC_REF_LOCKED(connp);
24247c478bd9Sstevel@tonic-gate 			mutex_exit(&connp->conn_lock);
2425f4b3ec61Sdh 			mutex_exit(
2426f4b3ec61Sdh 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24277c478bd9Sstevel@tonic-gate 			(*func)(connp, arg);
24287c478bd9Sstevel@tonic-gate 			if (prev_connp != NULL)
24297c478bd9Sstevel@tonic-gate 				CONN_DEC_REF(prev_connp);
2430f4b3ec61Sdh 			mutex_enter(
2431f4b3ec61Sdh 			    &ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24327c478bd9Sstevel@tonic-gate 			prev_connp = connp;
24337c478bd9Sstevel@tonic-gate 			connp = connp->conn_g_next;
24347c478bd9Sstevel@tonic-gate 		}
2435f4b3ec61Sdh 		mutex_exit(&ipst->ips_ipcl_globalhash_fanout[i].connf_lock);
24367c478bd9Sstevel@tonic-gate 		if (prev_connp != NULL)
24377c478bd9Sstevel@tonic-gate 			CONN_DEC_REF(prev_connp);
24387c478bd9Sstevel@tonic-gate 	}
24397c478bd9Sstevel@tonic-gate }
24407c478bd9Sstevel@tonic-gate 
24417c478bd9Sstevel@tonic-gate /*
24427c478bd9Sstevel@tonic-gate  * Search for a peer TCP/IPv4 loopback conn by doing a reverse lookup on
24437c478bd9Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
24447c478bd9Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2445d0ab37afSethindra  * (peer tcp in ESTABLISHED state).
24467c478bd9Sstevel@tonic-gate  */
24477c478bd9Sstevel@tonic-gate conn_t *
2448f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *connp, ipha_t *ipha, tcph_t *tcph,
2449f4b3ec61Sdh     ip_stack_t *ipst)
24507c478bd9Sstevel@tonic-gate {
24517c478bd9Sstevel@tonic-gate 	uint32_t ports;
24527c478bd9Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
24537c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
24547c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
24557c478bd9Sstevel@tonic-gate 	boolean_t zone_chk;
24567c478bd9Sstevel@tonic-gate 
24577c478bd9Sstevel@tonic-gate 	/*
24587c478bd9Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
24597c478bd9Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
24607c478bd9Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
24617c478bd9Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.
24627c478bd9Sstevel@tonic-gate 	 */
24637c478bd9Sstevel@tonic-gate 	zone_chk = (ipha->ipha_src == htonl(INADDR_LOOPBACK) ||
24647c478bd9Sstevel@tonic-gate 	    ipha->ipha_dst == htonl(INADDR_LOOPBACK));
24657c478bd9Sstevel@tonic-gate 
24667c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
24677c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
24687c478bd9Sstevel@tonic-gate 
2469f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2470f4b3ec61Sdh 	    ports, ipst)];
24717c478bd9Sstevel@tonic-gate 
24727c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
24737c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
24747c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
24757c478bd9Sstevel@tonic-gate 
24767c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
24777c478bd9Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
2478d0ab37afSethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
24797c478bd9Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
24807c478bd9Sstevel@tonic-gate 
24817c478bd9Sstevel@tonic-gate 			ASSERT(tconnp != connp);
24827c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
24837c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
24847c478bd9Sstevel@tonic-gate 			return (tconnp);
24857c478bd9Sstevel@tonic-gate 		}
24867c478bd9Sstevel@tonic-gate 	}
24877c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
24887c478bd9Sstevel@tonic-gate 	return (NULL);
24897c478bd9Sstevel@tonic-gate }
24907c478bd9Sstevel@tonic-gate 
24917c478bd9Sstevel@tonic-gate /*
24927c478bd9Sstevel@tonic-gate  * Search for a peer TCP/IPv6 loopback conn by doing a reverse lookup on
24937c478bd9Sstevel@tonic-gate  * the {src, dst, lport, fport} quadruplet.  Returns with conn reference
24947c478bd9Sstevel@tonic-gate  * held; caller must call CONN_DEC_REF.  Only checks for connected entries
2495d0ab37afSethindra  * (peer tcp in ESTABLISHED state).
24967c478bd9Sstevel@tonic-gate  */
24977c478bd9Sstevel@tonic-gate conn_t *
2498f4b3ec61Sdh ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *connp, ip6_t *ip6h, tcph_t *tcph,
2499f4b3ec61Sdh     ip_stack_t *ipst)
25007c478bd9Sstevel@tonic-gate {
25017c478bd9Sstevel@tonic-gate 	uint32_t ports;
25027c478bd9Sstevel@tonic-gate 	uint16_t *pports = (uint16_t *)&ports;
25037c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
25047c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
25057c478bd9Sstevel@tonic-gate 	boolean_t zone_chk;
25067c478bd9Sstevel@tonic-gate 
25077c478bd9Sstevel@tonic-gate 	/*
25087c478bd9Sstevel@tonic-gate 	 * If either the source of destination address is loopback, then
25097c478bd9Sstevel@tonic-gate 	 * both endpoints must be in the same Zone.  Otherwise, both of
25107c478bd9Sstevel@tonic-gate 	 * the addresses are system-wide unique (tcp is in ESTABLISHED
25117c478bd9Sstevel@tonic-gate 	 * state) and the endpoints may reside in different Zones.  We
25127c478bd9Sstevel@tonic-gate 	 * don't do Zone check for link local address(es) because the
25137c478bd9Sstevel@tonic-gate 	 * current Zone implementation treats each link local address as
25147c478bd9Sstevel@tonic-gate 	 * being unique per system node, i.e. they belong to global Zone.
25157c478bd9Sstevel@tonic-gate 	 */
25167c478bd9Sstevel@tonic-gate 	zone_chk = (IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_src) ||
25177c478bd9Sstevel@tonic-gate 	    IN6_IS_ADDR_LOOPBACK(&ip6h->ip6_dst));
25187c478bd9Sstevel@tonic-gate 
25197c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
25207c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
25217c478bd9Sstevel@tonic-gate 
2522f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2523f4b3ec61Sdh 	    ports, ipst)];
25247c478bd9Sstevel@tonic-gate 
25257c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25267c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25277c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25287c478bd9Sstevel@tonic-gate 
25297c478bd9Sstevel@tonic-gate 		/* We skip tcp_bound_if check here as this is loopback tcp */
25307c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
25317c478bd9Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
2532d0ab37afSethindra 		    tconnp->conn_tcp->tcp_state == TCPS_ESTABLISHED &&
25337c478bd9Sstevel@tonic-gate 		    (!zone_chk || tconnp->conn_zoneid == connp->conn_zoneid)) {
25347c478bd9Sstevel@tonic-gate 
25357c478bd9Sstevel@tonic-gate 			ASSERT(tconnp != connp);
25367c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25377c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25387c478bd9Sstevel@tonic-gate 			return (tconnp);
25397c478bd9Sstevel@tonic-gate 		}
25407c478bd9Sstevel@tonic-gate 	}
25417c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25427c478bd9Sstevel@tonic-gate 	return (NULL);
25437c478bd9Sstevel@tonic-gate }
25447c478bd9Sstevel@tonic-gate 
25457c478bd9Sstevel@tonic-gate /*
25467c478bd9Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
25477c478bd9Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
25487c478bd9Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
25497c478bd9Sstevel@tonic-gate  */
25507c478bd9Sstevel@tonic-gate conn_t *
2551f4b3ec61Sdh ipcl_tcp_lookup_reversed_ipv4(ipha_t *ipha, tcph_t *tcph, int min_state,
2552f4b3ec61Sdh     ip_stack_t *ipst)
25537c478bd9Sstevel@tonic-gate {
25547c478bd9Sstevel@tonic-gate 	uint32_t ports;
25557c478bd9Sstevel@tonic-gate 	uint16_t *pports;
25567c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
25577c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
25587c478bd9Sstevel@tonic-gate 
25597c478bd9Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
25607c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_fport, &pports[0], sizeof (uint16_t));
25617c478bd9Sstevel@tonic-gate 	bcopy(tcph->th_lport, &pports[1], sizeof (uint16_t));
25627c478bd9Sstevel@tonic-gate 
2563f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH(ipha->ipha_dst,
2564121e5416Skcpoon 	    ports, ipst)];
25657c478bd9Sstevel@tonic-gate 
25667c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
25677c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
25687c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
25697c478bd9Sstevel@tonic-gate 
25707c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH(tconnp, IPPROTO_TCP,
25717c478bd9Sstevel@tonic-gate 		    ipha->ipha_dst, ipha->ipha_src, ports) &&
25727c478bd9Sstevel@tonic-gate 		    tconnp->conn_tcp->tcp_state >= min_state) {
25737c478bd9Sstevel@tonic-gate 
25747c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
25757c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
25767c478bd9Sstevel@tonic-gate 			return (tconnp);
25777c478bd9Sstevel@tonic-gate 		}
25787c478bd9Sstevel@tonic-gate 	}
25797c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
25807c478bd9Sstevel@tonic-gate 	return (NULL);
25817c478bd9Sstevel@tonic-gate }
25827c478bd9Sstevel@tonic-gate 
25837c478bd9Sstevel@tonic-gate /*
25847c478bd9Sstevel@tonic-gate  * Find an exact {src, dst, lport, fport} match for a bounced datagram.
25857c478bd9Sstevel@tonic-gate  * Returns with conn reference held. Caller must call CONN_DEC_REF.
25867c478bd9Sstevel@tonic-gate  * Only checks for connected entries i.e. no INADDR_ANY checks.
25877c478bd9Sstevel@tonic-gate  * Match on ifindex in addition to addresses.
25887c478bd9Sstevel@tonic-gate  */
25897c478bd9Sstevel@tonic-gate conn_t *
25907c478bd9Sstevel@tonic-gate ipcl_tcp_lookup_reversed_ipv6(ip6_t *ip6h, tcpha_t *tcpha, int min_state,
2591f4b3ec61Sdh     uint_t ifindex, ip_stack_t *ipst)
25927c478bd9Sstevel@tonic-gate {
25937c478bd9Sstevel@tonic-gate 	tcp_t	*tcp;
25947c478bd9Sstevel@tonic-gate 	uint32_t ports;
25957c478bd9Sstevel@tonic-gate 	uint16_t *pports;
25967c478bd9Sstevel@tonic-gate 	connf_t	*connfp;
25977c478bd9Sstevel@tonic-gate 	conn_t	*tconnp;
25987c478bd9Sstevel@tonic-gate 
25997c478bd9Sstevel@tonic-gate 	pports = (uint16_t *)&ports;
26007c478bd9Sstevel@tonic-gate 	pports[0] = tcpha->tha_fport;
26017c478bd9Sstevel@tonic-gate 	pports[1] = tcpha->tha_lport;
26027c478bd9Sstevel@tonic-gate 
2603f4b3ec61Sdh 	connfp = &ipst->ips_ipcl_conn_fanout[IPCL_CONN_HASH_V6(ip6h->ip6_dst,
2604121e5416Skcpoon 	    ports, ipst)];
26057c478bd9Sstevel@tonic-gate 
26067c478bd9Sstevel@tonic-gate 	mutex_enter(&connfp->connf_lock);
26077c478bd9Sstevel@tonic-gate 	for (tconnp = connfp->connf_head; tconnp != NULL;
26087c478bd9Sstevel@tonic-gate 	    tconnp = tconnp->conn_next) {
26097c478bd9Sstevel@tonic-gate 
26107c478bd9Sstevel@tonic-gate 		tcp = tconnp->conn_tcp;
26117c478bd9Sstevel@tonic-gate 		if (IPCL_CONN_MATCH_V6(tconnp, IPPROTO_TCP,
26127c478bd9Sstevel@tonic-gate 		    ip6h->ip6_dst, ip6h->ip6_src, ports) &&
26137c478bd9Sstevel@tonic-gate 		    tcp->tcp_state >= min_state &&
26147c478bd9Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
26157c478bd9Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex)) {
26167c478bd9Sstevel@tonic-gate 
26177c478bd9Sstevel@tonic-gate 			CONN_INC_REF(tconnp);
26187c478bd9Sstevel@tonic-gate 			mutex_exit(&connfp->connf_lock);
26197c478bd9Sstevel@tonic-gate 			return (tconnp);
26207c478bd9Sstevel@tonic-gate 		}
26217c478bd9Sstevel@tonic-gate 	}
26227c478bd9Sstevel@tonic-gate 	mutex_exit(&connfp->connf_lock);
26237c478bd9Sstevel@tonic-gate 	return (NULL);
26247c478bd9Sstevel@tonic-gate }
26257c478bd9Sstevel@tonic-gate 
26267c478bd9Sstevel@tonic-gate /*
262745916cd2Sjpk  * Finds a TCP/IPv4 listening connection; called by tcp_disconnect to locate
262845916cd2Sjpk  * a listener when changing state.
26297c478bd9Sstevel@tonic-gate  */
26307c478bd9Sstevel@tonic-gate conn_t *
2631f4b3ec61Sdh ipcl_lookup_listener_v4(uint16_t lport, ipaddr_t laddr, zoneid_t zoneid,
2632f4b3ec61Sdh     ip_stack_t *ipst)
26337c478bd9Sstevel@tonic-gate {
26347c478bd9Sstevel@tonic-gate 	connf_t		*bind_connfp;
26357c478bd9Sstevel@tonic-gate 	conn_t		*connp;
26367c478bd9Sstevel@tonic-gate 	tcp_t		*tcp;
26377c478bd9Sstevel@tonic-gate 
26387c478bd9Sstevel@tonic-gate 	/*
26397c478bd9Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
26407c478bd9Sstevel@tonic-gate 	 * all zeros.
26417c478bd9Sstevel@tonic-gate 	 */
26427c478bd9Sstevel@tonic-gate 	if (laddr == 0)
26437c478bd9Sstevel@tonic-gate 		return (NULL);
26447c478bd9Sstevel@tonic-gate 
264545916cd2Sjpk 	ASSERT(zoneid != ALL_ZONES);
264645916cd2Sjpk 
2647f4b3ec61Sdh 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
26487c478bd9Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
26497c478bd9Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
26507c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
26517c478bd9Sstevel@tonic-gate 		tcp = connp->conn_tcp;
26527c478bd9Sstevel@tonic-gate 		if (IPCL_BIND_MATCH(connp, IPPROTO_TCP, laddr, lport) &&
26535d0bc3edSsommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
26547c478bd9Sstevel@tonic-gate 		    (tcp->tcp_listener == NULL)) {
26557c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
26567c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
26577c478bd9Sstevel@tonic-gate 			return (connp);
26587c478bd9Sstevel@tonic-gate 		}
26597c478bd9Sstevel@tonic-gate 	}
26607c478bd9Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
26617c478bd9Sstevel@tonic-gate 	return (NULL);
26627c478bd9Sstevel@tonic-gate }
26637c478bd9Sstevel@tonic-gate 
266445916cd2Sjpk /*
266545916cd2Sjpk  * Finds a TCP/IPv6 listening connection; called by tcp_disconnect to locate
266645916cd2Sjpk  * a listener when changing state.
266745916cd2Sjpk  */
26687c478bd9Sstevel@tonic-gate conn_t *
26697c478bd9Sstevel@tonic-gate ipcl_lookup_listener_v6(uint16_t lport, in6_addr_t *laddr, uint_t ifindex,
2670f4b3ec61Sdh     zoneid_t zoneid, ip_stack_t *ipst)
26717c478bd9Sstevel@tonic-gate {
26727c478bd9Sstevel@tonic-gate 	connf_t		*bind_connfp;
26737c478bd9Sstevel@tonic-gate 	conn_t		*connp = NULL;
26747c478bd9Sstevel@tonic-gate 	tcp_t		*tcp;
26757c478bd9Sstevel@tonic-gate 
26767c478bd9Sstevel@tonic-gate 	/*
26777c478bd9Sstevel@tonic-gate 	 * Avoid false matches for packets sent to an IP destination of
26787c478bd9Sstevel@tonic-gate 	 * all zeros.
26797c478bd9Sstevel@tonic-gate 	 */
26807c478bd9Sstevel@tonic-gate 	if (IN6_IS_ADDR_UNSPECIFIED(laddr))
26817c478bd9Sstevel@tonic-gate 		return (NULL);
26827c478bd9Sstevel@tonic-gate 
268345916cd2Sjpk 	ASSERT(zoneid != ALL_ZONES);
26847c478bd9Sstevel@tonic-gate 
2685f4b3ec61Sdh 	bind_connfp = &ipst->ips_ipcl_bind_fanout[IPCL_BIND_HASH(lport, ipst)];
26867c478bd9Sstevel@tonic-gate 	mutex_enter(&bind_connfp->connf_lock);
26877c478bd9Sstevel@tonic-gate 	for (connp = bind_connfp->connf_head; connp != NULL;
26887c478bd9Sstevel@tonic-gate 	    connp = connp->conn_next) {
26897c478bd9Sstevel@tonic-gate 		tcp = connp->conn_tcp;
26907c478bd9Sstevel@tonic-gate 		if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP, *laddr, lport) &&
26915d0bc3edSsommerfe 		    IPCL_ZONE_MATCH(connp, zoneid) &&
26927c478bd9Sstevel@tonic-gate 		    (tcp->tcp_bound_if == 0 ||
26937c478bd9Sstevel@tonic-gate 		    tcp->tcp_bound_if == ifindex) &&
26947c478bd9Sstevel@tonic-gate 		    tcp->tcp_listener == NULL) {
26957c478bd9Sstevel@tonic-gate 			CONN_INC_REF(connp);
26967c478bd9Sstevel@tonic-gate 			mutex_exit(&bind_connfp->connf_lock);
26977c478bd9Sstevel@tonic-gate 			return (connp);
26987c478bd9Sstevel@tonic-gate 		}
26997c478bd9Sstevel@tonic-gate 	}
27007c478bd9Sstevel@tonic-gate 	mutex_exit(&bind_connfp->connf_lock);
27017c478bd9Sstevel@tonic-gate 	return (NULL);
27027c478bd9Sstevel@tonic-gate }
27037c478bd9Sstevel@tonic-gate 
2704ff550d0eSmasputra /*
2705ff550d0eSmasputra  * ipcl_get_next_conn
2706ff550d0eSmasputra  *	get the next entry in the conn global list
2707ff550d0eSmasputra  *	and put a reference on the next_conn.
2708ff550d0eSmasputra  *	decrement the reference on the current conn.
2709ff550d0eSmasputra  *
2710ff550d0eSmasputra  * This is an iterator based walker function that also provides for
2711ff550d0eSmasputra  * some selection by the caller. It walks through the conn_hash bucket
2712ff550d0eSmasputra  * searching for the next valid connp in the list, and selects connections
2713ff550d0eSmasputra  * that are neither closed nor condemned. It also REFHOLDS the conn
2714ff550d0eSmasputra  * thus ensuring that the conn exists when the caller uses the conn.
2715ff550d0eSmasputra  */
2716ff550d0eSmasputra conn_t *
2717ff550d0eSmasputra ipcl_get_next_conn(connf_t *connfp, conn_t *connp, uint32_t conn_flags)
2718ff550d0eSmasputra {
2719ff550d0eSmasputra 	conn_t	*next_connp;
2720ff550d0eSmasputra 
2721ff550d0eSmasputra 	if (connfp == NULL)
2722ff550d0eSmasputra 		return (NULL);
2723ff550d0eSmasputra 
2724ff550d0eSmasputra 	mutex_enter(&connfp->connf_lock);
2725ff550d0eSmasputra 
2726ff550d0eSmasputra 	next_connp = (connp == NULL) ?
2727ff550d0eSmasputra 	    connfp->connf_head : connp->conn_g_next;
2728ff550d0eSmasputra 
2729ff550d0eSmasputra 	while (next_connp != NULL) {
2730ff550d0eSmasputra 		mutex_enter(&next_connp->conn_lock);
2731ff550d0eSmasputra 		if (!(next_connp->conn_flags & conn_flags) ||
2732ff550d0eSmasputra 		    (next_connp->conn_state_flags &
2733ff550d0eSmasputra 		    (CONN_CONDEMNED | CONN_INCIPIENT))) {
2734ff550d0eSmasputra 			/*
2735ff550d0eSmasputra 			 * This conn has been condemned or
2736ff550d0eSmasputra 			 * is closing, or the flags don't match
2737ff550d0eSmasputra 			 */
2738ff550d0eSmasputra 			mutex_exit(&next_connp->conn_lock);
2739ff550d0eSmasputra 			next_connp = next_connp->conn_g_next;
2740ff550d0eSmasputra 			continue;
2741ff550d0eSmasputra 		}
2742ff550d0eSmasputra 		CONN_INC_REF_LOCKED(next_connp);
2743ff550d0eSmasputra 		mutex_exit(&next_connp->conn_lock);
2744ff550d0eSmasputra 		break;
2745ff550d0eSmasputra 	}
2746ff550d0eSmasputra 
2747ff550d0eSmasputra 	mutex_exit(&connfp->connf_lock);
2748ff550d0eSmasputra 
2749ff550d0eSmasputra 	if (connp != NULL)
2750ff550d0eSmasputra 		CONN_DEC_REF(connp);
2751ff550d0eSmasputra 
2752ff550d0eSmasputra 	return (next_connp);
2753ff550d0eSmasputra }
2754ff550d0eSmasputra 
27557c478bd9Sstevel@tonic-gate #ifdef CONN_DEBUG
27567c478bd9Sstevel@tonic-gate /*
27577c478bd9Sstevel@tonic-gate  * Trace of the last NBUF refhold/refrele
27587c478bd9Sstevel@tonic-gate  */
27597c478bd9Sstevel@tonic-gate int
27607c478bd9Sstevel@tonic-gate conn_trace_ref(conn_t *connp)
27617c478bd9Sstevel@tonic-gate {
27627c478bd9Sstevel@tonic-gate 	int	last;
27637c478bd9Sstevel@tonic-gate 	conn_trace_t	*ctb;
27647c478bd9Sstevel@tonic-gate 
27657c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
27667c478bd9Sstevel@tonic-gate 	last = connp->conn_trace_last;
27677c478bd9Sstevel@tonic-gate 	last++;
27687c478bd9Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
27697c478bd9Sstevel@tonic-gate 		last = 0;
27707c478bd9Sstevel@tonic-gate 
27717c478bd9Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
27726a8288c7Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27737c478bd9Sstevel@tonic-gate 	connp->conn_trace_last = last;
27747c478bd9Sstevel@tonic-gate 	return (1);
27757c478bd9Sstevel@tonic-gate }
27767c478bd9Sstevel@tonic-gate 
27777c478bd9Sstevel@tonic-gate int
27787c478bd9Sstevel@tonic-gate conn_untrace_ref(conn_t *connp)
27797c478bd9Sstevel@tonic-gate {
27807c478bd9Sstevel@tonic-gate 	int	last;
27817c478bd9Sstevel@tonic-gate 	conn_trace_t	*ctb;
27827c478bd9Sstevel@tonic-gate 
27837c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&connp->conn_lock));
27847c478bd9Sstevel@tonic-gate 	last = connp->conn_trace_last;
27857c478bd9Sstevel@tonic-gate 	last++;
27867c478bd9Sstevel@tonic-gate 	if (last == CONN_TRACE_MAX)
27877c478bd9Sstevel@tonic-gate 		last = 0;
27887c478bd9Sstevel@tonic-gate 
27897c478bd9Sstevel@tonic-gate 	ctb = &connp->conn_trace_buf[last];
27906a8288c7Scarlsonj 	ctb->ctb_depth = getpcstack(ctb->ctb_stack, CONN_STACK_DEPTH);
27917c478bd9Sstevel@tonic-gate 	connp->conn_trace_last = last;
27927c478bd9Sstevel@tonic-gate 	return (1);
27937c478bd9Sstevel@tonic-gate }
27947c478bd9Sstevel@tonic-gate #endif
2795