xref: /illumos-gate/usr/src/uts/common/inet/ip/tnet.c (revision de8c4a14)
145916cd2Sjpk /*
245916cd2Sjpk  * CDDL HEADER START
345916cd2Sjpk  *
445916cd2Sjpk  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
745916cd2Sjpk  *
845916cd2Sjpk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
945916cd2Sjpk  * or http://www.opensolaris.org/os/licensing.
1045916cd2Sjpk  * See the License for the specific language governing permissions
1145916cd2Sjpk  * and limitations under the License.
1245916cd2Sjpk  *
1345916cd2Sjpk  * When distributing Covered Code, include this CDDL HEADER in each
1445916cd2Sjpk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1545916cd2Sjpk  * If applicable, add the following below this CDDL HEADER, with the
1645916cd2Sjpk  * fields enclosed by brackets "[]" replaced with your own identifying
1745916cd2Sjpk  * information: Portions Copyright [yyyy] [name of copyright owner]
1845916cd2Sjpk  *
1945916cd2Sjpk  * CDDL HEADER END
2045916cd2Sjpk  */
2145916cd2Sjpk /*
22*de8c4a14SErik Nordmark  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
2345916cd2Sjpk  * Use is subject to license terms.
2445916cd2Sjpk  */
2545916cd2Sjpk 
2645916cd2Sjpk #include <sys/types.h>
2745916cd2Sjpk #include <sys/stream.h>
2845916cd2Sjpk #include <sys/strsubr.h>
2945916cd2Sjpk #include <sys/stropts.h>
3045916cd2Sjpk #include <sys/sunddi.h>
3145916cd2Sjpk #include <sys/cred.h>
3245916cd2Sjpk #include <sys/debug.h>
3345916cd2Sjpk #include <sys/kmem.h>
3445916cd2Sjpk #include <sys/errno.h>
3545916cd2Sjpk #include <sys/disp.h>
3645916cd2Sjpk #include <netinet/in.h>
3745916cd2Sjpk #include <netinet/in_systm.h>
3845916cd2Sjpk #include <netinet/ip.h>
3945916cd2Sjpk #include <netinet/ip_icmp.h>
4045916cd2Sjpk #include <netinet/tcp.h>
4145916cd2Sjpk #include <inet/common.h>
4245916cd2Sjpk #include <inet/ipclassifier.h>
4345916cd2Sjpk #include <inet/ip.h>
4445916cd2Sjpk #include <inet/mib2.h>
4545916cd2Sjpk #include <inet/nd.h>
4645916cd2Sjpk #include <inet/tcp.h>
4745916cd2Sjpk #include <inet/ip_rts.h>
4845916cd2Sjpk #include <inet/ip_ire.h>
4945916cd2Sjpk #include <inet/ip_if.h>
5045916cd2Sjpk #include <sys/modhash.h>
5145916cd2Sjpk 
5245916cd2Sjpk #include <sys/tsol/label.h>
5345916cd2Sjpk #include <sys/tsol/label_macro.h>
5445916cd2Sjpk #include <sys/tsol/tnet.h>
5545916cd2Sjpk #include <sys/tsol/tndb.h>
5645916cd2Sjpk #include <sys/strsun.h>
5745916cd2Sjpk 
5845916cd2Sjpk /* tunable for strict error-reply behavior (TCP RST and ICMP Unreachable) */
5945916cd2Sjpk int tsol_strict_error;
6045916cd2Sjpk 
6145916cd2Sjpk /*
6245916cd2Sjpk  * Some notes on the Trusted Solaris IRE gateway security attributes:
6345916cd2Sjpk  *
6445916cd2Sjpk  * When running in Trusted mode, the routing subsystem determines whether or
6545916cd2Sjpk  * not a packet can be delivered to an off-link host (not directly reachable
6645916cd2Sjpk  * through an interface) based on the accreditation checks of the packet's
6745916cd2Sjpk  * security attributes against those associated with the next-hop gateway.
6845916cd2Sjpk  *
6945916cd2Sjpk  * The next-hop gateway's security attributes can be derived from two sources
7045916cd2Sjpk  * (in order of preference): route-related and the host database.  A Trusted
7145916cd2Sjpk  * system must be configured with at least the host database containing an
7245916cd2Sjpk  * entry for the next-hop gateway, or otherwise no accreditation checks can
7345916cd2Sjpk  * be performed, which may result in the inability to send packets to any
7445916cd2Sjpk  * off-link destination host.
7545916cd2Sjpk  *
7645916cd2Sjpk  * The major differences between the two sources are the number and type of
7745916cd2Sjpk  * security attributes used for accreditation checks.  A host database entry
7845916cd2Sjpk  * can contain at most one set of security attributes, specific only to the
 * next-hop gateway.  In contrast, route-related security attributes are made
 * up of a collection of security attributes for the distant networks, and
 * are grouped together per next-hop gateway used to reach those networks.
 * This is the preferred method, and the routing subsystem will fall back to
8345916cd2Sjpk  * the host database entry only if there are no route-related attributes
8445916cd2Sjpk  * associated with the next-hop gateway.
8545916cd2Sjpk  *
8645916cd2Sjpk  * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
8745916cd2Sjpk  * INTERFACE type) are initialized to contain a placeholder to store this
8845916cd2Sjpk  * information.  The ire_gw_secattr structure gets allocated, initialized
8945916cd2Sjpk  * and associated with the IRE during the time of the IRE creation.  The
9045916cd2Sjpk  * initialization process also includes resolving the host database entry
9145916cd2Sjpk  * of the next-hop gateway for fallback purposes.  It does not include any
9245916cd2Sjpk  * route-related attribute setup, as that process comes separately as part
9345916cd2Sjpk  * of the route requests (add/change) made to the routing subsystem.
9445916cd2Sjpk  *
 * The underlying logic which involves associating IREs with the gateway
 * security attributes is represented by the following data structures:
9745916cd2Sjpk  *
9845916cd2Sjpk  * tsol_gcdb_t, or "gcdb"
9945916cd2Sjpk  *
10045916cd2Sjpk  *	- This is a system-wide collection of records containing the
10145916cd2Sjpk  *	  currently used route-related security attributes, which are fed
10245916cd2Sjpk  *	  through the routing socket interface, e.g. "route add/change".
10345916cd2Sjpk  *
10445916cd2Sjpk  * tsol_gc_t, or "gc"
10545916cd2Sjpk  *
 *	- This is the gateway credential structure, and it provides the
 *	  only mechanism to access the contents of gcdb.  More than one gc
 *	  entry may refer to the same gcdb record.  gc's in the system are
 *	  grouped according to the next-hop gateway address.
11045916cd2Sjpk  *
11145916cd2Sjpk  * tsol_gcgrp_t, or "gcgrp"
11245916cd2Sjpk  *
11345916cd2Sjpk  *	- Group of gateway credentials, and is unique per next-hop gateway
11445916cd2Sjpk  *	  address.  When the group is not empty, i.e. when gcgrp_count is
11545916cd2Sjpk  *	  greater than zero, it contains one or more gc's, each pointing to
11645916cd2Sjpk  *	  a gcdb record which indicates the gateway security attributes
11745916cd2Sjpk  *	  associated with the next-hop gateway.
11845916cd2Sjpk  *
11945916cd2Sjpk  * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
12045916cd2Sjpk  *
12145916cd2Sjpk  * igsa_lock
12245916cd2Sjpk  *
12345916cd2Sjpk  *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
12445916cd2Sjpk  *
12545916cd2Sjpk  * igsa_rhc
12645916cd2Sjpk  *
12745916cd2Sjpk  *	- Remote host cache database entry of next-hop gateway.  This is
12845916cd2Sjpk  *	  used in the case when there are no route-related attributes
12945916cd2Sjpk  *	  configured for the IRE.
13045916cd2Sjpk  *
13145916cd2Sjpk  * igsa_gc
13245916cd2Sjpk  *
13345916cd2Sjpk  *	- A set of route-related attributes that only get set for prefix
13445916cd2Sjpk  *	  IREs.  If this is non-NULL, the prefix IRE has been associated
13545916cd2Sjpk  *	  with a set of gateway security attributes by way of route add/
13645916cd2Sjpk  *	  change functionality.  This field stays NULL for IRE_CACHEs.
13745916cd2Sjpk  *
13845916cd2Sjpk  * igsa_gcgrp
13945916cd2Sjpk  *
14045916cd2Sjpk  *	- Group of gc's which only gets set for IRE_CACHEs.  Each of the gc
14145916cd2Sjpk  *	  points to a gcdb record that contains the security attributes
14245916cd2Sjpk  *	  used to perform the credential checks of the packet which uses
14345916cd2Sjpk  *	  the IRE.  If the group is not empty, the list of gc's can be
14445916cd2Sjpk  *	  traversed starting at gcgrp_head.  This field stays NULL for
14545916cd2Sjpk  *	  prefix IREs.
14645916cd2Sjpk  */
14745916cd2Sjpk 
14845916cd2Sjpk static kmem_cache_t *ire_gw_secattr_cache;
14945916cd2Sjpk 
15045916cd2Sjpk #define	GCDB_HASH_SIZE	101
15145916cd2Sjpk #define	GCGRP_HASH_SIZE	101
15245916cd2Sjpk 
/*
 * Release one reference on a tsol_gcdb_t.
 *
 * The count is decremented under gcdb_lock; when it reaches zero the record
 * is handed to gcdb_inactive(), which is called with the lock still held.
 *
 * The expansion is wrapped in do { } while (0) so that "GCDB_REFRELE(p);"
 * is exactly one statement and can be used as the unbraced body of an
 * if/else.  The argument is evaluated only once.
 */
#define	GCDB_REFRELE(p) do {					\
	tsol_gcdb_t *gcdb_refrele_p = (p);			\
								\
	mutex_enter(&gcdb_lock);				\
	ASSERT(gcdb_refrele_p->gcdb_refcnt > 0);		\
	if (--(gcdb_refrele_p->gcdb_refcnt) == 0)		\
		gcdb_inactive(gcdb_refrele_p);			\
	ASSERT(MUTEX_HELD(&gcdb_lock));				\
	mutex_exit(&gcdb_lock);					\
} while (0)
16145916cd2Sjpk 
16245916cd2Sjpk static int gcdb_hash_size = GCDB_HASH_SIZE;
16345916cd2Sjpk static int gcgrp_hash_size = GCGRP_HASH_SIZE;
16445916cd2Sjpk static mod_hash_t *gcdb_hash;
16545916cd2Sjpk static mod_hash_t *gcgrp4_hash;
16645916cd2Sjpk static mod_hash_t *gcgrp6_hash;
16745916cd2Sjpk 
16845916cd2Sjpk static kmutex_t gcdb_lock;
16945916cd2Sjpk kmutex_t gcgrp_lock;
17045916cd2Sjpk 
17145916cd2Sjpk static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
17245916cd2Sjpk static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
17345916cd2Sjpk static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
17445916cd2Sjpk static void gcdb_inactive(tsol_gcdb_t *);
17545916cd2Sjpk 
17645916cd2Sjpk static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
17745916cd2Sjpk static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);
17845916cd2Sjpk 
17945916cd2Sjpk static int ire_gw_secattr_constructor(void *, void *, int);
18045916cd2Sjpk static void ire_gw_secattr_destructor(void *, void *);
18145916cd2Sjpk 
18245916cd2Sjpk void
18345916cd2Sjpk tnet_init(void)
18445916cd2Sjpk {
18545916cd2Sjpk 	ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
18645916cd2Sjpk 	    sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
18745916cd2Sjpk 	    ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
18845916cd2Sjpk 
18945916cd2Sjpk 	gcdb_hash = mod_hash_create_extended("gcdb_hash",
19045916cd2Sjpk 	    gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
19145916cd2Sjpk 	    gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
19245916cd2Sjpk 
19345916cd2Sjpk 	gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
19445916cd2Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
19545916cd2Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
19645916cd2Sjpk 
19745916cd2Sjpk 	gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
19845916cd2Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
19945916cd2Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
20045916cd2Sjpk 
20145916cd2Sjpk 	mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
20245916cd2Sjpk 	mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
20345916cd2Sjpk }
20445916cd2Sjpk 
20545916cd2Sjpk void
20645916cd2Sjpk tnet_fini(void)
20745916cd2Sjpk {
20845916cd2Sjpk 	kmem_cache_destroy(ire_gw_secattr_cache);
20945916cd2Sjpk 	mod_hash_destroy_hash(gcdb_hash);
21045916cd2Sjpk 	mod_hash_destroy_hash(gcgrp4_hash);
21145916cd2Sjpk 	mod_hash_destroy_hash(gcgrp6_hash);
21245916cd2Sjpk 	mutex_destroy(&gcdb_lock);
21345916cd2Sjpk 	mutex_destroy(&gcgrp_lock);
21445916cd2Sjpk }
21545916cd2Sjpk 
21645916cd2Sjpk /* ARGSUSED */
21745916cd2Sjpk static int
21845916cd2Sjpk ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
21945916cd2Sjpk {
22045916cd2Sjpk 	tsol_ire_gw_secattr_t *attrp = buf;
22145916cd2Sjpk 
22245916cd2Sjpk 	mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
22345916cd2Sjpk 
22445916cd2Sjpk 	attrp->igsa_rhc = NULL;
22545916cd2Sjpk 	attrp->igsa_gc = NULL;
22645916cd2Sjpk 	attrp->igsa_gcgrp = NULL;
22745916cd2Sjpk 
22845916cd2Sjpk 	return (0);
22945916cd2Sjpk }
23045916cd2Sjpk 
23145916cd2Sjpk /* ARGSUSED */
23245916cd2Sjpk static void
23345916cd2Sjpk ire_gw_secattr_destructor(void *buf, void *cdrarg)
23445916cd2Sjpk {
23545916cd2Sjpk 	tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
23645916cd2Sjpk 
23745916cd2Sjpk 	mutex_destroy(&attrp->igsa_lock);
23845916cd2Sjpk }
23945916cd2Sjpk 
24045916cd2Sjpk tsol_ire_gw_secattr_t *
24145916cd2Sjpk ire_gw_secattr_alloc(int kmflags)
24245916cd2Sjpk {
24345916cd2Sjpk 	return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
24445916cd2Sjpk }
24545916cd2Sjpk 
24645916cd2Sjpk void
24745916cd2Sjpk ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
24845916cd2Sjpk {
24945916cd2Sjpk 	ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));
25045916cd2Sjpk 
25145916cd2Sjpk 	if (attrp->igsa_rhc != NULL) {
25245916cd2Sjpk 		TNRHC_RELE(attrp->igsa_rhc);
25345916cd2Sjpk 		attrp->igsa_rhc = NULL;
25445916cd2Sjpk 	}
25545916cd2Sjpk 
25645916cd2Sjpk 	if (attrp->igsa_gc != NULL) {
25745916cd2Sjpk 		GC_REFRELE(attrp->igsa_gc);
25845916cd2Sjpk 		attrp->igsa_gc = NULL;
25945916cd2Sjpk 	}
26045916cd2Sjpk 	if (attrp->igsa_gcgrp != NULL) {
26145916cd2Sjpk 		GCGRP_REFRELE(attrp->igsa_gcgrp);
26245916cd2Sjpk 		attrp->igsa_gcgrp = NULL;
26345916cd2Sjpk 	}
26445916cd2Sjpk 
26545916cd2Sjpk 	ASSERT(attrp->igsa_rhc == NULL);
26645916cd2Sjpk 	ASSERT(attrp->igsa_gc == NULL);
26745916cd2Sjpk 	ASSERT(attrp->igsa_gcgrp == NULL);
26845916cd2Sjpk 
26945916cd2Sjpk 	kmem_cache_free(ire_gw_secattr_cache, attrp);
27045916cd2Sjpk }
27145916cd2Sjpk 
27245916cd2Sjpk /* ARGSUSED */
27345916cd2Sjpk static uint_t
27445916cd2Sjpk gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
27545916cd2Sjpk {
27645916cd2Sjpk 	const struct rtsa_s *rp = (struct rtsa_s *)key;
27745916cd2Sjpk 	const uint32_t *up, *ue;
27845916cd2Sjpk 	uint_t hash;
27945916cd2Sjpk 	int i;
28045916cd2Sjpk 
28145916cd2Sjpk 	ASSERT(rp != NULL);
28245916cd2Sjpk 
28345916cd2Sjpk 	/* See comments in hash_bylabel in zone.c for details */
28445916cd2Sjpk 	hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
28545916cd2Sjpk 	up = (const uint32_t *)&rp->rtsa_slrange;
28645916cd2Sjpk 	ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
28745916cd2Sjpk 	i = 1;
28845916cd2Sjpk 	while (up < ue) {
28945916cd2Sjpk 		/* using 2^n + 1, 1 <= n <= 16 as source of many primes */
29045916cd2Sjpk 		hash += *up + (*up << ((i % 16) + 1));
29145916cd2Sjpk 		up++;
29245916cd2Sjpk 		i++;
29345916cd2Sjpk 	}
29445916cd2Sjpk 	return (hash);
29545916cd2Sjpk }
29645916cd2Sjpk 
29745916cd2Sjpk static int
29845916cd2Sjpk gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
29945916cd2Sjpk {
30045916cd2Sjpk 	struct rtsa_s *rp1 = (struct rtsa_s *)key1;
30145916cd2Sjpk 	struct rtsa_s *rp2 = (struct rtsa_s *)key2;
30245916cd2Sjpk 
30345916cd2Sjpk 	ASSERT(rp1 != NULL && rp2 != NULL);
30445916cd2Sjpk 
30545916cd2Sjpk 	if (blequal(&rp1->rtsa_slrange.lower_bound,
30645916cd2Sjpk 	    &rp2->rtsa_slrange.lower_bound) &&
30745916cd2Sjpk 	    blequal(&rp1->rtsa_slrange.upper_bound,
30845916cd2Sjpk 	    &rp2->rtsa_slrange.upper_bound) &&
30945916cd2Sjpk 	    rp1->rtsa_doi == rp2->rtsa_doi)
31045916cd2Sjpk 		return (0);
31145916cd2Sjpk 
31245916cd2Sjpk 	/* No match; not found */
31345916cd2Sjpk 	return (-1);
31445916cd2Sjpk }
31545916cd2Sjpk 
31645916cd2Sjpk /* ARGSUSED */
31745916cd2Sjpk static uint_t
31845916cd2Sjpk gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
31945916cd2Sjpk {
32045916cd2Sjpk 	tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
32145916cd2Sjpk 	uint_t		idx = 0;
32245916cd2Sjpk 	uint32_t	*ap;
32345916cd2Sjpk 
32445916cd2Sjpk 	ASSERT(ga != NULL);
32545916cd2Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
32645916cd2Sjpk 
32745916cd2Sjpk 	ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
32845916cd2Sjpk 	idx ^= *ap++;
32945916cd2Sjpk 	idx ^= *ap++;
33045916cd2Sjpk 	idx ^= *ap++;
33145916cd2Sjpk 	idx ^= *ap;
33245916cd2Sjpk 
33345916cd2Sjpk 	return (idx);
33445916cd2Sjpk }
33545916cd2Sjpk 
33645916cd2Sjpk static int
33745916cd2Sjpk gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
33845916cd2Sjpk {
33945916cd2Sjpk 	tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
34045916cd2Sjpk 	tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
34145916cd2Sjpk 
34245916cd2Sjpk 	ASSERT(ga1 != NULL && ga2 != NULL);
34345916cd2Sjpk 
34445916cd2Sjpk 	/* Address family must match */
34545916cd2Sjpk 	if (ga1->ga_af != ga2->ga_af)
34645916cd2Sjpk 		return (-1);
34745916cd2Sjpk 
34845916cd2Sjpk 	if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
34945916cd2Sjpk 	    ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
35045916cd2Sjpk 	    ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
35145916cd2Sjpk 	    ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
35245916cd2Sjpk 		return (0);
35345916cd2Sjpk 
35445916cd2Sjpk 	/* No match; not found */
35545916cd2Sjpk 	return (-1);
35645916cd2Sjpk }
35745916cd2Sjpk 
/*
 * Names for the bits of rtsa_mask (bit 9 cipso, 3 doi, 2 max_sl, 1 min_sl).
 * NOTE(review): the layout looks like a kernel "%b" bit-field format
 * string; it is not referenced in the part of the file reviewed here, so
 * confirm against its users.
 */
#define	RTSAFLAGS	"\20\11cipso\3doi\2max_sl\1min_sl"
35945916cd2Sjpk 
36045916cd2Sjpk int
36145916cd2Sjpk rtsa_validate(const struct rtsa_s *rp)
36245916cd2Sjpk {
36345916cd2Sjpk 	uint32_t mask = rp->rtsa_mask;
36445916cd2Sjpk 
36545916cd2Sjpk 	/* RTSA_CIPSO must be set, and DOI must not be zero */
36645916cd2Sjpk 	if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
36745916cd2Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
36845916cd2Sjpk 		    "rtsa(1) lacks flag or has 0 doi.",
36945916cd2Sjpk 		    rtsa_s *, rp);
37045916cd2Sjpk 		return (EINVAL);
37145916cd2Sjpk 	}
37245916cd2Sjpk 	/*
37345916cd2Sjpk 	 * SL range must be specified, and it must have its
37445916cd2Sjpk 	 * upper bound dominating its lower bound.
37545916cd2Sjpk 	 */
37645916cd2Sjpk 	if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
37745916cd2Sjpk 	    !bldominates(&rp->rtsa_slrange.upper_bound,
37845916cd2Sjpk 	    &rp->rtsa_slrange.lower_bound)) {
37945916cd2Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
38045916cd2Sjpk 		    "rtsa(1) min_sl and max_sl not set or max_sl is "
38145916cd2Sjpk 		    "not dominating.", rtsa_s *, rp);
38245916cd2Sjpk 		return (EINVAL);
38345916cd2Sjpk 	}
38445916cd2Sjpk 	return (0);
38545916cd2Sjpk }
38645916cd2Sjpk 
38745916cd2Sjpk /*
38845916cd2Sjpk  * A brief explanation of the reference counting scheme:
38945916cd2Sjpk  *
39045916cd2Sjpk  * Prefix IREs have a non-NULL igsa_gc and a NULL igsa_gcgrp;
39145916cd2Sjpk  * IRE_CACHEs have it vice-versa.
39245916cd2Sjpk  *
 * Apart from dynamic references due to reference holds done
 * actively by threads, we have the following references:
39545916cd2Sjpk  *
39645916cd2Sjpk  * gcdb_refcnt:
39745916cd2Sjpk  *	- Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
39845916cd2Sjpk  *	  to the gcdb_refcnt.
39945916cd2Sjpk  *
40045916cd2Sjpk  * gc_refcnt:
40145916cd2Sjpk  *	- A prefix IRE that points to an igsa_gc contributes a reference
40245916cd2Sjpk  *	  to the gc_refcnt.
40345916cd2Sjpk  *
40445916cd2Sjpk  * gcgrp_refcnt:
40545916cd2Sjpk  *	- An IRE_CACHE that points to an igsa_gcgrp contributes a reference
40645916cd2Sjpk  *	  to the gcgrp_refcnt of the associated tsol_gcgrp_t.
40745916cd2Sjpk  *	- Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
40845916cd2Sjpk  *	  a reference to the gcgrp_refcnt.
40945916cd2Sjpk  */
41045916cd2Sjpk static tsol_gcdb_t *
41145916cd2Sjpk gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
41245916cd2Sjpk {
41345916cd2Sjpk 	tsol_gcdb_t *gcdb = NULL;
41445916cd2Sjpk 
41545916cd2Sjpk 	if (rtsa_validate(rp) != 0)
41645916cd2Sjpk 		return (NULL);
41745916cd2Sjpk 
41845916cd2Sjpk 	mutex_enter(&gcdb_lock);
41945916cd2Sjpk 	/* Find a copy in the cache; otherwise, create one and cache it */
42045916cd2Sjpk 	if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
42145916cd2Sjpk 	    (mod_hash_val_t *)&gcdb) == 0) {
42245916cd2Sjpk 		gcdb->gcdb_refcnt++;
42345916cd2Sjpk 		ASSERT(gcdb->gcdb_refcnt != 0);
42445916cd2Sjpk 
42545916cd2Sjpk 		DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
42645916cd2Sjpk 		    "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
42745916cd2Sjpk 	} else if (alloc) {
42845916cd2Sjpk 		gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
42945916cd2Sjpk 		if (gcdb != NULL) {
43045916cd2Sjpk 			gcdb->gcdb_refcnt = 1;
43145916cd2Sjpk 			gcdb->gcdb_mask = rp->rtsa_mask;
43245916cd2Sjpk 			gcdb->gcdb_doi = rp->rtsa_doi;
43345916cd2Sjpk 			gcdb->gcdb_slrange = rp->rtsa_slrange;
43445916cd2Sjpk 
43545916cd2Sjpk 			if (mod_hash_insert(gcdb_hash,
43645916cd2Sjpk 			    (mod_hash_key_t)&gcdb->gcdb_attr,
43745916cd2Sjpk 			    (mod_hash_val_t)gcdb) != 0) {
43845916cd2Sjpk 				mutex_exit(&gcdb_lock);
43945916cd2Sjpk 				kmem_free(gcdb, sizeof (*gcdb));
44045916cd2Sjpk 				return (NULL);
44145916cd2Sjpk 			}
44245916cd2Sjpk 
44345916cd2Sjpk 			DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
44445916cd2Sjpk 			    "gcdb(1) inserted in gcdb_hash(global)",
44545916cd2Sjpk 			    tsol_gcdb_t *, gcdb);
44645916cd2Sjpk 		}
44745916cd2Sjpk 	}
44845916cd2Sjpk 	mutex_exit(&gcdb_lock);
44945916cd2Sjpk 	return (gcdb);
45045916cd2Sjpk }
45145916cd2Sjpk 
45245916cd2Sjpk static void
45345916cd2Sjpk gcdb_inactive(tsol_gcdb_t *gcdb)
45445916cd2Sjpk {
45545916cd2Sjpk 	ASSERT(MUTEX_HELD(&gcdb_lock));
45645916cd2Sjpk 	ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);
45745916cd2Sjpk 
45845916cd2Sjpk 	(void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
45945916cd2Sjpk 	    (mod_hash_val_t *)&gcdb);
46045916cd2Sjpk 
46145916cd2Sjpk 	DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
46245916cd2Sjpk 	    "gcdb(1) removed from gcdb_hash(global)",
46345916cd2Sjpk 	    tsol_gcdb_t *, gcdb);
46445916cd2Sjpk 	kmem_free(gcdb, sizeof (*gcdb));
46545916cd2Sjpk }
46645916cd2Sjpk 
46745916cd2Sjpk tsol_gc_t *
46845916cd2Sjpk gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
46945916cd2Sjpk {
47045916cd2Sjpk 	tsol_gc_t *gc;
47145916cd2Sjpk 	tsol_gcdb_t *gcdb;
47245916cd2Sjpk 
47345916cd2Sjpk 	*gcgrp_xtrarefp = B_TRUE;
47445916cd2Sjpk 
47545916cd2Sjpk 	rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER);
47645916cd2Sjpk 	if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) {
47745916cd2Sjpk 		rw_exit(&gcgrp->gcgrp_rwlock);
47845916cd2Sjpk 		return (NULL);
47945916cd2Sjpk 	}
48045916cd2Sjpk 
48145916cd2Sjpk 	for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) {
48245916cd2Sjpk 		if (gc->gc_db == gcdb) {
48345916cd2Sjpk 			ASSERT(gc->gc_grp == gcgrp);
48445916cd2Sjpk 
48545916cd2Sjpk 			gc->gc_refcnt++;
48645916cd2Sjpk 			ASSERT(gc->gc_refcnt != 0);
48745916cd2Sjpk 
48845916cd2Sjpk 			GCDB_REFRELE(gcdb);
48945916cd2Sjpk 
49045916cd2Sjpk 			DTRACE_PROBE3(tx__gcdb__log__info__gc__create,
49145916cd2Sjpk 			    char *, "found gc(1) in gcgrp(2)",
49245916cd2Sjpk 			    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
49345916cd2Sjpk 			rw_exit(&gcgrp->gcgrp_rwlock);
49445916cd2Sjpk 			return (gc);
49545916cd2Sjpk 		}
49645916cd2Sjpk 	}
49745916cd2Sjpk 
49845916cd2Sjpk 	gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP);
49945916cd2Sjpk 	if (gc != NULL) {
50045916cd2Sjpk 		if (gcgrp->gcgrp_head == NULL) {
50145916cd2Sjpk 			gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc;
50245916cd2Sjpk 		} else {
50345916cd2Sjpk 			gcgrp->gcgrp_tail->gc_next = gc;
50445916cd2Sjpk 			gc->gc_prev = gcgrp->gcgrp_tail;
50545916cd2Sjpk 			gcgrp->gcgrp_tail = gc;
50645916cd2Sjpk 		}
50745916cd2Sjpk 		gcgrp->gcgrp_count++;
50845916cd2Sjpk 		ASSERT(gcgrp->gcgrp_count != 0);
50945916cd2Sjpk 
51045916cd2Sjpk 		/* caller has incremented gcgrp reference for us */
51145916cd2Sjpk 		gc->gc_grp = gcgrp;
51245916cd2Sjpk 
51345916cd2Sjpk 		gc->gc_db = gcdb;
51445916cd2Sjpk 		gc->gc_refcnt = 1;
51545916cd2Sjpk 
51645916cd2Sjpk 		DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *,
51745916cd2Sjpk 		    "added gc(1) to gcgrp(2)", tsol_gc_t *, gc,
51845916cd2Sjpk 		    tsol_gcgrp_t *, gcgrp);
51945916cd2Sjpk 
52045916cd2Sjpk 		*gcgrp_xtrarefp = B_FALSE;
52145916cd2Sjpk 	}
52245916cd2Sjpk 	rw_exit(&gcgrp->gcgrp_rwlock);
52345916cd2Sjpk 
52445916cd2Sjpk 	return (gc);
52545916cd2Sjpk }
52645916cd2Sjpk 
52745916cd2Sjpk void
52845916cd2Sjpk gc_inactive(tsol_gc_t *gc)
52945916cd2Sjpk {
53045916cd2Sjpk 	tsol_gcgrp_t *gcgrp = gc->gc_grp;
53145916cd2Sjpk 
53245916cd2Sjpk 	ASSERT(gcgrp != NULL);
53345916cd2Sjpk 	ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock));
53445916cd2Sjpk 	ASSERT(gc->gc_refcnt == 0);
53545916cd2Sjpk 
53645916cd2Sjpk 	if (gc->gc_prev != NULL)
53745916cd2Sjpk 		gc->gc_prev->gc_next = gc->gc_next;
53845916cd2Sjpk 	else
53945916cd2Sjpk 		gcgrp->gcgrp_head = gc->gc_next;
54045916cd2Sjpk 	if (gc->gc_next != NULL)
54145916cd2Sjpk 		gc->gc_next->gc_prev = gc->gc_prev;
54245916cd2Sjpk 	else
54345916cd2Sjpk 		gcgrp->gcgrp_tail = gc->gc_prev;
54445916cd2Sjpk 	ASSERT(gcgrp->gcgrp_count > 0);
54545916cd2Sjpk 	gcgrp->gcgrp_count--;
54645916cd2Sjpk 
54745916cd2Sjpk 	/* drop lock before it's destroyed */
54845916cd2Sjpk 	rw_exit(&gcgrp->gcgrp_rwlock);
54945916cd2Sjpk 
55045916cd2Sjpk 	DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *,
55145916cd2Sjpk 	    "removed inactive gc(1) from gcgrp(2)",
55245916cd2Sjpk 	    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
55345916cd2Sjpk 
55445916cd2Sjpk 	GCGRP_REFRELE(gcgrp);
55545916cd2Sjpk 
55645916cd2Sjpk 	gc->gc_grp = NULL;
55745916cd2Sjpk 	gc->gc_prev = gc->gc_next = NULL;
55845916cd2Sjpk 
55945916cd2Sjpk 	if (gc->gc_db != NULL)
56045916cd2Sjpk 		GCDB_REFRELE(gc->gc_db);
56145916cd2Sjpk 
56245916cd2Sjpk 	kmem_free(gc, sizeof (*gc));
56345916cd2Sjpk }
56445916cd2Sjpk 
56545916cd2Sjpk tsol_gcgrp_t *
56645916cd2Sjpk gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc)
56745916cd2Sjpk {
56845916cd2Sjpk 	tsol_gcgrp_t *gcgrp = NULL;
56945916cd2Sjpk 	mod_hash_t *hashp;
57045916cd2Sjpk 
57145916cd2Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
57245916cd2Sjpk 
57345916cd2Sjpk 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
57445916cd2Sjpk 
57545916cd2Sjpk 	mutex_enter(&gcgrp_lock);
57645916cd2Sjpk 	if (mod_hash_find(hashp, (mod_hash_key_t)ga,
57745916cd2Sjpk 	    (mod_hash_val_t *)&gcgrp) == 0) {
57845916cd2Sjpk 		gcgrp->gcgrp_refcnt++;
57945916cd2Sjpk 		ASSERT(gcgrp->gcgrp_refcnt != 0);
58045916cd2Sjpk 
58145916cd2Sjpk 		DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *,
58245916cd2Sjpk 		    "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp,
58345916cd2Sjpk 		    mod_hash_t *, hashp);
58445916cd2Sjpk 
58545916cd2Sjpk 	} else if (alloc) {
58645916cd2Sjpk 		gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP);
58745916cd2Sjpk 		if (gcgrp != NULL) {
58845916cd2Sjpk 			gcgrp->gcgrp_refcnt = 1;
58945916cd2Sjpk 			rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL);
59045916cd2Sjpk 			bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga));
59145916cd2Sjpk 
59245916cd2Sjpk 			if (mod_hash_insert(hashp,
59345916cd2Sjpk 			    (mod_hash_key_t)&gcgrp->gcgrp_addr,
59445916cd2Sjpk 			    (mod_hash_val_t)gcgrp) != 0) {
59545916cd2Sjpk 				mutex_exit(&gcgrp_lock);
59645916cd2Sjpk 				kmem_free(gcgrp, sizeof (*gcgrp));
59745916cd2Sjpk 				return (NULL);
59845916cd2Sjpk 			}
59945916cd2Sjpk 
60045916cd2Sjpk 			DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert,
60145916cd2Sjpk 			    char *, "inserted gcgrp(1) in hash(2)",
60245916cd2Sjpk 			    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
60345916cd2Sjpk 		}
60445916cd2Sjpk 	}
60545916cd2Sjpk 	mutex_exit(&gcgrp_lock);
60645916cd2Sjpk 	return (gcgrp);
60745916cd2Sjpk }
60845916cd2Sjpk 
60945916cd2Sjpk void
61045916cd2Sjpk gcgrp_inactive(tsol_gcgrp_t *gcgrp)
61145916cd2Sjpk {
61245916cd2Sjpk 	tsol_gcgrp_addr_t *ga;
61345916cd2Sjpk 	mod_hash_t *hashp;
61445916cd2Sjpk 
61545916cd2Sjpk 	ASSERT(MUTEX_HELD(&gcgrp_lock));
61645916cd2Sjpk 	ASSERT(!RW_LOCK_HELD(&gcgrp->gcgrp_rwlock));
61745916cd2Sjpk 	ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
61845916cd2Sjpk 	ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
61945916cd2Sjpk 
62045916cd2Sjpk 	ga = &gcgrp->gcgrp_addr;
62145916cd2Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
62245916cd2Sjpk 
62345916cd2Sjpk 	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
62445916cd2Sjpk 	(void) mod_hash_remove(hashp, (mod_hash_key_t)ga,
62545916cd2Sjpk 	    (mod_hash_val_t *)&gcgrp);
62645916cd2Sjpk 	rw_destroy(&gcgrp->gcgrp_rwlock);
62745916cd2Sjpk 
62845916cd2Sjpk 	DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *,
62945916cd2Sjpk 	    "removed inactive gcgrp(1) from hash(2)",
63045916cd2Sjpk 	    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
63145916cd2Sjpk 
63245916cd2Sjpk 	kmem_free(gcgrp, sizeof (*gcgrp));
63345916cd2Sjpk }
63445916cd2Sjpk 
63545916cd2Sjpk /*
63645916cd2Sjpk  * Converts CIPSO option to sensitivity label.
63745916cd2Sjpk  * Validity checks based on restrictions defined in
63845916cd2Sjpk  * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity)
63945916cd2Sjpk  */
64045916cd2Sjpk static boolean_t
64145916cd2Sjpk cipso_to_sl(const uchar_t *option, bslabel_t *sl)
64245916cd2Sjpk {
64345916cd2Sjpk 	const struct cipso_option *co = (const struct cipso_option *)option;
64445916cd2Sjpk 	const struct cipso_tag_type_1 *tt1;
64545916cd2Sjpk 
64645916cd2Sjpk 	tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0];
64745916cd2Sjpk 	if (tt1->tag_type != 1 ||
64845916cd2Sjpk 	    tt1->tag_length < TSOL_TT1_MIN_LENGTH ||
64945916cd2Sjpk 	    tt1->tag_length > TSOL_TT1_MAX_LENGTH ||
65045916cd2Sjpk 	    tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length)
65145916cd2Sjpk 		return (B_FALSE);
65245916cd2Sjpk 
65345916cd2Sjpk 	bsllow(sl);	/* assumed: sets compartments to all zeroes */
65445916cd2Sjpk 	LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl);
65545916cd2Sjpk 	bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments,
65645916cd2Sjpk 	    tt1->tag_length - TSOL_TT1_MIN_LENGTH);
65745916cd2Sjpk 	return (B_TRUE);
65845916cd2Sjpk }
65945916cd2Sjpk 
66045916cd2Sjpk /*
66145916cd2Sjpk  * Parse the CIPSO label in the incoming packet and construct a ts_label_t
66245916cd2Sjpk  * that reflects the CIPSO label and attach it to the dblk cred. Later as
66345916cd2Sjpk  * the mblk flows up through the stack any code that needs to examine the
66445916cd2Sjpk  * packet label can inspect the label from the dblk cred. This function is
66545916cd2Sjpk  * called right in ip_rput for all packets, i.e. locally destined and
66645916cd2Sjpk  * to be forwarded packets. The forwarding path needs to examine the label
66745916cd2Sjpk  * to determine how to forward the packet.
66845916cd2Sjpk  *
66945916cd2Sjpk  * For IPv4, IP header options have been pulled up, but other headers might not
67045916cd2Sjpk  * have been.  For IPv6, any hop-by-hop options have been pulled up, but any
67145916cd2Sjpk  * other headers might not be present.
67245916cd2Sjpk  */
67345916cd2Sjpk boolean_t
67445916cd2Sjpk tsol_get_pkt_label(mblk_t *mp, int version)
67545916cd2Sjpk {
67645916cd2Sjpk 	tsol_tpc_t	*src_rhtp;
67745916cd2Sjpk 	uchar_t		*opt_ptr = NULL;
67845916cd2Sjpk 	const ipha_t	*ipha;
67945916cd2Sjpk 	bslabel_t	sl;
68045916cd2Sjpk 	uint32_t	doi;
68145916cd2Sjpk 	tsol_ip_label_t	label_type;
68245916cd2Sjpk 	const cipso_option_t *co;
68345916cd2Sjpk 	const void	*src;
68445916cd2Sjpk 	const ip6_t	*ip6h;
685*de8c4a14SErik Nordmark 	cred_t		*credp;
686*de8c4a14SErik Nordmark 	pid_t		cpid;
68745916cd2Sjpk 
68845916cd2Sjpk 	ASSERT(DB_TYPE(mp) == M_DATA);
68945916cd2Sjpk 
69045916cd2Sjpk 	if (version == IPV4_VERSION) {
69145916cd2Sjpk 		ipha = (const ipha_t *)mp->b_rptr;
69245916cd2Sjpk 		src = &ipha->ipha_src;
69345916cd2Sjpk 		label_type = tsol_get_option(mp, &opt_ptr);
69445916cd2Sjpk 	} else {
69545916cd2Sjpk 		uchar_t		*after_secopt;
69645916cd2Sjpk 		boolean_t	hbh_needed;
69745916cd2Sjpk 		const uchar_t	*ip6hbh;
69845916cd2Sjpk 		size_t		optlen;
69945916cd2Sjpk 
70045916cd2Sjpk 		label_type = OPT_NONE;
70145916cd2Sjpk 		ip6h = (const ip6_t *)mp->b_rptr;
70245916cd2Sjpk 		src = &ip6h->ip6_src;
70345916cd2Sjpk 		if (ip6h->ip6_nxt == IPPROTO_HOPOPTS) {
70445916cd2Sjpk 			ip6hbh = (const uchar_t *)&ip6h[1];
70545916cd2Sjpk 			optlen = (ip6hbh[1] + 1) << 3;
70645916cd2Sjpk 			ASSERT(ip6hbh + optlen <= mp->b_wptr);
70745916cd2Sjpk 			opt_ptr = tsol_find_secopt_v6(ip6hbh, optlen,
70845916cd2Sjpk 			    &after_secopt, &hbh_needed);
70945916cd2Sjpk 			/* tsol_find_secopt_v6 guarantees some sanity */
71045916cd2Sjpk 			if (opt_ptr != NULL &&
71145916cd2Sjpk 			    (optlen = opt_ptr[1]) >= 8) {
71245916cd2Sjpk 				opt_ptr += 2;
71345916cd2Sjpk 				bcopy(opt_ptr, &doi, sizeof (doi));
71445916cd2Sjpk 				doi = ntohl(doi);
71545916cd2Sjpk 				if (doi == IP6LS_DOI_V4 &&
71645916cd2Sjpk 				    opt_ptr[4] == IP6LS_TT_V4 &&
71745916cd2Sjpk 				    opt_ptr[5] <= optlen - 4 &&
71845916cd2Sjpk 				    opt_ptr[7] <= optlen - 6) {
71945916cd2Sjpk 					opt_ptr += sizeof (doi) + 2;
72045916cd2Sjpk 					label_type = OPT_CIPSO;
72145916cd2Sjpk 				}
72245916cd2Sjpk 			}
72345916cd2Sjpk 		}
72445916cd2Sjpk 	}
72545916cd2Sjpk 
72645916cd2Sjpk 	switch (label_type) {
72745916cd2Sjpk 	case OPT_CIPSO:
72845916cd2Sjpk 		/*
72945916cd2Sjpk 		 * Convert the CIPSO label to the internal format
73045916cd2Sjpk 		 * and attach it to the dblk cred.
73145916cd2Sjpk 		 * Validity checks based on restrictions defined in
73245916cd2Sjpk 		 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
73345916cd2Sjpk 		 * (draft-ietf-cipso-ipsecurity)
73445916cd2Sjpk 		 */
73545916cd2Sjpk 		if (version == IPV6_VERSION && ip6opt_ls == 0)
73645916cd2Sjpk 			return (B_FALSE);
73745916cd2Sjpk 		co = (const struct cipso_option *)opt_ptr;
73845916cd2Sjpk 		if ((co->cipso_length <
73945916cd2Sjpk 		    TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
74045916cd2Sjpk 		    (co->cipso_length > IP_MAX_OPT_LENGTH))
74145916cd2Sjpk 			return (B_FALSE);
74245916cd2Sjpk 		bcopy(co->cipso_doi, &doi, sizeof (doi));
74345916cd2Sjpk 		doi = ntohl(doi);
74445916cd2Sjpk 		if (!cipso_to_sl(opt_ptr, &sl))
74545916cd2Sjpk 			return (B_FALSE);
74645916cd2Sjpk 		setbltype(&sl, SUN_SL_ID);
74745916cd2Sjpk 		break;
74845916cd2Sjpk 
74945916cd2Sjpk 	case OPT_NONE:
75045916cd2Sjpk 		/*
75145916cd2Sjpk 		 * Handle special cases that are not currently labeled, even
75245916cd2Sjpk 		 * though the sending system may otherwise be configured as
75345916cd2Sjpk 		 * labeled.
75445916cd2Sjpk 		 *	- IGMP
75545916cd2Sjpk 		 *	- IPv4 ICMP Router Discovery
75645916cd2Sjpk 		 *	- IPv6 Neighbor Discovery
75745916cd2Sjpk 		 */
75845916cd2Sjpk 		if (version == IPV4_VERSION) {
75945916cd2Sjpk 			if (ipha->ipha_protocol == IPPROTO_IGMP)
76045916cd2Sjpk 				return (B_TRUE);
76145916cd2Sjpk 			if (ipha->ipha_protocol == IPPROTO_ICMP) {
76245916cd2Sjpk 				const struct icmp *icmp = (const struct icmp *)
76345916cd2Sjpk 				    (mp->b_rptr + IPH_HDR_LENGTH(ipha));
76445916cd2Sjpk 
76545916cd2Sjpk 				if ((uchar_t *)icmp > mp->b_wptr) {
76645916cd2Sjpk 					if (!pullupmsg(mp,
76745916cd2Sjpk 					    (uchar_t *)icmp - mp->b_rptr + 1))
76845916cd2Sjpk 						return (B_FALSE);
76945916cd2Sjpk 					icmp = (const struct icmp *)
77045916cd2Sjpk 					    (mp->b_rptr +
77145916cd2Sjpk 					    IPH_HDR_LENGTH(ipha));
77245916cd2Sjpk 				}
77345916cd2Sjpk 				if (icmp->icmp_type == ICMP_ROUTERADVERT ||
77445916cd2Sjpk 				    icmp->icmp_type == ICMP_ROUTERSOLICIT)
77545916cd2Sjpk 					return (B_TRUE);
77645916cd2Sjpk 			}
77745916cd2Sjpk 			src = &ipha->ipha_src;
77845916cd2Sjpk 		} else {
77945916cd2Sjpk 			if (ip6h->ip6_nxt == IPPROTO_ICMPV6) {
78045916cd2Sjpk 				const icmp6_t *icmp6 = (const icmp6_t *)
78145916cd2Sjpk 				    (mp->b_rptr + IPV6_HDR_LEN);
78245916cd2Sjpk 
78345916cd2Sjpk 				if ((uchar_t *)icmp6 + ICMP6_MINLEN >
78445916cd2Sjpk 				    mp->b_wptr) {
78545916cd2Sjpk 					if (!pullupmsg(mp,
78645916cd2Sjpk 					    (uchar_t *)icmp6 - mp->b_rptr +
78745916cd2Sjpk 					    ICMP6_MINLEN))
78845916cd2Sjpk 						return (B_FALSE);
78945916cd2Sjpk 					icmp6 = (const icmp6_t *)
79045916cd2Sjpk 					    (mp->b_rptr + IPV6_HDR_LEN);
79145916cd2Sjpk 				}
79245916cd2Sjpk 				if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
79345916cd2Sjpk 				    icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
79445916cd2Sjpk 					return (B_TRUE);
79545916cd2Sjpk 			}
79645916cd2Sjpk 			src = &ip6h->ip6_src;
79745916cd2Sjpk 		}
79845916cd2Sjpk 
79945916cd2Sjpk 		/*
80045916cd2Sjpk 		 * Look up the tnrhtp database and get the implicit label
80145916cd2Sjpk 		 * that is associated with this unlabeled host and attach
80245916cd2Sjpk 		 * it to the packet.
80345916cd2Sjpk 		 */
80445916cd2Sjpk 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
80545916cd2Sjpk 			return (B_FALSE);
80645916cd2Sjpk 
80745916cd2Sjpk 		/* If the sender is labeled, drop the unlabeled packet. */
80845916cd2Sjpk 		if (src_rhtp->tpc_tp.host_type != UNLABELED) {
80945916cd2Sjpk 			TPC_RELE(src_rhtp);
81045916cd2Sjpk 			pr_addr_dbg("unlabeled packet forged from %s\n",
81145916cd2Sjpk 			    version == IPV4_VERSION ? AF_INET : AF_INET6, src);
81245916cd2Sjpk 			return (B_FALSE);
81345916cd2Sjpk 		}
81445916cd2Sjpk 
81545916cd2Sjpk 		sl = src_rhtp->tpc_tp.tp_def_label;
81645916cd2Sjpk 		setbltype(&sl, SUN_SL_ID);
81745916cd2Sjpk 		doi = src_rhtp->tpc_tp.tp_doi;
81845916cd2Sjpk 		TPC_RELE(src_rhtp);
81945916cd2Sjpk 		break;
82045916cd2Sjpk 
82145916cd2Sjpk 	default:
82245916cd2Sjpk 		return (B_FALSE);
82345916cd2Sjpk 	}
82445916cd2Sjpk 
82545916cd2Sjpk 	/* Make sure no other thread is messing with this mblk */
82645916cd2Sjpk 	ASSERT(DB_REF(mp) == 1);
827*de8c4a14SErik Nordmark 	/* Preserve db_cpid */
828*de8c4a14SErik Nordmark 	credp = msg_extractcred(mp, &cpid);
829*de8c4a14SErik Nordmark 	if (credp == NULL) {
830*de8c4a14SErik Nordmark 		credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
831*de8c4a14SErik Nordmark 		if (credp == NULL)
83245916cd2Sjpk 			return (B_FALSE);
833*de8c4a14SErik Nordmark 		mblk_setcred(mp, credp, cpid);
83445916cd2Sjpk 	} else {
83545916cd2Sjpk 		cred_t	*newcr;
83645916cd2Sjpk 
837*de8c4a14SErik Nordmark 		newcr = copycred_from_bslabel(credp, &sl, doi,
83845916cd2Sjpk 		    KM_NOSLEEP);
839*de8c4a14SErik Nordmark 		crfree(credp);
84045916cd2Sjpk 		if (newcr == NULL)
84145916cd2Sjpk 			return (B_FALSE);
842*de8c4a14SErik Nordmark 		mblk_setcred(mp, newcr, cpid);
843*de8c4a14SErik Nordmark 		credp = newcr;
84445916cd2Sjpk 	}
84545916cd2Sjpk 
84645916cd2Sjpk 	/*
84745916cd2Sjpk 	 * If the source was unlabeled, then flag as such,
84845916cd2Sjpk 	 * while remembering that CIPSO routers add headers.
84945916cd2Sjpk 	 */
850*de8c4a14SErik Nordmark 	if (label_type == OPT_NONE) {
851*de8c4a14SErik Nordmark 		crgetlabel(credp)->tsl_flags |= TSLF_UNLABELED;
852*de8c4a14SErik Nordmark 	} else if (label_type == OPT_CIPSO) {
85345916cd2Sjpk 		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
85445916cd2Sjpk 			return (B_FALSE);
85545916cd2Sjpk 		if (src_rhtp->tpc_tp.host_type == UNLABELED)
856*de8c4a14SErik Nordmark 			crgetlabel(credp)->tsl_flags |= TSLF_UNLABELED;
85745916cd2Sjpk 		TPC_RELE(src_rhtp);
85845916cd2Sjpk 	}
85945916cd2Sjpk 
86045916cd2Sjpk 	return (B_TRUE);
86145916cd2Sjpk }
86245916cd2Sjpk 
86345916cd2Sjpk /*
86445916cd2Sjpk  * This routine determines whether the given packet should be accepted locally.
86545916cd2Sjpk  * It does a range/set check on the packet's label by looking up the given
86645916cd2Sjpk  * address in the remote host database.
86745916cd2Sjpk  */
86845916cd2Sjpk boolean_t
86945916cd2Sjpk tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
87045916cd2Sjpk     boolean_t shared_addr, const conn_t *connp)
87145916cd2Sjpk {
87245916cd2Sjpk 	const cred_t *credp;
87345916cd2Sjpk 	ts_label_t *plabel, *conn_plabel;
87445916cd2Sjpk 	tsol_tpc_t *tp;
87545916cd2Sjpk 	boolean_t retv;
87645916cd2Sjpk 	const bslabel_t *label, *conn_label;
87745916cd2Sjpk 
87845916cd2Sjpk 	/*
87945916cd2Sjpk 	 * The cases in which this can happen are:
88045916cd2Sjpk 	 *	- IPv6 Router Alert, where ip_rput_data_v6 deliberately skips
88145916cd2Sjpk 	 *	  over the label attachment process.
88245916cd2Sjpk 	 *	- MLD output looped-back to ourselves.
88345916cd2Sjpk 	 *	- IPv4 Router Discovery, where tsol_get_pkt_label intentionally
88445916cd2Sjpk 	 *	  avoids the labeling process.
88545916cd2Sjpk 	 * We trust that all valid paths in the code set the cred pointer when
88645916cd2Sjpk 	 * needed.
88745916cd2Sjpk 	 */
888*de8c4a14SErik Nordmark 	if ((credp = msg_getcred(mp, NULL)) == NULL)
88945916cd2Sjpk 		return (B_TRUE);
89045916cd2Sjpk 
89145916cd2Sjpk 	/*
89245916cd2Sjpk 	 * If this packet is from the inside (not a remote host) and has the
89345916cd2Sjpk 	 * same zoneid as the selected destination, then no checks are
89445916cd2Sjpk 	 * necessary.  Membership in the zone is enough proof.  This is
89545916cd2Sjpk 	 * intended to be a hot path through this function.
89645916cd2Sjpk 	 */
89745916cd2Sjpk 	if (!crisremote(credp) &&
89845916cd2Sjpk 	    crgetzone(credp) == crgetzone(connp->conn_cred))
89945916cd2Sjpk 		return (B_TRUE);
90045916cd2Sjpk 
90145916cd2Sjpk 	plabel = crgetlabel(credp);
90245916cd2Sjpk 	conn_plabel = crgetlabel(connp->conn_cred);
90345916cd2Sjpk 	ASSERT(plabel != NULL && conn_plabel != NULL);
90445916cd2Sjpk 
90545916cd2Sjpk 	label = label2bslabel(plabel);
90645916cd2Sjpk 	conn_label = label2bslabel(crgetlabel(connp->conn_cred));
90745916cd2Sjpk 
90845916cd2Sjpk 	/*
90945916cd2Sjpk 	 * MLPs are always validated using the range and set of the local
91045916cd2Sjpk 	 * address, even when the remote host is unlabeled.
91145916cd2Sjpk 	 */
91245916cd2Sjpk 	if (connp->conn_mlp_type == mlptBoth ||
91345916cd2Sjpk 	/* LINTED: no consequent */
91445916cd2Sjpk 	    connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) {
91545916cd2Sjpk 		;
91645916cd2Sjpk 
91745916cd2Sjpk 	/*
91845916cd2Sjpk 	 * If this is a packet from an unlabeled sender, then we must apply
91945916cd2Sjpk 	 * different rules.  If the label is equal to the zone's label, then
92045916cd2Sjpk 	 * it's allowed.  If it's not equal, but the zone is either the global
92145916cd2Sjpk 	 * zone or the label is dominated by the zone's label, then allow it
92245916cd2Sjpk 	 * as long as it's in the range configured for the destination.
92345916cd2Sjpk 	 */
92445916cd2Sjpk 	} else if (plabel->tsl_flags & TSLF_UNLABELED) {
92545916cd2Sjpk 		if (plabel->tsl_doi == conn_plabel->tsl_doi &&
92645916cd2Sjpk 		    blequal(label, conn_label))
92745916cd2Sjpk 			return (B_TRUE);
92845916cd2Sjpk 
929f4b3ec61Sdh 		/*
930f4b3ec61Sdh 		 * conn_zoneid is global for an exclusive stack, thus we use
931f4b3ec61Sdh 		 * conn_cred to get the zoneid
932f4b3ec61Sdh 		 */
93345916cd2Sjpk 		if (!connp->conn_mac_exempt ||
934f4b3ec61Sdh 		    (crgetzoneid(connp->conn_cred) != GLOBAL_ZONEID &&
93545916cd2Sjpk 		    (plabel->tsl_doi != conn_plabel->tsl_doi ||
93645916cd2Sjpk 		    !bldominates(conn_label, label)))) {
93745916cd2Sjpk 			DTRACE_PROBE3(
93845916cd2Sjpk 			    tx__ip__log__drop__receivelocal__mac_unl,
93945916cd2Sjpk 			    char *,
94045916cd2Sjpk 			    "unlabeled packet mp(1) fails mac for conn(2)",
94145916cd2Sjpk 			    mblk_t *, mp, conn_t *, connp);
94245916cd2Sjpk 			return (B_FALSE);
94345916cd2Sjpk 		}
94445916cd2Sjpk 
94545916cd2Sjpk 	/*
946e071b5fbSkp 	 * If this is a packet from a labeled sender, verify the
947e071b5fbSkp 	 * label on the packet matches the connection label.
94845916cd2Sjpk 	 */
949e071b5fbSkp 	} else {
950e071b5fbSkp 		if (plabel->tsl_doi != conn_plabel->tsl_doi ||
951e071b5fbSkp 		    !blequal(label, conn_label)) {
952e071b5fbSkp 			DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp,
953e071b5fbSkp 			    char *,
954e071b5fbSkp 			    "packet mp(1) failed label match to SLP conn(2)",
955e071b5fbSkp 			    mblk_t *, mp, conn_t *, connp);
956e071b5fbSkp 			return (B_FALSE);
957e071b5fbSkp 		}
95845916cd2Sjpk 		/*
959e071b5fbSkp 		 * No further checks will be needed if this is a zone-
960e071b5fbSkp 		 * specific address because (1) The process for bringing up
961e071b5fbSkp 		 * the interface ensures the zone's label is within the zone-
962e071b5fbSkp 		 * specific address's valid label range; (2) For cases where
963e071b5fbSkp 		 * the conn is bound to the unspecified addresses, ip fanout
964e071b5fbSkp 		 * logic ensures conn's zoneid equals the dest addr's zoneid;
965e071b5fbSkp 		 * (3) Mac-exempt and mlp logic above already handle all
966e071b5fbSkp 		 * cases where the zone label may not be the same as the
967e071b5fbSkp 		 * conn label.
96845916cd2Sjpk 		 */
969e071b5fbSkp 		if (!shared_addr)
97045916cd2Sjpk 			return (B_TRUE);
97145916cd2Sjpk 	}
97245916cd2Sjpk 
97345916cd2Sjpk 	tp = find_tpc(addr, version, B_FALSE);
97445916cd2Sjpk 	if (tp == NULL) {
97545916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr,
97645916cd2Sjpk 		    char *, "dropping mp(1), host(2) lacks entry",
97745916cd2Sjpk 		    mblk_t *, mp, void *, addr);
97845916cd2Sjpk 		return (B_FALSE);
97945916cd2Sjpk 	}
98045916cd2Sjpk 
98145916cd2Sjpk 	/*
98245916cd2Sjpk 	 * The local host address should not be unlabeled at this point.  The
98345916cd2Sjpk 	 * only way this can happen is that the destination isn't unicast.  We
98445916cd2Sjpk 	 * assume that the packet should not have had a label, and thus should
98545916cd2Sjpk 	 * have been handled by the TSLF_UNLABELED logic above.
98645916cd2Sjpk 	 */
98745916cd2Sjpk 	if (tp->tpc_tp.host_type == UNLABELED) {
98845916cd2Sjpk 		retv = B_FALSE;
98945916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *,
99045916cd2Sjpk 		    "mp(1) unlabeled source, but tp is not unlabeled.",
99145916cd2Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
99245916cd2Sjpk 
99345916cd2Sjpk 	} else if (tp->tpc_tp.host_type != SUN_CIPSO) {
99445916cd2Sjpk 		retv = B_FALSE;
99545916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *,
99645916cd2Sjpk 		    "delivering mp(1), found unrecognized tpc(2) type.",
99745916cd2Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
99845916cd2Sjpk 
99945916cd2Sjpk 	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
100045916cd2Sjpk 		retv = B_FALSE;
100145916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
100245916cd2Sjpk 		    "mp(1) could not be delievered to tp(2), doi mismatch",
100345916cd2Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
100445916cd2Sjpk 
100545916cd2Sjpk 	} else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) &&
100645916cd2Sjpk 	    !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) {
100745916cd2Sjpk 		retv = B_FALSE;
100845916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
100945916cd2Sjpk 		    "mp(1) could not be delievered to tp(2), bad mac",
101045916cd2Sjpk 		    mblk_t *, mp, tsol_tpc_t *, tp);
101145916cd2Sjpk 	} else {
101245916cd2Sjpk 		retv = B_TRUE;
101345916cd2Sjpk 	}
101445916cd2Sjpk 
101545916cd2Sjpk 	TPC_RELE(tp);
101645916cd2Sjpk 
101745916cd2Sjpk 	return (retv);
101845916cd2Sjpk }
101945916cd2Sjpk 
102045916cd2Sjpk boolean_t
102145916cd2Sjpk tsol_can_accept_raw(mblk_t *mp, boolean_t check_host)
102245916cd2Sjpk {
102345916cd2Sjpk 	ts_label_t	*plabel = NULL;
102445916cd2Sjpk 	tsol_tpc_t	*src_rhtp, *dst_rhtp;
102545916cd2Sjpk 	boolean_t	retv;
1026*de8c4a14SErik Nordmark 	cred_t		*credp;
102745916cd2Sjpk 
1028*de8c4a14SErik Nordmark 	credp = msg_getcred(mp, NULL);
1029*de8c4a14SErik Nordmark 	if (credp != NULL)
1030*de8c4a14SErik Nordmark 		plabel = crgetlabel(credp);
103145916cd2Sjpk 
103245916cd2Sjpk 	/* We are bootstrapping or the internal template was never deleted */
103345916cd2Sjpk 	if (plabel == NULL)
103445916cd2Sjpk 		return (B_TRUE);
103545916cd2Sjpk 
103645916cd2Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
103745916cd2Sjpk 		ipha_t *ipha = (ipha_t *)mp->b_rptr;
103845916cd2Sjpk 
103945916cd2Sjpk 		src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION,
104045916cd2Sjpk 		    B_FALSE);
104145916cd2Sjpk 		if (src_rhtp == NULL)
104245916cd2Sjpk 			return (B_FALSE);
104345916cd2Sjpk 		dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION,
104445916cd2Sjpk 		    B_FALSE);
104545916cd2Sjpk 	} else {
104645916cd2Sjpk 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
104745916cd2Sjpk 
104845916cd2Sjpk 		src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION,
104945916cd2Sjpk 		    B_FALSE);
105045916cd2Sjpk 		if (src_rhtp == NULL)
105145916cd2Sjpk 			return (B_FALSE);
105245916cd2Sjpk 		dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION,
105345916cd2Sjpk 		    B_FALSE);
105445916cd2Sjpk 	}
105545916cd2Sjpk 	if (dst_rhtp == NULL) {
105645916cd2Sjpk 		TPC_RELE(src_rhtp);
105745916cd2Sjpk 		return (B_FALSE);
105845916cd2Sjpk 	}
105945916cd2Sjpk 
106045916cd2Sjpk 	if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) {
106145916cd2Sjpk 		retv = B_FALSE;
106245916cd2Sjpk 
106345916cd2Sjpk 	/*
106445916cd2Sjpk 	 * Check that the packet's label is in the correct range for labeled
106545916cd2Sjpk 	 * sender, or is equal to the default label for unlabeled sender.
106645916cd2Sjpk 	 */
106745916cd2Sjpk 	} else if ((src_rhtp->tpc_tp.host_type != UNLABELED &&
106845916cd2Sjpk 	    !_blinrange(label2bslabel(plabel),
106945916cd2Sjpk 	    &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
107045916cd2Sjpk 	    !blinlset(label2bslabel(plabel),
107145916cd2Sjpk 	    src_rhtp->tpc_tp.tp_sl_set_cipso)) ||
107245916cd2Sjpk 	    (src_rhtp->tpc_tp.host_type == UNLABELED &&
107345916cd2Sjpk 	    !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) {
107445916cd2Sjpk 		retv = B_FALSE;
107545916cd2Sjpk 
107645916cd2Sjpk 	} else if (check_host) {
107745916cd2Sjpk 		retv = B_TRUE;
107845916cd2Sjpk 
107945916cd2Sjpk 	/*
108045916cd2Sjpk 	 * Until we have SL range in the Zone structure, pass it
108145916cd2Sjpk 	 * when our own address lookup returned an internal entry.
108245916cd2Sjpk 	 */
108345916cd2Sjpk 	} else switch (dst_rhtp->tpc_tp.host_type) {
108445916cd2Sjpk 	case UNLABELED:
108545916cd2Sjpk 		retv = B_TRUE;
108645916cd2Sjpk 		break;
108745916cd2Sjpk 
108845916cd2Sjpk 	case SUN_CIPSO:
108945916cd2Sjpk 		retv = _blinrange(label2bslabel(plabel),
109045916cd2Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) ||
109145916cd2Sjpk 		    blinlset(label2bslabel(plabel),
109245916cd2Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso);
109345916cd2Sjpk 		break;
109445916cd2Sjpk 
109545916cd2Sjpk 	default:
109645916cd2Sjpk 		retv = B_FALSE;
109745916cd2Sjpk 	}
109845916cd2Sjpk 	TPC_RELE(src_rhtp);
109945916cd2Sjpk 	TPC_RELE(dst_rhtp);
110045916cd2Sjpk 	return (retv);
110145916cd2Sjpk }
110245916cd2Sjpk 
110345916cd2Sjpk /*
110445916cd2Sjpk  * This routine determines whether a response to a failed packet delivery or
110545916cd2Sjpk  * connection should be sent back.  By default, the policy is to allow such
110645916cd2Sjpk  * messages to be sent at all times, as these messages reveal little useful
110745916cd2Sjpk  * information and are healthy parts of TCP/IP networking.
110845916cd2Sjpk  *
110945916cd2Sjpk  * If tsol_strict_error is set, then we do strict tests: if the packet label is
111045916cd2Sjpk  * within the label range/set of this host/zone, return B_TRUE; otherwise
111145916cd2Sjpk  * return B_FALSE, which causes the packet to be dropped silently.
111245916cd2Sjpk  *
111345916cd2Sjpk  * Note that tsol_get_pkt_label will cause the packet to drop if the sender is
111445916cd2Sjpk  * marked as labeled in the remote host database, but the packet lacks a label.
111545916cd2Sjpk  * This means that we don't need to do a lookup on the source; the
111645916cd2Sjpk  * TSLF_UNLABELED flag is sufficient.
111745916cd2Sjpk  */
111845916cd2Sjpk boolean_t
111945916cd2Sjpk tsol_can_reply_error(const mblk_t *mp)
112045916cd2Sjpk {
112145916cd2Sjpk 	ts_label_t	*plabel = NULL;
112245916cd2Sjpk 	tsol_tpc_t	*rhtp;
112345916cd2Sjpk 	const ipha_t	*ipha;
112445916cd2Sjpk 	const ip6_t	*ip6h;
112545916cd2Sjpk 	boolean_t	retv;
112645916cd2Sjpk 	bslabel_t	*pktbs;
1127*de8c4a14SErik Nordmark 	cred_t		*credp;
112845916cd2Sjpk 
112945916cd2Sjpk 	/* Caller must pull up at least the IP header */
113045916cd2Sjpk 	ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ?
113145916cd2Sjpk 	    sizeof (*ipha) : sizeof (*ip6h)));
113245916cd2Sjpk 
113345916cd2Sjpk 	if (!tsol_strict_error)
113445916cd2Sjpk 		return (B_TRUE);
113545916cd2Sjpk 
1136*de8c4a14SErik Nordmark 	credp = msg_getcred(mp, NULL);
1137*de8c4a14SErik Nordmark 	if (credp != NULL)
1138*de8c4a14SErik Nordmark 		plabel = crgetlabel(credp);
113945916cd2Sjpk 
114045916cd2Sjpk 	/* We are bootstrapping or the internal template was never deleted */
114145916cd2Sjpk 	if (plabel == NULL)
114245916cd2Sjpk 		return (B_TRUE);
114345916cd2Sjpk 
114445916cd2Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
114545916cd2Sjpk 		ipha = (const ipha_t *)mp->b_rptr;
114645916cd2Sjpk 		rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE);
114745916cd2Sjpk 	} else {
114845916cd2Sjpk 		ip6h = (const ip6_t *)mp->b_rptr;
114945916cd2Sjpk 		rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE);
115045916cd2Sjpk 	}
115145916cd2Sjpk 
115245916cd2Sjpk 	if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) {
115345916cd2Sjpk 		retv = B_FALSE;
115445916cd2Sjpk 	} else {
115545916cd2Sjpk 		/*
115645916cd2Sjpk 		 * If we're in the midst of forwarding, then the destination
115745916cd2Sjpk 		 * address might not be labeled.  In that case, allow unlabeled
115845916cd2Sjpk 		 * packets through only if the default label is the same, and
115945916cd2Sjpk 		 * labeled ones if they dominate.
116045916cd2Sjpk 		 */
116145916cd2Sjpk 		pktbs = label2bslabel(plabel);
116245916cd2Sjpk 		switch (rhtp->tpc_tp.host_type) {
116345916cd2Sjpk 		case UNLABELED:
116445916cd2Sjpk 			if (plabel->tsl_flags & TSLF_UNLABELED) {
116545916cd2Sjpk 				retv = blequal(pktbs,
116645916cd2Sjpk 				    &rhtp->tpc_tp.tp_def_label);
116745916cd2Sjpk 			} else {
116845916cd2Sjpk 				retv = bldominates(pktbs,
116945916cd2Sjpk 				    &rhtp->tpc_tp.tp_def_label);
117045916cd2Sjpk 			}
117145916cd2Sjpk 			break;
117245916cd2Sjpk 
117345916cd2Sjpk 		case SUN_CIPSO:
117445916cd2Sjpk 			retv = _blinrange(pktbs,
117545916cd2Sjpk 			    &rhtp->tpc_tp.tp_sl_range_cipso) ||
117645916cd2Sjpk 			    blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso);
117745916cd2Sjpk 			break;
117845916cd2Sjpk 
117945916cd2Sjpk 		default:
118045916cd2Sjpk 			retv = B_FALSE;
118145916cd2Sjpk 			break;
118245916cd2Sjpk 		}
118345916cd2Sjpk 	}
118445916cd2Sjpk 
118545916cd2Sjpk 	if (rhtp != NULL)
118645916cd2Sjpk 		TPC_RELE(rhtp);
118745916cd2Sjpk 
118845916cd2Sjpk 	return (retv);
118945916cd2Sjpk }
119045916cd2Sjpk 
119145916cd2Sjpk /*
119245916cd2Sjpk  * Finds the zone associated with the given packet.  Returns GLOBAL_ZONEID if
119345916cd2Sjpk  * the zone cannot be located.
119445916cd2Sjpk  *
119545916cd2Sjpk  * This is used by the classifier when the packet matches an ALL_ZONES IRE, and
119645916cd2Sjpk  * there's no MLP defined.
1197f4b3ec61Sdh  *
1198f4b3ec61Sdh  * Note that we assume that this is only invoked in the ALL_ZONES case.
1199f4b3ec61Sdh  * Handling other cases would require handle exclusive stack zones where either
1200f4b3ec61Sdh  * this routine or the callers would have to map from
1201f4b3ec61Sdh  * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc.
120245916cd2Sjpk  */
120345916cd2Sjpk zoneid_t
120445916cd2Sjpk tsol_packet_to_zoneid(const mblk_t *mp)
120545916cd2Sjpk {
1206*de8c4a14SErik Nordmark 	cred_t *cr = msg_getcred(mp, NULL);
120745916cd2Sjpk 	zone_t *zone;
120845916cd2Sjpk 	ts_label_t *label;
120945916cd2Sjpk 
121045916cd2Sjpk 	if (cr != NULL) {
121145916cd2Sjpk 		if ((label = crgetlabel(cr)) != NULL) {
121245916cd2Sjpk 			zone = zone_find_by_label(label);
121345916cd2Sjpk 			if (zone != NULL) {
121445916cd2Sjpk 				zoneid_t zoneid = zone->zone_id;
121545916cd2Sjpk 
121645916cd2Sjpk 				zone_rele(zone);
121745916cd2Sjpk 				return (zoneid);
121845916cd2Sjpk 			}
121945916cd2Sjpk 		}
122045916cd2Sjpk 	}
122145916cd2Sjpk 	return (GLOBAL_ZONEID);
122245916cd2Sjpk }
122345916cd2Sjpk 
122445916cd2Sjpk int
122545916cd2Sjpk tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
122645916cd2Sjpk {
122745916cd2Sjpk 	int		error = 0;
122845916cd2Sjpk 	tsol_ire_gw_secattr_t *attrp = NULL;
122945916cd2Sjpk 	tsol_tnrhc_t	*gw_rhc = NULL;
123045916cd2Sjpk 	tsol_gcgrp_t	*gcgrp = NULL;
123145916cd2Sjpk 	tsol_gc_t	*gc = NULL;
123245916cd2Sjpk 	in_addr_t	ga_addr4;
123345916cd2Sjpk 	void		*paddr = NULL;
123445916cd2Sjpk 
123545916cd2Sjpk 	/* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
123645916cd2Sjpk 	if (!is_system_labeled() ||
123745916cd2Sjpk 	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
123845916cd2Sjpk 	    IRE_INTERFACE)))
123945916cd2Sjpk 		goto done;
124045916cd2Sjpk 
124145916cd2Sjpk 	/*
124245916cd2Sjpk 	 * If we don't have a label to compare with, or the IRE does not
124345916cd2Sjpk 	 * contain any gateway security attributes, there's not much that
124445916cd2Sjpk 	 * we can do.  We let the former case pass, and the latter fail,
124545916cd2Sjpk 	 * since the IRE doesn't qualify for a match due to the lack of
124645916cd2Sjpk 	 * security attributes.
124745916cd2Sjpk 	 */
124845916cd2Sjpk 	if (tsl == NULL || ire->ire_gw_secattr == NULL) {
124945916cd2Sjpk 		if (tsl != NULL) {
1250222c5bceSkp 			DTRACE_PROBE3(tx__ip__log__drop__irematch__nogwsec,
1251222c5bceSkp 			    char *,
1252222c5bceSkp 			    "ire(1) lacks ire_gw_secattr matching label(2)",
1253222c5bceSkp 			    ire_t *, ire, ts_label_t *, tsl);
125445916cd2Sjpk 			error = EACCES;
125545916cd2Sjpk 		}
125645916cd2Sjpk 		goto done;
125745916cd2Sjpk 	}
125845916cd2Sjpk 
125945916cd2Sjpk 	attrp = ire->ire_gw_secattr;
126045916cd2Sjpk 
126145916cd2Sjpk 	/*
126245916cd2Sjpk 	 * The possible lock order scenarios related to the tsol gateway
126345916cd2Sjpk 	 * attribute locks are documented at the beginning of ip.c in the
126445916cd2Sjpk 	 * lock order scenario section.
126545916cd2Sjpk 	 */
126645916cd2Sjpk 	mutex_enter(&attrp->igsa_lock);
126745916cd2Sjpk 
126845916cd2Sjpk 	/*
126945916cd2Sjpk 	 * Depending on the IRE type (prefix vs. cache), we seek the group
127045916cd2Sjpk 	 * structure which contains all security credentials of the gateway.
127145916cd2Sjpk 	 * A prefix IRE is associated with at most one gateway credential,
127245916cd2Sjpk 	 * while a cache IRE is associated with every credentials that the
127345916cd2Sjpk 	 * gateway has.
127445916cd2Sjpk 	 */
127545916cd2Sjpk 	if ((gc = attrp->igsa_gc) != NULL) {			/* prefix */
127645916cd2Sjpk 		gcgrp = gc->gc_grp;
127745916cd2Sjpk 		ASSERT(gcgrp != NULL);
127845916cd2Sjpk 		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
127945916cd2Sjpk 	} else if ((gcgrp = attrp->igsa_gcgrp) != NULL) {	/* cache */
128045916cd2Sjpk 		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
128145916cd2Sjpk 		gc = gcgrp->gcgrp_head;
128245916cd2Sjpk 		if (gc == NULL) {
128345916cd2Sjpk 			/* gc group is empty, so the drop lock now */
128445916cd2Sjpk 			ASSERT(gcgrp->gcgrp_count == 0);
128545916cd2Sjpk 			rw_exit(&gcgrp->gcgrp_rwlock);
128645916cd2Sjpk 			gcgrp = NULL;
128745916cd2Sjpk 		}
128845916cd2Sjpk 	}
128945916cd2Sjpk 
129045916cd2Sjpk 	if (gcgrp != NULL)
129145916cd2Sjpk 		GCGRP_REFHOLD(gcgrp);
129245916cd2Sjpk 
129345916cd2Sjpk 	if ((gw_rhc = attrp->igsa_rhc) != NULL) {
129445916cd2Sjpk 		/*
129545916cd2Sjpk 		 * If our cached entry has grown stale, then discard it so we
129645916cd2Sjpk 		 * can get a new one.
129745916cd2Sjpk 		 */
129845916cd2Sjpk 		if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) {
129945916cd2Sjpk 			TNRHC_RELE(gw_rhc);
130045916cd2Sjpk 			attrp->igsa_rhc = gw_rhc = NULL;
130145916cd2Sjpk 		} else {
130245916cd2Sjpk 			TNRHC_HOLD(gw_rhc)
130345916cd2Sjpk 		}
130445916cd2Sjpk 	}
130545916cd2Sjpk 
130645916cd2Sjpk 	/* Last attempt at loading the template had failed; try again */
130745916cd2Sjpk 	if (gw_rhc == NULL) {
130845916cd2Sjpk 		if (gcgrp != NULL) {
130945916cd2Sjpk 			tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
131045916cd2Sjpk 
131145916cd2Sjpk 			if (ire->ire_ipversion == IPV4_VERSION) {
131245916cd2Sjpk 				ASSERT(ga->ga_af == AF_INET);
131345916cd2Sjpk 				IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
131445916cd2Sjpk 				paddr = &ga_addr4;
131545916cd2Sjpk 			} else {
131645916cd2Sjpk 				ASSERT(ga->ga_af == AF_INET6);
131745916cd2Sjpk 				paddr = &ga->ga_addr;
131845916cd2Sjpk 			}
131945916cd2Sjpk 		} else if (ire->ire_ipversion == IPV6_VERSION &&
132045916cd2Sjpk 		    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
132145916cd2Sjpk 			paddr = &ire->ire_gateway_addr_v6;
132245916cd2Sjpk 		} else if (ire->ire_ipversion == IPV4_VERSION &&
132345916cd2Sjpk 		    ire->ire_gateway_addr != INADDR_ANY) {
132445916cd2Sjpk 			paddr = &ire->ire_gateway_addr;
132545916cd2Sjpk 		}
132645916cd2Sjpk 
132745916cd2Sjpk 		/* We've found a gateway address to do the template lookup */
132845916cd2Sjpk 		if (paddr != NULL) {
132945916cd2Sjpk 			ASSERT(gw_rhc == NULL);
1330bfabfc35Skp 			gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE);
133145916cd2Sjpk 			if (gw_rhc != NULL) {
133245916cd2Sjpk 				/*
133345916cd2Sjpk 				 * Note that if the lookup above returned an
133445916cd2Sjpk 				 * internal template, we'll use it for the
133545916cd2Sjpk 				 * time being, and do another lookup next
133645916cd2Sjpk 				 * time around.
133745916cd2Sjpk 				 */
133845916cd2Sjpk 				/* Another thread has loaded the template? */
133945916cd2Sjpk 				if (attrp->igsa_rhc != NULL) {
134045916cd2Sjpk 					TNRHC_RELE(gw_rhc)
134145916cd2Sjpk 					/* reload, it could be different */
134245916cd2Sjpk 					gw_rhc = attrp->igsa_rhc;
134345916cd2Sjpk 				} else {
134445916cd2Sjpk 					attrp->igsa_rhc = gw_rhc;
134545916cd2Sjpk 				}
134645916cd2Sjpk 				/*
134745916cd2Sjpk 				 * Hold an extra reference just like we did
134845916cd2Sjpk 				 * above prior to dropping the igsa_lock.
134945916cd2Sjpk 				 */
135045916cd2Sjpk 				TNRHC_HOLD(gw_rhc)
135145916cd2Sjpk 			}
135245916cd2Sjpk 		}
135345916cd2Sjpk 	}
135445916cd2Sjpk 
135545916cd2Sjpk 	mutex_exit(&attrp->igsa_lock);
135645916cd2Sjpk 	/* Gateway template not found */
135745916cd2Sjpk 	if (gw_rhc == NULL) {
135845916cd2Sjpk 		/*
135945916cd2Sjpk 		 * If destination address is directly reachable through an
136045916cd2Sjpk 		 * interface rather than through a learned route, pass it.
136145916cd2Sjpk 		 */
136245916cd2Sjpk 		if (paddr != NULL) {
136345916cd2Sjpk 			DTRACE_PROBE3(
136445916cd2Sjpk 			    tx__ip__log__drop__irematch__nogwtmpl, char *,
136545916cd2Sjpk 			    "ire(1), label(2) off-link with no gw_rhc",
136645916cd2Sjpk 			    ire_t *, ire, ts_label_t *, tsl);
136745916cd2Sjpk 			error = EINVAL;
136845916cd2Sjpk 		}
136945916cd2Sjpk 		goto done;
137045916cd2Sjpk 	}
137145916cd2Sjpk 
137245916cd2Sjpk 	if (gc != NULL) {
137345916cd2Sjpk 		tsol_gcdb_t *gcdb;
137445916cd2Sjpk 		/*
137545916cd2Sjpk 		 * In the case of IRE_CACHE we've got one or more gateway
137645916cd2Sjpk 		 * security credentials to compare against the passed in label.
137745916cd2Sjpk 		 * Perform label range comparison against each security
137845916cd2Sjpk 		 * credential of the gateway. In the case of a prefix ire
137945916cd2Sjpk 		 * we need to match against the security attributes of
138045916cd2Sjpk 		 * just the route itself, so the loop is executed only once.
138145916cd2Sjpk 		 */
138245916cd2Sjpk 		ASSERT(gcgrp != NULL);
138345916cd2Sjpk 		do {
138445916cd2Sjpk 			gcdb = gc->gc_db;
138545916cd2Sjpk 			if (tsl->tsl_doi == gcdb->gcdb_doi &&
138645916cd2Sjpk 			    _blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange))
138745916cd2Sjpk 				break;
138845916cd2Sjpk 			if (ire->ire_type == IRE_CACHE)
138945916cd2Sjpk 				gc = gc->gc_next;
139045916cd2Sjpk 			else
139145916cd2Sjpk 				gc = NULL;
139245916cd2Sjpk 		} while (gc != NULL);
139345916cd2Sjpk 
139445916cd2Sjpk 		if (gc == NULL) {
139545916cd2Sjpk 			DTRACE_PROBE3(
139645916cd2Sjpk 			    tx__ip__log__drop__irematch__nogcmatched,
139745916cd2Sjpk 			    char *, "ire(1), tsl(2): all gc failed match",
139845916cd2Sjpk 			    ire_t *, ire, ts_label_t *, tsl);
139945916cd2Sjpk 			error = EACCES;
140045916cd2Sjpk 		}
140145916cd2Sjpk 	} else {
140245916cd2Sjpk 		/*
140345916cd2Sjpk 		 * We didn't find any gateway credentials in the IRE
140445916cd2Sjpk 		 * attributes; fall back to the gateway's template for
140545916cd2Sjpk 		 * label range checks, if we are required to do so.
140645916cd2Sjpk 		 */
140745916cd2Sjpk 		ASSERT(gw_rhc != NULL);
140845916cd2Sjpk 		switch (gw_rhc->rhc_tpc->tpc_tp.host_type) {
140945916cd2Sjpk 		case SUN_CIPSO:
1410222c5bceSkp 			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
141145916cd2Sjpk 			    (!_blinrange(&tsl->tsl_label,
1412222c5bceSkp 			    &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) &&
141345916cd2Sjpk 			    !blinlset(&tsl->tsl_label,
141445916cd2Sjpk 			    gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) {
141545916cd2Sjpk 				error = EACCES;
141645916cd2Sjpk 				DTRACE_PROBE4(
141745916cd2Sjpk 				    tx__ip__log__drop__irematch__deftmpl,
141845916cd2Sjpk 				    char *, "ire(1), tsl(2), gw_rhc(3) "
141945916cd2Sjpk 				    "failed match (cipso gw)",
142045916cd2Sjpk 				    ire_t *, ire, ts_label_t *, tsl,
142145916cd2Sjpk 				    tsol_tnrhc_t *, gw_rhc);
142245916cd2Sjpk 			}
142345916cd2Sjpk 			break;
142445916cd2Sjpk 
142545916cd2Sjpk 		case UNLABELED:
1426222c5bceSkp 			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
142745916cd2Sjpk 			    (!_blinrange(&tsl->tsl_label,
142845916cd2Sjpk 			    &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) &&
142945916cd2Sjpk 			    !blinlset(&tsl->tsl_label,
143045916cd2Sjpk 			    gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) {
143145916cd2Sjpk 				error = EACCES;
143245916cd2Sjpk 				DTRACE_PROBE4(
143345916cd2Sjpk 				    tx__ip__log__drop__irematch__deftmpl,
143445916cd2Sjpk 				    char *, "ire(1), tsl(2), gw_rhc(3) "
143545916cd2Sjpk 				    "failed match (unlabeled gw)",
143645916cd2Sjpk 				    ire_t *, ire, ts_label_t *, tsl,
143745916cd2Sjpk 				    tsol_tnrhc_t *, gw_rhc);
143845916cd2Sjpk 			}
143945916cd2Sjpk 			break;
144045916cd2Sjpk 		}
144145916cd2Sjpk 	}
144245916cd2Sjpk 
144345916cd2Sjpk done:
144445916cd2Sjpk 
144545916cd2Sjpk 	if (gcgrp != NULL) {
144645916cd2Sjpk 		rw_exit(&gcgrp->gcgrp_rwlock);
144745916cd2Sjpk 		GCGRP_REFRELE(gcgrp);
144845916cd2Sjpk 	}
144945916cd2Sjpk 
145045916cd2Sjpk 	if (gw_rhc != NULL)
145145916cd2Sjpk 		TNRHC_RELE(gw_rhc)
145245916cd2Sjpk 
145345916cd2Sjpk 	return (error);
145445916cd2Sjpk }
145545916cd2Sjpk 
145645916cd2Sjpk /*
145745916cd2Sjpk  * Performs label accreditation checks for packet forwarding.
145845916cd2Sjpk  *
145945916cd2Sjpk  * Returns a pointer to the modified mblk if allowed for forwarding,
146045916cd2Sjpk  * or NULL if the packet must be dropped.
146145916cd2Sjpk  */
146245916cd2Sjpk mblk_t *
146345916cd2Sjpk tsol_ip_forward(ire_t *ire, mblk_t *mp)
146445916cd2Sjpk {
146545916cd2Sjpk 	tsol_ire_gw_secattr_t *attrp = NULL;
146645916cd2Sjpk 	ipha_t		*ipha;
146745916cd2Sjpk 	ip6_t		*ip6h;
146845916cd2Sjpk 	const void	*pdst;
146945916cd2Sjpk 	const void	*psrc;
147045916cd2Sjpk 	boolean_t	off_link;
147145916cd2Sjpk 	tsol_tpc_t	*dst_rhtp, *gw_rhtp;
147245916cd2Sjpk 	tsol_ip_label_t label_type;
147345916cd2Sjpk 	uchar_t		*opt_ptr = NULL;
147445916cd2Sjpk 	ts_label_t	*tsl;
147545916cd2Sjpk 	uint8_t		proto;
147645916cd2Sjpk 	int		af, adjust;
147745916cd2Sjpk 	uint16_t	iplen;
1478c793af95Ssangeeta 	boolean_t	need_tpc_rele = B_FALSE;
1479c793af95Ssangeeta 	ipaddr_t	*gw;
1480f4b3ec61Sdh 	ip_stack_t	*ipst = ire->ire_ipst;
1481*de8c4a14SErik Nordmark 	cred_t		*credp;
148245916cd2Sjpk 
148345916cd2Sjpk 	ASSERT(ire != NULL && mp != NULL);
148445916cd2Sjpk 	ASSERT(ire->ire_stq != NULL);
148545916cd2Sjpk 
148645916cd2Sjpk 	af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;
148745916cd2Sjpk 
148845916cd2Sjpk 	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
148945916cd2Sjpk 		ASSERT(ire->ire_ipversion == IPV4_VERSION);
149045916cd2Sjpk 		ipha = (ipha_t *)mp->b_rptr;
149145916cd2Sjpk 		psrc = &ipha->ipha_src;
149245916cd2Sjpk 		pdst = &ipha->ipha_dst;
149345916cd2Sjpk 		proto = ipha->ipha_protocol;
149445916cd2Sjpk 
1495c793af95Ssangeeta 		/*
1496c793af95Ssangeeta 		 * off_link is TRUE if destination not directly reachable.
1497c793af95Ssangeeta 		 * Surya note: we avoid creation of per-dst IRE_CACHE entries
1498c793af95Ssangeeta 		 * for forwarded packets, so we set off_link to be TRUE
1499c793af95Ssangeeta 		 * if the packet dst is different from the ire_addr of
1500c793af95Ssangeeta 		 * the ire for the nexthop.
1501c793af95Ssangeeta 		 */
1502c793af95Ssangeeta 		off_link = ((ipha->ipha_dst != ire->ire_addr) ||
1503c793af95Ssangeeta 		    (ire->ire_gateway_addr != INADDR_ANY));
150445916cd2Sjpk 	} else {
150545916cd2Sjpk 		ASSERT(ire->ire_ipversion == IPV6_VERSION);
150645916cd2Sjpk 		ip6h = (ip6_t *)mp->b_rptr;
150745916cd2Sjpk 		psrc = &ip6h->ip6_src;
150845916cd2Sjpk 		pdst = &ip6h->ip6_dst;
150945916cd2Sjpk 		proto = ip6h->ip6_nxt;
151045916cd2Sjpk 
151145916cd2Sjpk 		if (proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
151245916cd2Sjpk 		    proto != IPPROTO_ICMPV6) {
151345916cd2Sjpk 			uint8_t *nexthdrp;
151445916cd2Sjpk 			uint16_t hdr_len;
151545916cd2Sjpk 
151645916cd2Sjpk 			if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len,
151745916cd2Sjpk 			    &nexthdrp)) {
151845916cd2Sjpk 				/* malformed packet; drop it */
151945916cd2Sjpk 				return (NULL);
152045916cd2Sjpk 			}
152145916cd2Sjpk 			proto = *nexthdrp;
152245916cd2Sjpk 		}
152345916cd2Sjpk 
152445916cd2Sjpk 		/* destination not directly reachable? */
152545916cd2Sjpk 		off_link = !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6);
152645916cd2Sjpk 	}
152745916cd2Sjpk 
1528*de8c4a14SErik Nordmark 	if ((tsl = msg_getlabel(mp)) == NULL)
152945916cd2Sjpk 		return (mp);
153045916cd2Sjpk 
153145916cd2Sjpk 	label_type = tsol_get_option(mp, &opt_ptr);
153245916cd2Sjpk 
153345916cd2Sjpk 	ASSERT(psrc != NULL && pdst != NULL);
153445916cd2Sjpk 	dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE);
153545916cd2Sjpk 
153645916cd2Sjpk 	if (dst_rhtp == NULL) {
153745916cd2Sjpk 		/*
153845916cd2Sjpk 		 * Without a template we do not know if forwarding
153945916cd2Sjpk 		 * violates MAC
154045916cd2Sjpk 		 */
154145916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *,
154245916cd2Sjpk 		    "mp(1) dropped, no template for destination ip4|6(2)",
154345916cd2Sjpk 		    mblk_t *, mp, void *, pdst);
154445916cd2Sjpk 		return (NULL);
154545916cd2Sjpk 	}
154645916cd2Sjpk 
154745916cd2Sjpk 	/*
154845916cd2Sjpk 	 * Gateway template must have existed for off-link destinations,
154945916cd2Sjpk 	 * since tsol_ire_match_gwattr has ensured such condition.
155045916cd2Sjpk 	 */
1551c793af95Ssangeeta 	if (ire->ire_ipversion == IPV4_VERSION && off_link) {
1552c793af95Ssangeeta 		/*
1553c793af95Ssangeeta 		 * Surya note: first check if we can get the gw_rhtp from
1554c793af95Ssangeeta 		 * the ire_gw_secattr->igsa_rhc; if this is null, then
1555c793af95Ssangeeta 		 * do a lookup based on the ire_addr (address of gw)
1556c793af95Ssangeeta 		 */
1557c793af95Ssangeeta 		if (ire->ire_gw_secattr != NULL &&
1558c793af95Ssangeeta 		    ire->ire_gw_secattr->igsa_rhc != NULL) {
1559c793af95Ssangeeta 			attrp = ire->ire_gw_secattr;
1560c793af95Ssangeeta 			gw_rhtp = attrp->igsa_rhc->rhc_tpc;
1561c793af95Ssangeeta 		} else  {
1562c793af95Ssangeeta 			/*
1563c793af95Ssangeeta 			 * use the ire_addr if this is the IRE_CACHE of nexthop
1564c793af95Ssangeeta 			 */
1565c793af95Ssangeeta 			gw = (ire->ire_gateway_addr == NULL? &ire->ire_addr :
1566c793af95Ssangeeta 			    &ire->ire_gateway_addr);
1567c793af95Ssangeeta 			gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE);
1568c793af95Ssangeeta 			need_tpc_rele = B_TRUE;
1569c793af95Ssangeeta 		}
1570c793af95Ssangeeta 		if (gw_rhtp == NULL) {
1571c793af95Ssangeeta 			DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
1572c793af95Ssangeeta 			    "mp(1) dropped, no gateway in ire attributes(2)",
1573c793af95Ssangeeta 			    mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
1574c793af95Ssangeeta 			mp = NULL;
1575c793af95Ssangeeta 			goto keep_label;
1576c793af95Ssangeeta 		}
1577c793af95Ssangeeta 	}
1578c793af95Ssangeeta 	if (ire->ire_ipversion == IPV6_VERSION &&
1579c793af95Ssangeeta 	    ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL ||
158045916cd2Sjpk 	    (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) {
158145916cd2Sjpk 		DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
158245916cd2Sjpk 		    "mp(1) dropped, no gateway in ire attributes(2)",
158345916cd2Sjpk 		    mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
158445916cd2Sjpk 		mp = NULL;
158545916cd2Sjpk 		goto keep_label;
158645916cd2Sjpk 	}
158745916cd2Sjpk 
158845916cd2Sjpk 	/*
158945916cd2Sjpk 	 * Check that the label for the packet is acceptable
159045916cd2Sjpk 	 * by destination host; otherwise, drop it.
159145916cd2Sjpk 	 */
159245916cd2Sjpk 	switch (dst_rhtp->tpc_tp.host_type) {
159345916cd2Sjpk 	case SUN_CIPSO:
159445916cd2Sjpk 		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
159545916cd2Sjpk 		    (!_blinrange(&tsl->tsl_label,
159645916cd2Sjpk 		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
159745916cd2Sjpk 		    !blinlset(&tsl->tsl_label,
159845916cd2Sjpk 		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
159945916cd2Sjpk 			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
160045916cd2Sjpk 			    "labeled packet mp(1) dropped, label(2) fails "
160145916cd2Sjpk 			    "destination(3) accredation check",
160245916cd2Sjpk 			    mblk_t *, mp, ts_label_t *, tsl,
160345916cd2Sjpk 			    tsol_tpc_t *, dst_rhtp);
160445916cd2Sjpk 			mp = NULL;
160545916cd2Sjpk 			goto keep_label;
160645916cd2Sjpk 		}
160745916cd2Sjpk 		break;
160845916cd2Sjpk 
160945916cd2Sjpk 
161045916cd2Sjpk 	case UNLABELED:
161145916cd2Sjpk 		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
161245916cd2Sjpk 		    !blequal(&dst_rhtp->tpc_tp.tp_def_label,
161345916cd2Sjpk 		    &tsl->tsl_label)) {
161445916cd2Sjpk 			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
161545916cd2Sjpk 			    "unlabeled packet mp(1) dropped, label(2) fails "
161645916cd2Sjpk 			    "destination(3) accredation check",
161745916cd2Sjpk 			    mblk_t *, mp, ts_label_t *, tsl,
161845916cd2Sjpk 			    tsol_tpc_t *, dst_rhtp);
161945916cd2Sjpk 			mp = NULL;
162045916cd2Sjpk 			goto keep_label;
162145916cd2Sjpk 		}
162245916cd2Sjpk 		break;
162345916cd2Sjpk 	}
162445916cd2Sjpk 	if (label_type == OPT_CIPSO) {
162545916cd2Sjpk 		/*
162645916cd2Sjpk 		 * We keep the label on any of the following cases:
162745916cd2Sjpk 		 *
162845916cd2Sjpk 		 *   1. The destination is labeled (on/off-link).
162945916cd2Sjpk 		 *   2. The unlabeled destination is off-link,
163045916cd2Sjpk 		 *	and the next hop gateway is labeled.
163145916cd2Sjpk 		 */
163245916cd2Sjpk 		if (dst_rhtp->tpc_tp.host_type != UNLABELED ||
163345916cd2Sjpk 		    (off_link &&
163445916cd2Sjpk 		    gw_rhtp->tpc_tp.host_type != UNLABELED))
163545916cd2Sjpk 			goto keep_label;
163645916cd2Sjpk 
163745916cd2Sjpk 		/*
163845916cd2Sjpk 		 * Strip off the CIPSO option from the packet because: the
163945916cd2Sjpk 		 * unlabeled destination host is directly reachable through
164045916cd2Sjpk 		 * an interface (on-link); or, the unlabeled destination host
164145916cd2Sjpk 		 * is not directly reachable (off-link), and the next hop
164245916cd2Sjpk 		 * gateway is unlabeled.
164345916cd2Sjpk 		 */
164445916cd2Sjpk 		adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) :
164545916cd2Sjpk 		    tsol_remove_secopt_v6(ip6h, MBLKL(mp));
164645916cd2Sjpk 
164745916cd2Sjpk 		ASSERT(adjust <= 0);
164845916cd2Sjpk 		if (adjust != 0) {
164945916cd2Sjpk 
165045916cd2Sjpk 			/* adjust is negative */
165145916cd2Sjpk 			ASSERT((mp->b_wptr + adjust) >= mp->b_rptr);
165245916cd2Sjpk 			mp->b_wptr += adjust;
165345916cd2Sjpk 
165445916cd2Sjpk 			if (af == AF_INET) {
165545916cd2Sjpk 				ipha = (ipha_t *)mp->b_rptr;
165645916cd2Sjpk 				iplen = ntohs(ipha->ipha_length) + adjust;
165745916cd2Sjpk 				ipha->ipha_length = htons(iplen);
165845916cd2Sjpk 				ipha->ipha_hdr_checksum = 0;
165945916cd2Sjpk 				ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
166045916cd2Sjpk 			}
166145916cd2Sjpk 			DTRACE_PROBE3(tx__ip__log__info__forward__adjust,
166245916cd2Sjpk 			    char *,
166345916cd2Sjpk 			    "mp(1) adjusted(2) for CIPSO option removal",
166445916cd2Sjpk 			    mblk_t *, mp, int, adjust);
166545916cd2Sjpk 		}
166645916cd2Sjpk 		goto keep_label;
166745916cd2Sjpk 	}
166845916cd2Sjpk 
166945916cd2Sjpk 	ASSERT(label_type == OPT_NONE);
167045916cd2Sjpk 	ASSERT(dst_rhtp != NULL);
167145916cd2Sjpk 
167245916cd2Sjpk 	/*
167345916cd2Sjpk 	 * We need to add CIPSO option if the destination or the next hop
167445916cd2Sjpk 	 * gateway is labeled.  Otherwise, pass the packet as is.
167545916cd2Sjpk 	 */
167645916cd2Sjpk 	if (dst_rhtp->tpc_tp.host_type == UNLABELED &&
167745916cd2Sjpk 	    (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED))
167845916cd2Sjpk 		goto keep_label;
167945916cd2Sjpk 
1680*de8c4a14SErik Nordmark 
1681*de8c4a14SErik Nordmark 	credp = msg_getcred(mp, NULL);
168245916cd2Sjpk 	if ((af == AF_INET &&
1683*de8c4a14SErik Nordmark 	    tsol_check_label(credp, &mp, B_FALSE, ipst) != 0) ||
168445916cd2Sjpk 	    (af == AF_INET6 &&
1685*de8c4a14SErik Nordmark 	    tsol_check_label_v6(credp, &mp, B_FALSE, ipst) != 0)) {
168645916cd2Sjpk 		mp = NULL;
168745916cd2Sjpk 		goto keep_label;
168845916cd2Sjpk 	}
168945916cd2Sjpk 
1690222c5bceSkp 	if (af == AF_INET) {
1691222c5bceSkp 		ipha = (ipha_t *)mp->b_rptr;
1692222c5bceSkp 		ipha->ipha_hdr_checksum = 0;
1693222c5bceSkp 		ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
169445916cd2Sjpk 	}
169545916cd2Sjpk 
169645916cd2Sjpk keep_label:
169745916cd2Sjpk 	TPC_RELE(dst_rhtp);
1698c793af95Ssangeeta 	if (need_tpc_rele && gw_rhtp != NULL)
1699c793af95Ssangeeta 		TPC_RELE(gw_rhtp);
170045916cd2Sjpk 	return (mp);
170145916cd2Sjpk }
170245916cd2Sjpk 
17030ec92a15Swy /*
17040ec92a15Swy  * Name:	tsol_pmtu_adjust()
17050ec92a15Swy  *
17060ec92a15Swy  * Returns the adjusted mtu after removing security option.
17070ec92a15Swy  * Removes/subtracts the option if the packet's cred indicates an unlabeled
17080ec92a15Swy  * sender or if pkt_diff indicates this system enlarged the packet.
17090ec92a15Swy  */
17100ec92a15Swy uint32_t
17110ec92a15Swy tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af)
17120ec92a15Swy {
17130ec92a15Swy 	int		label_adj = 0;
17140ec92a15Swy 	uint32_t	min_mtu = IP_MIN_MTU;
17150ec92a15Swy 	tsol_tpc_t	*src_rhtp;
17160ec92a15Swy 	void		*src;
17170ec92a15Swy 
17180ec92a15Swy 	/*
17190ec92a15Swy 	 * Note: label_adj is non-positive, indicating the number of
17200ec92a15Swy 	 * bytes removed by removing the security option from the
17210ec92a15Swy 	 * header.
17220ec92a15Swy 	 */
17230ec92a15Swy 	if (af == AF_INET6) {
17240ec92a15Swy 		ip6_t	*ip6h;
17250ec92a15Swy 
17260ec92a15Swy 		min_mtu = IPV6_MIN_MTU;
17270ec92a15Swy 		ip6h = (ip6_t *)mp->b_rptr;
17280ec92a15Swy 		src = &ip6h->ip6_src;
17290ec92a15Swy 		if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL)
17300ec92a15Swy 			return (mtu);
17310ec92a15Swy 		if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) {
17320ec92a15Swy 			label_adj = tsol_remove_secopt_v6(
17330ec92a15Swy 			    (ip6_t *)mp->b_rptr, MBLKL(mp));
17340ec92a15Swy 		}
17350ec92a15Swy 	} else {
17360ec92a15Swy 		ipha_t    *ipha;
17370ec92a15Swy 
17380ec92a15Swy 		ASSERT(af == AF_INET);
17390ec92a15Swy 		ipha = (ipha_t *)mp->b_rptr;
17400ec92a15Swy 		src = &ipha->ipha_src;
17410ec92a15Swy 		if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL)
17420ec92a15Swy 			return (mtu);
17430ec92a15Swy 		if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED)
17440ec92a15Swy 			label_adj = tsol_remove_secopt(
17450ec92a15Swy 			    (ipha_t *)mp->b_rptr, MBLKL(mp));
17460ec92a15Swy 	}
17470ec92a15Swy 	/*
17480ec92a15Swy 	 * Make pkt_diff non-negative and the larger of the bytes
17490ec92a15Swy 	 * previously added (if any) or just removed, since label
17500ec92a15Swy 	 * addition + subtraction may not be completely idempotent.
17510ec92a15Swy 	 */
17520ec92a15Swy 	if (pkt_diff < -label_adj)
17530ec92a15Swy 		pkt_diff = -label_adj;
17540ec92a15Swy 	if (pkt_diff > 0 && pkt_diff < mtu)
17550ec92a15Swy 		mtu -= pkt_diff;
17560ec92a15Swy 
17570ec92a15Swy 	TPC_RELE(src_rhtp);
17580ec92a15Swy 	return (MAX(mtu, min_mtu));
17590ec92a15Swy }
17600ec92a15Swy 
176145916cd2Sjpk /*
176245916cd2Sjpk  * Name:	tsol_rtsa_init()
176345916cd2Sjpk  *
176445916cd2Sjpk  * Normal:	Sanity checks on the route security attributes provided by
176545916cd2Sjpk  *		user.  Convert it into a route security parameter list to
176645916cd2Sjpk  *		be returned to caller.
176745916cd2Sjpk  *
176845916cd2Sjpk  * Output:	EINVAL if bad security attributes in the routing message
176945916cd2Sjpk  *		ENOMEM if unable to allocate data structures
177045916cd2Sjpk  *		0 otherwise.
177145916cd2Sjpk  *
177245916cd2Sjpk  * Note:	On input, cp must point to the end of any addresses in
177345916cd2Sjpk  *		the rt_msghdr_t structure.
177445916cd2Sjpk  */
177545916cd2Sjpk int
177645916cd2Sjpk tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp)
177745916cd2Sjpk {
177845916cd2Sjpk 	uint_t	sacnt;
177945916cd2Sjpk 	int	err;
178045916cd2Sjpk 	caddr_t	lim;
178145916cd2Sjpk 	tsol_rtsecattr_t *tp;
178245916cd2Sjpk 
178345916cd2Sjpk 	ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL);
178445916cd2Sjpk 
178545916cd2Sjpk 	/*
178645916cd2Sjpk 	 * In theory, we could accept as many security attributes configured
178745916cd2Sjpk 	 * per route destination.  However, the current design is limited
178845916cd2Sjpk 	 * such that at most only one set security attributes is allowed to
178945916cd2Sjpk 	 * be associated with a prefix IRE.  We therefore assert for now.
179045916cd2Sjpk 	 */
179145916cd2Sjpk 	/* LINTED */
179245916cd2Sjpk 	ASSERT(TSOL_RTSA_REQUEST_MAX == 1);
179345916cd2Sjpk 
179445916cd2Sjpk 	sp->rtsa_cnt = 0;
179545916cd2Sjpk 	lim = (caddr_t)rtm + rtm->rtm_msglen;
179645916cd2Sjpk 	ASSERT(cp <= lim);
179745916cd2Sjpk 
179845916cd2Sjpk 	if ((lim - cp) < sizeof (rtm_ext_t) ||
179945916cd2Sjpk 	    ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR)
180045916cd2Sjpk 		return (0);
180145916cd2Sjpk 
180245916cd2Sjpk 	if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t))
180345916cd2Sjpk 		return (EINVAL);
180445916cd2Sjpk 
180545916cd2Sjpk 	cp += sizeof (rtm_ext_t);
180645916cd2Sjpk 
180745916cd2Sjpk 	if ((lim - cp) < sizeof (*tp) ||
180845916cd2Sjpk 	    (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) ||
180945916cd2Sjpk 	    (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt))
181045916cd2Sjpk 		return (EINVAL);
181145916cd2Sjpk 
181245916cd2Sjpk 	/*
181345916cd2Sjpk 	 * Trying to add route security attributes when system
181445916cd2Sjpk 	 * labeling service is not available, or when user supllies
181545916cd2Sjpk 	 * more than the maximum number of security attributes
181645916cd2Sjpk 	 * allowed per request.
181745916cd2Sjpk 	 */
181845916cd2Sjpk 	if ((sacnt > 0 && !is_system_labeled()) ||
181945916cd2Sjpk 	    sacnt > TSOL_RTSA_REQUEST_MAX)
182045916cd2Sjpk 		return (EINVAL);
182145916cd2Sjpk 
182245916cd2Sjpk 	/* Ensure valid credentials */
182345916cd2Sjpk 	if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)->
182445916cd2Sjpk 	    rtsa_attr[0])) != 0) {
182545916cd2Sjpk 		cp += sizeof (*sp);
182645916cd2Sjpk 		return (err);
182745916cd2Sjpk 	}
182845916cd2Sjpk 
182945916cd2Sjpk 	bcopy(cp, sp, sizeof (*sp));
183045916cd2Sjpk 	cp += sizeof (*sp);
183145916cd2Sjpk 	return (0);
183245916cd2Sjpk }
183345916cd2Sjpk 
183445916cd2Sjpk int
183545916cd2Sjpk tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc,
183645916cd2Sjpk     tsol_gcgrp_t *gcgrp)
183745916cd2Sjpk {
183845916cd2Sjpk 	tsol_ire_gw_secattr_t *attrp;
183945916cd2Sjpk 	boolean_t exists = B_FALSE;
184045916cd2Sjpk 	in_addr_t ga_addr4;
184145916cd2Sjpk 	void *paddr = NULL;
184245916cd2Sjpk 
184345916cd2Sjpk 	ASSERT(ire != NULL);
184445916cd2Sjpk 
184545916cd2Sjpk 	/*
184645916cd2Sjpk 	 * The only time that attrp can be NULL is when this routine is
184745916cd2Sjpk 	 * called for the first time during the creation/initialization
184845916cd2Sjpk 	 * of the corresponding IRE.  It will only get cleared when the
184945916cd2Sjpk 	 * IRE is deleted.
185045916cd2Sjpk 	 */
185145916cd2Sjpk 	if ((attrp = ire->ire_gw_secattr) == NULL) {
185245916cd2Sjpk 		attrp = ire_gw_secattr_alloc(KM_NOSLEEP);
185345916cd2Sjpk 		if (attrp == NULL)
185445916cd2Sjpk 			return (ENOMEM);
185545916cd2Sjpk 		ire->ire_gw_secattr = attrp;
185645916cd2Sjpk 	} else {
185745916cd2Sjpk 		exists = B_TRUE;
185845916cd2Sjpk 		mutex_enter(&attrp->igsa_lock);
185945916cd2Sjpk 
186045916cd2Sjpk 		if (attrp->igsa_rhc != NULL) {
186145916cd2Sjpk 			TNRHC_RELE(attrp->igsa_rhc);
186245916cd2Sjpk 			attrp->igsa_rhc = NULL;
186345916cd2Sjpk 		}
186445916cd2Sjpk 
186545916cd2Sjpk 		if (attrp->igsa_gc != NULL)
186645916cd2Sjpk 			GC_REFRELE(attrp->igsa_gc);
186745916cd2Sjpk 		if (attrp->igsa_gcgrp != NULL)
186845916cd2Sjpk 			GCGRP_REFRELE(attrp->igsa_gcgrp);
186945916cd2Sjpk 	}
187045916cd2Sjpk 	ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock));
187145916cd2Sjpk 
187245916cd2Sjpk 	/*
187345916cd2Sjpk 	 * References already held by caller and we keep them;
187445916cd2Sjpk 	 * note that both gc and gcgrp may be set to NULL to
187545916cd2Sjpk 	 * clear out igsa_gc and igsa_gcgrp, respectively.
187645916cd2Sjpk 	 */
187745916cd2Sjpk 	attrp->igsa_gc = gc;
187845916cd2Sjpk 	attrp->igsa_gcgrp = gcgrp;
187945916cd2Sjpk 
188045916cd2Sjpk 	if (gcgrp == NULL && gc != NULL) {
188145916cd2Sjpk 		gcgrp = gc->gc_grp;
188245916cd2Sjpk 		ASSERT(gcgrp != NULL);
188345916cd2Sjpk 	}
188445916cd2Sjpk 
188545916cd2Sjpk 	/*
188645916cd2Sjpk 	 * Intialize the template for gateway; we use the gateway's
188745916cd2Sjpk 	 * address found in either the passed in gateway credential
188845916cd2Sjpk 	 * or group pointer, or the ire_gateway_addr{_v6} field.
188945916cd2Sjpk 	 */
189045916cd2Sjpk 	if (gcgrp != NULL) {
189145916cd2Sjpk 		tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
189245916cd2Sjpk 
189345916cd2Sjpk 		/*
189445916cd2Sjpk 		 * Caller is holding a reference, and that we don't
189545916cd2Sjpk 		 * need to hold any lock to access the address.
189645916cd2Sjpk 		 */
189745916cd2Sjpk 		if (ipversion == IPV4_VERSION) {
189845916cd2Sjpk 			ASSERT(ga->ga_af == AF_INET);
189945916cd2Sjpk 			IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
190045916cd2Sjpk 			paddr = &ga_addr4;
190145916cd2Sjpk 		} else {
190245916cd2Sjpk 			ASSERT(ga->ga_af == AF_INET6);
190345916cd2Sjpk 			paddr = &ga->ga_addr;
190445916cd2Sjpk 		}
190545916cd2Sjpk 	} else if (ipversion == IPV6_VERSION &&
190645916cd2Sjpk 	    !IN6_IS_ADDR_UNSPECIFIED(&ire->ire_gateway_addr_v6)) {
190745916cd2Sjpk 		paddr = &ire->ire_gateway_addr_v6;
190845916cd2Sjpk 	} else if (ipversion == IPV4_VERSION &&
190945916cd2Sjpk 	    ire->ire_gateway_addr != INADDR_ANY) {
191045916cd2Sjpk 		paddr = &ire->ire_gateway_addr;
191145916cd2Sjpk 	}
191245916cd2Sjpk 
191345916cd2Sjpk 	/*
191445916cd2Sjpk 	 * Lookup the gateway template; note that we could get an internal
191545916cd2Sjpk 	 * template here, which we cache anyway.  During IRE matching, we'll
191645916cd2Sjpk 	 * try to update this gateway template cache and hopefully get a
191745916cd2Sjpk 	 * real one.
191845916cd2Sjpk 	 */
191945916cd2Sjpk 	if (paddr != NULL) {
1920bfabfc35Skp 		attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE);
192145916cd2Sjpk 	}
192245916cd2Sjpk 
192345916cd2Sjpk 	if (exists)
192445916cd2Sjpk 		mutex_exit(&attrp->igsa_lock);
192545916cd2Sjpk 
192645916cd2Sjpk 	return (0);
192745916cd2Sjpk }
192845916cd2Sjpk 
192945916cd2Sjpk /*
193045916cd2Sjpk  * This function figures the type of MLP that we'll be using based on the
193145916cd2Sjpk  * address that the user is binding and the zone.  If the address is
193245916cd2Sjpk  * unspecified, then we're looking at both private and shared.  If it's one
193345916cd2Sjpk  * of the zone's private addresses, then it's private only.  If it's one
193445916cd2Sjpk  * of the global addresses, then it's shared only.
193545916cd2Sjpk  *
193645916cd2Sjpk  * If we can't figure out what it is, then return mlptSingle.  That's actually
193745916cd2Sjpk  * an error case.
1938f4b3ec61Sdh  *
1939f4b3ec61Sdh  * The callers are assume to pass in zone->zone_id and not the zoneid that
1940f4b3ec61Sdh  * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an
1941f4b3ec61Sdh  * exclusive stack zone).
194245916cd2Sjpk  */
194345916cd2Sjpk mlp_type_t
1944f4b3ec61Sdh tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr,
1945f4b3ec61Sdh     ip_stack_t *ipst)
194645916cd2Sjpk {
194745916cd2Sjpk 	in_addr_t in4;
194845916cd2Sjpk 	ire_t *ire;
194945916cd2Sjpk 	ipif_t *ipif;
195045916cd2Sjpk 	zoneid_t addrzone;
1951f4b3ec61Sdh 	zoneid_t ip_zoneid;
195245916cd2Sjpk 
195345916cd2Sjpk 	ASSERT(addr != NULL);
195445916cd2Sjpk 
1955f4b3ec61Sdh 	/*
1956f4b3ec61Sdh 	 * For exclusive stacks we set the zoneid to zero
1957f4b3ec61Sdh 	 * to operate as if in the global zone for IRE and conn_t comparisons.
1958f4b3ec61Sdh 	 */
1959f4b3ec61Sdh 	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1960f4b3ec61Sdh 		ip_zoneid = GLOBAL_ZONEID;
1961f4b3ec61Sdh 	else
1962f4b3ec61Sdh 		ip_zoneid = zoneid;
1963f4b3ec61Sdh 
196445916cd2Sjpk 	if (version == IPV6_VERSION &&
196545916cd2Sjpk 	    IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) {
196645916cd2Sjpk 		IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4);
196745916cd2Sjpk 		addr = &in4;
196845916cd2Sjpk 		version = IPV4_VERSION;
196945916cd2Sjpk 	}
197045916cd2Sjpk 
197145916cd2Sjpk 	if (version == IPV4_VERSION) {
197245916cd2Sjpk 		in4 = *(const in_addr_t *)addr;
1973f4b3ec61Sdh 		if (in4 == INADDR_ANY) {
197445916cd2Sjpk 			return (mlptBoth);
1975f4b3ec61Sdh 		}
1976f4b3ec61Sdh 		ire = ire_cache_lookup(in4, ip_zoneid, NULL, ipst);
197745916cd2Sjpk 	} else {
1978f4b3ec61Sdh 		if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr)) {
197945916cd2Sjpk 			return (mlptBoth);
1980f4b3ec61Sdh 		}
1981f4b3ec61Sdh 		ire = ire_cache_lookup_v6(addr, ip_zoneid, NULL, ipst);
198245916cd2Sjpk 	}
198345916cd2Sjpk 	/*
198445916cd2Sjpk 	 * If we can't find the IRE, then we have to behave exactly like
198545916cd2Sjpk 	 * ip_bind_laddr{,_v6}.  That means looking up the IPIF so that users
198645916cd2Sjpk 	 * can bind to addresses on "down" interfaces.
198745916cd2Sjpk 	 *
198845916cd2Sjpk 	 * If we can't find that either, then the bind is going to fail, so
198945916cd2Sjpk 	 * just give up.  Note that there's a miniscule chance that the address
199045916cd2Sjpk 	 * is in transition, but we don't bother handling that.
199145916cd2Sjpk 	 */
199245916cd2Sjpk 	if (ire == NULL) {
199345916cd2Sjpk 		if (version == IPV4_VERSION)
199445916cd2Sjpk 			ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL,
1995f4b3ec61Sdh 			    ip_zoneid, NULL, NULL, NULL, NULL, ipst);
199645916cd2Sjpk 		else
199745916cd2Sjpk 			ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr,
1998f4b3ec61Sdh 			    NULL, ip_zoneid, NULL, NULL, NULL, NULL, ipst);
1999f4b3ec61Sdh 		if (ipif == NULL) {
200045916cd2Sjpk 			return (mlptSingle);
2001f4b3ec61Sdh 		}
200245916cd2Sjpk 		addrzone = ipif->ipif_zoneid;
200345916cd2Sjpk 		ipif_refrele(ipif);
200445916cd2Sjpk 	} else {
200545916cd2Sjpk 		addrzone = ire->ire_zoneid;
200645916cd2Sjpk 		ire_refrele(ire);
200745916cd2Sjpk 	}
200845916cd2Sjpk 	return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate);
200945916cd2Sjpk }
201045916cd2Sjpk 
201145916cd2Sjpk /*
201245916cd2Sjpk  * Since we are configuring local interfaces, and we know trusted
201345916cd2Sjpk  * extension CDE requires local interfaces to be cipso host type in
201445916cd2Sjpk  * order to function correctly, we'll associate a cipso template
201545916cd2Sjpk  * to each local interface and let the interface come up.  Configuring
201645916cd2Sjpk  * a local interface to be "unlabeled" host type is a configuration error.
201745916cd2Sjpk  * We'll override that error and make the interface host type to be cipso
201845916cd2Sjpk  * here.
201945916cd2Sjpk  *
202045916cd2Sjpk  * The code is optimized for the usual "success" case and unwinds things on
202145916cd2Sjpk  * error.  We don't want to go to the trouble and expense of formatting the
202245916cd2Sjpk  * interface name for the usual case where everything is configured correctly.
202345916cd2Sjpk  */
202445916cd2Sjpk boolean_t
202545916cd2Sjpk tsol_check_interface_address(const ipif_t *ipif)
202645916cd2Sjpk {
202745916cd2Sjpk 	tsol_tpc_t *tp;
202845916cd2Sjpk 	char addrbuf[INET6_ADDRSTRLEN];
202945916cd2Sjpk 	int af;
203045916cd2Sjpk 	const void *addr;
203145916cd2Sjpk 	zone_t *zone;
203245916cd2Sjpk 	ts_label_t *plabel;
203345916cd2Sjpk 	const bslabel_t *label;
203445916cd2Sjpk 	char ifbuf[LIFNAMSIZ + 10];
203545916cd2Sjpk 	const char *ifname;
203645916cd2Sjpk 	boolean_t retval;
203745916cd2Sjpk 	tsol_rhent_t rhent;
2038f4b3ec61Sdh 	netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack;
203945916cd2Sjpk 
204045916cd2Sjpk 	if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) {
204145916cd2Sjpk 		af = AF_INET;
204245916cd2Sjpk 		addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr);
204345916cd2Sjpk 	} else {
204445916cd2Sjpk 		af = AF_INET6;
204545916cd2Sjpk 		addr = &ipif->ipif_v6lcl_addr;
204645916cd2Sjpk 	}
204745916cd2Sjpk 
204845916cd2Sjpk 	tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE);
2049f4b3ec61Sdh 
2050f4b3ec61Sdh 	/* assumes that ALL_ZONES implies that there is no exclusive stack */
2051f4b3ec61Sdh 	if (ipif->ipif_zoneid == ALL_ZONES) {
2052f4b3ec61Sdh 		zone = NULL;
2053f4b3ec61Sdh 	} else if (ns->netstack_stackid == GLOBAL_NETSTACKID) {
2054f4b3ec61Sdh 		/* Shared stack case */
2055f4b3ec61Sdh 		zone = zone_find_by_id(ipif->ipif_zoneid);
2056f4b3ec61Sdh 	} else {
2057f4b3ec61Sdh 		/* Exclusive stack case */
2058f4b3ec61Sdh 		zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp));
2059f4b3ec61Sdh 	}
206045916cd2Sjpk 	if (zone != NULL) {
206145916cd2Sjpk 		plabel = zone->zone_slabel;
206245916cd2Sjpk 		ASSERT(plabel != NULL);
206345916cd2Sjpk 		label = label2bslabel(plabel);
206445916cd2Sjpk 	}
206545916cd2Sjpk 
206645916cd2Sjpk 	/*
206745916cd2Sjpk 	 * If it's CIPSO and an all-zones address, then we're done.
206845916cd2Sjpk 	 * If it's a CIPSO zone specific address, the zone's label
206945916cd2Sjpk 	 * must be in the range or set specified in the template.
207045916cd2Sjpk 	 * When the remote host entry is missing or the template
207145916cd2Sjpk 	 * type is incorrect for this interface, we create a
207245916cd2Sjpk 	 * CIPSO host entry in kernel and allow the interface to be
207345916cd2Sjpk 	 * brought up as CIPSO type.
207445916cd2Sjpk 	 */
207545916cd2Sjpk 	if (tp != NULL && (
207645916cd2Sjpk 	    /* The all-zones case */
207745916cd2Sjpk 	    (tp->tpc_tp.host_type == SUN_CIPSO &&
207845916cd2Sjpk 	    tp->tpc_tp.tp_doi == default_doi &&
207945916cd2Sjpk 	    ipif->ipif_zoneid == ALL_ZONES) ||
208045916cd2Sjpk 	    /* The local-zone case */
208145916cd2Sjpk 	    (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi &&
208245916cd2Sjpk 	    ((tp->tpc_tp.host_type == SUN_CIPSO &&
208345916cd2Sjpk 	    (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) ||
208445916cd2Sjpk 	    blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) {
208545916cd2Sjpk 		if (zone != NULL)
208645916cd2Sjpk 			zone_rele(zone);
208745916cd2Sjpk 		TPC_RELE(tp);
208845916cd2Sjpk 		return (B_TRUE);
208945916cd2Sjpk 	}
209045916cd2Sjpk 
209145916cd2Sjpk 	ifname = ipif->ipif_ill->ill_name;
209245916cd2Sjpk 	if (ipif->ipif_id != 0) {
209345916cd2Sjpk 		(void) snprintf(ifbuf, sizeof (ifbuf), "%s:%u", ifname,
209445916cd2Sjpk 		    ipif->ipif_id);
209545916cd2Sjpk 		ifname = ifbuf;
209645916cd2Sjpk 	}
209745916cd2Sjpk 	(void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf));
209845916cd2Sjpk 
209945916cd2Sjpk 	if (tp == NULL) {
210045916cd2Sjpk 		cmn_err(CE_NOTE, "template entry for %s missing. Default to "
210145916cd2Sjpk 		    "CIPSO type for %s", ifname, addrbuf);
210245916cd2Sjpk 		retval = B_TRUE;
210345916cd2Sjpk 	} else if (tp->tpc_tp.host_type == UNLABELED) {
210445916cd2Sjpk 		cmn_err(CE_NOTE, "template type for %s incorrectly configured. "
210545916cd2Sjpk 		    "Change to CIPSO type for %s", ifname, addrbuf);
210645916cd2Sjpk 		retval = B_TRUE;
210745916cd2Sjpk 	} else if (ipif->ipif_zoneid == ALL_ZONES) {
210845916cd2Sjpk 		if (tp->tpc_tp.host_type != SUN_CIPSO) {
210945916cd2Sjpk 			cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for "
211045916cd2Sjpk 			    "all-zones. Converted to CIPSO.", ifname, addrbuf);
211145916cd2Sjpk 			retval = B_TRUE;
211245916cd2Sjpk 		} else {
211345916cd2Sjpk 			cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d "
211445916cd2Sjpk 			    "instead of %d", ifname, addrbuf,
211545916cd2Sjpk 			    tp->tpc_tp.tp_doi, default_doi);
211645916cd2Sjpk 			retval = B_FALSE;
211745916cd2Sjpk 		}
211845916cd2Sjpk 	} else if (zone == NULL) {
211945916cd2Sjpk 		cmn_err(CE_NOTE, "%s failed: zoneid %d unknown",
212045916cd2Sjpk 		    ifname, ipif->ipif_zoneid);
212145916cd2Sjpk 		retval = B_FALSE;
212245916cd2Sjpk 	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
212345916cd2Sjpk 		cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has "
212445916cd2Sjpk 		    "DOI %d", ifname, zone->zone_name, plabel->tsl_doi,
212545916cd2Sjpk 		    addrbuf, tp->tpc_tp.tp_doi);
212645916cd2Sjpk 		retval = B_FALSE;
212745916cd2Sjpk 	} else {
212845916cd2Sjpk 		cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with "
212945916cd2Sjpk 		    "%s", ifname, zone->zone_name, addrbuf);
213045916cd2Sjpk 		tsol_print_label(label, "zone label");
213145916cd2Sjpk 		retval = B_FALSE;
213245916cd2Sjpk 	}
213345916cd2Sjpk 
213445916cd2Sjpk 	if (zone != NULL)
213545916cd2Sjpk 		zone_rele(zone);
213645916cd2Sjpk 	if (tp != NULL)
213745916cd2Sjpk 		TPC_RELE(tp);
213845916cd2Sjpk 	if (retval) {
213945916cd2Sjpk 		/*
214045916cd2Sjpk 		 * we've corrected a config error and let the interface
214145916cd2Sjpk 		 * come up as cipso. Need to insert an rhent.
214245916cd2Sjpk 		 */
214345916cd2Sjpk 		if ((rhent.rh_address.ta_family = af) == AF_INET) {
214445916cd2Sjpk 			rhent.rh_prefix = 32;
214545916cd2Sjpk 			rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr;
214645916cd2Sjpk 		} else {
214745916cd2Sjpk 			rhent.rh_prefix = 128;
214845916cd2Sjpk 			rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr;
214945916cd2Sjpk 		}
215045916cd2Sjpk 		(void) strcpy(rhent.rh_template, "cipso");
215145916cd2Sjpk 		if (tnrh_load(&rhent) != 0) {
215245916cd2Sjpk 			cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO "
215345916cd2Sjpk 			    "template for local addr %s", ifname, addrbuf);
215445916cd2Sjpk 			retval = B_FALSE;
215545916cd2Sjpk 		}
215645916cd2Sjpk 	}
215745916cd2Sjpk 	return (retval);
215845916cd2Sjpk }
2159