xref: /illumos-gate/usr/src/uts/common/inet/ip/tnet.c (revision ab82c29b)
145916cd2Sjpk /*
245916cd2Sjpk  * CDDL HEADER START
345916cd2Sjpk  *
445916cd2Sjpk  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
745916cd2Sjpk  *
845916cd2Sjpk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
945916cd2Sjpk  * or http://www.opensolaris.org/os/licensing.
1045916cd2Sjpk  * See the License for the specific language governing permissions
1145916cd2Sjpk  * and limitations under the License.
1245916cd2Sjpk  *
1345916cd2Sjpk  * When distributing Covered Code, include this CDDL HEADER in each
1445916cd2Sjpk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1545916cd2Sjpk  * If applicable, add the following below this CDDL HEADER, with the
1645916cd2Sjpk  * fields enclosed by brackets "[]" replaced with your own identifying
1745916cd2Sjpk  * information: Portions Copyright [yyyy] [name of copyright owner]
1845916cd2Sjpk  *
1945916cd2Sjpk  * CDDL HEADER END
2045916cd2Sjpk  */
2145916cd2Sjpk /*
22de8c4a14SErik Nordmark  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
2345916cd2Sjpk  * Use is subject to license terms.
2445916cd2Sjpk  */
2545916cd2Sjpk 
2645916cd2Sjpk #include <sys/types.h>
2745916cd2Sjpk #include <sys/stream.h>
2845916cd2Sjpk #include <sys/strsubr.h>
2945916cd2Sjpk #include <sys/stropts.h>
3045916cd2Sjpk #include <sys/sunddi.h>
3145916cd2Sjpk #include <sys/cred.h>
3245916cd2Sjpk #include <sys/debug.h>
3345916cd2Sjpk #include <sys/kmem.h>
3445916cd2Sjpk #include <sys/errno.h>
3545916cd2Sjpk #include <sys/disp.h>
3645916cd2Sjpk #include <netinet/in.h>
3745916cd2Sjpk #include <netinet/in_systm.h>
3845916cd2Sjpk #include <netinet/ip.h>
3945916cd2Sjpk #include <netinet/ip_icmp.h>
4045916cd2Sjpk #include <netinet/tcp.h>
4145916cd2Sjpk #include <inet/common.h>
4245916cd2Sjpk #include <inet/ipclassifier.h>
4345916cd2Sjpk #include <inet/ip.h>
4445916cd2Sjpk #include <inet/mib2.h>
4545916cd2Sjpk #include <inet/nd.h>
4645916cd2Sjpk #include <inet/tcp.h>
4745916cd2Sjpk #include <inet/ip_rts.h>
4845916cd2Sjpk #include <inet/ip_ire.h>
4945916cd2Sjpk #include <inet/ip_if.h>
5045916cd2Sjpk #include <sys/modhash.h>
5145916cd2Sjpk 
5245916cd2Sjpk #include <sys/tsol/label.h>
5345916cd2Sjpk #include <sys/tsol/label_macro.h>
5445916cd2Sjpk #include <sys/tsol/tnet.h>
5545916cd2Sjpk #include <sys/tsol/tndb.h>
5645916cd2Sjpk #include <sys/strsun.h>
5745916cd2Sjpk 
/*
 * Tunable selecting strict error-reply behavior (TCP RST and ICMP
 * Unreachable).
 */
int tsol_strict_error;
6045916cd2Sjpk 
/*
 * Some notes on the Trusted Solaris IRE gateway security attributes:
 *
 * When running in Trusted mode, the routing subsystem determines whether or
 * not a packet can be delivered to an off-link host (not directly reachable
 * through an interface) based on the accreditation checks of the packet's
 * security attributes against those associated with the next-hop gateway.
 *
 * The next-hop gateway's security attributes can be derived from two sources
 * (in order of preference): route-related and the host database.  A Trusted
 * system must be configured with at least the host database containing an
 * entry for the next-hop gateway, or otherwise no accreditation checks can
 * be performed, which may result in the inability to send packets to any
 * off-link destination host.
 *
 * The major differences between the two sources are the number and type of
 * security attributes used for accreditation checks.  A host database entry
 * can contain at most one set of security attributes, specific only to the
 * next-hop gateway.  In contrast, route-related security attributes are made
 * up of a collection of security attributes for the distant networks, and
 * are grouped together per next-hop gateway used to reach those networks.
 * This is the preferred method, and the routing subsystem will fall back to
 * the host database entry only if there are no route-related attributes
 * associated with the next-hop gateway.
 *
 * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
 * INTERFACE type) are initialized to contain a placeholder to store this
 * information.  The ire_gw_secattr structure gets allocated, initialized
 * and associated with the IRE during the time of the IRE creation.  The
 * initialization process also includes resolving the host database entry
 * of the next-hop gateway for fallback purposes.  It does not include any
 * route-related attribute setup, as that process comes separately as part
 * of the route requests (add/change) made to the routing subsystem.
 *
 * The underlying logic which involves associating IREs with the gateway
 * security attributes is represented by the following data structures:
 *
 * tsol_gcdb_t, or "gcdb"
 *
 *	- This is a system-wide collection of records containing the
 *	  currently used route-related security attributes, which are fed
 *	  through the routing socket interface, e.g. "route add/change".
 *
 * tsol_gc_t, or "gc"
 *
 *	- This is the gateway credential structure, and it provides the
 *	  only mechanism to access the contents of gcdb.  More than one gc
 *	  entry may refer to the same gcdb record.  gc's in the system are
 *	  grouped according to the next-hop gateway address.
 *
 * tsol_gcgrp_t, or "gcgrp"
 *
 *	- Group of gateway credentials, and is unique per next-hop gateway
 *	  address.  When the group is not empty, i.e. when gcgrp_count is
 *	  greater than zero, it contains one or more gc's, each pointing to
 *	  a gcdb record which indicates the gateway security attributes
 *	  associated with the next-hop gateway.
 *
 * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
 *
 * igsa_lock
 *
 *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
 *
 * igsa_rhc
 *
 *	- Remote host cache database entry of next-hop gateway.  This is
 *	  used in the case when there are no route-related attributes
 *	  configured for the IRE.
 *
 * igsa_gc
 *
 *	- A set of route-related attributes that only get set for prefix
 *	  IREs.  If this is non-NULL, the prefix IRE has been associated
 *	  with a set of gateway security attributes by way of route add/
 *	  change functionality.
 */
13845916cd2Sjpk 
13945916cd2Sjpk static kmem_cache_t *ire_gw_secattr_cache;
14045916cd2Sjpk 
14145916cd2Sjpk #define	GCDB_HASH_SIZE	101
14245916cd2Sjpk #define	GCGRP_HASH_SIZE	101
14345916cd2Sjpk 
14445916cd2Sjpk #define	GCDB_REFRELE(p) {		\
14545916cd2Sjpk 	mutex_enter(&gcdb_lock);	\
14645916cd2Sjpk 	ASSERT((p)->gcdb_refcnt > 0);	\
14745916cd2Sjpk 	if (--((p)->gcdb_refcnt) == 0)	\
14845916cd2Sjpk 		gcdb_inactive(p);	\
14945916cd2Sjpk 	ASSERT(MUTEX_HELD(&gcdb_lock));	\
15045916cd2Sjpk 	mutex_exit(&gcdb_lock);		\
15145916cd2Sjpk }
15245916cd2Sjpk 
15345916cd2Sjpk static int gcdb_hash_size = GCDB_HASH_SIZE;
15445916cd2Sjpk static int gcgrp_hash_size = GCGRP_HASH_SIZE;
15545916cd2Sjpk static mod_hash_t *gcdb_hash;
15645916cd2Sjpk static mod_hash_t *gcgrp4_hash;
15745916cd2Sjpk static mod_hash_t *gcgrp6_hash;
15845916cd2Sjpk 
15945916cd2Sjpk static kmutex_t gcdb_lock;
16045916cd2Sjpk kmutex_t gcgrp_lock;
16145916cd2Sjpk 
16245916cd2Sjpk static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
16345916cd2Sjpk static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
16445916cd2Sjpk static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
16545916cd2Sjpk static void gcdb_inactive(tsol_gcdb_t *);
16645916cd2Sjpk 
16745916cd2Sjpk static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
16845916cd2Sjpk static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);
16945916cd2Sjpk 
17045916cd2Sjpk static int ire_gw_secattr_constructor(void *, void *, int);
17145916cd2Sjpk static void ire_gw_secattr_destructor(void *, void *);
17245916cd2Sjpk 
17345916cd2Sjpk void
tnet_init(void)17445916cd2Sjpk tnet_init(void)
17545916cd2Sjpk {
17645916cd2Sjpk 	ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
17745916cd2Sjpk 	    sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
17845916cd2Sjpk 	    ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
17945916cd2Sjpk 
18045916cd2Sjpk 	gcdb_hash = mod_hash_create_extended("gcdb_hash",
18145916cd2Sjpk 	    gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
18245916cd2Sjpk 	    gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
18345916cd2Sjpk 
18445916cd2Sjpk 	gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
18545916cd2Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
18645916cd2Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
18745916cd2Sjpk 
18845916cd2Sjpk 	gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
18945916cd2Sjpk 	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
19045916cd2Sjpk 	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
19145916cd2Sjpk 
19245916cd2Sjpk 	mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
19345916cd2Sjpk 	mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
19445916cd2Sjpk }
19545916cd2Sjpk 
19645916cd2Sjpk void
tnet_fini(void)19745916cd2Sjpk tnet_fini(void)
19845916cd2Sjpk {
19945916cd2Sjpk 	kmem_cache_destroy(ire_gw_secattr_cache);
20045916cd2Sjpk 	mod_hash_destroy_hash(gcdb_hash);
20145916cd2Sjpk 	mod_hash_destroy_hash(gcgrp4_hash);
20245916cd2Sjpk 	mod_hash_destroy_hash(gcgrp6_hash);
20345916cd2Sjpk 	mutex_destroy(&gcdb_lock);
20445916cd2Sjpk 	mutex_destroy(&gcgrp_lock);
20545916cd2Sjpk }
20645916cd2Sjpk 
20745916cd2Sjpk /* ARGSUSED */
20845916cd2Sjpk static int
ire_gw_secattr_constructor(void * buf,void * cdrarg,int kmflags)20945916cd2Sjpk ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
21045916cd2Sjpk {
21145916cd2Sjpk 	tsol_ire_gw_secattr_t *attrp = buf;
21245916cd2Sjpk 
21345916cd2Sjpk 	mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
21445916cd2Sjpk 
21545916cd2Sjpk 	attrp->igsa_rhc = NULL;
21645916cd2Sjpk 	attrp->igsa_gc = NULL;
21745916cd2Sjpk 
21845916cd2Sjpk 	return (0);
21945916cd2Sjpk }
22045916cd2Sjpk 
22145916cd2Sjpk /* ARGSUSED */
22245916cd2Sjpk static void
ire_gw_secattr_destructor(void * buf,void * cdrarg)22345916cd2Sjpk ire_gw_secattr_destructor(void *buf, void *cdrarg)
22445916cd2Sjpk {
22545916cd2Sjpk 	tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
22645916cd2Sjpk 
22745916cd2Sjpk 	mutex_destroy(&attrp->igsa_lock);
22845916cd2Sjpk }
22945916cd2Sjpk 
23045916cd2Sjpk tsol_ire_gw_secattr_t *
ire_gw_secattr_alloc(int kmflags)23145916cd2Sjpk ire_gw_secattr_alloc(int kmflags)
23245916cd2Sjpk {
23345916cd2Sjpk 	return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
23445916cd2Sjpk }
23545916cd2Sjpk 
23645916cd2Sjpk void
ire_gw_secattr_free(tsol_ire_gw_secattr_t * attrp)23745916cd2Sjpk ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
23845916cd2Sjpk {
23945916cd2Sjpk 	ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));
24045916cd2Sjpk 
24145916cd2Sjpk 	if (attrp->igsa_rhc != NULL) {
24245916cd2Sjpk 		TNRHC_RELE(attrp->igsa_rhc);
24345916cd2Sjpk 		attrp->igsa_rhc = NULL;
24445916cd2Sjpk 	}
24545916cd2Sjpk 
24645916cd2Sjpk 	if (attrp->igsa_gc != NULL) {
24745916cd2Sjpk 		GC_REFRELE(attrp->igsa_gc);
24845916cd2Sjpk 		attrp->igsa_gc = NULL;
24945916cd2Sjpk 	}
25045916cd2Sjpk 
25145916cd2Sjpk 	ASSERT(attrp->igsa_rhc == NULL);
25245916cd2Sjpk 	ASSERT(attrp->igsa_gc == NULL);
25345916cd2Sjpk 
25445916cd2Sjpk 	kmem_cache_free(ire_gw_secattr_cache, attrp);
25545916cd2Sjpk }
25645916cd2Sjpk 
25745916cd2Sjpk /* ARGSUSED */
25845916cd2Sjpk static uint_t
gcdb_hash_by_secattr(void * hash_data,mod_hash_key_t key)25945916cd2Sjpk gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
26045916cd2Sjpk {
26145916cd2Sjpk 	const struct rtsa_s *rp = (struct rtsa_s *)key;
26245916cd2Sjpk 	const uint32_t *up, *ue;
26345916cd2Sjpk 	uint_t hash;
26445916cd2Sjpk 	int i;
26545916cd2Sjpk 
26645916cd2Sjpk 	ASSERT(rp != NULL);
26745916cd2Sjpk 
26845916cd2Sjpk 	/* See comments in hash_bylabel in zone.c for details */
26945916cd2Sjpk 	hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
27045916cd2Sjpk 	up = (const uint32_t *)&rp->rtsa_slrange;
27145916cd2Sjpk 	ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
27245916cd2Sjpk 	i = 1;
27345916cd2Sjpk 	while (up < ue) {
27445916cd2Sjpk 		/* using 2^n + 1, 1 <= n <= 16 as source of many primes */
27545916cd2Sjpk 		hash += *up + (*up << ((i % 16) + 1));
27645916cd2Sjpk 		up++;
27745916cd2Sjpk 		i++;
27845916cd2Sjpk 	}
27945916cd2Sjpk 	return (hash);
28045916cd2Sjpk }
28145916cd2Sjpk 
28245916cd2Sjpk static int
gcdb_hash_cmp(mod_hash_key_t key1,mod_hash_key_t key2)28345916cd2Sjpk gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
28445916cd2Sjpk {
28545916cd2Sjpk 	struct rtsa_s *rp1 = (struct rtsa_s *)key1;
28645916cd2Sjpk 	struct rtsa_s *rp2 = (struct rtsa_s *)key2;
28745916cd2Sjpk 
28845916cd2Sjpk 	ASSERT(rp1 != NULL && rp2 != NULL);
28945916cd2Sjpk 
29045916cd2Sjpk 	if (blequal(&rp1->rtsa_slrange.lower_bound,
29145916cd2Sjpk 	    &rp2->rtsa_slrange.lower_bound) &&
29245916cd2Sjpk 	    blequal(&rp1->rtsa_slrange.upper_bound,
29345916cd2Sjpk 	    &rp2->rtsa_slrange.upper_bound) &&
29445916cd2Sjpk 	    rp1->rtsa_doi == rp2->rtsa_doi)
29545916cd2Sjpk 		return (0);
29645916cd2Sjpk 
29745916cd2Sjpk 	/* No match; not found */
29845916cd2Sjpk 	return (-1);
29945916cd2Sjpk }
30045916cd2Sjpk 
30145916cd2Sjpk /* ARGSUSED */
30245916cd2Sjpk static uint_t
gcgrp_hash_by_addr(void * hash_data,mod_hash_key_t key)30345916cd2Sjpk gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
30445916cd2Sjpk {
30545916cd2Sjpk 	tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
30645916cd2Sjpk 	uint_t		idx = 0;
30745916cd2Sjpk 	uint32_t	*ap;
30845916cd2Sjpk 
30945916cd2Sjpk 	ASSERT(ga != NULL);
31045916cd2Sjpk 	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
31145916cd2Sjpk 
31245916cd2Sjpk 	ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
31345916cd2Sjpk 	idx ^= *ap++;
31445916cd2Sjpk 	idx ^= *ap++;
31545916cd2Sjpk 	idx ^= *ap++;
31645916cd2Sjpk 	idx ^= *ap;
31745916cd2Sjpk 
31845916cd2Sjpk 	return (idx);
31945916cd2Sjpk }
32045916cd2Sjpk 
32145916cd2Sjpk static int
gcgrp_hash_cmp(mod_hash_key_t key1,mod_hash_key_t key2)32245916cd2Sjpk gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
32345916cd2Sjpk {
32445916cd2Sjpk 	tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
32545916cd2Sjpk 	tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
32645916cd2Sjpk 
32745916cd2Sjpk 	ASSERT(ga1 != NULL && ga2 != NULL);
32845916cd2Sjpk 
32945916cd2Sjpk 	/* Address family must match */
33045916cd2Sjpk 	if (ga1->ga_af != ga2->ga_af)
33145916cd2Sjpk 		return (-1);
33245916cd2Sjpk 
33345916cd2Sjpk 	if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
33445916cd2Sjpk 	    ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
33545916cd2Sjpk 	    ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
33645916cd2Sjpk 	    ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
33745916cd2Sjpk 		return (0);
33845916cd2Sjpk 
33945916cd2Sjpk 	/* No match; not found */
34045916cd2Sjpk 	return (-1);
34145916cd2Sjpk }
34245916cd2Sjpk 
34345916cd2Sjpk #define	RTSAFLAGS	"\20\11cipso\3doi\2max_sl\1min_sl"
34445916cd2Sjpk 
34545916cd2Sjpk int
rtsa_validate(const struct rtsa_s * rp)34645916cd2Sjpk rtsa_validate(const struct rtsa_s *rp)
34745916cd2Sjpk {
34845916cd2Sjpk 	uint32_t mask = rp->rtsa_mask;
34945916cd2Sjpk 
35045916cd2Sjpk 	/* RTSA_CIPSO must be set, and DOI must not be zero */
35145916cd2Sjpk 	if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
35245916cd2Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
35345916cd2Sjpk 		    "rtsa(1) lacks flag or has 0 doi.",
35445916cd2Sjpk 		    rtsa_s *, rp);
35545916cd2Sjpk 		return (EINVAL);
35645916cd2Sjpk 	}
35745916cd2Sjpk 	/*
35845916cd2Sjpk 	 * SL range must be specified, and it must have its
35945916cd2Sjpk 	 * upper bound dominating its lower bound.
36045916cd2Sjpk 	 */
36145916cd2Sjpk 	if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
36245916cd2Sjpk 	    !bldominates(&rp->rtsa_slrange.upper_bound,
36345916cd2Sjpk 	    &rp->rtsa_slrange.lower_bound)) {
36445916cd2Sjpk 		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
36545916cd2Sjpk 		    "rtsa(1) min_sl and max_sl not set or max_sl is "
36645916cd2Sjpk 		    "not dominating.", rtsa_s *, rp);
36745916cd2Sjpk 		return (EINVAL);
36845916cd2Sjpk 	}
36945916cd2Sjpk 	return (0);
37045916cd2Sjpk }
37145916cd2Sjpk 
37245916cd2Sjpk /*
37345916cd2Sjpk  * A brief explanation of the reference counting scheme:
37445916cd2Sjpk  *
37545916cd2Sjpk  * Apart from dynamic references due to to reference holds done
37645916cd2Sjpk  * actively by threads, we have the following references:
37745916cd2Sjpk  *
37845916cd2Sjpk  * gcdb_refcnt:
37945916cd2Sjpk  *	- Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
38045916cd2Sjpk  *	  to the gcdb_refcnt.
38145916cd2Sjpk  *
38245916cd2Sjpk  * gc_refcnt:
38345916cd2Sjpk  *	- A prefix IRE that points to an igsa_gc contributes a reference
38445916cd2Sjpk  *	  to the gc_refcnt.
38545916cd2Sjpk  *
38645916cd2Sjpk  * gcgrp_refcnt:
38745916cd2Sjpk  *	- Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
38845916cd2Sjpk  *	  a reference to the gcgrp_refcnt.
38945916cd2Sjpk  */
39045916cd2Sjpk static tsol_gcdb_t *
gcdb_lookup(struct rtsa_s * rp,boolean_t alloc)39145916cd2Sjpk gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
39245916cd2Sjpk {
39345916cd2Sjpk 	tsol_gcdb_t *gcdb = NULL;
39445916cd2Sjpk 
39545916cd2Sjpk 	if (rtsa_validate(rp) != 0)
39645916cd2Sjpk 		return (NULL);
39745916cd2Sjpk 
39845916cd2Sjpk 	mutex_enter(&gcdb_lock);
39945916cd2Sjpk 	/* Find a copy in the cache; otherwise, create one and cache it */
40045916cd2Sjpk 	if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
40145916cd2Sjpk 	    (mod_hash_val_t *)&gcdb) == 0) {
40245916cd2Sjpk 		gcdb->gcdb_refcnt++;
40345916cd2Sjpk 		ASSERT(gcdb->gcdb_refcnt != 0);
40445916cd2Sjpk 
40545916cd2Sjpk 		DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
40645916cd2Sjpk 		    "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
40745916cd2Sjpk 	} else if (alloc) {
40845916cd2Sjpk 		gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
40945916cd2Sjpk 		if (gcdb != NULL) {
41045916cd2Sjpk 			gcdb->gcdb_refcnt = 1;
41145916cd2Sjpk 			gcdb->gcdb_mask = rp->rtsa_mask;
41245916cd2Sjpk 			gcdb->gcdb_doi = rp->rtsa_doi;
41345916cd2Sjpk 			gcdb->gcdb_slrange = rp->rtsa_slrange;
41445916cd2Sjpk 
41545916cd2Sjpk 			if (mod_hash_insert(gcdb_hash,
41645916cd2Sjpk 			    (mod_hash_key_t)&gcdb->gcdb_attr,
41745916cd2Sjpk 			    (mod_hash_val_t)gcdb) != 0) {
41845916cd2Sjpk 				mutex_exit(&gcdb_lock);
41945916cd2Sjpk 				kmem_free(gcdb, sizeof (*gcdb));
42045916cd2Sjpk 				return (NULL);
42145916cd2Sjpk 			}
42245916cd2Sjpk 
42345916cd2Sjpk 			DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
42445916cd2Sjpk 			    "gcdb(1) inserted in gcdb_hash(global)",
42545916cd2Sjpk 			    tsol_gcdb_t *, gcdb);
42645916cd2Sjpk 		}
42745916cd2Sjpk 	}
42845916cd2Sjpk 	mutex_exit(&gcdb_lock);
42945916cd2Sjpk 	return (gcdb);
43045916cd2Sjpk }
43145916cd2Sjpk 
43245916cd2Sjpk static void
gcdb_inactive(tsol_gcdb_t * gcdb)43345916cd2Sjpk gcdb_inactive(tsol_gcdb_t *gcdb)
43445916cd2Sjpk {
43545916cd2Sjpk 	ASSERT(MUTEX_HELD(&gcdb_lock));
43645916cd2Sjpk 	ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);
43745916cd2Sjpk 
43845916cd2Sjpk 	(void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
43945916cd2Sjpk 	    (mod_hash_val_t *)&gcdb);
44045916cd2Sjpk 
44145916cd2Sjpk 	DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
44245916cd2Sjpk 	    "gcdb(1) removed from gcdb_hash(global)",
44345916cd2Sjpk 	    tsol_gcdb_t *, gcdb);
44445916cd2Sjpk 	kmem_free(gcdb, sizeof (*gcdb));
44545916cd2Sjpk }
44645916cd2Sjpk 
44745916cd2Sjpk tsol_gc_t *
gc_create(struct rtsa_s * rp,tsol_gcgrp_t * gcgrp,boolean_t * gcgrp_xtrarefp)44845916cd2Sjpk gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
449