xref: /illumos-gate/usr/src/uts/common/os/labelsys.c (revision 2d6eb4a5)
145916cd2Sjpk /*
245916cd2Sjpk  * CDDL HEADER START
345916cd2Sjpk  *
445916cd2Sjpk  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
745916cd2Sjpk  *
845916cd2Sjpk  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
945916cd2Sjpk  * or http://www.opensolaris.org/os/licensing.
1045916cd2Sjpk  * See the License for the specific language governing permissions
1145916cd2Sjpk  * and limitations under the License.
1245916cd2Sjpk  *
1345916cd2Sjpk  * When distributing Covered Code, include this CDDL HEADER in each
1445916cd2Sjpk  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
1545916cd2Sjpk  * If applicable, add the following below this CDDL HEADER, with the
1645916cd2Sjpk  * fields enclosed by brackets "[]" replaced with your own identifying
1745916cd2Sjpk  * information: Portions Copyright [yyyy] [name of copyright owner]
1845916cd2Sjpk  *
1945916cd2Sjpk  * CDDL HEADER END
2045916cd2Sjpk  */
2145916cd2Sjpk /*
2245916cd2Sjpk  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
2345916cd2Sjpk  * Use is subject to license terms.
2445916cd2Sjpk  */
2545916cd2Sjpk 
2645916cd2Sjpk #include <sys/systm.h>
2745916cd2Sjpk #include <sys/types.h>
2845916cd2Sjpk #include <sys/stream.h>
2945916cd2Sjpk #include <sys/kmem.h>
3045916cd2Sjpk #include <sys/strsubr.h>
3145916cd2Sjpk #include <sys/cmn_err.h>
3245916cd2Sjpk #include <sys/debug.h>
3345916cd2Sjpk #include <sys/param.h>
3445916cd2Sjpk #include <sys/model.h>
3545916cd2Sjpk #include <sys/errno.h>
3645916cd2Sjpk #include <sys/modhash.h>
3745916cd2Sjpk 
3845916cd2Sjpk #include <sys/policy.h>
3945916cd2Sjpk #include <sys/tsol/label.h>
4045916cd2Sjpk #include <sys/tsol/tsyscall.h>
4145916cd2Sjpk #include <sys/tsol/tndb.h>
4245916cd2Sjpk #include <sys/tsol/tnet.h>
4345916cd2Sjpk #include <sys/disp.h>
4445916cd2Sjpk 
4545916cd2Sjpk #include <inet/ip.h>
4645916cd2Sjpk #include <inet/ip6.h>
4745916cd2Sjpk #include <sys/sdt.h>
4845916cd2Sjpk 
4945916cd2Sjpk static mod_hash_t *tpc_name_hash;	/* hash of cache entries by name */
5045916cd2Sjpk static kmutex_t tpc_lock;
5145916cd2Sjpk 
5245916cd2Sjpk static tsol_tpc_t *tpc_unlab;
5345916cd2Sjpk 
5445916cd2Sjpk /*
5545916cd2Sjpk  * tnrhc_table and tnrhc_table_v6 are similar to the IP forwarding tables
5645916cd2Sjpk  * in organization and search. The tnrhc_table[_v6] is an array of 33/129
5745916cd2Sjpk  * pointers to the 33/129 tnrhc tables indexed by the prefix length.
58*bfabfc35Skp  * A largest prefix match search is done by find_rhc and it walks the
5945916cd2Sjpk  * tables from the most specific to the least specific table. Table 0
6045916cd2Sjpk  * corresponds to the single entry for 0.0.0.0/0 or ::0/0.
6145916cd2Sjpk  */
6245916cd2Sjpk tnrhc_hash_t *tnrhc_table[TSOL_MASK_TABLE_SIZE];
6345916cd2Sjpk tnrhc_hash_t *tnrhc_table_v6[TSOL_MASK_TABLE_SIZE_V6];
6445916cd2Sjpk kmutex_t tnrhc_g_lock;
6545916cd2Sjpk 
6645916cd2Sjpk static void tsol_create_i_tmpls(void);
6745916cd2Sjpk 
6845916cd2Sjpk static void tsol_create_i_tnrh(const tnaddr_t *);
6945916cd2Sjpk 
7045916cd2Sjpk /* List of MLPs on valid on shared addresses */
7145916cd2Sjpk static tsol_mlp_list_t shared_mlps;
7245916cd2Sjpk 
7345916cd2Sjpk /*
7445916cd2Sjpk  * Convert length for a mask to the mask.
7545916cd2Sjpk  */
7645916cd2Sjpk static ipaddr_t
tsol_plen_to_mask(uint_t masklen)7745916cd2Sjpk tsol_plen_to_mask(uint_t masklen)
7845916cd2Sjpk {
7945916cd2Sjpk 	return (masklen == 0 ? 0 : htonl(IP_HOST_MASK << (IP_ABITS - masklen)));
8045916cd2Sjpk }
8145916cd2Sjpk 
8245916cd2Sjpk /*
8345916cd2Sjpk  * Convert a prefix length to the mask for that prefix.
8445916cd2Sjpk  * Returns the argument bitmask.
8545916cd2Sjpk  */
8645916cd2Sjpk static void
tsol_plen_to_mask_v6(uint_t plen,in6_addr_t * bitmask)8745916cd2Sjpk tsol_plen_to_mask_v6(uint_t plen, in6_addr_t *bitmask)
8845916cd2Sjpk {
8945916cd2Sjpk 	uint32_t *ptr;
9045916cd2Sjpk 
9145916cd2Sjpk 	ASSERT(plen <= IPV6_ABITS);
9245916cd2Sjpk 
9345916cd2Sjpk 	ptr = (uint32_t *)bitmask;
9445916cd2Sjpk 	while (plen >= 32) {
9545916cd2Sjpk 		*ptr++ = 0xffffffffU;
9645916cd2Sjpk 		plen -= 32;
9745916cd2Sjpk 	}
9845916cd2Sjpk 	if (plen > 0)
9945916cd2Sjpk 		*ptr++ = htonl(0xffffffff << (32 - plen));
10045916cd2Sjpk 	while (ptr < (uint32_t *)(bitmask + 1))
10145916cd2Sjpk 		*ptr++ = 0;
10245916cd2Sjpk }
10345916cd2Sjpk 
10445916cd2Sjpk boolean_t
tnrhc_init_table(tnrhc_hash_t * table[],short prefix_len,int kmflag)10545916cd2Sjpk tnrhc_init_table(tnrhc_hash_t *table[], short prefix_len, int kmflag)
10645916cd2Sjpk {
10745916cd2Sjpk 	int	i;
10845916cd2Sjpk 
10945916cd2Sjpk 	mutex_enter(&tnrhc_g_lock);
11045916cd2Sjpk 
11145916cd2Sjpk 	if (table[prefix_len] == NULL) {
11245916cd2Sjpk 		table[prefix_len] = (tnrhc_hash_t *)
11345916cd2Sjpk 		    kmem_zalloc(TNRHC_SIZE * sizeof (tnrhc_hash_t), kmflag);
11445916cd2Sjpk 		if (table[prefix_len] == NULL) {
11545916cd2Sjpk 			mutex_exit(&tnrhc_g_lock);
11645916cd2Sjpk 			return (B_FALSE);
11745916cd2Sjpk 		}
11845916cd2Sjpk 		for (i = 0; i < TNRHC_SIZE; i++) {
11945916cd2Sjpk 			mutex_init(&table[prefix_len][i].tnrh_lock,
12045916cd2Sjpk 			    NULL, MUTEX_DEFAULT, 0);
12145916cd2Sjpk 		}
12245916cd2Sjpk 	}
12345916cd2Sjpk 	mutex_exit(&tnrhc_g_lock);
12445916cd2Sjpk 	return (B_TRUE);
12545916cd2Sjpk }
12645916cd2Sjpk 
12745916cd2Sjpk void
tcache_init(void)12845916cd2Sjpk tcache_init(void)
12945916cd2Sjpk {
13045916cd2Sjpk 	tnaddr_t address;
13145916cd2Sjpk 
13245916cd2Sjpk 	/*
13345916cd2Sjpk 	 * Note: unable to use mod_hash_create_strhash here, since it's
13445916cd2Sjpk 	 * assymetric.  It assumes that the user has allocated exactly
13545916cd2Sjpk 	 * strlen(key) + 1 bytes for the key when inserted, and attempts to
13645916cd2Sjpk 	 * kmem_free that memory on a delete.
13745916cd2Sjpk 	 */
13845916cd2Sjpk 	tpc_name_hash = mod_hash_create_extended("tnrhtpc_by_name", 256,
13945916cd2Sjpk 	    mod_hash_null_keydtor,  mod_hash_null_valdtor, mod_hash_bystr,
14045916cd2Sjpk 	    NULL, mod_hash_strkey_cmp, KM_SLEEP);
14145916cd2Sjpk 	mutex_init(&tpc_lock, NULL, MUTEX_DEFAULT, NULL);
14245916cd2Sjpk 
14345916cd2Sjpk 	mutex_init(&tnrhc_g_lock, NULL, MUTEX_DEFAULT, NULL);
14445916cd2Sjpk 
14545916cd2Sjpk 	/* label_init always called before tcache_init */
14645916cd2Sjpk 	ASSERT(l_admin_low != NULL && l_admin_high != NULL);
14745916cd2Sjpk 
14845916cd2Sjpk 	/* Initialize the zeroth table prior to loading the 0.0.0.0 entry */
14945916cd2Sjpk 	(void) tnrhc_init_table(tnrhc_table, 0, KM_SLEEP);
15045916cd2Sjpk 	(void) tnrhc_init_table(tnrhc_table_v6, 0, KM_SLEEP);
15145916cd2Sjpk 	/*
15245916cd2Sjpk 	 * create an internal host template called "_unlab"
15345916cd2Sjpk 	 */
15445916cd2Sjpk 	tsol_create_i_tmpls();
15545916cd2Sjpk 
15645916cd2Sjpk 	/*
15745916cd2Sjpk 	 * create a host entry, 0.0.0.0 = _unlab
15845916cd2Sjpk 	 */
15945916cd2Sjpk 	bzero(&address, sizeof (tnaddr_t));
16045916cd2Sjpk 	address.ta_family = AF_INET;
16145916cd2Sjpk 	tsol_create_i_tnrh(&address);
16245916cd2Sjpk 
16345916cd2Sjpk 	/*
16445916cd2Sjpk 	 * create a host entry, ::0 = _unlab
16545916cd2Sjpk 	 */
16645916cd2Sjpk 	address.ta_family = AF_INET6;
16745916cd2Sjpk 	tsol_create_i_tnrh(&address);
16845916cd2Sjpk 
16945916cd2Sjpk 	rw_init(&shared_mlps.mlpl_rwlock, NULL, RW_DEFAULT, NULL);
17045916cd2Sjpk }
17145916cd2Sjpk 
17245916cd2Sjpk /* Called only by the TNRHC_RELE macro when the refcount goes to zero. */
17345916cd2Sjpk void
tnrhc_free(tsol_tnrhc_t * tnrhc)17445916cd2Sjpk tnrhc_free(tsol_tnrhc_t *tnrhc)
17545916cd2Sjpk {
17645916cd2Sjpk 	/*
17745916cd2Sjpk 	 * We assert rhc_invalid here to make sure that no new thread could
17845916cd2Sjpk 	 * possibly end up finding this entry.  If it could, then the
17945916cd2Sjpk 	 * mutex_destroy would panic.
18045916cd2Sjpk 	 */
18145916cd2Sjpk 	DTRACE_PROBE1(tx__tndb__l3__tnrhcfree, tsol_tnrhc_t *, tnrhc);
18245916cd2Sjpk 	ASSERT(tnrhc->rhc_next == NULL && tnrhc->rhc_invalid);
18345916cd2Sjpk 	mutex_exit(&tnrhc->rhc_lock);
18445916cd2Sjpk 	mutex_destroy(&tnrhc->rhc_lock);
18545916cd2Sjpk 	if (tnrhc->rhc_tpc != NULL)
18645916cd2Sjpk 		TPC_RELE(tnrhc->rhc_tpc);
18745916cd2Sjpk 	kmem_free(tnrhc, sizeof (*tnrhc));
18845916cd2Sjpk }
18945916cd2Sjpk 
19045916cd2Sjpk /* Called only by the TPC_RELE macro when the refcount goes to zero. */
19145916cd2Sjpk void
tpc_free(tsol_tpc_t * tpc)19245916cd2Sjpk tpc_free(tsol_tpc_t *tpc)
19345916cd2Sjpk {
19445916cd2Sjpk 	DTRACE_PROBE1(tx__tndb__l3__tpcfree, tsol_tpc_t *, tpc);
19545916cd2Sjpk 	ASSERT(tpc->tpc_invalid);
19645916cd2Sjpk 	mutex_exit(&tpc->tpc_lock);
19745916cd2Sjpk 	mutex_destroy(&tpc->tpc_lock);
19845916cd2Sjpk 	kmem_free(tpc, sizeof (*tpc));
19945916cd2Sjpk }
20045916cd2Sjpk 
20145916cd2Sjpk /*
20245916cd2Sjpk  * Find and hold a reference to a template entry by name.  Ignores entries that
20345916cd2Sjpk  * are being deleted.
20445916cd2Sjpk  */
20545916cd2Sjpk static tsol_tpc_t *
tnrhtp_find(const char * name,mod_hash_t * hash)20645916cd2Sjpk tnrhtp_find(const char *name, mod_hash_t *hash)
20745916cd2Sjpk {
20845916cd2Sjpk 	mod_hash_val_t hv;
20945916cd2Sjpk 	tsol_tpc_t *tpc = NULL;
21045916cd2Sjpk 
21145916cd2Sjpk 	mutex_enter(&tpc_lock);
21245916cd2Sjpk 	if (mod_hash_find(hash, (mod_hash_key_t)name, &hv) == 0) {
21345916cd2Sjpk 		tpc = (tsol_tpc_t *)hv;
21445916cd2Sjpk 		if (tpc->tpc_invalid)
21545916cd2Sjpk 			tpc = NULL;
21645916cd2Sjpk 		else
21745916cd2Sjpk 			TPC_HOLD(tpc);
21845916cd2Sjpk 	}
21945916cd2Sjpk 	mutex_exit(&tpc_lock);
22045916cd2Sjpk 	return (tpc);
22145916cd2Sjpk }
22245916cd2Sjpk 
22345916cd2Sjpk static int
tnrh_delete(const tsol_rhent_t * rhent)22445916cd2Sjpk tnrh_delete(const tsol_rhent_t *rhent)
22545916cd2Sjpk {
22645916cd2Sjpk 	tsol_tnrhc_t *current;
22745916cd2Sjpk 	tsol_tnrhc_t **prevp;
22845916cd2Sjpk 	ipaddr_t tmpmask;
22945916cd2Sjpk 	in6_addr_t tmpmask_v6;
23045916cd2Sjpk 	tnrhc_hash_t *tnrhc_hash;
23145916cd2Sjpk 
23245916cd2Sjpk 	if (rhent->rh_address.ta_family == AF_INET) {
23345916cd2Sjpk 		if (rhent->rh_prefix < 0 || rhent->rh_prefix > IP_ABITS)
23445916cd2Sjpk 			return (EINVAL);
23545916cd2Sjpk 		if (tnrhc_table[rhent->rh_prefix] == NULL)
23645916cd2Sjpk 			return (ENOENT);
23745916cd2Sjpk 		tmpmask = tsol_plen_to_mask(rhent->rh_prefix);
23845916cd2Sjpk 		tnrhc_hash = &tnrhc_table[rhent->rh_prefix][
23945916cd2Sjpk 		    TSOL_ADDR_HASH(rhent->rh_address.ta_addr_v4.s_addr &
24045916cd2Sjpk 		    tmpmask, TNRHC_SIZE)];
24145916cd2Sjpk 	} else if (rhent->rh_address.ta_family == AF_INET6) {
24245916cd2Sjpk 		if (rhent->rh_prefix < 0 || rhent->rh_prefix > IPV6_ABITS)
24345916cd2Sjpk 			return (EINVAL);
24445916cd2Sjpk 		if (tnrhc_table_v6[rhent->rh_prefix] == NULL)
24545916cd2Sjpk 			return (ENOENT);
24645916cd2Sjpk 		tsol_plen_to_mask_v6(rhent->rh_prefix, &tmpmask_v6);
24745916cd2Sjpk 		tnrhc_hash = &tnrhc_table_v6[rhent->rh_prefix][
24845916cd2Sjpk 		    TSOL_ADDR_MASK_HASH_V6(rhent->rh_address.ta_addr_v6,
24945916cd2Sjpk 		    tmpmask_v6, TNRHC_SIZE)];
25045916cd2Sjpk 	} else {
25145916cd2Sjpk 		return (EAFNOSUPPORT);
25245916cd2Sjpk 	}
25345916cd2Sjpk 
25445916cd2Sjpk 	/* search for existing entry */
25545916cd2Sjpk 	mutex_enter(&tnrhc_hash->tnrh_lock);
25645916cd2Sjpk 	prevp = &tnrhc_hash->tnrh_list;
25745916cd2Sjpk 	while ((current = *prevp) != NULL) {
25845916cd2Sjpk 		if (TNADDR_EQ(&rhent->rh_address, &current->rhc_host))
25945916cd2Sjpk 			break;
26045916cd2Sjpk 		prevp = &current->rhc_next;
26145916cd2Sjpk 	}
26245916cd2Sjpk 
26345916cd2Sjpk 	if (current != NULL) {
26445916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete_existingrhentry);
26545916cd2Sjpk 		*prevp = current->rhc_next;
26645916cd2Sjpk 		mutex_enter(&current->rhc_lock);
26745916cd2Sjpk 		current->rhc_next = NULL;
26845916cd2Sjpk 		current->rhc_invalid = 1;
26945916cd2Sjpk 		mutex_exit(&current->rhc_lock);
27045916cd2Sjpk 		TNRHC_RELE(current);
27145916cd2Sjpk 	}
27245916cd2Sjpk 	mutex_exit(&tnrhc_hash->tnrh_lock);
27345916cd2Sjpk 	return (current == NULL ? ENOENT : 0);
27445916cd2Sjpk }
27545916cd2Sjpk 
27645916cd2Sjpk /*
27745916cd2Sjpk  * Flush all remote host entries from the database.
27845916cd2Sjpk  *
27945916cd2Sjpk  * Note that the htable arrays themselves do not have reference counters, so,
28045916cd2Sjpk  * unlike the remote host entries, they cannot be freed.
28145916cd2Sjpk  */
28245916cd2Sjpk static void
flush_rh_table(tnrhc_hash_t ** htable,int nbits)28345916cd2Sjpk flush_rh_table(tnrhc_hash_t **htable, int nbits)
28445916cd2Sjpk {
28545916cd2Sjpk 	tnrhc_hash_t *hent, *hend;
28645916cd2Sjpk 	tsol_tnrhc_t *rhc, *rhnext;
28745916cd2Sjpk 
28845916cd2Sjpk 	while (--nbits >= 0) {
28945916cd2Sjpk 		if ((hent = htable[nbits]) == NULL)
29045916cd2Sjpk 			continue;
29145916cd2Sjpk 		hend = hent + TNRHC_SIZE;
29245916cd2Sjpk 		while (hent < hend) {
29345916cd2Sjpk 			/*
29445916cd2Sjpk 			 * List walkers hold this lock during the walk.  It
29545916cd2Sjpk 			 * protects tnrh_list and rhc_next.
29645916cd2Sjpk 			 */
29745916cd2Sjpk 			mutex_enter(&hent->tnrh_lock);
29845916cd2Sjpk 			rhnext = hent->tnrh_list;
29945916cd2Sjpk 			hent->tnrh_list = NULL;
30045916cd2Sjpk 			mutex_exit(&hent->tnrh_lock);
30145916cd2Sjpk 			/*
30245916cd2Sjpk 			 * There may still be users of the rhcs at this point,
30345916cd2Sjpk 			 * but not of the list or its next pointer.  Thus, the
30445916cd2Sjpk 			 * only thing that would need to be done under a lock
30545916cd2Sjpk 			 * is setting the invalid bit, but that's atomic
30645916cd2Sjpk 			 * anyway, so no locks needed here.
30745916cd2Sjpk 			 */
30845916cd2Sjpk 			while ((rhc = rhnext) != NULL) {
30945916cd2Sjpk 				rhnext = rhc->rhc_next;
31045916cd2Sjpk 				rhc->rhc_next = NULL;
31145916cd2Sjpk 				rhc->rhc_invalid = 1;
31245916cd2Sjpk 				TNRHC_RELE(rhc);
31345916cd2Sjpk 			}
31445916cd2Sjpk 			hent++;
31545916cd2Sjpk 		}
31645916cd2Sjpk 	}
31745916cd2Sjpk }
31845916cd2Sjpk 
31945916cd2Sjpk /*
32045916cd2Sjpk  * Load a remote host entry into kernel cache.  Create a new one if a matching
32145916cd2Sjpk  * entry isn't found, otherwise replace the contents of the previous one by
32245916cd2Sjpk  * deleting it and recreating it.  (Delete and recreate is used to avoid
32345916cd2Sjpk  * allowing other threads to see an unstable data structure.)
32445916cd2Sjpk  *
32545916cd2Sjpk  * A "matching" entry is the one whose address matches that of the one
32645916cd2Sjpk  * being loaded.
32745916cd2Sjpk  *
32845916cd2Sjpk  * Return 0 for success, error code for failure.
32945916cd2Sjpk  */
330*bfabfc35Skp static int
tnrh_hash_add(tsol_tnrhc_t * new,short prefix)331*bfabfc35Skp tnrh_hash_add(tsol_tnrhc_t *new, short prefix)
33245916cd2Sjpk {
33345916cd2Sjpk 	tsol_tnrhc_t **rhp;
334*bfabfc35Skp 	tsol_tnrhc_t *rh;
33545916cd2Sjpk 	ipaddr_t tmpmask;
33645916cd2Sjpk 	in6_addr_t tmpmask_v6;
33745916cd2Sjpk 	tnrhc_hash_t *tnrhc_hash;
33845916cd2Sjpk 
33945916cd2Sjpk 	/* Find the existing entry, if any, leaving the hash locked */
340*bfabfc35Skp 	if (new->rhc_host.ta_family == AF_INET) {
341*bfabfc35Skp 		if (prefix < 0 || prefix > IP_ABITS)
34245916cd2Sjpk 			return (EINVAL);
343*bfabfc35Skp 		if (tnrhc_table[prefix] == NULL &&
344*bfabfc35Skp 		    !tnrhc_init_table(tnrhc_table, prefix,
34545916cd2Sjpk 		    KM_NOSLEEP))
34645916cd2Sjpk 			return (ENOMEM);
347*bfabfc35Skp 		tmpmask = tsol_plen_to_mask(prefix);
348*bfabfc35Skp 		tnrhc_hash = &tnrhc_table[prefix][
349*bfabfc35Skp 		    TSOL_ADDR_HASH(new->rhc_host.ta_addr_v4.s_addr &
35045916cd2Sjpk 		    tmpmask, TNRHC_SIZE)];
35145916cd2Sjpk 		mutex_enter(&tnrhc_hash->tnrh_lock);
35245916cd2Sjpk 		for (rhp = &tnrhc_hash->tnrh_list; (rh = *rhp) != NULL;
35345916cd2Sjpk 		    rhp = &rh->rhc_next) {
35445916cd2Sjpk 			ASSERT(rh->rhc_host.ta_family == AF_INET);
35545916cd2Sjpk 			if (((rh->rhc_host.ta_addr_v4.s_addr ^
356*bfabfc35Skp 			    new->rhc_host.ta_addr_v4.s_addr) & tmpmask) ==
35745916cd2Sjpk 			    0)
35845916cd2Sjpk 				break;
35945916cd2Sjpk 		}
360*bfabfc35Skp 	} else if (new->rhc_host.ta_family == AF_INET6) {
361*bfabfc35Skp 		if (prefix < 0 || prefix > IPV6_ABITS)
36245916cd2Sjpk 			return (EINVAL);
363*bfabfc35Skp 		if (tnrhc_table_v6[prefix] == NULL &&
364*bfabfc35Skp 		    !tnrhc_init_table(tnrhc_table_v6, prefix,
36545916cd2Sjpk 		    KM_NOSLEEP))
36645916cd2Sjpk 			return (ENOMEM);
367*bfabfc35Skp 		tsol_plen_to_mask_v6(prefix, &tmpmask_v6);
368*bfabfc35Skp 		tnrhc_hash = &tnrhc_table_v6[prefix][
369*bfabfc35Skp 		    TSOL_ADDR_MASK_HASH_V6(new->rhc_host.ta_addr_v6,
37045916cd2Sjpk 		    tmpmask_v6, TNRHC_SIZE)];
37145916cd2Sjpk 		mutex_enter(&tnrhc_hash->tnrh_lock);
37245916cd2Sjpk 		for (rhp = &tnrhc_hash->tnrh_list; (rh = *rhp) != NULL;
37345916cd2Sjpk 		    rhp = &rh->rhc_next) {
37445916cd2Sjpk 			ASSERT(rh->rhc_host.ta_family == AF_INET6);
37545916cd2Sjpk 			if (V6_MASK_EQ_2(rh->rhc_host.ta_addr_v6, tmpmask_v6,
376*bfabfc35Skp 			    new->rhc_host.ta_addr_v6))
37745916cd2Sjpk 				break;
37845916cd2Sjpk 		}
37945916cd2Sjpk 	} else {
38045916cd2Sjpk 		return (EAFNOSUPPORT);
38145916cd2Sjpk 	}
38245916cd2Sjpk 
38345916cd2Sjpk 	/* Clobber the old remote host entry. */
38445916cd2Sjpk 	if (rh != NULL) {
38545916cd2Sjpk 		ASSERT(!rh->rhc_invalid);
38645916cd2Sjpk 		rh->rhc_invalid = 1;
38745916cd2Sjpk 		*rhp = rh->rhc_next;
38845916cd2Sjpk 		rh->rhc_next = NULL;
389*bfabfc35Skp 		DTRACE_PROBE1(tx__tndb__l2__tnrhhashadd__invalidaterh,
390*bfabfc35Skp 		    tsol_tnrhc_t *, rh);
39145916cd2Sjpk 		TNRHC_RELE(rh);
39245916cd2Sjpk 	}
39345916cd2Sjpk 
394*bfabfc35Skp 	TNRHC_HOLD(new);
395*bfabfc35Skp 	new->rhc_next = tnrhc_hash->tnrh_list;
396*bfabfc35Skp 	tnrhc_hash->tnrh_list = new;
397*bfabfc35Skp 	DTRACE_PROBE1(tx__tndb__l2__tnrhhashadd__addedrh, tsol_tnrhc_t *, new);
398*bfabfc35Skp 	mutex_exit(&tnrhc_hash->tnrh_lock);
399*bfabfc35Skp 
400*bfabfc35Skp 	return (0);
401*bfabfc35Skp }
402*bfabfc35Skp 
403*bfabfc35Skp /*
404*bfabfc35Skp  * Load a remote host entry into kernel cache.
405*bfabfc35Skp  *
406*bfabfc35Skp  * Return 0 for success, error code for failure.
407*bfabfc35Skp  */
408*bfabfc35Skp int
tnrh_load(const tsol_rhent_t * rhent)409*bfabfc35Skp tnrh_load(const tsol_rhent_t *rhent)
410*bfabfc35Skp {
411*bfabfc35Skp 	tsol_tnrhc_t *new;
412*bfabfc35Skp 	tsol_tpc_t *tpc;
413*bfabfc35Skp 	int status;
414*bfabfc35Skp 
415*bfabfc35Skp 	/* Find and bump the reference count on the named template */
416*bfabfc35Skp 	if ((tpc = tnrhtp_find(rhent->rh_template, tpc_name_hash)) == NULL) {
417*bfabfc35Skp 		return (EINVAL);
418*bfabfc35Skp 	}
419*bfabfc35Skp 	ASSERT(tpc->tpc_tp.host_type == UNLABELED ||
420*bfabfc35Skp 	    tpc->tpc_tp.host_type == SUN_CIPSO);
421*bfabfc35Skp 
422*bfabfc35Skp 	if ((new = kmem_zalloc(sizeof (*new), KM_NOSLEEP)) == NULL) {
423*bfabfc35Skp 		TPC_RELE(tpc);
424*bfabfc35Skp 		return (ENOMEM);
425*bfabfc35Skp 	}
426*bfabfc35Skp 
42745916cd2Sjpk 	/* Initialize the new entry. */
42845916cd2Sjpk 	mutex_init(&new->rhc_lock, NULL, MUTEX_DEFAULT, NULL);
42945916cd2Sjpk 	new->rhc_host = rhent->rh_address;
43045916cd2Sjpk 
43145916cd2Sjpk 	/* The rhc now owns this tpc reference, so no TPC_RELE past here */
43245916cd2Sjpk 	new->rhc_tpc = tpc;
43345916cd2Sjpk 
434*bfabfc35Skp 	/*
435*bfabfc35Skp 	 * tnrh_hash_add handles the tnrh entry ref count for hash
436*bfabfc35Skp 	 * table inclusion. The ref count is incremented and decremented
437*bfabfc35Skp 	 * here to trigger deletion of the new hash table entry in the
438*bfabfc35Skp 	 * event that tnrh_hash_add fails.
439*bfabfc35Skp 	 */
44045916cd2Sjpk 	TNRHC_HOLD(new);
441*bfabfc35Skp 	status = tnrh_hash_add(new, rhent->rh_prefix);
442*bfabfc35Skp 	TNRHC_RELE(new);
44345916cd2Sjpk 
444*bfabfc35Skp 	return (status);
44545916cd2Sjpk }
44645916cd2Sjpk 
44745916cd2Sjpk static int
tnrh_get(tsol_rhent_t * rhent)44845916cd2Sjpk tnrh_get(tsol_rhent_t *rhent)
44945916cd2Sjpk {
45045916cd2Sjpk 	tsol_tpc_t *tpc;
45145916cd2Sjpk 
45245916cd2Sjpk 	switch (rhent->rh_address.ta_family) {
45345916cd2Sjpk 	case AF_INET:
45445916cd2Sjpk 		tpc = find_tpc(&rhent->rh_address.ta_addr_v4, IPV4_VERSION,
45545916cd2Sjpk 		    B_TRUE);
45645916cd2Sjpk 		break;
45745916cd2Sjpk 
45845916cd2Sjpk 	case AF_INET6:
45945916cd2Sjpk 		tpc = find_tpc(&rhent->rh_address.ta_addr_v6, IPV6_VERSION,
46045916cd2Sjpk 		    B_TRUE);
46145916cd2Sjpk 		break;
46245916cd2Sjpk 
46345916cd2Sjpk 	default:
46445916cd2Sjpk 		return (EINVAL);
46545916cd2Sjpk 	}
46645916cd2Sjpk 	if (tpc == NULL)
46745916cd2Sjpk 		return (ENOENT);
46845916cd2Sjpk 
46945916cd2Sjpk 	DTRACE_PROBE2(tx__tndb__l4__tnrhget__foundtpc, tsol_rhent_t *,
47045916cd2Sjpk 	    rhent, tsol_tpc_t *, tpc);
47145916cd2Sjpk 	bcopy(tpc->tpc_tp.name, rhent->rh_template,
47245916cd2Sjpk 	    sizeof (rhent->rh_template));
47345916cd2Sjpk 	TPC_RELE(tpc);
47445916cd2Sjpk 	return (0);
47545916cd2Sjpk }
47645916cd2Sjpk 
47745916cd2Sjpk static boolean_t
template_name_ok(const char * name)47845916cd2Sjpk template_name_ok(const char *name)
47945916cd2Sjpk {
48045916cd2Sjpk 	const char *name_end = name + TNTNAMSIZ;
48145916cd2Sjpk 
48245916cd2Sjpk 	while (name < name_end) {
48345916cd2Sjpk 		if (*name == '\0')
48445916cd2Sjpk 			break;
48545916cd2Sjpk 		name++;
48645916cd2Sjpk 	}
48745916cd2Sjpk 	return (name < name_end);
48845916cd2Sjpk }
48945916cd2Sjpk 
49045916cd2Sjpk static int
tnrh(int cmd,void * buf)49145916cd2Sjpk tnrh(int cmd, void *buf)
49245916cd2Sjpk {
49345916cd2Sjpk 	int retv;
49445916cd2Sjpk 	tsol_rhent_t rhent;
49545916cd2Sjpk 
49645916cd2Sjpk 	/* Make sure user has sufficient privilege */
49745916cd2Sjpk 	if (cmd != TNDB_GET &&
49845916cd2Sjpk 	    (retv = secpolicy_net_config(CRED(), B_FALSE)) != 0)
49945916cd2Sjpk 		return (set_errno(retv));
50045916cd2Sjpk 
50145916cd2Sjpk 	/*
50245916cd2Sjpk 	 * Get arguments
50345916cd2Sjpk 	 */
50445916cd2Sjpk 	if (cmd != TNDB_FLUSH &&
50545916cd2Sjpk 	    copyin(buf, &rhent, sizeof (rhent)) != 0) {
50645916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l0__tnrhdelete__copyin);
50745916cd2Sjpk 		return (set_errno(EFAULT));
50845916cd2Sjpk 	}
50945916cd2Sjpk 
51045916cd2Sjpk 	switch (cmd) {
51145916cd2Sjpk 	case TNDB_LOAD:
51245916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete__tndbload);
51345916cd2Sjpk 		if (!template_name_ok(rhent.rh_template)) {
51445916cd2Sjpk 			retv = EINVAL;
51545916cd2Sjpk 		} else {
51645916cd2Sjpk 			retv = tnrh_load(&rhent);
51745916cd2Sjpk 		}
51845916cd2Sjpk 		break;
51945916cd2Sjpk 
52045916cd2Sjpk 	case TNDB_DELETE:
52145916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete__tndbdelete);
52245916cd2Sjpk 		retv = tnrh_delete(&rhent);
52345916cd2Sjpk 		break;
52445916cd2Sjpk 
52545916cd2Sjpk 	case TNDB_GET:
52645916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l4__tnrhdelete__tndbget);
52745916cd2Sjpk 		if (!template_name_ok(rhent.rh_template)) {
52845916cd2Sjpk 			retv = EINVAL;
52945916cd2Sjpk 			break;
53045916cd2Sjpk 		}
53145916cd2Sjpk 
53245916cd2Sjpk 		retv = tnrh_get(&rhent);
53345916cd2Sjpk 		if (retv != 0)
53445916cd2Sjpk 			break;
53545916cd2Sjpk 
53645916cd2Sjpk 		/*
53745916cd2Sjpk 		 * Copy out result
53845916cd2Sjpk 		 */
53945916cd2Sjpk 		if (copyout(&rhent, buf, sizeof (rhent)) != 0) {
54045916cd2Sjpk 			DTRACE_PROBE(tx__tndb__l0__tnrhdelete__copyout);
54145916cd2Sjpk 			retv = EFAULT;
54245916cd2Sjpk 		}
54345916cd2Sjpk 		break;
54445916cd2Sjpk 
54545916cd2Sjpk 	case TNDB_FLUSH:
54645916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l2__tnrhdelete__flush);
54745916cd2Sjpk 		flush_rh_table(tnrhc_table, TSOL_MASK_TABLE_SIZE);
54845916cd2Sjpk 		flush_rh_table(tnrhc_table_v6, TSOL_MASK_TABLE_SIZE_V6);
54945916cd2Sjpk 		break;
55045916cd2Sjpk 
55145916cd2Sjpk 	default:
55245916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l0__tnrhdelete__unknowncmd,
55345916cd2Sjpk 		    int, cmd);
55445916cd2Sjpk 		retv = EOPNOTSUPP;
55545916cd2Sjpk 		break;
55645916cd2Sjpk 	}
55745916cd2Sjpk 
55845916cd2Sjpk 	if (retv != 0)
55945916cd2Sjpk 		return (set_errno(retv));
56045916cd2Sjpk 	else
56145916cd2Sjpk 		return (retv);
56245916cd2Sjpk }
56345916cd2Sjpk 
56445916cd2Sjpk static tsol_tpc_t *
tnrhtp_create(const tsol_tpent_t * tpent,int kmflags)56545916cd2Sjpk tnrhtp_create(const tsol_tpent_t *tpent, int kmflags)
56645916cd2Sjpk {
56745916cd2Sjpk 	tsol_tpc_t *tpc;
56845916cd2Sjpk 	mod_hash_val_t hv;
56945916cd2Sjpk 
57045916cd2Sjpk 	/*
57145916cd2Sjpk 	 * We intentionally allocate a new entry before taking the lock on the
57245916cd2Sjpk 	 * entire database.
57345916cd2Sjpk 	 */
57445916cd2Sjpk 	if ((tpc = kmem_zalloc(sizeof (*tpc), kmflags)) == NULL)
57545916cd2Sjpk 		return (NULL);
57645916cd2Sjpk 
57745916cd2Sjpk 	mutex_enter(&tpc_lock);
57845916cd2Sjpk 	if (mod_hash_find(tpc_name_hash, (mod_hash_key_t)tpent->name,
57945916cd2Sjpk 	    &hv) == 0) {
58045916cd2Sjpk 		tsol_tpc_t *found_tpc = (tsol_tpc_t *)hv;
58145916cd2Sjpk 
58245916cd2Sjpk 		found_tpc->tpc_invalid = 1;
58345916cd2Sjpk 		(void) mod_hash_destroy(tpc_name_hash,
58445916cd2Sjpk 		    (mod_hash_key_t)tpent->name);
58545916cd2Sjpk 		TPC_RELE(found_tpc);
58645916cd2Sjpk 	}
58745916cd2Sjpk 
58845916cd2Sjpk 	mutex_init(&tpc->tpc_lock, NULL, MUTEX_DEFAULT, NULL);
58945916cd2Sjpk 	/* tsol_tpent_t is the same on LP64 and ILP32 */
59045916cd2Sjpk 	bcopy(tpent, &tpc->tpc_tp, sizeof (tpc->tpc_tp));
59145916cd2Sjpk 	(void) mod_hash_insert(tpc_name_hash, (mod_hash_key_t)tpc->tpc_tp.name,
59245916cd2Sjpk 	    (mod_hash_val_t)tpc);
59345916cd2Sjpk 	TPC_HOLD(tpc);
59445916cd2Sjpk 	mutex_exit(&tpc_lock);
59545916cd2Sjpk 
59645916cd2Sjpk 	return (tpc);
59745916cd2Sjpk }
59845916cd2Sjpk 
59945916cd2Sjpk static int
tnrhtp_delete(const char * tname)60045916cd2Sjpk tnrhtp_delete(const char *tname)
60145916cd2Sjpk {
60245916cd2Sjpk 	tsol_tpc_t *tpc;
60345916cd2Sjpk 	mod_hash_val_t hv;
60445916cd2Sjpk 	int retv = ENOENT;
60545916cd2Sjpk 
60645916cd2Sjpk 	mutex_enter(&tpc_lock);
60745916cd2Sjpk 	if (mod_hash_find(tpc_name_hash, (mod_hash_key_t)tname, &hv) == 0) {
60845916cd2Sjpk 		tpc = (tsol_tpc_t *)hv;
60945916cd2Sjpk 		ASSERT(!tpc->tpc_invalid);
61045916cd2Sjpk 		tpc->tpc_invalid = 1;
61145916cd2Sjpk 		(void) mod_hash_destroy(tpc_name_hash,
61245916cd2Sjpk 		    (mod_hash_key_t)tpc->tpc_tp.name);
61345916cd2Sjpk 		TPC_RELE(tpc);
61445916cd2Sjpk 		retv = 0;
61545916cd2Sjpk 	}
61645916cd2Sjpk 	mutex_exit(&tpc_lock);
61745916cd2Sjpk 	return (retv);
61845916cd2Sjpk }
61945916cd2Sjpk 
62045916cd2Sjpk /* ARGSUSED */
62145916cd2Sjpk static uint_t
tpc_delete(mod_hash_key_t key,mod_hash_val_t * val,void * arg)62245916cd2Sjpk tpc_delete(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
62345916cd2Sjpk {
62445916cd2Sjpk 	tsol_tpc_t *tpc = (tsol_tpc_t *)val;
62545916cd2Sjpk 
62645916cd2Sjpk 	ASSERT(!tpc->tpc_invalid);
62745916cd2Sjpk 	tpc->tpc_invalid = 1;
62845916cd2Sjpk 	TPC_RELE(tpc);
62945916cd2Sjpk 	return (MH_WALK_CONTINUE);
63045916cd2Sjpk }
63145916cd2Sjpk 
63245916cd2Sjpk static void
tnrhtp_flush(void)63345916cd2Sjpk tnrhtp_flush(void)
63445916cd2Sjpk {
63545916cd2Sjpk 	mutex_enter(&tpc_lock);
63645916cd2Sjpk 	mod_hash_walk(tpc_name_hash, tpc_delete, NULL);
63745916cd2Sjpk 	mod_hash_clear(tpc_name_hash);
63845916cd2Sjpk 	mutex_exit(&tpc_lock);
63945916cd2Sjpk }
64045916cd2Sjpk 
64145916cd2Sjpk static int
tnrhtp(int cmd,void * buf)64245916cd2Sjpk tnrhtp(int cmd, void *buf)
64345916cd2Sjpk {
64445916cd2Sjpk 	int retv;
64545916cd2Sjpk 	int type;
64645916cd2Sjpk 	tsol_tpent_t rhtpent;
64745916cd2Sjpk 	tsol_tpc_t *tpc;
64845916cd2Sjpk 
64945916cd2Sjpk 	/* Make sure user has sufficient privilege */
65045916cd2Sjpk 	if (cmd != TNDB_GET &&
65145916cd2Sjpk 	    (retv = secpolicy_net_config(CRED(), B_FALSE)) != 0)
65245916cd2Sjpk 		return (set_errno(retv));
65345916cd2Sjpk 
65445916cd2Sjpk 	/*
65545916cd2Sjpk 	 * Get argument.  Note that tsol_tpent_t is the same on LP64 and ILP32,
65645916cd2Sjpk 	 * so no special handling is required.
65745916cd2Sjpk 	 */
65845916cd2Sjpk 	if (cmd != TNDB_FLUSH) {
65945916cd2Sjpk 		if (copyin(buf, &rhtpent, sizeof (rhtpent)) != 0) {
66045916cd2Sjpk 			DTRACE_PROBE(tx__tndb__l0__tnrhtp__copyin);
66145916cd2Sjpk 			return (set_errno(EFAULT));
66245916cd2Sjpk 		}
66345916cd2Sjpk 
66445916cd2Sjpk 		/*
66545916cd2Sjpk 		 * Don't let the user give us a bogus (unterminated) template
66645916cd2Sjpk 		 * name.
66745916cd2Sjpk 		 */
66845916cd2Sjpk 		if (!template_name_ok(rhtpent.name))
66945916cd2Sjpk 			return (set_errno(EINVAL));
67045916cd2Sjpk 	}
67145916cd2Sjpk 
67245916cd2Sjpk 	switch (cmd) {
67345916cd2Sjpk 	case TNDB_LOAD:
67445916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l2__tnrhtp__tndbload, char *,
67545916cd2Sjpk 			rhtpent.name);
67645916cd2Sjpk 		type = rhtpent.host_type;
67745916cd2Sjpk 		if (type != UNLABELED && type != SUN_CIPSO) {
67845916cd2Sjpk 			retv = EINVAL;
67945916cd2Sjpk 			break;
68045916cd2Sjpk 		}
68145916cd2Sjpk 
68245916cd2Sjpk 		if (tnrhtp_create(&rhtpent, KM_NOSLEEP) == NULL)
68345916cd2Sjpk 			retv = ENOMEM;
68445916cd2Sjpk 		else
68545916cd2Sjpk 			retv = 0;
68645916cd2Sjpk 		break;
68745916cd2Sjpk 
68845916cd2Sjpk 	case TNDB_GET:
68945916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l4__tnrhtp__tndbget, char *,
69045916cd2Sjpk 		    rhtpent.name);
69145916cd2Sjpk 		tpc = tnrhtp_find(rhtpent.name, tpc_name_hash);
69245916cd2Sjpk 		if (tpc == NULL) {
69345916cd2Sjpk 			retv = ENOENT;
69445916cd2Sjpk 			break;
69545916cd2Sjpk 		}
69645916cd2Sjpk 
69745916cd2Sjpk 		/* Copy out result */
69845916cd2Sjpk 		if (copyout(&tpc->tpc_tp, buf, sizeof (tpc->tpc_tp)) != 0) {
69945916cd2Sjpk 			DTRACE_PROBE(tx__tndb__l0__tnrhtp__copyout);
70045916cd2Sjpk 			retv = EFAULT;
70145916cd2Sjpk 		} else {
70245916cd2Sjpk 			retv = 0;
70345916cd2Sjpk 		}
70445916cd2Sjpk 		TPC_RELE(tpc);
70545916cd2Sjpk 		break;
70645916cd2Sjpk 
70745916cd2Sjpk 	case TNDB_DELETE:
70845916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l4__tnrhtp__tndbdelete, char *,
70945916cd2Sjpk 		    rhtpent.name);
71045916cd2Sjpk 		retv = tnrhtp_delete(rhtpent.name);
71145916cd2Sjpk 		break;
71245916cd2Sjpk 
71345916cd2Sjpk 	case TNDB_FLUSH:
71445916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l4__tnrhtp__flush);
71545916cd2Sjpk 		tnrhtp_flush();
71645916cd2Sjpk 		retv = 0;
71745916cd2Sjpk 		break;
71845916cd2Sjpk 
71945916cd2Sjpk 	default:
72045916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l0__tnrhtp__unknowncmd, int,
72145916cd2Sjpk 		    cmd);
72245916cd2Sjpk 		retv = EOPNOTSUPP;
72345916cd2Sjpk 		break;
72445916cd2Sjpk 	}
72545916cd2Sjpk 
72645916cd2Sjpk 	if (retv != 0)
72745916cd2Sjpk 		return (set_errno(retv));
72845916cd2Sjpk 	else
72945916cd2Sjpk 		return (retv);
73045916cd2Sjpk }
73145916cd2Sjpk 
73245916cd2Sjpk /*
73345916cd2Sjpk  * MLP entry ordering logic
73445916cd2Sjpk  *
73545916cd2Sjpk  * There are two loops in this routine.  The first loop finds the entry that
73645916cd2Sjpk  * either logically follows the new entry to be inserted, or is the entry that
73745916cd2Sjpk  * precedes and overlaps the new entry, or is NULL to mean end-of-list.  This
73845916cd2Sjpk  * is 'tme.'  The second loop scans ahead from that point to find any overlap
73945916cd2Sjpk  * on the front or back of this new entry.
74045916cd2Sjpk  *
74145916cd2Sjpk  * For the first loop, we can have the following cases in the list (note that
74245916cd2Sjpk  * the port-portmax range is inclusive):
74345916cd2Sjpk  *
74445916cd2Sjpk  *	       port   portmax
74545916cd2Sjpk  *		+--------+
74645916cd2Sjpk  * 1: +------+ ................... precedes; skip to next
74745916cd2Sjpk  * 2:	    +------+ ............. overlaps; stop here if same protocol
74845916cd2Sjpk  * 3:		+------+ ......... overlaps; stop if same or higher protocol
74945916cd2Sjpk  * 4:		    +-------+ .... overlaps or succeeds; stop here
75045916cd2Sjpk  *
75145916cd2Sjpk  * For the second loop, we can have the following cases (note that we need not
75245916cd2Sjpk  * care about other protocol entries at this point, because we're only looking
75345916cd2Sjpk  * for overlap, not an insertion point):
75445916cd2Sjpk  *
75545916cd2Sjpk  *	       port   portmax
75645916cd2Sjpk  *		+--------+
75745916cd2Sjpk  * 5:	    +------+ ............. overlaps; stop if same protocol
75845916cd2Sjpk  * 6:		+------+ ......... overlaps; stop if same protocol
75945916cd2Sjpk  * 7:		    +-------+ .... overlaps; stop if same protocol
76045916cd2Sjpk  * 8:			   +---+ . follows; search is done
76145916cd2Sjpk  *
76245916cd2Sjpk  * In other words, this second search needs to consider only whether the entry
76345916cd2Sjpk  * has a starting port number that's greater than the end point of the new
76445916cd2Sjpk  * entry.  All others are overlaps.
76545916cd2Sjpk  */
76645916cd2Sjpk static int
mlp_add_del(tsol_mlp_list_t * mlpl,zoneid_t zoneid,uint8_t proto,uint16_t port,uint16_t portmax,boolean_t addflag)76745916cd2Sjpk mlp_add_del(tsol_mlp_list_t *mlpl, zoneid_t zoneid, uint8_t proto,
76845916cd2Sjpk     uint16_t port, uint16_t portmax, boolean_t addflag)
76945916cd2Sjpk {
77045916cd2Sjpk 	int retv;
77145916cd2Sjpk 	tsol_mlp_entry_t *tme, *tme2, *newent;
77245916cd2Sjpk 
77345916cd2Sjpk 	if (addflag) {
77445916cd2Sjpk 		if ((newent = kmem_zalloc(sizeof (*newent), KM_NOSLEEP)) ==
77545916cd2Sjpk 		    NULL)
77645916cd2Sjpk 			return (ENOMEM);
77745916cd2Sjpk 	} else {
77845916cd2Sjpk 		newent = NULL;
77945916cd2Sjpk 	}
78045916cd2Sjpk 	rw_enter(&mlpl->mlpl_rwlock, RW_WRITER);
78145916cd2Sjpk 
78245916cd2Sjpk 	/*
78345916cd2Sjpk 	 * First loop: find logical insertion point or overlap.  Table is kept
78445916cd2Sjpk 	 * in order of port number first, and then, within that, by protocol
78545916cd2Sjpk 	 * number.
78645916cd2Sjpk 	 */
78745916cd2Sjpk 	for (tme = mlpl->mlpl_first; tme != NULL; tme = tme->mlpe_next) {
78845916cd2Sjpk 		/* logically next (case 4) */
78945916cd2Sjpk 		if (tme->mlpe_mlp.mlp_port > port)
79045916cd2Sjpk 			break;
79145916cd2Sjpk 		/* if this is logically next or overlap, then stop (case 3) */
79245916cd2Sjpk 		if (tme->mlpe_mlp.mlp_port == port &&
79345916cd2Sjpk 		    tme->mlpe_mlp.mlp_ipp >= proto)
79445916cd2Sjpk 			break;
79545916cd2Sjpk 		/* earlier or same port sequence; check for overlap (case 2) */
79645916cd2Sjpk 		if (tme->mlpe_mlp.mlp_ipp == proto &&
79745916cd2Sjpk 		    tme->mlpe_mlp.mlp_port_upper >= port)
79845916cd2Sjpk 			break;
79945916cd2Sjpk 		/* otherwise, loop again (case 1) */
80045916cd2Sjpk 	}
80145916cd2Sjpk 
80245916cd2Sjpk 	/* Second loop: scan ahead for overlap */
80345916cd2Sjpk 	for (tme2 = tme; tme2 != NULL; tme2 = tme2->mlpe_next) {
80445916cd2Sjpk 		/* check if entry follows; no overlap (case 8) */
80545916cd2Sjpk 		if (tme2->mlpe_mlp.mlp_port > portmax) {
80645916cd2Sjpk 			tme2 = NULL;
80745916cd2Sjpk 			break;
80845916cd2Sjpk 		}
80945916cd2Sjpk 		/* only exact protocol matches at this point (cases 5-7) */
81045916cd2Sjpk 		if (tme2->mlpe_mlp.mlp_ipp == proto)
81145916cd2Sjpk 			break;
81245916cd2Sjpk 	}
81345916cd2Sjpk 
81445916cd2Sjpk 	retv = 0;
81545916cd2Sjpk 	if (addflag) {
81645916cd2Sjpk 		if (tme2 != NULL) {
81745916cd2Sjpk 			retv = EEXIST;
81845916cd2Sjpk 		} else {
81945916cd2Sjpk 			newent->mlpe_zoneid = zoneid;
82045916cd2Sjpk 			newent->mlpe_mlp.mlp_ipp = proto;
82145916cd2Sjpk 			newent->mlpe_mlp.mlp_port = port;
82245916cd2Sjpk 			newent->mlpe_mlp.mlp_port_upper = portmax;
82345916cd2Sjpk 			newent->mlpe_next = tme;
82445916cd2Sjpk 			if (tme == NULL) {
82545916cd2Sjpk 				tme2 = mlpl->mlpl_last;
82645916cd2Sjpk 				mlpl->mlpl_last = newent;
82745916cd2Sjpk 			} else {
82845916cd2Sjpk 				tme2 = tme->mlpe_prev;
82945916cd2Sjpk 				tme->mlpe_prev = newent;
83045916cd2Sjpk 			}
83145916cd2Sjpk 			newent->mlpe_prev = tme2;
83245916cd2Sjpk 			if (tme2 == NULL)
83345916cd2Sjpk 				mlpl->mlpl_first = newent;
83445916cd2Sjpk 			else
83545916cd2Sjpk 				tme2->mlpe_next = newent;
83645916cd2Sjpk 			newent = NULL;
83745916cd2Sjpk 		}
83845916cd2Sjpk 	} else {
83945916cd2Sjpk 		if (tme2 == NULL || tme2->mlpe_mlp.mlp_port != port ||
84045916cd2Sjpk 		    tme2->mlpe_mlp.mlp_port_upper != portmax) {
84145916cd2Sjpk 			retv = ENOENT;
84245916cd2Sjpk 		} else {
84345916cd2Sjpk 			if ((tme2 = tme->mlpe_prev) == NULL)
84445916cd2Sjpk 				mlpl->mlpl_first = tme->mlpe_next;
84545916cd2Sjpk 			else
84645916cd2Sjpk 				tme2->mlpe_next = tme->mlpe_next;
84745916cd2Sjpk 			if ((tme2 = tme->mlpe_next) == NULL)
84845916cd2Sjpk 				mlpl->mlpl_last = tme->mlpe_prev;
84945916cd2Sjpk 			else
85045916cd2Sjpk 				tme2->mlpe_prev = tme->mlpe_prev;
85145916cd2Sjpk 			newent = tme;
85245916cd2Sjpk 		}
85345916cd2Sjpk 	}
85445916cd2Sjpk 	rw_exit(&mlpl->mlpl_rwlock);
85545916cd2Sjpk 
85645916cd2Sjpk 	if (newent != NULL)
85745916cd2Sjpk 		kmem_free(newent, sizeof (*newent));
85845916cd2Sjpk 
85945916cd2Sjpk 	return (retv);
86045916cd2Sjpk }
86145916cd2Sjpk 
86245916cd2Sjpk /*
86345916cd2Sjpk  * Add or remove an MLP entry from the database so that the classifier can find
86445916cd2Sjpk  * it.
86545916cd2Sjpk  *
86645916cd2Sjpk  * Note: port number is in host byte order.
86745916cd2Sjpk  */
86845916cd2Sjpk int
tsol_mlp_anon(zone_t * zone,mlp_type_t mlptype,uchar_t proto,uint16_t port,boolean_t addflag)86945916cd2Sjpk tsol_mlp_anon(zone_t *zone, mlp_type_t mlptype, uchar_t proto, uint16_t port,
87045916cd2Sjpk     boolean_t addflag)
87145916cd2Sjpk {
87245916cd2Sjpk 	int retv = 0;
87345916cd2Sjpk 
87445916cd2Sjpk 	if (mlptype == mlptBoth || mlptype == mlptPrivate)
87545916cd2Sjpk 		retv = mlp_add_del(&zone->zone_mlps, zone->zone_id, proto,
87645916cd2Sjpk 		    port, port, addflag);
87745916cd2Sjpk 	if ((retv == 0 || !addflag) &&
87845916cd2Sjpk 	    (mlptype == mlptBoth || mlptype == mlptShared)) {
87945916cd2Sjpk 		retv = mlp_add_del(&shared_mlps, zone->zone_id, proto, port,
88045916cd2Sjpk 		    port, addflag);
88145916cd2Sjpk 		if (retv != 0 && addflag)
88245916cd2Sjpk 			(void) mlp_add_del(&zone->zone_mlps, zone->zone_id,
88345916cd2Sjpk 			    proto, port, port, B_FALSE);
88445916cd2Sjpk 	}
88545916cd2Sjpk 	return (retv);
88645916cd2Sjpk }
88745916cd2Sjpk 
88845916cd2Sjpk static void
mlp_flush(tsol_mlp_list_t * mlpl,zoneid_t zoneid)88945916cd2Sjpk mlp_flush(tsol_mlp_list_t *mlpl, zoneid_t zoneid)
89045916cd2Sjpk {
89145916cd2Sjpk 	tsol_mlp_entry_t *tme, *tme2, *tmnext;
89245916cd2Sjpk 
89345916cd2Sjpk 	rw_enter(&mlpl->mlpl_rwlock, RW_WRITER);
89445916cd2Sjpk 	for (tme = mlpl->mlpl_first; tme != NULL; tme = tmnext) {
89545916cd2Sjpk 		tmnext = tme->mlpe_next;
89645916cd2Sjpk 		if (zoneid == ALL_ZONES || tme->mlpe_zoneid == zoneid) {
89745916cd2Sjpk 			if ((tme2 = tme->mlpe_prev) == NULL)
89845916cd2Sjpk 				mlpl->mlpl_first = tmnext;
89945916cd2Sjpk 			else
90045916cd2Sjpk 				tme2->mlpe_next = tmnext;
90145916cd2Sjpk 			if (tmnext == NULL)
90245916cd2Sjpk 				mlpl->mlpl_last = tme2;
90345916cd2Sjpk 			else
90445916cd2Sjpk 				tmnext->mlpe_prev = tme2;
90545916cd2Sjpk 			kmem_free(tme, sizeof (*tme));
90645916cd2Sjpk 		}
90745916cd2Sjpk 	}
90845916cd2Sjpk 	rw_exit(&mlpl->mlpl_rwlock);
90945916cd2Sjpk }
91045916cd2Sjpk 
91145916cd2Sjpk /*
91245916cd2Sjpk  * Note: user supplies port numbers in host byte order.
91345916cd2Sjpk  */
91445916cd2Sjpk static int
tnmlp(int cmd,void * buf)91545916cd2Sjpk tnmlp(int cmd, void *buf)
91645916cd2Sjpk {
91745916cd2Sjpk 	int retv;
91845916cd2Sjpk 	tsol_mlpent_t tsme;
91945916cd2Sjpk 	zone_t *zone;
92045916cd2Sjpk 	tsol_mlp_list_t *mlpl;
92145916cd2Sjpk 	tsol_mlp_entry_t *tme;
92245916cd2Sjpk 
92345916cd2Sjpk 	/* Make sure user has sufficient privilege */
92445916cd2Sjpk 	if (cmd != TNDB_GET &&
92545916cd2Sjpk 	    (retv = secpolicy_net_config(CRED(), B_FALSE)) != 0)
92645916cd2Sjpk 		return (set_errno(retv));
92745916cd2Sjpk 
92845916cd2Sjpk 	/*
92945916cd2Sjpk 	 * Get argument.  Note that tsol_mlpent_t is the same on LP64 and
93045916cd2Sjpk 	 * ILP32, so no special handling is required.
93145916cd2Sjpk 	 */
93245916cd2Sjpk 	if (copyin(buf, &tsme, sizeof (tsme)) != 0) {
93345916cd2Sjpk 		DTRACE_PROBE(tx__tndb__l0__tnmlp__copyin);
93445916cd2Sjpk 		return (set_errno(EFAULT));
93545916cd2Sjpk 	}
93645916cd2Sjpk 
93745916cd2Sjpk 	/* MLPs on shared IP addresses */
93845916cd2Sjpk 	if (tsme.tsme_flags & TSOL_MEF_SHARED) {
93945916cd2Sjpk 		zone = NULL;
94045916cd2Sjpk 		mlpl = &shared_mlps;
94145916cd2Sjpk 	} else {
94245916cd2Sjpk 		zone = zone_find_by_id(tsme.tsme_zoneid);
94345916cd2Sjpk 		if (zone == NULL)
94445916cd2Sjpk 			return (set_errno(EINVAL));
94545916cd2Sjpk 		mlpl = &zone->zone_mlps;
94645916cd2Sjpk 	}
94745916cd2Sjpk 	if (tsme.tsme_mlp.mlp_port_upper == 0)
94845916cd2Sjpk 		tsme.tsme_mlp.mlp_port_upper = tsme.tsme_mlp.mlp_port;
94945916cd2Sjpk 
95045916cd2Sjpk 	switch (cmd) {
95145916cd2Sjpk 	case TNDB_LOAD:
95245916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l2__tnmlp__tndbload,
95345916cd2Sjpk 		    tsol_mlpent_t *, &tsme);
95445916cd2Sjpk 		if (tsme.tsme_mlp.mlp_ipp == 0 || tsme.tsme_mlp.mlp_port == 0 ||
95545916cd2Sjpk 		    tsme.tsme_mlp.mlp_port > tsme.tsme_mlp.mlp_port_upper) {
95645916cd2Sjpk 			retv = EINVAL;
95745916cd2Sjpk 			break;
95845916cd2Sjpk 		}
95945916cd2Sjpk 		retv = mlp_add_del(mlpl, tsme.tsme_zoneid,
96045916cd2Sjpk 		    tsme.tsme_mlp.mlp_ipp, tsme.tsme_mlp.mlp_port,
96145916cd2Sjpk 		    tsme.tsme_mlp.mlp_port_upper, B_TRUE);
96245916cd2Sjpk 		break;
96345916cd2Sjpk 
96445916cd2Sjpk 	case TNDB_GET:
96545916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l2__tnmlp__tndbget,
96645916cd2Sjpk 		    tsol_mlpent_t *, &tsme);
96745916cd2Sjpk 
96845916cd2Sjpk 		/*
96945916cd2Sjpk 		 * Search for the requested element or, failing that, the one
97045916cd2Sjpk 		 * that's logically next in the sequence.
97145916cd2Sjpk 		 */
97245916cd2Sjpk 		rw_enter(&mlpl->mlpl_rwlock, RW_READER);
97345916cd2Sjpk 		for (tme = mlpl->mlpl_first; tme != NULL;
97445916cd2Sjpk 		    tme = tme->mlpe_next) {
97545916cd2Sjpk 			if (tsme.tsme_zoneid != ALL_ZONES &&
97645916cd2Sjpk 			    tme->mlpe_zoneid != tsme.tsme_zoneid)
97745916cd2Sjpk 				continue;
97845916cd2Sjpk 			if (tme->mlpe_mlp.mlp_ipp >= tsme.tsme_mlp.mlp_ipp &&
97945916cd2Sjpk 			    tme->mlpe_mlp.mlp_port == tsme.tsme_mlp.mlp_port)
98045916cd2Sjpk 				break;
98145916cd2Sjpk 			if (tme->mlpe_mlp.mlp_port > tsme.tsme_mlp.mlp_port)
98245916cd2Sjpk 				break;
98345916cd2Sjpk 		}
98445916cd2Sjpk 		if (tme == NULL) {
98545916cd2Sjpk 			retv = ENOENT;
98645916cd2Sjpk 		} else {
98745916cd2Sjpk 			tsme.tsme_zoneid = tme->mlpe_zoneid;
98845916cd2Sjpk 			tsme.tsme_mlp = tme->mlpe_mlp;
98945916cd2Sjpk 			retv = 0;
99045916cd2Sjpk 		}
99145916cd2Sjpk 		rw_exit(&mlpl->mlpl_rwlock);
99245916cd2Sjpk 		break;
99345916cd2Sjpk 
99445916cd2Sjpk 	case TNDB_DELETE:
99545916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l4__tnmlp__tndbdelete,
99645916cd2Sjpk 		    tsol_mlpent_t *, &tsme);
99745916cd2Sjpk 		retv = mlp_add_del(mlpl, tsme.tsme_zoneid,
99845916cd2Sjpk 		    tsme.tsme_mlp.mlp_ipp, tsme.tsme_mlp.mlp_port,
99945916cd2Sjpk 		    tsme.tsme_mlp.mlp_port_upper, B_FALSE);
100045916cd2Sjpk 		break;
100145916cd2Sjpk 
100245916cd2Sjpk 	case TNDB_FLUSH:
100345916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l4__tnmlp__tndbflush,
100445916cd2Sjpk 		    tsol_mlpent_t *, &tsme);
100545916cd2Sjpk 		mlp_flush(mlpl, ALL_ZONES);
100645916cd2Sjpk 		mlp_flush(&shared_mlps, tsme.tsme_zoneid);
100745916cd2Sjpk 		retv = 0;
100845916cd2Sjpk 		break;
100945916cd2Sjpk 
101045916cd2Sjpk 	default:
101145916cd2Sjpk 		DTRACE_PROBE1(tx__tndb__l0__tnmlp__unknowncmd, int,
101245916cd2Sjpk 		    cmd);
101345916cd2Sjpk 		retv = EOPNOTSUPP;
101445916cd2Sjpk 		break;
101545916cd2Sjpk 	}
101645916cd2Sjpk 
101745916cd2Sjpk 	if (zone != NULL)
101845916cd2Sjpk 		zone_rele(zone);
101945916cd2Sjpk 
102045916cd2Sjpk 	if (cmd == TNDB_GET && retv == 0) {
102145916cd2Sjpk 		/* Copy out result */
102245916cd2Sjpk 		if (copyout(&tsme, buf, sizeof (tsme)) != 0) {
102345916cd2Sjpk 			DTRACE_PROBE(tx__tndb__l0__tnmlp__copyout);
102445916cd2Sjpk 			retv = EFAULT;
102545916cd2Sjpk 		}
102645916cd2Sjpk 	}
102745916cd2Sjpk 
102845916cd2Sjpk 	if (retv != 0)
102945916cd2Sjpk 		return (set_errno(retv));
103045916cd2Sjpk 	else
103145916cd2Sjpk 		return (retv);
103245916cd2Sjpk }
103345916cd2Sjpk 
103445916cd2Sjpk /*
103545916cd2Sjpk  * Returns a tnrhc matching the addr address.
103645916cd2Sjpk  * The returned rhc's refcnt is incremented.
103745916cd2Sjpk  */
103845916cd2Sjpk tsol_tnrhc_t *
find_rhc(const void * addr,uchar_t version,boolean_t staleok)1039*bfabfc35Skp find_rhc(const void *addr, uchar_t version, boolean_t staleok)
104045916cd2Sjpk {
104145916cd2Sjpk 	tsol_tnrhc_t *rh = NULL;
1042*bfabfc35Skp 	tsol_tnrhc_t *new;
1043*bfabfc35Skp 	tsol_tpc_t *tpc;
104445916cd2Sjpk 	tnrhc_hash_t *tnrhc_hash;
104545916cd2Sjpk 	ipaddr_t tmpmask;
1046*bfabfc35Skp 	in_addr_t *in4 = (in_addr_t *)addr;
1047*bfabfc35Skp 	in6_addr_t *in6 = (in6_addr_t *)addr;
1048*bfabfc35Skp 	in_addr_t tmpin4;
1049*bfabfc35Skp 	in6_addr_t tmpmask6;
105045916cd2Sjpk 	int	i;
1051*bfabfc35Skp 	int	prefix;
105245916cd2Sjpk 
1053*bfabfc35Skp 	/*
1054*bfabfc35Skp 	 * An IPv4-mapped IPv6 address is really an IPv4 address
1055*bfabfc35Skp 	 * in IPv6 format.
1056*bfabfc35Skp 	 */
1057*bfabfc35Skp 	if (version == IPV6_VERSION &&
1058*bfabfc35Skp 	    IN6_IS_ADDR_V4MAPPED(in6)) {
1059*bfabfc35Skp 		IN6_V4MAPPED_TO_IPADDR(in6, tmpin4);
1060*bfabfc35Skp 		version = IPV4_VERSION;
1061*bfabfc35Skp 		in4 = &tmpin4;
1062*bfabfc35Skp 	}
106345916cd2Sjpk 
1064*bfabfc35Skp 	/*
1065*bfabfc35Skp 	 * Search the tnrh hash table for each prefix length,
1066*bfabfc35Skp 	 * starting at longest prefix length, until a matching
1067*bfabfc35Skp 	 * rhc entry is found.
1068*bfabfc35Skp 	 */
1069*bfabfc35Skp 	if (version == IPV4_VERSION) {
1070*bfabfc35Skp 		for (i = (TSOL_MASK_TABLE_SIZE - 1); i >= 0; i--) {
107145916cd2Sjpk 
1072*bfabfc35Skp 			if ((tnrhc_table[i]) == NULL)
1073*bfabfc35Skp 				continue;
107445916cd2Sjpk 
1075*bfabfc35Skp 			tmpmask = tsol_plen_to_mask(i);
1076*bfabfc35Skp 			tnrhc_hash = &tnrhc_table[i][
1077*bfabfc35Skp 			    TSOL_ADDR_HASH(*in4 & tmpmask, TNRHC_SIZE)];
1078*bfabfc35Skp 
1079*bfabfc35Skp 			mutex_enter(&tnrhc_hash->tnrh_lock);
1080*bfabfc35Skp 			for (rh = tnrhc_hash->tnrh_list; rh != NULL;
1081*bfabfc35Skp 			    rh = rh->rhc_next) {
1082*bfabfc35Skp 				if ((rh->rhc_host.ta_family == AF_INET) &&
1083*bfabfc35Skp 				    ((rh->rhc_host.ta_addr_v4.s_addr &
1084*bfabfc35Skp 				    tmpmask) == (*in4 & tmpmask))) {
1085*bfabfc35Skp 					prefix = i;
1086*bfabfc35Skp 					TNRHC_HOLD(rh);
1087*bfabfc35Skp 					break;
1088*bfabfc35Skp 				}
108945916cd2Sjpk 			}
1090*bfabfc35Skp 			mutex_exit(&tnrhc_hash->tnrh_lock);
1091*bfabfc35Skp 			if (rh != NULL)
1092*bfabfc35Skp 				break;
109345916cd2Sjpk 		}
1094*bfabfc35Skp 		if (rh == NULL)
1095*bfabfc35Skp 			DTRACE_PROBE1(tx__tndb__l1__findrhc__norhv4ent,
1096*bfabfc35Skp 			    in_addr_t *, in4);
1097*bfabfc35Skp 	} else {
1098*bfabfc35Skp 		for (i = (TSOL_MASK_TABLE_SIZE_V6 - 1); i >= 0; i--) {
1099*bfabfc35Skp 			if ((tnrhc_table_v6[i]) == NULL)
1100*bfabfc35Skp 				continue;
110145916cd2Sjpk 
1102*bfabfc35Skp 			tsol_plen_to_mask_v6(i, &tmpmask6);
1103*bfabfc35Skp 			tnrhc_hash = &tnrhc_table_v6[i][
1104*bfabfc35Skp 			    TSOL_ADDR_MASK_HASH_V6(*in6, tmpmask6, TNRHC_SIZE)];
1105*bfabfc35Skp 
1106*bfabfc35Skp 			mutex_enter(&tnrhc_hash->tnrh_lock);
1107*bfabfc35Skp 			for (rh = tnrhc_hash->tnrh_list; rh != NULL;
1108*bfabfc35Skp 			    rh = rh->rhc_next) {
1109*bfabfc35Skp 				if ((rh->rhc_host.ta_family == AF_INET6) &&
1110*bfabfc35Skp 				    V6_MASK_EQ_2(rh->rhc_host.ta_addr_v6,
1111*bfabfc35Skp 				    tmpmask6, *in6)) {
1112*bfabfc35Skp 					prefix = i;
1113*bfabfc35Skp 					TNRHC_HOLD(rh);
1114*bfabfc35Skp 					break;
1115*bfabfc35Skp 				}
1116*bfabfc35Skp 			}
1117*bfabfc35Skp 			mutex_exit(&tnrhc_hash->tnrh_lock);
1118*bfabfc35Skp 			if (rh != NULL)
1119*bfabfc35Skp 				break;
1120*bfabfc35Skp 		}
1121*bfabfc35Skp 		if (rh == NULL)
1122*bfabfc35Skp 			DTRACE_PROBE1(tx__tndb__l1__findrhc__norhv6ent,
1123*bfabfc35Skp 			    in6_addr_t *, in6);
112445916cd2Sjpk 	}
112545916cd2Sjpk 
1126*bfabfc35Skp 	/*
1127*bfabfc35Skp 	 * Does the tnrh entry point to a stale template?
1128*bfabfc35Skp 	 * This can happen any time the user deletes or modifies
1129*bfabfc35Skp 	 * a template that has existing tnrh entries pointing
1130*bfabfc35Skp 	 * to it. Try to find a new version of the template.
1131*bfabfc35Skp 	 * If there is no template, then just give up.
1132*bfabfc35Skp 	 * If the template exists, reload the tnrh entry.
1133*bfabfc35Skp 	 */
1134*bfabfc35Skp 	if (rh != NULL && rh->rhc_tpc->tpc_invalid) {
1135*bfabfc35Skp 		tpc = tnrhtp_find(rh->rhc_tpc->tpc_tp.name, tpc_name_hash);
1136*bfabfc35Skp 		if (tpc == NULL) {
1137*bfabfc35Skp 			if (!staleok) {
1138*bfabfc35Skp 				DTRACE_PROBE2(tx__tndb__l1__findrhc__staletpc,
1139*bfabfc35Skp 				    tsol_tnrhc_t *, rh, tsol_tpc_t *,
1140*bfabfc35Skp 				    rh->rhc_tpc);
1141*bfabfc35Skp 				TNRHC_RELE(rh);
1142*bfabfc35Skp 				rh = NULL;
1143*bfabfc35Skp 			}
1144*bfabfc35Skp 		} else {
1145*bfabfc35Skp 			ASSERT(tpc->tpc_tp.host_type == UNLABELED ||
1146*bfabfc35Skp 			    tpc->tpc_tp.host_type == SUN_CIPSO);
1147*bfabfc35Skp 
1148*bfabfc35Skp 			if ((new = kmem_zalloc(sizeof (*new),
1149*bfabfc35Skp 			    KM_NOSLEEP)) == NULL) {
1150*bfabfc35Skp 				DTRACE_PROBE(tx__tndb__l1__findrhc__nomem);
1151*bfabfc35Skp 				TNRHC_RELE(rh);
1152*bfabfc35Skp 				TPC_RELE(tpc);
1153*bfabfc35Skp 				return (NULL);
1154*bfabfc35Skp 			}
115545916cd2Sjpk 
1156*bfabfc35Skp 			mutex_init(&new->rhc_lock, NULL, MUTEX_DEFAULT, NULL);
1157*bfabfc35Skp 			new->rhc_host = rh->rhc_host;
1158*bfabfc35Skp 			new->rhc_tpc = tpc;
1159*bfabfc35Skp 			new->rhc_isbcast = rh->rhc_isbcast;
1160*bfabfc35Skp 			new->rhc_local = rh->rhc_local;
1161*bfabfc35Skp 			TNRHC_RELE(rh);
1162*bfabfc35Skp 			rh = new;
116345916cd2Sjpk 
1164*bfabfc35Skp 			/*
1165*bfabfc35Skp 			 * This function increments the tnrh entry ref count
1166*bfabfc35Skp 			 * for the pointer returned to the caller.
1167*bfabfc35Skp 			 * tnrh_hash_add increments the tnrh entry ref count
1168*bfabfc35Skp 			 * for the pointer in the hash table.
1169*bfabfc35Skp 			 */
1170*bfabfc35Skp 			TNRHC_HOLD(rh);
1171*bfabfc35Skp 			if (tnrh_hash_add(new, prefix) != 0) {
1172*bfabfc35Skp 				TNRHC_RELE(rh);
1173*bfabfc35Skp 				rh = NULL;
117445916cd2Sjpk 			}
117545916cd2Sjpk 		}
117645916cd2Sjpk 	}
1177*bfabfc35Skp 	return (rh);
117845916cd2Sjpk }
117945916cd2Sjpk 
118045916cd2Sjpk tsol_tpc_t *
find_tpc(const void * addr,uchar_t version,boolean_t staleok)118145916cd2Sjpk find_tpc(const void *addr, uchar_t version, boolean_t staleok)
118245916cd2Sjpk {
118345916cd2Sjpk 	tsol_tpc_t *tpc;
118445916cd2Sjpk 	tsol_tnrhc_t *rhc;
118545916cd2Sjpk 
1186*bfabfc35Skp 	if ((rhc = find_rhc(addr, version, staleok)) == NULL)
1187*bfabfc35Skp 		return (NULL);
118845916cd2Sjpk 
1189*bfabfc35Skp 	tpc = rhc->rhc_tpc;
1190*bfabfc35Skp 	TPC_HOLD(tpc);
1191*bfabfc35Skp 	TNRHC_RELE(rhc);
1192*bfabfc35Skp 	return (tpc);
119345916cd2Sjpk }
119445916cd2Sjpk 
119545916cd2Sjpk /*
119645916cd2Sjpk  * create an internal template called "_unlab":
119745916cd2Sjpk  *
119845916cd2Sjpk  * _unlab;\
119945916cd2Sjpk  *	host_type = unlabeled;\
120045916cd2Sjpk  *	def_label = ADMIN_LOW[ADMIN_LOW];\
120145916cd2Sjpk  *	min_sl = ADMIN_LOW;\
120245916cd2Sjpk  *	max_sl = ADMIN_HIGH;
120345916cd2Sjpk  */
120445916cd2Sjpk static void
tsol_create_i_tmpls(void)120545916cd2Sjpk tsol_create_i_tmpls(void)
120645916cd2Sjpk {
120745916cd2Sjpk 	tsol_tpent_t rhtpent;
120845916cd2Sjpk 
120945916cd2Sjpk 	bzero(&rhtpent, sizeof (rhtpent));
121045916cd2Sjpk 
121145916cd2Sjpk 	/* create _unlab */
121245916cd2Sjpk 	(void) strcpy(rhtpent.name, "_unlab");
121345916cd2Sjpk 
121445916cd2Sjpk 	rhtpent.host_type = UNLABELED;
121545916cd2Sjpk 	rhtpent.tp_mask_unl = TSOL_MSK_DEF_LABEL | TSOL_MSK_DEF_CL |
121645916cd2Sjpk 	    TSOL_MSK_SL_RANGE_TSOL;
121745916cd2Sjpk 
121845916cd2Sjpk 	rhtpent.tp_gw_sl_range.lower_bound = *label2bslabel(l_admin_low);
121945916cd2Sjpk 	rhtpent.tp_def_label = rhtpent.tp_gw_sl_range.lower_bound;
122045916cd2Sjpk 	rhtpent.tp_gw_sl_range.upper_bound = *label2bslabel(l_admin_high);
122145916cd2Sjpk 	rhtpent.tp_cipso_doi_unl = default_doi;
122245916cd2Sjpk 	tpc_unlab = tnrhtp_create(&rhtpent, KM_SLEEP);
122345916cd2Sjpk }
122445916cd2Sjpk 
122545916cd2Sjpk /*
122645916cd2Sjpk  * set up internal host template, called from kernel only.
122745916cd2Sjpk  */
122845916cd2Sjpk static void
tsol_create_i_tnrh(const tnaddr_t * sa)122945916cd2Sjpk tsol_create_i_tnrh(const tnaddr_t *sa)
123045916cd2Sjpk {
123145916cd2Sjpk 	tsol_tnrhc_t *rh, *new;
123245916cd2Sjpk 	tnrhc_hash_t *tnrhc_hash;
123345916cd2Sjpk 
123445916cd2Sjpk 	/* Allocate a new entry before taking the lock */
123545916cd2Sjpk 	new = kmem_zalloc(sizeof (*new), KM_SLEEP);
123645916cd2Sjpk 
123745916cd2Sjpk 	tnrhc_hash = (sa->ta_family == AF_INET) ? &tnrhc_table[0][0] :
123845916cd2Sjpk 	    &tnrhc_table_v6[0][0];
123945916cd2Sjpk 
124045916cd2Sjpk 	mutex_enter(&tnrhc_hash->tnrh_lock);
124145916cd2Sjpk 	rh = tnrhc_hash->tnrh_list;
124245916cd2Sjpk 
124345916cd2Sjpk 	if (rh == NULL) {
124445916cd2Sjpk 		/* We're keeping the new entry. */
124545916cd2Sjpk 		rh = new;
124645916cd2Sjpk 		new = NULL;
124745916cd2Sjpk 		rh->rhc_host = *sa;
124845916cd2Sjpk 		mutex_init(&rh->rhc_lock, NULL, MUTEX_DEFAULT, NULL);
124945916cd2Sjpk 		TNRHC_HOLD(rh);
125045916cd2Sjpk 		tnrhc_hash->tnrh_list = rh;
125145916cd2Sjpk 	}
125245916cd2Sjpk 
125345916cd2Sjpk 	/*
125445916cd2Sjpk 	 * Link the entry to internal_unlab
125545916cd2Sjpk 	 */
125645916cd2Sjpk 	if (rh->rhc_tpc != tpc_unlab) {
125745916cd2Sjpk 		if (rh->rhc_tpc != NULL)
125845916cd2Sjpk 			TPC_RELE(rh->rhc_tpc);
125945916cd2Sjpk 		rh->rhc_tpc = tpc_unlab;
126045916cd2Sjpk 		TPC_HOLD(tpc_unlab);
126145916cd2Sjpk 	}
126245916cd2Sjpk 	mutex_exit(&tnrhc_hash->tnrh_lock);
126345916cd2Sjpk 	if (new != NULL)
126445916cd2Sjpk 		kmem_free(new, sizeof (*new));
126545916cd2Sjpk }
126645916cd2Sjpk 
126745916cd2Sjpk /*
126845916cd2Sjpk  * Returns 0 if the port is known to be SLP.  Returns next possible port number
126945916cd2Sjpk  * (wrapping through 1) if port is MLP on shared or global.  Administrator
127045916cd2Sjpk  * should not make all ports MLP.  If that's done, then we'll just pretend
127145916cd2Sjpk  * everything is SLP to avoid looping forever.
127245916cd2Sjpk  *
127345916cd2Sjpk  * Note: port is in host byte order.
127445916cd2Sjpk  */
127545916cd2Sjpk in_port_t
tsol_next_port(zone_t * zone,in_port_t port,int proto,boolean_t upward)127645916cd2Sjpk tsol_next_port(zone_t *zone, in_port_t port, int proto, boolean_t upward)
127745916cd2Sjpk {
127845916cd2Sjpk 	boolean_t loop;
127945916cd2Sjpk 	tsol_mlp_entry_t *tme;
128045916cd2Sjpk 	int newport = port;
128145916cd2Sjpk 
128245916cd2Sjpk 	loop = B_FALSE;
128345916cd2Sjpk 	for (;;) {
128445916cd2Sjpk 		if (zone != NULL && zone->zone_mlps.mlpl_first != NULL) {
128545916cd2Sjpk 			rw_enter(&zone->zone_mlps.mlpl_rwlock, RW_READER);
128645916cd2Sjpk 			for (tme = zone->zone_mlps.mlpl_first; tme != NULL;
128745916cd2Sjpk 			    tme = tme->mlpe_next) {
128845916cd2Sjpk 				if (proto == tme->mlpe_mlp.mlp_ipp &&
128945916cd2Sjpk 				    newport >= tme->mlpe_mlp.mlp_port &&
129045916cd2Sjpk 				    newport <= tme->mlpe_mlp.mlp_port_upper)
129145916cd2Sjpk 					newport = upward ?
129245916cd2Sjpk 					    tme->mlpe_mlp.mlp_port_upper + 1 :
129345916cd2Sjpk 					    tme->mlpe_mlp.mlp_port - 1;
129445916cd2Sjpk 			}
129545916cd2Sjpk 			rw_exit(&zone->zone_mlps.mlpl_rwlock);
129645916cd2Sjpk 		}
129745916cd2Sjpk 		if (shared_mlps.mlpl_first != NULL) {
129845916cd2Sjpk 			rw_enter(&shared_mlps.mlpl_rwlock, RW_READER);
129945916cd2Sjpk 			for (tme = shared_mlps.mlpl_first; tme != NULL;
130045916cd2Sjpk 			    tme = tme->mlpe_next) {
130145916cd2Sjpk 				if (proto == tme->mlpe_mlp.mlp_ipp &&
130245916cd2Sjpk 				    newport >= tme->mlpe_mlp.mlp_port &&
130345916cd2Sjpk 				    newport <= tme->mlpe_mlp.mlp_port_upper)
130445916cd2Sjpk 					newport = upward ?
130545916cd2Sjpk 					    tme->mlpe_mlp.mlp_port_upper + 1 :
130645916cd2Sjpk 					    tme->mlpe_mlp.mlp_port - 1;
130745916cd2Sjpk 			}
130845916cd2Sjpk 			rw_exit(&shared_mlps.mlpl_rwlock);
130945916cd2Sjpk 		}
131045916cd2Sjpk 		if (newport <= 65535 && newport > 0)
131145916cd2Sjpk 			break;
131245916cd2Sjpk 		if (loop)
131345916cd2Sjpk 			return (0);
131445916cd2Sjpk 		loop = B_TRUE;
131545916cd2Sjpk 		newport = upward ? 1 : 65535;
131645916cd2Sjpk 	}
131745916cd2Sjpk 	return (newport == port ? 0 : newport);
131845916cd2Sjpk }
131945916cd2Sjpk 
132045916cd2Sjpk /*
132145916cd2Sjpk  * tsol_mlp_port_type will check if the given (zone, proto, port) is a
132245916cd2Sjpk  * multilevel port.  If it is, return the type (shared, private, or both), or
132345916cd2Sjpk  * indicate that it's single-level.
132445916cd2Sjpk  *
132545916cd2Sjpk  * Note: port is given in host byte order, not network byte order.
132645916cd2Sjpk  */
132745916cd2Sjpk mlp_type_t
tsol_mlp_port_type(zone_t * zone,uchar_t proto,uint16_t port,mlp_type_t mlptype)132845916cd2Sjpk tsol_mlp_port_type(zone_t *zone, uchar_t proto, uint16_t port,
132945916cd2Sjpk     mlp_type_t mlptype)
133045916cd2Sjpk {
133145916cd2Sjpk 	tsol_mlp_entry_t *tme;
133245916cd2Sjpk 
133345916cd2Sjpk 	if (mlptype == mlptBoth || mlptype == mlptPrivate) {
133445916cd2Sjpk 		tme = NULL;
133545916cd2Sjpk 		if (zone->zone_mlps.mlpl_first != NULL) {
133645916cd2Sjpk 			rw_enter(&zone->zone_mlps.mlpl_rwlock, RW_READER);
133745916cd2Sjpk 			for (tme = zone->zone_mlps.mlpl_first; tme != NULL;
133845916cd2Sjpk 			    tme = tme->mlpe_next) {
133945916cd2Sjpk 				if (proto == tme->mlpe_mlp.mlp_ipp &&
134045916cd2Sjpk 				    port >= tme->mlpe_mlp.mlp_port &&
134145916cd2Sjpk 				    port <= tme->mlpe_mlp.mlp_port_upper)
134245916cd2Sjpk 					break;
134345916cd2Sjpk 			}
134445916cd2Sjpk 			rw_exit(&zone->zone_mlps.mlpl_rwlock);
134545916cd2Sjpk 		}
134645916cd2Sjpk 		if (tme == NULL) {
134745916cd2Sjpk 			if (mlptype == mlptBoth)
134845916cd2Sjpk 				mlptype = mlptShared;
134945916cd2Sjpk 			else if (mlptype == mlptPrivate)
135045916cd2Sjpk 				mlptype = mlptSingle;
135145916cd2Sjpk 		}
135245916cd2Sjpk 	}
135345916cd2Sjpk 	if (mlptype == mlptBoth || mlptype == mlptShared) {
135445916cd2Sjpk 		tme = NULL;
135545916cd2Sjpk 		if (shared_mlps.mlpl_first != NULL) {
135645916cd2Sjpk 			rw_enter(&shared_mlps.mlpl_rwlock, RW_READER);
135745916cd2Sjpk 			for (tme = shared_mlps.mlpl_first; tme != NULL;
135845916cd2Sjpk 			    tme = tme->mlpe_next) {
135945916cd2Sjpk 				if (proto == tme->mlpe_mlp.mlp_ipp &&
136045916cd2Sjpk 				    port >= tme->mlpe_mlp.mlp_port &&
136145916cd2Sjpk 				    port <= tme->mlpe_mlp.mlp_port_upper)
136245916cd2Sjpk 					break;
136345916cd2Sjpk 			}
136445916cd2Sjpk 			rw_exit(&shared_mlps.mlpl_rwlock);
136545916cd2Sjpk 		}
136645916cd2Sjpk 		if (tme == NULL) {
136745916cd2Sjpk 			if (mlptype == mlptBoth)
136845916cd2Sjpk 				mlptype = mlptPrivate;
136945916cd2Sjpk 			else if (mlptype == mlptShared)
137045916cd2Sjpk 				mlptype = mlptSingle;
137145916cd2Sjpk 		}
137245916cd2Sjpk 	}
137345916cd2Sjpk 	return (mlptype);
137445916cd2Sjpk }
137545916cd2Sjpk 
137645916cd2Sjpk /*
137745916cd2Sjpk  * tsol_mlp_findzone will check if the given (proto, port) is a multilevel port
137845916cd2Sjpk  * on a shared address.  If it is, return the owning zone.
137945916cd2Sjpk  *
138045916cd2Sjpk  * Note: lport is in network byte order, unlike the other MLP functions,
138145916cd2Sjpk  * because the callers of this function are all dealing with packets off the
138245916cd2Sjpk  * wire.
138345916cd2Sjpk  */
138445916cd2Sjpk zoneid_t
tsol_mlp_findzone(uchar_t proto,uint16_t lport)138545916cd2Sjpk tsol_mlp_findzone(uchar_t proto, uint16_t lport)
138645916cd2Sjpk {
138745916cd2Sjpk 	tsol_mlp_entry_t *tme;
138845916cd2Sjpk 	zoneid_t zoneid;
138945916cd2Sjpk 	uint16_t port;
139045916cd2Sjpk 
139145916cd2Sjpk 	if (shared_mlps.mlpl_first == NULL)
139245916cd2Sjpk 		return (ALL_ZONES);
139345916cd2Sjpk 	port = ntohs(lport);
139445916cd2Sjpk 	rw_enter(&shared_mlps.mlpl_rwlock, RW_READER);
139545916cd2Sjpk 	for (tme = shared_mlps.mlpl_first; tme != NULL; tme = tme->mlpe_next) {
139645916cd2Sjpk 		if (proto == tme->mlpe_mlp.mlp_ipp &&
139745916cd2Sjpk 		    port >= tme->mlpe_mlp.mlp_port &&
139845916cd2Sjpk 		    port <= tme->mlpe_mlp.mlp_port_upper)
139945916cd2Sjpk 			break;
140045916cd2Sjpk 	}
140145916cd2Sjpk 	zoneid = tme == NULL ? ALL_ZONES : tme->mlpe_zoneid;
140245916cd2Sjpk 	rw_exit(&shared_mlps.mlpl_rwlock);
140345916cd2Sjpk 	return (zoneid);
140445916cd2Sjpk }
140545916cd2Sjpk 
140645916cd2Sjpk /* Debug routine */
140745916cd2Sjpk void
tsol_print_label(const blevel_t * blev,const char * name)140845916cd2Sjpk tsol_print_label(const blevel_t *blev, const char *name)
140945916cd2Sjpk {
141045916cd2Sjpk 	const _blevel_impl_t *bli = (const _blevel_impl_t *)blev;
141145916cd2Sjpk 
141245916cd2Sjpk 	/* We really support only sensitivity labels */
141345916cd2Sjpk 	cmn_err(CE_NOTE, "%s %x:%x:%08x%08x%08x%08x%08x%08x%08x%08x",
141445916cd2Sjpk 	    name, bli->id, LCLASS(bli), ntohl(bli->_comps.c1),
141545916cd2Sjpk 	    ntohl(bli->_comps.c2), ntohl(bli->_comps.c3), ntohl(bli->_comps.c4),
141645916cd2Sjpk 	    ntohl(bli->_comps.c5), ntohl(bli->_comps.c6), ntohl(bli->_comps.c7),
141745916cd2Sjpk 	    ntohl(bli->_comps.c8));
141845916cd2Sjpk }
141945916cd2Sjpk 
142045916cd2Sjpk /*
142145916cd2Sjpk  * Name:	labelsys()
142245916cd2Sjpk  *
142345916cd2Sjpk  * Normal:	Routes TSOL syscalls.
142445916cd2Sjpk  *
142545916cd2Sjpk  * Output:	As defined for each TSOL syscall.
142645916cd2Sjpk  *		Returns ENOSYS for unrecognized calls.
142745916cd2Sjpk  */
142845916cd2Sjpk /* ARGSUSED */
142945916cd2Sjpk int
labelsys(int op,void * a1,void * a2,void * a3,void * a4,void * a5)143045916cd2Sjpk labelsys(int op, void *a1, void *a2, void *a3, void *a4, void *a5)
143145916cd2Sjpk {
143245916cd2Sjpk 	switch (op) {
143345916cd2Sjpk 	case TSOL_SYSLABELING:
143445916cd2Sjpk 		return (sys_labeling);
143545916cd2Sjpk 	case TSOL_TNRH:
143645916cd2Sjpk 		return (tnrh((int)(uintptr_t)a1, a2));
143745916cd2Sjpk 	case TSOL_TNRHTP:
143845916cd2Sjpk 		return (tnrhtp((int)(uintptr_t)a1, a2));
143945916cd2Sjpk 	case TSOL_TNMLP:
144045916cd2Sjpk 		return (tnmlp((int)(uintptr_t)a1, a2));
144145916cd2Sjpk 	case TSOL_GETLABEL:
144245916cd2Sjpk 		return (getlabel((char *)a1, (bslabel_t *)a2));
144345916cd2Sjpk 	case TSOL_FGETLABEL:
144445916cd2Sjpk 		return (fgetlabel((int)(uintptr_t)a1, (bslabel_t *)a2));
144545916cd2Sjpk 	default:
144645916cd2Sjpk 		return (set_errno(ENOSYS));
144745916cd2Sjpk 	}
144845916cd2Sjpk 	/* NOTREACHED */
144945916cd2Sjpk }
1450