1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/stream.h>
28#include <sys/strsubr.h>
29#include <sys/stropts.h>
30#include <sys/sunddi.h>
31#include <sys/cred.h>
32#include <sys/debug.h>
33#include <sys/kmem.h>
34#include <sys/errno.h>
35#include <sys/disp.h>
36#include <netinet/in.h>
37#include <netinet/in_systm.h>
38#include <netinet/ip.h>
39#include <netinet/ip_icmp.h>
40#include <netinet/tcp.h>
41#include <inet/common.h>
42#include <inet/ipclassifier.h>
43#include <inet/ip.h>
44#include <inet/mib2.h>
45#include <inet/nd.h>
46#include <inet/tcp.h>
47#include <inet/ip_rts.h>
48#include <inet/ip_ire.h>
49#include <inet/ip_if.h>
50#include <sys/modhash.h>
51
52#include <sys/tsol/label.h>
53#include <sys/tsol/label_macro.h>
54#include <sys/tsol/tnet.h>
55#include <sys/tsol/tndb.h>
56#include <sys/strsun.h>
57
/*
 * tunable for strict error-reply behavior (TCP RST and ICMP Unreachable).
 * As a file-scope object without an initializer it starts out as 0.
 */
int tsol_strict_error;
60
61/*
62 * Some notes on the Trusted Solaris IRE gateway security attributes:
63 *
64 * When running in Trusted mode, the routing subsystem determines whether or
65 * not a packet can be delivered to an off-link host (not directly reachable
66 * through an interface) based on the accreditation checks of the packet's
67 * security attributes against those associated with the next-hop gateway.
68 *
69 * The next-hop gateway's security attributes can be derived from two sources
70 * (in order of preference): route-related and the host database.  A Trusted
71 * system must be configured with at least the host database containing an
72 * entry for the next-hop gateway, or otherwise no accreditation checks can
73 * be performed, which may result in the inability to send packets to any
74 * off-link destination host.
75 *
76 * The major differences between the two sources are the number and type of
77 * security attributes used for accreditation checks.  A host database entry
78 * can contain at most one set of security attributes, specific only to the
 * next-hop gateway.  In contrast, route-related security attributes are made
80 * up of a collection of security attributes for the distant networks, and
81 * are grouped together per next-hop gateway used to reach those networks.
82 * This is the preferred method, and the routing subsystem will fallback to
83 * the host database entry only if there are no route-related attributes
84 * associated with the next-hop gateway.
85 *
86 * In Trusted mode, all of the IRE entries (except LOCAL/LOOPBACK/BROADCAST/
87 * INTERFACE type) are initialized to contain a placeholder to store this
88 * information.  The ire_gw_secattr structure gets allocated, initialized
89 * and associated with the IRE during the time of the IRE creation.  The
90 * initialization process also includes resolving the host database entry
91 * of the next-hop gateway for fallback purposes.  It does not include any
92 * route-related attribute setup, as that process comes separately as part
93 * of the route requests (add/change) made to the routing subsystem.
94 *
 * The underlying logic, which involves associating IREs with the gateway
 * security attributes, is represented by the following data structures:
97 *
98 * tsol_gcdb_t, or "gcdb"
99 *
100 *	- This is a system-wide collection of records containing the
101 *	  currently used route-related security attributes, which are fed
102 *	  through the routing socket interface, e.g. "route add/change".
103 *
104 * tsol_gc_t, or "gc"
105 *
 *	- This is the gateway credential structure, and it provides the
 *	  only mechanism to access the contents of gcdb.  More than one gc
 *	  entry may refer to the same gcdb record.  gc's in the system are
109 *	  grouped according to the next-hop gateway address.
110 *
111 * tsol_gcgrp_t, or "gcgrp"
112 *
113 *	- Group of gateway credentials, and is unique per next-hop gateway
114 *	  address.  When the group is not empty, i.e. when gcgrp_count is
115 *	  greater than zero, it contains one or more gc's, each pointing to
116 *	  a gcdb record which indicates the gateway security attributes
117 *	  associated with the next-hop gateway.
118 *
119 * The fields of the tsol_ire_gw_secattr_t used from within the IRE are:
120 *
121 * igsa_lock
122 *
123 *	- Lock that protects all fields within tsol_ire_gw_secattr_t.
124 *
125 * igsa_rhc
126 *
127 *	- Remote host cache database entry of next-hop gateway.  This is
128 *	  used in the case when there are no route-related attributes
129 *	  configured for the IRE.
130 *
131 * igsa_gc
132 *
133 *	- A set of route-related attributes that only get set for prefix
134 *	  IREs.  If this is non-NULL, the prefix IRE has been associated
135 *	  with a set of gateway security attributes by way of route add/
136 *	  change functionality.
137 */
138
/*
 * kmem cache from which tsol_ire_gw_secattr_t buffers are allocated; created
 * in tnet_init() and destroyed in tnet_fini().
 */
static kmem_cache_t *ire_gw_secattr_cache;

/*
 * Default values for gcdb_hash_size and gcgrp_hash_size, which are passed as
 * the size argument to mod_hash_create_extended() in tnet_init().
 */
#define	GCDB_HASH_SIZE	101
#define	GCGRP_HASH_SIZE	101
143
/*
 * Drop one reference on the gcdb record `p'.  The macro acquires and
 * releases gcdb_lock itself.  When the count reaches zero the record is
 * handed to gcdb_inactive(), which removes it from gcdb_hash and frees it,
 * so `p' must not be used after this macro returns.  `p' is expanded more
 * than once and therefore must be free of side effects.
 */
#define	GCDB_REFRELE(p) {		\
	mutex_enter(&gcdb_lock);	\
	ASSERT((p)->gcdb_refcnt > 0);	\
	if (--((p)->gcdb_refcnt) == 0)	\
		gcdb_inactive(p);	\
	ASSERT(MUTEX_HELD(&gcdb_lock));	\
	mutex_exit(&gcdb_lock);		\
}
152
/* Table sizes handed to mod_hash_create_extended() in tnet_init(). */
static int gcdb_hash_size = GCDB_HASH_SIZE;
static int gcgrp_hash_size = GCGRP_HASH_SIZE;
/* gcdb records, hashed by gcdb_hash_by_secattr() on their rtsa_s attributes. */
static mod_hash_t *gcdb_hash;
/* gcgrp entries, hashed by gateway address; one table per address family. */
static mod_hash_t *gcgrp4_hash;
static mod_hash_t *gcgrp6_hash;

/* Held around gcdb_hash lookups/updates and gcdb_refcnt changes. */
static kmutex_t gcdb_lock;
/* Held around gcgrp4_hash/gcgrp6_hash lookups/updates; has external linkage. */
kmutex_t gcgrp_lock;

/* Hash/compare callbacks and helpers for the gcdb table. */
static uint_t gcdb_hash_by_secattr(void *, mod_hash_key_t);
static int gcdb_hash_cmp(mod_hash_key_t, mod_hash_key_t);
static tsol_gcdb_t *gcdb_lookup(struct rtsa_s *, boolean_t);
static void gcdb_inactive(tsol_gcdb_t *);

/* Hash/compare callbacks shared by both gcgrp tables. */
static uint_t gcgrp_hash_by_addr(void *, mod_hash_key_t);
static int gcgrp_hash_cmp(mod_hash_key_t, mod_hash_key_t);

/* kmem cache constructor/destructor for tsol_ire_gw_secattr_t. */
static int ire_gw_secattr_constructor(void *, void *, int);
static void ire_gw_secattr_destructor(void *, void *);
172
173void
174tnet_init(void)
175{
176	ire_gw_secattr_cache = kmem_cache_create("ire_gw_secattr_cache",
177	    sizeof (tsol_ire_gw_secattr_t), 64, ire_gw_secattr_constructor,
178	    ire_gw_secattr_destructor, NULL, NULL, NULL, 0);
179
180	gcdb_hash = mod_hash_create_extended("gcdb_hash",
181	    gcdb_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
182	    gcdb_hash_by_secattr, NULL, gcdb_hash_cmp, KM_SLEEP);
183
184	gcgrp4_hash = mod_hash_create_extended("gcgrp4_hash",
185	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
186	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
187
188	gcgrp6_hash = mod_hash_create_extended("gcgrp6_hash",
189	    gcgrp_hash_size, mod_hash_null_keydtor, mod_hash_null_valdtor,
190	    gcgrp_hash_by_addr, NULL, gcgrp_hash_cmp, KM_SLEEP);
191
192	mutex_init(&gcdb_lock, NULL, MUTEX_DEFAULT, NULL);
193	mutex_init(&gcgrp_lock, NULL, MUTEX_DEFAULT, NULL);
194}
195
196void
197tnet_fini(void)
198{
199	kmem_cache_destroy(ire_gw_secattr_cache);
200	mod_hash_destroy_hash(gcdb_hash);
201	mod_hash_destroy_hash(gcgrp4_hash);
202	mod_hash_destroy_hash(gcgrp6_hash);
203	mutex_destroy(&gcdb_lock);
204	mutex_destroy(&gcgrp_lock);
205}
206
207/* ARGSUSED */
208static int
209ire_gw_secattr_constructor(void *buf, void *cdrarg, int kmflags)
210{
211	tsol_ire_gw_secattr_t *attrp = buf;
212
213	mutex_init(&attrp->igsa_lock, NULL, MUTEX_DEFAULT, NULL);
214
215	attrp->igsa_rhc = NULL;
216	attrp->igsa_gc = NULL;
217
218	return (0);
219}
220
221/* ARGSUSED */
222static void
223ire_gw_secattr_destructor(void *buf, void *cdrarg)
224{
225	tsol_ire_gw_secattr_t *attrp = (tsol_ire_gw_secattr_t *)buf;
226
227	mutex_destroy(&attrp->igsa_lock);
228}
229
230tsol_ire_gw_secattr_t *
231ire_gw_secattr_alloc(int kmflags)
232{
233	return (kmem_cache_alloc(ire_gw_secattr_cache, kmflags));
234}
235
236void
237ire_gw_secattr_free(tsol_ire_gw_secattr_t *attrp)
238{
239	ASSERT(MUTEX_NOT_HELD(&attrp->igsa_lock));
240
241	if (attrp->igsa_rhc != NULL) {
242		TNRHC_RELE(attrp->igsa_rhc);
243		attrp->igsa_rhc = NULL;
244	}
245
246	if (attrp->igsa_gc != NULL) {
247		GC_REFRELE(attrp->igsa_gc);
248		attrp->igsa_gc = NULL;
249	}
250
251	ASSERT(attrp->igsa_rhc == NULL);
252	ASSERT(attrp->igsa_gc == NULL);
253
254	kmem_cache_free(ire_gw_secattr_cache, attrp);
255}
256
257/* ARGSUSED */
258static uint_t
259gcdb_hash_by_secattr(void *hash_data, mod_hash_key_t key)
260{
261	const struct rtsa_s *rp = (struct rtsa_s *)key;
262	const uint32_t *up, *ue;
263	uint_t hash;
264	int i;
265
266	ASSERT(rp != NULL);
267
268	/* See comments in hash_bylabel in zone.c for details */
269	hash = rp->rtsa_doi + (rp->rtsa_doi << 1);
270	up = (const uint32_t *)&rp->rtsa_slrange;
271	ue = up + sizeof (rp->rtsa_slrange) / sizeof (*up);
272	i = 1;
273	while (up < ue) {
274		/* using 2^n + 1, 1 <= n <= 16 as source of many primes */
275		hash += *up + (*up << ((i % 16) + 1));
276		up++;
277		i++;
278	}
279	return (hash);
280}
281
282static int
283gcdb_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
284{
285	struct rtsa_s *rp1 = (struct rtsa_s *)key1;
286	struct rtsa_s *rp2 = (struct rtsa_s *)key2;
287
288	ASSERT(rp1 != NULL && rp2 != NULL);
289
290	if (blequal(&rp1->rtsa_slrange.lower_bound,
291	    &rp2->rtsa_slrange.lower_bound) &&
292	    blequal(&rp1->rtsa_slrange.upper_bound,
293	    &rp2->rtsa_slrange.upper_bound) &&
294	    rp1->rtsa_doi == rp2->rtsa_doi)
295		return (0);
296
297	/* No match; not found */
298	return (-1);
299}
300
301/* ARGSUSED */
302static uint_t
303gcgrp_hash_by_addr(void *hash_data, mod_hash_key_t key)
304{
305	tsol_gcgrp_addr_t *ga = (tsol_gcgrp_addr_t *)key;
306	uint_t		idx = 0;
307	uint32_t	*ap;
308
309	ASSERT(ga != NULL);
310	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
311
312	ap = (uint32_t *)&ga->ga_addr.s6_addr32[0];
313	idx ^= *ap++;
314	idx ^= *ap++;
315	idx ^= *ap++;
316	idx ^= *ap;
317
318	return (idx);
319}
320
321static int
322gcgrp_hash_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
323{
324	tsol_gcgrp_addr_t *ga1 = (tsol_gcgrp_addr_t *)key1;
325	tsol_gcgrp_addr_t *ga2 = (tsol_gcgrp_addr_t *)key2;
326
327	ASSERT(ga1 != NULL && ga2 != NULL);
328
329	/* Address family must match */
330	if (ga1->ga_af != ga2->ga_af)
331		return (-1);
332
333	if (ga1->ga_addr.s6_addr32[0] == ga2->ga_addr.s6_addr32[0] &&
334	    ga1->ga_addr.s6_addr32[1] == ga2->ga_addr.s6_addr32[1] &&
335	    ga1->ga_addr.s6_addr32[2] == ga2->ga_addr.s6_addr32[2] &&
336	    ga1->ga_addr.s6_addr32[3] == ga2->ga_addr.s6_addr32[3])
337		return (0);
338
339	/* No match; not found */
340	return (-1);
341}
342
343#define	RTSAFLAGS	"\20\11cipso\3doi\2max_sl\1min_sl"
344
345int
346rtsa_validate(const struct rtsa_s *rp)
347{
348	uint32_t mask = rp->rtsa_mask;
349
350	/* RTSA_CIPSO must be set, and DOI must not be zero */
351	if ((mask & RTSA_CIPSO) == 0 || rp->rtsa_doi == 0) {
352		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
353		    "rtsa(1) lacks flag or has 0 doi.",
354		    rtsa_s *, rp);
355		return (EINVAL);
356	}
357	/*
358	 * SL range must be specified, and it must have its
359	 * upper bound dominating its lower bound.
360	 */
361	if ((mask & RTSA_SLRANGE) != RTSA_SLRANGE ||
362	    !bldominates(&rp->rtsa_slrange.upper_bound,
363	    &rp->rtsa_slrange.lower_bound)) {
364		DTRACE_PROBE2(tx__gcdb__log__error__rtsa__validate, char *,
365		    "rtsa(1) min_sl and max_sl not set or max_sl is "
366		    "not dominating.", rtsa_s *, rp);
367		return (EINVAL);
368	}
369	return (0);
370}
371
372/*
373 * A brief explanation of the reference counting scheme:
374 *
 * Apart from dynamic references due to reference holds done
376 * actively by threads, we have the following references:
377 *
378 * gcdb_refcnt:
379 *	- Every tsol_gc_t pointing to a tsol_gcdb_t contributes a reference
380 *	  to the gcdb_refcnt.
381 *
382 * gc_refcnt:
383 *	- A prefix IRE that points to an igsa_gc contributes a reference
384 *	  to the gc_refcnt.
385 *
386 * gcgrp_refcnt:
387 *	- Every tsol_gc_t in the chain headed by tsol_gcgrp_t contributes
388 *	  a reference to the gcgrp_refcnt.
389 */
390static tsol_gcdb_t *
391gcdb_lookup(struct rtsa_s *rp, boolean_t alloc)
392{
393	tsol_gcdb_t *gcdb = NULL;
394
395	if (rtsa_validate(rp) != 0)
396		return (NULL);
397
398	mutex_enter(&gcdb_lock);
399	/* Find a copy in the cache; otherwise, create one and cache it */
400	if (mod_hash_find(gcdb_hash, (mod_hash_key_t)rp,
401	    (mod_hash_val_t *)&gcdb) == 0) {
402		gcdb->gcdb_refcnt++;
403		ASSERT(gcdb->gcdb_refcnt != 0);
404
405		DTRACE_PROBE2(tx__gcdb__log__info__gcdb__lookup, char *,
406		    "gcdb(1) is in gcdb_hash(global)", tsol_gcdb_t *, gcdb);
407	} else if (alloc) {
408		gcdb = kmem_zalloc(sizeof (*gcdb), KM_NOSLEEP);
409		if (gcdb != NULL) {
410			gcdb->gcdb_refcnt = 1;
411			gcdb->gcdb_mask = rp->rtsa_mask;
412			gcdb->gcdb_doi = rp->rtsa_doi;
413			gcdb->gcdb_slrange = rp->rtsa_slrange;
414
415			if (mod_hash_insert(gcdb_hash,
416			    (mod_hash_key_t)&gcdb->gcdb_attr,
417			    (mod_hash_val_t)gcdb) != 0) {
418				mutex_exit(&gcdb_lock);
419				kmem_free(gcdb, sizeof (*gcdb));
420				return (NULL);
421			}
422
423			DTRACE_PROBE2(tx__gcdb__log__info__gcdb__insert, char *,
424			    "gcdb(1) inserted in gcdb_hash(global)",
425			    tsol_gcdb_t *, gcdb);
426		}
427	}
428	mutex_exit(&gcdb_lock);
429	return (gcdb);
430}
431
432static void
433gcdb_inactive(tsol_gcdb_t *gcdb)
434{
435	ASSERT(MUTEX_HELD(&gcdb_lock));
436	ASSERT(gcdb != NULL && gcdb->gcdb_refcnt == 0);
437
438	(void) mod_hash_remove(gcdb_hash, (mod_hash_key_t)&gcdb->gcdb_attr,
439	    (mod_hash_val_t *)&gcdb);
440
441	DTRACE_PROBE2(tx__gcdb__log__info__gcdb__remove, char *,
442	    "gcdb(1) removed from gcdb_hash(global)",
443	    tsol_gcdb_t *, gcdb);
444	kmem_free(gcdb, sizeof (*gcdb));
445}
446
447tsol_gc_t *
448gc_create(struct rtsa_s *rp, tsol_gcgrp_t *gcgrp, boolean_t *gcgrp_xtrarefp)
449{
450	tsol_gc_t *gc;
451	tsol_gcdb_t *gcdb;
452
453	*gcgrp_xtrarefp = B_TRUE;
454
455	rw_enter(&gcgrp->gcgrp_rwlock, RW_WRITER);
456	if ((gcdb = gcdb_lookup(rp, B_TRUE)) == NULL) {
457		rw_exit(&gcgrp->gcgrp_rwlock);
458		return (NULL);
459	}
460
461	for (gc = gcgrp->gcgrp_head; gc != NULL; gc = gc->gc_next) {
462		if (gc->gc_db == gcdb) {
463			ASSERT(gc->gc_grp == gcgrp);
464
465			gc->gc_refcnt++;
466			ASSERT(gc->gc_refcnt != 0);
467
468			GCDB_REFRELE(gcdb);
469
470			DTRACE_PROBE3(tx__gcdb__log__info__gc__create,
471			    char *, "found gc(1) in gcgrp(2)",
472			    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
473			rw_exit(&gcgrp->gcgrp_rwlock);
474			return (gc);
475		}
476	}
477
478	gc = kmem_zalloc(sizeof (*gc), KM_NOSLEEP);
479	if (gc != NULL) {
480		if (gcgrp->gcgrp_head == NULL) {
481			gcgrp->gcgrp_head = gcgrp->gcgrp_tail = gc;
482		} else {
483			gcgrp->gcgrp_tail->gc_next = gc;
484			gc->gc_prev = gcgrp->gcgrp_tail;
485			gcgrp->gcgrp_tail = gc;
486		}
487		gcgrp->gcgrp_count++;
488		ASSERT(gcgrp->gcgrp_count != 0);
489
490		/* caller has incremented gcgrp reference for us */
491		gc->gc_grp = gcgrp;
492
493		gc->gc_db = gcdb;
494		gc->gc_refcnt = 1;
495
496		DTRACE_PROBE3(tx__gcdb__log__info__gc__create, char *,
497		    "added gc(1) to gcgrp(2)", tsol_gc_t *, gc,
498		    tsol_gcgrp_t *, gcgrp);
499
500		*gcgrp_xtrarefp = B_FALSE;
501	}
502	rw_exit(&gcgrp->gcgrp_rwlock);
503
504	return (gc);
505}
506
507void
508gc_inactive(tsol_gc_t *gc)
509{
510	tsol_gcgrp_t *gcgrp = gc->gc_grp;
511
512	ASSERT(gcgrp != NULL);
513	ASSERT(RW_WRITE_HELD(&gcgrp->gcgrp_rwlock));
514	ASSERT(gc->gc_refcnt == 0);
515
516	if (gc->gc_prev != NULL)
517		gc->gc_prev->gc_next = gc->gc_next;
518	else
519		gcgrp->gcgrp_head = gc->gc_next;
520	if (gc->gc_next != NULL)
521		gc->gc_next->gc_prev = gc->gc_prev;
522	else
523		gcgrp->gcgrp_tail = gc->gc_prev;
524	ASSERT(gcgrp->gcgrp_count > 0);
525	gcgrp->gcgrp_count--;
526
527	/* drop lock before it's destroyed */
528	rw_exit(&gcgrp->gcgrp_rwlock);
529
530	DTRACE_PROBE3(tx__gcdb__log__info__gc__remove, char *,
531	    "removed inactive gc(1) from gcgrp(2)",
532	    tsol_gc_t *, gc, tsol_gcgrp_t *, gcgrp);
533
534	GCGRP_REFRELE(gcgrp);
535
536	gc->gc_grp = NULL;
537	gc->gc_prev = gc->gc_next = NULL;
538
539	if (gc->gc_db != NULL)
540		GCDB_REFRELE(gc->gc_db);
541
542	kmem_free(gc, sizeof (*gc));
543}
544
545tsol_gcgrp_t *
546gcgrp_lookup(tsol_gcgrp_addr_t *ga, boolean_t alloc)
547{
548	tsol_gcgrp_t *gcgrp = NULL;
549	mod_hash_t *hashp;
550
551	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
552
553	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
554
555	mutex_enter(&gcgrp_lock);
556	if (mod_hash_find(hashp, (mod_hash_key_t)ga,
557	    (mod_hash_val_t *)&gcgrp) == 0) {
558		gcgrp->gcgrp_refcnt++;
559		ASSERT(gcgrp->gcgrp_refcnt != 0);
560
561		DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__lookup, char *,
562		    "found gcgrp(1) in hash(2)", tsol_gcgrp_t *, gcgrp,
563		    mod_hash_t *, hashp);
564
565	} else if (alloc) {
566		gcgrp = kmem_zalloc(sizeof (*gcgrp), KM_NOSLEEP);
567		if (gcgrp != NULL) {
568			gcgrp->gcgrp_refcnt = 1;
569			rw_init(&gcgrp->gcgrp_rwlock, NULL, RW_DEFAULT, NULL);
570			bcopy(ga, &gcgrp->gcgrp_addr, sizeof (*ga));
571
572			if (mod_hash_insert(hashp,
573			    (mod_hash_key_t)&gcgrp->gcgrp_addr,
574			    (mod_hash_val_t)gcgrp) != 0) {
575				mutex_exit(&gcgrp_lock);
576				kmem_free(gcgrp, sizeof (*gcgrp));
577				return (NULL);
578			}
579
580			DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__insert,
581			    char *, "inserted gcgrp(1) in hash(2)",
582			    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
583		}
584	}
585	mutex_exit(&gcgrp_lock);
586	return (gcgrp);
587}
588
589void
590gcgrp_inactive(tsol_gcgrp_t *gcgrp)
591{
592	tsol_gcgrp_addr_t *ga;
593	mod_hash_t *hashp;
594
595	ASSERT(MUTEX_HELD(&gcgrp_lock));
596	ASSERT(gcgrp != NULL && gcgrp->gcgrp_refcnt == 0);
597	ASSERT(gcgrp->gcgrp_head == NULL && gcgrp->gcgrp_count == 0);
598
599	ga = &gcgrp->gcgrp_addr;
600	ASSERT(ga->ga_af == AF_INET || ga->ga_af == AF_INET6);
601
602	hashp = (ga->ga_af == AF_INET) ? gcgrp4_hash : gcgrp6_hash;
603	(void) mod_hash_remove(hashp, (mod_hash_key_t)ga,
604	    (mod_hash_val_t *)&gcgrp);
605	rw_destroy(&gcgrp->gcgrp_rwlock);
606
607	DTRACE_PROBE3(tx__gcdb__log__info__gcgrp__remove, char *,
608	    "removed inactive gcgrp(1) from hash(2)",
609	    tsol_gcgrp_t *, gcgrp, mod_hash_t *, hashp);
610
611	kmem_free(gcgrp, sizeof (*gcgrp));
612}
613
614
615/*
616 * Assign a sensitivity label to inbound traffic which arrived without
617 * an explicit on-the-wire label.
618 *
619 * In the case of CIPSO-type hosts, we assume packets arriving without
620 * a label are at the most sensitive label known for the host, most
621 * likely involving out-of-band key management traffic (such as IKE,
622 * etc.,)
623 */
624static boolean_t
625tsol_find_unlabeled_label(tsol_tpc_t *rhtp, bslabel_t *sl, uint32_t *doi)
626{
627	*doi = rhtp->tpc_tp.tp_doi;
628	switch (rhtp->tpc_tp.host_type) {
629	case UNLABELED:
630		*sl = rhtp->tpc_tp.tp_def_label;
631		break;
632	case SUN_CIPSO:
633		*sl = rhtp->tpc_tp.tp_sl_range_cipso.upper_bound;
634		break;
635	default:
636		return (B_FALSE);
637	}
638	setbltype(sl, SUN_SL_ID);
639	return (B_TRUE);
640}
641
642/*
643 * Converts CIPSO option to sensitivity label.
644 * Validity checks based on restrictions defined in
645 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) (draft-ietf-cipso-ipsecurity)
646 */
647static boolean_t
648cipso_to_sl(const uchar_t *option, bslabel_t *sl)
649{
650	const struct cipso_option *co = (const struct cipso_option *)option;
651	const struct cipso_tag_type_1 *tt1;
652
653	tt1 = (struct cipso_tag_type_1 *)&co->cipso_tag_type[0];
654	if (tt1->tag_type != 1 ||
655	    tt1->tag_length < TSOL_TT1_MIN_LENGTH ||
656	    tt1->tag_length > TSOL_TT1_MAX_LENGTH ||
657	    tt1->tag_length + TSOL_CIPSO_TAG_OFFSET > co->cipso_length)
658		return (B_FALSE);
659
660	bsllow(sl);	/* assumed: sets compartments to all zeroes */
661	LCLASS_SET((_bslabel_impl_t *)sl, tt1->tag_sl);
662	bcopy(tt1->tag_cat, &((_bslabel_impl_t *)sl)->compartments,
663	    tt1->tag_length - TSOL_TT1_MIN_LENGTH);
664	return (B_TRUE);
665}
666
/*
 * If present, parse the CIPSO label in the incoming packet and
 * construct a ts_label_t that reflects the CIPSO label and put it in
 * the ip_recv_attr_t. Later as the packet flows up through the stack any
 * code that needs to examine the packet label can inspect the label
 * from the ira_tsl. This function is
 * called right in ip_input for all packets, i.e. locally destined and
 * to be forwarded packets. The forwarding path needs to examine the label
 * to determine how to forward the packet.
 *
 * This routine pulls all message text up into the first mblk.
 * For IPv4, only the first 20 bytes of the IP header are guaranteed
 * to exist. For IPv6, only the IPv6 header is guaranteed to exist.
 *
 * mp		the packet; must be M_DATA
 * version	IPV4_VERSION selects IPv4 parsing, anything else IPv6
 * ira		receive attributes; on success ira_cred, ira_tsl and
 *		IRA_FREE_CRED in ira_free_flags are updated
 *
 * Returns B_TRUE when the packet was labeled, and also for the exempt
 * unlabeled packet classes handled under OPT_NONE below (for those, ira is
 * left unmodified).  Returns B_FALSE when the pullup fails, the option
 * cannot be parsed or is malformed, the source has no usable template, the
 * label type is not recognized, or the cred cannot be allocated.
 */
boolean_t
tsol_get_pkt_label(mblk_t *mp, int version, ip_recv_attr_t *ira)
{
	tsol_tpc_t	*src_rhtp = NULL;
	uchar_t		*opt_ptr = NULL;
	const ipha_t	*ipha;
	bslabel_t	sl;
	uint32_t	doi;
	tsol_ip_label_t	label_type;
	uint32_t	label_flags = 0; /* flags to set in label */
	const cipso_option_t *co;
	const void	*src;
	const ip6_t	*ip6h;
	cred_t		*credp;
	int 		proto;

	ASSERT(DB_TYPE(mp) == M_DATA);

	/* Make the whole message contiguous before parsing headers */
	if (mp->b_cont != NULL && !pullupmsg(mp, -1))
		return (B_FALSE);

	/* Only one of ipha/ip6h is set, depending on the IP version */
	if (version == IPV4_VERSION) {
		ASSERT(MBLKL(mp) >= IP_SIMPLE_HDR_LENGTH);
		ipha = (const ipha_t *)mp->b_rptr;
		src = &ipha->ipha_src;
		if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
			return (B_FALSE);
	} else {
		ASSERT(MBLKL(mp) >= IPV6_HDR_LEN);
		ip6h = (const ip6_t *)mp->b_rptr;
		src = &ip6h->ip6_src;
		if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
			return (B_FALSE);
	}

	switch (label_type) {
	case OPT_CIPSO:
		/*
		 * Convert the CIPSO label to the internal format; it is
		 * recorded in ira at the end of this function.
		 * Validity checks based on restrictions defined in
		 * COMMERCIAL IP SECURITY OPTION (CIPSO 2.2)
		 * (draft-ietf-cipso-ipsecurity)
		 */
		if (version == IPV6_VERSION && ip6opt_ls == 0)
			return (B_FALSE);
		co = (const struct cipso_option *)opt_ptr;
		if ((co->cipso_length <
		    TSOL_CIPSO_TAG_OFFSET + TSOL_TT1_MIN_LENGTH) ||
		    (co->cipso_length > IP_MAX_OPT_LENGTH))
			return (B_FALSE);
		/* the DOI is copied out bytewise and is in network order */
		bcopy(co->cipso_doi, &doi, sizeof (doi));
		doi = ntohl(doi);
		if (!cipso_to_sl(opt_ptr, &sl))
			return (B_FALSE);
		setbltype(&sl, SUN_SL_ID);

		/*
		 * If the source was unlabeled, then flag as such,
		 * (since CIPSO routers may add headers)
		 */

		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
			return (B_FALSE);

		if (src_rhtp->tpc_tp.host_type == UNLABELED)
			label_flags = TSLF_UNLABELED;

		TPC_RELE(src_rhtp);

		break;

	case OPT_NONE:
		/*
		 * Handle special cases that may not be labeled, even
		 * though the sending system may otherwise be configured as
		 * labeled.
		 *	- IGMP
		 *	- IPv4 ICMP Router Discovery
		 *	- IPv6 Neighbor Discovery
		 *	- IPsec ESP
		 *
		 * The cases recognized below return B_TRUE immediately,
		 * without attaching a label or cred to ira.
		 */
		if (version == IPV4_VERSION) {
			proto = ipha->ipha_protocol;
			if (proto == IPPROTO_IGMP)
				return (B_TRUE);
			if (proto == IPPROTO_ICMP) {
				const struct icmp *icmp = (const struct icmp *)
				    (mp->b_rptr + IPH_HDR_LENGTH(ipha));

				/* reject a truncated ICMP header */
				if ((uchar_t *)icmp + ICMP_MINLEN > mp->b_wptr)
					return (B_FALSE);
				if (icmp->icmp_type == ICMP_ROUTERADVERT ||
				    icmp->icmp_type == ICMP_ROUTERSOLICIT)
					return (B_TRUE);
			}
		} else {
			proto = ip6h->ip6_nxt;
			if (proto == IPPROTO_ICMPV6) {
				const icmp6_t *icmp6 = (const icmp6_t *)
				    (mp->b_rptr + IPV6_HDR_LEN);

				/* reject a truncated ICMPv6 header */
				if ((uchar_t *)icmp6 + ICMP6_MINLEN >
				    mp->b_wptr)
					return (B_FALSE);
				if (icmp6->icmp6_type >= MLD_LISTENER_QUERY &&
				    icmp6->icmp6_type <= ICMP6_MAX_INFO_TYPE)
					return (B_TRUE);
			}
		}

		/*
		 * Look up the tnrhtp database and get the implicit label
		 * that is associated with the sending host and attach
		 * it to the packet.
		 */
		if ((src_rhtp = find_tpc(src, version, B_FALSE)) == NULL)
			return (B_FALSE);

		/*
		 * If peer is label-aware, mark as "implicit" rather than
		 * "unlabeled" to cause appropriate mac-exempt processing
		 * to happen.
		 */
		if (src_rhtp->tpc_tp.host_type == SUN_CIPSO)
			label_flags = TSLF_IMPLICIT_IN;
		else if (src_rhtp->tpc_tp.host_type == UNLABELED)
			label_flags = TSLF_UNLABELED;
		else {
			DTRACE_PROBE2(tx__get__pkt__label, char *,
			    "template(1) has unknown hosttype",
			    tsol_tpc_t *, src_rhtp);
		}


		/*
		 * An unknown host type is rejected here as well:
		 * tsol_find_unlabeled_label() returns B_FALSE for it.
		 */
		if (!tsol_find_unlabeled_label(src_rhtp, &sl, &doi)) {
			TPC_RELE(src_rhtp);
			return (B_FALSE);
		}
		TPC_RELE(src_rhtp);
		break;

	default:
		return (B_FALSE);
	}

	/*
	 * Build a cred carrying the label.  An existing ira_cred is used as
	 * the template for the copy, and is released only if ira owned it
	 * (IRA_FREE_CRED); on allocation failure ira is left untouched.
	 */
	if (ira->ira_cred == NULL) {
		credp = newcred_from_bslabel(&sl, doi, KM_NOSLEEP);
		if (credp == NULL)
			return (B_FALSE);
	} else {
		credp = copycred_from_bslabel(ira->ira_cred, &sl, doi,
		    KM_NOSLEEP);
		if (credp == NULL)
			return (B_FALSE);
		if (ira->ira_free_flags & IRA_FREE_CRED) {
			crfree(ira->ira_cred);
			ira->ira_free_flags &= ~IRA_FREE_CRED;
			ira->ira_cred = NULL;
		}
	}

	/*
	 * Put the label in ira_tsl for convenience, while keeping
	 * the cred in ira_cred for getpeerucred which is used to get
	 * labels with TX.
	 * Note: no explicit refcnt/free_flag for ira_tsl. The free_flag
	 * for IRA_FREE_CRED is sufficient for both.
	 */
	ira->ira_tsl = crgetlabel(credp);
	ira->ira_cred = credp;
	ira->ira_free_flags |= IRA_FREE_CRED;

	ira->ira_tsl->tsl_flags |= label_flags;
	return (B_TRUE);
}
857
/*
 * This routine determines whether the given packet should be accepted locally.
 * It does a range/set check on the packet's label by looking up the given
 * address in the remote host database.
 *
 * mp		the packet; only passed to DTrace probes here
 * addr		address looked up with find_tpc() for the range/set check
 * version	IP version of `addr', passed to find_tpc()
 * ira		receive attributes; supplies ira_cred, ira_tsl and ira_flags
 * connp	the receiving connection
 *
 * Returns B_TRUE if the packet may be delivered to `connp', B_FALSE if it
 * must be dropped.
 */
boolean_t
tsol_receive_local(const mblk_t *mp, const void *addr, uchar_t version,
    ip_recv_attr_t *ira, const conn_t *connp)
{
	const cred_t *credp;
	ts_label_t *plabel, *conn_plabel;
	tsol_tpc_t *tp;
	boolean_t retv;
	const bslabel_t *label, *conn_label;
	boolean_t shared_addr = (ira->ira_flags & IRAF_TX_SHARED_ADDR);

	/*
	 * tsol_get_pkt_label intentionally avoids the labeling process for:
	 *	- IPv6 router and neighbor discovery as well as redirects.
	 *	- MLD packets. (Anything between ICMPv6 type 130 and 138.)
	 *	- IGMP packets.
	 *	- IPv4 router discovery.
	 * In those cases ira_cred is NULL.
	 */
	credp = ira->ira_cred;
	if (credp == NULL)
		return (B_TRUE);

	/*
	 * If this packet is from the inside (not a remote host) and has the
	 * same zoneid as the selected destination, then no checks are
	 * necessary.  Membership in the zone is enough proof.  This is
	 * intended to be a hot path through this function.
	 * Note: Using crgetzone here is ok since the peer is local.
	 */
	if (!crisremote(credp) &&
	    crgetzone(credp) == crgetzone(connp->conn_cred))
		return (B_TRUE);

	plabel = ira->ira_tsl;
	conn_plabel = crgetlabel(connp->conn_cred);
	ASSERT(plabel != NULL && conn_plabel != NULL);

	label = label2bslabel(plabel);
	conn_label = label2bslabel(conn_plabel);


	/*
	 * Implicitly labeled packets from label-aware sources
	 * go only to privileged receivers
	 */
	if ((plabel->tsl_flags & TSLF_IMPLICIT_IN) &&
	    (connp->conn_mac_mode != CONN_MAC_IMPLICIT)) {
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac_impl,
		    char *,
		    "implicitly labeled packet mp(1) for conn(2) "
		    "which isn't in implicit mac mode",
		    mblk_t *, mp, conn_t *, connp);

		return (B_FALSE);
	}


	/*
	 * MLPs are always validated using the range and set of the local
	 * address, even when the remote host is unlabeled.
	 *
	 * The consequent is deliberately empty: a matching MLP skips the
	 * label comparisons below and goes straight to the find_tpc()
	 * range/set check.
	 */
	if (connp->conn_mlp_type == mlptBoth ||
	/* LINTED: no consequent */
	    connp->conn_mlp_type == (shared_addr ? mlptShared : mlptPrivate)) {
		;

	/*
	 * If this is a packet from an unlabeled sender, then we must apply
	 * different rules.  If the label is equal to the zone's label, then
	 * it's allowed.  If it's not equal, but the zone is either the global
	 * zone or the label is dominated by the zone's label, then allow it
	 * as long as it's in the range configured for the destination.
	 */
	} else if (plabel->tsl_flags & TSLF_UNLABELED) {
		if (plabel->tsl_doi == conn_plabel->tsl_doi &&
		    blequal(label, conn_label))
			return (B_TRUE);

		/*
		 * Labels differ: a conn in the default MAC mode never
		 * accepts the packet, and a non-global zone additionally
		 * needs a matching DOI and a dominating conn label.
		 */
		if ((connp->conn_mac_mode == CONN_MAC_DEFAULT) ||
		    (!connp->conn_zone_is_global &&
		    (plabel->tsl_doi != conn_plabel->tsl_doi ||
		    !bldominates(conn_label, label)))) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__receivelocal__mac_unl,
			    char *,
			    "unlabeled packet mp(1) fails mac for conn(2)",
			    mblk_t *, mp, conn_t *, connp);
			return (B_FALSE);
		}

	/*
	 * If this is a packet from a labeled sender, verify the
	 * label on the packet matches the connection label.
	 */
	} else {
		if (plabel->tsl_doi != conn_plabel->tsl_doi ||
		    !blequal(label, conn_label)) {
			DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac__slp,
			    char *,
			    "packet mp(1) failed label match to SLP conn(2)",
			    mblk_t *, mp, conn_t *, connp);
			return (B_FALSE);
		}
		/*
		 * No further checks will be needed if this is a zone-
		 * specific address because (1) The process for bringing up
		 * the interface ensures the zone's label is within the zone-
		 * specific address's valid label range; (2) For cases where
		 * the conn is bound to the unspecified addresses, ip fanout
		 * logic ensures conn's zoneid equals the dest addr's zoneid;
		 * (3) Mac-exempt and mlp logic above already handle all
		 * cases where the zone label may not be the same as the
		 * conn label.
		 */
		if (!shared_addr)
			return (B_TRUE);
	}

	/*
	 * Final check: the packet label must fall within the range or set
	 * of the template found for `addr'.
	 */
	tp = find_tpc(addr, version, B_FALSE);
	if (tp == NULL) {
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__no__tnr,
		    char *, "dropping mp(1), host(2) lacks entry",
		    mblk_t *, mp, void *, addr);
		return (B_FALSE);
	}

	/*
	 * The local host address should not be unlabeled at this point.  The
	 * only way this can happen is that the destination isn't unicast.  We
	 * assume that the packet should not have had a label, and thus should
	 * have been handled by the TSLF_UNLABELED logic above.
	 */
	if (tp->tpc_tp.host_type == UNLABELED) {
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__flag, char *,
		    "mp(1) unlabeled source, but tp is not unlabeled.",
		    mblk_t *, mp, tsol_tpc_t *, tp);

	} else if (tp->tpc_tp.host_type != SUN_CIPSO) {
		/* only SUN_CIPSO templates are accepted from here on */
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__tptype, char *,
		    "delivering mp(1), found unrecognized tpc(2) type.",
		    mblk_t *, mp, tsol_tpc_t *, tp);

	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
		    "mp(1) could not be delievered to tp(2), doi mismatch",
		    mblk_t *, mp, tsol_tpc_t *, tp);

	} else if (!_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) &&
	    !blinlset(label, tp->tpc_tp.tp_sl_set_cipso)) {
		/* label is in neither the template's SL range nor its SL set */
		retv = B_FALSE;
		DTRACE_PROBE3(tx__ip__log__drop__receivelocal__mac, char *,
		    "mp(1) could not be delievered to tp(2), bad mac",
		    mblk_t *, mp, tsol_tpc_t *, tp);
	} else {
		retv = B_TRUE;
	}

	/* drop the template reference obtained from find_tpc() */
	TPC_RELE(tp);

	return (retv);
}
1028
1029boolean_t
1030tsol_can_accept_raw(mblk_t *mp, ip_recv_attr_t *ira, boolean_t check_host)
1031{
1032	ts_label_t	*plabel = NULL;
1033	tsol_tpc_t	*src_rhtp, *dst_rhtp;
1034	boolean_t	retv;
1035
1036	plabel = ira->ira_tsl;
1037
1038	/* We are bootstrapping or the internal template was never deleted */
1039	if (plabel == NULL)
1040		return (B_TRUE);
1041
1042	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1043		ipha_t *ipha = (ipha_t *)mp->b_rptr;
1044
1045		src_rhtp = find_tpc(&ipha->ipha_src, IPV4_VERSION,
1046		    B_FALSE);
1047		if (src_rhtp == NULL)
1048			return (B_FALSE);
1049		dst_rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION,
1050		    B_FALSE);
1051	} else {
1052		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
1053
1054		src_rhtp = find_tpc(&ip6h->ip6_src, IPV6_VERSION,
1055		    B_FALSE);
1056		if (src_rhtp == NULL)
1057			return (B_FALSE);
1058		dst_rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION,
1059		    B_FALSE);
1060	}
1061	if (dst_rhtp == NULL) {
1062		TPC_RELE(src_rhtp);
1063		return (B_FALSE);
1064	}
1065
1066	if (label2doi(plabel) != src_rhtp->tpc_tp.tp_doi) {
1067		retv = B_FALSE;
1068
1069	/*
1070	 * Check that the packet's label is in the correct range for labeled
1071	 * sender, or is equal to the default label for unlabeled sender.
1072	 */
1073	} else if ((src_rhtp->tpc_tp.host_type != UNLABELED &&
1074	    !_blinrange(label2bslabel(plabel),
1075	    &src_rhtp->tpc_tp.tp_sl_range_cipso) &&
1076	    !blinlset(label2bslabel(plabel),
1077	    src_rhtp->tpc_tp.tp_sl_set_cipso)) ||
1078	    (src_rhtp->tpc_tp.host_type == UNLABELED &&
1079	    !blequal(&plabel->tsl_label, &src_rhtp->tpc_tp.tp_def_label))) {
1080		retv = B_FALSE;
1081
1082	} else if (check_host) {
1083		retv = B_TRUE;
1084
1085	/*
1086	 * Until we have SL range in the Zone structure, pass it
1087	 * when our own address lookup returned an internal entry.
1088	 */
1089	} else switch (dst_rhtp->tpc_tp.host_type) {
1090	case UNLABELED:
1091		retv = B_TRUE;
1092		break;
1093
1094	case SUN_CIPSO:
1095		retv = _blinrange(label2bslabel(plabel),
1096		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) ||
1097		    blinlset(label2bslabel(plabel),
1098		    dst_rhtp->tpc_tp.tp_sl_set_cipso);
1099		break;
1100
1101	default:
1102		retv = B_FALSE;
1103	}
1104	TPC_RELE(src_rhtp);
1105	TPC_RELE(dst_rhtp);
1106	return (retv);
1107}
1108
1109/*
1110 * This routine determines whether a response to a failed packet delivery or
1111 * connection should be sent back.  By default, the policy is to allow such
1112 * messages to be sent at all times, as these messages reveal little useful
1113 * information and are healthy parts of TCP/IP networking.
1114 *
1115 * If tsol_strict_error is set, then we do strict tests: if the packet label is
1116 * within the label range/set of this host/zone, return B_TRUE; otherwise
1117 * return B_FALSE, which causes the packet to be dropped silently.
1118 *
1119 * Note that tsol_get_pkt_label will cause the packet to drop if the sender is
1120 * marked as labeled in the remote host database, but the packet lacks a label.
1121 * This means that we don't need to do a lookup on the source; the
1122 * TSLF_UNLABELED flag is sufficient.
1123 */
1124boolean_t
1125tsol_can_reply_error(const mblk_t *mp, ip_recv_attr_t *ira)
1126{
1127	ts_label_t	*plabel = NULL;
1128	tsol_tpc_t	*rhtp;
1129	const ipha_t	*ipha;
1130	const ip6_t	*ip6h;
1131	boolean_t	retv;
1132	bslabel_t	*pktbs;
1133
1134	/* Caller must pull up at least the IP header */
1135	ASSERT(MBLKL(mp) >= (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION ?
1136	    sizeof (*ipha) : sizeof (*ip6h)));
1137
1138	if (!tsol_strict_error)
1139		return (B_TRUE);
1140
1141	plabel = ira->ira_tsl;
1142
1143	/* We are bootstrapping or the internal template was never deleted */
1144	if (plabel == NULL)
1145		return (B_TRUE);
1146
1147	if (plabel->tsl_flags & TSLF_IMPLICIT_IN) {
1148		DTRACE_PROBE3(tx__ip__log__drop__replyerror__unresolved__label,
1149		    char *,
1150		    "cannot send error report for packet mp(1) with "
1151		    "unresolved security label sl(2)",
1152		    mblk_t *, mp, ts_label_t *, plabel);
1153		return (B_FALSE);
1154	}
1155
1156
1157	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
1158		ipha = (const ipha_t *)mp->b_rptr;
1159		rhtp = find_tpc(&ipha->ipha_dst, IPV4_VERSION, B_FALSE);
1160	} else {
1161		ip6h = (const ip6_t *)mp->b_rptr;
1162		rhtp = find_tpc(&ip6h->ip6_dst, IPV6_VERSION, B_FALSE);
1163	}
1164
1165	if (rhtp == NULL || label2doi(plabel) != rhtp->tpc_tp.tp_doi) {
1166		retv = B_FALSE;
1167	} else {
1168		/*
1169		 * If we're in the midst of forwarding, then the destination
1170		 * address might not be labeled.  In that case, allow unlabeled
1171		 * packets through only if the default label is the same, and
1172		 * labeled ones if they dominate.
1173		 */
1174		pktbs = label2bslabel(plabel);
1175		switch (rhtp->tpc_tp.host_type) {
1176		case UNLABELED:
1177			if (plabel->tsl_flags & TSLF_UNLABELED) {
1178				retv = blequal(pktbs,
1179				    &rhtp->tpc_tp.tp_def_label);
1180			} else {
1181				retv = bldominates(pktbs,
1182				    &rhtp->tpc_tp.tp_def_label);
1183			}
1184			break;
1185
1186		case SUN_CIPSO:
1187			retv = _blinrange(pktbs,
1188			    &rhtp->tpc_tp.tp_sl_range_cipso) ||
1189			    blinlset(pktbs, rhtp->tpc_tp.tp_sl_set_cipso);
1190			break;
1191
1192		default:
1193			retv = B_FALSE;
1194			break;
1195		}
1196	}
1197
1198	if (rhtp != NULL)
1199		TPC_RELE(rhtp);
1200
1201	return (retv);
1202}
1203
1204/*
1205 * Finds the zone associated with the receive attributes.  Returns GLOBAL_ZONEID
1206 * if the zone cannot be located.
1207 *
1208 * This is used by the classifier when the packet matches an ALL_ZONES IRE, and
1209 * there's no MLP defined.
1210 *
1211 * Note that we assume that this is only invoked in the ALL_ZONES case.
1212 * Handling other cases would require handling exclusive IP zones where either
1213 * this routine or the callers would have to map from
1214 * the zoneid (zone->zone_id) to what IP uses in conn_zoneid etc.
1215 */
1216zoneid_t
1217tsol_attr_to_zoneid(const ip_recv_attr_t *ira)
1218{
1219	zone_t *zone;
1220	ts_label_t *label;
1221
1222	if ((label = ira->ira_tsl) != NULL) {
1223		zone = zone_find_by_label(label);
1224		if (zone != NULL) {
1225			zoneid_t zoneid = zone->zone_id;
1226
1227			zone_rele(zone);
1228			return (zoneid);
1229		}
1230	}
1231	return (GLOBAL_ZONEID);
1232}
1233
/*
 * Checks whether traffic carrying label tsl may use the route described by
 * ire, judged against the security attributes of the route's gateway.
 *
 * Returns 0 when the label matches or when no check is performed (the system
 * is not labeled, the IRE is one of the types exempted below, or tsl is
 * NULL).  Returns EACCES when a label was supplied but the IRE has no gateway
 * security attributes, or when the label fails the DOI/range comparison.
 * Returns EINVAL when a gateway address was determined but no remote-host
 * entry could be found for it.
 *
 * Every hold and the group reader lock taken in this function are dropped
 * again at the "done" label before returning.
 */
int
tsol_ire_match_gwattr(ire_t *ire, const ts_label_t *tsl)
{
	int		error = 0;
	tsol_ire_gw_secattr_t *attrp = NULL;
	tsol_tnrhc_t	*gw_rhc = NULL;
	tsol_gcgrp_t	*gcgrp = NULL;
	tsol_gc_t	*gc = NULL;
	in_addr_t	ga_addr4;
	void		*paddr = NULL;

	/* Not in Trusted mode or IRE is local/loopback/broadcast/interface */
	if (!is_system_labeled() ||
	    (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK | IRE_BROADCAST |
	    IRE_IF_ALL | IRE_MULTICAST | IRE_NOROUTE)))
		goto done;

	/*
	 * If we don't have a label to compare with, or the IRE does not
	 * contain any gateway security attributes, there's not much that
	 * we can do.  We let the former case pass, and the latter fail,
	 * since the IRE doesn't qualify for a match due to the lack of
	 * security attributes.
	 */
	if (tsl == NULL || ire->ire_gw_secattr == NULL) {
		if (tsl != NULL) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__irematch__nogwsec, char *,
			    "ire(1) lacks ire_gw_secattr when matching "
			    "label(2)", ire_t *, ire, ts_label_t *, tsl);
			error = EACCES;
		}
		goto done;
	}

	attrp = ire->ire_gw_secattr;

	/*
	 * The possible lock order scenarios related to the tsol gateway
	 * attribute locks are documented at the beginning of ip.c in the
	 * lock order scenario section.
	 */
	mutex_enter(&attrp->igsa_lock);

	/*
	 * We seek the group
	 * structure which contains all security credentials of the gateway.
	 * An offline IRE is associated with at most one gateway credential.
	 *
	 * The group's reader lock and a group reference are taken here and
	 * kept until the "done" label.
	 */
	if ((gc = attrp->igsa_gc) != NULL) {
		gcgrp = gc->gc_grp;
		ASSERT(gcgrp != NULL);
		rw_enter(&gcgrp->gcgrp_rwlock, RW_READER);
		GCGRP_REFHOLD(gcgrp);
	}

	if ((gw_rhc = attrp->igsa_rhc) != NULL) {
		/*
		 * If our cached entry has grown stale, then discard it so we
		 * can get a new one.
		 */
		if (gw_rhc->rhc_invalid || gw_rhc->rhc_tpc->tpc_invalid) {
			/* Release it and clear the cached pointer. */
			TNRHC_RELE(gw_rhc);
			attrp->igsa_rhc = gw_rhc = NULL;
		} else {
			/* Still valid: take our own hold, dropped at "done". */
			TNRHC_HOLD(gw_rhc)
		}
	}

	/* Last attempt at loading the template had failed; try again */
	if (gw_rhc == NULL) {
		/*
		 * Choose the gateway address: from the credential group if
		 * there is one, otherwise from the IRE itself when the IRE
		 * is off-link.  paddr stays NULL if neither applies.
		 */
		if (gcgrp != NULL) {
			tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;

			if (ire->ire_ipversion == IPV4_VERSION) {
				ASSERT(ga->ga_af == AF_INET);
				IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
				paddr = &ga_addr4;
			} else {
				ASSERT(ga->ga_af == AF_INET6);
				paddr = &ga->ga_addr;
			}
		} else if (ire->ire_type & IRE_OFFLINK) {
			if (ire->ire_ipversion == IPV6_VERSION)
				paddr = &ire->ire_gateway_addr_v6;
			else if (ire->ire_ipversion == IPV4_VERSION)
				paddr = &ire->ire_gateway_addr;
		}

		/* We've found a gateway address to do the template lookup */
		if (paddr != NULL) {
			ASSERT(gw_rhc == NULL);
			gw_rhc = find_rhc(paddr, ire->ire_ipversion, B_FALSE);
			if (gw_rhc != NULL) {
				/*
				 * Note that if the lookup above returned an
				 * internal template, we'll use it for the
				 * time being, and do another lookup next
				 * time around.
				 */
				/* Another thread has loaded the template? */
				if (attrp->igsa_rhc != NULL) {
					TNRHC_RELE(gw_rhc)
					/* reload, it could be different */
					gw_rhc = attrp->igsa_rhc;
				} else {
					/* Cache the entry just looked up. */
					attrp->igsa_rhc = gw_rhc;
				}
				/*
				 * Hold an extra reference just like we did
				 * above prior to dropping the igsa_lock.
				 */
				TNRHC_HOLD(gw_rhc)
			}
		}
	}

	mutex_exit(&attrp->igsa_lock);
	/* Gateway template not found */
	if (gw_rhc == NULL) {
		/*
		 * If destination address is directly reachable through an
		 * interface rather than through a learned route, pass it.
		 * That is the paddr == NULL case; with a gateway address but
		 * no entry for it, the match fails with EINVAL.
		 */
		if (paddr != NULL) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__irematch__nogwtmpl, char *,
			    "ire(1), label(2) off-link with no gw_rhc",
			    ire_t *, ire, ts_label_t *, tsl);
			error = EINVAL;
		}
		goto done;
	}

	if (gc != NULL) {

		tsol_gcdb_t *gcdb;
		/*
		 * A gateway credential is attached to the IRE: compare the
		 * label's DOI and sensitivity label against the database
		 * entry of that single credential.  No loop is involved, even
		 * though the probe text below still says "all gc".
		 */
		ASSERT(gcgrp != NULL);
		gcdb = gc->gc_db;
		if (tsl->tsl_doi != gcdb->gcdb_doi ||
		    !_blinrange(&tsl->tsl_label, &gcdb->gcdb_slrange)) {
			DTRACE_PROBE3(
			    tx__ip__log__drop__irematch__nogcmatched,
			    char *, "ire(1), tsl(2): all gc failed match",
			    ire_t *, ire, ts_label_t *, tsl);
			error = EACCES;
		}
	} else {
		/*
		 * We didn't find any gateway credentials in the IRE
		 * attributes; fall back to the gateway's template for
		 * label range checks, if we are required to do so.
		 *
		 * Only SUN_CIPSO and UNLABELED templates are checked; any
		 * other host type leaves error at 0.
		 */
		ASSERT(gw_rhc != NULL);
		switch (gw_rhc->rhc_tpc->tpc_tp.host_type) {
		case SUN_CIPSO:
			/* Labeled gateway: use the template's CIPSO range/set */
			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
			    (!_blinrange(&tsl->tsl_label,
			    &gw_rhc->rhc_tpc->tpc_tp.tp_sl_range_cipso) &&
			    !blinlset(&tsl->tsl_label,
			    gw_rhc->rhc_tpc->tpc_tp.tp_sl_set_cipso))) {
				error = EACCES;
				DTRACE_PROBE4(
				    tx__ip__log__drop__irematch__deftmpl,
				    char *, "ire(1), tsl(2), gw_rhc(3) "
				    "failed match (cipso gw)",
				    ire_t *, ire, ts_label_t *, tsl,
				    tsol_tnrhc_t *, gw_rhc);
			}
			break;

		case UNLABELED:
			/* Unlabeled gateway: use the tp_gw_sl_* range/set */
			if (tsl->tsl_doi != gw_rhc->rhc_tpc->tpc_tp.tp_doi ||
			    (!_blinrange(&tsl->tsl_label,
			    &gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_range) &&
			    !blinlset(&tsl->tsl_label,
			    gw_rhc->rhc_tpc->tpc_tp.tp_gw_sl_set))) {
				error = EACCES;
				DTRACE_PROBE4(
				    tx__ip__log__drop__irematch__deftmpl,
				    char *, "ire(1), tsl(2), gw_rhc(3) "
				    "failed match (unlabeled gw)",
				    ire_t *, ire, ts_label_t *, tsl,
				    tsol_tnrhc_t *, gw_rhc);
			}
			break;
		}
	}

done:

	/* Undo the reader lock and group hold taken above, if any. */
	if (gcgrp != NULL) {
		rw_exit(&gcgrp->gcgrp_rwlock);
		GCGRP_REFRELE(gcgrp);
	}

	/* Drop the hold taken on the remote-host entry, if any. */
	if (gw_rhc != NULL)
		TNRHC_RELE(gw_rhc)

	return (error);
}
1443
/*
 * Performs label accreditation checks for packet forwarding.
 * Add or remove a CIPSO option as needed.
 *
 * Returns a pointer to the modified mblk if allowed for forwarding,
 * or NULL if the packet must be dropped.
 *
 * Note on the NULL return: only the tsol_check_label_v4/v6 failure path
 * calls freemsg() on the packet inside this function.  Every other drop path
 * returns NULL without freeing mp here.
 * NOTE(review): presumably the caller frees mp in those cases; confirm
 * against the callers.
 *
 * A packet with no label in ira->ira_tsl is returned unchanged.
 */
mblk_t *
tsol_ip_forward(ire_t *ire, mblk_t *mp, const ip_recv_attr_t *ira)
{
	tsol_ire_gw_secattr_t *attrp = NULL;
	ipha_t		*ipha;
	ip6_t		*ip6h;
	const void	*pdst;
	const void	*psrc;
	boolean_t	off_link;
	tsol_tpc_t	*dst_rhtp, *gw_rhtp;
	tsol_ip_label_t label_type;
	uchar_t		*opt_ptr = NULL;
	ts_label_t	*tsl;
	uint8_t		proto;
	int		af, adjust;
	uint16_t	iplen;
	boolean_t	need_tpc_rele = B_FALSE;
	ipaddr_t	*gw;
	ip_stack_t	*ipst = ire->ire_ipst;
	int		err;
	ts_label_t	*effective_tsl = NULL;

	ASSERT(ire != NULL && mp != NULL);
	/*
	 * Note that the ire is the first one found, i.e., an IRE_OFFLINK if
	 * the destination is offlink.
	 */

	af = (ire->ire_ipversion == IPV4_VERSION) ? AF_INET : AF_INET6;

	/*
	 * Parse the header according to the version found in the packet and
	 * find out which label option (if any) it carries.
	 */
	if (IPH_HDR_VERSION(mp->b_rptr) == IPV4_VERSION) {
		ASSERT(ire->ire_ipversion == IPV4_VERSION);
		ipha = (ipha_t *)mp->b_rptr;
		psrc = &ipha->ipha_src;
		pdst = &ipha->ipha_dst;
		proto = ipha->ipha_protocol;
		if (!tsol_get_option_v4(mp, &label_type, &opt_ptr))
			return (NULL);
	} else {
		ASSERT(ire->ire_ipversion == IPV6_VERSION);
		ip6h = (ip6_t *)mp->b_rptr;
		psrc = &ip6h->ip6_src;
		pdst = &ip6h->ip6_dst;
		proto = ip6h->ip6_nxt;

		/*
		 * The next header is not one of the common upper-layer
		 * protocols, so walk the extension headers; this also
		 * rejects malformed packets.
		 */
		if (proto != IPPROTO_TCP && proto != IPPROTO_UDP &&
		    proto != IPPROTO_ICMPV6) {
			uint8_t *nexthdrp;
			uint16_t hdr_len;

			if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_len,
			    &nexthdrp)) {
				/* malformed packet; drop it */
				return (NULL);
			}
			proto = *nexthdrp;
		}
		if (!tsol_get_option_v6(mp, &label_type, &opt_ptr))
			return (NULL);
	}
	/*
	 * off_link is TRUE if destination not directly reachable.
	 */
	off_link = (ire->ire_type & IRE_OFFLINK);

	/* No label recorded for this packet: forward it unchanged. */
	if ((tsl = ira->ira_tsl) == NULL)
		return (mp);

	if (tsl->tsl_flags & TSLF_IMPLICIT_IN) {
		DTRACE_PROBE3(tx__ip__log__drop__forward__unresolved__label,
		    char *,
		    "cannot forward packet mp(1) with unresolved "
		    "security label sl(2)",
		    mblk_t *, mp, ts_label_t *, tsl);

		return (NULL);
	}


	ASSERT(psrc != NULL && pdst != NULL);
	/* The hold on dst_rhtp is dropped at keep_label. */
	dst_rhtp = find_tpc(pdst, ire->ire_ipversion, B_FALSE);

	if (dst_rhtp == NULL) {
		/*
		 * Without a template we do not know if forwarding
		 * violates MAC
		 */
		DTRACE_PROBE3(tx__ip__log__drop__forward__nodst, char *,
		    "mp(1) dropped, no template for destination ip4|6(2)",
		    mblk_t *, mp, void *, pdst);
		return (NULL);
	}

	/*
	 * Gateway template must have existed for off-link destinations,
	 * since tsol_ire_match_gwattr has ensured such condition.
	 */
	if (ire->ire_ipversion == IPV4_VERSION && off_link) {
		/*
		 * Surya note: first check if we can get the gw_rhtp from
		 * the ire_gw_secattr->igsa_rhc; if this is null, then
		 * do a lookup based on the ire_addr (address of gw)
		 *
		 * In the first case the template pointer is used without
		 * taking a hold here; in the second case find_tpc() is
		 * called and need_tpc_rele records that keep_label must
		 * release it.
		 */
		if (ire->ire_gw_secattr != NULL &&
		    ire->ire_gw_secattr->igsa_rhc != NULL) {
			attrp = ire->ire_gw_secattr;
			gw_rhtp = attrp->igsa_rhc->rhc_tpc;
		} else  {
			gw = &ire->ire_gateway_addr;
			gw_rhtp = find_tpc(gw, ire->ire_ipversion, B_FALSE);
			need_tpc_rele = B_TRUE;
		}
		if (gw_rhtp == NULL) {
			DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
			    "mp(1) dropped, no gateway in ire attributes(2)",
			    mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
			mp = NULL;
			goto keep_label;
		}
	}
	/*
	 * IPv6: the assignments embedded in the condition load attrp and
	 * gw_rhtp from the IRE's cached attributes as a side effect.  The
	 * packet is dropped only when the destination is off-link and one of
	 * those pointers is NULL; there is no find_tpc() fallback here.
	 */
	if (ire->ire_ipversion == IPV6_VERSION &&
	    ((attrp = ire->ire_gw_secattr) == NULL || attrp->igsa_rhc == NULL ||
	    (gw_rhtp = attrp->igsa_rhc->rhc_tpc) == NULL) && off_link) {
		DTRACE_PROBE3(tx__ip__log__drop__forward__nogw, char *,
		    "mp(1) dropped, no gateway in ire attributes(2)",
		    mblk_t *, mp, tsol_ire_gw_secattr_t *, attrp);
		mp = NULL;
		goto keep_label;
	}

	/*
	 * Check that the label for the packet is acceptable
	 * by destination host; otherwise, drop it.
	 * Host types other than SUN_CIPSO and UNLABELED are not checked by
	 * this switch.
	 */
	switch (dst_rhtp->tpc_tp.host_type) {
	case SUN_CIPSO:
		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
		    (!_blinrange(&tsl->tsl_label,
		    &dst_rhtp->tpc_tp.tp_sl_range_cipso) &&
		    !blinlset(&tsl->tsl_label,
		    dst_rhtp->tpc_tp.tp_sl_set_cipso))) {
			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
			    "labeled packet mp(1) dropped, label(2) fails "
			    "destination(3) accredation check",
			    mblk_t *, mp, ts_label_t *, tsl,
			    tsol_tpc_t *, dst_rhtp);
			mp = NULL;
			goto keep_label;
		}
		break;


	case UNLABELED:
		if (tsl->tsl_doi != dst_rhtp->tpc_tp.tp_doi ||
		    !blequal(&dst_rhtp->tpc_tp.tp_def_label,
		    &tsl->tsl_label)) {
			DTRACE_PROBE4(tx__ip__log__drop__forward__mac, char *,
			    "unlabeled packet mp(1) dropped, label(2) fails "
			    "destination(3) accredation check",
			    mblk_t *, mp, ts_label_t *, tsl,
			    tsol_tpc_t *, dst_rhtp);
			mp = NULL;
			goto keep_label;
		}
		break;
	}
	if (label_type == OPT_CIPSO) {
		/*
		 * We keep the label on any of the following cases:
		 *
		 *   1. The destination is labeled (on/off-link).
		 *   2. The unlabeled destination is off-link,
		 *	and the next hop gateway is labeled.
		 *
		 * gw_rhtp is only dereferenced when off_link is set, which
		 * is the case in which it was loaded above.
		 */
		if (dst_rhtp->tpc_tp.host_type != UNLABELED ||
		    (off_link &&
		    gw_rhtp->tpc_tp.host_type != UNLABELED))
			goto keep_label;

		/*
		 * Strip off the CIPSO option from the packet because: the
		 * unlabeled destination host is directly reachable through
		 * an interface (on-link); or, the unlabeled destination host
		 * is not directly reachable (off-link), and the next hop
		 * gateway is unlabeled.
		 */
		adjust = (af == AF_INET) ? tsol_remove_secopt(ipha, MBLKL(mp)) :
		    tsol_remove_secopt_v6(ip6h, MBLKL(mp));

		ASSERT(adjust <= 0);
		if (adjust != 0) {

			/* adjust is negative */
			ASSERT((mp->b_wptr + adjust) >= mp->b_rptr);
			mp->b_wptr += adjust;
			/*
			 * Note that caller adjusts ira_pktlen and
			 * ira_ip_hdr_length
			 *
			 * For AF_INET6 note that tsol_remove_secopt_v6
			 * adjusted ip6_plen.
			 */
			if (af == AF_INET) {
				/* Fix the total length and redo the checksum */
				ipha = (ipha_t *)mp->b_rptr;
				iplen = ntohs(ipha->ipha_length) + adjust;
				ipha->ipha_length = htons(iplen);
				ipha->ipha_hdr_checksum = 0;
				ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
			}
			DTRACE_PROBE3(tx__ip__log__info__forward__adjust,
			    char *,
			    "mp(1) adjusted(2) for CIPSO option removal",
			    mblk_t *, mp, int, adjust);
		}
		goto keep_label;
	}

	ASSERT(label_type == OPT_NONE);
	ASSERT(dst_rhtp != NULL);

	/*
	 * We need to add CIPSO option if the destination or the next hop
	 * gateway is labeled.  Otherwise, pass the packet as is.
	 */
	if (dst_rhtp->tpc_tp.host_type == UNLABELED &&
	    (!off_link || gw_rhtp->tpc_tp.host_type == UNLABELED))
		goto keep_label;

	/*
	 * Since we are forwarding packets we use GLOBAL_ZONEID for
	 * the IRE lookup in tsol_check_label.
	 * Since mac_exempt is false the zoneid isn't used for anything
	 * but the IRE lookup, hence we set zone_is_global to false.
	 *
	 * mp is passed by address below, so it is re-read afterwards rather
	 * than assumed unchanged.
	 */
	if (af == AF_INET) {
		err = tsol_check_label_v4(tsl, GLOBAL_ZONEID, &mp,
		    CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
	} else {
		err = tsol_check_label_v6(tsl, GLOBAL_ZONEID, &mp,
		    CONN_MAC_DEFAULT, B_FALSE, ipst, &effective_tsl);
	}
	if (err != 0) {
		/* The only drop path that frees the packet in this function */
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
		ip_drop_output("tsol_check_label", mp, NULL);
		freemsg(mp);
		mp = NULL;
		goto keep_label;
	}

	/*
	 * The effective_tsl must never affect the routing decision, hence
	 * we ignore it here.
	 */
	if (effective_tsl != NULL)
		label_rele(effective_tsl);

	/* Recompute the IPv4 header checksum after the label check. */
	if (af == AF_INET) {
		ipha = (ipha_t *)mp->b_rptr;
		ipha->ipha_hdr_checksum = 0;
		ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
	}

	/*
	 * Common exit for everything past the destination lookup: drop the
	 * destination template hold, and the gateway template hold too if it
	 * came from find_tpc() in this function.
	 */
keep_label:
	TPC_RELE(dst_rhtp);
	if (need_tpc_rele && gw_rhtp != NULL)
		TPC_RELE(gw_rhtp);
	return (mp);
}
1719
1720/*
1721 * Name:	tsol_pmtu_adjust()
1722 *
1723 * Returns the adjusted mtu after removing security option.
1724 * Removes/subtracts the option if the packet's cred indicates an unlabeled
1725 * sender or if pkt_diff indicates this system enlarged the packet.
1726 */
1727uint32_t
1728tsol_pmtu_adjust(mblk_t *mp, uint32_t mtu, int pkt_diff, int af)
1729{
1730	int		label_adj = 0;
1731	uint32_t	min_mtu = IP_MIN_MTU;
1732	tsol_tpc_t	*src_rhtp;
1733	void		*src;
1734
1735	/*
1736	 * Note: label_adj is non-positive, indicating the number of
1737	 * bytes removed by removing the security option from the
1738	 * header.
1739	 */
1740	if (af == AF_INET6) {
1741		ip6_t	*ip6h;
1742
1743		min_mtu = IPV6_MIN_MTU;
1744		ip6h = (ip6_t *)mp->b_rptr;
1745		src = &ip6h->ip6_src;
1746		if ((src_rhtp = find_tpc(src, IPV6_VERSION, B_FALSE)) == NULL)
1747			return (mtu);
1748		if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED) {
1749			label_adj = tsol_remove_secopt_v6(
1750			    (ip6_t *)mp->b_rptr, MBLKL(mp));
1751		}
1752	} else {
1753		ipha_t    *ipha;
1754
1755		ASSERT(af == AF_INET);
1756		ipha = (ipha_t *)mp->b_rptr;
1757		src = &ipha->ipha_src;
1758		if ((src_rhtp = find_tpc(src, IPV4_VERSION, B_FALSE)) == NULL)
1759			return (mtu);
1760		if (pkt_diff > 0 || src_rhtp->tpc_tp.host_type == UNLABELED)
1761			label_adj = tsol_remove_secopt(
1762			    (ipha_t *)mp->b_rptr, MBLKL(mp));
1763	}
1764	/*
1765	 * Make pkt_diff non-negative and the larger of the bytes
1766	 * previously added (if any) or just removed, since label
1767	 * addition + subtraction may not be completely idempotent.
1768	 */
1769	if (pkt_diff < -label_adj)
1770		pkt_diff = -label_adj;
1771	if (pkt_diff > 0 && pkt_diff < mtu)
1772		mtu -= pkt_diff;
1773
1774	TPC_RELE(src_rhtp);
1775	return (MAX(mtu, min_mtu));
1776}
1777
1778/*
1779 * Name:	tsol_rtsa_init()
1780 *
1781 * Normal:	Sanity checks on the route security attributes provided by
1782 *		user.  Convert it into a route security parameter list to
1783 *		be returned to caller.
1784 *
1785 * Output:	EINVAL if bad security attributes in the routing message
1786 *		ENOMEM if unable to allocate data structures
1787 *		0 otherwise.
1788 *
1789 * Note:	On input, cp must point to the end of any addresses in
1790 *		the rt_msghdr_t structure.
1791 */
1792int
1793tsol_rtsa_init(rt_msghdr_t *rtm, tsol_rtsecattr_t *sp, caddr_t cp)
1794{
1795	uint_t	sacnt;
1796	int	err;
1797	caddr_t	lim;
1798	tsol_rtsecattr_t *tp;
1799
1800	ASSERT((cp >= (caddr_t)&rtm[1]) && sp != NULL);
1801
1802	/*
1803	 * In theory, we could accept as many security attributes configured
1804	 * per route destination.  However, the current design is limited
1805	 * such that at most only one set security attributes is allowed to
1806	 * be associated with a prefix IRE.  We therefore assert for now.
1807	 */
1808	/* LINTED */
1809	ASSERT(TSOL_RTSA_REQUEST_MAX == 1);
1810
1811	sp->rtsa_cnt = 0;
1812	lim = (caddr_t)rtm + rtm->rtm_msglen;
1813	ASSERT(cp <= lim);
1814
1815	if ((lim - cp) < sizeof (rtm_ext_t) ||
1816	    ((rtm_ext_t *)cp)->rtmex_type != RTMEX_GATEWAY_SECATTR)
1817		return (0);
1818
1819	if (((rtm_ext_t *)cp)->rtmex_len < sizeof (tsol_rtsecattr_t))
1820		return (EINVAL);
1821
1822	cp += sizeof (rtm_ext_t);
1823
1824	if ((lim - cp) < sizeof (*tp) ||
1825	    (tp = (tsol_rtsecattr_t *)cp, (sacnt = tp->rtsa_cnt) == 0) ||
1826	    (lim - cp) < TSOL_RTSECATTR_SIZE(sacnt))
1827		return (EINVAL);
1828
1829	/*
1830	 * Trying to add route security attributes when system
1831	 * labeling service is not available, or when user supllies
1832	 * more than the maximum number of security attributes
1833	 * allowed per request.
1834	 */
1835	if ((sacnt > 0 && !is_system_labeled()) ||
1836	    sacnt > TSOL_RTSA_REQUEST_MAX)
1837		return (EINVAL);
1838
1839	/* Ensure valid credentials */
1840	if ((err = rtsa_validate(&((tsol_rtsecattr_t *)cp)->
1841	    rtsa_attr[0])) != 0) {
1842		cp += sizeof (*sp);
1843		return (err);
1844	}
1845
1846	bcopy(cp, sp, sizeof (*sp));
1847	cp += sizeof (*sp);
1848	return (0);
1849}
1850
1851int
1852tsol_ire_init_gwattr(ire_t *ire, uchar_t ipversion, tsol_gc_t *gc)
1853{
1854	tsol_ire_gw_secattr_t *attrp;
1855	boolean_t exists = B_FALSE;
1856	in_addr_t ga_addr4;
1857	void *paddr = NULL;
1858	tsol_gcgrp_t *gcgrp = NULL;
1859
1860	ASSERT(ire != NULL);
1861
1862	/*
1863	 * The only time that attrp can be NULL is when this routine is
1864	 * called for the first time during the creation/initialization
1865	 * of the corresponding IRE.  It will only get cleared when the
1866	 * IRE is deleted.
1867	 */
1868	if ((attrp = ire->ire_gw_secattr) == NULL) {
1869		attrp = ire_gw_secattr_alloc(KM_NOSLEEP);
1870		if (attrp == NULL)
1871			return (ENOMEM);
1872		ire->ire_gw_secattr = attrp;
1873	} else {
1874		exists = B_TRUE;
1875		mutex_enter(&attrp->igsa_lock);
1876
1877		if (attrp->igsa_rhc != NULL) {
1878			TNRHC_RELE(attrp->igsa_rhc);
1879			attrp->igsa_rhc = NULL;
1880		}
1881
1882		if (attrp->igsa_gc != NULL)
1883			GC_REFRELE(attrp->igsa_gc);
1884	}
1885	ASSERT(!exists || MUTEX_HELD(&attrp->igsa_lock));
1886
1887	/*
1888	 * References already held by caller and we keep them;
1889	 * note that gc may be set to NULL to clear out igsa_gc.
1890	 */
1891	attrp->igsa_gc = gc;
1892
1893	if (gc != NULL) {
1894		gcgrp = gc->gc_grp;
1895		ASSERT(gcgrp != NULL);
1896	}
1897
1898	/*
1899	 * Intialize the template for gateway; we use the gateway's
1900	 * address found in either the passed in gateway credential
1901	 * or group pointer, or the ire_gateway_addr{_v6} field.
1902	 */
1903	if (gcgrp != NULL) {
1904		tsol_gcgrp_addr_t *ga = &gcgrp->gcgrp_addr;
1905
1906		/*
1907		 * Caller is holding a reference, and that we don't
1908		 * need to hold any lock to access the address.
1909		 */
1910		if (ipversion == IPV4_VERSION) {
1911			ASSERT(ga->ga_af == AF_INET);
1912			IN6_V4MAPPED_TO_IPADDR(&ga->ga_addr, ga_addr4);
1913			paddr = &ga_addr4;
1914		} else {
1915			ASSERT(ga->ga_af == AF_INET6);
1916			paddr = &ga->ga_addr;
1917		}
1918	} else if (ire->ire_type & IRE_OFFLINK) {
1919		if (ipversion == IPV6_VERSION)
1920			paddr = &ire->ire_gateway_addr_v6;
1921		else if (ipversion == IPV4_VERSION)
1922			paddr = &ire->ire_gateway_addr;
1923	}
1924
1925	/*
1926	 * Lookup the gateway template; note that we could get an internal
1927	 * template here, which we cache anyway.  During IRE matching, we'll
1928	 * try to update this gateway template cache and hopefully get a
1929	 * real one.
1930	 */
1931	if (paddr != NULL) {
1932		attrp->igsa_rhc = find_rhc(paddr, ipversion, B_FALSE);
1933	}
1934
1935	if (exists)
1936		mutex_exit(&attrp->igsa_lock);
1937
1938	return (0);
1939}
1940
1941/*
1942 * This function figures the type of MLP that we'll be using based on the
1943 * address that the user is binding and the zone.  If the address is
1944 * unspecified, then we're looking at both private and shared.  If it's one
1945 * of the zone's private addresses, then it's private only.  If it's one
1946 * of the global addresses, then it's shared only. Multicast addresses are
1947 * treated same as unspecified address.
1948 *
1949 * If we can't figure out what it is, then return mlptSingle.  That's actually
1950 * an error case.
1951 *
1952 * The callers are assumed to pass in zone->zone_id and not the zoneid that
1953 * is stored in a conn_t (since the latter will be GLOBAL_ZONEID in an
1954 * exclusive stack zone).
1955 */
1956mlp_type_t
1957tsol_mlp_addr_type(zoneid_t zoneid, uchar_t version, const void *addr,
1958    ip_stack_t *ipst)
1959{
1960	in_addr_t in4;
1961	ire_t *ire;
1962	ipif_t *ipif;
1963	zoneid_t addrzone;
1964	zoneid_t ip_zoneid;
1965
1966	ASSERT(addr != NULL);
1967
1968	/*
1969	 * For exclusive stacks we set the zoneid to zero
1970	 * to operate as if in the global zone for IRE and conn_t comparisons.
1971	 */
1972	if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
1973		ip_zoneid = GLOBAL_ZONEID;
1974	else
1975		ip_zoneid = zoneid;
1976
1977	if (version == IPV6_VERSION &&
1978	    IN6_IS_ADDR_V4MAPPED((const in6_addr_t *)addr)) {
1979		IN6_V4MAPPED_TO_IPADDR((const in6_addr_t *)addr, in4);
1980		addr = &in4;
1981		version = IPV4_VERSION;
1982	}
1983
1984	/* Check whether the IRE_LOCAL (or ipif) is ALL_ZONES */
1985	if (version == IPV4_VERSION) {
1986		in4 = *(const in_addr_t *)addr;
1987		if ((in4 == INADDR_ANY) || CLASSD(in4)) {
1988			return (mlptBoth);
1989		}
1990		ire = ire_ftable_lookup_v4(in4, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
1991		    NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
1992		    0, ipst, NULL);
1993	} else {
1994		if (IN6_IS_ADDR_UNSPECIFIED((const in6_addr_t *)addr) ||
1995		    IN6_IS_ADDR_MULTICAST((const in6_addr_t *)addr)) {
1996			return (mlptBoth);
1997		}
1998		ire = ire_ftable_lookup_v6(addr, 0, 0, IRE_LOCAL|IRE_LOOPBACK,
1999		    NULL, ip_zoneid, NULL, MATCH_IRE_TYPE | MATCH_IRE_ZONEONLY,
2000		    0, ipst, NULL);
2001	}
2002	/*
2003	 * If we can't find the IRE, then we have to behave exactly like
2004	 * ip_laddr_verify_{v4,v6}.  That means looking up the IPIF so that
2005	 * users can bind to addresses on "down" interfaces.
2006	 *
2007	 * If we can't find that either, then the bind is going to fail, so
2008	 * just give up.  Note that there's a miniscule chance that the address
2009	 * is in transition, but we don't bother handling that.
2010	 */
2011	if (ire == NULL) {
2012		if (version == IPV4_VERSION)
2013			ipif = ipif_lookup_addr(*(const in_addr_t *)addr, NULL,
2014			    ip_zoneid, ipst);
2015		else
2016			ipif = ipif_lookup_addr_v6((const in6_addr_t *)addr,
2017			    NULL, ip_zoneid, ipst);
2018		if (ipif == NULL) {
2019			return (mlptSingle);
2020		}
2021		addrzone = ipif->ipif_zoneid;
2022		ipif_refrele(ipif);
2023	} else {
2024		addrzone = ire->ire_zoneid;
2025		ire_refrele(ire);
2026	}
2027	return (addrzone == ALL_ZONES ? mlptShared : mlptPrivate);
2028}
2029
2030/*
2031 * Since we are configuring local interfaces, and we know trusted
2032 * extension CDE requires local interfaces to be cipso host type in
2033 * order to function correctly, we'll associate a cipso template
2034 * to each local interface and let the interface come up.  Configuring
2035 * a local interface to be "unlabeled" host type is a configuration error.
2036 * We'll override that error and make the interface host type to be cipso
2037 * here.
2038 *
2039 * The code is optimized for the usual "success" case and unwinds things on
2040 * error.  We don't want to go to the trouble and expense of formatting the
2041 * interface name for the usual case where everything is configured correctly.
2042 */
2043boolean_t
2044tsol_check_interface_address(const ipif_t *ipif)
2045{
2046	tsol_tpc_t *tp;
2047	char addrbuf[INET6_ADDRSTRLEN];
2048	int af;
2049	const void *addr;
2050	zone_t *zone;
2051	ts_label_t *plabel;
2052	const bslabel_t *label;
2053	char ifname[LIFNAMSIZ];
2054	boolean_t retval;
2055	tsol_rhent_t rhent;
2056	netstack_t *ns = ipif->ipif_ill->ill_ipst->ips_netstack;
2057
2058	if (IN6_IS_ADDR_V4MAPPED(&ipif->ipif_v6lcl_addr)) {
2059		af = AF_INET;
2060		addr = &V4_PART_OF_V6(ipif->ipif_v6lcl_addr);
2061	} else {
2062		af = AF_INET6;
2063		addr = &ipif->ipif_v6lcl_addr;
2064	}
2065
2066	tp = find_tpc(&ipif->ipif_v6lcl_addr, IPV6_VERSION, B_FALSE);
2067
2068	/* assumes that ALL_ZONES implies that there is no exclusive stack */
2069	if (ipif->ipif_zoneid == ALL_ZONES) {
2070		zone = NULL;
2071	} else if (ns->netstack_stackid == GLOBAL_NETSTACKID) {
2072		/* Shared stack case */
2073		zone = zone_find_by_id(ipif->ipif_zoneid);
2074	} else {
2075		/* Exclusive stack case */
2076		zone = zone_find_by_id(crgetzoneid(ipif->ipif_ill->ill_credp));
2077	}
2078	if (zone != NULL) {
2079		plabel = zone->zone_slabel;
2080		ASSERT(plabel != NULL);
2081		label = label2bslabel(plabel);
2082	}
2083
2084	/*
2085	 * If it's CIPSO and an all-zones address, then we're done.
2086	 * If it's a CIPSO zone specific address, the zone's label
2087	 * must be in the range or set specified in the template.
2088	 * When the remote host entry is missing or the template
2089	 * type is incorrect for this interface, we create a
2090	 * CIPSO host entry in kernel and allow the interface to be
2091	 * brought up as CIPSO type.
2092	 */
2093	if (tp != NULL && (
2094	    /* The all-zones case */
2095	    (tp->tpc_tp.host_type == SUN_CIPSO &&
2096	    tp->tpc_tp.tp_doi == default_doi &&
2097	    ipif->ipif_zoneid == ALL_ZONES) ||
2098	    /* The local-zone case */
2099	    (zone != NULL && plabel->tsl_doi == tp->tpc_tp.tp_doi &&
2100	    ((tp->tpc_tp.host_type == SUN_CIPSO &&
2101	    (_blinrange(label, &tp->tpc_tp.tp_sl_range_cipso) ||
2102	    blinlset(label, tp->tpc_tp.tp_sl_set_cipso))))))) {
2103		if (zone != NULL)
2104			zone_rele(zone);
2105		TPC_RELE(tp);
2106		return (B_TRUE);
2107	}
2108
2109	ipif_get_name(ipif, ifname, sizeof (ifname));
2110	(void) inet_ntop(af, addr, addrbuf, sizeof (addrbuf));
2111
2112	if (tp == NULL) {
2113		cmn_err(CE_NOTE, "template entry for %s missing. Default to "
2114		    "CIPSO type for %s", ifname, addrbuf);
2115		retval = B_TRUE;
2116	} else if (tp->tpc_tp.host_type == UNLABELED) {
2117		cmn_err(CE_NOTE, "template type for %s incorrectly configured. "
2118		    "Change to CIPSO type for %s", ifname, addrbuf);
2119		retval = B_TRUE;
2120	} else if (ipif->ipif_zoneid == ALL_ZONES) {
2121		if (tp->tpc_tp.host_type != SUN_CIPSO) {
2122			cmn_err(CE_NOTE, "%s failed: %s isn't set to CIPSO for "
2123			    "all-zones. Converted to CIPSO.", ifname, addrbuf);
2124			retval = B_TRUE;
2125		} else {
2126			cmn_err(CE_NOTE, "%s failed: %s has wrong DOI %d "
2127			    "instead of %d", ifname, addrbuf,
2128			    tp->tpc_tp.tp_doi, default_doi);
2129			retval = B_FALSE;
2130		}
2131	} else if (zone == NULL) {
2132		cmn_err(CE_NOTE, "%s failed: zoneid %d unknown",
2133		    ifname, ipif->ipif_zoneid);
2134		retval = B_FALSE;
2135	} else if (plabel->tsl_doi != tp->tpc_tp.tp_doi) {
2136		cmn_err(CE_NOTE, "%s failed: zone %s has DOI %d but %s has "
2137		    "DOI %d", ifname, zone->zone_name, plabel->tsl_doi,
2138		    addrbuf, tp->tpc_tp.tp_doi);
2139		retval = B_FALSE;
2140	} else {
2141		cmn_err(CE_NOTE, "%s failed: zone %s label incompatible with "
2142		    "%s", ifname, zone->zone_name, addrbuf);
2143		tsol_print_label(label, "zone label");
2144		retval = B_FALSE;
2145	}
2146
2147	if (zone != NULL)
2148		zone_rele(zone);
2149	if (tp != NULL)
2150		TPC_RELE(tp);
2151	if (retval) {
2152		/*
2153		 * we've corrected a config error and let the interface
2154		 * come up as cipso. Need to insert an rhent.
2155		 */
2156		if ((rhent.rh_address.ta_family = af) == AF_INET) {
2157			rhent.rh_prefix = 32;
2158			rhent.rh_address.ta_addr_v4 = *(struct in_addr *)addr;
2159		} else {
2160			rhent.rh_prefix = 128;
2161			rhent.rh_address.ta_addr_v6 = *(in6_addr_t *)addr;
2162		}
2163		(void) strcpy(rhent.rh_template, "cipso");
2164		if (tnrh_load(&rhent) != 0) {
2165			cmn_err(CE_NOTE, "%s failed: Cannot insert CIPSO "
2166			    "template for local addr %s", ifname, addrbuf);
2167			retval = B_FALSE;
2168		}
2169	}
2170	return (retval);
2171}
2172