xref: /illumos-gate/usr/src/uts/common/inet/ip/sadb.c (revision cd434274)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
25  * Copyright (c) 2018 Joyent, Inc.
26  * Copyright 2022 MNX Cloud, Inc.
27  */
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/stropts.h>
32 #include <sys/strsubr.h>
33 #include <sys/errno.h>
34 #include <sys/ddi.h>
35 #include <sys/debug.h>
36 #include <sys/cmn_err.h>
37 #include <sys/stream.h>
38 #include <sys/strlog.h>
39 #include <sys/kmem.h>
40 #include <sys/sunddi.h>
41 #include <sys/tihdr.h>
42 #include <sys/atomic.h>
43 #include <sys/socket.h>
44 #include <sys/sysmacros.h>
45 #include <sys/crypto/common.h>
46 #include <sys/crypto/api.h>
47 #include <sys/zone.h>
48 #include <netinet/in.h>
49 #include <net/if.h>
50 #include <net/pfkeyv2.h>
51 #include <net/pfpolicy.h>
52 #include <inet/common.h>
53 #include <netinet/ip6.h>
54 #include <inet/ip.h>
55 #include <inet/ip_ire.h>
56 #include <inet/ip6.h>
57 #include <inet/ipsec_info.h>
58 #include <inet/tcp.h>
59 #include <inet/sadb.h>
60 #include <inet/ipsec_impl.h>
61 #include <inet/ipsecah.h>
62 #include <inet/ipsecesp.h>
63 #include <sys/random.h>
64 #include <sys/dlpi.h>
65 #include <sys/strsun.h>
66 #include <sys/strsubr.h>
67 #include <inet/ip_if.h>
68 #include <inet/ipdrop.h>
69 #include <inet/ipclassifier.h>
70 #include <inet/sctp_ip.h>
71 #include <sys/tsol/tnet.h>
72 
73 /*
74  * This source file contains Security Association Database (SADB) common
75  * routines.  They are linked in with the AH module.  Since AH has no chance
76  * of falling under export control, it was safe to link it in there.
77  */
78 
79 static uint8_t *sadb_action_to_ecomb(uint8_t *, uint8_t *, ipsec_action_t *,
80     netstack_t *);
81 static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
82 static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
83 			    netstack_t *);
84 static void sadb_destroy(sadb_t *, netstack_t *);
85 static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
86 static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
87 
88 static time_t sadb_add_time(time_t, uint64_t);
89 static void lifetime_fuzz(ipsa_t *);
90 static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
91 static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
92 static void init_ipsa_pair(ipsap_t *);
93 static void destroy_ipsa_pair(ipsap_t *);
94 static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
95 static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
96 
97 /*
98  * ipsacq_maxpackets is defined here to make it tunable
99  * from /etc/system.
100  */
101 extern uint64_t ipsacq_maxpackets;
102 
/*
 * SET_EXPIRE(sa, delta, exp): when the SA's ipsa_<delta> lifetime is
 * non-zero, set ipsa_<exp> to ipsa_addtime plus that lifetime, computed
 * with sadb_add_time().  A zero lifetime leaves ipsa_<exp> untouched.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0)				\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
}
109 
/*
 * UPDATE_EXPIRE(sa, delta, exp): when the SA's ipsa_<delta> lifetime is
 * non-zero, compute ipsa_usetime plus that lifetime with sadb_add_time()
 * and store it in ipsa_<exp> if ipsa_<exp> is currently unset (zero) or
 * later than the newly computed time.  In other words, an existing
 * expiration can only be moved earlier, never later.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if (((sa)->ipsa_ ## delta) != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
		    (sa)->ipsa_ ## delta);				\
		if (((sa)->ipsa_ ## exp) == 0 ||			\
		    !(((sa)->ipsa_ ## exp) < tmp))			\
			(sa)->ipsa_ ## exp = tmp;			\
	}								\
}
121 
122 
123 /* wrap the macro so we can pass it as a function pointer */
124 void
sadb_sa_refrele(void * target)125 sadb_sa_refrele(void *target)
126 {
127 	IPSA_REFRELE(((ipsa_t *)target));
128 }
129 
/*
 * We presume that sizeof (long) == sizeof (time_t) and that time_t is
 * a signed type.
 */
#define	TIME_MAX LONG_MAX

/*
 * Saturating addition of a PF_KEY lifetime to a base time.
 *
 * PF_KEY hands us lifetimes as uint64_t seconds, while time_t is presumed
 * to be signed with the range of "long".  A naive add could therefore
 * either go negative or lose high-order bits and wrap around into a
 * plausible-looking future date that is earlier than the one requested
 * (particularly on ILP32).  To avoid both, the result is pinned at
 * TIME_MAX whenever the true sum would exceed it.
 *
 * "base" is the starting time, "delta" the number of seconds to add.
 * Returns base + delta, or TIME_MAX if that sum would overflow.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/*
	 * No lifetime can usefully exceed TIME_MAX, so clip it first;
	 * this also keeps the comparison below free of wraparound.
	 */
	uint64_t span = (delta > TIME_MAX) ? TIME_MAX : delta;

	/* Only a positive base can push the sum past TIME_MAX. */
	if (base > 0 && (TIME_MAX - base) < span)
		return (TIME_MAX);	/* Overflow */

	return (base + span);
}
172 
173 /*
174  * Callers of this function have already created a working security
175  * association, and have found the appropriate table & hash chain.  All this
176  * function does is check duplicates, and insert the SA.  The caller needs to
177  * hold the hash bucket lock and increment the refcnt before insertion.
178  *
179  * Return 0 if success, EEXIST if collision.
180  */
181 #define	SA_UNIQUE_MATCH(sa1, sa2) \
182 	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
183 	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
184 
185 int
sadb_insertassoc(ipsa_t * ipsa,isaf_t * bucket)186 sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
187 {
188 	ipsa_t **ptpn = NULL;
189 	ipsa_t *walker;
190 	boolean_t unspecsrc;
191 
192 	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
193 
194 	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
195 
196 	walker = bucket->isaf_ipsa;
197 	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
198 
199 	/*
200 	 * Find insertion point (pointed to with **ptpn).  Insert at the head
201 	 * of the list unless there's an unspecified source address, then
202 	 * insert it after the last SA with a specified source address.
203 	 *
204 	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
205 	 * checking for collisions.
206 	 */
207 
208 	while (walker != NULL) {
209 		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
210 		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
211 			if (walker->ipsa_spi == ipsa->ipsa_spi)
212 				return (EEXIST);
213 
214 			mutex_enter(&walker->ipsa_lock);
215 			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
216 			    (walker->ipsa_flags & IPSA_F_USED) &&
217 			    SA_UNIQUE_MATCH(walker, ipsa)) {
218 				walker->ipsa_flags |= IPSA_F_CINVALID;
219 			}
220 			mutex_exit(&walker->ipsa_lock);
221 		}
222 
223 		if (ptpn == NULL && unspecsrc) {
224 			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
225 			    walker->ipsa_addrfam))
226 				ptpn = walker->ipsa_ptpn;
227 			else if (walker->ipsa_next == NULL)
228 				ptpn = &walker->ipsa_next;
229 		}
230 
231 		walker = walker->ipsa_next;
232 	}
233 
234 	if (ptpn == NULL)
235 		ptpn = &bucket->isaf_ipsa;
236 	ipsa->ipsa_next = *ptpn;
237 	ipsa->ipsa_ptpn = ptpn;
238 	if (ipsa->ipsa_next != NULL)
239 		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
240 	*ptpn = ipsa;
241 	ipsa->ipsa_linklock = &bucket->isaf_lock;
242 
243 	return (0);
244 }
245 #undef SA_UNIQUE_MATCH
246 
247 /*
248  * Free a security association.  Its reference count is 0, which means
249  * I must free it.  The SA must be unlocked and must not be linked into
250  * any fanout list.
251  */
252 static void
sadb_freeassoc(ipsa_t * ipsa)253 sadb_freeassoc(ipsa_t *ipsa)
254 {
255 	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
256 	mblk_t		*asyncmp, *mp;
257 
258 	ASSERT(ipss != NULL);
259 	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
260 	ASSERT(ipsa->ipsa_refcnt == 0);
261 	ASSERT(ipsa->ipsa_next == NULL);
262 	ASSERT(ipsa->ipsa_ptpn == NULL);
263 
264 
265 	asyncmp = sadb_clear_lpkt(ipsa);
266 	if (asyncmp != NULL) {
267 		mp = ip_recv_attr_free_mblk(asyncmp);
268 		ip_drop_packet(mp, B_TRUE, NULL,
269 		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
270 		    &ipss->ipsec_sadb_dropper);
271 	}
272 	mutex_enter(&ipsa->ipsa_lock);
273 
274 	if (ipsa->ipsa_tsl != NULL) {
275 		label_rele(ipsa->ipsa_tsl);
276 		ipsa->ipsa_tsl = NULL;
277 	}
278 
279 	if (ipsa->ipsa_otsl != NULL) {
280 		label_rele(ipsa->ipsa_otsl);
281 		ipsa->ipsa_otsl = NULL;
282 	}
283 
284 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
285 	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
286 	mutex_exit(&ipsa->ipsa_lock);
287 
288 	/* bzero() these fields for paranoia's sake. */
289 	if (ipsa->ipsa_authkey != NULL) {
290 		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
291 		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
292 	}
293 	if (ipsa->ipsa_encrkey != NULL) {
294 		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
295 		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
296 	}
297 	if (ipsa->ipsa_nonce_buf != NULL) {
298 		bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
299 		kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
300 	}
301 	if (ipsa->ipsa_src_cid != NULL) {
302 		IPSID_REFRELE(ipsa->ipsa_src_cid);
303 	}
304 	if (ipsa->ipsa_dst_cid != NULL) {
305 		IPSID_REFRELE(ipsa->ipsa_dst_cid);
306 	}
307 	if (ipsa->ipsa_emech.cm_param != NULL)
308 		kmem_free(ipsa->ipsa_emech.cm_param,
309 		    ipsa->ipsa_emech.cm_param_len);
310 
311 	mutex_destroy(&ipsa->ipsa_lock);
312 	kmem_free(ipsa, sizeof (*ipsa));
313 }
314 
315 /*
316  * Unlink a security association from a hash bucket.  Assume the hash bucket
317  * lock is held, but the association's lock is not.
318  *
319  * Note that we do not bump the bucket's generation number here because
320  * we might not be making a visible change to the set of visible SA's.
321  * All callers MUST bump the bucket's generation number before they unlock
322  * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
323  * was present in the bucket at the time it was locked.
324  */
325 void
sadb_unlinkassoc(ipsa_t * ipsa)326 sadb_unlinkassoc(ipsa_t *ipsa)
327 {
328 	ASSERT(ipsa->ipsa_linklock != NULL);
329 	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
330 
331 	/* Sometimes someone beats us here with the same SA. Check now. */
332 	if (ipsa->ipsa_ptpn == NULL)
333 		return;
334 
335 	/* These fields are protected by the link lock. */
336 	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
337 	if (ipsa->ipsa_next != NULL) {
338 		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
339 		ipsa->ipsa_next = NULL;
340 	}
341 	ipsa->ipsa_ptpn = NULL;
342 
343 	/* This may destroy the SA. */
344 	IPSA_REFRELE(ipsa);
345 }
346 
347 void
sadb_delete_cluster(ipsa_t * assoc)348 sadb_delete_cluster(ipsa_t *assoc)
349 {
350 	uint8_t protocol;
351 
352 	if (cl_inet_deletespi &&
353 	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
354 	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
355 		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
356 		    IPPROTO_AH : IPPROTO_ESP;
357 		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
358 		    protocol, assoc->ipsa_spi, NULL);
359 	}
360 }
361 
362 /*
363  * Create a larval security association with the specified SPI.	 All other
364  * fields are zeroed.
365  */
366 static ipsa_t *
sadb_makelarvalassoc(uint32_t spi,uint32_t * src,uint32_t * dst,int addrfam,netstack_t * ns)367 sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
368     netstack_t *ns)
369 {
370 	ipsa_t *newbie;
371 
372 	/*
373 	 * Allocate...
374 	 */
375 
376 	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
377 	if (newbie == NULL) {
378 		/* Can't make new larval SA. */
379 		return (NULL);
380 	}
381 
382 	/* Assigned requested SPI, assume caller does SPI allocation magic. */
383 	newbie->ipsa_spi = spi;
384 	newbie->ipsa_netstack = ns;	/* No netstack_hold */
385 
386 	/*
387 	 * Copy addresses...
388 	 */
389 
390 	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
391 	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
392 
393 	newbie->ipsa_addrfam = addrfam;
394 
395 	/*
396 	 * Set common initialization values, including refcnt.
397 	 */
398 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
399 	newbie->ipsa_state = IPSA_STATE_LARVAL;
400 	newbie->ipsa_refcnt = 1;
401 	newbie->ipsa_freefunc = sadb_freeassoc;
402 
403 	/*
404 	 * There aren't a lot of other common initialization values, as
405 	 * they are copied in from the PF_KEY message.
406 	 */
407 
408 	return (newbie);
409 }
410 
411 /*
412  * Call me to initialize a security association fanout.
413  */
414 static int
sadb_init_fanout(isaf_t ** tablep,uint_t size,int kmflag)415 sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
416 {
417 	isaf_t *table;
418 	int i;
419 
420 	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
421 	*tablep = table;
422 
423 	if (table == NULL)
424 		return (ENOMEM);
425 
426 	for (i = 0; i < size; i++) {
427 		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
428 		table[i].isaf_ipsa = NULL;
429 		table[i].isaf_gen = 0;
430 	}
431 
432 	return (0);
433 }
434 
435 /*
436  * Call me to initialize an acquire fanout
437  */
438 static int
sadb_init_acfanout(iacqf_t ** tablep,uint_t size,int kmflag)439 sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
440 {
441 	iacqf_t *table;
442 	int i;
443 
444 	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
445 	*tablep = table;
446 
447 	if (table == NULL)
448 		return (ENOMEM);
449 
450 	for (i = 0; i < size; i++) {
451 		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
452 		table[i].iacqf_ipsacq = NULL;
453 	}
454 
455 	return (0);
456 }
457 
458 /*
459  * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
460  * caller must clean up partial allocations.
461  */
462 static int
sadb_init_trial(sadb_t * sp,uint_t size,int kmflag)463 sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
464 {
465 	ASSERT(sp->sdb_of == NULL);
466 	ASSERT(sp->sdb_if == NULL);
467 	ASSERT(sp->sdb_acq == NULL);
468 
469 	sp->sdb_hashsize = size;
470 	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
471 		return (ENOMEM);
472 	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
473 		return (ENOMEM);
474 	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
475 		return (ENOMEM);
476 
477 	return (0);
478 }
479 
480 /*
481  * Call me to initialize an SADB instance; fall back to default size on failure.
482  */
483 static void
sadb_init(const char * name,sadb_t * sp,uint_t size,uint_t ver,netstack_t * ns)484 sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
485     netstack_t *ns)
486 {
487 	ASSERT(sp->sdb_of == NULL);
488 	ASSERT(sp->sdb_if == NULL);
489 	ASSERT(sp->sdb_acq == NULL);
490 
491 	if (size < IPSEC_DEFAULT_HASH_SIZE)
492 		size = IPSEC_DEFAULT_HASH_SIZE;
493 
494 	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
495 
496 		cmn_err(CE_WARN,
497 		    "Unable to allocate %u entry IPv%u %s SADB hash table",
498 		    size, ver, name);
499 
500 		sadb_destroy(sp, ns);
501 		size = IPSEC_DEFAULT_HASH_SIZE;
502 		cmn_err(CE_WARN, "Falling back to %d entries", size);
503 		(void) sadb_init_trial(sp, size, KM_SLEEP);
504 	}
505 }
506 
507 
508 /*
509  * Initialize an SADB-pair.
510  */
511 void
sadbp_init(const char * name,sadbp_t * sp,int type,int size,netstack_t * ns)512 sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
513 {
514 	sadb_init(name, &sp->s_v4, size, 4, ns);
515 	sadb_init(name, &sp->s_v6, size, 6, ns);
516 
517 	sp->s_satype = type;
518 
519 	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
520 	if (type == SADB_SATYPE_AH) {
521 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
522 
523 		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
524 		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
525 		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
526 	} else {
527 		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
528 		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
529 	}
530 }
531 
532 /*
533  * Deliver a single SADB_DUMP message representing a single SA.  This is
534  * called many times by sadb_dump().
535  *
536  * If the return value of this is ENOBUFS (not the same as ENOMEM), then
537  * the caller should take that as a hint that dupb() on the "original answer"
538  * failed, and that perhaps the caller should try again with a copyb()ed
539  * "original answer".
540  */
541 static int
sadb_dump_deliver(queue_t * pfkey_q,mblk_t * original_answer,ipsa_t * ipsa,sadb_msg_t * samsg)542 sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
543     sadb_msg_t *samsg)
544 {
545 	mblk_t *answer;
546 
547 	answer = dupb(original_answer);
548 	if (answer == NULL)
549 		return (ENOBUFS);
550 	answer->b_cont = sadb_sa2msg(ipsa, samsg);
551 	if (answer->b_cont == NULL) {
552 		freeb(answer);
553 		return (ENOMEM);
554 	}
555 
556 	/* Just do a putnext, and let keysock deal with flow control. */
557 	putnext(pfkey_q, answer);
558 	return (0);
559 }
560 
561 /*
562  * Common function to allocate and prepare a keysock_out_t M_CTL message.
563  */
564 mblk_t *
sadb_keysock_out(minor_t serial)565 sadb_keysock_out(minor_t serial)
566 {
567 	mblk_t *mp;
568 	keysock_out_t *kso;
569 
570 	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
571 	if (mp != NULL) {
572 		mp->b_datap->db_type = M_CTL;
573 		mp->b_wptr += sizeof (ipsec_info_t);
574 		kso = (keysock_out_t *)mp->b_rptr;
575 		kso->ks_out_type = KEYSOCK_OUT;
576 		kso->ks_out_len = sizeof (*kso);
577 		kso->ks_out_serial = serial;
578 	}
579 
580 	return (mp);
581 }
582 
583 /*
584  * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
585  * to keysock.
586  */
587 static int
sadb_dump_fanout(queue_t * pfkey_q,mblk_t * mp,minor_t serial,isaf_t * fanout,int num_entries,boolean_t do_peers,time_t active_time)588 sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
589     int num_entries, boolean_t do_peers, time_t active_time)
590 {
591 	int i, error = 0;
592 	mblk_t *original_answer;
593 	ipsa_t *walker;
594 	sadb_msg_t *samsg;
595 	time_t	current;
596 
597 	/*
598 	 * For each IPSA hash bucket do:
599 	 *	- Hold the mutex
600 	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
601 	 */
602 	ASSERT(mp->b_cont != NULL);
603 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
604 
605 	original_answer = sadb_keysock_out(serial);
606 	if (original_answer == NULL)
607 		return (ENOMEM);
608 
609 	current = gethrestime_sec();
610 	for (i = 0; i < num_entries; i++) {
611 		mutex_enter(&fanout[i].isaf_lock);
612 		for (walker = fanout[i].isaf_ipsa; walker != NULL;
613 		    walker = walker->ipsa_next) {
614 			if (!do_peers && walker->ipsa_haspeer)
615 				continue;
616 			if ((active_time != 0) &&
617 			    ((current - walker->ipsa_lastuse) > active_time))
618 				continue;
619 			error = sadb_dump_deliver(pfkey_q, original_answer,
620 			    walker, samsg);
621 			if (error == ENOBUFS) {
622 				mblk_t *new_original_answer;
623 
624 				/* Ran out of dupb's.  Try a copyb. */
625 				new_original_answer = copyb(original_answer);
626 				if (new_original_answer == NULL) {
627 					error = ENOMEM;
628 				} else {
629 					freeb(original_answer);
630 					original_answer = new_original_answer;
631 					error = sadb_dump_deliver(pfkey_q,
632 					    original_answer, walker, samsg);
633 				}
634 			}
635 			if (error != 0)
636 				break;	/* out of for loop. */
637 		}
638 		mutex_exit(&fanout[i].isaf_lock);
639 		if (error != 0)
640 			break;	/* out of for loop. */
641 	}
642 
643 	freeb(original_answer);
644 	return (error);
645 }
646 
647 /*
648  * Dump an entire SADB; outbound first, then inbound.
649  */
650 
651 int
sadb_dump(queue_t * pfkey_q,mblk_t * mp,keysock_in_t * ksi,sadb_t * sp)652 sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
653 {
654 	int error;
655 	time_t	active_time = 0;
656 	sadb_x_edump_t	*edump =
657 	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
658 
659 	if (edump != NULL) {
660 		active_time = edump->sadb_x_edump_timeout;
661 	}
662 
663 	/* Dump outbound */
664 	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
665 	    sp->sdb_hashsize, B_TRUE, active_time);
666 	if (error)
667 		return (error);
668 
669 	/* Dump inbound */
670 	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
671 	    sp->sdb_hashsize, B_FALSE, active_time);
672 }
673 
674 /*
675  * Generic sadb table walker.
676  *
677  * Call "walkfn" for each SA in each bucket in "table"; pass the
678  * bucket, the entry and "cookie" to the callback function.
679  * Take care to ensure that walkfn can delete the SA without screwing
680  * up our traverse.
681  *
682  * The bucket is locked for the duration of the callback, both so that the
683  * callback can just call sadb_unlinkassoc() when it wants to delete something,
684  * and so that no new entries are added while we're walking the list.
685  */
686 static void
sadb_walker(isaf_t * table,uint_t numentries,void (* walkfn)(isaf_t * head,ipsa_t * entry,void * cookie),void * cookie)687 sadb_walker(isaf_t *table, uint_t numentries,
688     void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
689     void *cookie)
690 {
691 	int i;
692 	for (i = 0; i < numentries; i++) {
693 		ipsa_t *entry, *next;
694 
695 		mutex_enter(&table[i].isaf_lock);
696 
697 		for (entry = table[i].isaf_ipsa; entry != NULL;
698 		    entry = next) {
699 			next = entry->ipsa_next;
700 			(*walkfn)(&table[i], entry, cookie);
701 		}
702 		mutex_exit(&table[i].isaf_lock);
703 	}
704 }
705 
706 /*
707  * Call me to free up a security association fanout.  Use the forever
708  * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
709  * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
710  * when a module is unloaded).
711  */
712 static void
sadb_destroyer(isaf_t ** tablep,uint_t numentries,boolean_t forever,boolean_t inbound)713 sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
714     boolean_t inbound)
715 {
716 	int i;
717 	isaf_t *table = *tablep;
718 	uint8_t protocol;
719 	ipsa_t *sa;
720 	netstackid_t sid;
721 
722 	if (table == NULL)
723 		return;
724 
725 	for (i = 0; i < numentries; i++) {
726 		mutex_enter(&table[i].isaf_lock);
727 		while ((sa = table[i].isaf_ipsa) != NULL) {
728 			if (inbound && cl_inet_deletespi &&
729 			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
730 			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
731 				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
732 				    IPPROTO_AH : IPPROTO_ESP;
733 				sid = sa->ipsa_netstack->netstack_stackid;
734 				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
735 				    NULL);
736 			}
737 			sadb_unlinkassoc(sa);
738 		}
739 		table[i].isaf_gen++;
740 		mutex_exit(&table[i].isaf_lock);
741 		if (forever)
742 			mutex_destroy(&(table[i].isaf_lock));
743 	}
744 
745 	if (forever) {
746 		*tablep = NULL;
747 		kmem_free(table, numentries * sizeof (*table));
748 	}
749 }
750 
751 /*
752  * Entry points to sadb_destroyer().
753  */
754 static void
sadb_flush(sadb_t * sp,netstack_t * ns)755 sadb_flush(sadb_t *sp, netstack_t *ns)
756 {
757 	/*
758 	 * Flush out each bucket, one at a time.  Were it not for keysock's
759 	 * enforcement, there would be a subtlety where I could add on the
760 	 * heels of a flush.  With keysock's enforcement, however, this
761 	 * makes ESP's job easy.
762 	 */
763 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
764 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
765 
766 	/* For each acquire, destroy it; leave the bucket mutex alone. */
767 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
768 }
769 
770 static void
sadb_destroy(sadb_t * sp,netstack_t * ns)771 sadb_destroy(sadb_t *sp, netstack_t *ns)
772 {
773 	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
774 	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
775 
776 	/* For each acquire, destroy it, including the bucket mutex. */
777 	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
778 
779 	ASSERT(sp->sdb_of == NULL);
780 	ASSERT(sp->sdb_if == NULL);
781 	ASSERT(sp->sdb_acq == NULL);
782 }
783 
784 void
sadbp_flush(sadbp_t * spp,netstack_t * ns)785 sadbp_flush(sadbp_t *spp, netstack_t *ns)
786 {
787 	sadb_flush(&spp->s_v4, ns);
788 	sadb_flush(&spp->s_v6, ns);
789 }
790 
791 void
sadbp_destroy(sadbp_t * spp,netstack_t * ns)792 sadbp_destroy(sadbp_t *spp, netstack_t *ns)
793 {
794 	sadb_destroy(&spp->s_v4, ns);
795 	sadb_destroy(&spp->s_v6, ns);
796 
797 	if (spp->s_satype == SADB_SATYPE_AH) {
798 		ipsec_stack_t	*ipss = ns->netstack_ipsec;
799 
800 		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
801 	}
802 }
803 
804 
805 /*
806  * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
807  * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
808  * EINVAL.
809  */
810 int
sadb_hardsoftchk(sadb_lifetime_t * hard,sadb_lifetime_t * soft,sadb_lifetime_t * idle)811 sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
812     sadb_lifetime_t *idle)
813 {
814 	if (hard == NULL || soft == NULL)
815 		return (0);
816 
817 	if (hard->sadb_lifetime_allocations != 0 &&
818 	    soft->sadb_lifetime_allocations != 0 &&
819 	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
820 		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
821 
822 	if (hard->sadb_lifetime_bytes != 0 &&
823 	    soft->sadb_lifetime_bytes != 0 &&
824 	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
825 		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
826 
827 	if (hard->sadb_lifetime_addtime != 0 &&
828 	    soft->sadb_lifetime_addtime != 0 &&
829 	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
830 		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
831 
832 	if (hard->sadb_lifetime_usetime != 0 &&
833 	    soft->sadb_lifetime_usetime != 0 &&
834 	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
835 		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
836 
837 	if (idle != NULL) {
838 		if (hard->sadb_lifetime_addtime != 0 &&
839 		    idle->sadb_lifetime_addtime != 0 &&
840 		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
841 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
842 
843 		if (soft->sadb_lifetime_addtime != 0 &&
844 		    idle->sadb_lifetime_addtime != 0 &&
845 		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
846 			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
847 
848 		if (hard->sadb_lifetime_usetime != 0 &&
849 		    idle->sadb_lifetime_usetime != 0 &&
850 		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
851 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
852 
853 		if (soft->sadb_lifetime_usetime != 0 &&
854 		    idle->sadb_lifetime_usetime != 0 &&
855 		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
856 			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
857 	}
858 
859 	return (0);
860 }
861 
862 /*
863  * Sanity check sensitivity labels.
864  *
865  * For now, just reject labels on unlabeled systems.
866  */
867 int
sadb_labelchk(keysock_in_t * ksi)868 sadb_labelchk(keysock_in_t *ksi)
869 {
870 	if (!is_system_labeled()) {
871 		if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
872 			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
873 
874 		if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL)
875 			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
876 	}
877 
878 	return (0);
879 }
880 
881 /*
882  * Clone a security association for the purposes of inserting a single SA
883  * into inbound and outbound tables respectively. This function should only
884  * be called from sadb_common_add().
885  */
886 static ipsa_t *
sadb_cloneassoc(ipsa_t * ipsa)887 sadb_cloneassoc(ipsa_t *ipsa)
888 {
889 	ipsa_t *newbie;
890 	boolean_t error = B_FALSE;
891 
892 	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
893 
894 	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
895 	if (newbie == NULL)
896 		return (NULL);
897 
898 	/* Copy over what we can. */
899 	*newbie = *ipsa;
900 
901 	/* bzero and initialize locks, in case *_init() allocates... */
902 	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
903 
904 	if (newbie->ipsa_tsl != NULL)
905 		label_hold(newbie->ipsa_tsl);
906 
907 	if (newbie->ipsa_otsl != NULL)
908 		label_hold(newbie->ipsa_otsl);
909 
910 	/*
911 	 * While somewhat dain-bramaged, the most graceful way to
912 	 * recover from errors is to keep plowing through the
913 	 * allocations, and getting what I can.  It's easier to call
914 	 * sadb_freeassoc() on the stillborn clone when all the
915 	 * pointers aren't pointing to the parent's data.
916 	 */
917 
918 	if (ipsa->ipsa_authkey != NULL) {
919 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
920 		    KM_NOSLEEP);
921 		if (newbie->ipsa_authkey == NULL) {
922 			error = B_TRUE;
923 		} else {
924 			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
925 			    newbie->ipsa_authkeylen);
926 
927 			newbie->ipsa_kcfauthkey.ck_data =
928 			    newbie->ipsa_authkey;
929 		}
930 
931 		if (newbie->ipsa_amech.cm_param != NULL) {
932 			newbie->ipsa_amech.cm_param =
933 			    (char *)&newbie->ipsa_mac_len;
934 		}
935 	}
936 
937 	if (ipsa->ipsa_encrkey != NULL) {
938 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
939 		    KM_NOSLEEP);
940 		if (newbie->ipsa_encrkey == NULL) {
941 			error = B_TRUE;
942 		} else {
943 			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
944 			    newbie->ipsa_encrkeylen);
945 
946 			newbie->ipsa_kcfencrkey.ck_data =
947 			    newbie->ipsa_encrkey;
948 		}
949 	}
950 
951 	newbie->ipsa_authtmpl = NULL;
952 	newbie->ipsa_encrtmpl = NULL;
953 	newbie->ipsa_haspeer = B_TRUE;
954 
955 	if (ipsa->ipsa_src_cid != NULL) {
956 		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
957 		IPSID_REFHOLD(ipsa->ipsa_src_cid);
958 	}
959 
960 	if (ipsa->ipsa_dst_cid != NULL) {
961 		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
962 		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
963 	}
964 
965 	if (error) {
966 		sadb_freeassoc(newbie);
967 		return (NULL);
968 	}
969 
970 	return (newbie);
971 }
972 
973 /*
974  * Initialize a SADB address extension at the address specified by addrext.
975  * Return a pointer to the end of the new address extension.
976  */
977 static uint8_t *
sadb_make_addr_ext(uint8_t * start,uint8_t * end,uint16_t exttype,sa_family_t af,uint32_t * addr,uint16_t port,uint8_t proto,int prefix)978 sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
979     sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
980 {
981 	struct sockaddr_in *sin;
982 	struct sockaddr_in6 *sin6;
983 	uint8_t *cur = start;
984 	int addrext_len;
985 	int sin_len;
986 	sadb_address_t *addrext	= (sadb_address_t *)cur;
987 
988 	if (cur == NULL)
989 		return (NULL);
990 
991 	cur += sizeof (*addrext);
992 	if (cur > end)
993 		return (NULL);
994 
995 	addrext->sadb_address_proto = proto;
996 	addrext->sadb_address_prefixlen = prefix;
997 	addrext->sadb_address_reserved = 0;
998 	addrext->sadb_address_exttype = exttype;
999 
1000 	switch (af) {
1001 	case AF_INET:
1002 		sin = (struct sockaddr_in *)cur;
1003 		sin_len = sizeof (*sin);
1004 		cur += sin_len;
1005 		if (cur > end)
1006 			return (NULL);
1007 
1008 		sin->sin_family = af;
1009 		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1010 		sin->sin_port = port;
1011 		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1012 		break;
1013 	case AF_INET6:
1014 		sin6 = (struct sockaddr_in6 *)cur;
1015 		sin_len = sizeof (*sin6);
1016 		cur += sin_len;
1017 		if (cur > end)
1018 			return (NULL);
1019 
1020 		bzero(sin6, sizeof (*sin6));
1021 		sin6->sin6_family = af;
1022 		sin6->sin6_port = port;
1023 		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1024 		break;
1025 	}
1026 
1027 	addrext_len = roundup(cur - start, sizeof (uint64_t));
1028 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1029 
1030 	cur = start + addrext_len;
1031 	if (cur > end)
1032 		cur = NULL;
1033 
1034 	return (cur);
1035 }
1036 
1037 /*
1038  * Construct a key management cookie extension.
1039  */
1040 
1041 static uint8_t *
sadb_make_kmc_ext(uint8_t * cur,uint8_t * end,uint32_t kmp,uint64_t kmc)1042 sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint64_t kmc)
1043 {
1044 	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1045 
1046 	if (cur == NULL)
1047 		return (NULL);
1048 
1049 	cur += sizeof (*kmcext);
1050 
1051 	if (cur > end)
1052 		return (NULL);
1053 
1054 	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1055 	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1056 	kmcext->sadb_x_kmc_proto = kmp;
1057 	kmcext->sadb_x_kmc_cookie64 = kmc;
1058 
1059 	return (cur);
1060 }
1061 
1062 /*
1063  * Given an original message header with sufficient space following it, and an
1064  * SA, construct a full PF_KEY message with all of the relevant extensions.
1065  * This is mostly used for SADB_GET, and SADB_DUMP.
1066  */
1067 static mblk_t *
sadb_sa2msg(ipsa_t * ipsa,sadb_msg_t * samsg)1068 sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1069 {
1070 	int alloclen, addrsize, paddrsize, authsize, encrsize;
1071 	int srcidsize, dstidsize, senslen, osenslen;
1072 	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1073 				/* src/dst and proxy sockaddrs. */
1074 
1075 	authsize = 0;
1076 	encrsize = 0;
1077 	pfam = 0;
1078 	srcidsize = 0;
1079 	dstidsize = 0;
1080 	paddrsize = 0;
1081 	senslen = 0;
1082 	osenslen = 0;
1083 	/*
1084 	 * The following are pointers into the PF_KEY message this PF_KEY
1085 	 * message creates.
1086 	 */
1087 	sadb_msg_t *newsamsg;
1088 	sadb_sa_t *assoc;
1089 	sadb_lifetime_t *lt;
1090 	sadb_key_t *key;
1091 	sadb_ident_t *ident;
1092 	sadb_sens_t *sens;
1093 	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1094 	sadb_x_replay_ctr_t *repl_ctr;
1095 	sadb_x_pair_t *pair_ext;
1096 
1097 	mblk_t *mp;
1098 	uint8_t *cur, *end;
1099 	/* These indicate the presence of the above extension fields. */
1100 	boolean_t soft = B_FALSE, hard = B_FALSE;
1101 	boolean_t isrc = B_FALSE, idst = B_FALSE;
1102 	boolean_t auth = B_FALSE, encr = B_FALSE;
1103 	boolean_t sensinteg = B_FALSE, osensinteg = B_FALSE;
1104 	boolean_t srcid = B_FALSE, dstid = B_FALSE;
1105 	boolean_t idle;
1106 	boolean_t paired;
1107 	uint32_t otherspi;
1108 
1109 	/* First off, figure out the allocation length for this message. */
1110 	/*
1111 	 * Constant stuff.  This includes base, SA, address (src, dst),
1112 	 * and lifetime (current).
1113 	 */
1114 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1115 	    sizeof (sadb_lifetime_t);
1116 	otherspi = 0;
1117 
1118 	fam = ipsa->ipsa_addrfam;
1119 	switch (fam) {
1120 	case AF_INET:
1121 		addrsize = roundup(sizeof (struct sockaddr_in) +
1122 		    sizeof (sadb_address_t), sizeof (uint64_t));
1123 		break;
1124 	case AF_INET6:
1125 		addrsize = roundup(sizeof (struct sockaddr_in6) +
1126 		    sizeof (sadb_address_t), sizeof (uint64_t));
1127 		break;
1128 	default:
1129 		return (NULL);
1130 	}
1131 	/*
1132 	 * Allocate TWO address extensions, for source and destination.
1133 	 * (Thus, the * 2.)
1134 	 */
1135 	alloclen += addrsize * 2;
1136 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1137 		alloclen += addrsize;
1138 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1139 		alloclen += addrsize;
1140 
1141 	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1142 		paired = B_TRUE;
1143 		alloclen += sizeof (sadb_x_pair_t);
1144 		otherspi = ipsa->ipsa_otherspi;
1145 	} else {
1146 		paired = B_FALSE;
1147 	}
1148 
1149 	/* How 'bout other lifetimes? */
1150 	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1151 	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1152 		alloclen += sizeof (sadb_lifetime_t);
1153 		soft = B_TRUE;
1154 	}
1155 
1156 	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1157 	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1158 		alloclen += sizeof (sadb_lifetime_t);
1159 		hard = B_TRUE;
1160 	}
1161 
1162 	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1163 		alloclen += sizeof (sadb_lifetime_t);
1164 		idle = B_TRUE;
1165 	} else {
1166 		idle = B_FALSE;
1167 	}
1168 
1169 	/* Inner addresses. */
1170 	if (ipsa->ipsa_innerfam != 0) {
1171 		pfam = ipsa->ipsa_innerfam;
1172 		switch (pfam) {
1173 		case AF_INET6:
1174 			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1175 			    sizeof (sadb_address_t), sizeof (uint64_t));
1176 			break;
1177 		case AF_INET:
1178 			paddrsize = roundup(sizeof (struct sockaddr_in) +
1179 			    sizeof (sadb_address_t), sizeof (uint64_t));
1180 			break;
1181 		default:
1182 			cmn_err(CE_PANIC,
1183 			    "IPsec SADB: Proxy length failure.\n");
1184 			break;
1185 		}
1186 		isrc = B_TRUE;
1187 		idst = B_TRUE;
1188 		alloclen += 2 * paddrsize;
1189 	}
1190 
1191 	/* For the following fields, assume that length != 0 ==> stuff */
1192 	if (ipsa->ipsa_authkeylen != 0) {
1193 		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1194 		    sizeof (uint64_t));
1195 		alloclen += authsize;
1196 		auth = B_TRUE;
1197 	}
1198 
1199 	if (ipsa->ipsa_encrkeylen != 0) {
1200 		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
1201 		    ipsa->ipsa_nonce_len, sizeof (uint64_t));
1202 		alloclen += encrsize;
1203 		encr = B_TRUE;
1204 	} else {
1205 		encr = B_FALSE;
1206 	}
1207 
1208 	if (ipsa->ipsa_tsl != NULL) {
1209 		senslen = sadb_sens_len_from_label(ipsa->ipsa_tsl);
1210 		alloclen += senslen;
1211 		sensinteg = B_TRUE;
1212 	}
1213 
1214 	if (ipsa->ipsa_otsl != NULL) {
1215 		osenslen = sadb_sens_len_from_label(ipsa->ipsa_otsl);
1216 		alloclen += osenslen;
1217 		osensinteg = B_TRUE;
1218 	}
1219 
1220 	/*
1221 	 * Must use strlen() here for lengths.	Identities use NULL
1222 	 * pointers to indicate their nonexistence.
1223 	 */
1224 	if (ipsa->ipsa_src_cid != NULL) {
1225 		srcidsize = roundup(sizeof (sadb_ident_t) +
1226 		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1227 		    sizeof (uint64_t));
1228 		alloclen += srcidsize;
1229 		srcid = B_TRUE;
1230 	}
1231 
1232 	if (ipsa->ipsa_dst_cid != NULL) {
1233 		dstidsize = roundup(sizeof (sadb_ident_t) +
1234 		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1235 		    sizeof (uint64_t));
1236 		alloclen += dstidsize;
1237 		dstid = B_TRUE;
1238 	}
1239 
1240 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1241 		alloclen += sizeof (sadb_x_kmc_t);
1242 
1243 	if (ipsa->ipsa_replay != 0) {
1244 		alloclen += sizeof (sadb_x_replay_ctr_t);
1245 	}
1246 
1247 	/* Make sure the allocation length is a multiple of 8 bytes. */
1248 	ASSERT((alloclen & 0x7) == 0);
1249 
1250 	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1251 	mp = allocb(alloclen, BPRI_HI);
1252 	if (mp == NULL)
1253 		return (NULL);
1254 	bzero(mp->b_rptr, alloclen);
1255 
1256 	mp->b_wptr += alloclen;
1257 	end = mp->b_wptr;
1258 	newsamsg = (sadb_msg_t *)mp->b_rptr;
1259 	*newsamsg = *samsg;
1260 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1261 
1262 	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1263 
1264 	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1265 
1266 	assoc = (sadb_sa_t *)(newsamsg + 1);
1267 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1268 	assoc->sadb_sa_exttype = SADB_EXT_SA;
1269 	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1270 	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1271 	assoc->sadb_sa_state = ipsa->ipsa_state;
1272 	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1273 	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1274 	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1275 
1276 	lt = (sadb_lifetime_t *)(assoc + 1);
1277 	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1278 	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1279 	/* We do not support the concept. */
1280 	lt->sadb_lifetime_allocations = 0;
1281 	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1282 	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1283 	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1284 
1285 	if (hard) {
1286 		lt++;
1287 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1288 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1289 		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1290 		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1291 		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1292 		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1293 	}
1294 
1295 	if (soft) {
1296 		lt++;
1297 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1298 		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1299 		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1300 		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1301 		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1302 		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1303 	}
1304 
1305 	if (idle) {
1306 		lt++;
1307 		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1308 		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1309 		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1310 		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1311 	}
1312 
1313 	cur = (uint8_t *)(lt + 1);
1314 
1315 	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1316 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1317 	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1318 	    SA_PROTO(ipsa), 0);
1319 	if (cur == NULL) {
1320 		freemsg(mp);
1321 		mp = NULL;
1322 		goto bail;
1323 	}
1324 
1325 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1326 	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1327 	    SA_PROTO(ipsa), 0);
1328 	if (cur == NULL) {
1329 		freemsg(mp);
1330 		mp = NULL;
1331 		goto bail;
1332 	}
1333 
1334 	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1335 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1336 		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1337 		    IPPROTO_UDP, 0);
1338 		if (cur == NULL) {
1339 			freemsg(mp);
1340 			mp = NULL;
1341 			goto bail;
1342 		}
1343 	}
1344 
1345 	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1346 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1347 		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1348 		    IPPROTO_UDP, 0);
1349 		if (cur == NULL) {
1350 			freemsg(mp);
1351 			mp = NULL;
1352 			goto bail;
1353 		}
1354 	}
1355 
1356 	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1357 	if (isrc) {
1358 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1359 		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1360 		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1361 		if (cur == NULL) {
1362 			freemsg(mp);
1363 			mp = NULL;
1364 			goto bail;
1365 		}
1366 	}
1367 
1368 	if (idst) {
1369 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1370 		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1371 		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1372 		if (cur == NULL) {
1373 			freemsg(mp);
1374 			mp = NULL;
1375 			goto bail;
1376 		}
1377 	}
1378 
1379 	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1380 		cur = sadb_make_kmc_ext(cur, end,
1381 		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1382 		if (cur == NULL) {
1383 			freemsg(mp);
1384 			mp = NULL;
1385 			goto bail;
1386 		}
1387 	}
1388 
1389 	walker = (sadb_ext_t *)cur;
1390 	if (auth) {
1391 		key = (sadb_key_t *)walker;
1392 		key->sadb_key_len = SADB_8TO64(authsize);
1393 		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1394 		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1395 		key->sadb_key_reserved = 0;
1396 		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1397 		walker = (sadb_ext_t *)((uint64_t *)walker +
1398 		    walker->sadb_ext_len);
1399 	}
1400 
1401 	if (encr) {
1402 		uint8_t *buf_ptr;
1403 		key = (sadb_key_t *)walker;
1404 		key->sadb_key_len = SADB_8TO64(encrsize);
1405 		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1406 		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1407 		key->sadb_key_reserved = ipsa->ipsa_saltbits;
1408 		buf_ptr = (uint8_t *)(key + 1);
1409 		bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
1410 		if (ipsa->ipsa_salt != NULL) {
1411 			buf_ptr += ipsa->ipsa_encrkeylen;
1412 			bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
1413 		}
1414 		walker = (sadb_ext_t *)((uint64_t *)walker +
1415 		    walker->sadb_ext_len);
1416 	}
1417 
1418 	if (srcid) {
1419 		ident = (sadb_ident_t *)walker;
1420 		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1421 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1422 		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1423 		ident->sadb_ident_id = 0;
1424 		ident->sadb_ident_reserved = 0;
1425 		(void) strcpy((char *)(ident + 1),
1426 		    ipsa->ipsa_src_cid->ipsid_cid);
1427 		walker = (sadb_ext_t *)((uint64_t *)walker +
1428 		    walker->sadb_ext_len);
1429 	}
1430 
1431 	if (dstid) {
1432 		ident = (sadb_ident_t *)walker;
1433 		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1434 		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1435 		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1436 		ident->sadb_ident_id = 0;
1437 		ident->sadb_ident_reserved = 0;
1438 		(void) strcpy((char *)(ident + 1),
1439 		    ipsa->ipsa_dst_cid->ipsid_cid);
1440 		walker = (sadb_ext_t *)((uint64_t *)walker +
1441 		    walker->sadb_ext_len);
1442 	}
1443 
1444 	if (sensinteg) {
1445 		sens = (sadb_sens_t *)walker;
1446 		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
1447 		    ipsa->ipsa_tsl, senslen);
1448 
1449 		walker = (sadb_ext_t *)((uint64_t *)walker +
1450 		    walker->sadb_ext_len);
1451 	}
1452 
1453 	if (osensinteg) {
1454 		sens = (sadb_sens_t *)walker;
1455 
1456 		sadb_sens_from_label(sens, SADB_X_EXT_OUTER_SENS,
1457 		    ipsa->ipsa_otsl, osenslen);
1458 		if (ipsa->ipsa_mac_exempt)
1459 			sens->sadb_x_sens_flags = SADB_X_SENS_IMPLICIT;
1460 
1461 		walker = (sadb_ext_t *)((uint64_t *)walker +
1462 		    walker->sadb_ext_len);
1463 	}
1464 
1465 	if (paired) {
1466 		pair_ext = (sadb_x_pair_t *)walker;
1467 
1468 		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1469 		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1470 		pair_ext->sadb_x_pair_spi = otherspi;
1471 
1472 		walker = (sadb_ext_t *)((uint64_t *)walker +
1473 		    walker->sadb_ext_len);
1474 	}
1475 
1476 	if (ipsa->ipsa_replay != 0) {
1477 		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1478 		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1479 		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1480 		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1481 		repl_ctr->sadb_x_rc_replay64 = 0;
1482 		walker = (sadb_ext_t *)(repl_ctr + 1);
1483 	}
1484 
1485 bail:
1486 	/* Pardon any delays... */
1487 	mutex_exit(&ipsa->ipsa_lock);
1488 
1489 	return (mp);
1490 }
1491 
/*
 * Strip out key headers and reserved/extended-dump headers
 * (SADB_EXT_KEY_AUTH, SADB_EXT_KEY_ENCRYPT, SADB_EXT_RESERVED,
 * SADB_X_EXT_EDUMP) and adjust base message accordingly.
 *
 * Assume message is pulled up in one piece of contiguous memory.
 *
 * Say if we start off with:
 *
 * +------+----+-------------+-----------+---------------+---------------+
 * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
 * +------+----+-------------+-----------+---------------+---------------+
 *
 * we will end up with
 *
 * +------+----+-------------+-----------+---------------+
 * | base | SA | source addr | dest addr | soft lifetime |
 * +------+----+-------------+-----------+---------------+
 *
 * The compaction is done in place.  On return samsg->sadb_msg_len holds the
 * new length (in 64-bit words); the bytes between the new and the old end
 * of the message are left as they were and must be cleared by the caller.
 */
static void
sadb_strip(sadb_msg_t *samsg)
{
	sadb_ext_t *ext;
	/* Start of the current "hole" (region to be overwritten), if any. */
	uint8_t *target = NULL;
	uint8_t *msgend;
	/* Running length of everything kept, in 64-bit words. */
	int sofar = SADB_8TO64(sizeof (*samsg));
	int copylen;

	ext = (sadb_ext_t *)(samsg + 1);
	msgend = (uint8_t *)samsg;
	msgend += SADB_64TO8(samsg->sadb_msg_len);
	while ((uint8_t *)ext < msgend) {
		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
			/*
			 * Aha!	 I found a header to be erased.
			 */

			if (target != NULL) {
				/*
				 * If I had a previous header to be erased,
				 * copy over it.  I can get away with just
				 * copying backwards because the target will
				 * always be 8 bytes behind the source.
				 */
				/*
				 * copylen is the size of the kept extensions
				 * lying between the end of the hole and this
				 * newly found header.
				 */
				copylen = ((uint8_t *)ext) - (target +
				    SADB_64TO8(
				    ((sadb_ext_t *)target)->sadb_ext_len));
				ovbcopy(((uint8_t *)ext - copylen), target,
				    copylen);
				target += copylen;
				/*
				 * The hole now starts right after the data
				 * just moved.  Record its length so that it
				 * runs through the end of the header found
				 * on this iteration, merging both erasures
				 * into one hole.
				 */
				((sadb_ext_t *)target)->sadb_ext_len =
				    SADB_8TO64(((uint8_t *)ext) - target +
				    SADB_64TO8(ext->sadb_ext_len));
			} else {
				/* First erasure: the header itself is the hole. */
				target = (uint8_t *)ext;
			}
		} else {
			/* A keeper; count it toward the new message length. */
			sofar += ext->sadb_ext_len;
		}

		/*
		 * Advance by this extension's own length.  When "ext" was
		 * just erased and target did not move, the length stored
		 * above ends at the same place, so the step is unchanged.
		 */
		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
	}

	ASSERT((uint8_t *)ext == msgend);

	/* Slide whatever follows the final hole down over it. */
	if (target != NULL) {
		copylen = ((uint8_t *)ext) - (target +
		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
		if (copylen != 0)
			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
	}

	/* Adjust samsg. */
	samsg->sadb_msg_len = (uint16_t)sofar;

	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
}
1571 
1572 /*
1573  * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1574  * followed by an M_DATA with a PF_KEY message in it.  The serial of
1575  * the sending keysock instance is included.
1576  */
1577 void
sadb_pfkey_error(queue_t * pfkey_q,mblk_t * mp,int error,int diagnostic,uint_t serial)1578 sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1579     uint_t serial)
1580 {
1581 	mblk_t *msg = mp->b_cont;
1582 	sadb_msg_t *samsg;
1583 	keysock_out_t *kso;
1584 
1585 	/*
1586 	 * Enough functions call this to merit a NULL queue check.
1587 	 */
1588 	if (pfkey_q == NULL) {
1589 		freemsg(mp);
1590 		return;
1591 	}
1592 
1593 	ASSERT(msg != NULL);
1594 	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1595 	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1596 	samsg = (sadb_msg_t *)msg->b_rptr;
1597 	kso = (keysock_out_t *)mp->b_rptr;
1598 
1599 	kso->ks_out_type = KEYSOCK_OUT;
1600 	kso->ks_out_len = sizeof (*kso);
1601 	kso->ks_out_serial = serial;
1602 
1603 	/*
1604 	 * Only send the base message up in the event of an error.
1605 	 * Don't worry about bzero()-ing, because it was probably bogus
1606 	 * anyway.
1607 	 */
1608 	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1609 	samsg = (sadb_msg_t *)msg->b_rptr;
1610 	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1611 	samsg->sadb_msg_errno = (uint8_t)error;
1612 	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1613 		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1614 
1615 	putnext(pfkey_q, mp);
1616 }
1617 
1618 /*
1619  * Send a successful return packet back to keysock via the queue in pfkey_q.
1620  *
1621  * Often, an SA is associated with the reply message, it's passed in if needed,
1622  * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1623  * and the caller will release said refcnt.
1624  */
1625 void
sadb_pfkey_echo(queue_t * pfkey_q,mblk_t * mp,sadb_msg_t * samsg,keysock_in_t * ksi,ipsa_t * ipsa)1626 sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1627     keysock_in_t *ksi, ipsa_t *ipsa)
1628 {
1629 	keysock_out_t *kso;
1630 	mblk_t *mp1;
1631 	sadb_msg_t *newsamsg;
1632 	uint8_t *oldend;
1633 
1634 	ASSERT((mp->b_cont != NULL) &&
1635 	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1636 	    ((void *)mp->b_rptr == (void *)ksi));
1637 
1638 	switch (samsg->sadb_msg_type) {
1639 	case SADB_ADD:
1640 	case SADB_UPDATE:
1641 	case SADB_X_UPDATEPAIR:
1642 	case SADB_X_DELPAIR_STATE:
1643 	case SADB_FLUSH:
1644 	case SADB_DUMP:
1645 		/*
1646 		 * I have all of the message already.  I just need to strip
1647 		 * out the keying material and echo the message back.
1648 		 *
1649 		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1650 		 * work.  When DUMP reaches here, it should only be a base
1651 		 * message.
1652 		 */
1653 	justecho:
1654 		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1655 		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1656 		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1657 			sadb_strip(samsg);
1658 			/* Assume PF_KEY message is contiguous. */
1659 			ASSERT(mp->b_cont->b_cont == NULL);
1660 			oldend = mp->b_cont->b_wptr;
1661 			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1662 			    SADB_64TO8(samsg->sadb_msg_len);
1663 			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1664 		}
1665 		break;
1666 	case SADB_GET:
1667 		/*
1668 		 * Do a lot of work here, because of the ipsa I just found.
1669 		 * First construct the new PF_KEY message, then abandon
1670 		 * the old one.
1671 		 */
1672 		mp1 = sadb_sa2msg(ipsa, samsg);
1673 		if (mp1 == NULL) {
1674 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1675 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1676 			return;
1677 		}
1678 		freemsg(mp->b_cont);
1679 		mp->b_cont = mp1;
1680 		break;
1681 	case SADB_DELETE:
1682 	case SADB_X_DELPAIR:
1683 		if (ipsa == NULL)
1684 			goto justecho;
1685 		/*
1686 		 * Because listening KMds may require more info, treat
1687 		 * DELETE like a special case of GET.
1688 		 */
1689 		mp1 = sadb_sa2msg(ipsa, samsg);
1690 		if (mp1 == NULL) {
1691 			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1692 			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1693 			return;
1694 		}
1695 		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1696 		sadb_strip(newsamsg);
1697 		oldend = mp1->b_wptr;
1698 		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1699 		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1700 		freemsg(mp->b_cont);
1701 		mp->b_cont = mp1;
1702 		break;
1703 	default:
1704 		freemsg(mp);
1705 		return;
1706 	}
1707 
1708 	/* ksi is now null and void. */
1709 	kso = (keysock_out_t *)ksi;
1710 	kso->ks_out_type = KEYSOCK_OUT;
1711 	kso->ks_out_len = sizeof (*kso);
1712 	kso->ks_out_serial = ksi->ks_in_serial;
1713 	/* We're ready to send... */
1714 	putnext(pfkey_q, mp);
1715 }
1716 
1717 /*
1718  * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1719  */
1720 void
sadb_keysock_hello(queue_t ** pfkey_qp,queue_t * q,mblk_t * mp,void (* ager)(void *),void * agerarg,timeout_id_t * top,int satype)1721 sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1722     void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1723 {
1724 	keysock_hello_ack_t *kha;
1725 	queue_t *oldq;
1726 
1727 	ASSERT(OTHERQ(q) != NULL);
1728 
1729 	/*
1730 	 * First, check atomically that I'm the first and only keysock
1731 	 * instance.
1732 	 *
1733 	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1734 	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1735 	 * messages.
1736 	 */
1737 
1738 	oldq = atomic_cas_ptr((void **)pfkey_qp, NULL, OTHERQ(q));
1739 	if (oldq != NULL) {
1740 		ASSERT(oldq != q);
1741 		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1742 		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1743 		freemsg(mp);
1744 		return;
1745 	}
1746 
1747 	kha = (keysock_hello_ack_t *)mp->b_rptr;
1748 	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1749 	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1750 	kha->ks_hello_satype = (uint8_t)satype;
1751 
1752 	/*
1753 	 * If we made it past the atomic_cas_ptr, then we have "exclusive"
1754 	 * access to the timeout handle.  Fire it off after the default ager
1755 	 * interval.
1756 	 */
1757 	*top = qtimeout(*pfkey_qp, ager, agerarg,
1758 	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
1759 
1760 	putnext(*pfkey_qp, mp);
1761 }
1762 
/*
 * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
 *
 * Check addresses themselves for wildcard or multicast.
 * Check ire table for local/non-local/broadcast.
 *
 * ext must be one of the six address extension types asserted below; the
 * sockaddr that follows its header may be rewritten in place (v4-mapped
 * addresses become AF_INET, inner addresses are masked to their prefix).
 *
 * Returns one of the KS_IN_ADDR_* classifications:
 *	KS_IN_ADDR_UNKNOWN	bad address family or bad inner prefix length;
 *				the error has already been reported (below).
 *	KS_IN_ADDR_DONTCARE	inner (tunnel) addresses, after masking.
 *	KS_IN_ADDR_UNSPEC	unspecified/wildcard address.
 *	KS_IN_ADDR_MBCAST	multicast, or IPv4 broadcast.
 *	KS_IN_ADDR_ME		address is local to this netstack.
 *	KS_IN_ADDR_NOTME	anything else.
 *
 * Errors are reported with EINVAL plus a diagnostic: through
 * sadb_pfkey_error() (using mp and serial) when pfkey_q is non-NULL,
 * otherwise by treating mp as a pointer to an sadb_msg_t and writing the
 * errno and diagnostic straight into it.
 */
int
sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
    netstack_t *ns)
{
	sadb_address_t *addr = (sadb_address_t *)ext;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	int diagnostic, type;
	/* Set once a v4-mapped address has been rewritten as AF_INET. */
	boolean_t normalized = B_FALSE;

	ASSERT(ext != NULL);
	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));

	diagnostic = 0;

	/* Assign both sockaddrs, the compiler will do the right thing. */
	sin = (struct sockaddr_in *)(addr + 1);
	sin6 = (struct sockaddr_in6 *)(addr + 1);

	if (sin6->sin6_family == AF_INET6) {
		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Convert to an AF_INET sockaddr.  This means the
			 * return messages will have the extra space, but have
			 * AF_INET sockaddrs instead of AF_INET6.
			 *
			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
			 * equal to AF_INET <v4>, it shouldn't be a huge
			 * problem.
			 */
			sin->sin_family = AF_INET;
			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
			    &sin->sin_addr);
			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
			normalized = B_TRUE;
		}
	} else if (sin->sin_family != AF_INET) {
		/* Neither v4 nor v6: pick the diagnostic for this extension. */
		switch (ext->sadb_ext_type) {
		case SADB_EXT_ADDRESS_SRC:
			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
			break;
		case SADB_EXT_ADDRESS_DST:
			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
			break;
		case SADB_X_EXT_ADDRESS_INNER_SRC:
			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
			break;
		case SADB_X_EXT_ADDRESS_INNER_DST:
			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
			break;
		case SADB_X_EXT_ADDRESS_NATT_LOC:
			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
			break;
		case SADB_X_EXT_ADDRESS_NATT_REM:
			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
			break;
			/* There is no default, see above ASSERT. */
		}
		/*
		 * Common error exit.  The prefix checks further down jump
		 * back here with "diagnostic" already set.
		 */
bail:
		if (pfkey_q != NULL) {
			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
			    serial);
		} else {
			/*
			 * Scribble in sadb_msg that we got passed in.
			 * Overload "mp" to be an sadb_msg pointer.
			 */
			sadb_msg_t *samsg = (sadb_msg_t *)mp;

			samsg->sadb_msg_errno = EINVAL;
			samsg->sadb_x_msg_diagnostic = diagnostic;
		}
		return (KS_IN_ADDR_UNKNOWN);
	}

	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
		/*
		 * We need only check for prefix issues.
		 */

		/* Set diagnostic now, in case we need it later. */
		diagnostic =
		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;

		/*
		 * A v4-mapped prefix counts the 96 leading mapped bits;
		 * drop them to get the IPv4 prefix length.
		 * NOTE(review): a mapped prefix below 96 goes out of range
		 * here; presumably the "> 32" test below rejects the
		 * wrapped value -- confirm the field is unsigned.
		 */
		if (normalized)
			addr->sadb_address_prefixlen -= 96;

		/*
		 * Verify and mask out inner-addresses based on prefix length.
		 */
		if (sin->sin_family == AF_INET) {
			if (addr->sadb_address_prefixlen > 32)
				goto bail;
			sin->sin_addr.s_addr &=
			    ip_plen_to_mask(addr->sadb_address_prefixlen);
		} else {
			in6_addr_t mask;

			ASSERT(sin->sin_family == AF_INET6);
			/*
			 * ip_plen_to_mask_v6() returns NULL if the value in
			 * question is out of range.
			 */
			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
			    &mask) == NULL)
				goto bail;
			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
		}

		/* We don't care in these cases. */
		return (KS_IN_ADDR_DONTCARE);
	}

	/* Outer and NAT-T addresses: classify the address itself. */
	if (sin->sin_family == AF_INET6) {
		/* Check the easy ones now. */
		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
			return (KS_IN_ADDR_MBCAST);
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
			return (KS_IN_ADDR_UNSPEC);
		/*
		 * At this point, we're a unicast IPv6 address.
		 *
		 * XXX Zones alert -> me/notme decision needs to be tempered
		 * by what zone we're in when we go to zone-aware IPsec.
		 */
		if (ip_type_v6(&sin6->sin6_addr, ns->netstack_ip) ==
		    IRE_LOCAL) {
			/* Hey hey, it's local. */
			return (KS_IN_ADDR_ME);
		}
	} else {
		ASSERT(sin->sin_family == AF_INET);
		if (sin->sin_addr.s_addr == INADDR_ANY)
			return (KS_IN_ADDR_UNSPEC);
		if (CLASSD(sin->sin_addr.s_addr))
			return (KS_IN_ADDR_MBCAST);
		/*
		 * At this point we're a unicast or broadcast IPv4 address.
		 *
		 * Check if the address is IRE_BROADCAST or IRE_LOCAL.
		 *
		 * XXX Zones alert -> me/notme decision needs to be tempered
		 * by what zone we're in when we go to zone-aware IPsec.
		 */
		type = ip_type_v4(sin->sin_addr.s_addr, ns->netstack_ip);
		switch (type) {
		case IRE_LOCAL:
			return (KS_IN_ADDR_ME);
		case IRE_BROADCAST:
			return (KS_IN_ADDR_MBCAST);
		}
	}

	return (KS_IN_ADDR_NOTME);
}
1936 
1937 /*
1938  * Address normalizations and reality checks for inbound PF_KEY messages.
1939  *
1940  * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
1941  * the source to AF_INET.  Do the same for the inner sources.
1942  */
1943 boolean_t
sadb_addrfix(keysock_in_t * ksi,queue_t * pfkey_q,mblk_t * mp,netstack_t * ns)1944 sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
1945 {
1946 	struct sockaddr_in *src, *isrc;
1947 	struct sockaddr_in6 *dst, *idst;
1948 	sadb_address_t *srcext, *dstext;
1949 	uint16_t sport;
1950 	sadb_ext_t **extv = ksi->ks_in_extv;
1951 	int rc;
1952 
1953 	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
1954 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
1955 		    ksi->ks_in_serial, ns);
1956 		if (rc == KS_IN_ADDR_UNKNOWN)
1957 			return (B_FALSE);
1958 		if (rc == KS_IN_ADDR_MBCAST) {
1959 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1960 			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
1961 			return (B_FALSE);
1962 		}
1963 		ksi->ks_in_srctype = rc;
1964 	}
1965 
1966 	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
1967 		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
1968 		    ksi->ks_in_serial, ns);
1969 		if (rc == KS_IN_ADDR_UNKNOWN)
1970 			return (B_FALSE);
1971 		if (rc == KS_IN_ADDR_UNSPEC) {
1972 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1973 			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
1974 			return (B_FALSE);
1975 		}
1976 		ksi->ks_in_dsttype = rc;
1977 	}
1978 
1979 	/*
1980 	 * NAT-Traversal addrs are simple enough to not require all of
1981 	 * the checks in sadb_addrcheck().  Just normalize or reject if not
1982 	 * AF_INET.
1983 	 */
1984 	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
1985 		rc = sadb_addrcheck(pfkey_q, mp,
1986 		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
1987 
1988 		/*
1989 		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
1990 		 * always be NOTME, or UNSPEC (to handle both tunnel mode
1991 		 * AND local-port flexibility).
1992 		 */
1993 		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
1994 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1995 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
1996 			    ksi->ks_in_serial);
1997 			return (B_FALSE);
1998 		}
1999 		src = (struct sockaddr_in *)
2000 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
2001 		if (src->sin_family != AF_INET) {
2002 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2003 			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2004 			    ksi->ks_in_serial);
2005 			return (B_FALSE);
2006 		}
2007 	}
2008 
2009 	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2010 		rc = sadb_addrcheck(pfkey_q, mp,
2011 		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2012 
2013 		/*
2014 		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2015 		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2016 		 */
2017 		if (rc != KS_IN_ADDR_NOTME &&
2018 		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2019 		    rc == KS_IN_ADDR_UNSPEC)) {
2020 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2021 			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2022 			    ksi->ks_in_serial);
2023 			return (B_FALSE);
2024 		}
2025 		src = (struct sockaddr_in *)
2026 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2027 		if (src->sin_family != AF_INET) {
2028 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2029 			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2030 			    ksi->ks_in_serial);
2031 			return (B_FALSE);
2032 		}
2033 	}
2034 
2035 	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2036 		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2037 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2038 			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2039 			    ksi->ks_in_serial);
2040 			return (B_FALSE);
2041 		}
2042 
2043 		if (sadb_addrcheck(pfkey_q, mp,
2044 		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2045 		    == KS_IN_ADDR_UNKNOWN ||
2046 		    sadb_addrcheck(pfkey_q, mp,
2047 		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2048 		    == KS_IN_ADDR_UNKNOWN)
2049 			return (B_FALSE);
2050 
2051 		isrc = (struct sockaddr_in *)
2052 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2053 		    1);
2054 		idst = (struct sockaddr_in6 *)
2055 		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2056 		    1);
2057 		if (isrc->sin_family != idst->sin6_family) {
2058 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2059 			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2060 			    ksi->ks_in_serial);
2061 			return (B_FALSE);
2062 		}
2063 	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2064 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2065 			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2066 			    ksi->ks_in_serial);
2067 			return (B_FALSE);
2068 	} else {
2069 		isrc = NULL;	/* For inner/outer port check below. */
2070 	}
2071 
2072 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2073 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2074 
2075 	if (dstext == NULL || srcext == NULL)
2076 		return (B_TRUE);
2077 
2078 	dst = (struct sockaddr_in6 *)(dstext + 1);
2079 	src = (struct sockaddr_in *)(srcext + 1);
2080 
2081 	if (isrc != NULL &&
2082 	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2083 	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2084 		/* Can't set inner and outer ports in one SA. */
2085 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2086 		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2087 		    ksi->ks_in_serial);
2088 		return (B_FALSE);
2089 	}
2090 
2091 	if (dst->sin6_family == src->sin_family)
2092 		return (B_TRUE);
2093 
2094 	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2095 		if (srcext->sadb_address_proto == 0) {
2096 			srcext->sadb_address_proto = dstext->sadb_address_proto;
2097 		} else if (dstext->sadb_address_proto == 0) {
2098 			dstext->sadb_address_proto = srcext->sadb_address_proto;
2099 		} else {
2100 			/* Inequal protocols, neither were 0.  Report error. */
2101 			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2102 			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2103 			    ksi->ks_in_serial);
2104 			return (B_FALSE);
2105 		}
2106 	}
2107 
2108 	/*
2109 	 * With the exception of an unspec IPv6 source and an IPv4
2110 	 * destination, address families MUST me matched.
2111 	 */
2112 	if (src->sin_family == AF_INET ||
2113 	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2114 		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2115 		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2116 		return (B_FALSE);
2117 	}
2118 
2119 	/*
2120 	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2121 	 * in the same place for sockaddr_in and sockaddr_in6.
2122 	 */
2123 	sport = src->sin_port;
2124 	bzero(src, sizeof (*src));
2125 	src->sin_family = AF_INET;
2126 	src->sin_port = sport;
2127 
2128 	return (B_TRUE);
2129 }
2130 
2131 /*
2132  * Set the results in "addrtype", given an IRE as requested by
2133  * sadb_addrcheck().
2134  */
2135 int
sadb_addrset(ire_t * ire)2136 sadb_addrset(ire_t *ire)
2137 {
2138 	if ((ire->ire_type & IRE_BROADCAST) ||
2139 	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2140 	    (ire->ire_ipversion == IPV6_VERSION &&
2141 	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2142 		return (KS_IN_ADDR_MBCAST);
2143 	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2144 		return (KS_IN_ADDR_ME);
2145 	return (KS_IN_ADDR_NOTME);
2146 }
2147 
2148 /*
2149  * Match primitives..
2150  * !!! TODO: short term: inner selectors
2151  *		ipv6 scope id (ifindex)
2152  * longer term:  zone id.  sensitivity label. uid.
2153  */
2154 boolean_t
sadb_match_spi(ipsa_query_t * sq,ipsa_t * sa)2155 sadb_match_spi(ipsa_query_t *sq, ipsa_t *sa)
2156 {
2157 	return (sq->spi == sa->ipsa_spi);
2158 }
2159 
2160 boolean_t
sadb_match_dst_v6(ipsa_query_t * sq,ipsa_t * sa)2161 sadb_match_dst_v6(ipsa_query_t *sq, ipsa_t *sa)
2162 {
2163 	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_dstaddr, sq->dstaddr, AF_INET6));
2164 }
2165 
2166 boolean_t
sadb_match_src_v6(ipsa_query_t * sq,ipsa_t * sa)2167 sadb_match_src_v6(ipsa_query_t *sq, ipsa_t *sa)
2168 {
2169 	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_srcaddr, sq->srcaddr, AF_INET6));
2170 }
2171 
2172 boolean_t
sadb_match_dst_v4(ipsa_query_t * sq,ipsa_t * sa)2173 sadb_match_dst_v4(ipsa_query_t *sq, ipsa_t *sa)
2174 {
2175 	return (sq->dstaddr[0] == sa->ipsa_dstaddr[0]);
2176 }
2177 
2178 boolean_t
sadb_match_src_v4(ipsa_query_t * sq,ipsa_t * sa)2179 sadb_match_src_v4(ipsa_query_t *sq, ipsa_t *sa)
2180 {
2181 	return (sq->srcaddr[0] == sa->ipsa_srcaddr[0]);
2182 }
2183 
2184 boolean_t
sadb_match_dstid(ipsa_query_t * sq,ipsa_t * sa)2185 sadb_match_dstid(ipsa_query_t *sq, ipsa_t *sa)
2186 {
2187 	return ((sa->ipsa_dst_cid != NULL) &&
2188 	    (sq->didtype == sa->ipsa_dst_cid->ipsid_type) &&
2189 	    (strcmp(sq->didstr, sa->ipsa_dst_cid->ipsid_cid) == 0));
2190 
2191 }
2192 boolean_t
sadb_match_srcid(ipsa_query_t * sq,ipsa_t * sa)2193 sadb_match_srcid(ipsa_query_t *sq, ipsa_t *sa)
2194 {
2195 	return ((sa->ipsa_src_cid != NULL) &&
2196 	    (sq->sidtype == sa->ipsa_src_cid->ipsid_type) &&
2197 	    (strcmp(sq->sidstr, sa->ipsa_src_cid->ipsid_cid) == 0));
2198 }
2199 
2200 boolean_t
sadb_match_kmc(ipsa_query_t * sq,ipsa_t * sa)2201 sadb_match_kmc(ipsa_query_t *sq, ipsa_t *sa)
2202 {
2203 #define	M(a, b) (((a) == 0) || ((b) == 0) || ((a) == (b)))
2204 
2205 	return (M(sq->kmc, sa->ipsa_kmc) && M(sq->kmp, sa->ipsa_kmp));
2206 
2207 #undef M
2208 }
2209 
2210 /*
2211  * Common function which extracts several PF_KEY extensions for ease of
2212  * SADB matching.
2213  *
2214  * XXX TODO: weed out ipsa_query_t fields not used during matching
2215  * or afterwards?
2216  */
2217 int
sadb_form_query(keysock_in_t * ksi,uint32_t req,uint32_t match,ipsa_query_t * sq,int * diagnostic)2218 sadb_form_query(keysock_in_t *ksi, uint32_t req, uint32_t match,
2219     ipsa_query_t *sq, int *diagnostic)
2220 {
2221 	int i;
2222 	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2223 
2224 	for (i = 0; i < IPSA_NMATCH; i++)
2225 		sq->matchers[i] = NULL;
2226 
2227 	ASSERT((req & ~match) == 0);
2228 
2229 	sq->req = req;
2230 	sq->dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2231 	sq->srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2232 	sq->assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2233 
2234 	if ((req & IPSA_Q_DST) && (sq->dstext == NULL)) {
2235 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2236 		return (EINVAL);
2237 	}
2238 	if ((req & IPSA_Q_SRC) && (sq->srcext == NULL)) {
2239 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2240 		return (EINVAL);
2241 	}
2242 	if ((req & IPSA_Q_SA) && (sq->assoc == NULL)) {
2243 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2244 		return (EINVAL);
2245 	}
2246 
2247 	if (match & IPSA_Q_SA) {
2248 		*mfpp++ = sadb_match_spi;
2249 		sq->spi = sq->assoc->sadb_sa_spi;
2250 	}
2251 
2252 	if (sq->dstext != NULL)
2253 		sq->dst = (struct sockaddr_in *)(sq->dstext + 1);
2254 	else {
2255 		sq->dst = NULL;
2256 		sq->dst6 = NULL;
2257 		sq->dstaddr = NULL;
2258 	}
2259 
2260 	if (sq->srcext != NULL)
2261 		sq->src = (struct sockaddr_in *)(sq->srcext + 1);
2262 	else {
2263 		sq->src = NULL;
2264 		sq->src6 = NULL;
2265 		sq->srcaddr = NULL;
2266 	}
2267 
2268 	if (sq->dst != NULL)
2269 		sq->af = sq->dst->sin_family;
2270 	else if (sq->src != NULL)
2271 		sq->af = sq->src->sin_family;
2272 	else
2273 		sq->af = AF_INET;
2274 
2275 	if (sq->af == AF_INET6) {
2276 		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2277 			*mfpp++ = sadb_match_dst_v6;
2278 			sq->dst6 = (struct sockaddr_in6 *)sq->dst;
2279 			sq->dstaddr = (uint32_t *)&(sq->dst6->sin6_addr);
2280 		} else {
2281 			match &= ~IPSA_Q_DST;
2282 			sq->dstaddr = ALL_ZEROES_PTR;
2283 		}
2284 
2285 		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2286 			sq->src6 = (struct sockaddr_in6 *)(sq->srcext + 1);
2287 			sq->srcaddr = (uint32_t *)&sq->src6->sin6_addr;
2288 			if (sq->src6->sin6_family != AF_INET6) {
2289 				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2290 				return (EINVAL);
2291 			}
2292 			*mfpp++ = sadb_match_src_v6;
2293 		} else {
2294 			match &= ~IPSA_Q_SRC;
2295 			sq->srcaddr = ALL_ZEROES_PTR;
2296 		}
2297 	} else {
2298 		sq->src6 = sq->dst6 = NULL;
2299 		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2300 			*mfpp++ = sadb_match_dst_v4;
2301 			sq->dstaddr = (uint32_t *)&sq->dst->sin_addr;
2302 		} else {
2303 			match &= ~IPSA_Q_DST;
2304 			sq->dstaddr = ALL_ZEROES_PTR;
2305 		}
2306 		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2307 			sq->srcaddr = (uint32_t *)&sq->src->sin_addr;
2308 			if (sq->src->sin_family != AF_INET) {
2309 				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2310 				return (EINVAL);
2311 			}
2312 			*mfpp++ = sadb_match_src_v4;
2313 		} else {
2314 			match &= ~IPSA_Q_SRC;
2315 			sq->srcaddr = ALL_ZEROES_PTR;
2316 		}
2317 	}
2318 
2319 	sq->dstid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2320 	if ((match & IPSA_Q_DSTID) && (sq->dstid != NULL)) {
2321 		sq->didstr = (char *)(sq->dstid + 1);
2322 		sq->didtype = sq->dstid->sadb_ident_type;
2323 		*mfpp++ = sadb_match_dstid;
2324 	}
2325 
2326 	sq->srcid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2327 
2328 	if ((match & IPSA_Q_SRCID) && (sq->srcid != NULL)) {
2329 		sq->sidstr = (char *)(sq->srcid + 1);
2330 		sq->sidtype = sq->srcid->sadb_ident_type;
2331 		*mfpp++ = sadb_match_srcid;
2332 	}
2333 
2334 	sq->kmcext = (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2335 	sq->kmc = 0;
2336 	sq->kmp = 0;
2337 
2338 	if ((match & IPSA_Q_KMC) && (sq->kmcext)) {
2339 		sq->kmp = sq->kmcext->sadb_x_kmc_proto;
2340 		/*
2341 		 * Be liberal in what we receive.  Special-case the IKEv1
2342 		 * cookie, which closed-source in.iked assumes is 32 bits.
2343 		 * Now that we store all 64 bits, we should pre-zero the
2344 		 * reserved field on behalf of closed-source in.iked.
2345 		 */
2346 		if (sq->kmp == SADB_X_KMP_IKE) {
2347 			/* Just in case in.iked is misbehaving... */
2348 			sq->kmcext->sadb_x_kmc_reserved = 0;
2349 		}
2350 		sq->kmc = sq->kmcext->sadb_x_kmc_cookie64;
2351 		*mfpp++ = sadb_match_kmc;
2352 	}
2353 
2354 	if (match & (IPSA_Q_INBOUND|IPSA_Q_OUTBOUND)) {
2355 		if (sq->af == AF_INET6)
2356 			sq->sp = &sq->spp->s_v6;
2357 		else
2358 			sq->sp = &sq->spp->s_v4;
2359 	} else {
2360 		sq->sp = NULL;
2361 	}
2362 
2363 	if (match & IPSA_Q_INBOUND) {
2364 		sq->inhash = INBOUND_HASH(sq->sp, sq->assoc->sadb_sa_spi);
2365 		sq->inbound = &sq->sp->sdb_if[sq->inhash];
2366 	} else {
2367 		sq->inhash = 0;
2368 		sq->inbound = NULL;
2369 	}
2370 
2371 	if (match & IPSA_Q_OUTBOUND) {
2372 		if (sq->af == AF_INET6) {
2373 			sq->outhash = OUTBOUND_HASH_V6(sq->sp, *(sq->dstaddr));
2374 		} else {
2375 			sq->outhash = OUTBOUND_HASH_V4(sq->sp, *(sq->dstaddr));
2376 		}
2377 		sq->outbound = &sq->sp->sdb_of[sq->outhash];
2378 	} else {
2379 		sq->outhash = 0;
2380 		sq->outbound = NULL;
2381 	}
2382 	sq->match = match;
2383 	return (0);
2384 }
2385 
2386 /*
2387  * Match an initialized query structure with a security association;
2388  * return B_TRUE on a match, B_FALSE on a miss.
2389  * Applies match functions set up by sadb_form_query() until one returns false.
2390  */
2391 boolean_t
sadb_match_query(ipsa_query_t * sq,ipsa_t * sa)2392 sadb_match_query(ipsa_query_t *sq, ipsa_t *sa)
2393 {
2394 	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2395 	ipsa_match_fn_t mfp;
2396 
2397 	for (mfp = *mfpp++; mfp != NULL; mfp = *mfpp++) {
2398 		if (!mfp(sq, sa))
2399 			return (B_FALSE);
2400 	}
2401 	return (B_TRUE);
2402 }
2403 
2404 /*
2405  * Walker callback function to delete sa's based on src/dst address.
2406  * Assumes that we're called with *head locked, no other locks held;
2407  * Conveniently, and not coincidentally, this is both what sadb_walker
2408  * gives us and also what sadb_unlinkassoc expects.
2409  */
2410 struct sadb_purge_state
2411 {
2412 	ipsa_query_t sq;
2413 	boolean_t inbnd;
2414 	uint8_t sadb_sa_state;
2415 };
2416 
2417 static void
sadb_purge_cb(isaf_t * head,ipsa_t * entry,void * cookie)2418 sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2419 {
2420 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2421 
2422 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2423 
2424 	mutex_enter(&entry->ipsa_lock);
2425 
2426 	if (entry->ipsa_state == IPSA_STATE_LARVAL ||
2427 	    !sadb_match_query(&ps->sq, entry)) {
2428 		mutex_exit(&entry->ipsa_lock);
2429 		return;
2430 	}
2431 
2432 	if (ps->inbnd) {
2433 		sadb_delete_cluster(entry);
2434 	}
2435 	entry->ipsa_state = IPSA_STATE_DEAD;
2436 	(void) sadb_torch_assoc(head, entry);
2437 }
2438 
2439 /*
2440  * Common code to purge an SA with a matching src or dst address.
2441  * Don't kill larval SA's in such a purge.
2442  */
2443 int
sadb_purge_sa(mblk_t * mp,keysock_in_t * ksi,sadb_t * sp,int * diagnostic,queue_t * pfkey_q)2444 sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
2445     int *diagnostic, queue_t *pfkey_q)
2446 {
2447 	struct sadb_purge_state ps;
2448 	int error = sadb_form_query(ksi, 0,
2449 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2450 	    &ps.sq, diagnostic);
2451 
2452 	if (error != 0)
2453 		return (error);
2454 
2455 	/*
2456 	 * This is simple, crude, and effective.
2457 	 * Unimplemented optimizations (TBD):
2458 	 * - we can limit how many places we search based on where we
2459 	 * think the SA is filed.
2460 	 * - if we get a dst address, we can hash based on dst addr to find
2461 	 * the correct bucket in the outbound table.
2462 	 */
2463 	ps.inbnd = B_TRUE;
2464 	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2465 	ps.inbnd = B_FALSE;
2466 	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2467 
2468 	ASSERT(mp->b_cont != NULL);
2469 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2470 	    NULL);
2471 	return (0);
2472 }
2473 
2474 static void
sadb_delpair_state_one(isaf_t * head,ipsa_t * entry,void * cookie)2475 sadb_delpair_state_one(isaf_t *head, ipsa_t *entry, void *cookie)
2476 {
2477 	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2478 	isaf_t  *inbound_bucket;
2479 	ipsa_t *peer_assoc;
2480 	ipsa_query_t *sq = &ps->sq;
2481 
2482 	ASSERT(MUTEX_HELD(&head->isaf_lock));
2483 
2484 	mutex_enter(&entry->ipsa_lock);
2485 
2486 	if ((entry->ipsa_state != ps->sadb_sa_state) ||
2487 	    ((sq->srcaddr != NULL) &&
2488 	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, sq->srcaddr, sq->af))) {
2489 		mutex_exit(&entry->ipsa_lock);
2490 		return;
2491 	}
2492 
2493 	/*
2494 	 * The isaf_t *, which is passed in , is always an outbound bucket,
2495 	 * and we are preserving the outbound-then-inbound hash-bucket lock
2496 	 * ordering. The sadb_walker() which triggers this function is called
2497 	 * only on the outbound fanout, and the corresponding inbound bucket
2498 	 * lock is safe to acquire here.
2499 	 */
2500 
2501 	if (entry->ipsa_haspeer) {
2502 		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_spi);
2503 		mutex_enter(&inbound_bucket->isaf_lock);
2504 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2505 		    entry->ipsa_spi, entry->ipsa_srcaddr,
2506 		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
2507 	} else {
2508 		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_otherspi);
2509 		mutex_enter(&inbound_bucket->isaf_lock);
2510 		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
2511 		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
2512 		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
2513 	}
2514 
2515 	entry->ipsa_state = IPSA_STATE_DEAD;
2516 	(void) sadb_torch_assoc(head, entry);
2517 	if (peer_assoc != NULL) {
2518 		mutex_enter(&peer_assoc->ipsa_lock);
2519 		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
2520 		(void) sadb_torch_assoc(inbound_bucket, peer_assoc);
2521 	}
2522 	mutex_exit(&inbound_bucket->isaf_lock);
2523 }
2524 
2525 static int
sadb_delpair_state(mblk_t * mp,keysock_in_t * ksi,sadbp_t * spp,int * diagnostic,queue_t * pfkey_q)2526 sadb_delpair_state(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2527     int *diagnostic, queue_t *pfkey_q)
2528 {
2529 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2530 	struct sadb_purge_state ps;
2531 	int error;
2532 
2533 	ps.sq.spp = spp;		/* XXX param */
2534 
2535 	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SRC,
2536 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2537 	    &ps.sq, diagnostic);
2538 	if (error != 0)
2539 		return (error);
2540 
2541 	ps.inbnd = B_FALSE;
2542 	ps.sadb_sa_state = assoc->sadb_sa_state;
2543 	sadb_walker(ps.sq.sp->sdb_of, ps.sq.sp->sdb_hashsize,
2544 	    sadb_delpair_state_one, &ps);
2545 
2546 	ASSERT(mp->b_cont != NULL);
2547 	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2548 	    ksi, NULL);
2549 	return (0);
2550 }
2551 
2552 /*
2553  * Common code to delete/get an SA.
2554  */
2555 int
sadb_delget_sa(mblk_t * mp,keysock_in_t * ksi,sadbp_t * spp,int * diagnostic,queue_t * pfkey_q,uint8_t sadb_msg_type)2556 sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2557     int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2558 {
2559 	ipsa_query_t sq;
2560 	ipsa_t *echo_target = NULL;
2561 	ipsap_t ipsapp;
2562 	uint_t	error = 0;
2563 
2564 	if (sadb_msg_type == SADB_X_DELPAIR_STATE)
2565 		return (sadb_delpair_state(mp, ksi, spp, diagnostic, pfkey_q));
2566 
2567 	sq.spp = spp;		/* XXX param */
2568 	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SA,
2569 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
2570 	    &sq, diagnostic);
2571 	if (error != 0)
2572 		return (error);
2573 
2574 	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
2575 	if (error != 0) {
2576 		return (error);
2577 	}
2578 
2579 	echo_target = ipsapp.ipsap_sa_ptr;
2580 	if (echo_target == NULL)
2581 		echo_target = ipsapp.ipsap_psa_ptr;
2582 
2583 	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2584 		/*
2585 		 * Bucket locks will be required if SA is actually unlinked.
2586 		 * get_ipsa_pair() returns valid hash bucket pointers even
2587 		 * if it can't find a pair SA pointer. To prevent a potential
2588 		 * deadlock, always lock the outbound bucket before the inbound.
2589 		 */
2590 		if (ipsapp.in_inbound_table) {
2591 			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2592 			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2593 		} else {
2594 			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2595 			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2596 		}
2597 
2598 		if (ipsapp.ipsap_sa_ptr != NULL) {
2599 			mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
2600 			if (ipsapp.ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2601 				sadb_delete_cluster(ipsapp.ipsap_sa_ptr);
2602 			}
2603 			ipsapp.ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2604 			(void) sadb_torch_assoc(ipsapp.ipsap_bucket,
2605 			    ipsapp.ipsap_sa_ptr);
2606 			/*
2607 			 * sadb_torch_assoc() releases the ipsa_lock
2608 			 * and calls sadb_unlinkassoc() which does a
2609 			 * IPSA_REFRELE.
2610 			 */
2611 		}
2612 		if (ipsapp.ipsap_psa_ptr != NULL) {
2613 			mutex_enter(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2614 			if (sadb_msg_type == SADB_X_DELPAIR ||
2615 			    ipsapp.ipsap_psa_ptr->ipsa_haspeer) {
2616 				if (ipsapp.ipsap_psa_ptr->ipsa_flags &
2617 				    IPSA_F_INBOUND) {
2618 					sadb_delete_cluster
2619 					    (ipsapp.ipsap_psa_ptr);
2620 				}
2621 				ipsapp.ipsap_psa_ptr->ipsa_state =
2622 				    IPSA_STATE_DEAD;
2623 				(void) sadb_torch_assoc(ipsapp.ipsap_pbucket,
2624 				    ipsapp.ipsap_psa_ptr);
2625 			} else {
2626 				/*
2627 				 * Only half of the "pair" has been deleted.
2628 				 * Update the remaining SA and remove references
2629 				 * to its pair SA, which is now gone.
2630 				 */
2631 				ipsapp.ipsap_psa_ptr->ipsa_otherspi = 0;
2632 				ipsapp.ipsap_psa_ptr->ipsa_flags &=
2633 				    ~IPSA_F_PAIRED;
2634 				mutex_exit(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2635 			}
2636 		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2637 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2638 			error = ESRCH;
2639 		}
2640 		mutex_exit(&ipsapp.ipsap_bucket->isaf_lock);
2641 		mutex_exit(&ipsapp.ipsap_pbucket->isaf_lock);
2642 	}
2643 
2644 	ASSERT(mp->b_cont != NULL);
2645 
2646 	if (error == 0)
2647 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2648 		    mp->b_cont->b_rptr, ksi, echo_target);
2649 
2650 	destroy_ipsa_pair(&ipsapp);
2651 
2652 	return (error);
2653 }
2654 
2655 /*
2656  * This function takes a sadb_sa_t and finds the ipsa_t structure
2657  * and the isaf_t (hash bucket) that its stored under. If the security
2658  * association has a peer, the ipsa_t structure and bucket for that security
2659  * association are also searched for. The "pair" of ipsa_t's and isaf_t's
2660  * are returned as a ipsap_t.
2661  *
2662  * The hash buckets are returned for convenience, if the calling function
2663  * needs to use the hash bucket locks, say to remove the SA's, it should
2664  * take care to observe the convention of locking outbound bucket then
2665  * inbound bucket. The flag in_inbound_table provides direction.
2666  *
2667  * Note that a "pair" is defined as one (but not both) of the following:
2668  *
2669  * A security association which has a soft reference to another security
2670  * association via its SPI.
2671  *
2672  * A security association that is not obviously "inbound" or "outbound" so
2673  * it appears in both hash tables, the "peer" being the same security
2674  * association in the other hash table.
2675  *
2676  * This function will return NULL if the ipsa_t can't be found in the
2677  * inbound or outbound  hash tables (not found). If only one ipsa_t is
2678  * found, the pair ipsa_t will be NULL. Both isaf_t values are valid
2679  * provided at least one ipsa_t is found.
2680  */
2681 static int
get_ipsa_pair(ipsa_query_t * sq,ipsap_t * ipsapp,int * diagnostic)2682 get_ipsa_pair(ipsa_query_t *sq, ipsap_t *ipsapp, int *diagnostic)
2683 {
2684 	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
2685 	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
2686 	uint32_t pair_spi;
2687 
2688 	init_ipsa_pair(ipsapp);
2689 
2690 	ipsapp->in_inbound_table = B_FALSE;
2691 
2692 	/* Lock down both buckets. */
2693 	mutex_enter(&sq->outbound->isaf_lock);
2694 	mutex_enter(&sq->inbound->isaf_lock);
2695 
2696 	if (sq->assoc->sadb_sa_flags & IPSA_F_INBOUND) {
2697 		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2698 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2699 		if (ipsapp->ipsap_sa_ptr != NULL) {
2700 			ipsapp->ipsap_bucket = sq->inbound;
2701 			ipsapp->ipsap_pbucket = sq->outbound;
2702 			ipsapp->in_inbound_table = B_TRUE;
2703 		} else {
2704 			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->outbound,
2705 			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2706 			    sq->af);
2707 			ipsapp->ipsap_bucket = sq->outbound;
2708 			ipsapp->ipsap_pbucket = sq->inbound;
2709 		}
2710 	} else {
2711 		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
2712 		ipsapp->ipsap_sa_ptr =
2713 		    ipsec_getassocbyspi(sq->outbound,
2714 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2715 		if (ipsapp->ipsap_sa_ptr != NULL) {
2716 			ipsapp->ipsap_bucket = sq->outbound;
2717 			ipsapp->ipsap_pbucket = sq->inbound;
2718 		} else {
2719 			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
2720 			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
2721 			    sq->af);
2722 			ipsapp->ipsap_bucket = sq->inbound;
2723 			ipsapp->ipsap_pbucket = sq->outbound;
2724 			if (ipsapp->ipsap_sa_ptr != NULL)
2725 				ipsapp->in_inbound_table = B_TRUE;
2726 		}
2727 	}
2728 
2729 	if (ipsapp->ipsap_sa_ptr == NULL) {
2730 		mutex_exit(&sq->outbound->isaf_lock);
2731 		mutex_exit(&sq->inbound->isaf_lock);
2732 		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
2733 		return (ESRCH);
2734 	}
2735 
2736 	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
2737 	    ipsapp->in_inbound_table) {
2738 		mutex_exit(&sq->outbound->isaf_lock);
2739 		mutex_exit(&sq->inbound->isaf_lock);
2740 		return (0);
2741 	}
2742 
2743 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2744 	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
2745 		/*
2746 		 * haspeer implies no sa_pairing, look for same spi
2747 		 * in other hashtable.
2748 		 */
2749 		ipsapp->ipsap_psa_ptr =
2750 		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2751 		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
2752 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2753 		mutex_exit(&sq->outbound->isaf_lock);
2754 		mutex_exit(&sq->inbound->isaf_lock);
2755 		return (0);
2756 	}
2757 	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
2758 	IPSA_COPY_ADDR(&pair_srcaddr,
2759 	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, sq->af);
2760 	IPSA_COPY_ADDR(&pair_dstaddr,
2761 	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, sq->af);
2762 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
2763 	mutex_exit(&sq->inbound->isaf_lock);
2764 	mutex_exit(&sq->outbound->isaf_lock);
2765 
2766 	if (pair_spi == 0) {
2767 		ASSERT(ipsapp->ipsap_bucket != NULL);
2768 		ASSERT(ipsapp->ipsap_pbucket != NULL);
2769 		return (0);
2770 	}
2771 
2772 	/* found sa in outbound sadb, peer should be inbound */
2773 
2774 	if (ipsapp->in_inbound_table) {
2775 		/* Found SA in inbound table, pair will be in outbound. */
2776 		if (sq->af == AF_INET6) {
2777 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sq->sp,
2778 			    *(uint32_t *)pair_srcaddr);
2779 		} else {
2780 			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sq->sp,
2781 			    *(uint32_t *)pair_srcaddr);
2782 		}
2783 	} else {
2784 		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sq->sp, pair_spi);
2785 	}
2786 	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
2787 	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
2788 	    pair_spi, pair_dstaddr, pair_srcaddr, sq->af);
2789 	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
2790 	ASSERT(ipsapp->ipsap_bucket != NULL);
2791 	ASSERT(ipsapp->ipsap_pbucket != NULL);
2792 	return (0);
2793 }
2794 
2795 /*
2796  * Perform NAT-traversal cached checksum offset calculations here.
2797  */
2798 static void
sadb_nat_calculations(ipsa_t * newbie,sadb_address_t * natt_loc_ext,sadb_address_t * natt_rem_ext,uint32_t * src_addr_ptr,uint32_t * dst_addr_ptr)2799 sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2800     sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2801     uint32_t *dst_addr_ptr)
2802 {
2803 	struct sockaddr_in *natt_loc, *natt_rem;
2804 	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2805 	uint32_t running_sum = 0;
2806 
2807 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2808 
2809 	if (natt_rem_ext != NULL) {
2810 		uint32_t l_src;
2811 		uint32_t l_rem;
2812 
2813 		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2814 
2815 		/* Ensured by sadb_addrfix(). */
2816 		ASSERT(natt_rem->sin_family == AF_INET);
2817 
2818 		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2819 		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2820 		l_src = *src_addr_ptr;
2821 		l_rem = *natt_rem_ptr;
2822 
2823 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2824 		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2825 
2826 		l_src = ntohl(l_src);
2827 		DOWN_SUM(l_src);
2828 		DOWN_SUM(l_src);
2829 		l_rem = ntohl(l_rem);
2830 		DOWN_SUM(l_rem);
2831 		DOWN_SUM(l_rem);
2832 
2833 		/*
2834 		 * We're 1's complement for checksums, so check for wraparound
2835 		 * here.
2836 		 */
2837 		if (l_rem > l_src)
2838 			l_src--;
2839 
2840 		running_sum += l_src - l_rem;
2841 
2842 		DOWN_SUM(running_sum);
2843 		DOWN_SUM(running_sum);
2844 	}
2845 
2846 	if (natt_loc_ext != NULL) {
2847 		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2848 
2849 		/* Ensured by sadb_addrfix(). */
2850 		ASSERT(natt_loc->sin_family == AF_INET);
2851 
2852 		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2853 		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2854 
2855 		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2856 		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2857 
2858 		/*
2859 		 * NAT-T port agility means we may have natt_loc_ext, but
2860 		 * only for a local-port change.
2861 		 */
2862 		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2863 			uint32_t l_dst = ntohl(*dst_addr_ptr);
2864 			uint32_t l_loc = ntohl(*natt_loc_ptr);
2865 
2866 			DOWN_SUM(l_loc);
2867 			DOWN_SUM(l_loc);
2868 			DOWN_SUM(l_dst);
2869 			DOWN_SUM(l_dst);
2870 
2871 			/*
2872 			 * We're 1's complement for checksums, so check for
2873 			 * wraparound here.
2874 			 */
2875 			if (l_loc > l_dst)
2876 				l_dst--;
2877 
2878 			running_sum += l_dst - l_loc;
2879 			DOWN_SUM(running_sum);
2880 			DOWN_SUM(running_sum);
2881 		}
2882 	}
2883 
2884 	newbie->ipsa_inbound_cksum = running_sum;
2885 #undef DOWN_SUM
2886 }
2887 
2888 /*
2889  * This function is called from consumers that need to insert a fully-grown
2890  * security association into its tables.  This function takes into account that
2891  * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2892  * hash bucket parameters are set in order of what the SA will be most of the
2893  * time.  (For example, an SA with an unspecified source, and a multicast
2894  * destination will primarily be an outbound SA.  OTOH, if that destination
2895  * is unicast for this node, then the SA will primarily be inbound.)
2896  *
2897  * It takes a lot of parameters because even if clone is B_FALSE, this needs
2898  * to check both buckets for purposes of collision.
2899  *
2900  * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2901  * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2902  * with additional diagnostic information because there is at least one EINVAL
2903  * case here.
2904  */
2905 int
sadb_common_add(queue_t * pfkey_q,mblk_t * mp,sadb_msg_t * samsg,keysock_in_t * ksi,isaf_t * primary,isaf_t * secondary,ipsa_t * newbie,boolean_t clone,boolean_t is_inbound,int * diagnostic,netstack_t * ns,sadbp_t * spp)2906 sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2907     keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2908     ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2909     netstack_t *ns, sadbp_t *spp)
2910 {
2911 	ipsa_t *newbie_clone = NULL, *scratch;
2912 	ipsap_t ipsapp;
2913 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2914 	sadb_address_t *srcext =
2915 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2916 	sadb_address_t *dstext =
2917 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2918 	sadb_address_t *isrcext =
2919 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2920 	sadb_address_t *idstext =
2921 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2922 	sadb_x_kmc_t *kmcext =
2923 	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2924 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2925 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2926 	sadb_sens_t *sens =
2927 	    (sadb_sens_t *)ksi->ks_in_extv[SADB_EXT_SENSITIVITY];
2928 	sadb_sens_t *osens =
2929 	    (sadb_sens_t *)ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS];
2930 	sadb_x_pair_t *pair_ext =
2931 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2932 	sadb_x_replay_ctr_t *replayext =
2933 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
2934 	uint8_t protocol =
2935 	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
2936 	int salt_offset;
2937 	uint8_t *buf_ptr;
2938 	struct sockaddr_in *src, *dst, *isrc, *idst;
2939 	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2940 	sadb_lifetime_t *soft =
2941 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2942 	sadb_lifetime_t *hard =
2943 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2944 	sadb_lifetime_t	*idle =
2945 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
2946 	sa_family_t af;
2947 	int error = 0;
2948 	boolean_t isupdate = (newbie != NULL);
2949 	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2950 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2951 	ip_stack_t	*ipst = ns->netstack_ip;
2952 	ipsec_alginfo_t *alg;
2953 	int		rcode;
2954 	boolean_t	async = B_FALSE;
2955 
2956 	init_ipsa_pair(&ipsapp);
2957 
2958 	if (srcext == NULL) {
2959 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2960 		return (EINVAL);
2961 	}
2962 	if (dstext == NULL) {
2963 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2964 		return (EINVAL);
2965 	}
2966 	if (assoc == NULL) {
2967 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2968 		return (EINVAL);
2969 	}
2970 
2971 	src = (struct sockaddr_in *)(srcext + 1);
2972 	src6 = (struct sockaddr_in6 *)(srcext + 1);
2973 	dst = (struct sockaddr_in *)(dstext + 1);
2974 	dst6 = (struct sockaddr_in6 *)(dstext + 1);
2975 	if (isrcext != NULL) {
2976 		isrc = (struct sockaddr_in *)(isrcext + 1);
2977 		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2978 		ASSERT(idstext != NULL);
2979 		idst = (struct sockaddr_in *)(idstext + 1);
2980 		idst6 = (struct sockaddr_in6 *)(idstext + 1);
2981 	} else {
2982 		isrc = NULL;
2983 		isrc6 = NULL;
2984 	}
2985 
2986 	af = src->sin_family;
2987 
2988 	if (af == AF_INET) {
2989 		src_addr_ptr = (uint32_t *)&src->sin_addr;
2990 		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2991 	} else {
2992 		ASSERT(af == AF_INET6);
2993 		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2994 		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2995 	}
2996 
2997 	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
2998 	    cl_inet_checkspi &&
2999 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
3000 		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
3001 		    assoc->sadb_sa_spi, NULL);
3002 		if (rcode == -1) {
3003 			return (EEXIST);
3004 		}
3005 	}
3006 
3007 	/*
3008 	 * Check to see if the new SA will be cloned AND paired. The
3009 	 * reason a SA will be cloned is the source or destination addresses
3010 	 * are not specific enough to determine if the SA goes in the outbound
3011 	 * or the inbound hash table, so its cloned and put in both. If
3012 	 * the SA is paired, it's soft linked to another SA for the other
3013 	 * direction. Keeping track and looking up SA's that are direction
3014 	 * unspecific and linked is too hard.
3015 	 */
3016 	if (clone && (pair_ext != NULL)) {
3017 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3018 		return (EINVAL);
3019 	}
3020 
3021 	if (!isupdate) {
3022 		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3023 		    src_addr_ptr, dst_addr_ptr, af, ns);
3024 		if (newbie == NULL)
3025 			return (ENOMEM);
3026 	}
3027 
3028 	mutex_enter(&newbie->ipsa_lock);
3029 
3030 	if (isrc != NULL) {
3031 		if (isrc->sin_family == AF_INET) {
3032 			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3033 				if (srcext->sadb_address_proto != 0) {
3034 					/*
3035 					 * Mismatched outer-packet protocol
3036 					 * and inner-packet address family.
3037 					 */
3038 					mutex_exit(&newbie->ipsa_lock);
3039 					error = EPROTOTYPE;
3040 					*diagnostic =
3041 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3042 					goto error;
3043 				} else {
3044 					/* Fill in with explicit protocol. */
3045 					srcext->sadb_address_proto =
3046 					    IPPROTO_ENCAP;
3047 					dstext->sadb_address_proto =
3048 					    IPPROTO_ENCAP;
3049 				}
3050 			}
3051 			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3052 			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3053 		} else {
3054 			ASSERT(isrc->sin_family == AF_INET6);
3055 			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3056 				if (srcext->sadb_address_proto != 0) {
3057 					/*
3058 					 * Mismatched outer-packet protocol
3059 					 * and inner-packet address family.
3060 					 */
3061 					mutex_exit(&newbie->ipsa_lock);
3062 					error = EPROTOTYPE;
3063 					*diagnostic =
3064 					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3065 					goto error;
3066 				} else {
3067 					/* Fill in with explicit protocol. */
3068 					srcext->sadb_address_proto =
3069 					    IPPROTO_IPV6;
3070 					dstext->sadb_address_proto =
3071 					    IPPROTO_IPV6;
3072 				}
3073 			}
3074 			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3075 			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3076 		}
3077 		newbie->ipsa_innerfam = isrc->sin_family;
3078 
3079 		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3080 		    newbie->ipsa_innerfam);
3081 		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3082 		    newbie->ipsa_innerfam);
3083 		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3084 		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3085 
3086 		/* Unique value uses inner-ports for Tunnel Mode... */
3087 		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3088 		    idst->sin_port, dstext->sadb_address_proto,
3089 		    idstext->sadb_address_proto);
3090 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3091 		    idst->sin_port, dstext->sadb_address_proto,
3092 		    idstext->sadb_address_proto);
3093 	} else {
3094 		/* ... and outer-ports for Transport Mode. */
3095 		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3096 		    dst->sin_port, dstext->sadb_address_proto, 0);
3097 		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3098 		    dst->sin_port, dstext->sadb_address_proto, 0);
3099 	}
3100 	if (newbie->ipsa_unique_mask != (uint64_t)0)
3101 		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3102 
3103 	sadb_nat_calculations(newbie,
3104 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3105 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3106 	    src_addr_ptr, dst_addr_ptr);
3107 
3108 	newbie->ipsa_type = samsg->sadb_msg_satype;
3109 
3110 	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3111 	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3112 	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3113 	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3114 
3115 	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3116 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3117 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3118 		mutex_exit(&newbie->ipsa_lock);
3119 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3120 		error = EINVAL;
3121 		goto error;
3122 	}
3123 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3124 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3125 		mutex_exit(&newbie->ipsa_lock);
3126 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3127 		error = EINVAL;
3128 		goto error;
3129 	}
3130 	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3131 	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3132 		mutex_exit(&newbie->ipsa_lock);
3133 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3134 		error = EINVAL;
3135 		goto error;
3136 	}
3137 	/*
3138 	 * If unspecified source address, force replay_wsize to 0.
3139 	 * This is because an SA that has multiple sources of secure
3140 	 * traffic cannot enforce a replay counter w/o synchronizing the
3141 	 * senders.
3142 	 */
3143 	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3144 		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3145 	else
3146 		newbie->ipsa_replay_wsize = 0;
3147 
3148 	newbie->ipsa_addtime = gethrestime_sec();
3149 
3150 	if (kmcext != NULL) {
3151 		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3152 		/*
3153 		 * Be liberal in what we receive.  Special-case the IKEv1
3154 		 * cookie, which closed-source in.iked assumes is 32 bits.
3155 		 * Now that we store all 64 bits, we should pre-zero the
3156 		 * reserved field on behalf of closed-source in.iked.
3157 		 */
3158 		if (newbie->ipsa_kmp == SADB_X_KMP_IKE) {
3159 			/* Just in case in.iked is misbehaving... */
3160 			kmcext->sadb_x_kmc_reserved = 0;
3161 		}
3162 		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie64;
3163 	}
3164 
3165 	/*
3166 	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3167 	 * The spec says that one can update current lifetimes, but
3168 	 * that seems impractical, especially in the larval-to-mature
3169 	 * update that this function performs.
3170 	 */
3171 	if (soft != NULL) {
3172 		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3173 		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3174 		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3175 		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3176 		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3177 	}
3178 	if (hard != NULL) {
3179 		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3180 		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3181 		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3182 		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3183 		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3184 	}
3185 	if (idle != NULL) {
3186 		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3187 		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3188 		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3189 		    newbie->ipsa_idleaddlt;
3190 		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3191 	}
3192 
3193 	newbie->ipsa_authtmpl = NULL;
3194 	newbie->ipsa_encrtmpl = NULL;
3195 
3196 #ifdef IPSEC_LATENCY_TEST
3197 	if (akey != NULL && newbie->ipsa_auth_alg != SADB_AALG_NONE) {
3198 #else
3199 	if (akey != NULL) {
3200 #endif
3201 		async = (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
3202 		    IPSEC_ALGS_EXEC_ASYNC);
3203 
3204 		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3205 		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3206 		/* In case we have to round up to the next byte... */
3207 		if ((akey->sadb_key_bits & 0x7) != 0)
3208 			newbie->ipsa_authkeylen++;
3209 		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3210 		    KM_NOSLEEP);
3211 		if (newbie->ipsa_authkey == NULL) {
3212 			error = ENOMEM;
3213 			mutex_exit(&newbie->ipsa_lock);
3214 			goto error;
3215 		}
3216 		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3217 		bzero(akey + 1, newbie->ipsa_authkeylen);
3218 
3219 		/*
3220 		 * Pre-initialize the kernel crypto framework key
3221 		 * structure.
3222 		 */
3223 		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3224 		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3225 		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3226 
3227 		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3228 		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3229 		    [newbie->ipsa_auth_alg];
3230 		if (alg != NULL && ALG_VALID(alg)) {
3231 			newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3232 			newbie->ipsa_amech.cm_param =
3233 			    (char *)&newbie->ipsa_mac_len;
3234 			newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3235 			newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3236 		} else {
3237 			newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3238 		}
3239 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3240 		rw_exit(&ipss->ipsec_alg_lock);
3241 		if (error != 0) {
3242 			mutex_exit(&newbie->ipsa_lock);
3243 			/*
3244 			 * An error here indicates that alg is the wrong type
3245 			 * (IE: not authentication) or its not in the alg tables
3246 			 * created by ipsecalgs(8), or Kcf does not like the
3247 			 * parameters passed in with this algorithm, which is
3248 			 * probably a coding error!
3249 			 */
3250 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3251 
3252 			goto error;
3253 		}
3254 	}
3255 
3256 	if (ekey != NULL) {
3257 		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3258 		async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3259 		    IPSEC_ALGS_EXEC_ASYNC);
3260 		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3261 		    [newbie->ipsa_encr_alg];
3262 
3263 		if (alg != NULL && ALG_VALID(alg)) {
3264 			newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3265 			newbie->ipsa_datalen = alg->alg_datalen;
3266 			if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3267 				newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3268 
3269 			if (alg->alg_flags & ALG_FLAG_COMBINED) {
3270 				newbie->ipsa_flags |= IPSA_F_COMBINED;
3271 				newbie->ipsa_mac_len =  alg->alg_icvlen;
3272 			}
3273 
3274 			if (alg->alg_flags & ALG_FLAG_CCM)
3275 				newbie->ipsa_noncefunc = ccm_params_init;
3276 			else if (alg->alg_flags & ALG_FLAG_GCM)
3277 				newbie->ipsa_noncefunc = gcm_params_init;
3278 			else newbie->ipsa_noncefunc = cbc_params_init;
3279 
3280 			newbie->ipsa_saltlen = alg->alg_saltlen;
3281 			newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3282 			newbie->ipsa_iv_len = alg->alg_ivlen;
3283 			newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3284 			    newbie->ipsa_iv_len;
3285 			newbie->ipsa_emech.cm_param = NULL;
3286 			newbie->ipsa_emech.cm_param_len = 0;
3287 		} else {
3288 			newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3289 		}
3290 		rw_exit(&ipss->ipsec_alg_lock);
3291 
3292 		/*
3293 		 * The byte stream following the sadb_key_t is made up of:
3294 		 * key bytes, [salt bytes], [IV initial value]
3295 		 * All of these have variable length. The IV is typically
3296 		 * randomly generated by this function and not passed in.
3297 		 * By supporting the injection of a known IV, the whole
3298 		 * IPsec subsystem and the underlying crypto subsystem
3299 		 * can be tested with known test vectors.
3300 		 *
3301 		 * The keying material has been checked by ext_check()
3302 		 * and ipsec_valid_key_size(), after removing salt/IV
3303 		 * bits, whats left is the encryption key. If this is too
3304 		 * short, ipsec_create_ctx_tmpl() will fail and the SA
3305 		 * won't get created.
3306 		 *
3307 		 * set ipsa_encrkeylen to length of key only.
3308 		 */
3309 		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3310 		newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3311 		newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3312 		newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3313 
3314 		/* In case we have to round up to the next byte... */
3315 		if ((ekey->sadb_key_bits & 0x7) != 0)
3316 			newbie->ipsa_encrkeylen++;
3317 
3318 		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3319 		    KM_NOSLEEP);
3320 		if (newbie->ipsa_encrkey == NULL) {
3321 			error = ENOMEM;
3322 			mutex_exit(&newbie->ipsa_lock);
3323 			goto error;
3324 		}
3325 
3326 		buf_ptr = (uint8_t *)(ekey + 1);
3327 		bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3328 
3329 		if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3330 			/*
3331 			 * Combined mode algs need a nonce. Copy the salt and
3332 			 * IV into a buffer. The ipsa_nonce is a pointer into
3333 			 * this buffer, some bytes at the start of the buffer
3334 			 * may be unused, depends on the salt length. The IV
3335 			 * is 64 bit aligned so it can be incremented as a
3336 			 * uint64_t. Zero out key in samsg_t before freeing.
3337 			 */
3338 
3339 			newbie->ipsa_nonce_buf = kmem_alloc(
3340 			    sizeof (ipsec_nonce_t), KM_NOSLEEP);
3341 			if (newbie->ipsa_nonce_buf == NULL) {
3342 				error = ENOMEM;
3343 				mutex_exit(&newbie->ipsa_lock);
3344 				goto error;
3345 			}
3346 			/*
3347 			 * Initialize nonce and salt pointers to point
3348 			 * to the nonce buffer. This is just in case we get
3349 			 * bad data, the pointers will be valid, the data
3350 			 * won't be.
3351 			 *
3352 			 * See sadb.h for layout of nonce.
3353 			 */
3354 			newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3355 			newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3356 			newbie->ipsa_nonce = newbie->ipsa_salt;
3357 			if (newbie->ipsa_saltlen != 0) {
3358 				salt_offset = MAXSALTSIZE -
3359 				    newbie->ipsa_saltlen;
3360 				newbie->ipsa_salt = (uint8_t *)
3361 				    &newbie->ipsa_nonce_buf->salt[salt_offset];
3362 				newbie->ipsa_nonce = newbie->ipsa_salt;
3363 				buf_ptr += newbie->ipsa_encrkeylen;
3364 				bcopy(buf_ptr, newbie->ipsa_salt,
3365 				    newbie->ipsa_saltlen);
3366 			}
3367 			/*
3368 			 * The IV for CCM/GCM mode increments, it should not
3369 			 * repeat. Get a random value for the IV, make a
3370 			 * copy, the SA will expire when/if the IV ever
3371 			 * wraps back to the initial value. If an Initial IV
3372 			 * is passed in via PF_KEY, save this in the SA.
3373 			 * Initialising IV for inbound is pointless as its
3374 			 * taken from the inbound packet.
3375 			 */
3376 			if (!is_inbound) {
3377 				if (ekey->sadb_key_reserved != 0) {
3378 					buf_ptr += newbie->ipsa_saltlen;
3379 					bcopy(buf_ptr, (uint8_t *)newbie->
3380 					    ipsa_iv, SADB_1TO8(ekey->
3381 					    sadb_key_reserved));
3382 				} else {
3383 					(void) random_get_pseudo_bytes(
3384 					    (uint8_t *)newbie->ipsa_iv,
3385 					    newbie->ipsa_iv_len);
3386 				}
3387 				newbie->ipsa_iv_softexpire =
3388 				    (*newbie->ipsa_iv) << 9;
3389 				newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3390 			}
3391 		}
3392 		bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3393 
3394 		/*
3395 		 * Pre-initialize the kernel crypto framework key
3396 		 * structure.
3397 		 */
3398 		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3399 		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3400 		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3401 
3402 		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3403 		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3404 		rw_exit(&ipss->ipsec_alg_lock);
3405 		if (error != 0) {
3406 			mutex_exit(&newbie->ipsa_lock);
3407 			/* See above for error explanation. */
3408 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3409 			goto error;
3410 		}
3411 	}
3412 
3413 	if (async)
3414 		newbie->ipsa_flags |= IPSA_F_ASYNC;
3415 
3416 	/*
3417 	 * Ptrs to processing functions.
3418 	 */
3419 	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3420 		ipsecesp_init_funcs(newbie);
3421 	else
3422 		ipsecah_init_funcs(newbie);
3423 	ASSERT(newbie->ipsa_output_func != NULL &&
3424 	    newbie->ipsa_input_func != NULL);
3425 
3426 	/*
3427 	 * Certificate ID stuff.
3428 	 */
3429 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3430 		sadb_ident_t *id =
3431 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3432 
3433 		/*
3434 		 * Can assume strlen() will return okay because ext_check() in
3435 		 * keysock.c prepares the string for us.
3436 		 */
3437 		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3438 		    (char *)(id+1), ns);
3439 		if (newbie->ipsa_src_cid == NULL) {
3440 			error = ENOMEM;
3441 			mutex_exit(&newbie->ipsa_lock);
3442 			goto error;
3443 		}
3444 	}
3445 
3446 	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3447 		sadb_ident_t *id =
3448 		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3449 
3450 		/*
3451 		 * Can assume strlen() will return okay because ext_check() in
3452 		 * keysock.c prepares the string for us.
3453 		 */
3454 		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3455 		    (char *)(id+1), ns);
3456 		if (newbie->ipsa_dst_cid == NULL) {
3457 			error = ENOMEM;
3458 			mutex_exit(&newbie->ipsa_lock);
3459 			goto error;
3460 		}
3461 	}
3462 
3463 	/*
3464 	 * sensitivity label handling code:
3465 	 * Convert sens + bitmap into cred_t, and associate it
3466 	 * with the new SA.
3467 	 */
3468 	if (sens != NULL) {
3469 		uint64_t *bitmap = (uint64_t *)(sens + 1);
3470 
3471 		newbie->ipsa_tsl = sadb_label_from_sens(sens, bitmap);
3472 	}
3473 
3474 	/*
3475 	 * Likewise for outer sensitivity.
3476 	 */
3477 	if (osens != NULL) {
3478 		uint64_t *bitmap = (uint64_t *)(osens + 1);
3479 		ts_label_t *tsl, *effective_tsl;
3480 		uint32_t *peer_addr_ptr;
3481 		zoneid_t zoneid = GLOBAL_ZONEID;
3482 		zone_t *zone;
3483 
3484 		peer_addr_ptr = is_inbound ? src_addr_ptr : dst_addr_ptr;
3485 
3486 		tsl = sadb_label_from_sens(osens, bitmap);
3487 		newbie->ipsa_mac_exempt = CONN_MAC_DEFAULT;
3488 
3489 		if (osens->sadb_x_sens_flags & SADB_X_SENS_IMPLICIT) {
3490 			newbie->ipsa_mac_exempt = CONN_MAC_IMPLICIT;
3491 		}
3492 
3493 		error = tsol_check_dest(tsl, peer_addr_ptr,
3494 		    (af == AF_INET6)?IPV6_VERSION:IPV4_VERSION,
3495 		    newbie->ipsa_mac_exempt, B_TRUE, &effective_tsl);
3496 		if (error != 0) {
3497 			label_rele(tsl);
3498 			mutex_exit(&newbie->ipsa_lock);
3499 			goto error;
3500 		}
3501 
3502 		if (effective_tsl != NULL) {
3503 			label_rele(tsl);
3504 			tsl = effective_tsl;
3505 		}
3506 
3507 		newbie->ipsa_otsl = tsl;
3508 
3509 		zone = zone_find_by_label(tsl);
3510 		if (zone != NULL) {
3511 			zoneid = zone->zone_id;
3512 			zone_rele(zone);
3513 		}
3514 		/*
3515 		 * For exclusive stacks we set the zoneid to zero to operate
3516 		 * as if in the global zone for tsol_compute_label_v4/v6
3517 		 */
3518 		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
3519 			zoneid = GLOBAL_ZONEID;
3520 
3521 		if (af == AF_INET6) {
3522 			error = tsol_compute_label_v6(tsl, zoneid,
3523 			    (in6_addr_t *)peer_addr_ptr,
3524 			    newbie->ipsa_opt_storage, ipst);
3525 		} else {
3526 			error = tsol_compute_label_v4(tsl, zoneid,
3527 			    *peer_addr_ptr, newbie->ipsa_opt_storage, ipst);
3528 		}
3529 		if (error != 0) {
3530 			mutex_exit(&newbie->ipsa_lock);
3531 			goto error;
3532 		}
3533 	}
3534 
3535 
3536 	if (replayext != NULL) {
3537 		if ((replayext->sadb_x_rc_replay32 == 0) &&
3538 		    (replayext->sadb_x_rc_replay64 != 0)) {
3539 			error = EOPNOTSUPP;
3540 			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3541 			mutex_exit(&newbie->ipsa_lock);
3542 			goto error;
3543 		}
3544 		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3545 	}
3546 
3547 	/* now that the SA has been updated, set its new state */
3548 	newbie->ipsa_state = assoc->sadb_sa_state;
3549 
3550 	if (clone) {
3551 		newbie->ipsa_haspeer = B_TRUE;
3552 	} else {
3553 		if (!is_inbound) {
3554 			lifetime_fuzz(newbie);
3555 		}
3556 	}
3557 	/*
3558 	 * The less locks I hold when doing an insertion and possible cloning,
3559 	 * the better!
3560 	 */
3561 	mutex_exit(&newbie->ipsa_lock);
3562 
3563 	if (clone) {
3564 		newbie_clone = sadb_cloneassoc(newbie);
3565 
3566 		if (newbie_clone == NULL) {
3567 			error = ENOMEM;
3568 			goto error;
3569 		}
3570 	}
3571 
3572 	/*
3573 	 * Enter the bucket locks.  The order of entry is outbound,
3574 	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3575 	 * based on the destination address type.  If the destination address
3576 	 * type is for a node that isn't mine (or potentially mine), the
3577 	 * "primary" bucket is the outbound one.
3578 	 */
3579 	if (!is_inbound) {
3580 		/* primary == outbound */
3581 		mutex_enter(&primary->isaf_lock);
3582 		mutex_enter(&secondary->isaf_lock);
3583 	} else {
3584 		/* primary == inbound */
3585 		mutex_enter(&secondary->isaf_lock);
3586 		mutex_enter(&primary->isaf_lock);
3587 	}
3588 
3589 	/*
3590 	 * sadb_insertassoc() doesn't increment the reference
3591 	 * count.  We therefore have to increment the
3592 	 * reference count one more time to reflect the
3593 	 * pointers of the table that reference this SA.
3594 	 */
3595 	IPSA_REFHOLD(newbie);
3596 
3597 	if (isupdate) {
3598 		/*
3599 		 * Unlink from larval holding cell in the "inbound" fanout.
3600 		 */
3601 		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3602 		    newbie->ipsa_linklock == &secondary->isaf_lock);
3603 		sadb_unlinkassoc(newbie);
3604 	}
3605 
3606 	mutex_enter(&newbie->ipsa_lock);
3607 	error = sadb_insertassoc(newbie, primary);
3608 	mutex_exit(&newbie->ipsa_lock);
3609 
3610 	if (error != 0) {
3611 		/*
3612 		 * Since sadb_insertassoc() failed, we must decrement the
3613 		 * refcount again so the cleanup code will actually free
3614 		 * the offending SA.
3615 		 */
3616 		IPSA_REFRELE(newbie);
3617 		goto error_unlock;
3618 	}
3619 
3620 	if (newbie_clone != NULL) {
3621 		mutex_enter(&newbie_clone->ipsa_lock);
3622 		error = sadb_insertassoc(newbie_clone, secondary);
3623 		mutex_exit(&newbie_clone->ipsa_lock);
3624 		if (error != 0) {
3625 			/* Collision in secondary table. */
3626 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3627 			goto error_unlock;
3628 		}
3629 		IPSA_REFHOLD(newbie_clone);
3630 	} else {
3631 		ASSERT(primary != secondary);
3632 		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3633 		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3634 		if (scratch != NULL) {
3635 			/* Collision in secondary table. */
3636 			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3637 			/* Set the error, since ipsec_getassocbyspi() can't. */
3638 			error = EEXIST;
3639 			goto error_unlock;
3640 		}
3641 	}
3642 
3643 	/* OKAY!  So let's do some reality check assertions. */
3644 
3645 	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3646 	ASSERT(newbie_clone == NULL ||
3647 	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3648 
3649 error_unlock:
3650 
3651 	/*
3652 	 * We can exit the locks in any order.	Only entrance needs to
3653 	 * follow any protocol.
3654 	 */
3655 	mutex_exit(&secondary->isaf_lock);
3656 	mutex_exit(&primary->isaf_lock);
3657 
3658 	if (pair_ext != NULL && error == 0) {
3659 		/* update pair_spi if it exists. */
3660 		ipsa_query_t sq;
3661 
3662 		sq.spp = spp;		/* XXX param */
3663 		error = sadb_form_query(ksi, IPSA_Q_DST, IPSA_Q_SRC|IPSA_Q_DST|
3664 		    IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, &sq, diagnostic);
3665 		if (error)
3666 			return (error);
3667 
3668 		error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
3669 
3670 		if (error != 0)
3671 			goto error;
3672 
3673 		if (ipsapp.ipsap_psa_ptr != NULL) {
3674 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3675 			error = EINVAL;
3676 		} else {
3677 			/* update_pairing() sets diagnostic */
3678 			error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
3679 		}
3680 	}
3681 	/* Common error point for this routine. */
3682 error:
3683 	if (newbie != NULL) {
3684 		if (error != 0) {
3685 			/* This SA is broken, let the reaper clean up. */
3686 			mutex_enter(&newbie->ipsa_lock);
3687 			newbie->ipsa_state = IPSA_STATE_DEAD;
3688 			newbie->ipsa_hardexpiretime = 1;
3689 			mutex_exit(&newbie->ipsa_lock);
3690 		}
3691 		IPSA_REFRELE(newbie);
3692 	}
3693 	if (newbie_clone != NULL) {
3694 		IPSA_REFRELE(newbie_clone);
3695 	}
3696 
3697 	if (error == 0) {
3698 		/*
3699 		 * Construct favorable PF_KEY return message and send to
3700 		 * keysock. Update the flags in the original keysock message
3701 		 * to reflect the actual flags in the new SA.
3702 		 *  (Q:  Do I need to pass "newbie"?  If I do,
3703 		 * make sure to REFHOLD, call, then REFRELE.)
3704 		 */
3705 		assoc->sadb_sa_flags = newbie->ipsa_flags;
3706 		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3707 	}
3708 
3709 	destroy_ipsa_pair(&ipsapp);
3710 	return (error);
3711 }
3712 
3713 /*
3714  * Set the time of first use for a security association.  Update any
3715  * expiration times as a result.
3716  */
3717 void
3718 sadb_set_usetime(ipsa_t *assoc)
3719 {
3720 	time_t snapshot = gethrestime_sec();
3721 
3722 	mutex_enter(&assoc->ipsa_lock);
3723 	assoc->ipsa_lastuse = snapshot;
3724 	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3725 
3726 	/*
3727 	 * Caller does check usetime before calling me usually, and
3728 	 * double-checking is better than a mutex_enter/exit hit.
3729 	 */
3730 	if (assoc->ipsa_usetime == 0) {
3731 		/*
3732 		 * This is redundant for outbound SA's, as
3733 		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3734 		 * Inbound SAs, however, have no such protection.
3735 		 */
3736 		assoc->ipsa_flags |= IPSA_F_USED;
3737 		assoc->ipsa_usetime = snapshot;
3738 
3739 		/*
3740 		 * After setting the use time, see if we have a use lifetime
3741 		 * that would cause the actual SA expiration time to shorten.
3742 		 */
3743 		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3744 		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3745 	}
3746 	mutex_exit(&assoc->ipsa_lock);
3747 }
3748 
3749 /*
3750  * Send up a PF_KEY expire message for this association.
3751  */
3752 static void
3753 sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3754 {
3755 	mblk_t *mp, *mp1;
3756 	int alloclen, af;
3757 	sadb_msg_t *samsg;
3758 	sadb_lifetime_t *current, *expire;
3759 	sadb_sa_t *saext;
3760 	uint8_t *end;
3761 	boolean_t tunnel_mode;
3762 
3763 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3764 
3765 	/* Don't bother sending if there's no queue. */
3766 	if (pfkey_q == NULL)
3767 		return;
3768 
3769 	mp = sadb_keysock_out(0);
3770 	if (mp == NULL) {
3771 		/* cmn_err(CE_WARN, */
3772 		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3773 		return;
3774 	}
3775 
3776 	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3777 	    2 * sizeof (sadb_address_t) + sizeof (*saext) +
3778 	    sizeof (sadb_x_kmc_t);
3779 
3780 	af = assoc->ipsa_addrfam;
3781 	switch (af) {
3782 	case AF_INET:
3783 		alloclen += 2 * sizeof (struct sockaddr_in);
3784 		break;
3785 	case AF_INET6:
3786 		alloclen += 2 * sizeof (struct sockaddr_in6);
3787 		break;
3788 	default:
3789 		/* Won't happen unless there's a kernel bug. */
3790 		freeb(mp);
3791 		cmn_err(CE_WARN,
3792 		    "sadb_expire_assoc: Unknown address length.\n");
3793 		return;
3794 	}
3795 
3796 	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3797 	if (tunnel_mode) {
3798 		alloclen += 2 * sizeof (sadb_address_t);
3799 		switch (assoc->ipsa_innerfam) {
3800 		case AF_INET:
3801 			alloclen += 2 * sizeof (struct sockaddr_in);
3802 			break;
3803 		case AF_INET6:
3804 			alloclen += 2 * sizeof (struct sockaddr_in6);
3805 			break;
3806 		default:
3807 			/* Won't happen unless there's a kernel bug. */
3808 			freeb(mp);
3809 			cmn_err(CE_WARN, "sadb_expire_assoc: "
3810 			    "Unknown inner address length.\n");
3811 			return;
3812 		}
3813 	}
3814 
3815 	mp->b_cont = allocb(alloclen, BPRI_HI);
3816 	if (mp->b_cont == NULL) {
3817 		freeb(mp);
3818 		/* cmn_err(CE_WARN, */
3819 		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3820 		return;
3821 	}
3822 
3823 	mp1 = mp;
3824 	mp = mp->b_cont;
3825 	end = mp->b_wptr + alloclen;
3826 
3827 	samsg = (sadb_msg_t *)mp->b_wptr;
3828 	mp->b_wptr += sizeof (*samsg);
3829 	samsg->sadb_msg_version = PF_KEY_V2;
3830 	samsg->sadb_msg_type = SADB_EXPIRE;
3831 	samsg->sadb_msg_errno = 0;
3832 	samsg->sadb_msg_satype = assoc->ipsa_type;
3833 	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3834 	samsg->sadb_msg_reserved = 0;
3835 	samsg->sadb_msg_seq = 0;
3836 	samsg->sadb_msg_pid = 0;
3837 
3838 	saext = (sadb_sa_t *)mp->b_wptr;
3839 	mp->b_wptr += sizeof (*saext);
3840 	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3841 	saext->sadb_sa_exttype = SADB_EXT_SA;
3842 	saext->sadb_sa_spi = assoc->ipsa_spi;
3843 	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3844 	saext->sadb_sa_state = assoc->ipsa_state;
3845 	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3846 	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3847 	saext->sadb_sa_flags = assoc->ipsa_flags;
3848 
3849 	current = (sadb_lifetime_t *)mp->b_wptr;
3850 	mp->b_wptr += sizeof (sadb_lifetime_t);
3851 	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3852 	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3853 	/* We do not support the concept. */
3854 	current->sadb_lifetime_allocations = 0;
3855 	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3856 	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3857 	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3858 
3859 	expire = (sadb_lifetime_t *)mp->b_wptr;
3860 	mp->b_wptr += sizeof (*expire);
3861 	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3862 
3863 	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3864 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3865 		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3866 		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3867 		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3868 		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3869 	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3870 		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3871 		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3872 		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3873 		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3874 		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3875 	} else {
3876 		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3877 		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3878 		expire->sadb_lifetime_allocations = 0;
3879 		expire->sadb_lifetime_bytes = 0;
3880 		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3881 		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3882 	}
3883 
3884 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3885 	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3886 	    SA_PROTO(assoc), 0);
3887 	ASSERT(mp->b_wptr != NULL);
3888 
3889 	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3890 	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3891 	    SA_PROTO(assoc), 0);
3892 	ASSERT(mp->b_wptr != NULL);
3893 
3894 	if (tunnel_mode) {
3895 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3896 		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3897 		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3898 		    assoc->ipsa_innersrcpfx);
3899 		ASSERT(mp->b_wptr != NULL);
3900 		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3901 		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3902 		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3903 		    assoc->ipsa_innerdstpfx);
3904 		ASSERT(mp->b_wptr != NULL);
3905 	}
3906 
3907 	mp->b_wptr = sadb_make_kmc_ext(mp->b_wptr, end, assoc->ipsa_kmp,
3908 	    assoc->ipsa_kmc);
3909 	ASSERT(mp->b_wptr != NULL);
3910 
3911 	/* Can just putnext, we're ready to go! */
3912 	putnext(pfkey_q, mp1);
3913 }
3914 
3915 /*
3916  * "Age" the SA with the number of bytes that was used to protect traffic.
3917  * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3918  * enough "charge" left in the SA to protect the data.	Return B_FALSE
3919  * otherwise.  (If B_FALSE is returned, the association either was, or became
3920  * DEAD.)
3921  */
3922 boolean_t
3923 sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3924     boolean_t sendmsg)
3925 {
3926 	boolean_t rc = B_TRUE;
3927 	uint64_t newtotal;
3928 
3929 	mutex_enter(&assoc->ipsa_lock);
3930 	newtotal = assoc->ipsa_bytes + bytes;
3931 	if (assoc->ipsa_hardbyteslt != 0 &&
3932 	    newtotal >= assoc->ipsa_hardbyteslt) {
3933 		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3934 			sadb_delete_cluster(assoc);
3935 			/*
3936 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3937 			 * this off on another non-interrupt thread.  Also
3938 			 * unlink this SA immediately.
3939 			 */
3940 			assoc->ipsa_state = IPSA_STATE_DEAD;
3941 			if (sendmsg)
3942 				sadb_expire_assoc(pfkey_q, assoc);
3943 			/*
3944 			 * Set non-zero expiration time so sadb_age_assoc()
3945 			 * will work when reaping.
3946 			 */
3947 			assoc->ipsa_hardexpiretime = (time_t)1;
3948 		} /* Else someone beat me to it! */
3949 		rc = B_FALSE;
3950 	} else if (assoc->ipsa_softbyteslt != 0 &&
3951 	    (newtotal >= assoc->ipsa_softbyteslt)) {
3952 		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3953 			/*
3954 			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3955 			 * this off on another non-interrupt thread.
3956 			 */
3957 			assoc->ipsa_state = IPSA_STATE_DYING;
3958 			assoc->ipsa_bytes = newtotal;
3959 			if (sendmsg)
3960 				sadb_expire_assoc(pfkey_q, assoc);
3961 		} /* Else someone beat me to it! */
3962 	}
3963 	if (rc == B_TRUE)
3964 		assoc->ipsa_bytes = newtotal;
3965 	mutex_exit(&assoc->ipsa_lock);
3966 	return (rc);
3967 }
3968 
3969 /*
3970  * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3971  *     sadb_age_assoc().
3972  */
3973 static ipsa_t *
3974 sadb_torch_assoc(isaf_t *head, ipsa_t *sa)
3975 {
3976 	ASSERT(MUTEX_HELD(&head->isaf_lock));
3977 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3978 	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3979 
3980 	/*
3981 	 * Force cached SAs to be revalidated..
3982 	 */
3983 	head->isaf_gen++;
3984 
3985 	mutex_exit(&sa->ipsa_lock);
3986 	sadb_unlinkassoc(sa);
3987 
3988 	return (NULL);
3989 }
3990 
3991 /*
3992  * Do various SA-is-idle activities depending on delta (the number of idle
3993  * seconds on the SA) and/or other properties of the SA.
3994  *
3995  * Return B_TRUE if I've sent a packet, because I have to drop the
3996  * association's mutex before sending a packet out the wire.
3997  */
3998 /* ARGSUSED */
3999 static boolean_t
4000 sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
4001 {
4002 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
4003 	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
4004 
4005 	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
4006 
4007 	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
4008 	    delta >= nat_t_interval &&
4009 	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
4010 		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
4011 		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
4012 		mutex_exit(&assoc->ipsa_lock);
4013 		ipsecesp_send_keepalive(assoc);
4014 		return (B_TRUE);
4015 	}
4016 	return (B_FALSE);
4017 }
4018 
4019 /*
4020  * Return "assoc" if haspeer is true and I send an expire.  This allows
4021  * the consumers' aging functions to tidy up an expired SA's peer.
4022  */
4023 static ipsa_t *
4024 sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
4025     time_t current, int reap_delay, boolean_t inbound)
4026 {
4027 	ipsa_t *retval = NULL;
4028 	boolean_t dropped_mutex = B_FALSE;
4029 
4030 	ASSERT(MUTEX_HELD(&head->isaf_lock));
4031 
4032 	mutex_enter(&assoc->ipsa_lock);
4033 
4034 	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4035 	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4036 	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4037 	    (assoc->ipsa_hardexpiretime != 0))) &&
4038 	    (assoc->ipsa_hardexpiretime <= current)) {
4039 		assoc->ipsa_state = IPSA_STATE_DEAD;
4040 		return (sadb_torch_assoc(head, assoc));
4041 	}
4042 
4043 	/*
4044 	 * Check lifetimes.  Fortunately, SA setup is done
4045 	 * such that there are only two times to look at,
4046 	 * softexpiretime, and hardexpiretime.
4047 	 *
4048 	 * Check hard first.
4049 	 */
4050 
4051 	if (assoc->ipsa_hardexpiretime != 0 &&
4052 	    assoc->ipsa_hardexpiretime <= current) {
4053 		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4054 			return (sadb_torch_assoc(head, assoc));
4055 
4056 		if (inbound) {
4057 			sadb_delete_cluster(assoc);
4058 		}
4059 
4060 		/*
4061 		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4062 		 */
4063 		assoc->ipsa_state = IPSA_STATE_DEAD;
4064 		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4065 			/*
4066 			 * If the SA is paired or peered with another, put
4067 			 * a copy on a list which can be processed later, the
4068 			 * pair/peer SA needs to be updated so the both die
4069 			 * at the same time.
4070 			 *
4071 			 * If I return assoc, I have to bump up its reference
4072 			 * count to keep with the ipsa_t reference count
4073 			 * semantics.
4074 			 */
4075 			IPSA_REFHOLD(assoc);
4076 			retval = assoc;
4077 		}
4078 		sadb_expire_assoc(pfkey_q, assoc);
4079 		assoc->ipsa_hardexpiretime = current + reap_delay;
4080 	} else if (assoc->ipsa_softexpiretime != 0 &&
4081 	    assoc->ipsa_softexpiretime <= current &&
4082 	    assoc->ipsa_state < IPSA_STATE_DYING) {
4083 		/*
4084 		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4085 		 * this off on another non-interrupt thread.
4086 		 */
4087 		assoc->ipsa_state = IPSA_STATE_DYING;
4088 		if (assoc->ipsa_haspeer) {
4089 			/*
4090 			 * If the SA has a peer, update the peer's state
4091 			 * on SOFT_EXPIRE, this is mostly to prevent two
4092 			 * expire messages from effectively the same SA.
4093 			 *
4094 			 * Don't care about paired SA's, then can (and should)
4095 			 * be able to soft expire at different times.
4096 			 *
4097 			 * If I return assoc, I have to bump up its
4098 			 * reference count to keep with the ipsa_t reference
4099 			 * count semantics.
4100 			 */
4101 			IPSA_REFHOLD(assoc);
4102 			retval = assoc;
4103 		}
4104 		sadb_expire_assoc(pfkey_q, assoc);
4105 	} else if (assoc->ipsa_idletime != 0 &&
4106 	    assoc->ipsa_idleexpiretime <= current) {
4107 		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4108 			assoc->ipsa_state = IPSA_STATE_IDLE;
4109 		}
4110 
4111 		/*
4112 		 * Need to handle Mature case
4113 		 */
4114 		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4115 			sadb_expire_assoc(pfkey_q, assoc);
4116 		}
4117 	} else {
4118 		/* Check idle time activities. */
4119 		dropped_mutex = sadb_idle_activities(assoc,
4120 		    current - assoc->ipsa_lastuse, inbound);
4121 	}
4122 
4123 	if (!dropped_mutex)
4124 		mutex_exit(&assoc->ipsa_lock);
4125 	return (retval);
4126 }
4127 
4128 /*
4129  * Called by a consumer protocol to do ther dirty work of reaping dead
4130  * Security Associations.
4131  *
4132  * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4133  * SA's that are already marked DEAD, so expired SA's are only reaped
4134  * the second time sadb_ager() runs.
4135  */
4136 void
4137 sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
4138 {
4139 	int i;
4140 	isaf_t *bucket;
4141 	ipsa_t *assoc, *spare;
4142 	iacqf_t *acqlist;
4143 	ipsacq_t *acqrec, *spareacq;
4144 	templist_t *haspeerlist, *newbie;
4145 	/* Snapshot current time now. */
4146 	time_t current = gethrestime_sec();
4147 	haspeerlist = NULL;
4148 
4149 	/*
4150 	 * Do my dirty work.  This includes aging real entries, aging
4151 	 * larvals, and aging outstanding ACQUIREs.
4152 	 *
4153 	 * I hope I don't tie up resources for too long.
4154 	 */
4155 
4156 	/* Age acquires. */
4157 
4158 	for (i = 0; i < sp->sdb_hashsize; i++) {
4159 		acqlist = &sp->sdb_acq[i];
4160 		mutex_enter(&acqlist->iacqf_lock);
4161 		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4162 		    acqrec = spareacq) {
4163 			spareacq = acqrec->ipsacq_next;
4164 			if (current > acqrec->ipsacq_expire)
4165 				sadb_destroy_acquire(acqrec, ns);
4166 		}
4167 		mutex_exit(&acqlist->iacqf_lock);
4168 	}
4169 
4170 	/* Age inbound associations. */
4171 	for (i = 0; i < sp->sdb_hashsize; i++) {
4172 		bucket = &(sp->sdb_if[i]);
4173 		mutex_enter(&bucket->isaf_lock);
4174 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4175 		    assoc = spare) {
4176 			spare = assoc->ipsa_next;
4177 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4178 			    reap_delay, B_TRUE) != NULL) {
4179 				/*
4180 				 * Put SA's which have a peer or SA's which
4181 				 * are paired on a list for processing after
4182 				 * all the hash tables have been walked.
4183 				 *
4184 				 * sadb_age_assoc() increments the refcnt,
4185 				 * effectively doing an IPSA_REFHOLD().
4186 				 */
4187 				newbie = kmem_alloc(sizeof (*newbie),
4188 				    KM_NOSLEEP);
4189 				if (newbie == NULL) {
4190 					/*
4191 					 * Don't forget to REFRELE().
4192 					 */
4193 					IPSA_REFRELE(assoc);
4194 					continue;	/* for loop... */
4195 				}
4196 				newbie->next = haspeerlist;
4197 				newbie->ipsa = assoc;
4198 				haspeerlist = newbie;
4199 			}
4200 		}
4201 		mutex_exit(&bucket->isaf_lock);
4202 	}
4203 
4204 	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4205 	haspeerlist = NULL;
4206 
4207 	/* Age outbound associations. */
4208 	for (i = 0; i < sp->sdb_hashsize; i++) {
4209 		bucket = &(sp->sdb_of[i]);
4210 		mutex_enter(&bucket->isaf_lock);
4211 		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4212 		    assoc = spare) {
4213 			spare = assoc->ipsa_next;
4214 			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4215 			    reap_delay, B_FALSE) != NULL) {
4216 				/*
4217 				 * sadb_age_assoc() increments the refcnt,
4218 				 * effectively doing an IPSA_REFHOLD().
4219 				 */
4220 				newbie = kmem_alloc(sizeof (*newbie),
4221 				    KM_NOSLEEP);
4222 				if (newbie == NULL) {
4223 					/*
4224 					 * Don't forget to REFRELE().
4225 					 */
4226 					IPSA_REFRELE(assoc);
4227 					continue;	/* for loop... */
4228 				}
4229 				newbie->next = haspeerlist;
4230 				newbie->ipsa = assoc;
4231 				haspeerlist = newbie;
4232 			}
4233 		}
4234 		mutex_exit(&bucket->isaf_lock);
4235 	}
4236 
4237 	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4238 
4239 	/*
4240 	 * Run a GC pass to clean out dead identities.
4241 	 */
4242 	ipsid_gc(ns);
4243 }
4244 
4245 /*
4246  * Figure out when to reschedule the ager.
4247  */
4248 timeout_id_t
4249 sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4250     void *agerarg, uint_t *intp, uint_t intmax, short mid)
4251 {
4252 	hrtime_t end = gethrtime();
4253 	uint_t interval = *intp;	/* "interval" is in ms. */
4254 
4255 	/*
4256 	 * See how long this took.  If it took too long, increase the
4257 	 * aging interval.
4258 	 */
4259 	if ((end - begin) > MSEC2NSEC(interval)) {
4260 		if (interval >= intmax) {
4261 			/* XXX Rate limit this?  Or recommend flush? */
4262 			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4263 			    "Too many SA's to age out in %d msec.\n",
4264 			    intmax);
4265 		} else {
4266 			/* Double by shifting by one bit. */
4267 			interval <<= 1;
4268 			interval = min(interval, intmax);
4269 		}
4270 	} else if ((end - begin) <= (MSEC2NSEC(interval) / 2) &&
4271 	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4272 		/*
4273 		 * If I took less than half of the interval, then I should
4274 		 * ratchet the interval back down.  Never automatically
4275 		 * shift below the default aging interval.
4276 		 *
4277 		 * NOTE:This even overrides manual setting of the age
4278 		 *	interval using NDD to lower the setting past the
4279 		 *	default.  In other words, if you set the interval
4280 		 *	lower than the default, and your SADB gets too big,
4281 		 *	the interval will only self-lower back to the default.
4282 		 */
4283 		/* Halve by shifting one bit. */
4284 		interval >>= 1;
4285 		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4286 	}
4287 	*intp = interval;
4288 	return (qtimeout(pfkey_q, ager, agerarg,
4289 	    drv_usectohz(interval * (MICROSEC / MILLISEC))));
4290 }
4291 
4292 
4293 /*
4294  * Update the lifetime values of an SA.	 This is the path an SADB_UPDATE
4295  * message takes when updating a MATURE or DYING SA.
4296  */
4297 static void
4298 sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
4299     sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
4300 {
4301 	mutex_enter(&assoc->ipsa_lock);
4302 
4303 	/*
4304 	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
4305 	 * passed in during an update message.	We currently don't handle
4306 	 * these.
4307 	 */
4308 
4309 	if (hard != NULL) {
4310 		if (hard->sadb_lifetime_bytes != 0)
4311 			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
4312 		if (hard->sadb_lifetime_usetime != 0)
4313 			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
4314 		if (hard->sadb_lifetime_addtime != 0)
4315 			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
4316 		if (assoc->ipsa_hardaddlt != 0) {
4317 			assoc->ipsa_hardexpiretime =
4318 			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
4319 		}
4320 		if (assoc->ipsa_harduselt != 0 &&
4321 		    assoc->ipsa_flags & IPSA_F_USED) {
4322 			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
4323 		}
4324 		if (hard->sadb_lifetime_allocations != 0)
4325 			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
4326 	}
4327 
4328 	if (soft != NULL) {
4329 		if (soft->sadb_lifetime_bytes != 0) {
4330 			if (soft->sadb_lifetime_bytes >
4331 			    assoc->ipsa_hardbyteslt) {
4332 				assoc->ipsa_softbyteslt =
4333 				    assoc->ipsa_hardbyteslt;
4334 			} else {
4335 				assoc->ipsa_softbyteslt =
4336 				    soft->sadb_lifetime_bytes;
4337 			}
4338 		}
4339 		if (soft->sadb_lifetime_usetime != 0) {
4340 			if (soft->sadb_lifetime_usetime >
4341 			    assoc->ipsa_harduselt) {
4342 				assoc->ipsa_softuselt =
4343 				    assoc->ipsa_harduselt;
4344 			} else {
4345 				assoc->ipsa_softuselt =
4346 				    soft->sadb_lifetime_usetime;
4347 			}
4348 		}
4349 		if (soft->sadb_lifetime_addtime != 0) {
4350 			if (soft->sadb_lifetime_addtime >
4351 			    assoc->ipsa_hardexpiretime) {
4352 				assoc->ipsa_softexpiretime =
4353 				    assoc->ipsa_hardexpiretime;
4354 			} else {
4355 				assoc->ipsa_softaddlt =
4356 				    soft->sadb_lifetime_addtime;
4357 			}
4358 		}
4359 		if (assoc->ipsa_softaddlt != 0) {
4360 			assoc->ipsa_softexpiretime =
4361 			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
4362 		}
4363 		if (assoc->ipsa_softuselt != 0 &&
4364 		    assoc->ipsa_flags & IPSA_F_USED) {
4365 			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
4366 		}
4367 		if (outbound && assoc->ipsa_softexpiretime != 0) {
4368 			if (assoc->ipsa_state == IPSA_STATE_MATURE)
4369 				lifetime_fuzz(assoc);
4370 		}
4371 
4372 		if (soft->sadb_lifetime_allocations != 0)
4373 			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
4374 	}
4375 
4376 	if (idle != NULL) {
4377 		time_t current = gethrestime_sec();
4378 		if ((assoc->ipsa_idleexpiretime <= current) &&
4379 		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
4380 			assoc->ipsa_idleexpiretime =
4381 			    current + assoc->ipsa_idleaddlt;
4382 		}
4383 		if (idle->sadb_lifetime_addtime != 0)
4384 			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
4385 		if (idle->sadb_lifetime_usetime != 0)
4386 			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
4387 		if (assoc->ipsa_idleaddlt != 0) {
4388 			assoc->ipsa_idleexpiretime =
4389 			    current + idle->sadb_lifetime_addtime;
4390 			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
4391 		}
4392 		if (assoc->ipsa_idleuselt != 0) {
4393 			if (assoc->ipsa_idletime != 0) {
4394 				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
4395 				    assoc->ipsa_idleuselt);
4396 				assoc->ipsa_idleexpiretime =
4397 				    current + assoc->ipsa_idletime;
4398 			} else {
4399 				assoc->ipsa_idleexpiretime =
4400 				    current + assoc->ipsa_idleuselt;
4401 				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
4402 			}
4403 		}
4404 	}
4405 	mutex_exit(&assoc->ipsa_lock);
4406 }
4407 
4408 static int
4409 sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4410 {
4411 	int rcode = 0;
4412 	time_t current = gethrestime_sec();
4413 
4414 	mutex_enter(&assoc->ipsa_lock);
4415 
4416 	switch (new_state) {
4417 	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4418 		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4419 			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4420 			assoc->ipsa_idleexpiretime =
4421 			    current + assoc->ipsa_idletime;
4422 		}
4423 		break;
4424 	case SADB_X_SASTATE_IDLE:
4425 		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4426 			assoc->ipsa_state = IPSA_STATE_IDLE;
4427 			assoc->ipsa_idleexpiretime =
4428 			    current + assoc->ipsa_idletime;
4429 		} else {
4430 			rcode = EINVAL;
4431 		}
4432 		break;
4433 
4434 	case SADB_X_SASTATE_ACTIVE:
4435 		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4436 			rcode = EINVAL;
4437 			break;
4438 		}
4439 		assoc->ipsa_state = IPSA_STATE_MATURE;
4440 		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4441 
4442 		if (ipkt_lst == NULL) {
4443 			break;
4444 		}
4445 
4446 		if (assoc->ipsa_bpkt_head != NULL) {
4447 			*ipkt_lst = assoc->ipsa_bpkt_head;
4448 			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4449 			assoc->ipsa_mblkcnt = 0;
4450 		} else {
4451 			*ipkt_lst = NULL;
4452 		}
4453 		break;
4454 	default:
4455 		rcode = EINVAL;
4456 		break;
4457 	}
4458 
4459 	mutex_exit(&assoc->ipsa_lock);
4460 	return (rcode);
4461 }
4462 
4463 /*
4464  * Check a proposed KMC update for sanity.
4465  */
4466 static int
4467 sadb_check_kmc(ipsa_query_t *sq, ipsa_t *sa, int *diagnostic)
4468 {
4469 	uint32_t kmp = sq->kmp;
4470 	uint64_t kmc = sq->kmc;
4471 
4472 	if (sa == NULL)
4473 		return (0);
4474 
4475 	if (sa->ipsa_state == IPSA_STATE_DEAD)
4476 		return (ESRCH);	/* DEAD == Not there, in this case. */
4477 
4478 	if ((kmp != 0) && (sa->ipsa_kmp != 0) && (sa->ipsa_kmp != kmp)) {
4479 		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4480 		return (EINVAL);
4481 	}
4482 
4483 	/* Allow IKEv2 KMCs to update the kmc value for rekeying */
4484 	if ((kmp != SADB_X_KMP_IKEV2) && (kmc != 0) && (sa->ipsa_kmc != 0) &&
4485 	    (sa->ipsa_kmc != kmc)) {
4486 		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4487 		return (EINVAL);
4488 	}
4489 
4490 	return (0);
4491 }
4492 
4493 /*
4494  * Actually update the KMC info.
4495  */
4496 static void
4497 sadb_update_kmc(ipsa_query_t *sq, ipsa_t *sa)
4498 {
4499 	uint32_t kmp = sq->kmp;
4500 	uint64_t kmc = sq->kmc;
4501 
4502 	if (kmp != 0)
4503 		sa->ipsa_kmp = kmp;
4504 	if (kmc != 0)
4505 		sa->ipsa_kmc = kmc;
4506 }
4507 
4508 /*
4509  * Common code to update an SA.
4510  */
4511 
4512 int
4513 sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4514     sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4515     int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4516     netstack_t *ns, uint8_t sadb_msg_type)
4517 {
4518 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4519 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4520 	sadb_x_replay_ctr_t *replext =
4521 	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4522 	sadb_lifetime_t *soft =
4523 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4524 	sadb_lifetime_t *hard =
4525 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4526 	sadb_lifetime_t *idle =
4527 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4528 	sadb_x_pair_t *pair_ext =
4529 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4530 	ipsa_t *echo_target = NULL;
4531 	ipsap_t ipsapp;
4532 	ipsa_query_t sq;
4533 	time_t current = gethrestime_sec();
4534 
4535 	sq.spp = spp;		/* XXX param */
4536 	int error = sadb_form_query(ksi, IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA,
4537 	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND|
4538 	    IPSA_Q_KMC,
4539 	    &sq, diagnostic);
4540 
4541 	if (error != 0)
4542 		return (error);
4543 
4544 	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
4545 	if (error != 0)
4546 		return (error);
4547 
4548 	if (ipsapp.ipsap_psa_ptr == NULL && ipsapp.ipsap_sa_ptr != NULL) {
4549 		if (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4550 			/*
4551 			 * REFRELE the target and let the add_sa_func()
4552 			 * deal with updating a larval SA.
4553 			 */
4554 			destroy_ipsa_pair(&ipsapp);
4555 			return (add_sa_func(mp, ksi, diagnostic, ns));
4556 		}
4557 	}
4558 
4559 	/*
4560 	 * At this point we have an UPDATE to a MATURE SA. There should
4561 	 * not be any keying material present.
4562 	 */
4563 	if (akey != NULL) {
4564 		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4565 		error = EINVAL;
4566 		goto bail;
4567 	}
4568 	if (ekey != NULL) {
4569 		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4570 		error = EINVAL;
4571 		goto bail;
4572 	}
4573 
4574 	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4575 		if (ipsapp.ipsap_sa_ptr != NULL &&
4576 		    ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4577 			if ((error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4578 			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4579 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4580 				goto bail;
4581 			}
4582 		}
4583 		if (ipsapp.ipsap_psa_ptr != NULL &&
4584 		    ipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4585 			if ((error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4586 			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4587 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4588 				goto bail;
4589 			}
4590 		}
4591 	}
4592 	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4593 		if (ipsapp.ipsap_sa_ptr != NULL) {
4594 			error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4595 			    sq.assoc->sadb_sa_state,
4596 			    (ipsapp.ipsap_sa_ptr->ipsa_flags &
4597 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4598 			if (error) {
4599 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4600 				goto bail;
4601 			}
4602 		}
4603 		if (ipsapp.ipsap_psa_ptr != NULL) {
4604 			error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4605 			    sq.assoc->sadb_sa_state,
4606 			    (ipsapp.ipsap_psa_ptr->ipsa_flags &
4607 			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4608 			if (error) {
4609 				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4610 				goto bail;
4611 			}
4612 		}
4613 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4614 		    ksi, echo_target);
4615 		goto bail;
4616 	}
4617 
4618 	/*
4619 	 * Reality checks for updates of active associations.
4620 	 * Sundry first-pass UPDATE-specific reality checks.
4621 	 * Have to do the checks here, because it's after the add_sa code.
4622 	 * XXX STATS : logging/stats here?
4623 	 */
4624 
4625 	if (!((sq.assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4626 	    (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4627 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4628 		error = EINVAL;
4629 		goto bail;
4630 	}
4631 	if (sq.assoc->sadb_sa_flags & ~spp->s_updateflags) {
4632 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4633 		error = EINVAL;
4634 		goto bail;
4635 	}
4636 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4637 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4638 		error = EOPNOTSUPP;
4639 		goto bail;
4640 	}
4641 
4642 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4643 		error = EINVAL;
4644 		goto bail;
4645 	}
4646 
4647 	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
4648 		return (EINVAL);
4649 
4650 	error = sadb_check_kmc(&sq, ipsapp.ipsap_sa_ptr, diagnostic);
4651 	if (error != 0)
4652 		goto bail;
4653 
4654 	error = sadb_check_kmc(&sq, ipsapp.ipsap_psa_ptr, diagnostic);
4655 	if (error != 0)
4656 		goto bail;
4657 
4658 
4659 	if (ipsapp.ipsap_sa_ptr != NULL) {
4660 		/*
4661 		 * Do not allow replay value change for MATURE or LARVAL SA.
4662 		 */
4663 
4664 		if ((replext != NULL) &&
4665 		    ((ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4666 		    (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4667 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4668 			error = EINVAL;
4669 			goto bail;
4670 		}
4671 	}
4672 
4673 
4674 	if (ipsapp.ipsap_sa_ptr != NULL) {
4675 		sadb_update_lifetimes(ipsapp.ipsap_sa_ptr, hard, soft,
4676 		    idle, B_TRUE);
4677 		sadb_update_kmc(&sq, ipsapp.ipsap_sa_ptr);
4678 		if ((replext != NULL) &&
4679 		    (ipsapp.ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4680 			/*
4681 			 * If an inbound SA, update the replay counter
4682 			 * and check off all the other sequence number
4683 			 */
4684 			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4685 				if (!sadb_replay_check(ipsapp.ipsap_sa_ptr,
4686 				    replext->sadb_x_rc_replay32)) {
4687 					*diagnostic =
4688 					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4689 					error = EINVAL;
4690 					goto bail;
4691 				}
4692 				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4693 				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4694 				    current +
4695 				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4696 				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4697 			} else {
4698 				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4699 				ipsapp.ipsap_sa_ptr->ipsa_replay =
4700 				    replext->sadb_x_rc_replay32;
4701 				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4702 				    current +
4703 				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4704 				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4705 			}
4706 		}
4707 	}
4708 
4709 	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4710 		if (ipsapp.ipsap_psa_ptr != NULL) {
4711 			sadb_update_lifetimes(ipsapp.ipsap_psa_ptr, hard, soft,
4712 			    idle, B_FALSE);
4713 			sadb_update_kmc(&sq, ipsapp.ipsap_psa_ptr);
4714 		} else {
4715 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4716 			error = ESRCH;
4717 			goto bail;
4718 		}
4719 	}
4720 
4721 	if (pair_ext != NULL)
4722 		error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
4723 
4724 	if (error == 0)
4725 		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4726 		    ksi, echo_target);
4727 bail:
4728 
4729 	destroy_ipsa_pair(&ipsapp);
4730 
4731 	return (error);
4732 }
4733 
4734 
4735 static int
4736 update_pairing(ipsap_t *ipsapp, ipsa_query_t *sq, keysock_in_t *ksi,
4737     int *diagnostic)
4738 {
4739 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4740 	sadb_x_pair_t *pair_ext =
4741 	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4742 	int error = 0;
4743 	ipsap_t oipsapp;
4744 	boolean_t undo_pair = B_FALSE;
4745 	uint32_t ipsa_flags;
4746 
4747 	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4748 	    assoc->sadb_sa_spi) {
4749 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4750 		return (EINVAL);
4751 	}
4752 
4753 	/*
4754 	 * Assume for now that the spi value provided in the SADB_UPDATE
4755 	 * message was valid, update the SA with its pair spi value.
4756 	 * If the spi turns out to be bogus or the SA no longer exists
4757 	 * then this will be detected when the reverse update is made
4758 	 * below.
4759 	 */
4760 	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4761 	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4762 	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4763 	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4764 
4765 	/*
4766 	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4767 	 * should now return pointers to the SA *AND* its pair, if this is not
4768 	 * the case, the "otherspi" either did not exist or was deleted. Also
4769 	 * check that "otherspi" is not already paired. If everything looks
4770 	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4771 	 * after this update to ensure its not deleted until we are done.
4772 	 */
4773 	error = get_ipsa_pair(sq, &oipsapp, diagnostic);
4774 	if (error != 0) {
4775 		/*
4776 		 * This should never happen, calling function still has
4777 		 * IPSA_REFHELD on the SA we just updated.
4778 		 */
4779 		return (error);	/* XXX EINVAL instead of ESRCH? */
4780 	}
4781 
4782 	if (oipsapp.ipsap_psa_ptr == NULL) {
4783 		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4784 		error = EINVAL;
4785 		undo_pair = B_TRUE;
4786 	} else {
4787 		ipsa_flags = oipsapp.ipsap_psa_ptr->ipsa_flags;
4788 		if ((oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4789 		    (oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4790 			/* Its dead Jim! */
4791 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4792 			undo_pair = B_TRUE;
4793 		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4794 		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4795 			/* This SA is in both hashtables. */
4796 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4797 			undo_pair = B_TRUE;
4798 		} else if (ipsa_flags & IPSA_F_PAIRED) {
4799 			/* This SA is already paired with another. */
4800 			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4801 			undo_pair = B_TRUE;
4802 		}
4803 	}
4804 
4805 	if (undo_pair) {
4806 		/* The pair SA does not exist. */
4807 		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4808 		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4809 		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4810 		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4811 	} else {
4812 		mutex_enter(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4813 		oipsapp.ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4814 		oipsapp.ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4815 		mutex_exit(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4816 	}
4817 
4818 	destroy_ipsa_pair(&oipsapp);
4819 	return (error);
4820 }
4821 
4822 /*
4823  * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4824  * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4825  * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4826  * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4827  * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4828  * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4829  * other direction's SA.
4830  */
4831 
4832 /*
4833  * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4834  * grab it, lock it, and return it.  Otherwise return NULL.
4835  *
4836  * XXX MLS number of arguments getting unwieldy here
4837  */
4838 static ipsacq_t *
4839 sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4840     uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4841     uint64_t unique_id, ts_label_t *tsl)
4842 {
4843 	ipsacq_t *walker;
4844 	sa_family_t fam;
4845 	uint32_t blank_address[4] = {0, 0, 0, 0};
4846 
4847 	if (isrc == NULL) {
4848 		ASSERT(idst == NULL);
4849 		isrc = idst = blank_address;
4850 	}
4851 
4852 	/*
4853 	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4854 	 *
4855 	 * XXX May need search for duplicates based on other things too!
4856 	 */
4857 	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4858 	    walker = walker->ipsacq_next) {
4859 		mutex_enter(&walker->ipsacq_lock);
4860 		fam = walker->ipsacq_addrfam;
4861 		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4862 		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4863 		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4864 		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4865 		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4866 		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4867 		    (ap == walker->ipsacq_act) &&
4868 		    (pp == walker->ipsacq_policy) &&
4869 		    /* XXX do deep compares of ap/pp? */
4870 		    (unique_id == walker->ipsacq_unique_id) &&
4871 		    (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4872 			break;			/* everything matched */
4873 		mutex_exit(&walker->ipsacq_lock);
4874 	}
4875 
4876 	return (walker);
4877 }
4878 
4879 /*
4880  * Generate an SADB_ACQUIRE base message mblk, including KEYSOCK_OUT metadata.
4881  * In other words, this will return, upon success, a two-mblk chain.
4882  */
4883 static inline mblk_t *
4884 sadb_acquire_msg_base(minor_t serial, uint8_t satype, uint32_t seq, pid_t pid)
4885 {
4886 	mblk_t *mp;
4887 	sadb_msg_t *samsg;
4888 
4889 	mp = sadb_keysock_out(serial);
4890 	if (mp == NULL)
4891 		return (NULL);
4892 	mp->b_cont = allocb(sizeof (sadb_msg_t), BPRI_HI);
4893 	if (mp->b_cont == NULL) {
4894 		freeb(mp);
4895 		return (NULL);
4896 	}
4897 
4898 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
4899 	mp->b_cont->b_wptr += sizeof (*samsg);
4900 	samsg->sadb_msg_version = PF_KEY_V2;
4901 	samsg->sadb_msg_type = SADB_ACQUIRE;
4902 	samsg->sadb_msg_errno = 0;
4903 	samsg->sadb_msg_reserved = 0;
4904 	samsg->sadb_msg_satype = satype;
4905 	samsg->sadb_msg_seq = seq;
4906 	samsg->sadb_msg_pid = pid;
4907 
4908 	return (mp);
4909 }
4910 
4911 /*
4912  * Generate address and TX/MLS sensitivity label PF_KEY extensions that are
4913  * common to both regular and extended ACQUIREs.
4914  */
4915 static mblk_t *
4916 sadb_acquire_msg_common(ipsec_selector_t *sel, ipsec_policy_t *pp,
4917     ipsec_action_t *ap, boolean_t tunnel_mode, ts_label_t *tsl,
4918     sadb_sens_t *sens)
4919 {
4920 	size_t len;
4921 	mblk_t *mp;
4922 	uint8_t *start, *cur, *end;
4923 	uint32_t *saddrptr, *daddrptr;
4924 	sa_family_t af;
4925 	ipsec_action_t *oldap;
4926 	ipsec_selkey_t *ipsl;
4927 	uint8_t proto, pfxlen;
4928 	uint16_t lport, rport;
4929 	int senslen = 0;
4930 
4931 	/*
4932 	 * Get action pointer set if it isn't already.
4933 	 */
4934 	oldap = ap;
4935 	if (pp != NULL) {
4936 		ap = pp->ipsp_act;
4937 		if (ap == NULL)
4938 			ap = oldap;
4939 	}
4940 
4941 	/*
4942 	 * Biggest-case scenario:
4943 	 * 4x (sadb_address_t + struct sockaddr_in6)
4944 	 *	(src, dst, isrc, idst)
4945 	 *	(COMING SOON, 6x, because of triggering-packet contents.)
4946 	 * sadb_x_kmc_t
4947 	 * sadb_sens_t
4948 	 * And wiggle room for label bitvectors.  Luckily there are
4949 	 * programmatic ways to find it.
4950 	 */
4951 	len = 4 * (sizeof (sadb_address_t) + sizeof (struct sockaddr_in6));
4952 
4953 	/* Figure out full and proper length of sensitivity labels. */
4954 	if (sens != NULL) {
4955 		ASSERT(tsl == NULL);
4956 		senslen = SADB_64TO8(sens->sadb_sens_len);
4957 	} else if (tsl != NULL) {
4958 		senslen = sadb_sens_len_from_label(tsl);
4959 	}
4960 #ifdef DEBUG
4961 	else {
4962 		ASSERT(senslen == 0);
4963 	}
4964 #endif /* DEBUG */
4965 	len += senslen;
4966 
4967 	mp = allocb(len, BPRI_HI);
4968 	if (mp == NULL)
4969 		return (NULL);
4970 
4971 	start = mp->b_rptr;
4972 	end = start + len;
4973 	cur = start;
4974 
4975 	/*
4976 	 * Address extensions first, from most-recently-defined to least.
4977 	 * (This should immediately trigger surprise or verify robustness on
4978 	 * older apps, like in.iked.)
4979 	 */
4980 	if (tunnel_mode) {
4981 		/*
4982 		 * Form inner address extensions based NOT on the inner
4983 		 * selectors (i.e. the packet data), but on the policy's
4984 		 * selector key (i.e. the policy's selector information).
4985 		 *
4986 		 * NOTE:  The position of IPv4 and IPv6 addresses is the
4987 		 * same in ipsec_selkey_t (unless the compiler does very
4988 		 * strange things with unions, consult your local C language
4989 		 * lawyer for details).
4990 		 */
4991 		ASSERT(pp != NULL);
4992 
4993 		ipsl = &(pp->ipsp_sel->ipsl_key);
4994 		if (ipsl->ipsl_valid & IPSL_IPV4) {
4995 			af = AF_INET;
4996 			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
4997 			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
4998 		} else {
4999 			af = AF_INET6;
5000 			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
5001 			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
5002 		}
5003 
5004 		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
5005 			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
5006 			pfxlen = ipsl->ipsl_local_pfxlen;
5007 		} else {
5008 			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5009 			pfxlen = 0;
5010 		}
5011 		/* XXX What about ICMP type/code? */
5012 		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5013 		    ipsl->ipsl_lport : 0;
5014 		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5015 		    ipsl->ipsl_proto : 0;
5016 
5017 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5018 		    af, saddrptr, lport, proto, pfxlen);
5019 		if (cur == NULL) {
5020 			freeb(mp);
5021 			return (NULL);
5022 		}
5023 
5024 		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5025 			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5026 			pfxlen = ipsl->ipsl_remote_pfxlen;
5027 		} else {
5028 			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5029 			pfxlen = 0;
5030 		}
5031 		/* XXX What about ICMP type/code? */
5032 		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5033 		    ipsl->ipsl_rport : 0;
5034 
5035 		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5036 		    af, daddrptr, rport, proto, pfxlen);
5037 		if (cur == NULL) {
5038 			freeb(mp);
5039 			return (NULL);
5040 		}
5041 		/*
5042 		 * TODO  - if we go to 3884's dream of transport mode IP-in-IP
5043 		 * _with_ inner-packet address selectors, we'll need to further
5044 		 * distinguish tunnel mode here.  For now, having inner
5045 		 * addresses and/or ports is sufficient.
5046 		 *
5047 		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5048 		 * outer addresses.
5049 		 */
5050 		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5051 		lport = rport = 0;
5052 	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5053 		/*
5054 		 * For cases when the policy calls out specific ports (or not).
5055 		 */
5056 		proto = 0;
5057 		lport = 0;
5058 		rport = 0;
5059 		if (pp != NULL) {
5060 			ipsl = &(pp->ipsp_sel->ipsl_key);
5061 			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5062 				proto = ipsl->ipsl_proto;
5063 			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5064 				rport = ipsl->ipsl_rport;
5065 			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5066 				lport = ipsl->ipsl_lport;
5067 		}
5068 	} else {
5069 		/*
5070 		 * For require-unique-SA policies.
5071 		 */
5072 		proto = sel->ips_protocol;
5073 		lport = sel->ips_local_port;
5074 		rport = sel->ips_remote_port;
5075 	}
5076 
5077 	/*
5078 	 * Regular addresses.  These are outer-packet ones for tunnel mode.
5079 	 * Or for transport mode, the regulard address & port information.
5080 	 */
5081 	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5082 
5083 	/*
5084 	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5085 	 * ipsec_selector_t.
5086 	 */
5087 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5088 	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5089 	if (cur == NULL) {
5090 		freeb(mp);
5091 		return (NULL);
5092 	}
5093 
5094 	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5095 	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5096 	if (cur == NULL) {
5097 		freeb(mp);
5098 		return (NULL);
5099 	}
5100 
5101 	/*
5102 	 * If present, generate a sensitivity label.
5103 	 */
5104 	if (cur + senslen > end) {
5105 		freeb(mp);
5106 		return (NULL);
5107 	}
5108 	if (sens != NULL) {
5109 		/* Explicit sadb_sens_t, usually from inverse-ACQUIRE. */
5110 		bcopy(sens, cur, senslen);
5111 	} else if (tsl != NULL) {
5112 		/* Generate sadb_sens_t from ACQUIRE source. */
5113 		sadb_sens_from_label((sadb_sens_t *)cur, SADB_EXT_SENSITIVITY,
5114 		    tsl, senslen);
5115 	}
5116 #ifdef DEBUG
5117 	else {
5118 		ASSERT(senslen == 0);
5119 	}
5120 #endif /* DEBUG */
5121 	cur += senslen;
5122 	mp->b_wptr = cur;
5123 
5124 	return (mp);
5125 }
5126 
5127 /*
5128  * Generate a regular ACQUIRE's proposal extension and KMC information..
5129  */
5130 static mblk_t *
5131 sadb_acquire_prop(ipsec_action_t *ap, netstack_t *ns, boolean_t do_esp)
5132 {
5133 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5134 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5135 	ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
5136 	mblk_t *mp = NULL;
5137 	sadb_prop_t *prop;
5138 	sadb_comb_t *comb;
5139 	ipsec_action_t *walker;
5140 	int ncombs, allocsize, ealgid, aalgid, aminbits, amaxbits, eminbits,
5141 	    emaxbits, esaltlen, replay;
5142 	uint64_t softbytes, hardbytes, softaddtime, hardaddtime, softusetime,
5143 	    hardusetime;
5144 	uint64_t kmc = 0;
5145 	uint32_t kmp = 0;
5146 
5147 	/*
5148 	 * Since it's an rwlock read, AND writing to the IPsec algorithms is
5149 	 * rare, just acquire it once up top, and drop it upon return.
5150 	 */
5151 	rw_enter(&ipss->ipsec_alg_lock, RW_READER);
5152 	if (do_esp) {
5153 		uint64_t num_aalgs, num_ealgs;
5154 
5155 		if (espstack->esp_kstats == NULL)
5156 			goto bail;
5157 
5158 		num_aalgs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
5159 		num_ealgs = ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
5160 		if (num_ealgs == 0)
5161 			goto bail;	/* IPsec not loaded yet, apparently. */
5162 		num_aalgs++;	/* No-auth or self-auth-crypto ESP. */
5163 
5164 		/* Use netstack's maximum loaded algorithms... */
5165 		ncombs = num_ealgs * num_aalgs;
5166 		replay =  espstack->ipsecesp_replay_size;
5167 	} else {
5168 		if (ahstack->ah_kstats == NULL)
5169 			goto bail;
5170 
5171 		ncombs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
5172 
5173 		if (ncombs == 0)
5174 			goto bail;	/* IPsec not loaded yet, apparently. */
5175 		replay =  ahstack->ipsecah_replay_size;
5176 	}
5177 
5178 	allocsize = sizeof (*prop) + ncombs * sizeof (*comb) +
5179 	    sizeof (sadb_x_kmc_t);
5180 	mp = allocb(allocsize, BPRI_HI);
5181 	if (mp == NULL)
5182 		goto bail;
5183 	prop = (sadb_prop_t *)mp->b_rptr;
5184 	mp->b_wptr += sizeof (*prop);
5185 	comb = (sadb_comb_t *)mp->b_wptr;
5186 	/* Decrement allocsize, if it goes to or below 0, stop. */
5187 	allocsize -= sizeof (*prop);
5188 	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
5189 	prop->sadb_prop_len = SADB_8TO64(sizeof (*prop));
5190 	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
5191 	prop->sadb_prop_replay = replay;
5192 
5193 	/*
5194 	 * Based upon algorithm properties, and what-not, prioritize a
5195 	 * proposal, based on the ordering of the ESP algorithms in the
5196 	 * alternatives in the policy rule or socket that was placed
5197 	 * in the acquire record.
5198 	 *
5199 	 * For each action in policy list
5200 	 *   Add combination.
5201 	 *   I should not hit it, but if I've hit limit, return.
5202 	 */
5203 
5204 	for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5205 		ipsec_alginfo_t *ealg, *aalg;
5206 		ipsec_prot_t *prot;
5207 
5208 		if (walker->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
5209 			continue;
5210 
5211 		prot = &walker->ipa_act.ipa_apply;
5212 		if (walker->ipa_act.ipa_apply.ipp_km_proto != 0)
5213 			kmp = walker->ipa_act.ipa_apply.ipp_km_proto;
5214 		if (walker->ipa_act.ipa_apply.ipp_km_cookie != 0)
5215 			kmc = walker->ipa_act.ipa_apply.ipp_km_cookie;
5216 		if (walker->ipa_act.ipa_apply.ipp_replay_depth) {
5217 			prop->sadb_prop_replay =
5218 			    walker->ipa_act.ipa_apply.ipp_replay_depth;
5219 		}
5220 
5221 		if (do_esp) {
5222 			if (!prot->ipp_use_esp)
5223 				continue;
5224 
5225 			if (prot->ipp_esp_auth_alg != 0) {
5226 				aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
5227 				    [prot->ipp_esp_auth_alg];
5228 				if (aalg == NULL || !ALG_VALID(aalg))
5229 					continue;
5230 			} else
5231 				aalg = NULL;
5232 
5233 			ASSERT(prot->ipp_encr_alg > 0);
5234 			ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
5235 			    [prot->ipp_encr_alg];
5236 			if (ealg == NULL || !ALG_VALID(ealg))
5237 				continue;
5238 
5239 			/*
5240 			 * These may want to come from policy rule..
5241 			 */
5242 			softbytes = espstack->ipsecesp_default_soft_bytes;
5243 			hardbytes = espstack->ipsecesp_default_hard_bytes;
5244 			softaddtime = espstack->ipsecesp_default_soft_addtime;
5245 			hardaddtime = espstack->ipsecesp_default_hard_addtime;
5246 			softusetime = espstack->ipsecesp_default_soft_usetime;
5247 			hardusetime = espstack->ipsecesp_default_hard_usetime;
5248 		} else {
5249 			if (!prot->ipp_use_ah)
5250 				continue;
5251 			ealg = NULL;
5252 			aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
5253 			    [prot->ipp_auth_alg];
5254 			if (aalg == NULL || !ALG_VALID(aalg))
5255 				continue;
5256 
5257 			/*
5258 			 * These may want to come from policy rule..
5259 			 */
5260 			softbytes = ahstack->ipsecah_default_soft_bytes;
5261 			hardbytes = ahstack->ipsecah_default_hard_bytes;
5262 			softaddtime = ahstack->ipsecah_default_soft_addtime;
5263 			hardaddtime = ahstack->ipsecah_default_hard_addtime;
5264 			softusetime = ahstack->ipsecah_default_soft_usetime;
5265 			hardusetime = ahstack->ipsecah_default_hard_usetime;
5266 		}
5267 
5268 		if (ealg == NULL) {
5269 			ealgid = eminbits = emaxbits = esaltlen = 0;
5270 		} else {
5271 			ealgid = ealg->alg_id;
5272 			eminbits =
5273 			    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
5274 			emaxbits =
5275 			    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
5276 			esaltlen = ealg->alg_saltlen;
5277 		}
5278 
5279 		if (aalg == NULL) {
5280 			aalgid = aminbits = amaxbits = 0;
5281 		} else {
5282 			aalgid = aalg->alg_id;
5283 			aminbits = MAX(prot->ipp_espa_minbits,
5284 			    aalg->alg_ef_minbits);
5285 			amaxbits = MIN(prot->ipp_espa_maxbits,
5286 			    aalg->alg_ef_maxbits);
5287 		}
5288 
5289 		comb->sadb_comb_flags = 0;
5290 		comb->sadb_comb_reserved = 0;
5291 		comb->sadb_comb_encrypt = ealgid;
5292 		comb->sadb_comb_encrypt_minbits = eminbits;
5293 		comb->sadb_comb_encrypt_maxbits = emaxbits;
5294 		comb->sadb_x_comb_encrypt_saltbits = SADB_8TO1(esaltlen);
5295 		comb->sadb_comb_auth = aalgid;
5296 		comb->sadb_comb_auth_minbits = aminbits;
5297 		comb->sadb_comb_auth_maxbits = amaxbits;
5298 		comb->sadb_comb_soft_allocations = 0;
5299 		comb->sadb_comb_hard_allocations = 0;
5300 		comb->sadb_comb_soft_bytes = softbytes;
5301 		comb->sadb_comb_hard_bytes = hardbytes;
5302 		comb->sadb_comb_soft_addtime = softaddtime;
5303 		comb->sadb_comb_hard_addtime = hardaddtime;
5304 		comb->sadb_comb_soft_usetime = softusetime;
5305 		comb->sadb_comb_hard_usetime = hardusetime;
5306 
5307 		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
5308 		mp->b_wptr += sizeof (*comb);
5309 		allocsize -= sizeof (*comb);
5310 		/* Should never dip BELOW sizeof (KM cookie extension). */
5311 		ASSERT3S(allocsize, >=, sizeof (sadb_x_kmc_t));
5312 		if (allocsize <= sizeof (sadb_x_kmc_t))
5313 			break;	/* out of space.. */
5314 		comb++;
5315 	}
5316 
5317 	/* Don't include KMC extension if there's no room. */
5318 	if (((kmp != 0) || (kmc != 0)) && allocsize >= sizeof (sadb_x_kmc_t)) {
5319 		if (sadb_make_kmc_ext(mp->b_wptr,
5320 		    mp->b_wptr + sizeof (sadb_x_kmc_t), kmp, kmc) == NULL) {
5321 			freeb(mp);
5322 			mp = NULL;
5323 			goto bail;
5324 		}
5325 		mp->b_wptr += sizeof (sadb_x_kmc_t);
5326 		prop->sadb_prop_len += SADB_8TO64(sizeof (sadb_x_kmc_t));
5327 	}
5328 
5329 bail:
5330 	rw_exit(&ipss->ipsec_alg_lock);
5331 	return (mp);
5332 }
5333 
5334 /*
5335  * Generate an extended ACQUIRE's extended-proposal extension.
5336  */
5337 static mblk_t *
5338 sadb_acquire_extended_prop(ipsec_action_t *ap, netstack_t *ns)
5339 {
5340 	sadb_prop_t *eprop;
5341 	uint8_t *cur, *end;
5342 	mblk_t *mp;
5343 	int allocsize, numecombs = 0, numalgdescs = 0;
5344 	uint32_t kmp = 0, replay = 0;
5345 	uint64_t kmc = 0;
5346 	ipsec_action_t *walker;
5347 
5348 	allocsize = sizeof (*eprop);
5349 
5350 	/*
5351 	 * Going to walk through the action list twice.  Once for allocation
5352 	 * measurement, and once for actual construction.
5353 	 */
5354 	for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5355 		ipsec_prot_t *ipp;
5356 
5357 		/*
5358 		 * Skip non-IPsec policies
5359 		 */
5360 		if (walker->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5361 			continue;
5362 
5363 		ipp = &walker->ipa_act.ipa_apply;
5364 
5365 		if (walker->ipa_act.ipa_apply.ipp_km_proto)
5366 			kmp = ipp->ipp_km_proto;
5367 		if (walker->ipa_act.ipa_apply.ipp_km_cookie)
5368 			kmc = ipp->ipp_km_cookie;
5369 		if (walker->ipa_act.ipa_apply.ipp_replay_depth)
5370 			replay = ipp->ipp_replay_depth;
5371 
5372 		if (ipp->ipp_use_ah)
5373 			numalgdescs++;
5374 		if (ipp->ipp_use_esp) {
5375 			numalgdescs++;
5376 			if (ipp->ipp_use_espa)
5377 				numalgdescs++;
5378 		}
5379 
5380 		numecombs++;
5381 	}
5382 	ASSERT(numecombs > 0);
5383 
5384 	allocsize += numecombs * sizeof (sadb_x_ecomb_t) +
5385 	    numalgdescs * sizeof (sadb_x_algdesc_t) + sizeof (sadb_x_kmc_t);
5386 	mp = allocb(allocsize, BPRI_HI);
5387 	if (mp == NULL)
5388 		return (NULL);
5389 	eprop = (sadb_prop_t *)mp->b_rptr;
5390 	end = mp->b_rptr + allocsize;
5391 	cur = mp->b_rptr + sizeof (*eprop);
5392 
5393 	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5394 	eprop->sadb_x_prop_ereserved = 0;
5395 	eprop->sadb_x_prop_numecombs = 0;
5396 	*(uint32_t *)(&eprop->sadb_prop_replay) = 0;	/* Quick zero-out! */
5397 	/* Pick ESP's replay default if need be. */
5398 	eprop->sadb_prop_replay = (replay == 0) ?
5399 	    ns->netstack_ipsecesp->ipsecesp_replay_size : replay;
5400 
5401 	/* This time, walk through and actually allocate. */
5402 	for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5403 		/*
5404 		 * Skip non-IPsec policies
5405 		 */
5406 		if (walker->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5407 			continue;
5408 		cur = sadb_action_to_ecomb(cur, end, walker, ns);
5409 		if (cur == NULL) {
5410 			/* NOTE: inverse-ACQUIRE should note this as ENOMEM. */
5411 			freeb(mp);
5412 			return (NULL);
5413 		}
5414 		eprop->sadb_x_prop_numecombs++;
5415 	}
5416 
5417 	ASSERT(end - cur >= sizeof (sadb_x_kmc_t));
5418 	if ((kmp != 0) || (kmc != 0)) {
5419 		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5420 		if (cur == NULL) {
5421 			freeb(mp);
5422 			return (NULL);
5423 		}
5424 	}
5425 	mp->b_wptr = cur;
5426 	eprop->sadb_prop_len = SADB_8TO64(cur - mp->b_rptr);
5427 
5428 	return (mp);
5429 }
5430 
5431 /*
5432  * For this mblk, insert a new acquire record.  Assume bucket contains addrs
5433  * of all of the same length.  Give up (and drop) if memory
5434  * cannot be allocated for a new one; otherwise, invoke callback to
5435  * send the acquire up..
5436  *
5437  * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
5438  * list.  The ah_add_sa_finish() routines can look at the packet's attached
5439  * attributes and handle this case specially.
5440  */
5441 void
5442 sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
5443     boolean_t need_esp)
5444 {
5445 	mblk_t	*asyncmp, *regular, *extended, *common, *prop, *eprop;
5446 	sadbp_t *spp;
5447 	sadb_t *sp;
5448 	ipsacq_t *newbie;
5449 	iacqf_t *bucket;
5450 	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
5451 	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
5452 	uint32_t *src, *dst, *isrc, *idst;
5453 	ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
5454 	ipsec_action_t *ap = ixa->ixa_ipsec_action;
5455 	sa_family_t af;
5456 	int hashoffset;
5457 	uint32_t seq;
5458 	uint64_t unique_id = 0;
5459 	boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
5460 	ts_label_t	*tsl;
5461 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
5462 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5463 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5464 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
5465 	ipsec_selector_t sel;
5466 	queue_t *q;
5467 
5468 	ASSERT((pp != NULL) || (ap != NULL));
5469 
5470 	ASSERT(need_ah || need_esp);
5471 
5472 	/* Assign sadb pointers */
5473 	if (need_esp) {
5474 		/*
5475 		 * ESP happens first if we need both AH and ESP.
5476 		 */
5477 		spp = &espstack->esp_sadb;
5478 	} else {
5479 		spp = &ahstack->ah_sadb;
5480 	}
5481 	sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
5482 
5483 	if (is_system_labeled())
5484 		tsl = ixa->ixa_tsl;
5485 	else
5486 		tsl = NULL;
5487 
5488 	if (ap == NULL)
5489 		ap = pp->ipsp_act;
5490 	ASSERT(ap != NULL);
5491 
5492 	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
5493 		unique_id = SA_FORM_UNIQUE_ID(ixa);
5494 
5495 	/*
5496 	 * Set up an ACQUIRE record.
5497 	 *
5498 	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
5499 	 * below the lowest point allowed in the kernel.  (In other words,
5500 	 * make sure the high bit on the sequence number is set.)
5501 	 */
5502 
5503 	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
5504 
5505 	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
5506 		src = (uint32_t *)&ipha->ipha_src;
5507 		dst = (uint32_t *)&ipha->ipha_dst;
5508 		af = AF_INET;
5509 		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
5510 		ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
5511 	} else {
5512 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
5513 		src = (uint32_t *)&ip6h->ip6_src;
5514 		dst = (uint32_t *)&ip6h->ip6_dst;
5515 		af = AF_INET6;
5516 		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
5517 		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
5518 	}
5519 
5520 	if (tunnel_mode) {
5521 		if (pp == NULL) {
5522 			/*
5523 			 * Tunnel mode with no policy pointer means this is a
5524 			 * reflected ICMP (like a ECHO REQUEST) that came in
5525 			 * with self-encapsulated protection.  Until we better
5526 			 * support this, drop the packet.
5527 			 */
5528 			ip_drop_packet(datamp, B_FALSE, NULL,
5529 			    DROPPER(ipss, ipds_spd_got_selfencap),
5530 			    &ipss->ipsec_spd_dropper);
5531 			return;
5532 		}
5533 		/* Snag inner addresses. */
5534 		isrc = ixa->ixa_ipsec_insrc;
5535 		idst = ixa->ixa_ipsec_indst;
5536 	} else {
5537 		isrc = idst = NULL;
5538 	}
5539 
5540 	/*
5541 	 * Check buckets to see if there is an existing entry.  If so,
5542 	 * grab it.  sadb_checkacquire locks newbie if found.
5543 	 */
5544 	bucket = &(sp->sdb_acq[hashoffset]);
5545 	mutex_enter(&bucket->iacqf_lock);
5546 	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
5547 	    unique_id, tsl);
5548 
5549 	if (newbie == NULL) {
5550 		/*
5551 		 * Otherwise, allocate a new one.
5552 		 */
5553 		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
5554 		if (newbie == NULL) {
5555 			mutex_exit(&bucket->iacqf_lock);
5556 			ip_drop_packet(datamp, B_FALSE, NULL,
5557 			    DROPPER(ipss, ipds_sadb_acquire_nomem),
5558 			    &ipss->ipsec_sadb_dropper);
5559 			return;
5560 		}
5561 		newbie->ipsacq_policy = pp;
5562 		if (pp != NULL) {
5563 			IPPOL_REFHOLD(pp);
5564 		}
5565 		IPACT_REFHOLD(ap);
5566 		newbie->ipsacq_act = ap;
5567 		newbie->ipsacq_linklock = &bucket->iacqf_lock;
5568 		newbie->ipsacq_next = bucket->iacqf_ipsacq;
5569 		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
5570 		if (newbie->ipsacq_next != NULL)
5571 			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
5572 
5573 		bucket->iacqf_ipsacq = newbie;
5574 		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
5575 		mutex_enter(&newbie->ipsacq_lock);
5576 	}
5577 
5578 	/*
5579 	 * XXX MLS does it actually help us to drop the bucket lock here?
5580 	 * we have inserted a half-built, locked acquire record into the
5581 	 * bucket.  any competing thread will now be able to lock the bucket
5582 	 * to scan it, but will immediately pile up on the new acquire
5583 	 * record's lock; I don't think we gain anything here other than to
5584 	 * disperse blame for lock contention.
5585 	 *
5586 	 * we might be able to dispense with acquire record locks entirely..
5587 	 * just use the bucket locks..
5588 	 */
5589 
5590 	mutex_exit(&bucket->iacqf_lock);
5591 
5592 	/*
5593 	 * This assert looks silly for now, but we may need to enter newbie's
5594 	 * mutex during a search.
5595 	 */
5596 	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5597 
5598 	/*
5599 	 * Make the ip_xmit_attr_t into something we can queue.
5600 	 * If no memory it frees datamp.
5601 	 */
5602 	asyncmp = ip_xmit_attr_to_mblk(ixa);
5603 	if (asyncmp != NULL)
5604 		linkb(asyncmp, datamp);
5605 
5606 	/* Queue up packet.  Use b_next. */
5607 
5608 	if (asyncmp == NULL) {
5609 		/* Statistics for allocation failure */
5610 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
5611 			BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
5612 			    ipIfStatsOutDiscards);
5613 		} else {
5614 			BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
5615 			    ipIfStatsOutDiscards);
5616 		}
5617 		ip_drop_output("No memory for asyncmp", datamp, NULL);
5618 		freemsg(datamp);
5619 		/*
5620 		 * The acquire record will be freed quickly if it's new
5621 		 * (ipsacq_expire == 0), and will proceed as if no packet
5622 		 * showed up if not.
5623 		 */
5624 		mutex_exit(&newbie->ipsacq_lock);
5625 		return;
5626 	} else if (newbie->ipsacq_numpackets == 0) {
5627 		/* First one. */
5628 		newbie->ipsacq_mp = asyncmp;
5629 		newbie->ipsacq_numpackets = 1;
5630 		newbie->ipsacq_expire = gethrestime_sec();
5631 		/*
5632 		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5633 		 * value.
5634 		 */
5635 		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5636 		newbie->ipsacq_seq = seq;
5637 		newbie->ipsacq_addrfam = af;
5638 
5639 		newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
5640 		newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
5641 		newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
5642 		newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
5643 		if (tunnel_mode) {
5644 			newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
5645 			newbie->ipsacq_proto = ixa->ixa_ipsec_inaf == AF_INET6 ?
5646 			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5647 			newbie->ipsacq_innersrcpfx = ixa->ixa_ipsec_insrcpfx;
5648 			newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
5649 			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5650 			    ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
5651 			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5652 			    ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
5653 		} else {
5654 			newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
5655 		}
5656 		newbie->ipsacq_unique_id = unique_id;
5657 
5658 		if (tsl != NULL) {
5659 			label_hold(tsl);
5660 			newbie->ipsacq_tsl = tsl;
5661 		}
5662 	} else {
5663 		/* Scan to the end of the list & insert. */
5664 		mblk_t *lastone = newbie->ipsacq_mp;
5665 
5666 		while (lastone->b_next != NULL)
5667 			lastone = lastone->b_next;
5668 		lastone->b_next = asyncmp;
5669 		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5670 			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5671 			lastone = newbie->ipsacq_mp;
5672 			newbie->ipsacq_mp = lastone->b_next;
5673 			lastone->b_next = NULL;
5674 
5675 			/* Freeing the async message */
5676 			lastone = ip_xmit_attr_free_mblk(lastone);
5677 			ip_drop_packet(lastone, B_FALSE, NULL,
5678 			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5679 			    &ipss->ipsec_sadb_dropper);
5680 		} else {
5681 			IP_ACQUIRE_STAT(ipss, qhiwater,
5682 			    newbie->ipsacq_numpackets);
5683 		}
5684 	}
5685 
5686 	/*
5687 	 * Reset addresses.  Set them to the most recently added mblk chain,
5688 	 * so that the address pointers in the acquire record will point
5689 	 * at an mblk still attached to the acquire list.
5690 	 */
5691 
5692 	newbie->ipsacq_srcaddr = src;
5693 	newbie->ipsacq_dstaddr = dst;
5694 
5695 	/*
5696 	 * If the acquire record has more than one queued packet, we've
5697 	 * already sent an ACQUIRE, and don't need to repeat ourself.
5698 	 */
5699 	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5700 		/* I have an acquire outstanding already! */
5701 		mutex_exit(&newbie->ipsacq_lock);
5702 		return;
5703 	}
5704 
5705 	if (need_esp) {
5706 		ESP_BUMP_STAT(espstack, acquire_requests);
5707 		q = espstack->esp_pfkey_q;
5708 	} else {
5709 		/*
5710 		 * Two cases get us here:
5711 		 * 1.) AH-only policy.
5712 		 *
5713 		 * 2.) A continuation of an AH+ESP policy, and this is the
5714 		 * post-ESP, AH-needs-to-send-a-regular-ACQUIRE case.
5715 		 * (i.e. called from esp_do_outbound_ah().)
5716 		 */
5717 		AH_BUMP_STAT(ahstack, acquire_requests);
5718 		q = ahstack->ah_pfkey_q;
5719 	}
5720 
5721 	/*
5722 	 * Get selectors and other policy-expression bits needed for an
5723 	 * ACQUIRE.
5724 	 */
5725 	bzero(&sel, sizeof (sel));
5726 	sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
5727 	if (tunnel_mode) {
5728 		sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
5729 		    IPPROTO_ENCAP : IPPROTO_IPV6;
5730 	} else {
5731 		sel.ips_protocol = ixa->ixa_ipsec_proto;
5732 		sel.ips_local_port = ixa->ixa_ipsec_src_port;
5733 		sel.ips_remote_port = ixa->ixa_ipsec_dst_port;
5734 	}
5735 	sel.ips_icmp_type = ixa->ixa_ipsec_icmp_type;
5736 	sel.ips_icmp_code = ixa->ixa_ipsec_icmp_code;
5737 	sel.ips_is_icmp_inv_acq = 0;
5738 	if (af == AF_INET) {
5739 		sel.ips_local_addr_v4 = ipha->ipha_src;
5740 		sel.ips_remote_addr_v4 = ipha->ipha_dst;
5741 	} else {
5742 		sel.ips_local_addr_v6 = ip6h->ip6_src;
5743 		sel.ips_remote_addr_v6 = ip6h->ip6_dst;
5744 	}
5745 
5746 
5747 	/*
5748 	 * 1. Generate addresses, kmc, and sensitivity.  These are "common"
5749 	 * and should be an mblk pointed to by common. TBD -- eventually it
5750 	 * will include triggering packet contents as more address extensions.
5751 	 *
5752 	 * 2. Generate ACQUIRE & KEYSOCK_OUT and single-protocol proposal.
5753 	 * These are "regular" and "prop".  String regular->b_cont->b_cont =
5754 	 * common, common->b_cont = prop.
5755 	 *
5756 	 * 3. If extended register got turned on, generate EXT_ACQUIRE &
5757 	 * KEYSOCK_OUT and multi-protocol eprop. These are "extended" and
5758 	 * "eprop".  String extended->b_cont->b_cont = dupb(common) and
5759 	 * extended->b_cont->b_cont->b_cont = prop.
5760 	 *
5761 	 * 4. Deliver:  putnext(q, regular) and if there, putnext(q, extended).
5762 	 */
5763 
5764 	regular = extended = prop = eprop = NULL;
5765 
5766 	common = sadb_acquire_msg_common(&sel, pp, ap, tunnel_mode, tsl, NULL);
5767 	if (common == NULL)
5768 		goto bail;
5769 
5770 	regular = sadb_acquire_msg_base(0, (need_esp ?
5771 	    SADB_SATYPE_ESP : SADB_SATYPE_AH), newbie->ipsacq_seq, 0);
5772 	if (regular == NULL)
5773 		goto bail;
5774 
5775 	/*
5776 	 * Pardon the boolean cleverness. At least one of need_* must be true.
5777 	 * If they are equal, it's an AH & ESP policy and ESP needs to go
5778 	 * first.  If they aren't, just check the contents of need_esp.
5779 	 */
5780 	prop = sadb_acquire_prop(ap, ns, need_esp);
5781 	if (prop == NULL)
5782 		goto bail;
5783 
5784 	/* Link the parts together. */
5785 	regular->b_cont->b_cont = common;
5786 	common->b_cont = prop;
5787 	/*
5788 	 * Prop is now linked, so don't freemsg() it if the extended
5789 	 * construction goes off the rails.
5790 	 */
5791 	prop = NULL;
5792 
5793 	((sadb_msg_t *)(regular->b_cont->b_rptr))->sadb_msg_len =
5794 	    SADB_8TO64(msgsize(regular->b_cont));
5795 
5796 	/*
5797 	 * If we need an extended ACQUIRE, build it here.
5798 	 */
5799 	if (keysock_extended_reg(ns)) {
5800 		/* NOTE: "common" still points to what we need. */
5801 		extended = sadb_acquire_msg_base(0, 0, newbie->ipsacq_seq, 0);
5802 		if (extended == NULL) {
5803 			common = NULL;
5804 			goto bail;
5805 		}
5806 
5807 		extended->b_cont->b_cont = dupb(common);
5808 		common = NULL;
5809 		if (extended->b_cont->b_cont == NULL)
5810 			goto bail;
5811 
5812 		eprop = sadb_acquire_extended_prop(ap, ns);
5813 		if (eprop == NULL)
5814 			goto bail;
5815 		extended->b_cont->b_cont->b_cont = eprop;
5816 
5817 		((sadb_msg_t *)(extended->b_cont->b_rptr))->sadb_msg_len =
5818 		    SADB_8TO64(msgsize(extended->b_cont));
5819 	}
5820 
5821 	/* So we don't hold a lock across putnext()... */
5822 	mutex_exit(&newbie->ipsacq_lock);
5823 
5824 	if (extended != NULL)
5825 		putnext(q, extended);
5826 	ASSERT(regular != NULL);
5827 	putnext(q, regular);
5828 	return;
5829 
5830 bail:
5831 	/* Make this acquire record go away quickly... */
5832 	newbie->ipsacq_expire = 0;
5833 	/* Exploit freemsg(NULL) being legal for fun & profit. */
5834 	freemsg(common);
5835 	freemsg(prop);
5836 	freemsg(extended);
5837 	freemsg(regular);
5838 	mutex_exit(&newbie->ipsacq_lock);
5839 }
5840 
5841 /*
5842  * Unlink and free an acquire record.
5843  */
5844 void
5845 sadb_destroy_acquire(ipsacq_t *acqrec, netstack_t *ns)
5846 {
5847 	mblk_t		*mp;
5848 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5849 
5850 	ASSERT(MUTEX_HELD(acqrec->ipsacq_linklock));
5851 
5852 	if (acqrec->ipsacq_policy != NULL) {
5853 		IPPOL_REFRELE(acqrec->ipsacq_policy);
5854 	}
5855 	if (acqrec->ipsacq_act != NULL) {
5856 		IPACT_REFRELE(acqrec->ipsacq_act);
5857 	}
5858 
5859 	/* Unlink */
5860 	*(acqrec->ipsacq_ptpn) = acqrec->ipsacq_next;
5861 	if (acqrec->ipsacq_next != NULL)
5862 		acqrec->ipsacq_next->ipsacq_ptpn = acqrec->ipsacq_ptpn;
5863 
5864 	if (acqrec->ipsacq_tsl != NULL) {
5865 		label_rele(acqrec->ipsacq_tsl);
5866 		acqrec->ipsacq_tsl = NULL;
5867 	}
5868 
5869 	/*
5870 	 * Free hanging mp's.
5871 	 *
5872 	 * XXX Instead of freemsg(), perhaps use IPSEC_REQ_FAILED.
5873 	 */
5874 
5875 	mutex_enter(&acqrec->ipsacq_lock);
5876 	while (acqrec->ipsacq_mp != NULL) {
5877 		mp = acqrec->ipsacq_mp;
5878 		acqrec->ipsacq_mp = mp->b_next;
5879 		mp->b_next = NULL;
5880 		/* Freeing the async message */
5881 		mp = ip_xmit_attr_free_mblk(mp);
5882 		ip_drop_packet(mp, B_FALSE, NULL,
5883 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
5884 		    &ipss->ipsec_sadb_dropper);
5885 	}
5886 	mutex_exit(&acqrec->ipsacq_lock);
5887 
5888 	/* Free */
5889 	mutex_destroy(&acqrec->ipsacq_lock);
5890 	kmem_free(acqrec, sizeof (*acqrec));
5891 }
5892 
5893 /*
5894  * Destroy an acquire list fanout.
5895  */
5896 static void
5897 sadb_destroy_acqlist(iacqf_t **listp, uint_t numentries, boolean_t forever,
5898     netstack_t *ns)
5899 {
5900 	int i;
5901 	iacqf_t *list = *listp;
5902 
5903 	if (list == NULL)
5904 		return;
5905 
5906 	for (i = 0; i < numentries; i++) {
5907 		mutex_enter(&(list[i].iacqf_lock));
5908 		while (list[i].iacqf_ipsacq != NULL)
5909 			sadb_destroy_acquire(list[i].iacqf_ipsacq, ns);
5910 		mutex_exit(&(list[i].iacqf_lock));
5911 		if (forever)
5912 			mutex_destroy(&(list[i].iacqf_lock));
5913 	}
5914 
5915 	if (forever) {
5916 		*listp = NULL;
5917 		kmem_free(list, numentries * sizeof (*list));
5918 	}
5919 }
5920 
5921 /*
5922  * Create an algorithm descriptor for an extended ACQUIRE.  Filter crypto
5923  * framework's view of reality vs. IPsec's.  EF's wins, BTW.
5924  */
5925 static uint8_t *
5926 sadb_new_algdesc(uint8_t *start, uint8_t *limit,
5927     sadb_x_ecomb_t *ecomb, uint8_t satype, uint8_t algtype,
5928     uint8_t alg, uint16_t minbits, uint16_t maxbits, ipsec_stack_t *ipss)
5929 {
5930 	uint8_t *cur = start;
5931 	ipsec_alginfo_t *algp;
5932 	sadb_x_algdesc_t *algdesc = (sadb_x_algdesc_t *)cur;
5933 
5934 	cur += sizeof (*algdesc);
5935 	if (cur >= limit)
5936 		return (NULL);
5937 
5938 	ecomb->sadb_x_ecomb_numalgs++;
5939 
5940 	/*
5941 	 * Normalize vs. crypto framework's limits.  This way, you can specify
5942 	 * a stronger policy, and when the framework loads a stronger version,
5943 	 * you can just keep plowing w/o rewhacking your SPD.
5944 	 */
5945 	rw_enter(&ipss->ipsec_alg_lock, RW_READER);
5946 	algp = ipss->ipsec_alglists[(algtype == SADB_X_ALGTYPE_AUTH) ?
5947 	    IPSEC_ALG_AUTH : IPSEC_ALG_ENCR][alg];
5948 	if (algp == NULL) {
5949 		rw_exit(&ipss->ipsec_alg_lock);
5950 		return (NULL);	/* Algorithm doesn't exist.  Fail gracefully. */
5951 	}
5952 	if (minbits < algp->alg_ef_minbits)
5953 		minbits = algp->alg_ef_minbits;
5954 	if (maxbits > algp->alg_ef_maxbits)
5955 		maxbits = algp->alg_ef_maxbits;
5956 	rw_exit(&ipss->ipsec_alg_lock);
5957 
5958 	algdesc->sadb_x_algdesc_saltbits = SADB_8TO1(algp->alg_saltlen);
5959 	algdesc->sadb_x_algdesc_satype = satype;
5960 	algdesc->sadb_x_algdesc_algtype = algtype;
5961 	algdesc->sadb_x_algdesc_alg = alg;
5962 	algdesc->sadb_x_algdesc_minbits = minbits;
5963 	algdesc->sadb_x_algdesc_maxbits = maxbits;
5964 
5965 	return (cur);
5966 }
5967 
5968 /*
5969  * Convert the given ipsec_action_t into an ecomb starting at *ecomb
5970  * which must fit before *limit
5971  *
5972  * return NULL if we ran out of room or a pointer to the end of the ecomb.
5973  */
5974 static uint8_t *
5975 sadb_action_to_ecomb(uint8_t *start, uint8_t *limit, ipsec_action_t *act,
5976     netstack_t *ns)
5977 {
5978 	uint8_t *cur = start;
5979 	sadb_x_ecomb_t *ecomb = (sadb_x_ecomb_t *)cur;
5980 	ipsec_prot_t *ipp;
5981 	ipsec_stack_t *ipss = ns->netstack_ipsec;
5982 
5983 	cur += sizeof (*ecomb);
5984 	if (cur >= limit)
5985 		return (NULL);
5986 
5987 	ASSERT(act->ipa_act.ipa_type == IPSEC_ACT_APPLY);
5988 
5989 	ipp = &act->ipa_act.ipa_apply;
5990 
5991 	ecomb->sadb_x_ecomb_numalgs = 0;
5992 	ecomb->sadb_x_ecomb_reserved = 0;
5993 	ecomb->sadb_x_ecomb_reserved2 = 0;
5994 	/*
5995 	 * No limits on allocations, since we really don't support that
5996 	 * concept currently.
5997 	 */
5998 	ecomb->sadb_x_ecomb_soft_allocations = 0;
5999 	ecomb->sadb_x_ecomb_hard_allocations = 0;
6000 
6001 	/*
6002 	 * XXX TBD: Policy or global parameters will eventually be
6003 	 * able to fill in some of these.
6004 	 */
6005 	ecomb->sadb_x_ecomb_flags = 0;
6006 	ecomb->sadb_x_ecomb_soft_bytes = 0;
6007 	ecomb->sadb_x_ecomb_hard_bytes = 0;
6008 	ecomb->sadb_x_ecomb_soft_addtime = 0;
6009 	ecomb->sadb_x_ecomb_hard_addtime = 0;
6010 	ecomb->sadb_x_ecomb_soft_usetime = 0;
6011 	ecomb->sadb_x_ecomb_hard_usetime = 0;
6012 
6013 	if (ipp->ipp_use_ah) {
6014 		cur = sadb_new_algdesc(cur, limit, ecomb,
6015 		    SADB_SATYPE_AH, SADB_X_ALGTYPE_AUTH, ipp->ipp_auth_alg,
6016 		    ipp->ipp_ah_minbits, ipp->ipp_ah_maxbits, ipss);
6017 		if (cur == NULL)
6018 			return (NULL);
6019 		ipsecah_fill_defs(ecomb, ns);
6020 	}
6021 
6022 	if (ipp->ipp_use_esp) {
6023 		if (ipp->ipp_use_espa) {
6024 			cur = sadb_new_algdesc(cur, limit, ecomb,
6025 			    SADB_SATYPE_ESP, SADB_X_ALGTYPE_AUTH,
6026 			    ipp->ipp_esp_auth_alg,
6027 			    ipp->ipp_espa_minbits,
6028 			    ipp->ipp_espa_maxbits, ipss);
6029 			if (cur == NULL)
6030 				return (NULL);
6031 		}
6032 
6033 		cur = sadb_new_algdesc(cur, limit, ecomb,
6034 		    SADB_SATYPE_ESP, SADB_X_ALGTYPE_CRYPT,
6035 		    ipp->ipp_encr_alg,
6036 		    ipp->ipp_espe_minbits,
6037 		    ipp->ipp_espe_maxbits, ipss);
6038 		if (cur == NULL)
6039 			return (NULL);
6040 		/* Fill in lifetimes if and only if AH didn't already... */
6041 		if (!ipp->ipp_use_ah)
6042 			ipsecesp_fill_defs(ecomb, ns);
6043 	}
6044 
6045 	return (cur);
6046 }
6047 
6048 #include <sys/tsol/label_macro.h> /* XXX should not need this */
6049 
6050 /*
6051  * From a cred_t, construct a sensitivity label extension
6052  *
6053  * We send up a fixed-size sensitivity label bitmap, and are perhaps
6054  * overly chummy with the underlying data structures here.
6055  */
6056 
6057 /* ARGSUSED */
6058 int
6059 sadb_sens_len_from_label(ts_label_t *tsl)
6060 {
6061 	int baselen = sizeof (sadb_sens_t) + _C_LEN * 4;
6062 	return (roundup(baselen, sizeof (uint64_t)));
6063 }
6064 
6065 void
6066 sadb_sens_from_label(sadb_sens_t *sens, int exttype, ts_label_t *tsl,
6067     int senslen)
6068 {
6069 	uint8_t *bitmap;
6070 	bslabel_t *sl;
6071 
6072 	/* LINTED */
6073 	ASSERT((_C_LEN & 1) == 0);
6074 	ASSERT((senslen & 7) == 0);
6075 
6076 	sl = label2bslabel(tsl);
6077 
6078 	sens->sadb_sens_exttype = exttype;
6079 	sens->sadb_sens_len = SADB_8TO64(senslen);
6080 
6081 	sens->sadb_sens_dpd = tsl->tsl_doi;
6082 	sens->sadb_sens_sens_level = LCLASS(sl);
6083 	sens->sadb_sens_integ_level = 0; /* TBD */
6084 	sens->sadb_sens_sens_len = _C_LEN >> 1;
6085 	sens->sadb_sens_integ_len = 0; /* TBD */
6086 	sens->sadb_x_sens_flags = 0;
6087 
6088 	bitmap = (uint8_t *)(sens + 1);
6089 	bcopy(&(((_bslabel_impl_t *)sl)->compartments), bitmap, _C_LEN * 4);
6090 }
6091 
6092 /*
6093  * Okay, how do we report errors/invalid labels from this?
6094  * With a special designated "not a label" cred_t ?
6095  */
6096 /* ARGSUSED */
6097 ts_label_t *
6098 sadb_label_from_sens(sadb_sens_t *sens, uint64_t *bitmap)
6099 {
6100 	int bitmap_len = SADB_64TO8(sens->sadb_sens_sens_len);
6101 	bslabel_t sl;
6102 	ts_label_t *tsl;
6103 
6104 	if (sens->sadb_sens_integ_level != 0)
6105 		return (NULL);
6106 	if (sens->sadb_sens_integ_len != 0)
6107 		return (NULL);
6108 	if (bitmap_len > _C_LEN * 4)
6109 		return (NULL);
6110 
6111 	bsllow(&sl);
6112 	LCLASS_SET((_bslabel_impl_t *)&sl,
6113 	    (uint16_t)sens->sadb_sens_sens_level);
6114 	bcopy(bitmap, &((_bslabel_impl_t *)&sl)->compartments,
6115 	    bitmap_len);
6116 
6117 	tsl = labelalloc(&sl, sens->sadb_sens_dpd, KM_NOSLEEP);
6118 	if (tsl == NULL)
6119 		return (NULL);
6120 
6121 	if (sens->sadb_x_sens_flags & SADB_X_SENS_UNLABELED)
6122 		tsl->tsl_flags |= TSLF_UNLABELED;
6123 	return (tsl);
6124 }
6125 
6126 /* End XXX label-library-leakage */
6127 
6128 /*
6129  * Given an SADB_GETSPI message, find an appropriately ranged SA and
6130  * allocate an SA.  If there are message improprieties, return (ipsa_t *)-1.
6131  * If there was a memory allocation error, return NULL.	 (Assume NULL !=
6132  * (ipsa_t *)-1).
6133  *
6134  * master_spi is passed in host order.
6135  */
6136 ipsa_t *
6137 sadb_getspi(keysock_in_t *ksi, uint32_t master_spi, int *diagnostic,
6138     netstack_t *ns, uint_t sa_type)
6139 {
6140 	sadb_address_t *src =
6141 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC],
6142 	    *dst = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
6143 	sadb_spirange_t *range =
6144 	    (sadb_spirange_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
6145 	struct sockaddr_in *ssa, *dsa;
6146 	struct sockaddr_in6 *ssa6, *dsa6;
6147 	uint32_t *srcaddr, *dstaddr;
6148 	sa_family_t af;
6149 	uint32_t add, min, max;
6150 	uint8_t protocol =
6151 	    (sa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP;
6152 
6153 	if (src == NULL) {
6154 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
6155 		return ((ipsa_t *)-1);
6156 	}
6157 	if (dst == NULL) {
6158 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
6159 		return ((ipsa_t *)-1);
6160 	}
6161 	if (range == NULL) {
6162 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_RANGE;
6163 		return ((ipsa_t *)-1);
6164 	}
6165 
6166 	min = ntohl(range->sadb_spirange_min);
6167 	max = ntohl(range->sadb_spirange_max);
6168 	dsa = (struct sockaddr_in *)(dst + 1);
6169 	dsa6 = (struct sockaddr_in6 *)dsa;
6170 
6171 	ssa = (struct sockaddr_in *)(src + 1);
6172 	ssa6 = (struct sockaddr_in6 *)ssa;
6173 	ASSERT(dsa->sin_family == ssa->sin_family);
6174 
6175 	srcaddr = ALL_ZEROES_PTR;
6176 	af = dsa->sin_family;
6177 	switch (af) {
6178 	case AF_INET:
6179 		if (src != NULL)
6180 			srcaddr = (uint32_t *)(&ssa->sin_addr);
6181 		dstaddr = (uint32_t *)(&dsa->sin_addr);
6182 		break;
6183 	case AF_INET6:
6184 		if (src != NULL)
6185 			srcaddr = (uint32_t *)(&ssa6->sin6_addr);
6186 		dstaddr = (uint32_t *)(&dsa6->sin6_addr);
6187 		break;
6188 	default:
6189 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
6190 		return ((ipsa_t *)-1);
6191 	}
6192 
6193 	if (master_spi < min || master_spi > max) {
6194 		/* Return a random value in the range. */
6195 		if (cl_inet_getspi) {
6196 			cl_inet_getspi(ns->netstack_stackid, protocol,
6197 			    (uint8_t *)&add, sizeof (add), NULL);
6198 		} else {
6199 			(void) random_get_pseudo_bytes((uint8_t *)&add,
6200 			    sizeof (add));
6201 		}
6202 		master_spi = min + (add % (max - min + 1));
6203 	}
6204 
6205 	/*
6206 	 * Since master_spi is passed in host order, we need to htonl() it
6207 	 * for the purposes of creating a new SA.
6208 	 */
6209 	return (sadb_makelarvalassoc(htonl(master_spi), srcaddr, dstaddr, af,
6210 	    ns));
6211 }
6212 
6213 /*
6214  *
6215  * Locate an ACQUIRE and nuke it.  If I have an samsg that's larger than the
6216  * base header, just ignore it.	 Otherwise, lock down the whole ACQUIRE list
6217  * and scan for the sequence number in question.  I may wish to accept an
6218  * address pair with it, for easier searching.
6219  *
6220  * Caller frees the message, so we don't have to here.
6221  *
6222  * NOTE:	The pfkey_q parameter may be used in the future for ACQUIRE
6223  *		failures.
6224  */
6225 /* ARGSUSED */
6226 void
6227 sadb_in_acquire(sadb_msg_t *samsg, sadbp_t *sp, queue_t *pfkey_q,
6228     netstack_t *ns)
6229 {
6230 	int i;
6231 	ipsacq_t *acqrec;
6232 	iacqf_t *bucket;
6233 
6234 	/*
6235 	 * I only accept the base header for this!
6236 	 * Though to be honest, requiring the dst address would help
6237 	 * immensely.
6238 	 *
6239 	 * XXX	There are already cases where I can get the dst address.
6240 	 */
6241 	if (samsg->sadb_msg_len > SADB_8TO64(sizeof (*samsg)))
6242 		return;
6243 
6244 	/*
6245 	 * Using the samsg->sadb_msg_seq, find the ACQUIRE record, delete it,
6246 	 * (and in the future send a message to IP with the appropriate error
6247 	 * number).
6248 	 *
6249 	 * Q: Do I want to reject if pid != 0?
6250 	 */
6251 
6252 	for (i = 0; i < sp->s_v4.sdb_hashsize; i++) {
6253 		bucket = &sp->s_v4.sdb_acq[i];
6254 		mutex_enter(&bucket->iacqf_lock);
6255 		for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6256 		    acqrec = acqrec->ipsacq_next) {
6257 			if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6258 				break;	/* for acqrec... loop. */
6259 		}
6260 		if (acqrec != NULL)
6261 			break;	/* for i = 0... loop. */
6262 
6263 		mutex_exit(&bucket->iacqf_lock);
6264 	}
6265 
6266 	if (acqrec == NULL) {
6267 		for (i = 0; i < sp->s_v6.sdb_hashsize; i++) {
6268 			bucket = &sp->s_v6.sdb_acq[i];
6269 			mutex_enter(&bucket->iacqf_lock);
6270 			for (acqrec = bucket->iacqf_ipsacq; acqrec != NULL;
6271 			    acqrec = acqrec->ipsacq_next) {
6272 				if (samsg->sadb_msg_seq == acqrec->ipsacq_seq)
6273 					break;	/* for acqrec... loop. */
6274 			}
6275 			if (acqrec != NULL)
6276 				break;	/* for i = 0... loop. */
6277 
6278 			mutex_exit(&bucket->iacqf_lock);
6279 		}
6280 	}
6281 
6282 
6283 	if (acqrec == NULL)
6284 		return;
6285 
6286 	/*
6287 	 * What do I do with the errno and IP?	I may need mp's services a
6288 	 * little more.	 See sadb_destroy_acquire() for future directions
6289 	 * beyond free the mblk chain on the acquire record.
6290 	 */
6291 
6292 	ASSERT(&bucket->iacqf_lock == acqrec->ipsacq_linklock);
6293 	sadb_destroy_acquire(acqrec, ns);
6294 	/* Have to exit mutex here, because of breaking out of for loop. */
6295 	mutex_exit(&bucket->iacqf_lock);
6296 }
6297 
6298 /*
6299  * The following functions work with the replay windows of an SA.  They assume
6300  * the ipsa->ipsa_replay_arr is an array of uint64_t, and that the bit vector
6301  * represents the highest sequence number packet received, and back
6302  * (ipsa->ipsa_replay_wsize) packets.
6303  */
6304 
6305 /*
6306  * Is the replay bit set?
6307  */
6308 static boolean_t
6309 ipsa_is_replay_set(ipsa_t *ipsa, uint32_t offset)
6310 {
6311 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6312 
6313 	return ((bit & ipsa->ipsa_replay_arr[offset >> 6]) ? B_TRUE : B_FALSE);
6314 }
6315 
6316 /*
6317  * Shift the bits of the replay window over.
6318  */
6319 static void
6320 ipsa_shift_replay(ipsa_t *ipsa, uint32_t shift)
6321 {
6322 	int i;
6323 	int jump = ((shift - 1) >> 6) + 1;
6324 
6325 	if (shift == 0)
6326 		return;
6327 
6328 	for (i = (ipsa->ipsa_replay_wsize - 1) >> 6; i >= 0; i--) {
6329 		if (i + jump <= (ipsa->ipsa_replay_wsize - 1) >> 6) {
6330 			ipsa->ipsa_replay_arr[i + jump] |=
6331 			    ipsa->ipsa_replay_arr[i] >> (64 - (shift & 63));
6332 		}
6333 		ipsa->ipsa_replay_arr[i] <<= shift;
6334 	}
6335 }
6336 
6337 /*
6338  * Set a bit in the bit vector.
6339  */
6340 static void
6341 ipsa_set_replay(ipsa_t *ipsa, uint32_t offset)
6342 {
6343 	uint64_t bit = (uint64_t)1 << (uint64_t)(offset & 63);
6344 
6345 	ipsa->ipsa_replay_arr[offset >> 6] |= bit;
6346 }
6347 
6348 #define	SADB_MAX_REPLAY_VALUE 0xffffffff
6349 
6350 /*
6351  * Assume caller has NOT done ntohl() already on seq.  Check to see
6352  * if replay sequence number "seq" has been seen already.
6353  */
6354 boolean_t
6355 sadb_replay_check(ipsa_t *ipsa, uint32_t seq)
6356 {
6357 	boolean_t rc;
6358 	uint32_t diff;
6359 
6360 	if (ipsa->ipsa_replay_wsize == 0)
6361 		return (B_TRUE);
6362 
6363 	/*
6364 	 * NOTE:  I've already checked for 0 on the wire in sadb_replay_peek().
6365 	 */
6366 
6367 	/* Convert sequence number into host order before holding the mutex. */
6368 	seq = ntohl(seq);
6369 
6370 	mutex_enter(&ipsa->ipsa_lock);
6371 
6372 	/* Initialize inbound SA's ipsa_replay field to last one received. */
6373 	if (ipsa->ipsa_replay == 0)
6374 		ipsa->ipsa_replay = 1;
6375 
6376 	if (seq > ipsa->ipsa_replay) {
6377 		/*
6378 		 * I have received a new "highest value received".  Shift
6379 		 * the replay window over.
6380 		 */
6381 		diff = seq - ipsa->ipsa_replay;
6382 		if (diff < ipsa->ipsa_replay_wsize) {
6383 			/* In replay window, shift bits over. */
6384 			ipsa_shift_replay(ipsa, diff);
6385 		} else {
6386 			/* WAY FAR AHEAD, clear bits and start again. */
6387 			bzero(ipsa->ipsa_replay_arr,
6388 			    sizeof (ipsa->ipsa_replay_arr));
6389 		}
6390 		ipsa_set_replay(ipsa, 0);
6391 		ipsa->ipsa_replay = seq;
6392 		rc = B_TRUE;
6393 		goto done;
6394 	}
6395 	diff = ipsa->ipsa_replay - seq;
6396 	if (diff >= ipsa->ipsa_replay_wsize || ipsa_is_replay_set(ipsa, diff)) {
6397 		rc = B_FALSE;
6398 		goto done;
6399 	}
6400 	/* Set this packet as seen. */
6401 	ipsa_set_replay(ipsa, diff);
6402 
6403 	rc = B_TRUE;
6404 done:
6405 	mutex_exit(&ipsa->ipsa_lock);
6406 	return (rc);
6407 }
6408 
6409 /*
6410  * "Peek" and see if we should even bother going through the effort of
6411  * running an authentication check on the sequence number passed in.
6412  * this takes into account packets that are below the replay window,
6413  * and collisions with already replayed packets.  Return B_TRUE if it
6414  * is okay to proceed, B_FALSE if this packet should be dropped immediately.
6415  * Assume same byte-ordering as sadb_replay_check.
6416  */
6417 boolean_t
6418 sadb_replay_peek(ipsa_t *ipsa, uint32_t seq)
6419 {
6420 	boolean_t rc = B_FALSE;
6421 	uint32_t diff;
6422 
6423 	if (ipsa->ipsa_replay_wsize == 0)
6424 		return (B_TRUE);
6425 
6426 	/*
6427 	 * 0 is 0, regardless of byte order... :)
6428 	 *
6429 	 * If I get 0 on the wire (and there is a replay window) then the
6430 	 * sender most likely wrapped.	This ipsa may need to be marked or
6431 	 * something.
6432 	 */
6433 	if (seq == 0)
6434 		return (B_FALSE);
6435 
6436 	seq = ntohl(seq);
6437 	mutex_enter(&ipsa->ipsa_lock);
6438 	if (seq < ipsa->ipsa_replay - ipsa->ipsa_replay_wsize &&
6439 	    ipsa->ipsa_replay >= ipsa->ipsa_replay_wsize)
6440 		goto done;
6441 
6442 	/*
6443 	 * If I've hit 0xffffffff, then quite honestly, I don't need to
6444 	 * bother with formalities.  I'm not accepting any more packets
6445 	 * on this SA.
6446 	 */
6447 	if (ipsa->ipsa_replay == SADB_MAX_REPLAY_VALUE) {
6448 		/*
6449 		 * Since we're already holding the lock, update the
6450 		 * expire time ala. sadb_replay_delete() and return.
6451 		 */
6452 		ipsa->ipsa_hardexpiretime = (time_t)1;
6453 		goto done;
6454 	}
6455 
6456 	if (seq <= ipsa->ipsa_replay) {
6457 		/*
6458 		 * This seq is in the replay window.  I'm not below it,
6459 		 * because I already checked for that above!
6460 		 */
6461 		diff = ipsa->ipsa_replay - seq;
6462 		if (ipsa_is_replay_set(ipsa, diff))
6463 			goto done;
6464 	}
6465 	/* Else return B_TRUE, I'm going to advance the window. */
6466 
6467 	rc = B_TRUE;
6468 done:
6469 	mutex_exit(&ipsa->ipsa_lock);
6470 	return (rc);
6471 }
6472 
6473 /*
6474  * Delete a single SA.
6475  *
6476  * For now, use the quick-and-dirty trick of making the association's
6477  * hard-expire lifetime (time_t)1, ensuring deletion by the *_ager().
6478  */
6479 void
6480 sadb_replay_delete(ipsa_t *assoc)
6481 {
6482 	mutex_enter(&assoc->ipsa_lock);
6483 	assoc->ipsa_hardexpiretime = (time_t)1;
6484 	mutex_exit(&assoc->ipsa_lock);
6485 }
6486 
6487 /*
6488  * Special front-end to ipsec_rl_strlog() dealing with SA failure.
6489  * this is designed to take only a format string with "* %x * %s *", so
6490  * that "spi" is printed first, then "addr" is converted using inet_pton().
6491  *
6492  * This is abstracted out to save the stack space for only when inet_pton()
6493  * is called.  Make sure "spi" is in network order; it usually is when this
6494  * would get called.
6495  */
6496 void
6497 ipsec_assocfailure(short mid, short sid, char level, ushort_t sl, char *fmt,
6498     uint32_t spi, void *addr, int af, netstack_t *ns)
6499 {
6500 	char buf[INET6_ADDRSTRLEN];
6501 
6502 	ASSERT(af == AF_INET6 || af == AF_INET);
6503 
6504 	ipsec_rl_strlog(ns, mid, sid, level, sl, fmt, ntohl(spi),
6505 	    inet_ntop(af, addr, buf, sizeof (buf)));
6506 }
6507 
6508 /*
6509  * Fills in a reference to the policy, if any, from the conn, in *ppp
6510  */
6511 static void
6512 ipsec_conn_pol(ipsec_selector_t *sel, conn_t *connp, ipsec_policy_t **ppp)
6513 {
6514 	ipsec_policy_t	*pp;
6515 	ipsec_latch_t	*ipl = connp->conn_latch;
6516 
6517 	if ((ipl != NULL) && (connp->conn_ixa->ixa_ipsec_policy != NULL)) {
6518 		pp = connp->conn_ixa->ixa_ipsec_policy;
6519 		IPPOL_REFHOLD(pp);
6520 	} else {
6521 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, connp, sel,
6522 		    connp->conn_netstack);
6523 	}
6524 	*ppp = pp;
6525 }
6526 
6527 /*
6528  * The following functions scan through active conn_t structures
6529  * and return a reference to the best-matching policy it can find.
6530  * Caller must release the reference.
6531  */
6532 static void
6533 ipsec_udp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6534 {
6535 	connf_t *connfp;
6536 	conn_t *connp = NULL;
6537 	ipsec_selector_t portonly;
6538 
6539 	bzero((void *)&portonly, sizeof (portonly));
6540 
6541 	if (sel->ips_local_port == 0)
6542 		return;
6543 
6544 	connfp = &ipst->ips_ipcl_udp_fanout[IPCL_UDP_HASH(sel->ips_local_port,
6545 	    ipst)];
6546 	mutex_enter(&connfp->connf_lock);
6547 
6548 	if (sel->ips_isv4) {
6549 		connp = connfp->connf_head;
6550 		while (connp != NULL) {
6551 			if (IPCL_UDP_MATCH(connp, sel->ips_local_port,
6552 			    sel->ips_local_addr_v4, sel->ips_remote_port,
6553 			    sel->ips_remote_addr_v4))
6554 				break;
6555 			connp = connp->conn_next;
6556 		}
6557 
6558 		if (connp == NULL) {
6559 			/* Try port-only match in IPv6. */
6560 			portonly.ips_local_port = sel->ips_local_port;
6561 			sel = &portonly;
6562 		}
6563 	}
6564 
6565 	if (connp == NULL) {
6566 		connp = connfp->connf_head;
6567 		while (connp != NULL) {
6568 			if (IPCL_UDP_MATCH_V6(connp, sel->ips_local_port,
6569 			    sel->ips_local_addr_v6, sel->ips_remote_port,
6570 			    sel->ips_remote_addr_v6))
6571 				break;
6572 			connp = connp->conn_next;
6573 		}
6574 
6575 		if (connp == NULL) {
6576 			mutex_exit(&connfp->connf_lock);
6577 			return;
6578 		}
6579 	}
6580 
6581 	CONN_INC_REF(connp);
6582 	mutex_exit(&connfp->connf_lock);
6583 
6584 	ipsec_conn_pol(sel, connp, ppp);
6585 	CONN_DEC_REF(connp);
6586 }
6587 
6588 static conn_t *
6589 ipsec_find_listen_conn(uint16_t *pptr, ipsec_selector_t *sel, ip_stack_t *ipst)
6590 {
6591 	connf_t *connfp;
6592 	conn_t *connp = NULL;
6593 	const in6_addr_t *v6addrmatch = &sel->ips_local_addr_v6;
6594 
6595 	if (sel->ips_local_port == 0)
6596 		return (NULL);
6597 
6598 	connfp = &ipst->ips_ipcl_bind_fanout[
6599 	    IPCL_BIND_HASH(sel->ips_local_port, ipst)];
6600 	mutex_enter(&connfp->connf_lock);
6601 
6602 	if (sel->ips_isv4) {
6603 		connp = connfp->connf_head;
6604 		while (connp != NULL) {
6605 			if (IPCL_BIND_MATCH(connp, IPPROTO_TCP,
6606 			    sel->ips_local_addr_v4, pptr[1]))
6607 				break;
6608 			connp = connp->conn_next;
6609 		}
6610 
6611 		if (connp == NULL) {
6612 			/* Match to all-zeroes. */
6613 			v6addrmatch = &ipv6_all_zeros;
6614 		}
6615 	}
6616 
6617 	if (connp == NULL) {
6618 		connp = connfp->connf_head;
6619 		while (connp != NULL) {
6620 			if (IPCL_BIND_MATCH_V6(connp, IPPROTO_TCP,
6621 			    *v6addrmatch, pptr[1]))
6622 				break;
6623 			connp = connp->conn_next;
6624 		}
6625 
6626 		if (connp == NULL) {
6627 			mutex_exit(&connfp->connf_lock);
6628 			return (NULL);
6629 		}
6630 	}
6631 
6632 	CONN_INC_REF(connp);
6633 	mutex_exit(&connfp->connf_lock);
6634 	return (connp);
6635 }
6636 
6637 static void
6638 ipsec_tcp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp, ip_stack_t *ipst)
6639 {
6640 	connf_t		*connfp;
6641 	conn_t		*connp;
6642 	uint32_t	ports;
6643 	uint16_t	*pptr = (uint16_t *)&ports;
6644 
6645 	/*
6646 	 * Find TCP state in the following order:
6647 	 * 1.) Connected conns.
6648 	 * 2.) Listeners.
6649 	 *
6650 	 * Even though #2 will be the common case for inbound traffic, only
6651 	 * following this order insures correctness.
6652 	 */
6653 
6654 	if (sel->ips_local_port == 0)
6655 		return;
6656 
6657 	/*
6658 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6659 	 * See ipsec_construct_inverse_acquire() for details.
6660 	 */
6661 	pptr[0] = sel->ips_remote_port;
6662 	pptr[1] = sel->ips_local_port;
6663 
6664 	connfp = &ipst->ips_ipcl_conn_fanout[
6665 	    IPCL_CONN_HASH(sel->ips_remote_addr_v4, ports, ipst)];
6666 	mutex_enter(&connfp->connf_lock);
6667 	connp = connfp->connf_head;
6668 
6669 	if (sel->ips_isv4) {
6670 		while (connp != NULL) {
6671 			if (IPCL_CONN_MATCH(connp, IPPROTO_TCP,
6672 			    sel->ips_remote_addr_v4, sel->ips_local_addr_v4,
6673 			    ports))
6674 				break;
6675 			connp = connp->conn_next;
6676 		}
6677 	} else {
6678 		while (connp != NULL) {
6679 			if (IPCL_CONN_MATCH_V6(connp, IPPROTO_TCP,
6680 			    sel->ips_remote_addr_v6, sel->ips_local_addr_v6,
6681 			    ports))
6682 				break;
6683 			connp = connp->conn_next;
6684 		}
6685 	}
6686 
6687 	if (connp != NULL) {
6688 		CONN_INC_REF(connp);
6689 		mutex_exit(&connfp->connf_lock);
6690 	} else {
6691 		mutex_exit(&connfp->connf_lock);
6692 
6693 		/* Try the listen hash. */
6694 		if ((connp = ipsec_find_listen_conn(pptr, sel, ipst)) == NULL)
6695 			return;
6696 	}
6697 
6698 	ipsec_conn_pol(sel, connp, ppp);
6699 	CONN_DEC_REF(connp);
6700 }
6701 
6702 static void
6703 ipsec_sctp_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6704     ip_stack_t *ipst)
6705 {
6706 	conn_t		*connp;
6707 	uint32_t	ports;
6708 	uint16_t	*pptr = (uint16_t *)&ports;
6709 
6710 	/*
6711 	 * Find SCP state in the following order:
6712 	 * 1.) Connected conns.
6713 	 * 2.) Listeners.
6714 	 *
6715 	 * Even though #2 will be the common case for inbound traffic, only
6716 	 * following this order insures correctness.
6717 	 */
6718 
6719 	if (sel->ips_local_port == 0)
6720 		return;
6721 
6722 	/*
6723 	 * 0 should be fport, 1 should be lport.  SRC is the local one here.
6724 	 * See ipsec_construct_inverse_acquire() for details.
6725 	 */
6726 	pptr[0] = sel->ips_remote_port;
6727 	pptr[1] = sel->ips_local_port;
6728 
6729 	/*
6730 	 * For labeled systems, there's no need to check the
6731 	 * label here.  It's known to be good as we checked
6732 	 * before allowing the connection to become bound.
6733 	 */
6734 	if (sel->ips_isv4) {
6735 		in6_addr_t	src, dst;
6736 
6737 		IN6_IPADDR_TO_V4MAPPED(sel->ips_remote_addr_v4, &dst);
6738 		IN6_IPADDR_TO_V4MAPPED(sel->ips_local_addr_v4, &src);
6739 		connp = sctp_find_conn(&dst, &src, ports, ALL_ZONES,
6740 		    0, ipst->ips_netstack->netstack_sctp);
6741 	} else {
6742 		connp = sctp_find_conn(&sel->ips_remote_addr_v6,
6743 		    &sel->ips_local_addr_v6, ports, ALL_ZONES,
6744 		    0, ipst->ips_netstack->netstack_sctp);
6745 	}
6746 	if (connp == NULL)
6747 		return;
6748 	ipsec_conn_pol(sel, connp, ppp);
6749 	CONN_DEC_REF(connp);
6750 }
6751 
6752 /*
6753  * Fill in a query for the SPD (in "sel") using two PF_KEY address extensions.
6754  * Returns 0 or errno, and always sets *diagnostic to something appropriate
6755  * to PF_KEY.
6756  *
6757  * NOTE:  For right now, this function (and ipsec_selector_t for that matter),
6758  * ignore prefix lengths in the address extension.  Since we match on first-
6759  * entered policies, this shouldn't matter.  Also, since we normalize prefix-
6760  * set addresses to mask out the lower bits, we should get a suitable search
6761  * key for the SPD anyway.  This is the function to change if the assumption
6762  * about suitable search keys is wrong.
6763  */
6764 static int
6765 ipsec_get_inverse_acquire_sel(ipsec_selector_t *sel, sadb_address_t *srcext,
6766     sadb_address_t *dstext, int *diagnostic)
6767 {
6768 	struct sockaddr_in *src, *dst;
6769 	struct sockaddr_in6 *src6, *dst6;
6770 
6771 	*diagnostic = 0;
6772 
6773 	bzero(sel, sizeof (*sel));
6774 	sel->ips_protocol = srcext->sadb_address_proto;
6775 	dst = (struct sockaddr_in *)(dstext + 1);
6776 	if (dst->sin_family == AF_INET6) {
6777 		dst6 = (struct sockaddr_in6 *)dst;
6778 		src6 = (struct sockaddr_in6 *)(srcext + 1);
6779 		if (src6->sin6_family != AF_INET6) {
6780 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6781 			return (EINVAL);
6782 		}
6783 		sel->ips_remote_addr_v6 = dst6->sin6_addr;
6784 		sel->ips_local_addr_v6 = src6->sin6_addr;
6785 		if (sel->ips_protocol == IPPROTO_ICMPV6) {
6786 			sel->ips_is_icmp_inv_acq = 1;
6787 		} else {
6788 			sel->ips_remote_port = dst6->sin6_port;
6789 			sel->ips_local_port = src6->sin6_port;
6790 		}
6791 		sel->ips_isv4 = B_FALSE;
6792 	} else {
6793 		src = (struct sockaddr_in *)(srcext + 1);
6794 		if (src->sin_family != AF_INET) {
6795 			*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
6796 			return (EINVAL);
6797 		}
6798 		sel->ips_remote_addr_v4 = dst->sin_addr.s_addr;
6799 		sel->ips_local_addr_v4 = src->sin_addr.s_addr;
6800 		if (sel->ips_protocol == IPPROTO_ICMP) {
6801 			sel->ips_is_icmp_inv_acq = 1;
6802 		} else {
6803 			sel->ips_remote_port = dst->sin_port;
6804 			sel->ips_local_port = src->sin_port;
6805 		}
6806 		sel->ips_isv4 = B_TRUE;
6807 	}
6808 	return (0);
6809 }
6810 
6811 /*
6812  * We have encapsulation.
6813  * - Lookup tun_t by address and look for an associated
6814  *   tunnel policy
6815  * - If there are inner selectors
6816  *   - check ITPF_P_TUNNEL and ITPF_P_ACTIVE
6817  *   - Look up tunnel policy based on selectors
6818  * - Else
6819  *   - Sanity check the negotation
6820  *   - If appropriate, fall through to global policy
6821  */
6822 static int
6823 ipsec_tun_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6824     sadb_address_t *innsrcext, sadb_address_t *inndstext, ipsec_tun_pol_t *itp,
6825     int *diagnostic)
6826 {
6827 	int err;
6828 	ipsec_policy_head_t *polhead;
6829 
6830 	*diagnostic = 0;
6831 
6832 	/* Check for inner selectors and act appropriately */
6833 
6834 	if (innsrcext != NULL) {
6835 		/* Inner selectors present */
6836 		ASSERT(inndstext != NULL);
6837 		if ((itp == NULL) ||
6838 		    (itp->itp_flags & (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) !=
6839 		    (ITPF_P_ACTIVE | ITPF_P_TUNNEL)) {
6840 			/*
6841 			 * If inner packet selectors, we must have negotiate
6842 			 * tunnel and active policy.  If the tunnel has
6843 			 * transport-mode policy set on it, or has no policy,
6844 			 * fail.
6845 			 */
6846 			return (ENOENT);
6847 		} else {
6848 			/*
6849 			 * Reset "sel" to indicate inner selectors.  Pass
6850 			 * inner PF_KEY address extensions for this to happen.
6851 			 */
6852 			if ((err = ipsec_get_inverse_acquire_sel(sel,
6853 			    innsrcext, inndstext, diagnostic)) != 0)
6854 				return (err);
6855 			/*
6856 			 * Now look for a tunnel policy based on those inner
6857 			 * selectors.  (Common code is below.)
6858 			 */
6859 		}
6860 	} else {
6861 		/* No inner selectors present */
6862 		if ((itp == NULL) || !(itp->itp_flags & ITPF_P_ACTIVE)) {
6863 			/*
6864 			 * Transport mode negotiation with no tunnel policy
6865 			 * configured - return to indicate a global policy
6866 			 * check is needed.
6867 			 */
6868 			return (0);
6869 		} else if (itp->itp_flags & ITPF_P_TUNNEL) {
6870 			/* Tunnel mode set with no inner selectors. */
6871 			return (ENOENT);
6872 		}
6873 		/*
6874 		 * Else, this is a tunnel policy configured with ifconfig(8)
6875 		 * or "negotiate transport" with ipsecconf(8).  We have an
6876 		 * itp with policy set based on any match, so don't bother
6877 		 * changing fields in "sel".
6878 		 */
6879 	}
6880 
6881 	ASSERT(itp != NULL);
6882 	polhead = itp->itp_policy;
6883 	ASSERT(polhead != NULL);
6884 	rw_enter(&polhead->iph_lock, RW_READER);
6885 	*ppp = ipsec_find_policy_head(NULL, polhead, IPSEC_TYPE_INBOUND, sel);
6886 	rw_exit(&polhead->iph_lock);
6887 
6888 	/*
6889 	 * Don't default to global if we didn't find a matching policy entry.
6890 	 * Instead, send ENOENT, just like if we hit a transport-mode tunnel.
6891 	 */
6892 	if (*ppp == NULL)
6893 		return (ENOENT);
6894 
6895 	return (0);
6896 }
6897 
6898 /*
6899  * For sctp conn_faddr is the primary address, hence this is of limited
6900  * use for sctp.
6901  */
6902 static void
6903 ipsec_oth_pol(ipsec_selector_t *sel, ipsec_policy_t **ppp,
6904     ip_stack_t *ipst)
6905 {
6906 	boolean_t	isv4 = sel->ips_isv4;
6907 	connf_t		*connfp;
6908 	conn_t		*connp;
6909 
6910 	if (isv4) {
6911 		connfp = &ipst->ips_ipcl_proto_fanout_v4[sel->ips_protocol];
6912 	} else {
6913 		connfp = &ipst->ips_ipcl_proto_fanout_v6[sel->ips_protocol];
6914 	}
6915 
6916 	mutex_enter(&connfp->connf_lock);
6917 	for (connp = connfp->connf_head; connp != NULL;
6918 	    connp = connp->conn_next) {
6919 		if (isv4) {
6920 			if ((connp->conn_laddr_v4 == INADDR_ANY ||
6921 			    connp->conn_laddr_v4 == sel->ips_local_addr_v4) &&
6922 			    (connp->conn_faddr_v4 == INADDR_ANY ||
6923 			    connp->conn_faddr_v4 == sel->ips_remote_addr_v4))
6924 				break;
6925 		} else {
6926 			if ((IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6) ||
6927 			    IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
6928 			    &sel->ips_local_addr_v6)) &&
6929 			    (IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) ||
6930 			    IN6_ARE_ADDR_EQUAL(&connp->conn_faddr_v6,
6931 			    &sel->ips_remote_addr_v6)))
6932 				break;
6933 		}
6934 	}
6935 	if (connp == NULL) {
6936 		mutex_exit(&connfp->connf_lock);
6937 		return;
6938 	}
6939 
6940 	CONN_INC_REF(connp);
6941 	mutex_exit(&connfp->connf_lock);
6942 
6943 	ipsec_conn_pol(sel, connp, ppp);
6944 	CONN_DEC_REF(connp);
6945 }
6946 
6947 /*
6948  * Construct an inverse ACQUIRE reply based on:
6949  *
6950  * 1.) Current global policy.
6951  * 2.) An conn_t match depending on what all was passed in the extv[].
6952  * 3.) A tunnel's policy head.
6953  * ...
6954  * N.) Other stuff TBD (e.g. identities)
6955  *
6956  * If there is an error, set sadb_msg_errno and sadb_x_msg_diagnostic
6957  * in this function so the caller can extract them where appropriately.
6958  *
6959  * The SRC address is the local one - just like an outbound ACQUIRE message.
6960  *
6961  * XXX MLS: key management supplies a label which we just reflect back up
6962  * again.  clearly we need to involve the label in the rest of the checks.
6963  */
6964 mblk_t *
6965 ipsec_construct_inverse_acquire(sadb_msg_t *samsg, sadb_ext_t *extv[],
6966     netstack_t *ns)
6967 {
6968 	int err;
6969 	int diagnostic;
6970 	sadb_address_t *srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC],
6971 	    *dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST],
6972 	    *innsrcext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC],
6973 	    *inndstext = (sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST];
6974 	sadb_sens_t *sens = (sadb_sens_t *)extv[SADB_EXT_SENSITIVITY];
6975 	struct sockaddr_in6 *src, *dst;
6976 	struct sockaddr_in6 *isrc, *idst;
6977 	ipsec_tun_pol_t *itp = NULL;
6978 	ipsec_policy_t *pp = NULL;
6979 	ipsec_selector_t sel, isel;
6980 	mblk_t *retmp = NULL;
6981 	ip_stack_t	*ipst = ns->netstack_ip;
6982 
6983 
6984 	/* Normalize addresses */
6985 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)srcext, 0, ns)
6986 	    == KS_IN_ADDR_UNKNOWN) {
6987 		err = EINVAL;
6988 		diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC;
6989 		goto bail;
6990 	}
6991 	src = (struct sockaddr_in6 *)(srcext + 1);
6992 	if (sadb_addrcheck(NULL, (mblk_t *)samsg, (sadb_ext_t *)dstext, 0, ns)
6993 	    == KS_IN_ADDR_UNKNOWN) {
6994 		err = EINVAL;
6995 		diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
6996 		goto bail;
6997 	}
6998 	dst = (struct sockaddr_in6 *)(dstext + 1);
6999 	if (src->sin6_family != dst->sin6_family) {
7000 		err = EINVAL;
7001 		diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
7002 		goto bail;
7003 	}
7004 
7005 	/* Check for tunnel mode and act appropriately */
7006 	if (innsrcext != NULL) {
7007 		if (inndstext == NULL) {
7008 			err = EINVAL;
7009 			diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
7010 			goto bail;
7011 		}
7012 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
7013 		    (sadb_ext_t *)innsrcext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
7014 			err = EINVAL;
7015 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
7016 			goto bail;
7017 		}
7018 		isrc = (struct sockaddr_in6 *)(innsrcext + 1);
7019 		if (sadb_addrcheck(NULL, (mblk_t *)samsg,
7020 		    (sadb_ext_t *)inndstext, 0, ns) == KS_IN_ADDR_UNKNOWN) {
7021 			err = EINVAL;
7022 			diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
7023 			goto bail;
7024 		}
7025 		idst = (struct sockaddr_in6 *)(inndstext + 1);
7026 		if (isrc->sin6_family != idst->sin6_family) {
7027 			err = EINVAL;
7028 			diagnostic = SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
7029 			goto bail;
7030 		}
7031 		if (isrc->sin6_family != AF_INET &&
7032 		    isrc->sin6_family != AF_INET6) {
7033 			err = EINVAL;
7034 			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_SRC_AF;
7035 			goto bail;
7036 		}
7037 	} else if (inndstext != NULL) {
7038 		err = EINVAL;
7039 		diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
7040 		goto bail;
7041 	}
7042 
7043 	/* Get selectors first, based on outer addresses */
7044 	err = ipsec_get_inverse_acquire_sel(&sel, srcext, dstext, &diagnostic);
7045 	if (err != 0)
7046 		goto bail;
7047 
7048 	/* Check for tunnel mode mismatches. */
7049 	if (innsrcext != NULL &&
7050 	    ((isrc->sin6_family == AF_INET &&
7051 	    sel.ips_protocol != IPPROTO_ENCAP && sel.ips_protocol != 0) ||
7052 	    (isrc->sin6_family == AF_INET6 &&
7053 	    sel.ips_protocol != IPPROTO_IPV6 && sel.ips_protocol != 0))) {
7054 		err = EPROTOTYPE;
7055 		goto bail;
7056 	}
7057 
7058 	/*
7059 	 * Okay, we have the addresses and other selector information.
7060 	 * Let's first find a conn...
7061 	 */
7062 	pp = NULL;
7063 	switch (sel.ips_protocol) {
7064 	case IPPROTO_TCP:
7065 		ipsec_tcp_pol(&sel, &pp, ipst);
7066 		break;
7067 	case IPPROTO_UDP:
7068 		ipsec_udp_pol(&sel, &pp, ipst);
7069 		break;
7070 	case IPPROTO_SCTP:
7071 		ipsec_sctp_pol(&sel, &pp, ipst);
7072 		break;
7073 	case IPPROTO_ENCAP:
7074 	case IPPROTO_IPV6:
7075 		/*
7076 		 * Assume sel.ips_remote_addr_* has the right address at
7077 		 * that exact position.
7078 		 */
7079 		itp = itp_get_byaddr((uint32_t *)(&sel.ips_local_addr_v6),
7080 		    (uint32_t *)(&sel.ips_remote_addr_v6), src->sin6_family,
7081 		    ipst);
7082 
7083 		if (innsrcext == NULL) {
7084 			/*
7085 			 * Transport-mode tunnel, make sure we fake out isel
7086 			 * to contain something based on the outer protocol.
7087 			 */
7088 			bzero(&isel, sizeof (isel));
7089 			isel.ips_isv4 = (sel.ips_protocol == IPPROTO_ENCAP);
7090 		} /* Else isel is initialized by ipsec_tun_pol(). */
7091 		err = ipsec_tun_pol(&isel, &pp, innsrcext, inndstext, itp,
7092 		    &diagnostic);
7093 		/*
7094 		 * NOTE:  isel isn't used for now, but in RFC 430x IPsec, it
7095 		 * may be.
7096 		 */
7097 		if (err != 0)
7098 			goto bail;
7099 		break;
7100 	default:
7101 		ipsec_oth_pol(&sel, &pp, ipst);
7102 		break;
7103 	}
7104 
7105 	/*
7106 	 * If we didn't find a matching conn_t or other policy head, take a
7107 	 * look in the global policy.
7108 	 */
7109 	if (pp == NULL) {
7110 		pp = ipsec_find_policy(IPSEC_TYPE_OUTBOUND, NULL, &sel, ns);
7111 		if (pp == NULL) {
7112 			/* There's no global policy. */
7113 			err = ENOENT;
7114 			diagnostic = 0;
7115 			goto bail;
7116 		}
7117 	}
7118 
7119 	ASSERT(pp != NULL);
7120 	retmp = sadb_acquire_msg_base(0, 0, samsg->sadb_msg_seq,
7121 	    samsg->sadb_msg_pid);
7122 	if (retmp != NULL) {
7123 		/* Remove KEYSOCK_OUT, because caller constructs it instead. */
7124 		mblk_t *kso = retmp;
7125 
7126 		retmp = retmp->b_cont;
7127 		freeb(kso);
7128 		/* Append addresses... */
7129 		retmp->b_cont = sadb_acquire_msg_common(&sel, pp, NULL,
7130 		    (itp != NULL && (itp->itp_flags & ITPF_P_TUNNEL)), NULL,
7131 		    sens);
7132 		if (retmp->b_cont == NULL) {
7133 			freemsg(retmp);
7134 			retmp = NULL;
7135 		}
7136 		/* And the policy result. */
7137 		retmp->b_cont->b_cont =
7138 		    sadb_acquire_extended_prop(pp->ipsp_act, ns);
7139 		if (retmp->b_cont->b_cont == NULL) {
7140 			freemsg(retmp);
7141 			retmp = NULL;
7142 		}
7143 		((sadb_msg_t *)retmp->b_rptr)->sadb_msg_len =
7144 		    SADB_8TO64(msgsize(retmp));
7145 	}
7146 
7147 	if (pp != NULL) {
7148 		IPPOL_REFRELE(pp);
7149 	}
7150 	ASSERT(err == 0 && diagnostic == 0);
7151 	if (retmp == NULL)
7152 		err = ENOMEM;
7153 bail:
7154 	if (itp != NULL) {
7155 		ITP_REFRELE(itp, ns);
7156 	}
7157 	samsg->sadb_msg_errno = (uint8_t)err;
7158 	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
7159 	return (retmp);
7160 }
7161 
7162 /*
7163  * ipsa_lpkt is a one-element queue, only manipulated by the next two
7164  * functions.  They have to hold the ipsa_lock because of potential races
7165  * between key management using SADB_UPDATE, and inbound packets that may
7166  * queue up on the larval SA (hence the 'l' in "lpkt").
7167  */
7168 
7169 /*
7170  * sadb_set_lpkt:
7171  *
7172  * Returns the passed-in packet if the SA is no longer larval.
7173  *
7174  * Returns NULL if the SA is larval, and needs to be swapped into the SA for
7175  * processing after an SADB_UPDATE.
7176  */
7177 mblk_t *
7178 sadb_set_lpkt(ipsa_t *ipsa, mblk_t *npkt, ip_recv_attr_t *ira)
7179 {
7180 	mblk_t		*opkt;
7181 
7182 	mutex_enter(&ipsa->ipsa_lock);
7183 	opkt = ipsa->ipsa_lpkt;
7184 	if (ipsa->ipsa_state == IPSA_STATE_LARVAL) {
7185 		/*
7186 		 * Consume npkt and place it in the LARVAL SA's inbound
7187 		 * packet slot.
7188 		 */
7189 		mblk_t	*attrmp;
7190 
7191 		attrmp = ip_recv_attr_to_mblk(ira);
7192 		if (attrmp == NULL) {
7193 			ill_t *ill = ira->ira_ill;
7194 
7195 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsInDiscards);
7196 			ip_drop_input("ipIfStatsInDiscards", npkt, ill);
7197 			freemsg(npkt);
7198 			opkt = NULL;
7199 		} else {
7200 			ASSERT(attrmp->b_cont == NULL);
7201 			attrmp->b_cont = npkt;
7202 			ipsa->ipsa_lpkt = attrmp;
7203 		}
7204 		npkt = NULL;
7205 	} else {
7206 		/*
7207 		 * If not larval, we lost the race.  NOTE: ipsa_lpkt may still
7208 		 * have been non-NULL in the non-larval case, because of
7209 		 * inbound packets arriving prior to sadb_common_add()
7210 		 * transferring the SA completely out of larval state, but
7211 		 * after lpkt was grabbed by the AH/ESP-specific add routines.
7212 		 * We should clear the old ipsa_lpkt in this case to make sure
7213 		 * that it doesn't linger on the now-MATURE IPsec SA, or get
7214 		 * picked up as an out-of-order packet.
7215 		 */
7216 		ipsa->ipsa_lpkt = NULL;
7217 	}
7218 	mutex_exit(&ipsa->ipsa_lock);
7219 
7220 	if (opkt != NULL) {
7221 		ipsec_stack_t	*ipss;
7222 
7223 		ipss = ira->ira_ill->ill_ipst->ips_netstack->netstack_ipsec;
7224 		opkt = ip_recv_attr_free_mblk(opkt);
7225 		ip_drop_packet(opkt, B_TRUE, ira->ira_ill,
7226 		    DROPPER(ipss, ipds_sadb_inlarval_replace),
7227 		    &ipss->ipsec_sadb_dropper);
7228 	}
7229 	return (npkt);
7230 }
7231 
7232 /*
7233  * sadb_clear_lpkt: Atomically clear ipsa->ipsa_lpkt and return the
7234  * previous value.
7235  */
7236 mblk_t *
7237 sadb_clear_lpkt(ipsa_t *ipsa)
7238 {
7239 	mblk_t *opkt;
7240 
7241 	mutex_enter(&ipsa->ipsa_lock);
7242 	opkt = ipsa->ipsa_lpkt;
7243 	ipsa->ipsa_lpkt = NULL;
7244 	mutex_exit(&ipsa->ipsa_lock);
7245 	return (opkt);
7246 }
7247 
7248 /*
7249  * Buffer a packet that's in IDLE state as set by Solaris Clustering.
7250  */
7251 void
7252 sadb_buf_pkt(ipsa_t *ipsa, mblk_t *bpkt, ip_recv_attr_t *ira)
7253 {
7254 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
7255 	ipsec_stack_t   *ipss = ns->netstack_ipsec;
7256 	in6_addr_t *srcaddr = (in6_addr_t *)(&ipsa->ipsa_srcaddr);
7257 	in6_addr_t *dstaddr = (in6_addr_t *)(&ipsa->ipsa_dstaddr);
7258 	mblk_t		*mp;
7259 
7260 	ASSERT(ipsa->ipsa_state == IPSA_STATE_IDLE);
7261 
7262 	if (cl_inet_idlesa == NULL) {
7263 		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
7264 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
7265 		    &ipss->ipsec_sadb_dropper);
7266 		return;
7267 	}
7268 
7269 	cl_inet_idlesa(ns->netstack_stackid,
7270 	    (ipsa->ipsa_type == SADB_SATYPE_AH) ? IPPROTO_AH : IPPROTO_ESP,
7271 	    ipsa->ipsa_spi, ipsa->ipsa_addrfam, *srcaddr, *dstaddr, NULL);
7272 
7273 	mp = ip_recv_attr_to_mblk(ira);
7274 	if (mp == NULL) {
7275 		ip_drop_packet(bpkt, B_TRUE, ira->ira_ill,
7276 		    DROPPER(ipss, ipds_sadb_inidle_overflow),
7277 		    &ipss->ipsec_sadb_dropper);
7278 		return;
7279 	}
7280 	linkb(mp, bpkt);
7281 
7282 	mutex_enter(&ipsa->ipsa_lock);
7283 	ipsa->ipsa_mblkcnt++;
7284 	if (ipsa->ipsa_bpkt_head == NULL) {
7285 		ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_tail = bpkt;
7286 	} else {
7287 		ipsa->ipsa_bpkt_tail->b_next = bpkt;
7288 		ipsa->ipsa_bpkt_tail = bpkt;
7289 		if (ipsa->ipsa_mblkcnt > SADB_MAX_IDLEPKTS) {
7290 			mblk_t *tmp;
7291 
7292 			tmp = ipsa->ipsa_bpkt_head;
7293 			ipsa->ipsa_bpkt_head = ipsa->ipsa_bpkt_head->b_next;
7294 			tmp = ip_recv_attr_free_mblk(tmp);
7295 			ip_drop_packet(tmp, B_TRUE, NULL,
7296 			    DROPPER(ipss, ipds_sadb_inidle_overflow),
7297 			    &ipss->ipsec_sadb_dropper);
7298 			ipsa->ipsa_mblkcnt --;
7299 		}
7300 	}
7301 	mutex_exit(&ipsa->ipsa_lock);
7302 }
7303 
7304 /*
7305  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
7306  * and put into STREAMS again.
7307  */
7308 void
7309 sadb_clear_buf_pkt(void *ipkt)
7310 {
7311 	mblk_t	*tmp, *buf_pkt;
7312 	ip_recv_attr_t	iras;
7313 
7314 	buf_pkt = (mblk_t *)ipkt;
7315 
7316 	while (buf_pkt != NULL) {
7317 		mblk_t *data_mp;
7318 
7319 		tmp = buf_pkt->b_next;
7320 		buf_pkt->b_next = NULL;
7321 
7322 		data_mp = buf_pkt->b_cont;
7323 		buf_pkt->b_cont = NULL;
7324 		if (!ip_recv_attr_from_mblk(buf_pkt, &iras)) {
7325 			/* The ill or ip_stack_t disappeared on us. */
7326 			ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
7327 			freemsg(data_mp);
7328 		} else {
7329 			ip_input_post_ipsec(data_mp, &iras);
7330 		}
7331 		ira_cleanup(&iras, B_TRUE);
7332 		buf_pkt = tmp;
7333 	}
7334 }
7335 /*
7336  * Walker callback used by sadb_alg_update() to free/create crypto
7337  * context template when a crypto software provider is removed or
7338  * added.
7339  */
7340 
7341 struct sadb_update_alg_state {
7342 	ipsec_algtype_t alg_type;
7343 	uint8_t alg_id;
7344 	boolean_t is_added;
7345 	boolean_t async_auth;
7346 	boolean_t async_encr;
7347 };
7348 
7349 static void
7350 sadb_alg_update_cb(isaf_t *head, ipsa_t *entry, void *cookie)
7351 {
7352 	struct sadb_update_alg_state *update_state =
7353 	    (struct sadb_update_alg_state *)cookie;
7354 	crypto_ctx_template_t *ctx_tmpl = NULL;
7355 
7356 	ASSERT(MUTEX_HELD(&head->isaf_lock));
7357 
7358 	if (entry->ipsa_state == IPSA_STATE_LARVAL)
7359 		return;
7360 
7361 	mutex_enter(&entry->ipsa_lock);
7362 
7363 	if ((entry->ipsa_encr_alg != SADB_EALG_NONE && entry->ipsa_encr_alg !=
7364 	    SADB_EALG_NULL && update_state->async_encr) ||
7365 	    (entry->ipsa_auth_alg != SADB_AALG_NONE &&
7366 	    update_state->async_auth)) {
7367 		entry->ipsa_flags |= IPSA_F_ASYNC;
7368 	} else {
7369 		entry->ipsa_flags &= ~IPSA_F_ASYNC;
7370 	}
7371 
7372 	switch (update_state->alg_type) {
7373 	case IPSEC_ALG_AUTH:
7374 		if (entry->ipsa_auth_alg == update_state->alg_id)
7375 			ctx_tmpl = &entry->ipsa_authtmpl;
7376 		break;
7377 	case IPSEC_ALG_ENCR:
7378 		if (entry->ipsa_encr_alg == update_state->alg_id)
7379 			ctx_tmpl = &entry->ipsa_encrtmpl;
7380 		break;
7381 	default:
7382 		ctx_tmpl = NULL;
7383 	}
7384 
7385 	if (ctx_tmpl == NULL) {
7386 		mutex_exit(&entry->ipsa_lock);
7387 		return;
7388 	}
7389 
7390 	/*
7391 	 * The context template of the SA may be affected by the change
7392 	 * of crypto provider.
7393 	 */
7394 	if (update_state->is_added) {
7395 		/* create the context template if not already done */
7396 		if (*ctx_tmpl == NULL) {
7397 			(void) ipsec_create_ctx_tmpl(entry,
7398 			    update_state->alg_type);
7399 		}
7400 	} else {
7401 		/*
7402 		 * The crypto provider was removed. If the context template
7403 		 * exists but it is no longer valid, free it.
7404 		 */
7405 		if (*ctx_tmpl != NULL)
7406 			ipsec_destroy_ctx_tmpl(entry, update_state->alg_type);
7407 	}
7408 
7409 	mutex_exit(&entry->ipsa_lock);
7410 }
7411 
7412 /*
7413  * Invoked by IP when an software crypto provider has been updated, or if
7414  * the crypto synchrony changes.  The type and id of the corresponding
7415  * algorithm is passed as argument.  The type is set to ALL in the case of
7416  * a synchrony change.
7417  *
7418  * is_added is B_TRUE if the provider was added, B_FALSE if it was
7419  * removed. The function updates the SADB and free/creates the
7420  * context templates associated with SAs if needed.
7421  */
7422 
7423 #define	SADB_ALG_UPDATE_WALK(sadb, table) \
7424     sadb_walker((sadb).table, (sadb).sdb_hashsize, sadb_alg_update_cb, \
7425 	&update_state)
7426 
7427 void
7428 sadb_alg_update(ipsec_algtype_t alg_type, uint8_t alg_id, boolean_t is_added,
7429     netstack_t *ns)
7430 {
7431 	struct sadb_update_alg_state update_state;
7432 	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
7433 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
7434 	ipsec_stack_t *ipss = ns->netstack_ipsec;
7435 
7436 	update_state.alg_type = alg_type;
7437 	update_state.alg_id = alg_id;
7438 	update_state.is_added = is_added;
7439 	update_state.async_auth = ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
7440 	    IPSEC_ALGS_EXEC_ASYNC;
7441 	update_state.async_encr = ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
7442 	    IPSEC_ALGS_EXEC_ASYNC;
7443 
7444 	if (alg_type == IPSEC_ALG_AUTH || alg_type == IPSEC_ALG_ALL) {
7445 		/* walk the AH tables only for auth. algorithm changes */
7446 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_of);
7447 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v4, sdb_if);
7448 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_of);
7449 		SADB_ALG_UPDATE_WALK(ahstack->ah_sadb.s_v6, sdb_if);
7450 	}
7451 
7452 	/* walk the ESP tables */
7453 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_of);
7454 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v4, sdb_if);
7455 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_of);
7456 	SADB_ALG_UPDATE_WALK(espstack->esp_sadb.s_v6, sdb_if);
7457 }
7458 
7459 /*
7460  * Creates a context template for the specified SA. This function
7461  * is called when an SA is created and when a context template needs
7462  * to be created due to a change of software provider.
7463  */
7464 int
7465 ipsec_create_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7466 {
7467 	ipsec_alginfo_t *alg;
7468 	crypto_mechanism_t mech;
7469 	crypto_key_t *key;
7470 	crypto_ctx_template_t *sa_tmpl;
7471 	int rv;
7472 	ipsec_stack_t	*ipss = sa->ipsa_netstack->netstack_ipsec;
7473 
7474 	ASSERT(RW_READ_HELD(&ipss->ipsec_alg_lock));
7475 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7476 
7477 	/* get pointers to the algorithm info, context template, and key */
7478 	switch (alg_type) {
7479 	case IPSEC_ALG_AUTH:
7480 		key = &sa->ipsa_kcfauthkey;
7481 		sa_tmpl = &sa->ipsa_authtmpl;
7482 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_auth_alg];
7483 		break;
7484 	case IPSEC_ALG_ENCR:
7485 		key = &sa->ipsa_kcfencrkey;
7486 		sa_tmpl = &sa->ipsa_encrtmpl;
7487 		alg = ipss->ipsec_alglists[alg_type][sa->ipsa_encr_alg];
7488 		break;
7489 	default:
7490 		alg = NULL;
7491 	}
7492 
7493 	if (alg == NULL || !ALG_VALID(alg))
7494 		return (EINVAL);
7495 
7496 	/* initialize the mech info structure for the framework */
7497 	ASSERT(alg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
7498 	mech.cm_type = alg->alg_mech_type;
7499 	mech.cm_param = NULL;
7500 	mech.cm_param_len = 0;
7501 
7502 	/* create a new context template */
7503 	rv = crypto_create_ctx_template(&mech, key, sa_tmpl, KM_NOSLEEP);
7504 
7505 	/*
7506 	 * CRYPTO_MECH_NOT_SUPPORTED can be returned if only hardware
7507 	 * providers are available for that mechanism. In that case
7508 	 * we don't fail, and will generate the context template from
7509 	 * the framework callback when a software provider for that
7510 	 * mechanism registers.
7511 	 *
7512 	 * The context template is assigned the special value
7513 	 * IPSEC_CTX_TMPL_ALLOC if the allocation failed due to a
7514 	 * lack of memory. No attempt will be made to use
7515 	 * the context template if it is set to this value.
7516 	 */
7517 	if (rv == CRYPTO_HOST_MEMORY) {
7518 		*sa_tmpl = IPSEC_CTX_TMPL_ALLOC;
7519 	} else if (rv != CRYPTO_SUCCESS) {
7520 		*sa_tmpl = NULL;
7521 		if (rv != CRYPTO_MECH_NOT_SUPPORTED)
7522 			return (EINVAL);
7523 	}
7524 
7525 	return (0);
7526 }
7527 
7528 /*
7529  * Destroy the context template of the specified algorithm type
7530  * of the specified SA. Must be called while holding the SA lock.
7531  */
7532 void
7533 ipsec_destroy_ctx_tmpl(ipsa_t *sa, ipsec_algtype_t alg_type)
7534 {
7535 	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
7536 
7537 	if (alg_type == IPSEC_ALG_AUTH) {
7538 		if (sa->ipsa_authtmpl == IPSEC_CTX_TMPL_ALLOC)
7539 			sa->ipsa_authtmpl = NULL;
7540 		else if (sa->ipsa_authtmpl != NULL) {
7541 			crypto_destroy_ctx_template(sa->ipsa_authtmpl);
7542 			sa->ipsa_authtmpl = NULL;
7543 		}
7544 	} else {
7545 		ASSERT(alg_type == IPSEC_ALG_ENCR);
7546 		if (sa->ipsa_encrtmpl == IPSEC_CTX_TMPL_ALLOC)
7547 			sa->ipsa_encrtmpl = NULL;
7548 		else if (sa->ipsa_encrtmpl != NULL) {
7549 			crypto_destroy_ctx_template(sa->ipsa_encrtmpl);
7550 			sa->ipsa_encrtmpl = NULL;
7551 		}
7552 	}
7553 }
7554 
7555 /*
7556  * Use the kernel crypto framework to check the validity of a key received
7557  * via keysock. Returns 0 if the key is OK, -1 otherwise.
7558  */
7559 int
7560 ipsec_check_key(crypto_mech_type_t mech_type, sadb_key_t *sadb_key,
7561     boolean_t is_auth, int *diag)
7562 {
7563 	crypto_mechanism_t mech;
7564 	crypto_key_t crypto_key;
7565 	int crypto_rc;
7566 
7567 	mech.cm_type = mech_type;
7568 	mech.cm_param = NULL;
7569 	mech.cm_param_len = 0;
7570 
7571 	crypto_key.ck_format = CRYPTO_KEY_RAW;
7572 	crypto_key.ck_data = sadb_key + 1;
7573 	crypto_key.ck_length = sadb_key->sadb_key_bits;
7574 
7575 	crypto_rc = crypto_key_check(&mech, &crypto_key);
7576 
7577 	switch (crypto_rc) {
7578 	case CRYPTO_SUCCESS:
7579 		return (0);
7580 	case CRYPTO_MECHANISM_INVALID:
7581 	case CRYPTO_MECH_NOT_SUPPORTED:
7582 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AALG :
7583 		    SADB_X_DIAGNOSTIC_BAD_EALG;
7584 		break;
7585 	case CRYPTO_KEY_SIZE_RANGE:
7586 		*diag = is_auth ? SADB_X_DIAGNOSTIC_BAD_AKEYBITS :
7587 		    SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
7588 		break;
7589 	case CRYPTO_WEAK_KEY:
7590 		*diag = is_auth ? SADB_X_DIAGNOSTIC_WEAK_AKEY :
7591 		    SADB_X_DIAGNOSTIC_WEAK_EKEY;
7592 		break;
7593 	}
7594 
7595 	return (-1);
7596 }
7597 
7598 /*
7599  * Whack options in the outer IP header when ipsec changes the outer label
7600  *
7601  * This is inelegant and really could use refactoring.
7602  */
7603 mblk_t *
7604 sadb_whack_label_v4(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7605     ipdropper_t *dropper)
7606 {
7607 	int delta;
7608 	int plen;
7609 	dblk_t *db;
7610 	int hlen;
7611 	uint8_t *opt_storage = assoc->ipsa_opt_storage;
7612 	ipha_t *ipha = (ipha_t *)mp->b_rptr;
7613 
7614 	plen = ntohs(ipha->ipha_length);
7615 
7616 	delta = tsol_remove_secopt(ipha, MBLKL(mp));
7617 	mp->b_wptr += delta;
7618 	plen += delta;
7619 
7620 	/* XXX XXX code copied from tsol_check_label */
7621 
7622 	/* Make sure we have room for the worst-case addition */
7623 	hlen = IPH_HDR_LENGTH(ipha) + opt_storage[IPOPT_OLEN];
7624 	hlen = (hlen + 3) & ~3;
7625 	if (hlen > IP_MAX_HDR_LENGTH)
7626 		hlen = IP_MAX_HDR_LENGTH;
7627 	hlen -= IPH_HDR_LENGTH(ipha);
7628 
7629 	db = mp->b_datap;
7630 	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7631 		int copylen;
7632 		mblk_t *new_mp;
7633 
7634 		/* allocate enough to be meaningful, but not *too* much */
7635 		copylen = MBLKL(mp);
7636 		if (copylen > 256)
7637 			copylen = 256;
7638 		new_mp = allocb_tmpl(hlen + copylen +
7639 		    (mp->b_rptr - mp->b_datap->db_base), mp);
7640 
7641 		if (new_mp == NULL) {
7642 			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7643 			return (NULL);
7644 		}
7645 
7646 		/* keep the bias */
7647 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7648 		new_mp->b_wptr = new_mp->b_rptr + copylen;
7649 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7650 		new_mp->b_cont = mp;
7651 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7652 			new_mp->b_cont = mp->b_cont;
7653 			freeb(mp);
7654 		}
7655 		mp = new_mp;
7656 		ipha = (ipha_t *)mp->b_rptr;
7657 	}
7658 
7659 	delta = tsol_prepend_option(assoc->ipsa_opt_storage, ipha, MBLKL(mp));
7660 
7661 	ASSERT(delta != -1);
7662 
7663 	plen += delta;
7664 	mp->b_wptr += delta;
7665 
7666 	/*
7667 	 * Paranoia
7668 	 */
7669 	db = mp->b_datap;
7670 
7671 	ASSERT3P(mp->b_wptr, <=, db->db_lim);
7672 	ASSERT3P(mp->b_rptr, <=, db->db_lim);
7673 
7674 	ASSERT3P(mp->b_wptr, >=, db->db_base);
7675 	ASSERT3P(mp->b_rptr, >=, db->db_base);
7676 	/* End paranoia */
7677 
7678 	ipha->ipha_length = htons(plen);
7679 
7680 	return (mp);
7681 }
7682 
7683 mblk_t *
7684 sadb_whack_label_v6(mblk_t *mp, ipsa_t *assoc, kstat_named_t *counter,
7685     ipdropper_t *dropper)
7686 {
7687 	int delta;
7688 	int plen;
7689 	dblk_t *db;
7690 	int hlen;
7691 	uint8_t *opt_storage = assoc->ipsa_opt_storage;
7692 	uint_t sec_opt_len; /* label option length not including type, len */
7693 	ip6_t *ip6h = (ip6_t *)mp->b_rptr;
7694 
7695 	plen = ntohs(ip6h->ip6_plen);
7696 
7697 	delta = tsol_remove_secopt_v6(ip6h, MBLKL(mp));
7698 	mp->b_wptr += delta;
7699 	plen += delta;
7700 
7701 	/* XXX XXX code copied from tsol_check_label_v6 */
7702 	/*
7703 	 * Make sure we have room for the worst-case addition. Add 2 bytes for
7704 	 * the hop-by-hop ext header's next header and length fields. Add
7705 	 * another 2 bytes for the label option type, len and then round
7706 	 * up to the next 8-byte multiple.
7707 	 */
7708 	sec_opt_len = opt_storage[1];
7709 
7710 	db = mp->b_datap;
7711 	hlen = (4 + sec_opt_len + 7) & ~7;
7712 
7713 	if ((db->db_ref != 1) || (mp->b_wptr + hlen > db->db_lim)) {
7714 		int copylen;
7715 		mblk_t *new_mp;
7716 		uint16_t hdr_len;
7717 
7718 		hdr_len = ip_hdr_length_v6(mp, ip6h);
7719 		/*
7720 		 * Allocate enough to be meaningful, but not *too* much.
7721 		 * Also all the IPv6 extension headers must be in the same mblk
7722 		 */
7723 		copylen = MBLKL(mp);
7724 		if (copylen > 256)
7725 			copylen = 256;
7726 		if (copylen < hdr_len)
7727 			copylen = hdr_len;
7728 		new_mp = allocb_tmpl(hlen + copylen +
7729 		    (mp->b_rptr - mp->b_datap->db_base), mp);
7730 		if (new_mp == NULL) {
7731 			ip_drop_packet(mp, B_FALSE, NULL, counter,  dropper);
7732 			return (NULL);
7733 		}
7734 
7735 		/* keep the bias */
7736 		new_mp->b_rptr += mp->b_rptr - mp->b_datap->db_base;
7737 		new_mp->b_wptr = new_mp->b_rptr + copylen;
7738 		bcopy(mp->b_rptr, new_mp->b_rptr, copylen);
7739 		new_mp->b_cont = mp;
7740 		if ((mp->b_rptr += copylen) >= mp->b_wptr) {
7741 			new_mp->b_cont = mp->b_cont;
7742 			freeb(mp);
7743 		}
7744 		mp = new_mp;
7745 		ip6h = (ip6_t *)mp->b_rptr;
7746 	}
7747 
7748 	delta = tsol_prepend_option_v6(assoc->ipsa_opt_storage,
7749 	    ip6h, MBLKL(mp));
7750 
7751 	ASSERT(delta != -1);
7752 
7753 	plen += delta;
7754 	mp->b_wptr += delta;
7755 
7756 	/*
7757 	 * Paranoia
7758 	 */
7759 	db = mp->b_datap;
7760 
7761 	ASSERT3P(mp->b_wptr, <=, db->db_lim);
7762 	ASSERT3P(mp->b_rptr, <=, db->db_lim);
7763 
7764 	ASSERT3P(mp->b_wptr, >=, db->db_base);
7765 	ASSERT3P(mp->b_rptr, >=, db->db_base);
7766 	/* End paranoia */
7767 
7768 	ip6h->ip6_plen = htons(plen);
7769 
7770 	return (mp);
7771 }
7772 
7773 /* Whack the labels and update ip_xmit_attr_t as needed */
7774 mblk_t *
7775 sadb_whack_label(mblk_t *mp, ipsa_t *assoc, ip_xmit_attr_t *ixa,
7776     kstat_named_t *counter, ipdropper_t *dropper)
7777 {
7778 	int adjust;
7779 	int iplen;
7780 
7781 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
7782 		ipha_t		*ipha = (ipha_t *)mp->b_rptr;
7783 
7784 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7785 		iplen = ntohs(ipha->ipha_length);
7786 		mp = sadb_whack_label_v4(mp, assoc, counter, dropper);
7787 		if (mp == NULL)
7788 			return (NULL);
7789 
7790 		ipha = (ipha_t *)mp->b_rptr;
7791 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
7792 		adjust = (int)ntohs(ipha->ipha_length) - iplen;
7793 	} else {
7794 		ip6_t		*ip6h = (ip6_t *)mp->b_rptr;
7795 
7796 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7797 		iplen = ntohs(ip6h->ip6_plen);
7798 		mp = sadb_whack_label_v6(mp, assoc, counter, dropper);
7799 		if (mp == NULL)
7800 			return (NULL);
7801 
7802 		ip6h = (ip6_t *)mp->b_rptr;
7803 		ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
7804 		adjust = (int)ntohs(ip6h->ip6_plen) - iplen;
7805 	}
7806 	ixa->ixa_pktlen += adjust;
7807 	ixa->ixa_ip_hdr_length += adjust;
7808 	return (mp);
7809 }
7810 
7811 /*
7812  * If this is an outgoing SA then add some fuzz to the
7813  * SOFT EXPIRE time. The reason for this is to stop
7814  * peers trying to renegotiate SOFT expiring SA's at
7815  * the same time. The amount of fuzz needs to be at
7816  * least 8 seconds which is the typical interval
7817  * sadb_ager(), although this is only a guide as it
7818  * selftunes.
7819  */
7820 static void
7821 lifetime_fuzz(ipsa_t *assoc)
7822 {
7823 	uint8_t rnd;
7824 
7825 	if (assoc->ipsa_softaddlt == 0)
7826 		return;
7827 
7828 	(void) random_get_pseudo_bytes(&rnd, sizeof (rnd));
7829 	rnd = (rnd & 0xF) + 8;
7830 	assoc->ipsa_softexpiretime -= rnd;
7831 	assoc->ipsa_softaddlt -= rnd;
7832 }
7833 
7834 static void
7835 destroy_ipsa_pair(ipsap_t *ipsapp)
7836 {
7837 	/*
7838 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
7839 	 * them in { }.
7840 	 */
7841 	if (ipsapp->ipsap_sa_ptr != NULL) {
7842 		IPSA_REFRELE(ipsapp->ipsap_sa_ptr);
7843 	}
7844 	if (ipsapp->ipsap_psa_ptr != NULL) {
7845 		IPSA_REFRELE(ipsapp->ipsap_psa_ptr);
7846 	}
7847 	init_ipsa_pair(ipsapp);
7848 }
7849 
7850 static void
7851 init_ipsa_pair(ipsap_t *ipsapp)
7852 {
7853 	ipsapp->ipsap_bucket = NULL;
7854 	ipsapp->ipsap_sa_ptr = NULL;
7855 	ipsapp->ipsap_pbucket = NULL;
7856 	ipsapp->ipsap_psa_ptr = NULL;
7857 }
7858 
7859 /*
7860  * The sadb_ager() function walks through the hash tables of SA's and ages
7861  * them, if the SA expires as a result, its marked as DEAD and will be reaped
7862  * the next time sadb_ager() runs. SA's which are paired or have a peer (same
7863  * SA appears in both the inbound and outbound tables because its not possible
7864  * to determine its direction) are placed on a list when they expire. This is
7865  * to ensure that pair/peer SA's are reaped at the same time, even if they
7866  * expire at different times.
7867  *
7868  * This function is called twice by sadb_ager(), one after processing the
7869  * inbound table, then again after processing the outbound table.
7870  */
7871 void
7872 age_pair_peer_list(templist_t *haspeerlist, sadb_t *sp, boolean_t outbound)
7873 {
7874 	templist_t *listptr;
7875 	int outhash;
7876 	isaf_t *bucket;
7877 	boolean_t haspeer;
7878 	ipsa_t *peer_assoc, *dying;
7879 	/*
7880 	 * Haspeer cases will contain both IPv4 and IPv6.  This code
7881 	 * is address independent.
7882 	 */
7883 	while (haspeerlist != NULL) {
7884 		/* "dying" contains the SA that has a peer. */
7885 		dying = haspeerlist->ipsa;
7886 		haspeer = (dying->ipsa_haspeer);
7887 		listptr = haspeerlist;
7888 		haspeerlist = listptr->next;
7889 		kmem_free(listptr, sizeof (*listptr));
7890 		/*
7891 		 * Pick peer bucket based on addrfam.
7892 		 */
7893 		if (outbound) {
7894 			if (haspeer)
7895 				bucket = INBOUND_BUCKET(sp, dying->ipsa_spi);
7896 			else
7897 				bucket = INBOUND_BUCKET(sp,
7898 				    dying->ipsa_otherspi);
7899 		} else { /* inbound */
7900 			if (haspeer) {
7901 				if (dying->ipsa_addrfam == AF_INET6) {
7902 					outhash = OUTBOUND_HASH_V6(sp,
7903 					    *((in6_addr_t *)&dying->
7904 					    ipsa_dstaddr));
7905 				} else {
7906 					outhash = OUTBOUND_HASH_V4(sp,
7907 					    *((ipaddr_t *)&dying->
7908 					    ipsa_dstaddr));
7909 				}
7910 			} else if (dying->ipsa_addrfam == AF_INET6) {
7911 				outhash = OUTBOUND_HASH_V6(sp,
7912 				    *((in6_addr_t *)&dying->
7913 				    ipsa_srcaddr));
7914 			} else {
7915 				outhash = OUTBOUND_HASH_V4(sp,
7916 				    *((ipaddr_t *)&dying->
7917 				    ipsa_srcaddr));
7918 			}
7919 			bucket = &(sp->sdb_of[outhash]);
7920 		}
7921 
7922 		mutex_enter(&bucket->isaf_lock);
7923 		/*
7924 		 * "haspeer" SA's have the same src/dst address ordering,
7925 		 * "paired" SA's have the src/dst addresses reversed.
7926 		 */
7927 		if (haspeer) {
7928 			peer_assoc = ipsec_getassocbyspi(bucket,
7929 			    dying->ipsa_spi, dying->ipsa_srcaddr,
7930 			    dying->ipsa_dstaddr, dying->ipsa_addrfam);
7931 		} else {
7932 			peer_assoc = ipsec_getassocbyspi(bucket,
7933 			    dying->ipsa_otherspi, dying->ipsa_dstaddr,
7934 			    dying->ipsa_srcaddr, dying->ipsa_addrfam);
7935 		}
7936 
7937 		mutex_exit(&bucket->isaf_lock);
7938 		if (peer_assoc != NULL) {
7939 			mutex_enter(&peer_assoc->ipsa_lock);
7940 			mutex_enter(&dying->ipsa_lock);
7941 			if (!haspeer) {
7942 				/*
7943 				 * Only SA's which have a "peer" or are
7944 				 * "paired" end up on this list, so this
7945 				 * must be a "paired" SA, update the flags
7946 				 * to break the pair.
7947 				 */
7948 				peer_assoc->ipsa_otherspi = 0;
7949 				peer_assoc->ipsa_flags &= ~IPSA_F_PAIRED;
7950 				dying->ipsa_otherspi = 0;
7951 				dying->ipsa_flags &= ~IPSA_F_PAIRED;
7952 			}
7953 			if (haspeer || outbound) {
7954 				/*
7955 				 * Update the state of the "inbound" SA when
7956 				 * the "outbound" SA has expired. Don't update
7957 				 * the "outbound" SA when the "inbound" SA
7958 				 * SA expires because setting the hard_addtime
7959 				 * below will cause this to happen.
7960 				 */
7961 				peer_assoc->ipsa_state = dying->ipsa_state;
7962 			}
7963 			if (dying->ipsa_state == IPSA_STATE_DEAD)
7964 				peer_assoc->ipsa_hardexpiretime = 1;
7965 
7966 			mutex_exit(&dying->ipsa_lock);
7967 			mutex_exit(&peer_assoc->ipsa_lock);
7968 			IPSA_REFRELE(peer_assoc);
7969 		}
7970 		IPSA_REFRELE(dying);
7971 	}
7972 }
7973 
7974 /*
7975  * Ensure that the IV used for CCM mode never repeats. The IV should
7976  * only be updated by this function. Also check to see if the IV
7977  * is about to wrap and generate a SOFT Expire. This function is only
7978  * called for outgoing packets, the IV for incomming packets is taken
7979  * from the wire. If the outgoing SA needs to be expired, update
7980  * the matching incomming SA.
7981  */
7982 boolean_t
7983 update_iv(uint8_t *iv_ptr, queue_t *pfkey_q, ipsa_t *assoc,
7984     ipsecesp_stack_t *espstack)
7985 {
7986 	boolean_t rc = B_TRUE;
7987 	isaf_t *inbound_bucket;
7988 	sadb_t *sp;
7989 	ipsa_t *pair_sa = NULL;
7990 	int sa_new_state = 0;
7991 
7992 	/* For non counter modes, the IV is random data. */
7993 	if (!(assoc->ipsa_flags & IPSA_F_COUNTERMODE)) {
7994 		(void) random_get_pseudo_bytes(iv_ptr, assoc->ipsa_iv_len);
7995 		return (rc);
7996 	}
7997 
7998 	mutex_enter(&assoc->ipsa_lock);
7999 
8000 	(*assoc->ipsa_iv)++;
8001 
8002 	if (*assoc->ipsa_iv == assoc->ipsa_iv_hardexpire) {
8003 		sa_new_state = IPSA_STATE_DEAD;
8004 		rc = B_FALSE;
8005 	} else if (*assoc->ipsa_iv == assoc->ipsa_iv_softexpire) {
8006 		if (assoc->ipsa_state != IPSA_STATE_DYING) {
8007 			/*
8008 			 * This SA may have already been expired when its
8009 			 * PAIR_SA expired.
8010 			 */
8011 			sa_new_state = IPSA_STATE_DYING;
8012 		}
8013 	}
8014 	if (sa_new_state) {
8015 		/*
8016 		 * If there is a state change, we need to update this SA
8017 		 * and its "pair", we can find the bucket for the "pair" SA
8018 		 * while holding the ipsa_t mutex, but we won't actually
8019 		 * update anything untill the ipsa_t mutex has been released
8020 		 * for _this_ SA.
8021 		 */
8022 		assoc->ipsa_state = sa_new_state;
8023 		if (assoc->ipsa_addrfam == AF_INET6) {
8024 			sp = &espstack->esp_sadb.s_v6;
8025 		} else {
8026 			sp = &espstack->esp_sadb.s_v4;
8027 		}
8028 		inbound_bucket = INBOUND_BUCKET(sp, assoc->ipsa_otherspi);
8029 		sadb_expire_assoc(pfkey_q, assoc);
8030 	}
8031 	if (rc == B_TRUE)
8032 		bcopy(assoc->ipsa_iv, iv_ptr, assoc->ipsa_iv_len);
8033 
8034 	mutex_exit(&assoc->ipsa_lock);
8035 
8036 	if (sa_new_state) {
8037 		/* Find the inbound SA, need to lock hash bucket. */
8038 		mutex_enter(&inbound_bucket->isaf_lock);
8039 		pair_sa = ipsec_getassocbyspi(inbound_bucket,
8040 		    assoc->ipsa_otherspi, assoc->ipsa_dstaddr,
8041 		    assoc->ipsa_srcaddr, assoc->ipsa_addrfam);
8042 		mutex_exit(&inbound_bucket->isaf_lock);
8043 		if (pair_sa != NULL) {
8044 			mutex_enter(&pair_sa->ipsa_lock);
8045 			pair_sa->ipsa_state = sa_new_state;
8046 			mutex_exit(&pair_sa->ipsa_lock);
8047 			IPSA_REFRELE(pair_sa);
8048 		}
8049 	}
8050 
8051 	return (rc);
8052 }
8053 
8054 void
8055 ccm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
8056     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
8057 {
8058 	uchar_t *nonce;
8059 	crypto_mechanism_t *combined_mech;
8060 	CK_AES_CCM_PARAMS *params;
8061 
8062 	combined_mech = (crypto_mechanism_t *)cm_mech;
8063 	params = (CK_AES_CCM_PARAMS *)(combined_mech + 1);
8064 	nonce = (uchar_t *)(params + 1);
8065 	params->ulMACSize = assoc->ipsa_mac_len;
8066 	params->ulNonceSize = assoc->ipsa_nonce_len;
8067 	params->ulAuthDataSize = sizeof (esph_t);
8068 	params->ulDataSize = data_len;
8069 	params->nonce = nonce;
8070 	params->authData = esph;
8071 
8072 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
8073 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_CCM_PARAMS);
8074 	cm_mech->combined_mech.cm_param = (caddr_t)params;
8075 	/* See gcm_params_init() for comments. */
8076 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
8077 	nonce += assoc->ipsa_saltlen;
8078 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
8079 	crypto_data->cd_miscdata = NULL;
8080 }
8081 
8082 /* ARGSUSED */
8083 void
8084 cbc_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
8085     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
8086 {
8087 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
8088 	cm_mech->combined_mech.cm_param_len = 0;
8089 	cm_mech->combined_mech.cm_param = NULL;
8090 	crypto_data->cd_miscdata = (char *)iv_ptr;
8091 }
8092 
8093 /* ARGSUSED */
8094 void
8095 gcm_params_init(ipsa_t *assoc, uchar_t *esph, uint_t data_len, uchar_t *iv_ptr,
8096     ipsa_cm_mech_t *cm_mech, crypto_data_t *crypto_data)
8097 {
8098 	uchar_t *nonce;
8099 	crypto_mechanism_t *combined_mech;
8100 	CK_AES_GCM_PARAMS *params;
8101 
8102 	combined_mech = (crypto_mechanism_t *)cm_mech;
8103 	params = (CK_AES_GCM_PARAMS *)(combined_mech + 1);
8104 	nonce = (uchar_t *)(params + 1);
8105 
8106 	params->pIv = nonce;
8107 	params->ulIvLen = assoc->ipsa_nonce_len;
8108 	params->ulIvBits = SADB_8TO1(assoc->ipsa_nonce_len);
8109 	params->pAAD = esph;
8110 	params->ulAADLen = sizeof (esph_t);
8111 	params->ulTagBits = SADB_8TO1(assoc->ipsa_mac_len);
8112 
8113 	cm_mech->combined_mech.cm_type = assoc->ipsa_emech.cm_type;
8114 	cm_mech->combined_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
8115 	cm_mech->combined_mech.cm_param = (caddr_t)params;
8116 	/*
8117 	 * Create the nonce, which is made up of the salt and the IV.
8118 	 * Copy the salt from the SA and the IV from the packet.
8119 	 * For inbound packets we copy the IV from the packet because it
8120 	 * was set by the sending system, for outbound packets we copy the IV
8121 	 * from the packet because the IV in the SA may be changed by another
8122 	 * thread, the IV in the packet was created while holding a mutex.
8123 	 */
8124 	bcopy(assoc->ipsa_nonce, nonce, assoc->ipsa_saltlen);
8125 	nonce += assoc->ipsa_saltlen;
8126 	bcopy(iv_ptr, nonce, assoc->ipsa_iv_len);
8127 	crypto_data->cd_miscdata = NULL;
8128 }
8129