1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 * Copyright (c) 2012 Nexenta Systems, Inc. All rights reserved.
25 * Copyright (c) 2018 Joyent, Inc.
26 */
27
28#include <sys/types.h>
29#include <sys/stream.h>
30#include <sys/stropts.h>
31#include <sys/strsubr.h>
32#include <sys/errno.h>
33#include <sys/ddi.h>
34#include <sys/debug.h>
35#include <sys/cmn_err.h>
36#include <sys/stream.h>
37#include <sys/strlog.h>
38#include <sys/kmem.h>
39#include <sys/sunddi.h>
40#include <sys/tihdr.h>
41#include <sys/atomic.h>
42#include <sys/socket.h>
43#include <sys/sysmacros.h>
44#include <sys/crypto/common.h>
45#include <sys/crypto/api.h>
46#include <sys/zone.h>
47#include <netinet/in.h>
48#include <net/if.h>
49#include <net/pfkeyv2.h>
50#include <net/pfpolicy.h>
51#include <inet/common.h>
52#include <netinet/ip6.h>
53#include <inet/ip.h>
54#include <inet/ip_ire.h>
55#include <inet/ip6.h>
56#include <inet/ipsec_info.h>
57#include <inet/tcp.h>
58#include <inet/sadb.h>
59#include <inet/ipsec_impl.h>
60#include <inet/ipsecah.h>
61#include <inet/ipsecesp.h>
62#include <sys/random.h>
63#include <sys/dlpi.h>
64#include <sys/strsun.h>
65#include <sys/strsubr.h>
66#include <inet/ip_if.h>
67#include <inet/ipdrop.h>
68#include <inet/ipclassifier.h>
69#include <inet/sctp_ip.h>
70#include <sys/tsol/tnet.h>
71
72/*
73 * This source file contains Security Association Database (SADB) common
74 * routines.  They are linked in with the AH module.  Since AH has no chance
75 * of falling under export control, it was safe to link it in there.
76 */
77
78static uint8_t *sadb_action_to_ecomb(uint8_t *, uint8_t *, ipsec_action_t *,
79    netstack_t *);
80static ipsa_t *sadb_torch_assoc(isaf_t *, ipsa_t *);
81static void sadb_destroy_acqlist(iacqf_t **, uint_t, boolean_t,
82			    netstack_t *);
83static void sadb_destroy(sadb_t *, netstack_t *);
84static mblk_t *sadb_sa2msg(ipsa_t *, sadb_msg_t *);
85static ts_label_t *sadb_label_from_sens(sadb_sens_t *, uint64_t *);
86
87static time_t sadb_add_time(time_t, uint64_t);
88static void lifetime_fuzz(ipsa_t *);
89static void age_pair_peer_list(templist_t *, sadb_t *, boolean_t);
90static int get_ipsa_pair(ipsa_query_t *, ipsap_t *, int *);
91static void init_ipsa_pair(ipsap_t *);
92static void destroy_ipsa_pair(ipsap_t *);
93static int update_pairing(ipsap_t *, ipsa_query_t *, keysock_in_t *, int *);
94static void ipsa_set_replay(ipsa_t *ipsa, uint32_t offset);
95
/*
 * ipsacq_maxpackets is only declared here (note the "extern"); it is a
 * global variable so that it can be tuned from /etc/system.
 */
100extern uint64_t ipsacq_maxpackets;
101
/*
 * If the SA's lifetime field ipsa_<delta> is non-zero, set the expiration
 * field ipsa_<exp> to the SA's add time plus that lifetime, using the
 * saturating sadb_add_time().  A zero lifetime leaves ipsa_<exp> untouched.
 */
#define	SET_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0)					\
		(sa)->ipsa_ ## exp =					\
		    sadb_add_time((sa)->ipsa_addtime,			\
		    (sa)->ipsa_ ## delta);				\
}
108
/*
 * If the SA's lifetime field ipsa_<delta> is non-zero, compute a candidate
 * expiration of use time plus that lifetime (saturating, via
 * sadb_add_time()).  An unset (zero) ipsa_<exp> simply takes the candidate;
 * otherwise ipsa_<exp> becomes the earlier of its old value and the
 * candidate.
 */
#define	UPDATE_EXPIRE(sa, delta, exp) {					\
	if ((sa)->ipsa_ ## delta != 0) {				\
		time_t tmp = sadb_add_time((sa)->ipsa_usetime,		\
		    (sa)->ipsa_ ## delta);				\
		if ((sa)->ipsa_ ## exp != 0)				\
			tmp = MIN((sa)->ipsa_ ## exp, tmp);		\
		(sa)->ipsa_ ## exp = tmp;				\
	}								\
}
120
121
122/* wrap the macro so we can pass it as a function pointer */
123void
124sadb_sa_refrele(void *target)
125{
126	IPSA_REFRELE(((ipsa_t *)target));
127}
128
/*
 * Largest representable time_t.  This relies on time_t being a signed type
 * of the same size as "long".
 */
#define	TIME_MAX LONG_MAX

/*
 * Saturating addition of a PF_KEY lifetime to a timestamp.
 *
 * PF_KEY expresses lifetimes as uint64_t seconds, while time_t is presumed
 * to be signed with the range of "long".  A plain addition could therefore
 * either go negative or (on ILP32) lose high-order bits and "overwrap" into
 * a plausible-looking expiration that is earlier than the one requested.
 * To avoid that, the lifetime is first clipped to TIME_MAX, and a sum that
 * would exceed TIME_MAX is pinned at TIME_MAX.
 *
 * base  - starting time.
 * delta - number of seconds to add.
 *
 * Returns base + delta, or TIME_MAX if that sum cannot be represented.
 */
static time_t
sadb_add_time(time_t base, uint64_t delta)
{
	/* Never add more than a time_t can hold. */
	uint64_t clipped = (delta > TIME_MAX) ? TIME_MAX : delta;

	/* Only a positive base can push the sum past TIME_MAX. */
	if (base > 0 && TIME_MAX - base < clipped)
		return (TIME_MAX);

	return (base + clipped);
}
171
172/*
173 * Callers of this function have already created a working security
174 * association, and have found the appropriate table & hash chain.  All this
175 * function does is check duplicates, and insert the SA.  The caller needs to
176 * hold the hash bucket lock and increment the refcnt before insertion.
177 *
178 * Return 0 if success, EEXIST if collision.
179 */
180#define	SA_UNIQUE_MATCH(sa1, sa2) \
181	(((sa1)->ipsa_unique_id & (sa1)->ipsa_unique_mask) == \
182	((sa2)->ipsa_unique_id & (sa2)->ipsa_unique_mask))
183
184int
185sadb_insertassoc(ipsa_t *ipsa, isaf_t *bucket)
186{
187	ipsa_t **ptpn = NULL;
188	ipsa_t *walker;
189	boolean_t unspecsrc;
190
191	ASSERT(MUTEX_HELD(&bucket->isaf_lock));
192
193	unspecsrc = IPSA_IS_ADDR_UNSPEC(ipsa->ipsa_srcaddr, ipsa->ipsa_addrfam);
194
195	walker = bucket->isaf_ipsa;
196	ASSERT(walker == NULL || ipsa->ipsa_addrfam == walker->ipsa_addrfam);
197
198	/*
199	 * Find insertion point (pointed to with **ptpn).  Insert at the head
200	 * of the list unless there's an unspecified source address, then
201	 * insert it after the last SA with a specified source address.
202	 *
203	 * BTW, you'll have to walk the whole chain, matching on {DST, SPI}
204	 * checking for collisions.
205	 */
206
207	while (walker != NULL) {
208		if (IPSA_ARE_ADDR_EQUAL(walker->ipsa_dstaddr,
209		    ipsa->ipsa_dstaddr, ipsa->ipsa_addrfam)) {
210			if (walker->ipsa_spi == ipsa->ipsa_spi)
211				return (EEXIST);
212
213			mutex_enter(&walker->ipsa_lock);
214			if (ipsa->ipsa_state == IPSA_STATE_MATURE &&
215			    (walker->ipsa_flags & IPSA_F_USED) &&
216			    SA_UNIQUE_MATCH(walker, ipsa)) {
217				walker->ipsa_flags |= IPSA_F_CINVALID;
218			}
219			mutex_exit(&walker->ipsa_lock);
220		}
221
222		if (ptpn == NULL && unspecsrc) {
223			if (IPSA_IS_ADDR_UNSPEC(walker->ipsa_srcaddr,
224			    walker->ipsa_addrfam))
225				ptpn = walker->ipsa_ptpn;
226			else if (walker->ipsa_next == NULL)
227				ptpn = &walker->ipsa_next;
228		}
229
230		walker = walker->ipsa_next;
231	}
232
233	if (ptpn == NULL)
234		ptpn = &bucket->isaf_ipsa;
235	ipsa->ipsa_next = *ptpn;
236	ipsa->ipsa_ptpn = ptpn;
237	if (ipsa->ipsa_next != NULL)
238		ipsa->ipsa_next->ipsa_ptpn = &ipsa->ipsa_next;
239	*ptpn = ipsa;
240	ipsa->ipsa_linklock = &bucket->isaf_lock;
241
242	return (0);
243}
244#undef SA_UNIQUE_MATCH
245
246/*
247 * Free a security association.  Its reference count is 0, which means
248 * I must free it.  The SA must be unlocked and must not be linked into
249 * any fanout list.
250 */
251static void
252sadb_freeassoc(ipsa_t *ipsa)
253{
254	ipsec_stack_t	*ipss = ipsa->ipsa_netstack->netstack_ipsec;
255	mblk_t		*asyncmp, *mp;
256
257	ASSERT(ipss != NULL);
258	ASSERT(MUTEX_NOT_HELD(&ipsa->ipsa_lock));
259	ASSERT(ipsa->ipsa_refcnt == 0);
260	ASSERT(ipsa->ipsa_next == NULL);
261	ASSERT(ipsa->ipsa_ptpn == NULL);
262
263
264	asyncmp = sadb_clear_lpkt(ipsa);
265	if (asyncmp != NULL) {
266		mp = ip_recv_attr_free_mblk(asyncmp);
267		ip_drop_packet(mp, B_TRUE, NULL,
268		    DROPPER(ipss, ipds_sadb_inlarval_timeout),
269		    &ipss->ipsec_sadb_dropper);
270	}
271	mutex_enter(&ipsa->ipsa_lock);
272
273	if (ipsa->ipsa_tsl != NULL) {
274		label_rele(ipsa->ipsa_tsl);
275		ipsa->ipsa_tsl = NULL;
276	}
277
278	if (ipsa->ipsa_otsl != NULL) {
279		label_rele(ipsa->ipsa_otsl);
280		ipsa->ipsa_otsl = NULL;
281	}
282
283	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_AUTH);
284	ipsec_destroy_ctx_tmpl(ipsa, IPSEC_ALG_ENCR);
285	mutex_exit(&ipsa->ipsa_lock);
286
287	/* bzero() these fields for paranoia's sake. */
288	if (ipsa->ipsa_authkey != NULL) {
289		bzero(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
290		kmem_free(ipsa->ipsa_authkey, ipsa->ipsa_authkeylen);
291	}
292	if (ipsa->ipsa_encrkey != NULL) {
293		bzero(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
294		kmem_free(ipsa->ipsa_encrkey, ipsa->ipsa_encrkeylen);
295	}
296	if (ipsa->ipsa_nonce_buf != NULL) {
297		bzero(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
298		kmem_free(ipsa->ipsa_nonce_buf, sizeof (ipsec_nonce_t));
299	}
300	if (ipsa->ipsa_src_cid != NULL) {
301		IPSID_REFRELE(ipsa->ipsa_src_cid);
302	}
303	if (ipsa->ipsa_dst_cid != NULL) {
304		IPSID_REFRELE(ipsa->ipsa_dst_cid);
305	}
306	if (ipsa->ipsa_emech.cm_param != NULL)
307		kmem_free(ipsa->ipsa_emech.cm_param,
308		    ipsa->ipsa_emech.cm_param_len);
309
310	mutex_destroy(&ipsa->ipsa_lock);
311	kmem_free(ipsa, sizeof (*ipsa));
312}
313
314/*
315 * Unlink a security association from a hash bucket.  Assume the hash bucket
316 * lock is held, but the association's lock is not.
317 *
318 * Note that we do not bump the bucket's generation number here because
319 * we might not be making a visible change to the set of visible SA's.
320 * All callers MUST bump the bucket's generation number before they unlock
321 * the bucket if they use sadb_unlinkassoc to permanetly remove an SA which
322 * was present in the bucket at the time it was locked.
323 */
324void
325sadb_unlinkassoc(ipsa_t *ipsa)
326{
327	ASSERT(ipsa->ipsa_linklock != NULL);
328	ASSERT(MUTEX_HELD(ipsa->ipsa_linklock));
329
330	/* These fields are protected by the link lock. */
331	*(ipsa->ipsa_ptpn) = ipsa->ipsa_next;
332	if (ipsa->ipsa_next != NULL) {
333		ipsa->ipsa_next->ipsa_ptpn = ipsa->ipsa_ptpn;
334		ipsa->ipsa_next = NULL;
335	}
336
337	ipsa->ipsa_ptpn = NULL;
338
339	/* This may destroy the SA. */
340	IPSA_REFRELE(ipsa);
341}
342
343void
344sadb_delete_cluster(ipsa_t *assoc)
345{
346	uint8_t protocol;
347
348	if (cl_inet_deletespi &&
349	    ((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
350	    (assoc->ipsa_state == IPSA_STATE_MATURE))) {
351		protocol = (assoc->ipsa_type == SADB_SATYPE_AH) ?
352		    IPPROTO_AH : IPPROTO_ESP;
353		cl_inet_deletespi(assoc->ipsa_netstack->netstack_stackid,
354		    protocol, assoc->ipsa_spi, NULL);
355	}
356}
357
358/*
359 * Create a larval security association with the specified SPI.	 All other
360 * fields are zeroed.
361 */
362static ipsa_t *
363sadb_makelarvalassoc(uint32_t spi, uint32_t *src, uint32_t *dst, int addrfam,
364    netstack_t *ns)
365{
366	ipsa_t *newbie;
367
368	/*
369	 * Allocate...
370	 */
371
372	newbie = (ipsa_t *)kmem_zalloc(sizeof (ipsa_t), KM_NOSLEEP);
373	if (newbie == NULL) {
374		/* Can't make new larval SA. */
375		return (NULL);
376	}
377
378	/* Assigned requested SPI, assume caller does SPI allocation magic. */
379	newbie->ipsa_spi = spi;
380	newbie->ipsa_netstack = ns;	/* No netstack_hold */
381
382	/*
383	 * Copy addresses...
384	 */
385
386	IPSA_COPY_ADDR(newbie->ipsa_srcaddr, src, addrfam);
387	IPSA_COPY_ADDR(newbie->ipsa_dstaddr, dst, addrfam);
388
389	newbie->ipsa_addrfam = addrfam;
390
391	/*
392	 * Set common initialization values, including refcnt.
393	 */
394	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
395	newbie->ipsa_state = IPSA_STATE_LARVAL;
396	newbie->ipsa_refcnt = 1;
397	newbie->ipsa_freefunc = sadb_freeassoc;
398
399	/*
400	 * There aren't a lot of other common initialization values, as
401	 * they are copied in from the PF_KEY message.
402	 */
403
404	return (newbie);
405}
406
407/*
408 * Call me to initialize a security association fanout.
409 */
410static int
411sadb_init_fanout(isaf_t **tablep, uint_t size, int kmflag)
412{
413	isaf_t *table;
414	int i;
415
416	table = (isaf_t *)kmem_alloc(size * sizeof (*table), kmflag);
417	*tablep = table;
418
419	if (table == NULL)
420		return (ENOMEM);
421
422	for (i = 0; i < size; i++) {
423		mutex_init(&(table[i].isaf_lock), NULL, MUTEX_DEFAULT, NULL);
424		table[i].isaf_ipsa = NULL;
425		table[i].isaf_gen = 0;
426	}
427
428	return (0);
429}
430
431/*
432 * Call me to initialize an acquire fanout
433 */
434static int
435sadb_init_acfanout(iacqf_t **tablep, uint_t size, int kmflag)
436{
437	iacqf_t *table;
438	int i;
439
440	table = (iacqf_t *)kmem_alloc(size * sizeof (*table), kmflag);
441	*tablep = table;
442
443	if (table == NULL)
444		return (ENOMEM);
445
446	for (i = 0; i < size; i++) {
447		mutex_init(&(table[i].iacqf_lock), NULL, MUTEX_DEFAULT, NULL);
448		table[i].iacqf_ipsacq = NULL;
449	}
450
451	return (0);
452}
453
454/*
455 * Attempt to initialize an SADB instance.  On failure, return ENOMEM;
456 * caller must clean up partial allocations.
457 */
458static int
459sadb_init_trial(sadb_t *sp, uint_t size, int kmflag)
460{
461	ASSERT(sp->sdb_of == NULL);
462	ASSERT(sp->sdb_if == NULL);
463	ASSERT(sp->sdb_acq == NULL);
464
465	sp->sdb_hashsize = size;
466	if (sadb_init_fanout(&sp->sdb_of, size, kmflag) != 0)
467		return (ENOMEM);
468	if (sadb_init_fanout(&sp->sdb_if, size, kmflag) != 0)
469		return (ENOMEM);
470	if (sadb_init_acfanout(&sp->sdb_acq, size, kmflag) != 0)
471		return (ENOMEM);
472
473	return (0);
474}
475
476/*
477 * Call me to initialize an SADB instance; fall back to default size on failure.
478 */
479static void
480sadb_init(const char *name, sadb_t *sp, uint_t size, uint_t ver,
481    netstack_t *ns)
482{
483	ASSERT(sp->sdb_of == NULL);
484	ASSERT(sp->sdb_if == NULL);
485	ASSERT(sp->sdb_acq == NULL);
486
487	if (size < IPSEC_DEFAULT_HASH_SIZE)
488		size = IPSEC_DEFAULT_HASH_SIZE;
489
490	if (sadb_init_trial(sp, size, KM_NOSLEEP) != 0) {
491
492		cmn_err(CE_WARN,
493		    "Unable to allocate %u entry IPv%u %s SADB hash table",
494		    size, ver, name);
495
496		sadb_destroy(sp, ns);
497		size = IPSEC_DEFAULT_HASH_SIZE;
498		cmn_err(CE_WARN, "Falling back to %d entries", size);
499		(void) sadb_init_trial(sp, size, KM_SLEEP);
500	}
501}
502
503
504/*
505 * Initialize an SADB-pair.
506 */
507void
508sadbp_init(const char *name, sadbp_t *sp, int type, int size, netstack_t *ns)
509{
510	sadb_init(name, &sp->s_v4, size, 4, ns);
511	sadb_init(name, &sp->s_v6, size, 6, ns);
512
513	sp->s_satype = type;
514
515	ASSERT((type == SADB_SATYPE_AH) || (type == SADB_SATYPE_ESP));
516	if (type == SADB_SATYPE_AH) {
517		ipsec_stack_t	*ipss = ns->netstack_ipsec;
518
519		ip_drop_register(&ipss->ipsec_sadb_dropper, "IPsec SADB");
520		sp->s_addflags = AH_ADD_SETTABLE_FLAGS;
521		sp->s_updateflags = AH_UPDATE_SETTABLE_FLAGS;
522	} else {
523		sp->s_addflags = ESP_ADD_SETTABLE_FLAGS;
524		sp->s_updateflags = ESP_UPDATE_SETTABLE_FLAGS;
525	}
526}
527
528/*
529 * Deliver a single SADB_DUMP message representing a single SA.  This is
530 * called many times by sadb_dump().
531 *
532 * If the return value of this is ENOBUFS (not the same as ENOMEM), then
533 * the caller should take that as a hint that dupb() on the "original answer"
534 * failed, and that perhaps the caller should try again with a copyb()ed
535 * "original answer".
536 */
537static int
538sadb_dump_deliver(queue_t *pfkey_q, mblk_t *original_answer, ipsa_t *ipsa,
539    sadb_msg_t *samsg)
540{
541	mblk_t *answer;
542
543	answer = dupb(original_answer);
544	if (answer == NULL)
545		return (ENOBUFS);
546	answer->b_cont = sadb_sa2msg(ipsa, samsg);
547	if (answer->b_cont == NULL) {
548		freeb(answer);
549		return (ENOMEM);
550	}
551
552	/* Just do a putnext, and let keysock deal with flow control. */
553	putnext(pfkey_q, answer);
554	return (0);
555}
556
557/*
558 * Common function to allocate and prepare a keysock_out_t M_CTL message.
559 */
560mblk_t *
561sadb_keysock_out(minor_t serial)
562{
563	mblk_t *mp;
564	keysock_out_t *kso;
565
566	mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
567	if (mp != NULL) {
568		mp->b_datap->db_type = M_CTL;
569		mp->b_wptr += sizeof (ipsec_info_t);
570		kso = (keysock_out_t *)mp->b_rptr;
571		kso->ks_out_type = KEYSOCK_OUT;
572		kso->ks_out_len = sizeof (*kso);
573		kso->ks_out_serial = serial;
574	}
575
576	return (mp);
577}
578
579/*
580 * Perform an SADB_DUMP, spewing out every SA in an array of SA fanouts
581 * to keysock.
582 */
583static int
584sadb_dump_fanout(queue_t *pfkey_q, mblk_t *mp, minor_t serial, isaf_t *fanout,
585    int num_entries, boolean_t do_peers, time_t active_time)
586{
587	int i, error = 0;
588	mblk_t *original_answer;
589	ipsa_t *walker;
590	sadb_msg_t *samsg;
591	time_t	current;
592
593	/*
594	 * For each IPSA hash bucket do:
595	 *	- Hold the mutex
596	 *	- Walk each entry, doing an sadb_dump_deliver() on it.
597	 */
598	ASSERT(mp->b_cont != NULL);
599	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
600
601	original_answer = sadb_keysock_out(serial);
602	if (original_answer == NULL)
603		return (ENOMEM);
604
605	current = gethrestime_sec();
606	for (i = 0; i < num_entries; i++) {
607		mutex_enter(&fanout[i].isaf_lock);
608		for (walker = fanout[i].isaf_ipsa; walker != NULL;
609		    walker = walker->ipsa_next) {
610			if (!do_peers && walker->ipsa_haspeer)
611				continue;
612			if ((active_time != 0) &&
613			    ((current - walker->ipsa_lastuse) > active_time))
614				continue;
615			error = sadb_dump_deliver(pfkey_q, original_answer,
616			    walker, samsg);
617			if (error == ENOBUFS) {
618				mblk_t *new_original_answer;
619
620				/* Ran out of dupb's.  Try a copyb. */
621				new_original_answer = copyb(original_answer);
622				if (new_original_answer == NULL) {
623					error = ENOMEM;
624				} else {
625					freeb(original_answer);
626					original_answer = new_original_answer;
627					error = sadb_dump_deliver(pfkey_q,
628					    original_answer, walker, samsg);
629				}
630			}
631			if (error != 0)
632				break;	/* out of for loop. */
633		}
634		mutex_exit(&fanout[i].isaf_lock);
635		if (error != 0)
636			break;	/* out of for loop. */
637	}
638
639	freeb(original_answer);
640	return (error);
641}
642
643/*
644 * Dump an entire SADB; outbound first, then inbound.
645 */
646
647int
648sadb_dump(queue_t *pfkey_q, mblk_t *mp, keysock_in_t *ksi, sadb_t *sp)
649{
650	int error;
651	time_t	active_time = 0;
652	sadb_x_edump_t	*edump =
653	    (sadb_x_edump_t *)ksi->ks_in_extv[SADB_X_EXT_EDUMP];
654
655	if (edump != NULL) {
656		active_time = edump->sadb_x_edump_timeout;
657	}
658
659	/* Dump outbound */
660	error = sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_of,
661	    sp->sdb_hashsize, B_TRUE, active_time);
662	if (error)
663		return (error);
664
665	/* Dump inbound */
666	return sadb_dump_fanout(pfkey_q, mp, ksi->ks_in_serial, sp->sdb_if,
667	    sp->sdb_hashsize, B_FALSE, active_time);
668}
669
670/*
671 * Generic sadb table walker.
672 *
673 * Call "walkfn" for each SA in each bucket in "table"; pass the
674 * bucket, the entry and "cookie" to the callback function.
675 * Take care to ensure that walkfn can delete the SA without screwing
676 * up our traverse.
677 *
678 * The bucket is locked for the duration of the callback, both so that the
679 * callback can just call sadb_unlinkassoc() when it wants to delete something,
680 * and so that no new entries are added while we're walking the list.
681 */
682static void
683sadb_walker(isaf_t *table, uint_t numentries,
684    void (*walkfn)(isaf_t *head, ipsa_t *entry, void *cookie),
685    void *cookie)
686{
687	int i;
688	for (i = 0; i < numentries; i++) {
689		ipsa_t *entry, *next;
690
691		mutex_enter(&table[i].isaf_lock);
692
693		for (entry = table[i].isaf_ipsa; entry != NULL;
694		    entry = next) {
695			next = entry->ipsa_next;
696			(*walkfn)(&table[i], entry, cookie);
697		}
698		mutex_exit(&table[i].isaf_lock);
699	}
700}
701
702/*
703 * Call me to free up a security association fanout.  Use the forever
704 * variable to indicate freeing up the SAs (forever == B_FALSE, e.g.
705 * an SADB_FLUSH message), or destroying everything (forever == B_TRUE,
706 * when a module is unloaded).
707 */
708static void
709sadb_destroyer(isaf_t **tablep, uint_t numentries, boolean_t forever,
710    boolean_t inbound)
711{
712	int i;
713	isaf_t *table = *tablep;
714	uint8_t protocol;
715	ipsa_t *sa;
716	netstackid_t sid;
717
718	if (table == NULL)
719		return;
720
721	for (i = 0; i < numentries; i++) {
722		mutex_enter(&table[i].isaf_lock);
723		while ((sa = table[i].isaf_ipsa) != NULL) {
724			if (inbound && cl_inet_deletespi &&
725			    (sa->ipsa_state != IPSA_STATE_ACTIVE_ELSEWHERE) &&
726			    (sa->ipsa_state != IPSA_STATE_IDLE)) {
727				protocol = (sa->ipsa_type == SADB_SATYPE_AH) ?
728				    IPPROTO_AH : IPPROTO_ESP;
729				sid = sa->ipsa_netstack->netstack_stackid;
730				cl_inet_deletespi(sid, protocol, sa->ipsa_spi,
731				    NULL);
732			}
733			sadb_unlinkassoc(sa);
734		}
735		table[i].isaf_gen++;
736		mutex_exit(&table[i].isaf_lock);
737		if (forever)
738			mutex_destroy(&(table[i].isaf_lock));
739	}
740
741	if (forever) {
742		*tablep = NULL;
743		kmem_free(table, numentries * sizeof (*table));
744	}
745}
746
747/*
748 * Entry points to sadb_destroyer().
749 */
750static void
751sadb_flush(sadb_t *sp, netstack_t *ns)
752{
753	/*
754	 * Flush out each bucket, one at a time.  Were it not for keysock's
755	 * enforcement, there would be a subtlety where I could add on the
756	 * heels of a flush.  With keysock's enforcement, however, this
757	 * makes ESP's job easy.
758	 */
759	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_FALSE, B_FALSE);
760	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_FALSE, B_TRUE);
761
762	/* For each acquire, destroy it; leave the bucket mutex alone. */
763	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_FALSE, ns);
764}
765
766static void
767sadb_destroy(sadb_t *sp, netstack_t *ns)
768{
769	sadb_destroyer(&sp->sdb_of, sp->sdb_hashsize, B_TRUE, B_FALSE);
770	sadb_destroyer(&sp->sdb_if, sp->sdb_hashsize, B_TRUE, B_TRUE);
771
772	/* For each acquire, destroy it, including the bucket mutex. */
773	sadb_destroy_acqlist(&sp->sdb_acq, sp->sdb_hashsize, B_TRUE, ns);
774
775	ASSERT(sp->sdb_of == NULL);
776	ASSERT(sp->sdb_if == NULL);
777	ASSERT(sp->sdb_acq == NULL);
778}
779
780void
781sadbp_flush(sadbp_t *spp, netstack_t *ns)
782{
783	sadb_flush(&spp->s_v4, ns);
784	sadb_flush(&spp->s_v6, ns);
785}
786
787void
788sadbp_destroy(sadbp_t *spp, netstack_t *ns)
789{
790	sadb_destroy(&spp->s_v4, ns);
791	sadb_destroy(&spp->s_v6, ns);
792
793	if (spp->s_satype == SADB_SATYPE_AH) {
794		ipsec_stack_t	*ipss = ns->netstack_ipsec;
795
796		ip_drop_unregister(&ipss->ipsec_sadb_dropper);
797	}
798}
799
800
801/*
802 * Check hard vs. soft lifetimes.  If there's a reality mismatch (e.g.
803 * soft lifetimes > hard lifetimes) return an appropriate diagnostic for
804 * EINVAL.
805 */
806int
807sadb_hardsoftchk(sadb_lifetime_t *hard, sadb_lifetime_t *soft,
808    sadb_lifetime_t *idle)
809{
810	if (hard == NULL || soft == NULL)
811		return (0);
812
813	if (hard->sadb_lifetime_allocations != 0 &&
814	    soft->sadb_lifetime_allocations != 0 &&
815	    hard->sadb_lifetime_allocations < soft->sadb_lifetime_allocations)
816		return (SADB_X_DIAGNOSTIC_ALLOC_HSERR);
817
818	if (hard->sadb_lifetime_bytes != 0 &&
819	    soft->sadb_lifetime_bytes != 0 &&
820	    hard->sadb_lifetime_bytes < soft->sadb_lifetime_bytes)
821		return (SADB_X_DIAGNOSTIC_BYTES_HSERR);
822
823	if (hard->sadb_lifetime_addtime != 0 &&
824	    soft->sadb_lifetime_addtime != 0 &&
825	    hard->sadb_lifetime_addtime < soft->sadb_lifetime_addtime)
826		return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
827
828	if (hard->sadb_lifetime_usetime != 0 &&
829	    soft->sadb_lifetime_usetime != 0 &&
830	    hard->sadb_lifetime_usetime < soft->sadb_lifetime_usetime)
831		return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
832
833	if (idle != NULL) {
834		if (hard->sadb_lifetime_addtime != 0 &&
835		    idle->sadb_lifetime_addtime != 0 &&
836		    hard->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
837			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
838
839		if (soft->sadb_lifetime_addtime != 0 &&
840		    idle->sadb_lifetime_addtime != 0 &&
841		    soft->sadb_lifetime_addtime < idle->sadb_lifetime_addtime)
842			return (SADB_X_DIAGNOSTIC_ADDTIME_HSERR);
843
844		if (hard->sadb_lifetime_usetime != 0 &&
845		    idle->sadb_lifetime_usetime != 0 &&
846		    hard->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
847			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
848
849		if (soft->sadb_lifetime_usetime != 0 &&
850		    idle->sadb_lifetime_usetime != 0 &&
851		    soft->sadb_lifetime_usetime < idle->sadb_lifetime_usetime)
852			return (SADB_X_DIAGNOSTIC_USETIME_HSERR);
853	}
854
855	return (0);
856}
857
858/*
859 * Sanity check sensitivity labels.
860 *
861 * For now, just reject labels on unlabeled systems.
862 */
863int
864sadb_labelchk(keysock_in_t *ksi)
865{
866	if (!is_system_labeled()) {
867		if (ksi->ks_in_extv[SADB_EXT_SENSITIVITY] != NULL)
868			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
869
870		if (ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS] != NULL)
871			return (SADB_X_DIAGNOSTIC_BAD_LABEL);
872	}
873
874	return (0);
875}
876
877/*
878 * Clone a security association for the purposes of inserting a single SA
879 * into inbound and outbound tables respectively. This function should only
880 * be called from sadb_common_add().
881 */
882static ipsa_t *
883sadb_cloneassoc(ipsa_t *ipsa)
884{
885	ipsa_t *newbie;
886	boolean_t error = B_FALSE;
887
888	ASSERT(MUTEX_NOT_HELD(&(ipsa->ipsa_lock)));
889
890	newbie = kmem_alloc(sizeof (ipsa_t), KM_NOSLEEP);
891	if (newbie == NULL)
892		return (NULL);
893
894	/* Copy over what we can. */
895	*newbie = *ipsa;
896
897	/* bzero and initialize locks, in case *_init() allocates... */
898	mutex_init(&newbie->ipsa_lock, NULL, MUTEX_DEFAULT, NULL);
899
900	if (newbie->ipsa_tsl != NULL)
901		label_hold(newbie->ipsa_tsl);
902
903	if (newbie->ipsa_otsl != NULL)
904		label_hold(newbie->ipsa_otsl);
905
906	/*
907	 * While somewhat dain-bramaged, the most graceful way to
908	 * recover from errors is to keep plowing through the
909	 * allocations, and getting what I can.  It's easier to call
910	 * sadb_freeassoc() on the stillborn clone when all the
911	 * pointers aren't pointing to the parent's data.
912	 */
913
914	if (ipsa->ipsa_authkey != NULL) {
915		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
916		    KM_NOSLEEP);
917		if (newbie->ipsa_authkey == NULL) {
918			error = B_TRUE;
919		} else {
920			bcopy(ipsa->ipsa_authkey, newbie->ipsa_authkey,
921			    newbie->ipsa_authkeylen);
922
923			newbie->ipsa_kcfauthkey.ck_data =
924			    newbie->ipsa_authkey;
925		}
926
927		if (newbie->ipsa_amech.cm_param != NULL) {
928			newbie->ipsa_amech.cm_param =
929			    (char *)&newbie->ipsa_mac_len;
930		}
931	}
932
933	if (ipsa->ipsa_encrkey != NULL) {
934		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
935		    KM_NOSLEEP);
936		if (newbie->ipsa_encrkey == NULL) {
937			error = B_TRUE;
938		} else {
939			bcopy(ipsa->ipsa_encrkey, newbie->ipsa_encrkey,
940			    newbie->ipsa_encrkeylen);
941
942			newbie->ipsa_kcfencrkey.ck_data =
943			    newbie->ipsa_encrkey;
944		}
945	}
946
947	newbie->ipsa_authtmpl = NULL;
948	newbie->ipsa_encrtmpl = NULL;
949	newbie->ipsa_haspeer = B_TRUE;
950
951	if (ipsa->ipsa_src_cid != NULL) {
952		newbie->ipsa_src_cid = ipsa->ipsa_src_cid;
953		IPSID_REFHOLD(ipsa->ipsa_src_cid);
954	}
955
956	if (ipsa->ipsa_dst_cid != NULL) {
957		newbie->ipsa_dst_cid = ipsa->ipsa_dst_cid;
958		IPSID_REFHOLD(ipsa->ipsa_dst_cid);
959	}
960
961	if (error) {
962		sadb_freeassoc(newbie);
963		return (NULL);
964	}
965
966	return (newbie);
967}
968
969/*
970 * Initialize a SADB address extension at the address specified by addrext.
971 * Return a pointer to the end of the new address extension.
972 */
973static uint8_t *
974sadb_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
975    sa_family_t af, uint32_t *addr, uint16_t port, uint8_t proto, int prefix)
976{
977	struct sockaddr_in *sin;
978	struct sockaddr_in6 *sin6;
979	uint8_t *cur = start;
980	int addrext_len;
981	int sin_len;
982	sadb_address_t *addrext	= (sadb_address_t *)cur;
983
984	if (cur == NULL)
985		return (NULL);
986
987	cur += sizeof (*addrext);
988	if (cur > end)
989		return (NULL);
990
991	addrext->sadb_address_proto = proto;
992	addrext->sadb_address_prefixlen = prefix;
993	addrext->sadb_address_reserved = 0;
994	addrext->sadb_address_exttype = exttype;
995
996	switch (af) {
997	case AF_INET:
998		sin = (struct sockaddr_in *)cur;
999		sin_len = sizeof (*sin);
1000		cur += sin_len;
1001		if (cur > end)
1002			return (NULL);
1003
1004		sin->sin_family = af;
1005		bzero(sin->sin_zero, sizeof (sin->sin_zero));
1006		sin->sin_port = port;
1007		IPSA_COPY_ADDR(&sin->sin_addr, addr, af);
1008		break;
1009	case AF_INET6:
1010		sin6 = (struct sockaddr_in6 *)cur;
1011		sin_len = sizeof (*sin6);
1012		cur += sin_len;
1013		if (cur > end)
1014			return (NULL);
1015
1016		bzero(sin6, sizeof (*sin6));
1017		sin6->sin6_family = af;
1018		sin6->sin6_port = port;
1019		IPSA_COPY_ADDR(&sin6->sin6_addr, addr, af);
1020		break;
1021	}
1022
1023	addrext_len = roundup(cur - start, sizeof (uint64_t));
1024	addrext->sadb_address_len = SADB_8TO64(addrext_len);
1025
1026	cur = start + addrext_len;
1027	if (cur > end)
1028		cur = NULL;
1029
1030	return (cur);
1031}
1032
1033/*
1034 * Construct a key management cookie extension.
1035 */
1036
1037static uint8_t *
1038sadb_make_kmc_ext(uint8_t *cur, uint8_t *end, uint32_t kmp, uint64_t kmc)
1039{
1040	sadb_x_kmc_t *kmcext = (sadb_x_kmc_t *)cur;
1041
1042	if (cur == NULL)
1043		return (NULL);
1044
1045	cur += sizeof (*kmcext);
1046
1047	if (cur > end)
1048		return (NULL);
1049
1050	kmcext->sadb_x_kmc_len = SADB_8TO64(sizeof (*kmcext));
1051	kmcext->sadb_x_kmc_exttype = SADB_X_EXT_KM_COOKIE;
1052	kmcext->sadb_x_kmc_proto = kmp;
1053	kmcext->sadb_x_kmc_cookie64 = kmc;
1054
1055	return (cur);
1056}
1057
1058/*
1059 * Given an original message header with sufficient space following it, and an
1060 * SA, construct a full PF_KEY message with all of the relevant extensions.
1061 * This is mostly used for SADB_GET, and SADB_DUMP.
1062 */
1063static mblk_t *
1064sadb_sa2msg(ipsa_t *ipsa, sadb_msg_t *samsg)
1065{
1066	int alloclen, addrsize, paddrsize, authsize, encrsize;
1067	int srcidsize, dstidsize, senslen, osenslen;
1068	sa_family_t fam, pfam;	/* Address family for SADB_EXT_ADDRESS */
1069				/* src/dst and proxy sockaddrs. */
1070
1071	authsize = 0;
1072	encrsize = 0;
1073	pfam = 0;
1074	srcidsize = 0;
1075	dstidsize = 0;
1076	paddrsize = 0;
1077	senslen = 0;
1078	osenslen = 0;
1079	/*
1080	 * The following are pointers into the PF_KEY message this PF_KEY
1081	 * message creates.
1082	 */
1083	sadb_msg_t *newsamsg;
1084	sadb_sa_t *assoc;
1085	sadb_lifetime_t *lt;
1086	sadb_key_t *key;
1087	sadb_ident_t *ident;
1088	sadb_sens_t *sens;
1089	sadb_ext_t *walker;	/* For when we need a generic ext. pointer. */
1090	sadb_x_replay_ctr_t *repl_ctr;
1091	sadb_x_pair_t *pair_ext;
1092
1093	mblk_t *mp;
1094	uint8_t *cur, *end;
1095	/* These indicate the presence of the above extension fields. */
1096	boolean_t soft = B_FALSE, hard = B_FALSE;
1097	boolean_t isrc = B_FALSE, idst = B_FALSE;
1098	boolean_t auth = B_FALSE, encr = B_FALSE;
1099	boolean_t sensinteg = B_FALSE, osensinteg = B_FALSE;
1100	boolean_t srcid = B_FALSE, dstid = B_FALSE;
1101	boolean_t idle;
1102	boolean_t paired;
1103	uint32_t otherspi;
1104
1105	/* First off, figure out the allocation length for this message. */
1106	/*
1107	 * Constant stuff.  This includes base, SA, address (src, dst),
1108	 * and lifetime (current).
1109	 */
1110	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t) +
1111	    sizeof (sadb_lifetime_t);
1112	otherspi = 0;
1113
1114	fam = ipsa->ipsa_addrfam;
1115	switch (fam) {
1116	case AF_INET:
1117		addrsize = roundup(sizeof (struct sockaddr_in) +
1118		    sizeof (sadb_address_t), sizeof (uint64_t));
1119		break;
1120	case AF_INET6:
1121		addrsize = roundup(sizeof (struct sockaddr_in6) +
1122		    sizeof (sadb_address_t), sizeof (uint64_t));
1123		break;
1124	default:
1125		return (NULL);
1126	}
1127	/*
1128	 * Allocate TWO address extensions, for source and destination.
1129	 * (Thus, the * 2.)
1130	 */
1131	alloclen += addrsize * 2;
1132	if (ipsa->ipsa_flags & IPSA_F_NATT_REM)
1133		alloclen += addrsize;
1134	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC)
1135		alloclen += addrsize;
1136
1137	if (ipsa->ipsa_flags & IPSA_F_PAIRED) {
1138		paired = B_TRUE;
1139		alloclen += sizeof (sadb_x_pair_t);
1140		otherspi = ipsa->ipsa_otherspi;
1141	} else {
1142		paired = B_FALSE;
1143	}
1144
1145	/* How 'bout other lifetimes? */
1146	if (ipsa->ipsa_softaddlt != 0 || ipsa->ipsa_softuselt != 0 ||
1147	    ipsa->ipsa_softbyteslt != 0 || ipsa->ipsa_softalloc != 0) {
1148		alloclen += sizeof (sadb_lifetime_t);
1149		soft = B_TRUE;
1150	}
1151
1152	if (ipsa->ipsa_hardaddlt != 0 || ipsa->ipsa_harduselt != 0 ||
1153	    ipsa->ipsa_hardbyteslt != 0 || ipsa->ipsa_hardalloc != 0) {
1154		alloclen += sizeof (sadb_lifetime_t);
1155		hard = B_TRUE;
1156	}
1157
1158	if (ipsa->ipsa_idleaddlt != 0 || ipsa->ipsa_idleuselt != 0) {
1159		alloclen += sizeof (sadb_lifetime_t);
1160		idle = B_TRUE;
1161	} else {
1162		idle = B_FALSE;
1163	}
1164
1165	/* Inner addresses. */
1166	if (ipsa->ipsa_innerfam != 0) {
1167		pfam = ipsa->ipsa_innerfam;
1168		switch (pfam) {
1169		case AF_INET6:
1170			paddrsize = roundup(sizeof (struct sockaddr_in6) +
1171			    sizeof (sadb_address_t), sizeof (uint64_t));
1172			break;
1173		case AF_INET:
1174			paddrsize = roundup(sizeof (struct sockaddr_in) +
1175			    sizeof (sadb_address_t), sizeof (uint64_t));
1176			break;
1177		default:
1178			cmn_err(CE_PANIC,
1179			    "IPsec SADB: Proxy length failure.\n");
1180			break;
1181		}
1182		isrc = B_TRUE;
1183		idst = B_TRUE;
1184		alloclen += 2 * paddrsize;
1185	}
1186
1187	/* For the following fields, assume that length != 0 ==> stuff */
1188	if (ipsa->ipsa_authkeylen != 0) {
1189		authsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_authkeylen,
1190		    sizeof (uint64_t));
1191		alloclen += authsize;
1192		auth = B_TRUE;
1193	}
1194
1195	if (ipsa->ipsa_encrkeylen != 0) {
1196		encrsize = roundup(sizeof (sadb_key_t) + ipsa->ipsa_encrkeylen +
1197		    ipsa->ipsa_nonce_len, sizeof (uint64_t));
1198		alloclen += encrsize;
1199		encr = B_TRUE;
1200	} else {
1201		encr = B_FALSE;
1202	}
1203
1204	if (ipsa->ipsa_tsl != NULL) {
1205		senslen = sadb_sens_len_from_label(ipsa->ipsa_tsl);
1206		alloclen += senslen;
1207		sensinteg = B_TRUE;
1208	}
1209
1210	if (ipsa->ipsa_otsl != NULL) {
1211		osenslen = sadb_sens_len_from_label(ipsa->ipsa_otsl);
1212		alloclen += osenslen;
1213		osensinteg = B_TRUE;
1214	}
1215
1216	/*
1217	 * Must use strlen() here for lengths.	Identities use NULL
1218	 * pointers to indicate their nonexistence.
1219	 */
1220	if (ipsa->ipsa_src_cid != NULL) {
1221		srcidsize = roundup(sizeof (sadb_ident_t) +
1222		    strlen(ipsa->ipsa_src_cid->ipsid_cid) + 1,
1223		    sizeof (uint64_t));
1224		alloclen += srcidsize;
1225		srcid = B_TRUE;
1226	}
1227
1228	if (ipsa->ipsa_dst_cid != NULL) {
1229		dstidsize = roundup(sizeof (sadb_ident_t) +
1230		    strlen(ipsa->ipsa_dst_cid->ipsid_cid) + 1,
1231		    sizeof (uint64_t));
1232		alloclen += dstidsize;
1233		dstid = B_TRUE;
1234	}
1235
1236	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0))
1237		alloclen += sizeof (sadb_x_kmc_t);
1238
1239	if (ipsa->ipsa_replay != 0) {
1240		alloclen += sizeof (sadb_x_replay_ctr_t);
1241	}
1242
1243	/* Make sure the allocation length is a multiple of 8 bytes. */
1244	ASSERT((alloclen & 0x7) == 0);
1245
1246	/* XXX Possibly make it esballoc, with a bzero-ing free_ftn. */
1247	mp = allocb(alloclen, BPRI_HI);
1248	if (mp == NULL)
1249		return (NULL);
1250	bzero(mp->b_rptr, alloclen);
1251
1252	mp->b_wptr += alloclen;
1253	end = mp->b_wptr;
1254	newsamsg = (sadb_msg_t *)mp->b_rptr;
1255	*newsamsg = *samsg;
1256	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
1257
1258	mutex_enter(&ipsa->ipsa_lock);	/* Since I'm grabbing SA fields... */
1259
1260	newsamsg->sadb_msg_satype = ipsa->ipsa_type;
1261
1262	assoc = (sadb_sa_t *)(newsamsg + 1);
1263	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
1264	assoc->sadb_sa_exttype = SADB_EXT_SA;
1265	assoc->sadb_sa_spi = ipsa->ipsa_spi;
1266	assoc->sadb_sa_replay = ipsa->ipsa_replay_wsize;
1267	assoc->sadb_sa_state = ipsa->ipsa_state;
1268	assoc->sadb_sa_auth = ipsa->ipsa_auth_alg;
1269	assoc->sadb_sa_encrypt = ipsa->ipsa_encr_alg;
1270	assoc->sadb_sa_flags = ipsa->ipsa_flags;
1271
1272	lt = (sadb_lifetime_t *)(assoc + 1);
1273	lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1274	lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
1275	/* We do not support the concept. */
1276	lt->sadb_lifetime_allocations = 0;
1277	lt->sadb_lifetime_bytes = ipsa->ipsa_bytes;
1278	lt->sadb_lifetime_addtime = ipsa->ipsa_addtime;
1279	lt->sadb_lifetime_usetime = ipsa->ipsa_usetime;
1280
1281	if (hard) {
1282		lt++;
1283		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1284		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
1285		lt->sadb_lifetime_allocations = ipsa->ipsa_hardalloc;
1286		lt->sadb_lifetime_bytes = ipsa->ipsa_hardbyteslt;
1287		lt->sadb_lifetime_addtime = ipsa->ipsa_hardaddlt;
1288		lt->sadb_lifetime_usetime = ipsa->ipsa_harduselt;
1289	}
1290
1291	if (soft) {
1292		lt++;
1293		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1294		lt->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
1295		lt->sadb_lifetime_allocations = ipsa->ipsa_softalloc;
1296		lt->sadb_lifetime_bytes = ipsa->ipsa_softbyteslt;
1297		lt->sadb_lifetime_addtime = ipsa->ipsa_softaddlt;
1298		lt->sadb_lifetime_usetime = ipsa->ipsa_softuselt;
1299	}
1300
1301	if (idle) {
1302		lt++;
1303		lt->sadb_lifetime_len = SADB_8TO64(sizeof (*lt));
1304		lt->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
1305		lt->sadb_lifetime_addtime = ipsa->ipsa_idleaddlt;
1306		lt->sadb_lifetime_usetime = ipsa->ipsa_idleuselt;
1307	}
1308
1309	cur = (uint8_t *)(lt + 1);
1310
1311	/* NOTE:  Don't fill in ports here if we are a tunnel-mode SA. */
1312	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, fam,
1313	    ipsa->ipsa_srcaddr, (!isrc && !idst) ? SA_SRCPORT(ipsa) : 0,
1314	    SA_PROTO(ipsa), 0);
1315	if (cur == NULL) {
1316		freemsg(mp);
1317		mp = NULL;
1318		goto bail;
1319	}
1320
1321	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, fam,
1322	    ipsa->ipsa_dstaddr, (!isrc && !idst) ? SA_DSTPORT(ipsa) : 0,
1323	    SA_PROTO(ipsa), 0);
1324	if (cur == NULL) {
1325		freemsg(mp);
1326		mp = NULL;
1327		goto bail;
1328	}
1329
1330	if (ipsa->ipsa_flags & IPSA_F_NATT_LOC) {
1331		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_LOC,
1332		    fam, &ipsa->ipsa_natt_addr_loc, ipsa->ipsa_local_nat_port,
1333		    IPPROTO_UDP, 0);
1334		if (cur == NULL) {
1335			freemsg(mp);
1336			mp = NULL;
1337			goto bail;
1338		}
1339	}
1340
1341	if (ipsa->ipsa_flags & IPSA_F_NATT_REM) {
1342		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_NATT_REM,
1343		    fam, &ipsa->ipsa_natt_addr_rem, ipsa->ipsa_remote_nat_port,
1344		    IPPROTO_UDP, 0);
1345		if (cur == NULL) {
1346			freemsg(mp);
1347			mp = NULL;
1348			goto bail;
1349		}
1350	}
1351
1352	/* If we are a tunnel-mode SA, fill in the inner-selectors. */
1353	if (isrc) {
1354		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
1355		    pfam, ipsa->ipsa_innersrc, SA_SRCPORT(ipsa),
1356		    SA_IPROTO(ipsa), ipsa->ipsa_innersrcpfx);
1357		if (cur == NULL) {
1358			freemsg(mp);
1359			mp = NULL;
1360			goto bail;
1361		}
1362	}
1363
1364	if (idst) {
1365		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
1366		    pfam, ipsa->ipsa_innerdst, SA_DSTPORT(ipsa),
1367		    SA_IPROTO(ipsa), ipsa->ipsa_innerdstpfx);
1368		if (cur == NULL) {
1369			freemsg(mp);
1370			mp = NULL;
1371			goto bail;
1372		}
1373	}
1374
1375	if ((ipsa->ipsa_kmp != 0) || (ipsa->ipsa_kmc != 0)) {
1376		cur = sadb_make_kmc_ext(cur, end,
1377		    ipsa->ipsa_kmp, ipsa->ipsa_kmc);
1378		if (cur == NULL) {
1379			freemsg(mp);
1380			mp = NULL;
1381			goto bail;
1382		}
1383	}
1384
1385	walker = (sadb_ext_t *)cur;
1386	if (auth) {
1387		key = (sadb_key_t *)walker;
1388		key->sadb_key_len = SADB_8TO64(authsize);
1389		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
1390		key->sadb_key_bits = ipsa->ipsa_authkeybits;
1391		key->sadb_key_reserved = 0;
1392		bcopy(ipsa->ipsa_authkey, key + 1, ipsa->ipsa_authkeylen);
1393		walker = (sadb_ext_t *)((uint64_t *)walker +
1394		    walker->sadb_ext_len);
1395	}
1396
1397	if (encr) {
1398		uint8_t *buf_ptr;
1399		key = (sadb_key_t *)walker;
1400		key->sadb_key_len = SADB_8TO64(encrsize);
1401		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
1402		key->sadb_key_bits = ipsa->ipsa_encrkeybits;
1403		key->sadb_key_reserved = ipsa->ipsa_saltbits;
1404		buf_ptr = (uint8_t *)(key + 1);
1405		bcopy(ipsa->ipsa_encrkey, buf_ptr, ipsa->ipsa_encrkeylen);
1406		if (ipsa->ipsa_salt != NULL) {
1407			buf_ptr += ipsa->ipsa_encrkeylen;
1408			bcopy(ipsa->ipsa_salt, buf_ptr, ipsa->ipsa_saltlen);
1409		}
1410		walker = (sadb_ext_t *)((uint64_t *)walker +
1411		    walker->sadb_ext_len);
1412	}
1413
1414	if (srcid) {
1415		ident = (sadb_ident_t *)walker;
1416		ident->sadb_ident_len = SADB_8TO64(srcidsize);
1417		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_SRC;
1418		ident->sadb_ident_type = ipsa->ipsa_src_cid->ipsid_type;
1419		ident->sadb_ident_id = 0;
1420		ident->sadb_ident_reserved = 0;
1421		(void) strcpy((char *)(ident + 1),
1422		    ipsa->ipsa_src_cid->ipsid_cid);
1423		walker = (sadb_ext_t *)((uint64_t *)walker +
1424		    walker->sadb_ext_len);
1425	}
1426
1427	if (dstid) {
1428		ident = (sadb_ident_t *)walker;
1429		ident->sadb_ident_len = SADB_8TO64(dstidsize);
1430		ident->sadb_ident_exttype = SADB_EXT_IDENTITY_DST;
1431		ident->sadb_ident_type = ipsa->ipsa_dst_cid->ipsid_type;
1432		ident->sadb_ident_id = 0;
1433		ident->sadb_ident_reserved = 0;
1434		(void) strcpy((char *)(ident + 1),
1435		    ipsa->ipsa_dst_cid->ipsid_cid);
1436		walker = (sadb_ext_t *)((uint64_t *)walker +
1437		    walker->sadb_ext_len);
1438	}
1439
1440	if (sensinteg) {
1441		sens = (sadb_sens_t *)walker;
1442		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
1443		    ipsa->ipsa_tsl, senslen);
1444
1445		walker = (sadb_ext_t *)((uint64_t *)walker +
1446		    walker->sadb_ext_len);
1447	}
1448
1449	if (osensinteg) {
1450		sens = (sadb_sens_t *)walker;
1451
1452		sadb_sens_from_label(sens, SADB_X_EXT_OUTER_SENS,
1453		    ipsa->ipsa_otsl, osenslen);
1454		if (ipsa->ipsa_mac_exempt)
1455			sens->sadb_x_sens_flags = SADB_X_SENS_IMPLICIT;
1456
1457		walker = (sadb_ext_t *)((uint64_t *)walker +
1458		    walker->sadb_ext_len);
1459	}
1460
1461	if (paired) {
1462		pair_ext = (sadb_x_pair_t *)walker;
1463
1464		pair_ext->sadb_x_pair_len = SADB_8TO64(sizeof (sadb_x_pair_t));
1465		pair_ext->sadb_x_pair_exttype = SADB_X_EXT_PAIR;
1466		pair_ext->sadb_x_pair_spi = otherspi;
1467
1468		walker = (sadb_ext_t *)((uint64_t *)walker +
1469		    walker->sadb_ext_len);
1470	}
1471
1472	if (ipsa->ipsa_replay != 0) {
1473		repl_ctr = (sadb_x_replay_ctr_t *)walker;
1474		repl_ctr->sadb_x_rc_len = SADB_8TO64(sizeof (*repl_ctr));
1475		repl_ctr->sadb_x_rc_exttype = SADB_X_EXT_REPLAY_VALUE;
1476		repl_ctr->sadb_x_rc_replay32 = ipsa->ipsa_replay;
1477		repl_ctr->sadb_x_rc_replay64 = 0;
1478		walker = (sadb_ext_t *)(repl_ctr + 1);
1479	}
1480
1481bail:
1482	/* Pardon any delays... */
1483	mutex_exit(&ipsa->ipsa_lock);
1484
1485	return (mp);
1486}
1487
1488/*
1489 * Strip out key headers or unmarked headers (SADB_EXT_KEY_*, SADB_EXT_UNKNOWN)
1490 * and adjust base message accordingly.
1491 *
1492 * Assume message is pulled up in one piece of contiguous memory.
1493 *
1494 * Say if we start off with:
1495 *
1496 * +------+----+-------------+-----------+---------------+---------------+
1497 * | base | SA | source addr | dest addr | rsrvd. or key | soft lifetime |
1498 * +------+----+-------------+-----------+---------------+---------------+
1499 *
1500 * we will end up with
1501 *
1502 * +------+----+-------------+-----------+---------------+
1503 * | base | SA | source addr | dest addr | soft lifetime |
1504 * +------+----+-------------+-----------+---------------+
1505 */
1506static void
1507sadb_strip(sadb_msg_t *samsg)
1508{
1509	sadb_ext_t *ext;
1510	uint8_t *target = NULL;
1511	uint8_t *msgend;
1512	int sofar = SADB_8TO64(sizeof (*samsg));
1513	int copylen;
1514
1515	ext = (sadb_ext_t *)(samsg + 1);
1516	msgend = (uint8_t *)samsg;
1517	msgend += SADB_64TO8(samsg->sadb_msg_len);
1518	while ((uint8_t *)ext < msgend) {
1519		if (ext->sadb_ext_type == SADB_EXT_RESERVED ||
1520		    ext->sadb_ext_type == SADB_EXT_KEY_AUTH ||
1521		    ext->sadb_ext_type == SADB_X_EXT_EDUMP ||
1522		    ext->sadb_ext_type == SADB_EXT_KEY_ENCRYPT) {
1523			/*
1524			 * Aha!	 I found a header to be erased.
1525			 */
1526
1527			if (target != NULL) {
1528				/*
1529				 * If I had a previous header to be erased,
1530				 * copy over it.  I can get away with just
1531				 * copying backwards because the target will
1532				 * always be 8 bytes behind the source.
1533				 */
1534				copylen = ((uint8_t *)ext) - (target +
1535				    SADB_64TO8(
1536				    ((sadb_ext_t *)target)->sadb_ext_len));
1537				ovbcopy(((uint8_t *)ext - copylen), target,
1538				    copylen);
1539				target += copylen;
1540				((sadb_ext_t *)target)->sadb_ext_len =
1541				    SADB_8TO64(((uint8_t *)ext) - target +
1542				    SADB_64TO8(ext->sadb_ext_len));
1543			} else {
1544				target = (uint8_t *)ext;
1545			}
1546		} else {
1547			sofar += ext->sadb_ext_len;
1548		}
1549
1550		ext = (sadb_ext_t *)(((uint64_t *)ext) + ext->sadb_ext_len);
1551	}
1552
1553	ASSERT((uint8_t *)ext == msgend);
1554
1555	if (target != NULL) {
1556		copylen = ((uint8_t *)ext) - (target +
1557		    SADB_64TO8(((sadb_ext_t *)target)->sadb_ext_len));
1558		if (copylen != 0)
1559			ovbcopy(((uint8_t *)ext - copylen), target, copylen);
1560	}
1561
1562	/* Adjust samsg. */
1563	samsg->sadb_msg_len = (uint16_t)sofar;
1564
1565	/* Assume all of the rest is cleared by caller in sadb_pfkey_echo(). */
1566}
1567
1568/*
1569 * AH needs to send an error to PF_KEY.	 Assume mp points to an M_CTL
1570 * followed by an M_DATA with a PF_KEY message in it.  The serial of
1571 * the sending keysock instance is included.
1572 */
1573void
1574sadb_pfkey_error(queue_t *pfkey_q, mblk_t *mp, int error, int diagnostic,
1575    uint_t serial)
1576{
1577	mblk_t *msg = mp->b_cont;
1578	sadb_msg_t *samsg;
1579	keysock_out_t *kso;
1580
1581	/*
1582	 * Enough functions call this to merit a NULL queue check.
1583	 */
1584	if (pfkey_q == NULL) {
1585		freemsg(mp);
1586		return;
1587	}
1588
1589	ASSERT(msg != NULL);
1590	ASSERT((mp->b_wptr - mp->b_rptr) == sizeof (ipsec_info_t));
1591	ASSERT((msg->b_wptr - msg->b_rptr) >= sizeof (sadb_msg_t));
1592	samsg = (sadb_msg_t *)msg->b_rptr;
1593	kso = (keysock_out_t *)mp->b_rptr;
1594
1595	kso->ks_out_type = KEYSOCK_OUT;
1596	kso->ks_out_len = sizeof (*kso);
1597	kso->ks_out_serial = serial;
1598
1599	/*
1600	 * Only send the base message up in the event of an error.
1601	 * Don't worry about bzero()-ing, because it was probably bogus
1602	 * anyway.
1603	 */
1604	msg->b_wptr = msg->b_rptr + sizeof (*samsg);
1605	samsg = (sadb_msg_t *)msg->b_rptr;
1606	samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
1607	samsg->sadb_msg_errno = (uint8_t)error;
1608	if (diagnostic != SADB_X_DIAGNOSTIC_PRESET)
1609		samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1610
1611	putnext(pfkey_q, mp);
1612}
1613
1614/*
1615 * Send a successful return packet back to keysock via the queue in pfkey_q.
1616 *
1617 * Often, an SA is associated with the reply message, it's passed in if needed,
1618 * and NULL if not.  BTW, that ipsa will have its refcnt appropriately held,
1619 * and the caller will release said refcnt.
1620 */
1621void
1622sadb_pfkey_echo(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
1623    keysock_in_t *ksi, ipsa_t *ipsa)
1624{
1625	keysock_out_t *kso;
1626	mblk_t *mp1;
1627	sadb_msg_t *newsamsg;
1628	uint8_t *oldend;
1629
1630	ASSERT((mp->b_cont != NULL) &&
1631	    ((void *)samsg == (void *)mp->b_cont->b_rptr) &&
1632	    ((void *)mp->b_rptr == (void *)ksi));
1633
1634	switch (samsg->sadb_msg_type) {
1635	case SADB_ADD:
1636	case SADB_UPDATE:
1637	case SADB_X_UPDATEPAIR:
1638	case SADB_X_DELPAIR_STATE:
1639	case SADB_FLUSH:
1640	case SADB_DUMP:
1641		/*
1642		 * I have all of the message already.  I just need to strip
1643		 * out the keying material and echo the message back.
1644		 *
1645		 * NOTE: for SADB_DUMP, the function sadb_dump() did the
1646		 * work.  When DUMP reaches here, it should only be a base
1647		 * message.
1648		 */
1649	justecho:
1650		if (ksi->ks_in_extv[SADB_EXT_KEY_AUTH] != NULL ||
1651		    ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT] != NULL ||
1652		    ksi->ks_in_extv[SADB_X_EXT_EDUMP] != NULL) {
1653			sadb_strip(samsg);
1654			/* Assume PF_KEY message is contiguous. */
1655			ASSERT(mp->b_cont->b_cont == NULL);
1656			oldend = mp->b_cont->b_wptr;
1657			mp->b_cont->b_wptr = mp->b_cont->b_rptr +
1658			    SADB_64TO8(samsg->sadb_msg_len);
1659			bzero(mp->b_cont->b_wptr, oldend - mp->b_cont->b_wptr);
1660		}
1661		break;
1662	case SADB_GET:
1663		/*
1664		 * Do a lot of work here, because of the ipsa I just found.
1665		 * First construct the new PF_KEY message, then abandon
1666		 * the old one.
1667		 */
1668		mp1 = sadb_sa2msg(ipsa, samsg);
1669		if (mp1 == NULL) {
1670			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1671			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1672			return;
1673		}
1674		freemsg(mp->b_cont);
1675		mp->b_cont = mp1;
1676		break;
1677	case SADB_DELETE:
1678	case SADB_X_DELPAIR:
1679		if (ipsa == NULL)
1680			goto justecho;
1681		/*
1682		 * Because listening KMds may require more info, treat
1683		 * DELETE like a special case of GET.
1684		 */
1685		mp1 = sadb_sa2msg(ipsa, samsg);
1686		if (mp1 == NULL) {
1687			sadb_pfkey_error(pfkey_q, mp, ENOMEM,
1688			    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
1689			return;
1690		}
1691		newsamsg = (sadb_msg_t *)mp1->b_rptr;
1692		sadb_strip(newsamsg);
1693		oldend = mp1->b_wptr;
1694		mp1->b_wptr = mp1->b_rptr + SADB_64TO8(newsamsg->sadb_msg_len);
1695		bzero(mp1->b_wptr, oldend - mp1->b_wptr);
1696		freemsg(mp->b_cont);
1697		mp->b_cont = mp1;
1698		break;
1699	default:
1700		freemsg(mp);
1701		return;
1702	}
1703
1704	/* ksi is now null and void. */
1705	kso = (keysock_out_t *)ksi;
1706	kso->ks_out_type = KEYSOCK_OUT;
1707	kso->ks_out_len = sizeof (*kso);
1708	kso->ks_out_serial = ksi->ks_in_serial;
1709	/* We're ready to send... */
1710	putnext(pfkey_q, mp);
1711}
1712
1713/*
1714 * Set up a global pfkey_q instance for AH, ESP, or some other consumer.
1715 */
1716void
1717sadb_keysock_hello(queue_t **pfkey_qp, queue_t *q, mblk_t *mp,
1718    void (*ager)(void *), void *agerarg, timeout_id_t *top, int satype)
1719{
1720	keysock_hello_ack_t *kha;
1721	queue_t *oldq;
1722
1723	ASSERT(OTHERQ(q) != NULL);
1724
1725	/*
1726	 * First, check atomically that I'm the first and only keysock
1727	 * instance.
1728	 *
1729	 * Use OTHERQ(q), because qreply(q, mp) == putnext(OTHERQ(q), mp),
1730	 * and I want this module to say putnext(*_pfkey_q, mp) for PF_KEY
1731	 * messages.
1732	 */
1733
1734	oldq = atomic_cas_ptr((void **)pfkey_qp, NULL, OTHERQ(q));
1735	if (oldq != NULL) {
1736		ASSERT(oldq != q);
1737		cmn_err(CE_WARN, "Danger!  Multiple keysocks on top of %s.\n",
1738		    (satype == SADB_SATYPE_ESP)? "ESP" : "AH or other");
1739		freemsg(mp);
1740		return;
1741	}
1742
1743	kha = (keysock_hello_ack_t *)mp->b_rptr;
1744	kha->ks_hello_len = sizeof (keysock_hello_ack_t);
1745	kha->ks_hello_type = KEYSOCK_HELLO_ACK;
1746	kha->ks_hello_satype = (uint8_t)satype;
1747
1748	/*
1749	 * If we made it past the atomic_cas_ptr, then we have "exclusive"
1750	 * access to the timeout handle.  Fire it off after the default ager
1751	 * interval.
1752	 */
1753	*top = qtimeout(*pfkey_qp, ager, agerarg,
1754	    drv_usectohz(SADB_AGE_INTERVAL_DEFAULT * 1000));
1755
1756	putnext(*pfkey_qp, mp);
1757}
1758
1759/*
1760 * Normalize IPv4-mapped IPv6 addresses (and prefixes) as appropriate.
1761 *
1762 * Check addresses themselves for wildcard or multicast.
1763 * Check ire table for local/non-local/broadcast.
1764 */
1765int
1766sadb_addrcheck(queue_t *pfkey_q, mblk_t *mp, sadb_ext_t *ext, uint_t serial,
1767    netstack_t *ns)
1768{
1769	sadb_address_t *addr = (sadb_address_t *)ext;
1770	struct sockaddr_in *sin;
1771	struct sockaddr_in6 *sin6;
1772	int diagnostic, type;
1773	boolean_t normalized = B_FALSE;
1774
1775	ASSERT(ext != NULL);
1776	ASSERT((ext->sadb_ext_type == SADB_EXT_ADDRESS_SRC) ||
1777	    (ext->sadb_ext_type == SADB_EXT_ADDRESS_DST) ||
1778	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ||
1779	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) ||
1780	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_LOC) ||
1781	    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_NATT_REM));
1782
1783	diagnostic = 0;
1784
1785	/* Assign both sockaddrs, the compiler will do the right thing. */
1786	sin = (struct sockaddr_in *)(addr + 1);
1787	sin6 = (struct sockaddr_in6 *)(addr + 1);
1788
1789	if (sin6->sin6_family == AF_INET6) {
1790		if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1791			/*
1792			 * Convert to an AF_INET sockaddr.  This means the
1793			 * return messages will have the extra space, but have
1794			 * AF_INET sockaddrs instead of AF_INET6.
1795			 *
1796			 * Yes, RFC 2367 isn't clear on what to do here w.r.t.
1797			 * mapped addresses, but since AF_INET6 ::ffff:<v4> is
1798			 * equal to AF_INET <v4>, it shouldnt be a huge
1799			 * problem.
1800			 */
1801			sin->sin_family = AF_INET;
1802			IN6_V4MAPPED_TO_INADDR(&sin6->sin6_addr,
1803			    &sin->sin_addr);
1804			bzero(&sin->sin_zero, sizeof (sin->sin_zero));
1805			normalized = B_TRUE;
1806		}
1807	} else if (sin->sin_family != AF_INET) {
1808		switch (ext->sadb_ext_type) {
1809		case SADB_EXT_ADDRESS_SRC:
1810			diagnostic = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
1811			break;
1812		case SADB_EXT_ADDRESS_DST:
1813			diagnostic = SADB_X_DIAGNOSTIC_BAD_DST_AF;
1814			break;
1815		case SADB_X_EXT_ADDRESS_INNER_SRC:
1816			diagnostic = SADB_X_DIAGNOSTIC_BAD_PROXY_AF;
1817			break;
1818		case SADB_X_EXT_ADDRESS_INNER_DST:
1819			diagnostic = SADB_X_DIAGNOSTIC_BAD_INNER_DST_AF;
1820			break;
1821		case SADB_X_EXT_ADDRESS_NATT_LOC:
1822			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF;
1823			break;
1824		case SADB_X_EXT_ADDRESS_NATT_REM:
1825			diagnostic = SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF;
1826			break;
1827			/* There is no default, see above ASSERT. */
1828		}
1829bail:
1830		if (pfkey_q != NULL) {
1831			sadb_pfkey_error(pfkey_q, mp, EINVAL, diagnostic,
1832			    serial);
1833		} else {
1834			/*
1835			 * Scribble in sadb_msg that we got passed in.
1836			 * Overload "mp" to be an sadb_msg pointer.
1837			 */
1838			sadb_msg_t *samsg = (sadb_msg_t *)mp;
1839
1840			samsg->sadb_msg_errno = EINVAL;
1841			samsg->sadb_x_msg_diagnostic = diagnostic;
1842		}
1843		return (KS_IN_ADDR_UNKNOWN);
1844	}
1845
1846	if (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC ||
1847	    ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_DST) {
1848		/*
1849		 * We need only check for prefix issues.
1850		 */
1851
1852		/* Set diagnostic now, in case we need it later. */
1853		diagnostic =
1854		    (ext->sadb_ext_type == SADB_X_EXT_ADDRESS_INNER_SRC) ?
1855		    SADB_X_DIAGNOSTIC_PREFIX_INNER_SRC :
1856		    SADB_X_DIAGNOSTIC_PREFIX_INNER_DST;
1857
1858		if (normalized)
1859			addr->sadb_address_prefixlen -= 96;
1860
1861		/*
1862		 * Verify and mask out inner-addresses based on prefix length.
1863		 */
1864		if (sin->sin_family == AF_INET) {
1865			if (addr->sadb_address_prefixlen > 32)
1866				goto bail;
1867			sin->sin_addr.s_addr &=
1868			    ip_plen_to_mask(addr->sadb_address_prefixlen);
1869		} else {
1870			in6_addr_t mask;
1871
1872			ASSERT(sin->sin_family == AF_INET6);
1873			/*
1874			 * ip_plen_to_mask_v6() returns NULL if the value in
1875			 * question is out of range.
1876			 */
1877			if (ip_plen_to_mask_v6(addr->sadb_address_prefixlen,
1878			    &mask) == NULL)
1879				goto bail;
1880			sin6->sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
1881			sin6->sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
1882			sin6->sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
1883			sin6->sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
1884		}
1885
1886		/* We don't care in these cases. */
1887		return (KS_IN_ADDR_DONTCARE);
1888	}
1889
1890	if (sin->sin_family == AF_INET6) {
1891		/* Check the easy ones now. */
1892		if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
1893			return (KS_IN_ADDR_MBCAST);
1894		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
1895			return (KS_IN_ADDR_UNSPEC);
1896		/*
1897		 * At this point, we're a unicast IPv6 address.
1898		 *
1899		 * XXX Zones alert -> me/notme decision needs to be tempered
1900		 * by what zone we're in when we go to zone-aware IPsec.
1901		 */
1902		if (ip_type_v6(&sin6->sin6_addr, ns->netstack_ip) ==
1903		    IRE_LOCAL) {
1904			/* Hey hey, it's local. */
1905			return (KS_IN_ADDR_ME);
1906		}
1907	} else {
1908		ASSERT(sin->sin_family == AF_INET);
1909		if (sin->sin_addr.s_addr == INADDR_ANY)
1910			return (KS_IN_ADDR_UNSPEC);
1911		if (CLASSD(sin->sin_addr.s_addr))
1912			return (KS_IN_ADDR_MBCAST);
1913		/*
1914		 * At this point we're a unicast or broadcast IPv4 address.
1915		 *
1916		 * Check if the address is IRE_BROADCAST or IRE_LOCAL.
1917		 *
1918		 * XXX Zones alert -> me/notme decision needs to be tempered
1919		 * by what zone we're in when we go to zone-aware IPsec.
1920		 */
1921		type = ip_type_v4(sin->sin_addr.s_addr, ns->netstack_ip);
1922		switch (type) {
1923		case IRE_LOCAL:
1924			return (KS_IN_ADDR_ME);
1925		case IRE_BROADCAST:
1926			return (KS_IN_ADDR_MBCAST);
1927		}
1928	}
1929
1930	return (KS_IN_ADDR_NOTME);
1931}
1932
1933/*
1934 * Address normalizations and reality checks for inbound PF_KEY messages.
1935 *
1936 * For the case of src == unspecified AF_INET6, and dst == AF_INET, convert
1937 * the source to AF_INET.  Do the same for the inner sources.
1938 */
1939boolean_t
1940sadb_addrfix(keysock_in_t *ksi, queue_t *pfkey_q, mblk_t *mp, netstack_t *ns)
1941{
1942	struct sockaddr_in *src, *isrc;
1943	struct sockaddr_in6 *dst, *idst;
1944	sadb_address_t *srcext, *dstext;
1945	uint16_t sport;
1946	sadb_ext_t **extv = ksi->ks_in_extv;
1947	int rc;
1948
1949	if (extv[SADB_EXT_ADDRESS_SRC] != NULL) {
1950		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_SRC],
1951		    ksi->ks_in_serial, ns);
1952		if (rc == KS_IN_ADDR_UNKNOWN)
1953			return (B_FALSE);
1954		if (rc == KS_IN_ADDR_MBCAST) {
1955			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1956			    SADB_X_DIAGNOSTIC_BAD_SRC, ksi->ks_in_serial);
1957			return (B_FALSE);
1958		}
1959		ksi->ks_in_srctype = rc;
1960	}
1961
1962	if (extv[SADB_EXT_ADDRESS_DST] != NULL) {
1963		rc = sadb_addrcheck(pfkey_q, mp, extv[SADB_EXT_ADDRESS_DST],
1964		    ksi->ks_in_serial, ns);
1965		if (rc == KS_IN_ADDR_UNKNOWN)
1966			return (B_FALSE);
1967		if (rc == KS_IN_ADDR_UNSPEC) {
1968			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1969			    SADB_X_DIAGNOSTIC_BAD_DST, ksi->ks_in_serial);
1970			return (B_FALSE);
1971		}
1972		ksi->ks_in_dsttype = rc;
1973	}
1974
1975	/*
1976	 * NAT-Traversal addrs are simple enough to not require all of
1977	 * the checks in sadb_addrcheck().  Just normalize or reject if not
1978	 * AF_INET.
1979	 */
1980	if (extv[SADB_X_EXT_ADDRESS_NATT_LOC] != NULL) {
1981		rc = sadb_addrcheck(pfkey_q, mp,
1982		    extv[SADB_X_EXT_ADDRESS_NATT_LOC], ksi->ks_in_serial, ns);
1983
1984		/*
1985		 * Local NAT-T addresses never use an IRE_LOCAL, so it should
1986		 * always be NOTME, or UNSPEC (to handle both tunnel mode
1987		 * AND local-port flexibility).
1988		 */
1989		if (rc != KS_IN_ADDR_NOTME && rc != KS_IN_ADDR_UNSPEC) {
1990			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1991			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC,
1992			    ksi->ks_in_serial);
1993			return (B_FALSE);
1994		}
1995		src = (struct sockaddr_in *)
1996		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_LOC]) + 1);
1997		if (src->sin_family != AF_INET) {
1998			sadb_pfkey_error(pfkey_q, mp, EINVAL,
1999			    SADB_X_DIAGNOSTIC_BAD_NATT_LOC_AF,
2000			    ksi->ks_in_serial);
2001			return (B_FALSE);
2002		}
2003	}
2004
2005	if (extv[SADB_X_EXT_ADDRESS_NATT_REM] != NULL) {
2006		rc = sadb_addrcheck(pfkey_q, mp,
2007		    extv[SADB_X_EXT_ADDRESS_NATT_REM], ksi->ks_in_serial, ns);
2008
2009		/*
2010		 * Remote NAT-T addresses never use an IRE_LOCAL, so it should
2011		 * always be NOTME, or UNSPEC if it's a tunnel-mode SA.
2012		 */
2013		if (rc != KS_IN_ADDR_NOTME &&
2014		    !(extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
2015		    rc == KS_IN_ADDR_UNSPEC)) {
2016			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2017			    SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM,
2018			    ksi->ks_in_serial);
2019			return (B_FALSE);
2020		}
2021		src = (struct sockaddr_in *)
2022		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_NATT_REM]) + 1);
2023		if (src->sin_family != AF_INET) {
2024			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2025			    SADB_X_DIAGNOSTIC_BAD_NATT_REM_AF,
2026			    ksi->ks_in_serial);
2027			return (B_FALSE);
2028		}
2029	}
2030
2031	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL) {
2032		if (extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
2033			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2034			    SADB_X_DIAGNOSTIC_MISSING_INNER_DST,
2035			    ksi->ks_in_serial);
2036			return (B_FALSE);
2037		}
2038
2039		if (sadb_addrcheck(pfkey_q, mp,
2040		    extv[SADB_X_EXT_ADDRESS_INNER_DST], ksi->ks_in_serial, ns)
2041		    == KS_IN_ADDR_UNKNOWN ||
2042		    sadb_addrcheck(pfkey_q, mp,
2043		    extv[SADB_X_EXT_ADDRESS_INNER_SRC], ksi->ks_in_serial, ns)
2044		    == KS_IN_ADDR_UNKNOWN)
2045			return (B_FALSE);
2046
2047		isrc = (struct sockaddr_in *)
2048		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_SRC]) +
2049		    1);
2050		idst = (struct sockaddr_in6 *)
2051		    (((sadb_address_t *)extv[SADB_X_EXT_ADDRESS_INNER_DST]) +
2052		    1);
2053		if (isrc->sin_family != idst->sin6_family) {
2054			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2055			    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH,
2056			    ksi->ks_in_serial);
2057			return (B_FALSE);
2058		}
2059	} else if (extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
2060			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2061			    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC,
2062			    ksi->ks_in_serial);
2063			return (B_FALSE);
2064	} else {
2065		isrc = NULL;	/* For inner/outer port check below. */
2066	}
2067
2068	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
2069	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
2070
2071	if (dstext == NULL || srcext == NULL)
2072		return (B_TRUE);
2073
2074	dst = (struct sockaddr_in6 *)(dstext + 1);
2075	src = (struct sockaddr_in *)(srcext + 1);
2076
2077	if (isrc != NULL &&
2078	    (isrc->sin_port != 0 || idst->sin6_port != 0) &&
2079	    (src->sin_port != 0 || dst->sin6_port != 0)) {
2080		/* Can't set inner and outer ports in one SA. */
2081		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2082		    SADB_X_DIAGNOSTIC_DUAL_PORT_SETS,
2083		    ksi->ks_in_serial);
2084		return (B_FALSE);
2085	}
2086
2087	if (dst->sin6_family == src->sin_family)
2088		return (B_TRUE);
2089
2090	if (srcext->sadb_address_proto != dstext->sadb_address_proto) {
2091		if (srcext->sadb_address_proto == 0) {
2092			srcext->sadb_address_proto = dstext->sadb_address_proto;
2093		} else if (dstext->sadb_address_proto == 0) {
2094			dstext->sadb_address_proto = srcext->sadb_address_proto;
2095		} else {
2096			/* Inequal protocols, neither were 0.  Report error. */
2097			sadb_pfkey_error(pfkey_q, mp, EINVAL,
2098			    SADB_X_DIAGNOSTIC_PROTO_MISMATCH,
2099			    ksi->ks_in_serial);
2100			return (B_FALSE);
2101		}
2102	}
2103
2104	/*
2105	 * With the exception of an unspec IPv6 source and an IPv4
2106	 * destination, address families MUST me matched.
2107	 */
2108	if (src->sin_family == AF_INET ||
2109	    ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC) {
2110		sadb_pfkey_error(pfkey_q, mp, EINVAL,
2111		    SADB_X_DIAGNOSTIC_AF_MISMATCH, ksi->ks_in_serial);
2112		return (B_FALSE);
2113	}
2114
2115	/*
2116	 * Convert "src" to AF_INET INADDR_ANY.  We rely on sin_port being
2117	 * in the same place for sockaddr_in and sockaddr_in6.
2118	 */
2119	sport = src->sin_port;
2120	bzero(src, sizeof (*src));
2121	src->sin_family = AF_INET;
2122	src->sin_port = sport;
2123
2124	return (B_TRUE);
2125}
2126
2127/*
2128 * Set the results in "addrtype", given an IRE as requested by
2129 * sadb_addrcheck().
2130 */
2131int
2132sadb_addrset(ire_t *ire)
2133{
2134	if ((ire->ire_type & IRE_BROADCAST) ||
2135	    (ire->ire_ipversion == IPV4_VERSION && CLASSD(ire->ire_addr)) ||
2136	    (ire->ire_ipversion == IPV6_VERSION &&
2137	    IN6_IS_ADDR_MULTICAST(&(ire->ire_addr_v6))))
2138		return (KS_IN_ADDR_MBCAST);
2139	if (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))
2140		return (KS_IN_ADDR_ME);
2141	return (KS_IN_ADDR_NOTME);
2142}
2143
2144/*
2145 * Match primitives..
2146 * !!! TODO: short term: inner selectors
2147 *		ipv6 scope id (ifindex)
2148 * longer term:  zone id.  sensitivity label. uid.
2149 */
2150boolean_t
2151sadb_match_spi(ipsa_query_t *sq, ipsa_t *sa)
2152{
2153	return (sq->spi == sa->ipsa_spi);
2154}
2155
2156boolean_t
2157sadb_match_dst_v6(ipsa_query_t *sq, ipsa_t *sa)
2158{
2159	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_dstaddr, sq->dstaddr, AF_INET6));
2160}
2161
2162boolean_t
2163sadb_match_src_v6(ipsa_query_t *sq, ipsa_t *sa)
2164{
2165	return (IPSA_ARE_ADDR_EQUAL(sa->ipsa_srcaddr, sq->srcaddr, AF_INET6));
2166}
2167
2168boolean_t
2169sadb_match_dst_v4(ipsa_query_t *sq, ipsa_t *sa)
2170{
2171	return (sq->dstaddr[0] == sa->ipsa_dstaddr[0]);
2172}
2173
2174boolean_t
2175sadb_match_src_v4(ipsa_query_t *sq, ipsa_t *sa)
2176{
2177	return (sq->srcaddr[0] == sa->ipsa_srcaddr[0]);
2178}
2179
2180boolean_t
2181sadb_match_dstid(ipsa_query_t *sq, ipsa_t *sa)
2182{
2183	return ((sa->ipsa_dst_cid != NULL) &&
2184	    (sq->didtype == sa->ipsa_dst_cid->ipsid_type) &&
2185	    (strcmp(sq->didstr, sa->ipsa_dst_cid->ipsid_cid) == 0));
2186
2187}
2188boolean_t
2189sadb_match_srcid(ipsa_query_t *sq, ipsa_t *sa)
2190{
2191	return ((sa->ipsa_src_cid != NULL) &&
2192	    (sq->sidtype == sa->ipsa_src_cid->ipsid_type) &&
2193	    (strcmp(sq->sidstr, sa->ipsa_src_cid->ipsid_cid) == 0));
2194}
2195
2196boolean_t
2197sadb_match_kmc(ipsa_query_t *sq, ipsa_t *sa)
2198{
2199#define	M(a, b) (((a) == 0) || ((b) == 0) || ((a) == (b)))
2200
2201	return (M(sq->kmc, sa->ipsa_kmc) && M(sq->kmp, sa->ipsa_kmp));
2202
2203#undef M
2204}
2205
2206/*
2207 * Common function which extracts several PF_KEY extensions for ease of
2208 * SADB matching.
2209 *
2210 * XXX TODO: weed out ipsa_query_t fields not used during matching
2211 * or afterwards?
2212 */
2213int
2214sadb_form_query(keysock_in_t *ksi, uint32_t req, uint32_t match,
2215    ipsa_query_t *sq, int *diagnostic)
2216{
2217	int i;
2218	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2219
2220	for (i = 0; i < IPSA_NMATCH; i++)
2221		sq->matchers[i] = NULL;
2222
2223	ASSERT((req & ~match) == 0);
2224
2225	sq->req = req;
2226	sq->dstext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2227	sq->srcext = (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2228	sq->assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2229
2230	if ((req & IPSA_Q_DST) && (sq->dstext == NULL)) {
2231		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2232		return (EINVAL);
2233	}
2234	if ((req & IPSA_Q_SRC) && (sq->srcext == NULL)) {
2235		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2236		return (EINVAL);
2237	}
2238	if ((req & IPSA_Q_SA) && (sq->assoc == NULL)) {
2239		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2240		return (EINVAL);
2241	}
2242
2243	if (match & IPSA_Q_SA) {
2244		*mfpp++ = sadb_match_spi;
2245		sq->spi = sq->assoc->sadb_sa_spi;
2246	}
2247
2248	if (sq->dstext != NULL)
2249		sq->dst = (struct sockaddr_in *)(sq->dstext + 1);
2250	else {
2251		sq->dst = NULL;
2252		sq->dst6 = NULL;
2253		sq->dstaddr = NULL;
2254	}
2255
2256	if (sq->srcext != NULL)
2257		sq->src = (struct sockaddr_in *)(sq->srcext + 1);
2258	else {
2259		sq->src = NULL;
2260		sq->src6 = NULL;
2261		sq->srcaddr = NULL;
2262	}
2263
2264	if (sq->dst != NULL)
2265		sq->af = sq->dst->sin_family;
2266	else if (sq->src != NULL)
2267		sq->af = sq->src->sin_family;
2268	else
2269		sq->af = AF_INET;
2270
2271	if (sq->af == AF_INET6) {
2272		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2273			*mfpp++ = sadb_match_dst_v6;
2274			sq->dst6 = (struct sockaddr_in6 *)sq->dst;
2275			sq->dstaddr = (uint32_t *)&(sq->dst6->sin6_addr);
2276		} else {
2277			match &= ~IPSA_Q_DST;
2278			sq->dstaddr = ALL_ZEROES_PTR;
2279		}
2280
2281		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2282			sq->src6 = (struct sockaddr_in6 *)(sq->srcext + 1);
2283			sq->srcaddr = (uint32_t *)&sq->src6->sin6_addr;
2284			if (sq->src6->sin6_family != AF_INET6) {
2285				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2286				return (EINVAL);
2287			}
2288			*mfpp++ = sadb_match_src_v6;
2289		} else {
2290			match &= ~IPSA_Q_SRC;
2291			sq->srcaddr = ALL_ZEROES_PTR;
2292		}
2293	} else {
2294		sq->src6 = sq->dst6 = NULL;
2295		if ((match & IPSA_Q_DST) && (sq->dstext != NULL)) {
2296			*mfpp++ = sadb_match_dst_v4;
2297			sq->dstaddr = (uint32_t *)&sq->dst->sin_addr;
2298		} else {
2299			match &= ~IPSA_Q_DST;
2300			sq->dstaddr = ALL_ZEROES_PTR;
2301		}
2302		if ((match & IPSA_Q_SRC) && (sq->srcext != NULL)) {
2303			sq->srcaddr = (uint32_t *)&sq->src->sin_addr;
2304			if (sq->src->sin_family != AF_INET) {
2305				*diagnostic = SADB_X_DIAGNOSTIC_AF_MISMATCH;
2306				return (EINVAL);
2307			}
2308			*mfpp++ = sadb_match_src_v4;
2309		} else {
2310			match &= ~IPSA_Q_SRC;
2311			sq->srcaddr = ALL_ZEROES_PTR;
2312		}
2313	}
2314
2315	sq->dstid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
2316	if ((match & IPSA_Q_DSTID) && (sq->dstid != NULL)) {
2317		sq->didstr = (char *)(sq->dstid + 1);
2318		sq->didtype = sq->dstid->sadb_ident_type;
2319		*mfpp++ = sadb_match_dstid;
2320	}
2321
2322	sq->srcid = (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
2323
2324	if ((match & IPSA_Q_SRCID) && (sq->srcid != NULL)) {
2325		sq->sidstr = (char *)(sq->srcid + 1);
2326		sq->sidtype = sq->srcid->sadb_ident_type;
2327		*mfpp++ = sadb_match_srcid;
2328	}
2329
2330	sq->kmcext = (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2331	sq->kmc = 0;
2332	sq->kmp = 0;
2333
2334	if ((match & IPSA_Q_KMC) && (sq->kmcext)) {
2335		sq->kmp = sq->kmcext->sadb_x_kmc_proto;
2336		/*
2337		 * Be liberal in what we receive.  Special-case the IKEv1
2338		 * cookie, which closed-source in.iked assumes is 32 bits.
2339		 * Now that we store all 64 bits, we should pre-zero the
2340		 * reserved field on behalf of closed-source in.iked.
2341		 */
2342		if (sq->kmp == SADB_X_KMP_IKE) {
2343			/* Just in case in.iked is misbehaving... */
2344			sq->kmcext->sadb_x_kmc_reserved = 0;
2345		}
2346		sq->kmc = sq->kmcext->sadb_x_kmc_cookie64;
2347		*mfpp++ = sadb_match_kmc;
2348	}
2349
2350	if (match & (IPSA_Q_INBOUND|IPSA_Q_OUTBOUND)) {
2351		if (sq->af == AF_INET6)
2352			sq->sp = &sq->spp->s_v6;
2353		else
2354			sq->sp = &sq->spp->s_v4;
2355	} else {
2356		sq->sp = NULL;
2357	}
2358
2359	if (match & IPSA_Q_INBOUND) {
2360		sq->inhash = INBOUND_HASH(sq->sp, sq->assoc->sadb_sa_spi);
2361		sq->inbound = &sq->sp->sdb_if[sq->inhash];
2362	} else {
2363		sq->inhash = 0;
2364		sq->inbound = NULL;
2365	}
2366
2367	if (match & IPSA_Q_OUTBOUND) {
2368		if (sq->af == AF_INET6) {
2369			sq->outhash = OUTBOUND_HASH_V6(sq->sp, *(sq->dstaddr));
2370		} else {
2371			sq->outhash = OUTBOUND_HASH_V4(sq->sp, *(sq->dstaddr));
2372		}
2373		sq->outbound = &sq->sp->sdb_of[sq->outhash];
2374	} else {
2375		sq->outhash = 0;
2376		sq->outbound = NULL;
2377	}
2378	sq->match = match;
2379	return (0);
2380}
2381
2382/*
2383 * Match an initialized query structure with a security association;
2384 * return B_TRUE on a match, B_FALSE on a miss.
2385 * Applies match functions set up by sadb_form_query() until one returns false.
2386 */
2387boolean_t
2388sadb_match_query(ipsa_query_t *sq, ipsa_t *sa)
2389{
2390	ipsa_match_fn_t *mfpp = &(sq->matchers[0]);
2391	ipsa_match_fn_t mfp;
2392
2393	for (mfp = *mfpp++; mfp != NULL; mfp = *mfpp++) {
2394		if (!mfp(sq, sa))
2395			return (B_FALSE);
2396	}
2397	return (B_TRUE);
2398}
2399
2400/*
2401 * Walker callback function to delete sa's based on src/dst address.
2402 * Assumes that we're called with *head locked, no other locks held;
2403 * Conveniently, and not coincidentally, this is both what sadb_walker
2404 * gives us and also what sadb_unlinkassoc expects.
2405 */
struct sadb_purge_state
{
	/* Query filled in by sadb_form_query(); consulted by the callbacks. */
	ipsa_query_t sq;
	/* Set to B_TRUE by sadb_purge_sa() while it walks sdb_if. */
	boolean_t inbnd;
	/* SA state that sadb_delpair_state_one() compares entries against. */
	uint8_t sadb_sa_state;
};
2412
2413static void
2414sadb_purge_cb(isaf_t *head, ipsa_t *entry, void *cookie)
2415{
2416	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
2417
2418	ASSERT(MUTEX_HELD(&head->isaf_lock));
2419
2420	mutex_enter(&entry->ipsa_lock);
2421
2422	if (entry->ipsa_state == IPSA_STATE_LARVAL ||
2423	    !sadb_match_query(&ps->sq, entry)) {
2424		mutex_exit(&entry->ipsa_lock);
2425		return;
2426	}
2427
2428	if (ps->inbnd) {
2429		sadb_delete_cluster(entry);
2430	}
2431	entry->ipsa_state = IPSA_STATE_DEAD;
2432	(void) sadb_torch_assoc(head, entry);
2433}
2434
2435/*
2436 * Common code to purge an SA with a matching src or dst address.
2437 * Don't kill larval SA's in such a purge.
2438 */
2439int
2440sadb_purge_sa(mblk_t *mp, keysock_in_t *ksi, sadb_t *sp,
2441    int *diagnostic, queue_t *pfkey_q)
2442{
2443	struct sadb_purge_state ps;
2444	int error = sadb_form_query(ksi, 0,
2445	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2446	    &ps.sq, diagnostic);
2447
2448	if (error != 0)
2449		return (error);
2450
2451	/*
2452	 * This is simple, crude, and effective.
2453	 * Unimplemented optimizations (TBD):
2454	 * - we can limit how many places we search based on where we
2455	 * think the SA is filed.
2456	 * - if we get a dst address, we can hash based on dst addr to find
2457	 * the correct bucket in the outbound table.
2458	 */
2459	ps.inbnd = B_TRUE;
2460	sadb_walker(sp->sdb_if, sp->sdb_hashsize, sadb_purge_cb, &ps);
2461	ps.inbnd = B_FALSE;
2462	sadb_walker(sp->sdb_of, sp->sdb_hashsize, sadb_purge_cb, &ps);
2463
2464	ASSERT(mp->b_cont != NULL);
2465	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
2466	    NULL);
2467	return (0);
2468}
2469
/*
 * sadb_walker() callback used by sadb_delpair_state().  An entry whose state
 * equals ps->sadb_sa_state and whose source address equals the query's
 * source address is marked dead and torched, together with the SA that
 * ipsec_getassocbyspi() finds for it in the inbound fanout.
 *
 * Called with head->isaf_lock held.
 */
static void
sadb_delpair_state_one(isaf_t *head, ipsa_t *entry, void *cookie)
{
	struct sadb_purge_state *ps = (struct sadb_purge_state *)cookie;
	isaf_t  *inbound_bucket;
	ipsa_t *peer_assoc;
	ipsa_query_t *sq = &ps->sq;

	ASSERT(MUTEX_HELD(&head->isaf_lock));

	mutex_enter(&entry->ipsa_lock);

	/* Skip entries in a different state or with a different source. */
	if ((entry->ipsa_state != ps->sadb_sa_state) ||
	    ((sq->srcaddr != NULL) &&
	    !IPSA_ARE_ADDR_EQUAL(entry->ipsa_srcaddr, sq->srcaddr, sq->af))) {
		mutex_exit(&entry->ipsa_lock);
		return;
	}

	/*
	 * The isaf_t * which is passed in is always an outbound bucket,
	 * and we are preserving the outbound-then-inbound hash-bucket lock
	 * ordering. The sadb_walker() which triggers this function is called
	 * only on the outbound fanout, and the corresponding inbound bucket
	 * lock is safe to acquire here.
	 *
	 * NOTE(review): sq->sp is dereferenced below; sadb_form_query() only
	 * sets it when IPSA_Q_INBOUND/IPSA_Q_OUTBOUND is in the match mask,
	 * so confirm the caller guarantees a non-NULL sq->sp.
	 */

	if (entry->ipsa_haspeer) {
		/* Peer is the same SA (same SPI, same addresses) inbound. */
		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_spi);
		mutex_enter(&inbound_bucket->isaf_lock);
		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
		    entry->ipsa_spi, entry->ipsa_srcaddr,
		    entry->ipsa_dstaddr, entry->ipsa_addrfam);
	} else {
		/* Paired SA: look up ipsa_otherspi with src/dst swapped. */
		inbound_bucket = INBOUND_BUCKET(sq->sp, entry->ipsa_otherspi);
		mutex_enter(&inbound_bucket->isaf_lock);
		peer_assoc = ipsec_getassocbyspi(inbound_bucket,
		    entry->ipsa_otherspi, entry->ipsa_dstaddr,
		    entry->ipsa_srcaddr, entry->ipsa_addrfam);
	}

	/* entry->ipsa_lock is not released here after the torch call. */
	entry->ipsa_state = IPSA_STATE_DEAD;
	(void) sadb_torch_assoc(head, entry);
	if (peer_assoc != NULL) {
		/*
		 * NOTE(review): if ipsec_getassocbyspi() returns a held
		 * reference, nothing here releases it explicitly - confirm.
		 */
		mutex_enter(&peer_assoc->ipsa_lock);
		peer_assoc->ipsa_state = IPSA_STATE_DEAD;
		(void) sadb_torch_assoc(inbound_bucket, peer_assoc);
	}
	mutex_exit(&inbound_bucket->isaf_lock);
}
2520
2521static int
2522sadb_delpair_state(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2523    int *diagnostic, queue_t *pfkey_q)
2524{
2525	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2526	struct sadb_purge_state ps;
2527	int error;
2528
2529	ps.sq.spp = spp;		/* XXX param */
2530
2531	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SRC,
2532	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SRCID|IPSA_Q_DSTID|IPSA_Q_KMC,
2533	    &ps.sq, diagnostic);
2534	if (error != 0)
2535		return (error);
2536
2537	ps.inbnd = B_FALSE;
2538	ps.sadb_sa_state = assoc->sadb_sa_state;
2539	sadb_walker(ps.sq.sp->sdb_of, ps.sq.sp->sdb_hashsize,
2540	    sadb_delpair_state_one, &ps);
2541
2542	ASSERT(mp->b_cont != NULL);
2543	sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
2544	    ksi, NULL);
2545	return (0);
2546}
2547
2548/*
2549 * Common code to delete/get an SA.
2550 */
2551int
2552sadb_delget_sa(mblk_t *mp, keysock_in_t *ksi, sadbp_t *spp,
2553    int *diagnostic, queue_t *pfkey_q, uint8_t sadb_msg_type)
2554{
2555	ipsa_query_t sq;
2556	ipsa_t *echo_target = NULL;
2557	ipsap_t ipsapp;
2558	uint_t	error = 0;
2559
2560	if (sadb_msg_type == SADB_X_DELPAIR_STATE)
2561		return (sadb_delpair_state(mp, ksi, spp, diagnostic, pfkey_q));
2562
2563	sq.spp = spp;		/* XXX param */
2564	error = sadb_form_query(ksi, IPSA_Q_DST|IPSA_Q_SA,
2565	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
2566	    &sq, diagnostic);
2567	if (error != 0)
2568		return (error);
2569
2570	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
2571	if (error != 0) {
2572		return (error);
2573	}
2574
2575	echo_target = ipsapp.ipsap_sa_ptr;
2576	if (echo_target == NULL)
2577		echo_target = ipsapp.ipsap_psa_ptr;
2578
2579	if (sadb_msg_type == SADB_DELETE || sadb_msg_type == SADB_X_DELPAIR) {
2580		/*
2581		 * Bucket locks will be required if SA is actually unlinked.
2582		 * get_ipsa_pair() returns valid hash bucket pointers even
2583		 * if it can't find a pair SA pointer. To prevent a potential
2584		 * deadlock, always lock the outbound bucket before the inbound.
2585		 */
2586		if (ipsapp.in_inbound_table) {
2587			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2588			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2589		} else {
2590			mutex_enter(&ipsapp.ipsap_bucket->isaf_lock);
2591			mutex_enter(&ipsapp.ipsap_pbucket->isaf_lock);
2592		}
2593
2594		if (ipsapp.ipsap_sa_ptr != NULL) {
2595			mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
2596			if (ipsapp.ipsap_sa_ptr->ipsa_flags & IPSA_F_INBOUND) {
2597				sadb_delete_cluster(ipsapp.ipsap_sa_ptr);
2598			}
2599			ipsapp.ipsap_sa_ptr->ipsa_state = IPSA_STATE_DEAD;
2600			(void) sadb_torch_assoc(ipsapp.ipsap_bucket,
2601			    ipsapp.ipsap_sa_ptr);
2602			/*
2603			 * sadb_torch_assoc() releases the ipsa_lock
2604			 * and calls sadb_unlinkassoc() which does a
2605			 * IPSA_REFRELE.
2606			 */
2607		}
2608		if (ipsapp.ipsap_psa_ptr != NULL) {
2609			mutex_enter(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2610			if (sadb_msg_type == SADB_X_DELPAIR ||
2611			    ipsapp.ipsap_psa_ptr->ipsa_haspeer) {
2612				if (ipsapp.ipsap_psa_ptr->ipsa_flags &
2613				    IPSA_F_INBOUND) {
2614					sadb_delete_cluster
2615					    (ipsapp.ipsap_psa_ptr);
2616				}
2617				ipsapp.ipsap_psa_ptr->ipsa_state =
2618				    IPSA_STATE_DEAD;
2619				(void) sadb_torch_assoc(ipsapp.ipsap_pbucket,
2620				    ipsapp.ipsap_psa_ptr);
2621			} else {
2622				/*
2623				 * Only half of the "pair" has been deleted.
2624				 * Update the remaining SA and remove references
2625				 * to its pair SA, which is now gone.
2626				 */
2627				ipsapp.ipsap_psa_ptr->ipsa_otherspi = 0;
2628				ipsapp.ipsap_psa_ptr->ipsa_flags &=
2629				    ~IPSA_F_PAIRED;
2630				mutex_exit(&ipsapp.ipsap_psa_ptr->ipsa_lock);
2631			}
2632		} else if (sadb_msg_type == SADB_X_DELPAIR) {
2633			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
2634			error = ESRCH;
2635		}
2636		mutex_exit(&ipsapp.ipsap_bucket->isaf_lock);
2637		mutex_exit(&ipsapp.ipsap_pbucket->isaf_lock);
2638	}
2639
2640	ASSERT(mp->b_cont != NULL);
2641
2642	if (error == 0)
2643		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)
2644		    mp->b_cont->b_rptr, ksi, echo_target);
2645
2646	destroy_ipsa_pair(&ipsapp);
2647
2648	return (error);
2649}
2650
2651/*
 * This function takes an ipsa_query_t (whose sadb_sa_t supplies the SPI)
 * and finds the ipsa_t structure and the isaf_t (hash bucket) that it's
 * stored under. If the security association has a peer, the ipsa_t
 * structure and bucket for that security association are also searched for.
 * The "pair" of ipsa_t's and isaf_t's are returned as a ipsap_t.
2657 *
2658 * The hash buckets are returned for convenience, if the calling function
2659 * needs to use the hash bucket locks, say to remove the SA's, it should
2660 * take care to observe the convention of locking outbound bucket then
2661 * inbound bucket. The flag in_inbound_table provides direction.
2662 *
2663 * Note that a "pair" is defined as one (but not both) of the following:
2664 *
2665 * A security association which has a soft reference to another security
2666 * association via its SPI.
2667 *
2668 * A security association that is not obviously "inbound" or "outbound" so
2669 * it appears in both hash tables, the "peer" being the same security
2670 * association in the other hash table.
2671 *
 * This function will return ESRCH (and set *diagnostic) if the ipsa_t
 * can't be found in the inbound or outbound hash tables. If only one ipsa_t
 * is found, the pair ipsa_t will be NULL. Both isaf_t values are valid
 * provided at least one ipsa_t is found.
2676 */
static int
get_ipsa_pair(ipsa_query_t *sq, ipsap_t *ipsapp, int *diagnostic)
{
	/* Local copies of the SA's addresses/SPI, taken under ipsa_lock. */
	uint32_t pair_srcaddr[IPSA_MAX_ADDRLEN];
	uint32_t pair_dstaddr[IPSA_MAX_ADDRLEN];
	uint32_t pair_spi;

	init_ipsa_pair(ipsapp);

	ipsapp->in_inbound_table = B_FALSE;

	/* Lock down both buckets. */
	mutex_enter(&sq->outbound->isaf_lock);
	mutex_enter(&sq->inbound->isaf_lock);

	/*
	 * Search the table suggested by the SA flags first, then the other
	 * one.  ipsap_bucket is where the SA was (or would have been) found;
	 * ipsap_pbucket is the bucket in the opposite table.
	 */
	if (sq->assoc->sadb_sa_flags & IPSA_F_INBOUND) {
		ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
		if (ipsapp->ipsap_sa_ptr != NULL) {
			ipsapp->ipsap_bucket = sq->inbound;
			ipsapp->ipsap_pbucket = sq->outbound;
			ipsapp->in_inbound_table = B_TRUE;
		} else {
			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->outbound,
			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
			    sq->af);
			ipsapp->ipsap_bucket = sq->outbound;
			ipsapp->ipsap_pbucket = sq->inbound;
		}
	} else {
		/* IPSA_F_OUTBOUND is set *or* no directions flags set. */
		ipsapp->ipsap_sa_ptr =
		    ipsec_getassocbyspi(sq->outbound,
		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
		if (ipsapp->ipsap_sa_ptr != NULL) {
			ipsapp->ipsap_bucket = sq->outbound;
			ipsapp->ipsap_pbucket = sq->inbound;
		} else {
			ipsapp->ipsap_sa_ptr = ipsec_getassocbyspi(sq->inbound,
			    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr,
			    sq->af);
			ipsapp->ipsap_bucket = sq->inbound;
			ipsapp->ipsap_pbucket = sq->outbound;
			if (ipsapp->ipsap_sa_ptr != NULL)
				ipsapp->in_inbound_table = B_TRUE;
		}
	}

	/* Not in either table: report "SA not found". */
	if (ipsapp->ipsap_sa_ptr == NULL) {
		mutex_exit(&sq->outbound->isaf_lock);
		mutex_exit(&sq->inbound->isaf_lock);
		*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
		return (ESRCH);
	}

	/* A larval SA found in the inbound table is returned without a pair. */
	if ((ipsapp->ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) &&
	    ipsapp->in_inbound_table) {
		mutex_exit(&sq->outbound->isaf_lock);
		mutex_exit(&sq->inbound->isaf_lock);
		return (0);
	}

	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
	if (ipsapp->ipsap_sa_ptr->ipsa_haspeer) {
		/*
		 * haspeer implies no sa_pairing, look for same spi
		 * in other hashtable.
		 */
		ipsapp->ipsap_psa_ptr =
		    ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
		    sq->assoc->sadb_sa_spi, sq->srcaddr, sq->dstaddr, sq->af);
		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
		mutex_exit(&sq->outbound->isaf_lock);
		mutex_exit(&sq->inbound->isaf_lock);
		return (0);
	}
	/* Snapshot what is needed to find the pair, then drop all locks. */
	pair_spi = ipsapp->ipsap_sa_ptr->ipsa_otherspi;
	IPSA_COPY_ADDR(&pair_srcaddr,
	    ipsapp->ipsap_sa_ptr->ipsa_srcaddr, sq->af);
	IPSA_COPY_ADDR(&pair_dstaddr,
	    ipsapp->ipsap_sa_ptr->ipsa_dstaddr, sq->af);
	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
	mutex_exit(&sq->inbound->isaf_lock);
	mutex_exit(&sq->outbound->isaf_lock);

	/* An otherspi of zero means this SA is not paired. */
	if (pair_spi == 0) {
		ASSERT(ipsapp->ipsap_bucket != NULL);
		ASSERT(ipsapp->ipsap_pbucket != NULL);
		return (0);
	}

	/*
	 * The pair lives in the opposite table from the SA just found;
	 * recompute its bucket from the pair's own SPI or address.
	 */

	if (ipsapp->in_inbound_table) {
		/* Found SA in inbound table, pair will be in outbound. */
		if (sq->af == AF_INET6) {
			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V6(sq->sp,
			    *(uint32_t *)pair_srcaddr);
		} else {
			ipsapp->ipsap_pbucket = OUTBOUND_BUCKET_V4(sq->sp,
			    *(uint32_t *)pair_srcaddr);
		}
	} else {
		/* Found SA in outbound table, pair is hashed inbound by SPI. */
		ipsapp->ipsap_pbucket = INBOUND_BUCKET(sq->sp, pair_spi);
	}
	/* The pair has source and destination swapped relative to the SA. */
	mutex_enter(&ipsapp->ipsap_pbucket->isaf_lock);
	ipsapp->ipsap_psa_ptr = ipsec_getassocbyspi(ipsapp->ipsap_pbucket,
	    pair_spi, pair_dstaddr, pair_srcaddr, sq->af);
	mutex_exit(&ipsapp->ipsap_pbucket->isaf_lock);
	ASSERT(ipsapp->ipsap_bucket != NULL);
	ASSERT(ipsapp->ipsap_pbucket != NULL);
	return (0);
}
2790
2791/*
2792 * Perform NAT-traversal cached checksum offset calculations here.
2793 */
2794static void
2795sadb_nat_calculations(ipsa_t *newbie, sadb_address_t *natt_loc_ext,
2796    sadb_address_t *natt_rem_ext, uint32_t *src_addr_ptr,
2797    uint32_t *dst_addr_ptr)
2798{
2799	struct sockaddr_in *natt_loc, *natt_rem;
2800	uint32_t *natt_loc_ptr = NULL, *natt_rem_ptr = NULL;
2801	uint32_t running_sum = 0;
2802
2803#define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
2804
2805	if (natt_rem_ext != NULL) {
2806		uint32_t l_src;
2807		uint32_t l_rem;
2808
2809		natt_rem = (struct sockaddr_in *)(natt_rem_ext + 1);
2810
2811		/* Ensured by sadb_addrfix(). */
2812		ASSERT(natt_rem->sin_family == AF_INET);
2813
2814		natt_rem_ptr = (uint32_t *)(&natt_rem->sin_addr);
2815		newbie->ipsa_remote_nat_port = natt_rem->sin_port;
2816		l_src = *src_addr_ptr;
2817		l_rem = *natt_rem_ptr;
2818
2819		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2820		newbie->ipsa_natt_addr_rem = *natt_rem_ptr;
2821
2822		l_src = ntohl(l_src);
2823		DOWN_SUM(l_src);
2824		DOWN_SUM(l_src);
2825		l_rem = ntohl(l_rem);
2826		DOWN_SUM(l_rem);
2827		DOWN_SUM(l_rem);
2828
2829		/*
2830		 * We're 1's complement for checksums, so check for wraparound
2831		 * here.
2832		 */
2833		if (l_rem > l_src)
2834			l_src--;
2835
2836		running_sum += l_src - l_rem;
2837
2838		DOWN_SUM(running_sum);
2839		DOWN_SUM(running_sum);
2840	}
2841
2842	if (natt_loc_ext != NULL) {
2843		natt_loc = (struct sockaddr_in *)(natt_loc_ext + 1);
2844
2845		/* Ensured by sadb_addrfix(). */
2846		ASSERT(natt_loc->sin_family == AF_INET);
2847
2848		natt_loc_ptr = (uint32_t *)(&natt_loc->sin_addr);
2849		newbie->ipsa_local_nat_port = natt_loc->sin_port;
2850
2851		/* Instead of IPSA_COPY_ADDR(), just copy first 32 bits. */
2852		newbie->ipsa_natt_addr_loc = *natt_loc_ptr;
2853
2854		/*
2855		 * NAT-T port agility means we may have natt_loc_ext, but
2856		 * only for a local-port change.
2857		 */
2858		if (natt_loc->sin_addr.s_addr != INADDR_ANY) {
2859			uint32_t l_dst = ntohl(*dst_addr_ptr);
2860			uint32_t l_loc = ntohl(*natt_loc_ptr);
2861
2862			DOWN_SUM(l_loc);
2863			DOWN_SUM(l_loc);
2864			DOWN_SUM(l_dst);
2865			DOWN_SUM(l_dst);
2866
2867			/*
2868			 * We're 1's complement for checksums, so check for
2869			 * wraparound here.
2870			 */
2871			if (l_loc > l_dst)
2872				l_dst--;
2873
2874			running_sum += l_dst - l_loc;
2875			DOWN_SUM(running_sum);
2876			DOWN_SUM(running_sum);
2877		}
2878	}
2879
2880	newbie->ipsa_inbound_cksum = running_sum;
2881#undef DOWN_SUM
2882}
2883
2884/*
2885 * This function is called from consumers that need to insert a fully-grown
2886 * security association into its tables.  This function takes into account that
2887 * SAs can be "inbound", "outbound", or "both".	 The "primary" and "secondary"
2888 * hash bucket parameters are set in order of what the SA will be most of the
2889 * time.  (For example, an SA with an unspecified source, and a multicast
2890 * destination will primarily be an outbound SA.  OTOH, if that destination
2891 * is unicast for this node, then the SA will primarily be inbound.)
2892 *
2893 * It takes a lot of parameters because even if clone is B_FALSE, this needs
2894 * to check both buckets for purposes of collision.
2895 *
2896 * Return 0 upon success.  Return various errnos (ENOMEM, EEXIST) for
2897 * various error conditions.  We may need to set samsg->sadb_x_msg_diagnostic
2898 * with additional diagnostic information because there is at least one EINVAL
2899 * case here.
2900 */
2901int
2902sadb_common_add(queue_t *pfkey_q, mblk_t *mp, sadb_msg_t *samsg,
2903    keysock_in_t *ksi, isaf_t *primary, isaf_t *secondary,
2904    ipsa_t *newbie, boolean_t clone, boolean_t is_inbound, int *diagnostic,
2905    netstack_t *ns, sadbp_t *spp)
2906{
2907	ipsa_t *newbie_clone = NULL, *scratch;
2908	ipsap_t ipsapp;
2909	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
2910	sadb_address_t *srcext =
2911	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
2912	sadb_address_t *dstext =
2913	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
2914	sadb_address_t *isrcext =
2915	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
2916	sadb_address_t *idstext =
2917	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
2918	sadb_x_kmc_t *kmcext =
2919	    (sadb_x_kmc_t *)ksi->ks_in_extv[SADB_X_EXT_KM_COOKIE];
2920	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
2921	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
2922	sadb_sens_t *sens =
2923	    (sadb_sens_t *)ksi->ks_in_extv[SADB_EXT_SENSITIVITY];
2924	sadb_sens_t *osens =
2925	    (sadb_sens_t *)ksi->ks_in_extv[SADB_X_EXT_OUTER_SENS];
2926	sadb_x_pair_t *pair_ext =
2927	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
2928	sadb_x_replay_ctr_t *replayext =
2929	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
2930	uint8_t protocol =
2931	    (samsg->sadb_msg_satype == SADB_SATYPE_AH) ? IPPROTO_AH:IPPROTO_ESP;
2932	int salt_offset;
2933	uint8_t *buf_ptr;
2934	struct sockaddr_in *src, *dst, *isrc, *idst;
2935	struct sockaddr_in6 *src6, *dst6, *isrc6, *idst6;
2936	sadb_lifetime_t *soft =
2937	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
2938	sadb_lifetime_t *hard =
2939	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
2940	sadb_lifetime_t	*idle =
2941	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
2942	sa_family_t af;
2943	int error = 0;
2944	boolean_t isupdate = (newbie != NULL);
2945	uint32_t *src_addr_ptr, *dst_addr_ptr, *isrc_addr_ptr, *idst_addr_ptr;
2946	ipsec_stack_t	*ipss = ns->netstack_ipsec;
2947	ip_stack_t	*ipst = ns->netstack_ip;
2948	ipsec_alginfo_t *alg;
2949	int		rcode;
2950	boolean_t	async = B_FALSE;
2951
2952	init_ipsa_pair(&ipsapp);
2953
2954	if (srcext == NULL) {
2955		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
2956		return (EINVAL);
2957	}
2958	if (dstext == NULL) {
2959		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
2960		return (EINVAL);
2961	}
2962	if (assoc == NULL) {
2963		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
2964		return (EINVAL);
2965	}
2966
2967	src = (struct sockaddr_in *)(srcext + 1);
2968	src6 = (struct sockaddr_in6 *)(srcext + 1);
2969	dst = (struct sockaddr_in *)(dstext + 1);
2970	dst6 = (struct sockaddr_in6 *)(dstext + 1);
2971	if (isrcext != NULL) {
2972		isrc = (struct sockaddr_in *)(isrcext + 1);
2973		isrc6 = (struct sockaddr_in6 *)(isrcext + 1);
2974		ASSERT(idstext != NULL);
2975		idst = (struct sockaddr_in *)(idstext + 1);
2976		idst6 = (struct sockaddr_in6 *)(idstext + 1);
2977	} else {
2978		isrc = NULL;
2979		isrc6 = NULL;
2980	}
2981
2982	af = src->sin_family;
2983
2984	if (af == AF_INET) {
2985		src_addr_ptr = (uint32_t *)&src->sin_addr;
2986		dst_addr_ptr = (uint32_t *)&dst->sin_addr;
2987	} else {
2988		ASSERT(af == AF_INET6);
2989		src_addr_ptr = (uint32_t *)&src6->sin6_addr;
2990		dst_addr_ptr = (uint32_t *)&dst6->sin6_addr;
2991	}
2992
2993	if (!isupdate && (clone == B_TRUE || is_inbound == B_TRUE) &&
2994	    cl_inet_checkspi &&
2995	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
2996		rcode = cl_inet_checkspi(ns->netstack_stackid, protocol,
2997		    assoc->sadb_sa_spi, NULL);
2998		if (rcode == -1) {
2999			return (EEXIST);
3000		}
3001	}
3002
3003	/*
3004	 * Check to see if the new SA will be cloned AND paired. The
3005	 * reason a SA will be cloned is the source or destination addresses
3006	 * are not specific enough to determine if the SA goes in the outbound
3007	 * or the inbound hash table, so its cloned and put in both. If
3008	 * the SA is paired, it's soft linked to another SA for the other
3009	 * direction. Keeping track and looking up SA's that are direction
3010	 * unspecific and linked is too hard.
3011	 */
3012	if (clone && (pair_ext != NULL)) {
3013		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
3014		return (EINVAL);
3015	}
3016
3017	if (!isupdate) {
3018		newbie = sadb_makelarvalassoc(assoc->sadb_sa_spi,
3019		    src_addr_ptr, dst_addr_ptr, af, ns);
3020		if (newbie == NULL)
3021			return (ENOMEM);
3022	}
3023
3024	mutex_enter(&newbie->ipsa_lock);
3025
3026	if (isrc != NULL) {
3027		if (isrc->sin_family == AF_INET) {
3028			if (srcext->sadb_address_proto != IPPROTO_ENCAP) {
3029				if (srcext->sadb_address_proto != 0) {
3030					/*
3031					 * Mismatched outer-packet protocol
3032					 * and inner-packet address family.
3033					 */
3034					mutex_exit(&newbie->ipsa_lock);
3035					error = EPROTOTYPE;
3036					*diagnostic =
3037					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3038					goto error;
3039				} else {
3040					/* Fill in with explicit protocol. */
3041					srcext->sadb_address_proto =
3042					    IPPROTO_ENCAP;
3043					dstext->sadb_address_proto =
3044					    IPPROTO_ENCAP;
3045				}
3046			}
3047			isrc_addr_ptr = (uint32_t *)&isrc->sin_addr;
3048			idst_addr_ptr = (uint32_t *)&idst->sin_addr;
3049		} else {
3050			ASSERT(isrc->sin_family == AF_INET6);
3051			if (srcext->sadb_address_proto != IPPROTO_IPV6) {
3052				if (srcext->sadb_address_proto != 0) {
3053					/*
3054					 * Mismatched outer-packet protocol
3055					 * and inner-packet address family.
3056					 */
3057					mutex_exit(&newbie->ipsa_lock);
3058					error = EPROTOTYPE;
3059					*diagnostic =
3060					    SADB_X_DIAGNOSTIC_INNER_AF_MISMATCH;
3061					goto error;
3062				} else {
3063					/* Fill in with explicit protocol. */
3064					srcext->sadb_address_proto =
3065					    IPPROTO_IPV6;
3066					dstext->sadb_address_proto =
3067					    IPPROTO_IPV6;
3068				}
3069			}
3070			isrc_addr_ptr = (uint32_t *)&isrc6->sin6_addr;
3071			idst_addr_ptr = (uint32_t *)&idst6->sin6_addr;
3072		}
3073		newbie->ipsa_innerfam = isrc->sin_family;
3074
3075		IPSA_COPY_ADDR(newbie->ipsa_innersrc, isrc_addr_ptr,
3076		    newbie->ipsa_innerfam);
3077		IPSA_COPY_ADDR(newbie->ipsa_innerdst, idst_addr_ptr,
3078		    newbie->ipsa_innerfam);
3079		newbie->ipsa_innersrcpfx = isrcext->sadb_address_prefixlen;
3080		newbie->ipsa_innerdstpfx = idstext->sadb_address_prefixlen;
3081
3082		/* Unique value uses inner-ports for Tunnel Mode... */
3083		newbie->ipsa_unique_id = SA_UNIQUE_ID(isrc->sin_port,
3084		    idst->sin_port, dstext->sadb_address_proto,
3085		    idstext->sadb_address_proto);
3086		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(isrc->sin_port,
3087		    idst->sin_port, dstext->sadb_address_proto,
3088		    idstext->sadb_address_proto);
3089	} else {
3090		/* ... and outer-ports for Transport Mode. */
3091		newbie->ipsa_unique_id = SA_UNIQUE_ID(src->sin_port,
3092		    dst->sin_port, dstext->sadb_address_proto, 0);
3093		newbie->ipsa_unique_mask = SA_UNIQUE_MASK(src->sin_port,
3094		    dst->sin_port, dstext->sadb_address_proto, 0);
3095	}
3096	if (newbie->ipsa_unique_mask != (uint64_t)0)
3097		newbie->ipsa_flags |= IPSA_F_UNIQUE;
3098
3099	sadb_nat_calculations(newbie,
3100	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC],
3101	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM],
3102	    src_addr_ptr, dst_addr_ptr);
3103
3104	newbie->ipsa_type = samsg->sadb_msg_satype;
3105
3106	ASSERT((assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
3107	    (assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE));
3108	newbie->ipsa_auth_alg = assoc->sadb_sa_auth;
3109	newbie->ipsa_encr_alg = assoc->sadb_sa_encrypt;
3110
3111	newbie->ipsa_flags |= assoc->sadb_sa_flags;
3112	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_LOC &&
3113	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC] == NULL) {
3114		mutex_exit(&newbie->ipsa_lock);
3115		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
3116		error = EINVAL;
3117		goto error;
3118	}
3119	if (newbie->ipsa_flags & SADB_X_SAFLAGS_NATT_REM &&
3120	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM] == NULL) {
3121		mutex_exit(&newbie->ipsa_lock);
3122		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
3123		error = EINVAL;
3124		goto error;
3125	}
3126	if (newbie->ipsa_flags & SADB_X_SAFLAGS_TUNNEL &&
3127	    ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL) {
3128		mutex_exit(&newbie->ipsa_lock);
3129		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
3130		error = EINVAL;
3131		goto error;
3132	}
3133	/*
3134	 * If unspecified source address, force replay_wsize to 0.
3135	 * This is because an SA that has multiple sources of secure
3136	 * traffic cannot enforce a replay counter w/o synchronizing the
3137	 * senders.
3138	 */
3139	if (ksi->ks_in_srctype != KS_IN_ADDR_UNSPEC)
3140		newbie->ipsa_replay_wsize = assoc->sadb_sa_replay;
3141	else
3142		newbie->ipsa_replay_wsize = 0;
3143
3144	newbie->ipsa_addtime = gethrestime_sec();
3145
3146	if (kmcext != NULL) {
3147		newbie->ipsa_kmp = kmcext->sadb_x_kmc_proto;
3148		/*
3149		 * Be liberal in what we receive.  Special-case the IKEv1
3150		 * cookie, which closed-source in.iked assumes is 32 bits.
3151		 * Now that we store all 64 bits, we should pre-zero the
3152		 * reserved field on behalf of closed-source in.iked.
3153		 */
3154		if (newbie->ipsa_kmp == SADB_X_KMP_IKE) {
3155			/* Just in case in.iked is misbehaving... */
3156			kmcext->sadb_x_kmc_reserved = 0;
3157		}
3158		newbie->ipsa_kmc = kmcext->sadb_x_kmc_cookie64;
3159	}
3160
3161	/*
3162	 * XXX CURRENT lifetime checks MAY BE needed for an UPDATE.
3163	 * The spec says that one can update current lifetimes, but
3164	 * that seems impractical, especially in the larval-to-mature
3165	 * update that this function performs.
3166	 */
3167	if (soft != NULL) {
3168		newbie->ipsa_softaddlt = soft->sadb_lifetime_addtime;
3169		newbie->ipsa_softuselt = soft->sadb_lifetime_usetime;
3170		newbie->ipsa_softbyteslt = soft->sadb_lifetime_bytes;
3171		newbie->ipsa_softalloc = soft->sadb_lifetime_allocations;
3172		SET_EXPIRE(newbie, softaddlt, softexpiretime);
3173	}
3174	if (hard != NULL) {
3175		newbie->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
3176		newbie->ipsa_harduselt = hard->sadb_lifetime_usetime;
3177		newbie->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
3178		newbie->ipsa_hardalloc = hard->sadb_lifetime_allocations;
3179		SET_EXPIRE(newbie, hardaddlt, hardexpiretime);
3180	}
3181	if (idle != NULL) {
3182		newbie->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
3183		newbie->ipsa_idleuselt = idle->sadb_lifetime_usetime;
3184		newbie->ipsa_idleexpiretime = newbie->ipsa_addtime +
3185		    newbie->ipsa_idleaddlt;
3186		newbie->ipsa_idletime = newbie->ipsa_idleaddlt;
3187	}
3188
3189	newbie->ipsa_authtmpl = NULL;
3190	newbie->ipsa_encrtmpl = NULL;
3191
3192#ifdef IPSEC_LATENCY_TEST
3193	if (akey != NULL && newbie->ipsa_auth_alg != SADB_AALG_NONE) {
3194#else
3195	if (akey != NULL) {
3196#endif
3197		async = (ipss->ipsec_algs_exec_mode[IPSEC_ALG_AUTH] ==
3198		    IPSEC_ALGS_EXEC_ASYNC);
3199
3200		newbie->ipsa_authkeybits = akey->sadb_key_bits;
3201		newbie->ipsa_authkeylen = SADB_1TO8(akey->sadb_key_bits);
3202		/* In case we have to round up to the next byte... */
3203		if ((akey->sadb_key_bits & 0x7) != 0)
3204			newbie->ipsa_authkeylen++;
3205		newbie->ipsa_authkey = kmem_alloc(newbie->ipsa_authkeylen,
3206		    KM_NOSLEEP);
3207		if (newbie->ipsa_authkey == NULL) {
3208			error = ENOMEM;
3209			mutex_exit(&newbie->ipsa_lock);
3210			goto error;
3211		}
3212		bcopy(akey + 1, newbie->ipsa_authkey, newbie->ipsa_authkeylen);
3213		bzero(akey + 1, newbie->ipsa_authkeylen);
3214
3215		/*
3216		 * Pre-initialize the kernel crypto framework key
3217		 * structure.
3218		 */
3219		newbie->ipsa_kcfauthkey.ck_format = CRYPTO_KEY_RAW;
3220		newbie->ipsa_kcfauthkey.ck_length = newbie->ipsa_authkeybits;
3221		newbie->ipsa_kcfauthkey.ck_data = newbie->ipsa_authkey;
3222
3223		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3224		alg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
3225		    [newbie->ipsa_auth_alg];
3226		if (alg != NULL && ALG_VALID(alg)) {
3227			newbie->ipsa_amech.cm_type = alg->alg_mech_type;
3228			newbie->ipsa_amech.cm_param =
3229			    (char *)&newbie->ipsa_mac_len;
3230			newbie->ipsa_amech.cm_param_len = sizeof (size_t);
3231			newbie->ipsa_mac_len = (size_t)alg->alg_datalen;
3232		} else {
3233			newbie->ipsa_amech.cm_type = CRYPTO_MECHANISM_INVALID;
3234		}
3235		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_AUTH);
3236		rw_exit(&ipss->ipsec_alg_lock);
3237		if (error != 0) {
3238			mutex_exit(&newbie->ipsa_lock);
3239			/*
3240			 * An error here indicates that alg is the wrong type
3241			 * (IE: not authentication) or its not in the alg tables
3242			 * created by ipsecalgs(1m), or Kcf does not like the
3243			 * parameters passed in with this algorithm, which is
3244			 * probably a coding error!
3245			 */
3246			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3247
3248			goto error;
3249		}
3250	}
3251
3252	if (ekey != NULL) {
3253		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3254		async = async || (ipss->ipsec_algs_exec_mode[IPSEC_ALG_ENCR] ==
3255		    IPSEC_ALGS_EXEC_ASYNC);
3256		alg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
3257		    [newbie->ipsa_encr_alg];
3258
3259		if (alg != NULL && ALG_VALID(alg)) {
3260			newbie->ipsa_emech.cm_type = alg->alg_mech_type;
3261			newbie->ipsa_datalen = alg->alg_datalen;
3262			if (alg->alg_flags & ALG_FLAG_COUNTERMODE)
3263				newbie->ipsa_flags |= IPSA_F_COUNTERMODE;
3264
3265			if (alg->alg_flags & ALG_FLAG_COMBINED) {
3266				newbie->ipsa_flags |= IPSA_F_COMBINED;
3267				newbie->ipsa_mac_len =  alg->alg_icvlen;
3268			}
3269
3270			if (alg->alg_flags & ALG_FLAG_CCM)
3271				newbie->ipsa_noncefunc = ccm_params_init;
3272			else if (alg->alg_flags & ALG_FLAG_GCM)
3273				newbie->ipsa_noncefunc = gcm_params_init;
3274			else newbie->ipsa_noncefunc = cbc_params_init;
3275
3276			newbie->ipsa_saltlen = alg->alg_saltlen;
3277			newbie->ipsa_saltbits = SADB_8TO1(newbie->ipsa_saltlen);
3278			newbie->ipsa_iv_len = alg->alg_ivlen;
3279			newbie->ipsa_nonce_len = newbie->ipsa_saltlen +
3280			    newbie->ipsa_iv_len;
3281			newbie->ipsa_emech.cm_param = NULL;
3282			newbie->ipsa_emech.cm_param_len = 0;
3283		} else {
3284			newbie->ipsa_emech.cm_type = CRYPTO_MECHANISM_INVALID;
3285		}
3286		rw_exit(&ipss->ipsec_alg_lock);
3287
3288		/*
3289		 * The byte stream following the sadb_key_t is made up of:
3290		 * key bytes, [salt bytes], [IV initial value]
3291		 * All of these have variable length. The IV is typically
3292		 * randomly generated by this function and not passed in.
3293		 * By supporting the injection of a known IV, the whole
3294		 * IPsec subsystem and the underlying crypto subsystem
3295		 * can be tested with known test vectors.
3296		 *
3297		 * The keying material has been checked by ext_check()
3298		 * and ipsec_valid_key_size(), after removing salt/IV
3299		 * bits, whats left is the encryption key. If this is too
3300		 * short, ipsec_create_ctx_tmpl() will fail and the SA
3301		 * won't get created.
3302		 *
3303		 * set ipsa_encrkeylen to length of key only.
3304		 */
3305		newbie->ipsa_encrkeybits = ekey->sadb_key_bits;
3306		newbie->ipsa_encrkeybits -= ekey->sadb_key_reserved;
3307		newbie->ipsa_encrkeybits -= newbie->ipsa_saltbits;
3308		newbie->ipsa_encrkeylen = SADB_1TO8(newbie->ipsa_encrkeybits);
3309
3310		/* In case we have to round up to the next byte... */
3311		if ((ekey->sadb_key_bits & 0x7) != 0)
3312			newbie->ipsa_encrkeylen++;
3313
3314		newbie->ipsa_encrkey = kmem_alloc(newbie->ipsa_encrkeylen,
3315		    KM_NOSLEEP);
3316		if (newbie->ipsa_encrkey == NULL) {
3317			error = ENOMEM;
3318			mutex_exit(&newbie->ipsa_lock);
3319			goto error;
3320		}
3321
3322		buf_ptr = (uint8_t *)(ekey + 1);
3323		bcopy(buf_ptr, newbie->ipsa_encrkey, newbie->ipsa_encrkeylen);
3324
3325		if (newbie->ipsa_flags & IPSA_F_COMBINED) {
3326			/*
3327			 * Combined mode algs need a nonce. Copy the salt and
3328			 * IV into a buffer. The ipsa_nonce is a pointer into
3329			 * this buffer, some bytes at the start of the buffer
3330			 * may be unused, depends on the salt length. The IV
3331			 * is 64 bit aligned so it can be incremented as a
3332			 * uint64_t. Zero out key in samsg_t before freeing.
3333			 */
3334
3335			newbie->ipsa_nonce_buf = kmem_alloc(
3336			    sizeof (ipsec_nonce_t), KM_NOSLEEP);
3337			if (newbie->ipsa_nonce_buf == NULL) {
3338				error = ENOMEM;
3339				mutex_exit(&newbie->ipsa_lock);
3340				goto error;
3341			}
3342			/*
3343			 * Initialize nonce and salt pointers to point
3344			 * to the nonce buffer. This is just in case we get
3345			 * bad data, the pointers will be valid, the data
3346			 * won't be.
3347			 *
3348			 * See sadb.h for layout of nonce.
3349			 */
3350			newbie->ipsa_iv = &newbie->ipsa_nonce_buf->iv;
3351			newbie->ipsa_salt = (uint8_t *)newbie->ipsa_nonce_buf;
3352			newbie->ipsa_nonce = newbie->ipsa_salt;
3353			if (newbie->ipsa_saltlen != 0) {
3354				salt_offset = MAXSALTSIZE -
3355				    newbie->ipsa_saltlen;
3356				newbie->ipsa_salt = (uint8_t *)
3357				    &newbie->ipsa_nonce_buf->salt[salt_offset];
3358				newbie->ipsa_nonce = newbie->ipsa_salt;
3359				buf_ptr += newbie->ipsa_encrkeylen;
3360				bcopy(buf_ptr, newbie->ipsa_salt,
3361				    newbie->ipsa_saltlen);
3362			}
3363			/*
3364			 * The IV for CCM/GCM mode increments, it should not
3365			 * repeat. Get a random value for the IV, make a
3366			 * copy, the SA will expire when/if the IV ever
3367			 * wraps back to the initial value. If an Initial IV
3368			 * is passed in via PF_KEY, save this in the SA.
3369			 * Initialising IV for inbound is pointless as its
3370			 * taken from the inbound packet.
3371			 */
3372			if (!is_inbound) {
3373				if (ekey->sadb_key_reserved != 0) {
3374					buf_ptr += newbie->ipsa_saltlen;
3375					bcopy(buf_ptr, (uint8_t *)newbie->
3376					    ipsa_iv, SADB_1TO8(ekey->
3377					    sadb_key_reserved));
3378				} else {
3379					(void) random_get_pseudo_bytes(
3380					    (uint8_t *)newbie->ipsa_iv,
3381					    newbie->ipsa_iv_len);
3382				}
3383				newbie->ipsa_iv_softexpire =
3384				    (*newbie->ipsa_iv) << 9;
3385				newbie->ipsa_iv_hardexpire = *newbie->ipsa_iv;
3386			}
3387		}
3388		bzero((ekey + 1), SADB_1TO8(ekey->sadb_key_bits));
3389
3390		/*
3391		 * Pre-initialize the kernel crypto framework key
3392		 * structure.
3393		 */
3394		newbie->ipsa_kcfencrkey.ck_format = CRYPTO_KEY_RAW;
3395		newbie->ipsa_kcfencrkey.ck_length = newbie->ipsa_encrkeybits;
3396		newbie->ipsa_kcfencrkey.ck_data = newbie->ipsa_encrkey;
3397
3398		rw_enter(&ipss->ipsec_alg_lock, RW_READER);
3399		error = ipsec_create_ctx_tmpl(newbie, IPSEC_ALG_ENCR);
3400		rw_exit(&ipss->ipsec_alg_lock);
3401		if (error != 0) {
3402			mutex_exit(&newbie->ipsa_lock);
3403			/* See above for error explanation. */
3404			*diagnostic = SADB_X_DIAGNOSTIC_BAD_CTX;
3405			goto error;
3406		}
3407	}
3408
3409	if (async)
3410		newbie->ipsa_flags |= IPSA_F_ASYNC;
3411
3412	/*
3413	 * Ptrs to processing functions.
3414	 */
3415	if (newbie->ipsa_type == SADB_SATYPE_ESP)
3416		ipsecesp_init_funcs(newbie);
3417	else
3418		ipsecah_init_funcs(newbie);
3419	ASSERT(newbie->ipsa_output_func != NULL &&
3420	    newbie->ipsa_input_func != NULL);
3421
3422	/*
3423	 * Certificate ID stuff.
3424	 */
3425	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC] != NULL) {
3426		sadb_ident_t *id =
3427		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_SRC];
3428
3429		/*
3430		 * Can assume strlen() will return okay because ext_check() in
3431		 * keysock.c prepares the string for us.
3432		 */
3433		newbie->ipsa_src_cid = ipsid_lookup(id->sadb_ident_type,
3434		    (char *)(id+1), ns);
3435		if (newbie->ipsa_src_cid == NULL) {
3436			error = ENOMEM;
3437			mutex_exit(&newbie->ipsa_lock);
3438			goto error;
3439		}
3440	}
3441
3442	if (ksi->ks_in_extv[SADB_EXT_IDENTITY_DST] != NULL) {
3443		sadb_ident_t *id =
3444		    (sadb_ident_t *)ksi->ks_in_extv[SADB_EXT_IDENTITY_DST];
3445
3446		/*
3447		 * Can assume strlen() will return okay because ext_check() in
3448		 * keysock.c prepares the string for us.
3449		 */
3450		newbie->ipsa_dst_cid = ipsid_lookup(id->sadb_ident_type,
3451		    (char *)(id+1), ns);
3452		if (newbie->ipsa_dst_cid == NULL) {
3453			error = ENOMEM;
3454			mutex_exit(&newbie->ipsa_lock);
3455			goto error;
3456		}
3457	}
3458
3459	/*
3460	 * sensitivity label handling code:
3461	 * Convert sens + bitmap into cred_t, and associate it
3462	 * with the new SA.
3463	 */
3464	if (sens != NULL) {
3465		uint64_t *bitmap = (uint64_t *)(sens + 1);
3466
3467		newbie->ipsa_tsl = sadb_label_from_sens(sens, bitmap);
3468	}
3469
3470	/*
3471	 * Likewise for outer sensitivity.
3472	 */
3473	if (osens != NULL) {
3474		uint64_t *bitmap = (uint64_t *)(osens + 1);
3475		ts_label_t *tsl, *effective_tsl;
3476		uint32_t *peer_addr_ptr;
3477		zoneid_t zoneid = GLOBAL_ZONEID;
3478		zone_t *zone;
3479
3480		peer_addr_ptr = is_inbound ? src_addr_ptr : dst_addr_ptr;
3481
3482		tsl = sadb_label_from_sens(osens, bitmap);
3483		newbie->ipsa_mac_exempt = CONN_MAC_DEFAULT;
3484
3485		if (osens->sadb_x_sens_flags & SADB_X_SENS_IMPLICIT) {
3486			newbie->ipsa_mac_exempt = CONN_MAC_IMPLICIT;
3487		}
3488
3489		error = tsol_check_dest(tsl, peer_addr_ptr,
3490		    (af == AF_INET6)?IPV6_VERSION:IPV4_VERSION,
3491		    newbie->ipsa_mac_exempt, B_TRUE, &effective_tsl);
3492		if (error != 0) {
3493			label_rele(tsl);
3494			mutex_exit(&newbie->ipsa_lock);
3495			goto error;
3496		}
3497
3498		if (effective_tsl != NULL) {
3499			label_rele(tsl);
3500			tsl = effective_tsl;
3501		}
3502
3503		newbie->ipsa_otsl = tsl;
3504
3505		zone = zone_find_by_label(tsl);
3506		if (zone != NULL) {
3507			zoneid = zone->zone_id;
3508			zone_rele(zone);
3509		}
3510		/*
3511		 * For exclusive stacks we set the zoneid to zero to operate
3512		 * as if in the global zone for tsol_compute_label_v4/v6
3513		 */
3514		if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID)
3515			zoneid = GLOBAL_ZONEID;
3516
3517		if (af == AF_INET6) {
3518			error = tsol_compute_label_v6(tsl, zoneid,
3519			    (in6_addr_t *)peer_addr_ptr,
3520			    newbie->ipsa_opt_storage, ipst);
3521		} else {
3522			error = tsol_compute_label_v4(tsl, zoneid,
3523			    *peer_addr_ptr, newbie->ipsa_opt_storage, ipst);
3524		}
3525		if (error != 0) {
3526			mutex_exit(&newbie->ipsa_lock);
3527			goto error;
3528		}
3529	}
3530
3531
3532	if (replayext != NULL) {
3533		if ((replayext->sadb_x_rc_replay32 == 0) &&
3534		    (replayext->sadb_x_rc_replay64 != 0)) {
3535			error = EOPNOTSUPP;
3536			*diagnostic = SADB_X_DIAGNOSTIC_INVALID_REPLAY;
3537			mutex_exit(&newbie->ipsa_lock);
3538			goto error;
3539		}
3540		newbie->ipsa_replay = replayext->sadb_x_rc_replay32;
3541	}
3542
3543	/* now that the SA has been updated, set its new state */
3544	newbie->ipsa_state = assoc->sadb_sa_state;
3545
3546	if (clone) {
3547		newbie->ipsa_haspeer = B_TRUE;
3548	} else {
3549		if (!is_inbound) {
3550			lifetime_fuzz(newbie);
3551		}
3552	}
3553	/*
3554	 * The less locks I hold when doing an insertion and possible cloning,
3555	 * the better!
3556	 */
3557	mutex_exit(&newbie->ipsa_lock);
3558
3559	if (clone) {
3560		newbie_clone = sadb_cloneassoc(newbie);
3561
3562		if (newbie_clone == NULL) {
3563			error = ENOMEM;
3564			goto error;
3565		}
3566	}
3567
3568	/*
3569	 * Enter the bucket locks.  The order of entry is outbound,
3570	 * inbound.  We map "primary" and "secondary" into outbound and inbound
3571	 * based on the destination address type.  If the destination address
3572	 * type is for a node that isn't mine (or potentially mine), the
3573	 * "primary" bucket is the outbound one.
3574	 */
3575	if (!is_inbound) {
3576		/* primary == outbound */
3577		mutex_enter(&primary->isaf_lock);
3578		mutex_enter(&secondary->isaf_lock);
3579	} else {
3580		/* primary == inbound */
3581		mutex_enter(&secondary->isaf_lock);
3582		mutex_enter(&primary->isaf_lock);
3583	}
3584
3585	/*
3586	 * sadb_insertassoc() doesn't increment the reference
3587	 * count.  We therefore have to increment the
3588	 * reference count one more time to reflect the
3589	 * pointers of the table that reference this SA.
3590	 */
3591	IPSA_REFHOLD(newbie);
3592
3593	if (isupdate) {
3594		/*
3595		 * Unlink from larval holding cell in the "inbound" fanout.
3596		 */
3597		ASSERT(newbie->ipsa_linklock == &primary->isaf_lock ||
3598		    newbie->ipsa_linklock == &secondary->isaf_lock);
3599		sadb_unlinkassoc(newbie);
3600	}
3601
3602	mutex_enter(&newbie->ipsa_lock);
3603	error = sadb_insertassoc(newbie, primary);
3604	mutex_exit(&newbie->ipsa_lock);
3605
3606	if (error != 0) {
3607		/*
3608		 * Since sadb_insertassoc() failed, we must decrement the
3609		 * refcount again so the cleanup code will actually free
3610		 * the offending SA.
3611		 */
3612		IPSA_REFRELE(newbie);
3613		goto error_unlock;
3614	}
3615
3616	if (newbie_clone != NULL) {
3617		mutex_enter(&newbie_clone->ipsa_lock);
3618		error = sadb_insertassoc(newbie_clone, secondary);
3619		mutex_exit(&newbie_clone->ipsa_lock);
3620		if (error != 0) {
3621			/* Collision in secondary table. */
3622			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3623			goto error_unlock;
3624		}
3625		IPSA_REFHOLD(newbie_clone);
3626	} else {
3627		ASSERT(primary != secondary);
3628		scratch = ipsec_getassocbyspi(secondary, newbie->ipsa_spi,
3629		    ALL_ZEROES_PTR, newbie->ipsa_dstaddr, af);
3630		if (scratch != NULL) {
3631			/* Collision in secondary table. */
3632			sadb_unlinkassoc(newbie);  /* This does REFRELE. */
3633			/* Set the error, since ipsec_getassocbyspi() can't. */
3634			error = EEXIST;
3635			goto error_unlock;
3636		}
3637	}
3638
3639	/* OKAY!  So let's do some reality check assertions. */
3640
3641	ASSERT(MUTEX_NOT_HELD(&newbie->ipsa_lock));
3642	ASSERT(newbie_clone == NULL ||
3643	    (MUTEX_NOT_HELD(&newbie_clone->ipsa_lock)));
3644
3645error_unlock:
3646
3647	/*
3648	 * We can exit the locks in any order.	Only entrance needs to
3649	 * follow any protocol.
3650	 */
3651	mutex_exit(&secondary->isaf_lock);
3652	mutex_exit(&primary->isaf_lock);
3653
3654	if (pair_ext != NULL && error == 0) {
3655		/* update pair_spi if it exists. */
3656		ipsa_query_t sq;
3657
3658		sq.spp = spp;		/* XXX param */
3659		error = sadb_form_query(ksi, IPSA_Q_DST, IPSA_Q_SRC|IPSA_Q_DST|
3660		    IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND, &sq, diagnostic);
3661		if (error)
3662			return (error);
3663
3664		error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
3665
3666		if (error != 0)
3667			goto error;
3668
3669		if (ipsapp.ipsap_psa_ptr != NULL) {
3670			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
3671			error = EINVAL;
3672		} else {
3673			/* update_pairing() sets diagnostic */
3674			error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
3675		}
3676	}
3677	/* Common error point for this routine. */
3678error:
3679	if (newbie != NULL) {
3680		if (error != 0) {
3681			/* This SA is broken, let the reaper clean up. */
3682			mutex_enter(&newbie->ipsa_lock);
3683			newbie->ipsa_state = IPSA_STATE_DEAD;
3684			newbie->ipsa_hardexpiretime = 1;
3685			mutex_exit(&newbie->ipsa_lock);
3686		}
3687		IPSA_REFRELE(newbie);
3688	}
3689	if (newbie_clone != NULL) {
3690		IPSA_REFRELE(newbie_clone);
3691	}
3692
3693	if (error == 0) {
3694		/*
3695		 * Construct favorable PF_KEY return message and send to
3696		 * keysock. Update the flags in the original keysock message
3697		 * to reflect the actual flags in the new SA.
3698		 *  (Q:  Do I need to pass "newbie"?  If I do,
3699		 * make sure to REFHOLD, call, then REFRELE.)
3700		 */
3701		assoc->sadb_sa_flags = newbie->ipsa_flags;
3702		sadb_pfkey_echo(pfkey_q, mp, samsg, ksi, NULL);
3703	}
3704
3705	destroy_ipsa_pair(&ipsapp);
3706	return (error);
3707}
3708
3709/*
3710 * Set the time of first use for a security association.  Update any
3711 * expiration times as a result.
3712 */
3713void
3714sadb_set_usetime(ipsa_t *assoc)
3715{
3716	time_t snapshot = gethrestime_sec();
3717
3718	mutex_enter(&assoc->ipsa_lock);
3719	assoc->ipsa_lastuse = snapshot;
3720	assoc->ipsa_idleexpiretime = snapshot + assoc->ipsa_idletime;
3721
3722	/*
3723	 * Caller does check usetime before calling me usually, and
3724	 * double-checking is better than a mutex_enter/exit hit.
3725	 */
3726	if (assoc->ipsa_usetime == 0) {
3727		/*
3728		 * This is redundant for outbound SA's, as
3729		 * ipsec_getassocbyconn() sets the IPSA_F_USED flag already.
3730		 * Inbound SAs, however, have no such protection.
3731		 */
3732		assoc->ipsa_flags |= IPSA_F_USED;
3733		assoc->ipsa_usetime = snapshot;
3734
3735		/*
3736		 * After setting the use time, see if we have a use lifetime
3737		 * that would cause the actual SA expiration time to shorten.
3738		 */
3739		UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
3740		UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
3741	}
3742	mutex_exit(&assoc->ipsa_lock);
3743}
3744
3745/*
3746 * Send up a PF_KEY expire message for this association.
3747 */
3748static void
3749sadb_expire_assoc(queue_t *pfkey_q, ipsa_t *assoc)
3750{
3751	mblk_t *mp, *mp1;
3752	int alloclen, af;
3753	sadb_msg_t *samsg;
3754	sadb_lifetime_t *current, *expire;
3755	sadb_sa_t *saext;
3756	uint8_t *end;
3757	boolean_t tunnel_mode;
3758
3759	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3760
3761	/* Don't bother sending if there's no queue. */
3762	if (pfkey_q == NULL)
3763		return;
3764
3765	mp = sadb_keysock_out(0);
3766	if (mp == NULL) {
3767		/* cmn_err(CE_WARN, */
3768		/*	"sadb_expire_assoc: Can't allocate KEYSOCK_OUT.\n"); */
3769		return;
3770	}
3771
3772	alloclen = sizeof (*samsg) + sizeof (*current) + sizeof (*expire) +
3773	    2 * sizeof (sadb_address_t) + sizeof (*saext);
3774
3775	af = assoc->ipsa_addrfam;
3776	switch (af) {
3777	case AF_INET:
3778		alloclen += 2 * sizeof (struct sockaddr_in);
3779		break;
3780	case AF_INET6:
3781		alloclen += 2 * sizeof (struct sockaddr_in6);
3782		break;
3783	default:
3784		/* Won't happen unless there's a kernel bug. */
3785		freeb(mp);
3786		cmn_err(CE_WARN,
3787		    "sadb_expire_assoc: Unknown address length.\n");
3788		return;
3789	}
3790
3791	tunnel_mode = (assoc->ipsa_flags & IPSA_F_TUNNEL);
3792	if (tunnel_mode) {
3793		alloclen += 2 * sizeof (sadb_address_t);
3794		switch (assoc->ipsa_innerfam) {
3795		case AF_INET:
3796			alloclen += 2 * sizeof (struct sockaddr_in);
3797			break;
3798		case AF_INET6:
3799			alloclen += 2 * sizeof (struct sockaddr_in6);
3800			break;
3801		default:
3802			/* Won't happen unless there's a kernel bug. */
3803			freeb(mp);
3804			cmn_err(CE_WARN, "sadb_expire_assoc: "
3805			    "Unknown inner address length.\n");
3806			return;
3807		}
3808	}
3809
3810	mp->b_cont = allocb(alloclen, BPRI_HI);
3811	if (mp->b_cont == NULL) {
3812		freeb(mp);
3813		/* cmn_err(CE_WARN, */
3814		/*	"sadb_expire_assoc: Can't allocate message.\n"); */
3815		return;
3816	}
3817
3818	mp1 = mp;
3819	mp = mp->b_cont;
3820	end = mp->b_wptr + alloclen;
3821
3822	samsg = (sadb_msg_t *)mp->b_wptr;
3823	mp->b_wptr += sizeof (*samsg);
3824	samsg->sadb_msg_version = PF_KEY_V2;
3825	samsg->sadb_msg_type = SADB_EXPIRE;
3826	samsg->sadb_msg_errno = 0;
3827	samsg->sadb_msg_satype = assoc->ipsa_type;
3828	samsg->sadb_msg_len = SADB_8TO64(alloclen);
3829	samsg->sadb_msg_reserved = 0;
3830	samsg->sadb_msg_seq = 0;
3831	samsg->sadb_msg_pid = 0;
3832
3833	saext = (sadb_sa_t *)mp->b_wptr;
3834	mp->b_wptr += sizeof (*saext);
3835	saext->sadb_sa_len = SADB_8TO64(sizeof (*saext));
3836	saext->sadb_sa_exttype = SADB_EXT_SA;
3837	saext->sadb_sa_spi = assoc->ipsa_spi;
3838	saext->sadb_sa_replay = assoc->ipsa_replay_wsize;
3839	saext->sadb_sa_state = assoc->ipsa_state;
3840	saext->sadb_sa_auth = assoc->ipsa_auth_alg;
3841	saext->sadb_sa_encrypt = assoc->ipsa_encr_alg;
3842	saext->sadb_sa_flags = assoc->ipsa_flags;
3843
3844	current = (sadb_lifetime_t *)mp->b_wptr;
3845	mp->b_wptr += sizeof (sadb_lifetime_t);
3846	current->sadb_lifetime_len = SADB_8TO64(sizeof (*current));
3847	current->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
3848	/* We do not support the concept. */
3849	current->sadb_lifetime_allocations = 0;
3850	current->sadb_lifetime_bytes = assoc->ipsa_bytes;
3851	current->sadb_lifetime_addtime = assoc->ipsa_addtime;
3852	current->sadb_lifetime_usetime = assoc->ipsa_usetime;
3853
3854	expire = (sadb_lifetime_t *)mp->b_wptr;
3855	mp->b_wptr += sizeof (*expire);
3856	expire->sadb_lifetime_len = SADB_8TO64(sizeof (*expire));
3857
3858	if (assoc->ipsa_state == IPSA_STATE_DEAD) {
3859		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
3860		expire->sadb_lifetime_allocations = assoc->ipsa_hardalloc;
3861		expire->sadb_lifetime_bytes = assoc->ipsa_hardbyteslt;
3862		expire->sadb_lifetime_addtime = assoc->ipsa_hardaddlt;
3863		expire->sadb_lifetime_usetime = assoc->ipsa_harduselt;
3864	} else if (assoc->ipsa_state == IPSA_STATE_DYING) {
3865		expire->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
3866		expire->sadb_lifetime_allocations = assoc->ipsa_softalloc;
3867		expire->sadb_lifetime_bytes = assoc->ipsa_softbyteslt;
3868		expire->sadb_lifetime_addtime = assoc->ipsa_softaddlt;
3869		expire->sadb_lifetime_usetime = assoc->ipsa_softuselt;
3870	} else {
3871		ASSERT(assoc->ipsa_state == IPSA_STATE_MATURE);
3872		expire->sadb_lifetime_exttype = SADB_X_EXT_LIFETIME_IDLE;
3873		expire->sadb_lifetime_allocations = 0;
3874		expire->sadb_lifetime_bytes = 0;
3875		expire->sadb_lifetime_addtime = assoc->ipsa_idleaddlt;
3876		expire->sadb_lifetime_usetime = assoc->ipsa_idleuselt;
3877	}
3878
3879	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_SRC,
3880	    af, assoc->ipsa_srcaddr, tunnel_mode ? 0 : SA_SRCPORT(assoc),
3881	    SA_PROTO(assoc), 0);
3882	ASSERT(mp->b_wptr != NULL);
3883
3884	mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end, SADB_EXT_ADDRESS_DST,
3885	    af, assoc->ipsa_dstaddr, tunnel_mode ? 0 : SA_DSTPORT(assoc),
3886	    SA_PROTO(assoc), 0);
3887	ASSERT(mp->b_wptr != NULL);
3888
3889	if (tunnel_mode) {
3890		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3891		    SADB_X_EXT_ADDRESS_INNER_SRC, assoc->ipsa_innerfam,
3892		    assoc->ipsa_innersrc, SA_SRCPORT(assoc), SA_IPROTO(assoc),
3893		    assoc->ipsa_innersrcpfx);
3894		ASSERT(mp->b_wptr != NULL);
3895		mp->b_wptr = sadb_make_addr_ext(mp->b_wptr, end,
3896		    SADB_X_EXT_ADDRESS_INNER_DST, assoc->ipsa_innerfam,
3897		    assoc->ipsa_innerdst, SA_DSTPORT(assoc), SA_IPROTO(assoc),
3898		    assoc->ipsa_innerdstpfx);
3899		ASSERT(mp->b_wptr != NULL);
3900	}
3901
3902	/* Can just putnext, we're ready to go! */
3903	putnext(pfkey_q, mp1);
3904}
3905
3906/*
3907 * "Age" the SA with the number of bytes that was used to protect traffic.
3908 * Send an SADB_EXPIRE message if appropriate.	Return B_TRUE if there was
3909 * enough "charge" left in the SA to protect the data.	Return B_FALSE
3910 * otherwise.  (If B_FALSE is returned, the association either was, or became
3911 * DEAD.)
3912 */
3913boolean_t
3914sadb_age_bytes(queue_t *pfkey_q, ipsa_t *assoc, uint64_t bytes,
3915    boolean_t sendmsg)
3916{
3917	boolean_t rc = B_TRUE;
3918	uint64_t newtotal;
3919
3920	mutex_enter(&assoc->ipsa_lock);
3921	newtotal = assoc->ipsa_bytes + bytes;
3922	if (assoc->ipsa_hardbyteslt != 0 &&
3923	    newtotal >= assoc->ipsa_hardbyteslt) {
3924		if (assoc->ipsa_state != IPSA_STATE_DEAD) {
3925			sadb_delete_cluster(assoc);
3926			/*
3927			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3928			 * this off on another non-interrupt thread.  Also
3929			 * unlink this SA immediately.
3930			 */
3931			assoc->ipsa_state = IPSA_STATE_DEAD;
3932			if (sendmsg)
3933				sadb_expire_assoc(pfkey_q, assoc);
3934			/*
3935			 * Set non-zero expiration time so sadb_age_assoc()
3936			 * will work when reaping.
3937			 */
3938			assoc->ipsa_hardexpiretime = (time_t)1;
3939		} /* Else someone beat me to it! */
3940		rc = B_FALSE;
3941	} else if (assoc->ipsa_softbyteslt != 0 &&
3942	    (newtotal >= assoc->ipsa_softbyteslt)) {
3943		if (assoc->ipsa_state < IPSA_STATE_DYING) {
3944			/*
3945			 * Send EXPIRE message to PF_KEY.  May wish to pawn
3946			 * this off on another non-interrupt thread.
3947			 */
3948			assoc->ipsa_state = IPSA_STATE_DYING;
3949			assoc->ipsa_bytes = newtotal;
3950			if (sendmsg)
3951				sadb_expire_assoc(pfkey_q, assoc);
3952		} /* Else someone beat me to it! */
3953	}
3954	if (rc == B_TRUE)
3955		assoc->ipsa_bytes = newtotal;
3956	mutex_exit(&assoc->ipsa_lock);
3957	return (rc);
3958}
3959
3960/*
3961 * "Torch" an individual SA.  Returns NULL, so it can be tail-called from
3962 *     sadb_age_assoc().
3963 */
3964static ipsa_t *
3965sadb_torch_assoc(isaf_t *head, ipsa_t *sa)
3966{
3967	ASSERT(MUTEX_HELD(&head->isaf_lock));
3968	ASSERT(MUTEX_HELD(&sa->ipsa_lock));
3969	ASSERT(sa->ipsa_state == IPSA_STATE_DEAD);
3970
3971	/*
3972	 * Force cached SAs to be revalidated..
3973	 */
3974	head->isaf_gen++;
3975
3976	mutex_exit(&sa->ipsa_lock);
3977	sadb_unlinkassoc(sa);
3978
3979	return (NULL);
3980}
3981
3982/*
3983 * Do various SA-is-idle activities depending on delta (the number of idle
3984 * seconds on the SA) and/or other properties of the SA.
3985 *
3986 * Return B_TRUE if I've sent a packet, because I have to drop the
3987 * association's mutex before sending a packet out the wire.
3988 */
3989/* ARGSUSED */
3990static boolean_t
3991sadb_idle_activities(ipsa_t *assoc, time_t delta, boolean_t inbound)
3992{
3993	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
3994	int nat_t_interval = espstack->ipsecesp_nat_keepalive_interval;
3995
3996	ASSERT(MUTEX_HELD(&assoc->ipsa_lock));
3997
3998	if (!inbound && (assoc->ipsa_flags & IPSA_F_NATT_LOC) &&
3999	    delta >= nat_t_interval &&
4000	    gethrestime_sec() - assoc->ipsa_last_nat_t_ka >= nat_t_interval) {
4001		ASSERT(assoc->ipsa_type == SADB_SATYPE_ESP);
4002		assoc->ipsa_last_nat_t_ka = gethrestime_sec();
4003		mutex_exit(&assoc->ipsa_lock);
4004		ipsecesp_send_keepalive(assoc);
4005		return (B_TRUE);
4006	}
4007	return (B_FALSE);
4008}
4009
4010/*
4011 * Return "assoc" if haspeer is true and I send an expire.  This allows
4012 * the consumers' aging functions to tidy up an expired SA's peer.
4013 */
4014static ipsa_t *
4015sadb_age_assoc(isaf_t *head, queue_t *pfkey_q, ipsa_t *assoc,
4016    time_t current, int reap_delay, boolean_t inbound)
4017{
4018	ipsa_t *retval = NULL;
4019	boolean_t dropped_mutex = B_FALSE;
4020
4021	ASSERT(MUTEX_HELD(&head->isaf_lock));
4022
4023	mutex_enter(&assoc->ipsa_lock);
4024
4025	if (((assoc->ipsa_state == IPSA_STATE_LARVAL) ||
4026	    ((assoc->ipsa_state == IPSA_STATE_IDLE) ||
4027	    (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) &&
4028	    (assoc->ipsa_hardexpiretime != 0))) &&
4029	    (assoc->ipsa_hardexpiretime <= current)) {
4030		assoc->ipsa_state = IPSA_STATE_DEAD;
4031		return (sadb_torch_assoc(head, assoc));
4032	}
4033
4034	/*
4035	 * Check lifetimes.  Fortunately, SA setup is done
4036	 * such that there are only two times to look at,
4037	 * softexpiretime, and hardexpiretime.
4038	 *
4039	 * Check hard first.
4040	 */
4041
4042	if (assoc->ipsa_hardexpiretime != 0 &&
4043	    assoc->ipsa_hardexpiretime <= current) {
4044		if (assoc->ipsa_state == IPSA_STATE_DEAD)
4045			return (sadb_torch_assoc(head, assoc));
4046
4047		if (inbound) {
4048			sadb_delete_cluster(assoc);
4049		}
4050
4051		/*
4052		 * Send SADB_EXPIRE with hard lifetime, delay for unlinking.
4053		 */
4054		assoc->ipsa_state = IPSA_STATE_DEAD;
4055		if (assoc->ipsa_haspeer || assoc->ipsa_otherspi != 0) {
4056			/*
4057			 * If the SA is paired or peered with another, put
4058			 * a copy on a list which can be processed later, the
4059			 * pair/peer SA needs to be updated so the both die
4060			 * at the same time.
4061			 *
4062			 * If I return assoc, I have to bump up its reference
4063			 * count to keep with the ipsa_t reference count
4064			 * semantics.
4065			 */
4066			IPSA_REFHOLD(assoc);
4067			retval = assoc;
4068		}
4069		sadb_expire_assoc(pfkey_q, assoc);
4070		assoc->ipsa_hardexpiretime = current + reap_delay;
4071	} else if (assoc->ipsa_softexpiretime != 0 &&
4072	    assoc->ipsa_softexpiretime <= current &&
4073	    assoc->ipsa_state < IPSA_STATE_DYING) {
4074		/*
4075		 * Send EXPIRE message to PF_KEY.  May wish to pawn
4076		 * this off on another non-interrupt thread.
4077		 */
4078		assoc->ipsa_state = IPSA_STATE_DYING;
4079		if (assoc->ipsa_haspeer) {
4080			/*
4081			 * If the SA has a peer, update the peer's state
4082			 * on SOFT_EXPIRE, this is mostly to prevent two
4083			 * expire messages from effectively the same SA.
4084			 *
4085			 * Don't care about paired SA's, then can (and should)
4086			 * be able to soft expire at different times.
4087			 *
4088			 * If I return assoc, I have to bump up its
4089			 * reference count to keep with the ipsa_t reference
4090			 * count semantics.
4091			 */
4092			IPSA_REFHOLD(assoc);
4093			retval = assoc;
4094		}
4095		sadb_expire_assoc(pfkey_q, assoc);
4096	} else if (assoc->ipsa_idletime != 0 &&
4097	    assoc->ipsa_idleexpiretime <= current) {
4098		if (assoc->ipsa_state == IPSA_STATE_ACTIVE_ELSEWHERE) {
4099			assoc->ipsa_state = IPSA_STATE_IDLE;
4100		}
4101
4102		/*
4103		 * Need to handle Mature case
4104		 */
4105		if (assoc->ipsa_state == IPSA_STATE_MATURE) {
4106			sadb_expire_assoc(pfkey_q, assoc);
4107		}
4108	} else {
4109		/* Check idle time activities. */
4110		dropped_mutex = sadb_idle_activities(assoc,
4111		    current - assoc->ipsa_lastuse, inbound);
4112	}
4113
4114	if (!dropped_mutex)
4115		mutex_exit(&assoc->ipsa_lock);
4116	return (retval);
4117}
4118
4119/*
4120 * Called by a consumer protocol to do ther dirty work of reaping dead
4121 * Security Associations.
4122 *
4123 * NOTE: sadb_age_assoc() marks expired SA's as DEAD but only removed
4124 * SA's that are already marked DEAD, so expired SA's are only reaped
4125 * the second time sadb_ager() runs.
4126 */
4127void
4128sadb_ager(sadb_t *sp, queue_t *pfkey_q, int reap_delay, netstack_t *ns)
4129{
4130	int i;
4131	isaf_t *bucket;
4132	ipsa_t *assoc, *spare;
4133	iacqf_t *acqlist;
4134	ipsacq_t *acqrec, *spareacq;
4135	templist_t *haspeerlist, *newbie;
4136	/* Snapshot current time now. */
4137	time_t current = gethrestime_sec();
4138	haspeerlist = NULL;
4139
4140	/*
4141	 * Do my dirty work.  This includes aging real entries, aging
4142	 * larvals, and aging outstanding ACQUIREs.
4143	 *
4144	 * I hope I don't tie up resources for too long.
4145	 */
4146
4147	/* Age acquires. */
4148
4149	for (i = 0; i < sp->sdb_hashsize; i++) {
4150		acqlist = &sp->sdb_acq[i];
4151		mutex_enter(&acqlist->iacqf_lock);
4152		for (acqrec = acqlist->iacqf_ipsacq; acqrec != NULL;
4153		    acqrec = spareacq) {
4154			spareacq = acqrec->ipsacq_next;
4155			if (current > acqrec->ipsacq_expire)
4156				sadb_destroy_acquire(acqrec, ns);
4157		}
4158		mutex_exit(&acqlist->iacqf_lock);
4159	}
4160
4161	/* Age inbound associations. */
4162	for (i = 0; i < sp->sdb_hashsize; i++) {
4163		bucket = &(sp->sdb_if[i]);
4164		mutex_enter(&bucket->isaf_lock);
4165		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4166		    assoc = spare) {
4167			spare = assoc->ipsa_next;
4168			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4169			    reap_delay, B_TRUE) != NULL) {
4170				/*
4171				 * Put SA's which have a peer or SA's which
4172				 * are paired on a list for processing after
4173				 * all the hash tables have been walked.
4174				 *
4175				 * sadb_age_assoc() increments the refcnt,
4176				 * effectively doing an IPSA_REFHOLD().
4177				 */
4178				newbie = kmem_alloc(sizeof (*newbie),
4179				    KM_NOSLEEP);
4180				if (newbie == NULL) {
4181					/*
4182					 * Don't forget to REFRELE().
4183					 */
4184					IPSA_REFRELE(assoc);
4185					continue;	/* for loop... */
4186				}
4187				newbie->next = haspeerlist;
4188				newbie->ipsa = assoc;
4189				haspeerlist = newbie;
4190			}
4191		}
4192		mutex_exit(&bucket->isaf_lock);
4193	}
4194
4195	age_pair_peer_list(haspeerlist, sp, B_FALSE);
4196	haspeerlist = NULL;
4197
4198	/* Age outbound associations. */
4199	for (i = 0; i < sp->sdb_hashsize; i++) {
4200		bucket = &(sp->sdb_of[i]);
4201		mutex_enter(&bucket->isaf_lock);
4202		for (assoc = bucket->isaf_ipsa; assoc != NULL;
4203		    assoc = spare) {
4204			spare = assoc->ipsa_next;
4205			if (sadb_age_assoc(bucket, pfkey_q, assoc, current,
4206			    reap_delay, B_FALSE) != NULL) {
4207				/*
4208				 * sadb_age_assoc() increments the refcnt,
4209				 * effectively doing an IPSA_REFHOLD().
4210				 */
4211				newbie = kmem_alloc(sizeof (*newbie),
4212				    KM_NOSLEEP);
4213				if (newbie == NULL) {
4214					/*
4215					 * Don't forget to REFRELE().
4216					 */
4217					IPSA_REFRELE(assoc);
4218					continue;	/* for loop... */
4219				}
4220				newbie->next = haspeerlist;
4221				newbie->ipsa = assoc;
4222				haspeerlist = newbie;
4223			}
4224		}
4225		mutex_exit(&bucket->isaf_lock);
4226	}
4227
4228	age_pair_peer_list(haspeerlist, sp, B_TRUE);
4229
4230	/*
4231	 * Run a GC pass to clean out dead identities.
4232	 */
4233	ipsid_gc(ns);
4234}
4235
4236/*
4237 * Figure out when to reschedule the ager.
4238 */
4239timeout_id_t
4240sadb_retimeout(hrtime_t begin, queue_t *pfkey_q, void (*ager)(void *),
4241    void *agerarg, uint_t *intp, uint_t intmax, short mid)
4242{
4243	hrtime_t end = gethrtime();
4244	uint_t interval = *intp;	/* "interval" is in ms. */
4245
4246	/*
4247	 * See how long this took.  If it took too long, increase the
4248	 * aging interval.
4249	 */
4250	if ((end - begin) > MSEC2NSEC(interval)) {
4251		if (interval >= intmax) {
4252			/* XXX Rate limit this?  Or recommend flush? */
4253			(void) strlog(mid, 0, 0, SL_ERROR | SL_WARN,
4254			    "Too many SA's to age out in %d msec.\n",
4255			    intmax);
4256		} else {
4257			/* Double by shifting by one bit. */
4258			interval <<= 1;
4259			interval = min(interval, intmax);
4260		}
4261	} else if ((end - begin) <= (MSEC2NSEC(interval) / 2) &&
4262	    interval > SADB_AGE_INTERVAL_DEFAULT) {
4263		/*
4264		 * If I took less than half of the interval, then I should
4265		 * ratchet the interval back down.  Never automatically
4266		 * shift below the default aging interval.
4267		 *
4268		 * NOTE:This even overrides manual setting of the age
4269		 *	interval using NDD to lower the setting past the
4270		 *	default.  In other words, if you set the interval
4271		 *	lower than the default, and your SADB gets too big,
4272		 *	the interval will only self-lower back to the default.
4273		 */
4274		/* Halve by shifting one bit. */
4275		interval >>= 1;
4276		interval = max(interval, SADB_AGE_INTERVAL_DEFAULT);
4277	}
4278	*intp = interval;
4279	return (qtimeout(pfkey_q, ager, agerarg,
4280	    drv_usectohz(interval * (MICROSEC / MILLISEC))));
4281}
4282
4283
/*
 * Update the lifetime values of an SA.  This is the path an SADB_UPDATE
 * message takes when updating a MATURE or DYING SA.
 *
 * "hard", "soft" and "idle" are the lifetime extensions from the message;
 * any of them may be NULL, in which case that class of lifetime is left
 * untouched.  Inside an extension, a field of zero leaves the SA's current
 * value for that field alone.  "outbound" only gates the lifetime_fuzz()
 * call on the soft expiration time.
 *
 * Takes and releases assoc->ipsa_lock internally.
 */
static void
sadb_update_lifetimes(ipsa_t *assoc, sadb_lifetime_t *hard,
    sadb_lifetime_t *soft, sadb_lifetime_t *idle, boolean_t outbound)
{
	mutex_enter(&assoc->ipsa_lock);

	/*
	 * XXX RFC 2367 mentions how an SADB_EXT_LIFETIME_CURRENT can be
	 * passed in during an update message.  We currently don't handle
	 * these.
	 */

	if (hard != NULL) {
		/* Copy over whichever hard limits the message supplies. */
		if (hard->sadb_lifetime_bytes != 0)
			assoc->ipsa_hardbyteslt = hard->sadb_lifetime_bytes;
		if (hard->sadb_lifetime_usetime != 0)
			assoc->ipsa_harduselt = hard->sadb_lifetime_usetime;
		if (hard->sadb_lifetime_addtime != 0)
			assoc->ipsa_hardaddlt = hard->sadb_lifetime_addtime;
		/* Hard expiration is measured from when the SA was added. */
		if (assoc->ipsa_hardaddlt != 0) {
			assoc->ipsa_hardexpiretime =
			    assoc->ipsa_addtime + assoc->ipsa_hardaddlt;
		}
		/*
		 * A use-time limit only matters once the SA has been used.
		 * UPDATE_EXPIRE is defined elsewhere; presumably it folds the
		 * use-time limit into the expiration time - TODO confirm.
		 */
		if (assoc->ipsa_harduselt != 0 &&
		    assoc->ipsa_flags & IPSA_F_USED) {
			UPDATE_EXPIRE(assoc, harduselt, hardexpiretime);
		}
		if (hard->sadb_lifetime_allocations != 0)
			assoc->ipsa_hardalloc = hard->sadb_lifetime_allocations;
	}

	if (soft != NULL) {
		/*
		 * A soft byte limit larger than the SA's hard byte limit is
		 * clamped to the hard limit.
		 * NOTE(review): if ipsa_hardbyteslt is 0 (no hard limit) any
		 * non-zero soft value exceeds it and the soft limit is set to
		 * 0; presumably the caller's sanity checks rule this out -
		 * confirm.  The same applies to the use-time case below.
		 */
		if (soft->sadb_lifetime_bytes != 0) {
			if (soft->sadb_lifetime_bytes >
			    assoc->ipsa_hardbyteslt) {
				assoc->ipsa_softbyteslt =
				    assoc->ipsa_hardbyteslt;
			} else {
				assoc->ipsa_softbyteslt =
				    soft->sadb_lifetime_bytes;
			}
		}
		/* Same clamping for the soft use-time limit. */
		if (soft->sadb_lifetime_usetime != 0) {
			if (soft->sadb_lifetime_usetime >
			    assoc->ipsa_harduselt) {
				assoc->ipsa_softuselt =
				    assoc->ipsa_harduselt;
			} else {
				assoc->ipsa_softuselt =
				    soft->sadb_lifetime_usetime;
			}
		}
		/*
		 * NOTE(review): unlike the two cases above, this compares the
		 * soft add-time lifetime against ipsa_hardexpiretime (not
		 * ipsa_hardaddlt), and in the "too large" case writes
		 * ipsa_softexpiretime rather than ipsa_softaddlt.  That value
		 * is then overwritten just below whenever ipsa_softaddlt is
		 * non-zero.  Confirm whether a clamp of ipsa_softaddlt to
		 * ipsa_hardaddlt was intended.
		 */
		if (soft->sadb_lifetime_addtime != 0) {
			if (soft->sadb_lifetime_addtime >
			    assoc->ipsa_hardexpiretime) {
				assoc->ipsa_softexpiretime =
				    assoc->ipsa_hardexpiretime;
			} else {
				assoc->ipsa_softaddlt =
				    soft->sadb_lifetime_addtime;
			}
		}
		/* Soft expiration is also measured from the add time. */
		if (assoc->ipsa_softaddlt != 0) {
			assoc->ipsa_softexpiretime =
			    assoc->ipsa_addtime + assoc->ipsa_softaddlt;
		}
		if (assoc->ipsa_softuselt != 0 &&
		    assoc->ipsa_flags & IPSA_F_USED) {
			UPDATE_EXPIRE(assoc, softuselt, softexpiretime);
		}
		/*
		 * Only MATURE SAs updated with outbound == B_TRUE get their
		 * soft expiration passed through lifetime_fuzz().
		 */
		if (outbound && assoc->ipsa_softexpiretime != 0) {
			if (assoc->ipsa_state == IPSA_STATE_MATURE)
				lifetime_fuzz(assoc);
		}

		if (soft->sadb_lifetime_allocations != 0)
			assoc->ipsa_softalloc = soft->sadb_lifetime_allocations;
	}

	if (idle != NULL) {
		time_t current = gethrestime_sec();
		/*
		 * If the idle timer has already run out and the message
		 * carries the same idle add-time the SA already has, restart
		 * the idle timer from now.
		 */
		if ((assoc->ipsa_idleexpiretime <= current) &&
		    (assoc->ipsa_idleaddlt == idle->sadb_lifetime_addtime)) {
			assoc->ipsa_idleexpiretime =
			    current + assoc->ipsa_idleaddlt;
		}
		if (idle->sadb_lifetime_addtime != 0)
			assoc->ipsa_idleaddlt = idle->sadb_lifetime_addtime;
		if (idle->sadb_lifetime_usetime != 0)
			assoc->ipsa_idleuselt = idle->sadb_lifetime_usetime;
		/*
		 * NOTE(review): the test is on the SA's stored idle add-time
		 * but the values written come from the message; if the
		 * message's add-time is 0 while the SA's is not, this sets
		 * ipsa_idletime to 0 and the expire time to "current".
		 * Confirm that is intended.
		 */
		if (assoc->ipsa_idleaddlt != 0) {
			assoc->ipsa_idleexpiretime =
			    current + idle->sadb_lifetime_addtime;
			assoc->ipsa_idletime = idle->sadb_lifetime_addtime;
		}
		/*
		 * With an idle use-time as well, the effective idle time is
		 * the smaller of the two; with only a use-time, it is the
		 * use-time.
		 */
		if (assoc->ipsa_idleuselt != 0) {
			if (assoc->ipsa_idletime != 0) {
				assoc->ipsa_idletime = min(assoc->ipsa_idletime,
				    assoc->ipsa_idleuselt);
				assoc->ipsa_idleexpiretime =
				    current + assoc->ipsa_idletime;
			} else {
				assoc->ipsa_idleexpiretime =
				    current + assoc->ipsa_idleuselt;
				assoc->ipsa_idletime = assoc->ipsa_idleuselt;
			}
		}
	}
	mutex_exit(&assoc->ipsa_lock);
}
4398
4399static int
4400sadb_update_state(ipsa_t *assoc, uint_t new_state, mblk_t **ipkt_lst)
4401{
4402	int rcode = 0;
4403	time_t current = gethrestime_sec();
4404
4405	mutex_enter(&assoc->ipsa_lock);
4406
4407	switch (new_state) {
4408	case SADB_X_SASTATE_ACTIVE_ELSEWHERE:
4409		if (assoc->ipsa_state == SADB_X_SASTATE_IDLE) {
4410			assoc->ipsa_state = IPSA_STATE_ACTIVE_ELSEWHERE;
4411			assoc->ipsa_idleexpiretime =
4412			    current + assoc->ipsa_idletime;
4413		}
4414		break;
4415	case SADB_X_SASTATE_IDLE:
4416		if (assoc->ipsa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4417			assoc->ipsa_state = IPSA_STATE_IDLE;
4418			assoc->ipsa_idleexpiretime =
4419			    current + assoc->ipsa_idletime;
4420		} else {
4421			rcode = EINVAL;
4422		}
4423		break;
4424
4425	case SADB_X_SASTATE_ACTIVE:
4426		if (assoc->ipsa_state != SADB_X_SASTATE_IDLE) {
4427			rcode = EINVAL;
4428			break;
4429		}
4430		assoc->ipsa_state = IPSA_STATE_MATURE;
4431		assoc->ipsa_idleexpiretime = current + assoc->ipsa_idletime;
4432
4433		if (ipkt_lst == NULL) {
4434			break;
4435		}
4436
4437		if (assoc->ipsa_bpkt_head != NULL) {
4438			*ipkt_lst = assoc->ipsa_bpkt_head;
4439			assoc->ipsa_bpkt_head = assoc->ipsa_bpkt_tail = NULL;
4440			assoc->ipsa_mblkcnt = 0;
4441		} else {
4442			*ipkt_lst = NULL;
4443		}
4444		break;
4445	default:
4446		rcode = EINVAL;
4447		break;
4448	}
4449
4450	mutex_exit(&assoc->ipsa_lock);
4451	return (rcode);
4452}
4453
4454/*
4455 * Check a proposed KMC update for sanity.
4456 */
4457static int
4458sadb_check_kmc(ipsa_query_t *sq, ipsa_t *sa, int *diagnostic)
4459{
4460	uint32_t kmp = sq->kmp;
4461	uint64_t kmc = sq->kmc;
4462
4463	if (sa == NULL)
4464		return (0);
4465
4466	if (sa->ipsa_state == IPSA_STATE_DEAD)
4467		return (ESRCH);	/* DEAD == Not there, in this case. */
4468
4469	if ((kmp != 0) && (sa->ipsa_kmp != 0) && (sa->ipsa_kmp != kmp)) {
4470		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMP;
4471		return (EINVAL);
4472	}
4473
4474	/* Allow IKEv2 KMCs to update the kmc value for rekeying */
4475	if ((kmp != SADB_X_KMP_IKEV2) && (kmc != 0) && (sa->ipsa_kmc != 0) &&
4476	    (sa->ipsa_kmc != kmc)) {
4477		*diagnostic = SADB_X_DIAGNOSTIC_DUPLICATE_KMC;
4478		return (EINVAL);
4479	}
4480
4481	return (0);
4482}
4483
4484/*
4485 * Actually update the KMC info.
4486 */
4487static void
4488sadb_update_kmc(ipsa_query_t *sq, ipsa_t *sa)
4489{
4490	uint32_t kmp = sq->kmp;
4491	uint64_t kmc = sq->kmc;
4492
4493	if (kmp != 0)
4494		sa->ipsa_kmp = kmp;
4495	if (kmc != 0)
4496		sa->ipsa_kmc = kmc;
4497}
4498
4499/*
4500 * Common code to update an SA.
4501 */
4502
4503int
4504sadb_update_sa(mblk_t *mp, keysock_in_t *ksi, mblk_t **ipkt_lst,
4505    sadbp_t *spp, int *diagnostic, queue_t *pfkey_q,
4506    int (*add_sa_func)(mblk_t *, keysock_in_t *, int *, netstack_t *),
4507    netstack_t *ns, uint8_t sadb_msg_type)
4508{
4509	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
4510	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
4511	sadb_x_replay_ctr_t *replext =
4512	    (sadb_x_replay_ctr_t *)ksi->ks_in_extv[SADB_X_EXT_REPLAY_VALUE];
4513	sadb_lifetime_t *soft =
4514	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
4515	sadb_lifetime_t *hard =
4516	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
4517	sadb_lifetime_t *idle =
4518	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
4519	sadb_x_pair_t *pair_ext =
4520	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4521	ipsa_t *echo_target = NULL;
4522	ipsap_t ipsapp;
4523	ipsa_query_t sq;
4524	time_t current = gethrestime_sec();
4525
4526	sq.spp = spp;		/* XXX param */
4527	int error = sadb_form_query(ksi, IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA,
4528	    IPSA_Q_SRC|IPSA_Q_DST|IPSA_Q_SA|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND|
4529	    IPSA_Q_KMC,
4530	    &sq, diagnostic);
4531
4532	if (error != 0)
4533		return (error);
4534
4535	error = get_ipsa_pair(&sq, &ipsapp, diagnostic);
4536	if (error != 0)
4537		return (error);
4538
4539	if (ipsapp.ipsap_psa_ptr == NULL && ipsapp.ipsap_sa_ptr != NULL) {
4540		if (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) {
4541			/*
4542			 * REFRELE the target and let the add_sa_func()
4543			 * deal with updating a larval SA.
4544			 */
4545			destroy_ipsa_pair(&ipsapp);
4546			return (add_sa_func(mp, ksi, diagnostic, ns));
4547		}
4548	}
4549
4550	/*
4551	 * At this point we have an UPDATE to a MATURE SA. There should
4552	 * not be any keying material present.
4553	 */
4554	if (akey != NULL) {
4555		*diagnostic = SADB_X_DIAGNOSTIC_AKEY_PRESENT;
4556		error = EINVAL;
4557		goto bail;
4558	}
4559	if (ekey != NULL) {
4560		*diagnostic = SADB_X_DIAGNOSTIC_EKEY_PRESENT;
4561		error = EINVAL;
4562		goto bail;
4563	}
4564
4565	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE) {
4566		if (ipsapp.ipsap_sa_ptr != NULL &&
4567		    ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4568			if ((error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4569			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4570				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4571				goto bail;
4572			}
4573		}
4574		if (ipsapp.ipsap_psa_ptr != NULL &&
4575		    ipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_IDLE) {
4576			if ((error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4577			    sq.assoc->sadb_sa_state, NULL)) != 0) {
4578				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4579				goto bail;
4580			}
4581		}
4582	}
4583	if (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE) {
4584		if (ipsapp.ipsap_sa_ptr != NULL) {
4585			error = sadb_update_state(ipsapp.ipsap_sa_ptr,
4586			    sq.assoc->sadb_sa_state,
4587			    (ipsapp.ipsap_sa_ptr->ipsa_flags &
4588			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4589			if (error) {
4590				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4591				goto bail;
4592			}
4593		}
4594		if (ipsapp.ipsap_psa_ptr != NULL) {
4595			error = sadb_update_state(ipsapp.ipsap_psa_ptr,
4596			    sq.assoc->sadb_sa_state,
4597			    (ipsapp.ipsap_psa_ptr->ipsa_flags &
4598			    IPSA_F_INBOUND) ? ipkt_lst : NULL);
4599			if (error) {
4600				*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4601				goto bail;
4602			}
4603		}
4604		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4605		    ksi, echo_target);
4606		goto bail;
4607	}
4608
4609	/*
4610	 * Reality checks for updates of active associations.
4611	 * Sundry first-pass UPDATE-specific reality checks.
4612	 * Have to do the checks here, because it's after the add_sa code.
4613	 * XXX STATS : logging/stats here?
4614	 */
4615
4616	if (!((sq.assoc->sadb_sa_state == SADB_SASTATE_MATURE) ||
4617	    (sq.assoc->sadb_sa_state == SADB_X_SASTATE_ACTIVE_ELSEWHERE))) {
4618		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4619		error = EINVAL;
4620		goto bail;
4621	}
4622	if (sq.assoc->sadb_sa_flags & ~spp->s_updateflags) {
4623		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
4624		error = EINVAL;
4625		goto bail;
4626	}
4627	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL) {
4628		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_LIFETIME;
4629		error = EOPNOTSUPP;
4630		goto bail;
4631	}
4632
4633	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
4634		error = EINVAL;
4635		goto bail;
4636	}
4637
4638	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
4639		return (EINVAL);
4640
4641	error = sadb_check_kmc(&sq, ipsapp.ipsap_sa_ptr, diagnostic);
4642	if (error != 0)
4643		goto bail;
4644
4645	error = sadb_check_kmc(&sq, ipsapp.ipsap_psa_ptr, diagnostic);
4646	if (error != 0)
4647		goto bail;
4648
4649
4650	if (ipsapp.ipsap_sa_ptr != NULL) {
4651		/*
4652		 * Do not allow replay value change for MATURE or LARVAL SA.
4653		 */
4654
4655		if ((replext != NULL) &&
4656		    ((ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_LARVAL) ||
4657		    (ipsapp.ipsap_sa_ptr->ipsa_state == IPSA_STATE_MATURE))) {
4658			*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
4659			error = EINVAL;
4660			goto bail;
4661		}
4662	}
4663
4664
4665	if (ipsapp.ipsap_sa_ptr != NULL) {
4666		sadb_update_lifetimes(ipsapp.ipsap_sa_ptr, hard, soft,
4667		    idle, B_TRUE);
4668		sadb_update_kmc(&sq, ipsapp.ipsap_sa_ptr);
4669		if ((replext != NULL) &&
4670		    (ipsapp.ipsap_sa_ptr->ipsa_replay_wsize != 0)) {
4671			/*
4672			 * If an inbound SA, update the replay counter
4673			 * and check off all the other sequence number
4674			 */
4675			if (ksi->ks_in_dsttype == KS_IN_ADDR_ME) {
4676				if (!sadb_replay_check(ipsapp.ipsap_sa_ptr,
4677				    replext->sadb_x_rc_replay32)) {
4678					*diagnostic =
4679					    SADB_X_DIAGNOSTIC_INVALID_REPLAY;
4680					error = EINVAL;
4681					goto bail;
4682				}
4683				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4684				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4685				    current +
4686				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4687				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4688			} else {
4689				mutex_enter(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4690				ipsapp.ipsap_sa_ptr->ipsa_replay =
4691				    replext->sadb_x_rc_replay32;
4692				ipsapp.ipsap_sa_ptr->ipsa_idleexpiretime =
4693				    current +
4694				    ipsapp.ipsap_sa_ptr->ipsa_idletime;
4695				mutex_exit(&ipsapp.ipsap_sa_ptr->ipsa_lock);
4696			}
4697		}
4698	}
4699
4700	if (sadb_msg_type == SADB_X_UPDATEPAIR) {
4701		if (ipsapp.ipsap_psa_ptr != NULL) {
4702			sadb_update_lifetimes(ipsapp.ipsap_psa_ptr, hard, soft,
4703			    idle, B_FALSE);
4704			sadb_update_kmc(&sq, ipsapp.ipsap_psa_ptr);
4705		} else {
4706			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
4707			error = ESRCH;
4708			goto bail;
4709		}
4710	}
4711
4712	if (pair_ext != NULL)
4713		error = update_pairing(&ipsapp, &sq, ksi, diagnostic);
4714
4715	if (error == 0)
4716		sadb_pfkey_echo(pfkey_q, mp, (sadb_msg_t *)mp->b_cont->b_rptr,
4717		    ksi, echo_target);
4718bail:
4719
4720	destroy_ipsa_pair(&ipsapp);
4721
4722	return (error);
4723}
4724
4725
4726static int
4727update_pairing(ipsap_t *ipsapp, ipsa_query_t *sq, keysock_in_t *ksi,
4728    int *diagnostic)
4729{
4730	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
4731	sadb_x_pair_t *pair_ext =
4732	    (sadb_x_pair_t *)ksi->ks_in_extv[SADB_X_EXT_PAIR];
4733	int error = 0;
4734	ipsap_t oipsapp;
4735	boolean_t undo_pair = B_FALSE;
4736	uint32_t ipsa_flags;
4737
4738	if (pair_ext->sadb_x_pair_spi == 0 || pair_ext->sadb_x_pair_spi ==
4739	    assoc->sadb_sa_spi) {
4740		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4741		return (EINVAL);
4742	}
4743
4744	/*
4745	 * Assume for now that the spi value provided in the SADB_UPDATE
4746	 * message was valid, update the SA with its pair spi value.
4747	 * If the spi turns out to be bogus or the SA no longer exists
4748	 * then this will be detected when the reverse update is made
4749	 * below.
4750	 */
4751	mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4752	ipsapp->ipsap_sa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4753	ipsapp->ipsap_sa_ptr->ipsa_otherspi = pair_ext->sadb_x_pair_spi;
4754	mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4755
4756	/*
4757	 * After updating the ipsa_otherspi element of the SA, get_ipsa_pair()
4758	 * should now return pointers to the SA *AND* its pair, if this is not
4759	 * the case, the "otherspi" either did not exist or was deleted. Also
4760	 * check that "otherspi" is not already paired. If everything looks
4761	 * good, complete the update. IPSA_REFRELE the first pair_pointer
4762	 * after this update to ensure its not deleted until we are done.
4763	 */
4764	error = get_ipsa_pair(sq, &oipsapp, diagnostic);
4765	if (error != 0) {
4766		/*
4767		 * This should never happen, calling function still has
4768		 * IPSA_REFHELD on the SA we just updated.
4769		 */
4770		return (error);	/* XXX EINVAL instead of ESRCH? */
4771	}
4772
4773	if (oipsapp.ipsap_psa_ptr == NULL) {
4774		*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4775		error = EINVAL;
4776		undo_pair = B_TRUE;
4777	} else {
4778		ipsa_flags = oipsapp.ipsap_psa_ptr->ipsa_flags;
4779		if ((oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DEAD) ||
4780		    (oipsapp.ipsap_psa_ptr->ipsa_state == IPSA_STATE_DYING)) {
4781			/* Its dead Jim! */
4782			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4783			undo_pair = B_TRUE;
4784		} else if ((ipsa_flags & (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) ==
4785		    (IPSA_F_OUTBOUND | IPSA_F_INBOUND)) {
4786			/* This SA is in both hashtables. */
4787			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_INAPPROPRIATE;
4788			undo_pair = B_TRUE;
4789		} else if (ipsa_flags & IPSA_F_PAIRED) {
4790			/* This SA is already paired with another. */
4791			*diagnostic = SADB_X_DIAGNOSTIC_PAIR_ALREADY;
4792			undo_pair = B_TRUE;
4793		}
4794	}
4795
4796	if (undo_pair) {
4797		/* The pair SA does not exist. */
4798		mutex_enter(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4799		ipsapp->ipsap_sa_ptr->ipsa_flags &= ~IPSA_F_PAIRED;
4800		ipsapp->ipsap_sa_ptr->ipsa_otherspi = 0;
4801		mutex_exit(&ipsapp->ipsap_sa_ptr->ipsa_lock);
4802	} else {
4803		mutex_enter(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4804		oipsapp.ipsap_psa_ptr->ipsa_otherspi = assoc->sadb_sa_spi;
4805		oipsapp.ipsap_psa_ptr->ipsa_flags |= IPSA_F_PAIRED;
4806		mutex_exit(&oipsapp.ipsap_psa_ptr->ipsa_lock);
4807	}
4808
4809	destroy_ipsa_pair(&oipsapp);
4810	return (error);
4811}
4812
4813/*
4814 * The following functions deal with ACQUIRE LISTS.  An ACQUIRE list is
4815 * a list of outstanding SADB_ACQUIRE messages.	 If ipsec_getassocbyconn() fails
4816 * for an outbound datagram, that datagram is queued up on an ACQUIRE record,
4817 * and an SADB_ACQUIRE message is sent up.  Presumably, a user-space key
4818 * management daemon will process the ACQUIRE, use a SADB_GETSPI to reserve
4819 * an SPI value and a larval SA, then SADB_UPDATE the larval SA, and ADD the
4820 * other direction's SA.
4821 */
4822
4823/*
4824 * Check the ACQUIRE lists.  If there's an existing ACQUIRE record,
4825 * grab it, lock it, and return it.  Otherwise return NULL.
4826 *
4827 * XXX MLS number of arguments getting unwieldy here
4828 */
4829static ipsacq_t *
4830sadb_checkacquire(iacqf_t *bucket, ipsec_action_t *ap, ipsec_policy_t *pp,
4831    uint32_t *src, uint32_t *dst, uint32_t *isrc, uint32_t *idst,
4832    uint64_t unique_id, ts_label_t *tsl)
4833{
4834	ipsacq_t *walker;
4835	sa_family_t fam;
4836	uint32_t blank_address[4] = {0, 0, 0, 0};
4837
4838	if (isrc == NULL) {
4839		ASSERT(idst == NULL);
4840		isrc = idst = blank_address;
4841	}
4842
4843	/*
4844	 * Scan list for duplicates.  Check for UNIQUE, src/dest, policy.
4845	 *
4846	 * XXX May need search for duplicates based on other things too!
4847	 */
4848	for (walker = bucket->iacqf_ipsacq; walker != NULL;
4849	    walker = walker->ipsacq_next) {
4850		mutex_enter(&walker->ipsacq_lock);
4851		fam = walker->ipsacq_addrfam;
4852		if (IPSA_ARE_ADDR_EQUAL(dst, walker->ipsacq_dstaddr, fam) &&
4853		    IPSA_ARE_ADDR_EQUAL(src, walker->ipsacq_srcaddr, fam) &&
4854		    ip_addr_match((uint8_t *)isrc, walker->ipsacq_innersrcpfx,
4855		    (in6_addr_t *)walker->ipsacq_innersrc) &&
4856		    ip_addr_match((uint8_t *)idst, walker->ipsacq_innerdstpfx,
4857		    (in6_addr_t *)walker->ipsacq_innerdst) &&
4858		    (ap == walker->ipsacq_act) &&
4859		    (pp == walker->ipsacq_policy) &&
4860		    /* XXX do deep compares of ap/pp? */
4861		    (unique_id == walker->ipsacq_unique_id) &&
4862		    (ipsec_label_match(tsl, walker->ipsacq_tsl)))
4863			break;			/* everything matched */
4864		mutex_exit(&walker->ipsacq_lock);
4865	}
4866
4867	return (walker);
4868}
4869
4870/*
4871 * Generate an SADB_ACQUIRE base message mblk, including KEYSOCK_OUT metadata.
4872 * In other words, this will return, upon success, a two-mblk chain.
4873 */
4874static inline mblk_t *
4875sadb_acquire_msg_base(minor_t serial, uint8_t satype, uint32_t seq, pid_t pid)
4876{
4877	mblk_t *mp;
4878	sadb_msg_t *samsg;
4879
4880	mp = sadb_keysock_out(serial);
4881	if (mp == NULL)
4882		return (NULL);
4883	mp->b_cont = allocb(sizeof (sadb_msg_t), BPRI_HI);
4884	if (mp->b_cont == NULL) {
4885		freeb(mp);
4886		return (NULL);
4887	}
4888
4889	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
4890	mp->b_cont->b_wptr += sizeof (*samsg);
4891	samsg->sadb_msg_version = PF_KEY_V2;
4892	samsg->sadb_msg_type = SADB_ACQUIRE;
4893	samsg->sadb_msg_errno = 0;
4894	samsg->sadb_msg_reserved = 0;
4895	samsg->sadb_msg_satype = satype;
4896	samsg->sadb_msg_seq = seq;
4897	samsg->sadb_msg_pid = pid;
4898
4899	return (mp);
4900}
4901
4902/*
4903 * Generate address and TX/MLS sensitivity label PF_KEY extensions that are
4904 * common to both regular and extended ACQUIREs.
4905 */
4906static mblk_t *
4907sadb_acquire_msg_common(ipsec_selector_t *sel, ipsec_policy_t *pp,
4908    ipsec_action_t *ap, boolean_t tunnel_mode, ts_label_t *tsl,
4909    sadb_sens_t *sens)
4910{
4911	size_t len;
4912	mblk_t *mp;
4913	uint8_t *start, *cur, *end;
4914	uint32_t *saddrptr, *daddrptr;
4915	sa_family_t af;
4916	ipsec_action_t *oldap;
4917	ipsec_selkey_t *ipsl;
4918	uint8_t proto, pfxlen;
4919	uint16_t lport, rport;
4920	int senslen = 0;
4921
4922	/*
4923	 * Get action pointer set if it isn't already.
4924	 */
4925	oldap = ap;
4926	if (pp != NULL) {
4927		ap = pp->ipsp_act;
4928		if (ap == NULL)
4929			ap = oldap;
4930	}
4931
4932	/*
4933	 * Biggest-case scenario:
4934	 * 4x (sadb_address_t + struct sockaddr_in6)
4935	 *	(src, dst, isrc, idst)
4936	 *	(COMING SOON, 6x, because of triggering-packet contents.)
4937	 * sadb_x_kmc_t
4938	 * sadb_sens_t
4939	 * And wiggle room for label bitvectors.  Luckily there are
4940	 * programmatic ways to find it.
4941	 */
4942	len = 4 * (sizeof (sadb_address_t) + sizeof (struct sockaddr_in6));
4943
4944	/* Figure out full and proper length of sensitivity labels. */
4945	if (sens != NULL) {
4946		ASSERT(tsl == NULL);
4947		senslen = SADB_64TO8(sens->sadb_sens_len);
4948	} else if (tsl != NULL) {
4949		senslen = sadb_sens_len_from_label(tsl);
4950	}
4951#ifdef DEBUG
4952	else {
4953		ASSERT(senslen == 0);
4954	}
4955#endif /* DEBUG */
4956	len += senslen;
4957
4958	mp = allocb(len, BPRI_HI);
4959	if (mp == NULL)
4960		return (NULL);
4961
4962	start = mp->b_rptr;
4963	end = start + len;
4964	cur = start;
4965
4966	/*
4967	 * Address extensions first, from most-recently-defined to least.
4968	 * (This should immediately trigger surprise or verify robustness on
4969	 * older apps, like in.iked.)
4970	 */
4971	if (tunnel_mode) {
4972		/*
4973		 * Form inner address extensions based NOT on the inner
4974		 * selectors (i.e. the packet data), but on the policy's
4975		 * selector key (i.e. the policy's selector information).
4976		 *
4977		 * NOTE:  The position of IPv4 and IPv6 addresses is the
4978		 * same in ipsec_selkey_t (unless the compiler does very
4979		 * strange things with unions, consult your local C language
4980		 * lawyer for details).
4981		 */
4982		ASSERT(pp != NULL);
4983
4984		ipsl = &(pp->ipsp_sel->ipsl_key);
4985		if (ipsl->ipsl_valid & IPSL_IPV4) {
4986			af = AF_INET;
4987			ASSERT(sel->ips_protocol == IPPROTO_ENCAP);
4988			ASSERT(!(ipsl->ipsl_valid & IPSL_IPV6));
4989		} else {
4990			af = AF_INET6;
4991			ASSERT(sel->ips_protocol == IPPROTO_IPV6);
4992			ASSERT(ipsl->ipsl_valid & IPSL_IPV6);
4993		}
4994
4995		if (ipsl->ipsl_valid & IPSL_LOCAL_ADDR) {
4996			saddrptr = (uint32_t *)(&ipsl->ipsl_local);
4997			pfxlen = ipsl->ipsl_local_pfxlen;
4998		} else {
4999			saddrptr = (uint32_t *)(&ipv6_all_zeros);
5000			pfxlen = 0;
5001		}
5002		/* XXX What about ICMP type/code? */
5003		lport = (ipsl->ipsl_valid & IPSL_LOCAL_PORT) ?
5004		    ipsl->ipsl_lport : 0;
5005		proto = (ipsl->ipsl_valid & IPSL_PROTOCOL) ?
5006		    ipsl->ipsl_proto : 0;
5007
5008		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_SRC,
5009		    af, saddrptr, lport, proto, pfxlen);
5010		if (cur == NULL) {
5011			freeb(mp);
5012			return (NULL);
5013		}
5014
5015		if (ipsl->ipsl_valid & IPSL_REMOTE_ADDR) {
5016			daddrptr = (uint32_t *)(&ipsl->ipsl_remote);
5017			pfxlen = ipsl->ipsl_remote_pfxlen;
5018		} else {
5019			daddrptr = (uint32_t *)(&ipv6_all_zeros);
5020			pfxlen = 0;
5021		}
5022		/* XXX What about ICMP type/code? */
5023		rport = (ipsl->ipsl_valid & IPSL_REMOTE_PORT) ?
5024		    ipsl->ipsl_rport : 0;
5025
5026		cur = sadb_make_addr_ext(cur, end, SADB_X_EXT_ADDRESS_INNER_DST,
5027		    af, daddrptr, rport, proto, pfxlen);
5028		if (cur == NULL) {
5029			freeb(mp);
5030			return (NULL);
5031		}
5032		/*
5033		 * TODO  - if we go to 3884's dream of transport mode IP-in-IP
5034		 * _with_ inner-packet address selectors, we'll need to further
5035		 * distinguish tunnel mode here.  For now, having inner
5036		 * addresses and/or ports is sufficient.
5037		 *
5038		 * Meanwhile, whack proto/ports to reflect IP-in-IP for the
5039		 * outer addresses.
5040		 */
5041		proto = sel->ips_protocol;	/* Either _ENCAP or _IPV6 */
5042		lport = rport = 0;
5043	} else if ((ap != NULL) && (!ap->ipa_want_unique)) {
5044		/*
5045		 * For cases when the policy calls out specific ports (or not).
5046		 */
5047		proto = 0;
5048		lport = 0;
5049		rport = 0;
5050		if (pp != NULL) {
5051			ipsl = &(pp->ipsp_sel->ipsl_key);
5052			if (ipsl->ipsl_valid & IPSL_PROTOCOL)
5053				proto = ipsl->ipsl_proto;
5054			if (ipsl->ipsl_valid & IPSL_REMOTE_PORT)
5055				rport = ipsl->ipsl_rport;
5056			if (ipsl->ipsl_valid & IPSL_LOCAL_PORT)
5057				lport = ipsl->ipsl_lport;
5058		}
5059	} else {
5060		/*
5061		 * For require-unique-SA policies.
5062		 */
5063		proto = sel->ips_protocol;
5064		lport = sel->ips_local_port;
5065		rport = sel->ips_remote_port;
5066	}
5067
5068	/*
5069	 * Regular addresses.  These are outer-packet ones for tunnel mode.
5070	 * Or for transport mode, the regulard address & port information.
5071	 */
5072	af = sel->ips_isv4 ? AF_INET : AF_INET6;
5073
5074	/*
5075	 * NOTE:  The position of IPv4 and IPv6 addresses is the same in
5076	 * ipsec_selector_t.
5077	 */
5078	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC, af,
5079	    (uint32_t *)(&sel->ips_local_addr_v6), lport, proto, 0);
5080	if (cur == NULL) {
5081		freeb(mp);
5082		return (NULL);
5083	}
5084
5085	cur = sadb_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST, af,
5086	    (uint32_t *)(&sel->ips_remote_addr_v6), rport, proto, 0);
5087	if (cur == NULL) {
5088		freeb(mp);
5089		return (NULL);
5090	}
5091
5092	/*
5093	 * If present, generate a sensitivity label.
5094	 */
5095	if (cur + senslen > end) {
5096		freeb(mp);
5097		return (NULL);
5098	}
5099	if (sens != NULL) {
5100		/* Explicit sadb_sens_t, usually from inverse-ACQUIRE. */
5101		bcopy(sens, cur, senslen);
5102	} else if (tsl != NULL) {
5103		/* Generate sadb_sens_t from ACQUIRE source. */
5104		sadb_sens_from_label((sadb_sens_t *)cur, SADB_EXT_SENSITIVITY,
5105		    tsl, senslen);
5106	}
5107#ifdef DEBUG
5108	else {
5109		ASSERT(senslen == 0);
5110	}
5111#endif /* DEBUG */
5112	cur += senslen;
5113	mp->b_wptr = cur;
5114
5115	return (mp);
5116}
5117
5118/*
5119 * Generate a regular ACQUIRE's proposal extension and KMC information..
5120 */
5121static mblk_t *
5122sadb_acquire_prop(ipsec_action_t *ap, netstack_t *ns, boolean_t do_esp)
5123{
5124	ipsec_stack_t *ipss = ns->netstack_ipsec;
5125	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5126	ipsecah_stack_t *ahstack = ns->netstack_ipsecah;
5127	mblk_t *mp = NULL;
5128	sadb_prop_t *prop;
5129	sadb_comb_t *comb;
5130	ipsec_action_t *walker;
5131	int ncombs, allocsize, ealgid, aalgid, aminbits, amaxbits, eminbits,
5132	    emaxbits, esaltlen, replay;
5133	uint64_t softbytes, hardbytes, softaddtime, hardaddtime, softusetime,
5134	    hardusetime;
5135	uint64_t kmc = 0;
5136	uint32_t kmp = 0;
5137
5138	/*
5139	 * Since it's an rwlock read, AND writing to the IPsec algorithms is
5140	 * rare, just acquire it once up top, and drop it upon return.
5141	 */
5142	rw_enter(&ipss->ipsec_alg_lock, RW_READER);
5143	if (do_esp) {
5144		uint64_t num_aalgs, num_ealgs;
5145
5146		if (espstack->esp_kstats == NULL)
5147			goto bail;
5148
5149		num_aalgs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
5150		num_ealgs = ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
5151		if (num_ealgs == 0)
5152			goto bail;	/* IPsec not loaded yet, apparently. */
5153		num_aalgs++;	/* No-auth or self-auth-crypto ESP. */
5154
5155		/* Use netstack's maximum loaded algorithms... */
5156		ncombs = num_ealgs * num_aalgs;
5157		replay =  espstack->ipsecesp_replay_size;
5158	} else {
5159		if (ahstack->ah_kstats == NULL)
5160			goto bail;
5161
5162		ncombs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
5163
5164		if (ncombs == 0)
5165			goto bail;	/* IPsec not loaded yet, apparently. */
5166		replay =  ahstack->ipsecah_replay_size;
5167	}
5168
5169	allocsize = sizeof (*prop) + ncombs * sizeof (*comb) +
5170	    sizeof (sadb_x_kmc_t);
5171	mp = allocb(allocsize, BPRI_HI);
5172	if (mp == NULL)
5173		goto bail;
5174	prop = (sadb_prop_t *)mp->b_rptr;
5175	mp->b_wptr += sizeof (*prop);
5176	comb = (sadb_comb_t *)mp->b_wptr;
5177	/* Decrement allocsize, if it goes to or below 0, stop. */
5178	allocsize -= sizeof (*prop);
5179	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
5180	prop->sadb_prop_len = SADB_8TO64(sizeof (*prop));
5181	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
5182	prop->sadb_prop_replay = replay;
5183
5184	/*
5185	 * Based upon algorithm properties, and what-not, prioritize a
5186	 * proposal, based on the ordering of the ESP algorithms in the
5187	 * alternatives in the policy rule or socket that was placed
5188	 * in the acquire record.
5189	 *
5190	 * For each action in policy list
5191	 *   Add combination.
5192	 *   I should not hit it, but if I've hit limit, return.
5193	 */
5194
5195	for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5196		ipsec_alginfo_t *ealg, *aalg;
5197		ipsec_prot_t *prot;
5198
5199		if (walker->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
5200			continue;
5201
5202		prot = &walker->ipa_act.ipa_apply;
5203		if (walker->ipa_act.ipa_apply.ipp_km_proto != 0)
5204			kmp = walker->ipa_act.ipa_apply.ipp_km_proto;
5205		if (walker->ipa_act.ipa_apply.ipp_km_cookie != 0)
5206			kmc = walker->ipa_act.ipa_apply.ipp_km_cookie;
5207		if (walker->ipa_act.ipa_apply.ipp_replay_depth) {
5208			prop->sadb_prop_replay =
5209			    walker->ipa_act.ipa_apply.ipp_replay_depth;
5210		}
5211
5212		if (do_esp) {
5213			if (!prot->ipp_use_esp)
5214				continue;
5215
5216			if (prot->ipp_esp_auth_alg != 0) {
5217				aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
5218				    [prot->ipp_esp_auth_alg];
5219				if (aalg == NULL || !ALG_VALID(aalg))
5220					continue;
5221			} else
5222				aalg = NULL;
5223
5224			ASSERT(prot->ipp_encr_alg > 0);
5225			ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
5226			    [prot->ipp_encr_alg];
5227			if (ealg == NULL || !ALG_VALID(ealg))
5228				continue;
5229
5230			/*
5231			 * These may want to come from policy rule..
5232			 */
5233			softbytes = espstack->ipsecesp_default_soft_bytes;
5234			hardbytes = espstack->ipsecesp_default_hard_bytes;
5235			softaddtime = espstack->ipsecesp_default_soft_addtime;
5236			hardaddtime = espstack->ipsecesp_default_hard_addtime;
5237			softusetime = espstack->ipsecesp_default_soft_usetime;
5238			hardusetime = espstack->ipsecesp_default_hard_usetime;
5239		} else {
5240			if (!prot->ipp_use_ah)
5241				continue;
5242			ealg = NULL;
5243			aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
5244			    [prot->ipp_auth_alg];
5245			if (aalg == NULL || !ALG_VALID(aalg))
5246				continue;
5247
5248			/*
5249			 * These may want to come from policy rule..
5250			 */
5251			softbytes = ahstack->ipsecah_default_soft_bytes;
5252			hardbytes = ahstack->ipsecah_default_hard_bytes;
5253			softaddtime = ahstack->ipsecah_default_soft_addtime;
5254			hardaddtime = ahstack->ipsecah_default_hard_addtime;
5255			softusetime = ahstack->ipsecah_default_soft_usetime;
5256			hardusetime = ahstack->ipsecah_default_hard_usetime;
5257		}
5258
5259		if (ealg == NULL) {
5260			ealgid = eminbits = emaxbits = esaltlen = 0;
5261		} else {
5262			ealgid = ealg->alg_id;
5263			eminbits =
5264			    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
5265			emaxbits =
5266			    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
5267			esaltlen = ealg->alg_saltlen;
5268		}
5269
5270		if (aalg == NULL) {
5271			aalgid = aminbits = amaxbits = 0;
5272		} else {
5273			aalgid = aalg->alg_id;
5274			aminbits = MAX(prot->ipp_espa_minbits,
5275			    aalg->alg_ef_minbits);
5276			amaxbits = MIN(prot->ipp_espa_maxbits,
5277			    aalg->alg_ef_maxbits);
5278		}
5279
5280		comb->sadb_comb_flags = 0;
5281		comb->sadb_comb_reserved = 0;
5282		comb->sadb_comb_encrypt = ealgid;
5283		comb->sadb_comb_encrypt_minbits = eminbits;
5284		comb->sadb_comb_encrypt_maxbits = emaxbits;
5285		comb->sadb_x_comb_encrypt_saltbits = SADB_8TO1(esaltlen);
5286		comb->sadb_comb_auth = aalgid;
5287		comb->sadb_comb_auth_minbits = aminbits;
5288		comb->sadb_comb_auth_maxbits = amaxbits;
5289		comb->sadb_comb_soft_allocations = 0;
5290		comb->sadb_comb_hard_allocations = 0;
5291		comb->sadb_comb_soft_bytes = softbytes;
5292		comb->sadb_comb_hard_bytes = hardbytes;
5293		comb->sadb_comb_soft_addtime = softaddtime;
5294		comb->sadb_comb_hard_addtime = hardaddtime;
5295		comb->sadb_comb_soft_usetime = softusetime;
5296		comb->sadb_comb_hard_usetime = hardusetime;
5297
5298		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
5299		mp->b_wptr += sizeof (*comb);
5300		allocsize -= sizeof (*comb);
5301		/* Should never dip BELOW sizeof (KM cookie extension). */
5302		ASSERT3S(allocsize, >=, sizeof (sadb_x_kmc_t));
5303		if (allocsize <= sizeof (sadb_x_kmc_t))
5304			break;	/* out of space.. */
5305		comb++;
5306	}
5307
5308	/* Don't include KMC extension if there's no room. */
5309	if (((kmp != 0) || (kmc != 0)) && allocsize >= sizeof (sadb_x_kmc_t)) {
5310		if (sadb_make_kmc_ext(mp->b_wptr,
5311		    mp->b_wptr + sizeof (sadb_x_kmc_t), kmp, kmc) == NULL) {
5312			freeb(mp);
5313			mp = NULL;
5314			goto bail;
5315		}
5316		mp->b_wptr += sizeof (sadb_x_kmc_t);
5317		prop->sadb_prop_len += SADB_8TO64(sizeof (sadb_x_kmc_t));
5318	}
5319
5320bail:
5321	rw_exit(&ipss->ipsec_alg_lock);
5322	return (mp);
5323}
5324
5325/*
5326 * Generate an extended ACQUIRE's extended-proposal extension.
5327 */
5328static mblk_t *
5329sadb_acquire_extended_prop(ipsec_action_t *ap, netstack_t *ns)
5330{
5331	sadb_prop_t *eprop;
5332	uint8_t *cur, *end;
5333	mblk_t *mp;
5334	int allocsize, numecombs = 0, numalgdescs = 0;
5335	uint32_t kmp = 0, replay = 0;
5336	uint64_t kmc = 0;
5337	ipsec_action_t *walker;
5338
5339	allocsize = sizeof (*eprop);
5340
5341	/*
5342	 * Going to walk through the action list twice.  Once for allocation
5343	 * measurement, and once for actual construction.
5344	 */
5345	for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5346		ipsec_prot_t *ipp;
5347
5348		/*
5349		 * Skip non-IPsec policies
5350		 */
5351		if (walker->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5352			continue;
5353
5354		ipp = &walker->ipa_act.ipa_apply;
5355
5356		if (walker->ipa_act.ipa_apply.ipp_km_proto)
5357			kmp = ipp->ipp_km_proto;
5358		if (walker->ipa_act.ipa_apply.ipp_km_cookie)
5359			kmc = ipp->ipp_km_cookie;
5360		if (walker->ipa_act.ipa_apply.ipp_replay_depth)
5361			replay = ipp->ipp_replay_depth;
5362
5363		if (ipp->ipp_use_ah)
5364			numalgdescs++;
5365		if (ipp->ipp_use_esp) {
5366			numalgdescs++;
5367			if (ipp->ipp_use_espa)
5368				numalgdescs++;
5369		}
5370
5371		numecombs++;
5372	}
5373	ASSERT(numecombs > 0);
5374
5375	allocsize += numecombs * sizeof (sadb_x_ecomb_t) +
5376	    numalgdescs * sizeof (sadb_x_algdesc_t) + sizeof (sadb_x_kmc_t);
5377	mp = allocb(allocsize, BPRI_HI);
5378	if (mp == NULL)
5379		return (NULL);
5380	eprop = (sadb_prop_t *)mp->b_rptr;
5381	end = mp->b_rptr + allocsize;
5382	cur = mp->b_rptr + sizeof (*eprop);
5383
5384	eprop->sadb_prop_exttype = SADB_X_EXT_EPROP;
5385	eprop->sadb_x_prop_ereserved = 0;
5386	eprop->sadb_x_prop_numecombs = 0;
5387	*(uint32_t *)(&eprop->sadb_prop_replay) = 0;	/* Quick zero-out! */
5388	/* Pick ESP's replay default if need be. */
5389	eprop->sadb_prop_replay = (replay == 0) ?
5390	    ns->netstack_ipsecesp->ipsecesp_replay_size : replay;
5391
5392	/* This time, walk through and actually allocate. */
5393	for (walker = ap; walker != NULL; walker = walker->ipa_next) {
5394		/*
5395		 * Skip non-IPsec policies
5396		 */
5397		if (walker->ipa_act.ipa_type != IPSEC_ACT_APPLY)
5398			continue;
5399		cur = sadb_action_to_ecomb(cur, end, walker, ns);
5400		if (cur == NULL) {
5401			/* NOTE: inverse-ACQUIRE should note this as ENOMEM. */
5402			freeb(mp);
5403			return (NULL);
5404		}
5405		eprop->sadb_x_prop_numecombs++;
5406	}
5407
5408	ASSERT(end - cur >= sizeof (sadb_x_kmc_t));
5409	if ((kmp != 0) || (kmc != 0)) {
5410		cur = sadb_make_kmc_ext(cur, end, kmp, kmc);
5411		if (cur == NULL) {
5412			freeb(mp);
5413			return (NULL);
5414		}
5415	}
5416	mp->b_wptr = cur;
5417	eprop->sadb_prop_len = SADB_8TO64(cur - mp->b_rptr);
5418
5419	return (mp);
5420}
5421
5422/*
5423 * For this mblk, insert a new acquire record.  Assume bucket contains addrs
5424 * of all of the same length.  Give up (and drop) if memory
5425 * cannot be allocated for a new one; otherwise, invoke callback to
5426 * send the acquire up..
5427 *
5428 * In cases where we need both AH and ESP, add the SA to the ESP ACQUIRE
5429 * list.  The ah_add_sa_finish() routines can look at the packet's attached
5430 * attributes and handle this case specially.
5431 */
5432void
5433sadb_acquire(mblk_t *datamp, ip_xmit_attr_t *ixa, boolean_t need_ah,
5434    boolean_t need_esp)
5435{
5436	mblk_t	*asyncmp, *regular, *extended, *common, *prop, *eprop;
5437	sadbp_t *spp;
5438	sadb_t *sp;
5439	ipsacq_t *newbie;
5440	iacqf_t *bucket;
5441	ipha_t *ipha = (ipha_t *)datamp->b_rptr;
5442	ip6_t *ip6h = (ip6_t *)datamp->b_rptr;
5443	uint32_t *src, *dst, *isrc, *idst;
5444	ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
5445	ipsec_action_t *ap = ixa->ixa_ipsec_action;
5446	sa_family_t af;
5447	int hashoffset;
5448	uint32_t seq;
5449	uint64_t unique_id = 0;
5450	boolean_t tunnel_mode = (ixa->ixa_flags & IXAF_IPSEC_TUNNEL) != 0;
5451	ts_label_t	*tsl;
5452	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
5453	ipsec_stack_t	*ipss = ns->netstack_ipsec;
5454	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
5455	ipsecah_stack_t	*ahstack = ns->netstack_ipsecah;
5456	ipsec_selector_t sel;
5457	queue_t *q;
5458
5459	ASSERT((pp != NULL) || (ap != NULL));
5460
5461	ASSERT(need_ah || need_esp);
5462
5463	/* Assign sadb pointers */
5464	if (need_esp) {
5465		/*
5466		 * ESP happens first if we need both AH and ESP.
5467		 */
5468		spp = &espstack->esp_sadb;
5469	} else {
5470		spp = &ahstack->ah_sadb;
5471	}
5472	sp = (ixa->ixa_flags & IXAF_IS_IPV4) ? &spp->s_v4 : &spp->s_v6;
5473
5474	if (is_system_labeled())
5475		tsl = ixa->ixa_tsl;
5476	else
5477		tsl = NULL;
5478
5479	if (ap == NULL)
5480		ap = pp->ipsp_act;
5481	ASSERT(ap != NULL);
5482
5483	if (ap->ipa_act.ipa_apply.ipp_use_unique || tunnel_mode)
5484		unique_id = SA_FORM_UNIQUE_ID(ixa);
5485
5486	/*
5487	 * Set up an ACQUIRE record.
5488	 *
5489	 * Immediately, make sure the ACQUIRE sequence number doesn't slip
5490	 * below the lowest point allowed in the kernel.  (In other words,
5491	 * make sure the high bit on the sequence number is set.)
5492	 */
5493
5494	seq = keysock_next_seq(ns) | IACQF_LOWEST_SEQ;
5495
5496	if (IPH_HDR_VERSION(ipha) == IP_VERSION) {
5497		src = (uint32_t *)&ipha->ipha_src;
5498		dst = (uint32_t *)&ipha->ipha_dst;
5499		af = AF_INET;
5500		hashoffset = OUTBOUND_HASH_V4(sp, ipha->ipha_dst);
5501		ASSERT(ixa->ixa_flags & IXAF_IS_IPV4);
5502	} else {
5503		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
5504		src = (uint32_t *)&ip6h->ip6_src;
5505		dst = (uint32_t *)&ip6h->ip6_dst;
5506		af = AF_INET6;
5507		hashoffset = OUTBOUND_HASH_V6(sp, ip6h->ip6_dst);
5508		ASSERT(!(ixa->ixa_flags & IXAF_IS_IPV4));
5509	}
5510
5511	if (tunnel_mode) {
5512		if (pp == NULL) {
5513			/*
5514			 * Tunnel mode with no policy pointer means this is a
5515			 * reflected ICMP (like a ECHO REQUEST) that came in
5516			 * with self-encapsulated protection.  Until we better
5517			 * support this, drop the packet.
5518			 */
5519			ip_drop_packet(datamp, B_FALSE, NULL,
5520			    DROPPER(ipss, ipds_spd_got_selfencap),
5521			    &ipss->ipsec_spd_dropper);
5522			return;
5523		}
5524		/* Snag inner addresses. */
5525		isrc = ixa->ixa_ipsec_insrc;
5526		idst = ixa->ixa_ipsec_indst;
5527	} else {
5528		isrc = idst = NULL;
5529	}
5530
5531	/*
5532	 * Check buckets to see if there is an existing entry.  If so,
5533	 * grab it.  sadb_checkacquire locks newbie if found.
5534	 */
5535	bucket = &(sp->sdb_acq[hashoffset]);
5536	mutex_enter(&bucket->iacqf_lock);
5537	newbie = sadb_checkacquire(bucket, ap, pp, src, dst, isrc, idst,
5538	    unique_id, tsl);
5539
5540	if (newbie == NULL) {
5541		/*
5542		 * Otherwise, allocate a new one.
5543		 */
5544		newbie = kmem_zalloc(sizeof (*newbie), KM_NOSLEEP);
5545		if (newbie == NULL) {
5546			mutex_exit(&bucket->iacqf_lock);
5547			ip_drop_packet(datamp, B_FALSE, NULL,
5548			    DROPPER(ipss, ipds_sadb_acquire_nomem),
5549			    &ipss->ipsec_sadb_dropper);
5550			return;
5551		}
5552		newbie->ipsacq_policy = pp;
5553		if (pp != NULL) {
5554			IPPOL_REFHOLD(pp);
5555		}
5556		IPACT_REFHOLD(ap);
5557		newbie->ipsacq_act = ap;
5558		newbie->ipsacq_linklock = &bucket->iacqf_lock;
5559		newbie->ipsacq_next = bucket->iacqf_ipsacq;
5560		newbie->ipsacq_ptpn = &bucket->iacqf_ipsacq;
5561		if (newbie->ipsacq_next != NULL)
5562			newbie->ipsacq_next->ipsacq_ptpn = &newbie->ipsacq_next;
5563
5564		bucket->iacqf_ipsacq = newbie;
5565		mutex_init(&newbie->ipsacq_lock, NULL, MUTEX_DEFAULT, NULL);
5566		mutex_enter(&newbie->ipsacq_lock);
5567	}
5568
5569	/*
5570	 * XXX MLS does it actually help us to drop the bucket lock here?
5571	 * we have inserted a half-built, locked acquire record into the
5572	 * bucket.  any competing thread will now be able to lock the bucket
5573	 * to scan it, but will immediately pile up on the new acquire
5574	 * record's lock; I don't think we gain anything here other than to
5575	 * disperse blame for lock contention.
5576	 *
5577	 * we might be able to dispense with acquire record locks entirely..
5578	 * just use the bucket locks..
5579	 */
5580
5581	mutex_exit(&bucket->iacqf_lock);
5582
5583	/*
5584	 * This assert looks silly for now, but we may need to enter newbie's
5585	 * mutex during a search.
5586	 */
5587	ASSERT(MUTEX_HELD(&newbie->ipsacq_lock));
5588
5589	/*
5590	 * Make the ip_xmit_attr_t into something we can queue.
5591	 * If no memory it frees datamp.
5592	 */
5593	asyncmp = ip_xmit_attr_to_mblk(ixa);
5594	if (asyncmp != NULL)
5595		linkb(asyncmp, datamp);
5596
5597	/* Queue up packet.  Use b_next. */
5598
5599	if (asyncmp == NULL) {
5600		/* Statistics for allocation failure */
5601		if (ixa->ixa_flags & IXAF_IS_IPV4) {
5602			BUMP_MIB(&ixa->ixa_ipst->ips_ip_mib,
5603			    ipIfStatsOutDiscards);
5604		} else {
5605			BUMP_MIB(&ixa->ixa_ipst->ips_ip6_mib,
5606			    ipIfStatsOutDiscards);
5607		}
5608		ip_drop_output("No memory for asyncmp", datamp, NULL);
5609		freemsg(datamp);
5610		/*
5611		 * The acquire record will be freed quickly if it's new
5612		 * (ipsacq_expire == 0), and will proceed as if no packet
5613		 * showed up if not.
5614		 */
5615		mutex_exit(&newbie->ipsacq_lock);
5616		return;
5617	} else if (newbie->ipsacq_numpackets == 0) {
5618		/* First one. */
5619		newbie->ipsacq_mp = asyncmp;
5620		newbie->ipsacq_numpackets = 1;
5621		newbie->ipsacq_expire = gethrestime_sec();
5622		/*
5623		 * Extended ACQUIRE with both AH+ESP will use ESP's timeout
5624		 * value.
5625		 */
5626		newbie->ipsacq_expire += *spp->s_acquire_timeout;
5627		newbie->ipsacq_seq = seq;
5628		newbie->ipsacq_addrfam = af;
5629
5630		newbie->ipsacq_srcport = ixa->ixa_ipsec_src_port;
5631		newbie->ipsacq_dstport = ixa->ixa_ipsec_dst_port;
5632		newbie->ipsacq_icmp_type = ixa->ixa_ipsec_icmp_type;
5633		newbie->ipsacq_icmp_code = ixa->ixa_ipsec_icmp_code;
5634		if (tunnel_mode) {
5635			newbie->ipsacq_inneraddrfam = ixa->ixa_ipsec_inaf;
5636			newbie->ipsacq_proto = ixa->ixa_ipsec_inaf == AF_INET6 ?
5637			    IPPROTO_IPV6 : IPPROTO_ENCAP;
5638			newbie->ipsacq_innersrcpfx = ixa->ixa_ipsec_insrcpfx;
5639			newbie->ipsacq_innerdstpfx = ixa->ixa_ipsec_indstpfx;
5640			IPSA_COPY_ADDR(newbie->ipsacq_innersrc,
5641			    ixa->ixa_ipsec_insrc, ixa->ixa_ipsec_inaf);
5642			IPSA_COPY_ADDR(newbie->ipsacq_innerdst,
5643			    ixa->ixa_ipsec_indst, ixa->ixa_ipsec_inaf);
5644		} else {
5645			newbie->ipsacq_proto = ixa->ixa_ipsec_proto;
5646		}
5647		newbie->ipsacq_unique_id = unique_id;
5648
5649		if (tsl != NULL) {
5650			label_hold(tsl);
5651			newbie->ipsacq_tsl = tsl;
5652		}
5653	} else {
5654		/* Scan to the end of the list & insert. */
5655		mblk_t *lastone = newbie->ipsacq_mp;
5656
5657		while (lastone->b_next != NULL)
5658			lastone = lastone->b_next;
5659		lastone->b_next = asyncmp;
5660		if (newbie->ipsacq_numpackets++ == ipsacq_maxpackets) {
5661			newbie->ipsacq_numpackets = ipsacq_maxpackets;
5662			lastone = newbie->ipsacq_mp;
5663			newbie->ipsacq_mp = lastone->b_next;
5664			lastone->b_next = NULL;
5665
5666			/* Freeing the async message */
5667			lastone = ip_xmit_attr_free_mblk(lastone);
5668			ip_drop_packet(lastone, B_FALSE, NULL,
5669			    DROPPER(ipss, ipds_sadb_acquire_toofull),
5670			    &ipss->ipsec_sadb_dropper);
5671		} else {
5672			IP_ACQUIRE_STAT(ipss, qhiwater,
5673			    newbie->ipsacq_numpackets);
5674		}
5675	}
5676
5677	/*
5678	 * Reset addresses.  Set them to the most recently added mblk chain,
5679	 * so that the address pointers in the acquire record will point
5680	 * at an mblk still attached to the acquire list.
5681	 */
5682
5683	newbie->ipsacq_srcaddr = src;
5684	newbie->ipsacq_dstaddr = dst;
5685
5686	/*
5687	 * If the acquire record has more than one queued packet, we've
5688	 * already sent an ACQUIRE, and don't need to repeat ourself.
5689	 */
5690	if (newbie->ipsacq_seq != seq || newbie->ipsacq_numpackets > 1) {
5691		/* I have an acquire outstanding already! */
5692		mutex_exit(&newbie->ipsacq_lock);
5693		return;
5694	}
5695
5696	if (need_esp) {
5697		ESP_BUMP_STAT(espstack, acquire_requests);
5698		q = espstack->esp_pfkey_q;
5699	} else {
5700		/*
5701		 * Two cases get us here:
5702		 * 1.) AH-only policy.
5703		 *
5704		 * 2.) A continuation of an AH+ESP policy, and this is the
5705		 * post-ESP, AH-needs-to-send-a-regular-ACQUIRE case.
5706		 * (i.e. called from esp_do_outbound_ah().)
5707		 */
5708		AH_BUMP_STAT(ahstack, acquire_requests);
5709		q = ahstack->ah_pfkey_q;
5710	}
5711
5712	/*
5713	 * Get selectors and other policy-expression bits needed for an
5714	 * ACQUIRE.
5715	 */
5716	bzero(&sel, sizeof (sel));
5717	sel.ips_isv4 = (ixa->ixa_flags & IXAF_IS_IPV4) != 0;
5718	if (tunnel_mode) {
5719		sel.ips_protocol = (ixa->ixa_ipsec_inaf == AF_INET) ?
5720		    IPPROTO_ENCAP : IPPROTO_IPV6;
5721	} else {
5722		sel.ips_protocol = ixa->ixa_ipsec_proto;
5723		sel.ips_local_port = ixa->