1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25/*
26 * Copyright 2017 Joyent, Inc.
27 */
28
29#include <sys/param.h>
30#include <sys/types.h>
31#include <sys/stream.h>
32#include <sys/strsubr.h>
33#include <sys/strsun.h>
34#include <sys/stropts.h>
35#include <sys/vnode.h>
36#include <sys/zone.h>
37#include <sys/strlog.h>
38#include <sys/sysmacros.h>
39#define	_SUN_TPI_VERSION 2
40#include <sys/tihdr.h>
41#include <sys/timod.h>
42#include <sys/tiuser.h>
43#include <sys/ddi.h>
44#include <sys/sunddi.h>
45#include <sys/sunldi.h>
46#include <sys/file.h>
47#include <sys/modctl.h>
48#include <sys/debug.h>
49#include <sys/kmem.h>
50#include <sys/cmn_err.h>
51#include <sys/proc.h>
52#include <sys/suntpi.h>
53#include <sys/atomic.h>
54#include <sys/mkdev.h>
55#include <sys/policy.h>
56#include <sys/disp.h>
57
58#include <sys/socket.h>
59#include <netinet/in.h>
60#include <net/pfkeyv2.h>
61
62#include <inet/common.h>
63#include <netinet/ip6.h>
64#include <inet/ip.h>
65#include <inet/proto_set.h>
66#include <inet/nd.h>
67#include <inet/optcom.h>
68#include <inet/ipsec_info.h>
69#include <inet/ipsec_impl.h>
70#include <inet/keysock.h>
71
72#include <sys/isa_defs.h>
73
74/*
 * This is a transport provider for the PF_KEY key management socket.
76 * (See RFC 2367 for details.)
77 * Downstream messages are wrapped in a keysock consumer interface KEYSOCK_IN
78 * messages (see ipsec_info.h), and passed to the appropriate consumer.
79 * Upstream messages are generated for all open PF_KEY sockets, when
80 * appropriate, as well as the sender (as long as SO_USELOOPBACK is enabled)
81 * in reply to downstream messages.
82 *
83 * Upstream messages must be created asynchronously for the following
84 * situations:
85 *
86 *	1.) A keysock consumer requires an SA, and there is currently none.
87 *	2.) An SA expires, either hard or soft lifetime.
88 *	3.) Other events a consumer deems fit.
89 *
90 * The MT model of this is PERMOD, with shared put procedures.  Two types of
91 * messages, SADB_FLUSH and SADB_DUMP, need to lock down the perimeter to send
92 * down the *multiple* messages they create.
93 */
94
95static vmem_t *keysock_vmem;		/* for minor numbers. */
96
97#define	KEYSOCK_MAX_CONSUMERS 256
98
99/* Default structure copied into T_INFO_ACK messages (from rts.c...) */
100static struct T_info_ack keysock_g_t_info_ack = {
101	T_INFO_ACK,
102	T_INFINITE,	/* TSDU_size. Maximum size messages. */
103	T_INVALID,	/* ETSDU_size. No expedited data. */
104	T_INVALID,	/* CDATA_size. No connect data. */
105	T_INVALID,	/* DDATA_size. No disconnect data. */
106	0,		/* ADDR_size. */
107	0,		/* OPT_size. No user-settable options */
108	64 * 1024,	/* TIDU_size. keysock allows maximum size messages. */
109	T_COTS,		/* SERV_type. keysock supports connection oriented. */
110	TS_UNBND,	/* CURRENT_state. This is set from keysock_state. */
111	(XPG4_1)	/* Provider flags */
112};
113
114/* Named Dispatch Parameter Management Structure */
115typedef struct keysockparam_s {
116	uint_t	keysock_param_min;
117	uint_t	keysock_param_max;
118	uint_t	keysock_param_value;
119	char	*keysock_param_name;
120} keysockparam_t;
121
122/*
123 * Table of NDD variables supported by keysock. These are loaded into
124 * keysock_g_nd in keysock_init_nd.
125 * All of these are alterable, within the min/max values given, at run time.
126 */
127static	keysockparam_t	lcl_param_arr[] = {
128	/* min	max	value	name */
129	{ 4096, 65536,	8192,	"keysock_xmit_hiwat"},
130	{ 0,	65536,	1024,	"keysock_xmit_lowat"},
131	{ 4096, 65536,	8192,	"keysock_recv_hiwat"},
132	{ 65536, 1024*1024*1024, 256*1024,	"keysock_max_buf"},
133	{ 0,	3,	0,	"keysock_debug"},
134};
135#define	keystack_xmit_hiwat	keystack_params[0].keysock_param_value
136#define	keystack_xmit_lowat	keystack_params[1].keysock_param_value
137#define	keystack_recv_hiwat	keystack_params[2].keysock_param_value
138#define	keystack_max_buf	keystack_params[3].keysock_param_value
139#define	keystack_debug	keystack_params[4].keysock_param_value
140
/*
 * Debug output macros.  The second argument is a complete, parenthesized
 * printf argument list, e.g. ks1dbg(keystack, ("value %d\n", v));
 *
 * ks0dbg() always prints.  ks1dbg(), ks2dbg() and ks3dbg() print only when
 * the stack's keystack_debug setting is at least 1, 2 or 3 respectively.
 *
 * The conditional forms expand to a single void expression rather than a
 * bare "if" statement, so that using one as the body of an unbraced
 * if/else cannot capture the caller's "else".
 */
#define	ks0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	ks1dbg(keystack, a)	\
	(((keystack)->keystack_debug != 0) ? (void) printf a : (void) 0)
#define	ks2dbg(keystack, a)	\
	(((keystack)->keystack_debug > 1) ? (void) printf a : (void) 0)
#define	ks3dbg(keystack, a)	\
	(((keystack)->keystack_debug > 2) ? (void) printf a : (void) 0)
146
147static int keysock_close(queue_t *, int, cred_t *);
148static int keysock_open(queue_t *, dev_t *, int, int, cred_t *);
149static int keysock_wput(queue_t *, mblk_t *);
150static int keysock_rput(queue_t *, mblk_t *);
151static int keysock_rsrv(queue_t *);
152static void keysock_passup(mblk_t *, sadb_msg_t *, minor_t,
153    keysock_consumer_t *, boolean_t, keysock_stack_t *);
154static void *keysock_stack_init(netstackid_t stackid, netstack_t *ns);
155static void keysock_stack_fini(netstackid_t stackid, void *arg);
156
157static struct module_info info = {
158	5138, "keysock", 1, INFPSZ, 512, 128
159};
160
161static struct qinit rinit = {
162	keysock_rput, keysock_rsrv, keysock_open, keysock_close,
163	NULL, &info
164};
165
166static struct qinit winit = {
167	keysock_wput, NULL, NULL, NULL, NULL, &info
168};
169
170struct streamtab keysockinfo = {
171	&rinit, &winit
172};
173
extern struct modlinkage *keysock_modlp;

/*
 * Plumb IPsec.
 *
 * NOTE:  New "default" modules will need to be loaded here if needed before
 *	  boot time.
 */

/*
 * Module names and device paths used while plumbing.  They live at file
 * scope to keep lint from complaining.
 */
static char *KEYSOCK = "keysock";
static char *STRMOD = "strmod";
static char *IP6DEV = "/devices/pseudo/ip6@0:ip6";
static char *IPSECAH = "ipsecah";
static char *IPSECAHDEV = "/devices/pseudo/ipsecah@0:ipsecah";
static char *IPSECESP = "ipsecesp";
static char *IPSECESPDEV = "/devices/pseudo/ipsecesp@0:ipsecesp";
191
192/*
193 * Load the other ipsec modules and plumb them together.
194 */
195int
196keysock_plumb_ipsec(netstack_t *ns)
197{
198	ldi_handle_t	lh, ip6_lh = NULL;
199	ldi_ident_t	li = NULL;
200	int		err = 0;
201	int		muxid, rval;
202	boolean_t	esp_present = B_TRUE;
203	cred_t		*cr;
204	keysock_stack_t *keystack = ns->netstack_keysock;
205
206#ifdef NS_DEBUG
207	(void) printf("keysock_plumb_ipsec(%d)\n",
208	    ns->netstack_stackid);
209#endif
210
211	keystack->keystack_plumbed = 0;	/* we're trying again.. */
212
213	cr = zone_get_kcred(netstackid_to_zoneid(
214	    keystack->keystack_netstack->netstack_stackid));
215	ASSERT(cr != NULL);
216	/*
217	 * Load up the drivers (AH/ESP).
218	 *
219	 * I do this separately from the actual plumbing in case this function
220	 * ever gets called from a diskless boot before the root filesystem is
221	 * up.  I don't have to worry about "keysock" because, well, if I'm
222	 * here, keysock must've loaded successfully.
223	 */
224	if (i_ddi_attach_pseudo_node(IPSECAH) == NULL) {
225		ks0dbg(("IPsec:  AH failed to attach.\n"));
226		goto bail;
227	}
228	if (i_ddi_attach_pseudo_node(IPSECESP) == NULL) {
229		ks0dbg(("IPsec:  ESP failed to attach.\n"));
230		esp_present = B_FALSE;
231	}
232
233	/*
234	 * Set up the IP streams for AH and ESP, as well as tacking keysock
235	 * on top of them.  Assume keysock has set the autopushes up already.
236	 */
237
238	/* Open IP. */
239	err = ldi_ident_from_mod(keysock_modlp, &li);
240	if (err) {
241		ks0dbg(("IPsec:  lid_ident_from_mod failed (err %d).\n",
242		    err));
243		goto bail;
244	}
245
246	err = ldi_open_by_name(IP6DEV, FREAD|FWRITE, cr, &ip6_lh, li);
247	if (err) {
248		ks0dbg(("IPsec:  Open of IP6 failed (err %d).\n", err));
249		goto bail;
250	}
251
252	/* PLINK KEYSOCK/AH */
253	err = ldi_open_by_name(IPSECAHDEV, FREAD|FWRITE, cr, &lh, li);
254	if (err) {
255		ks0dbg(("IPsec:  Open of AH failed (err %d).\n", err));
256		goto bail;
257	}
258	err = ldi_ioctl(lh,
259	    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
260	if (err) {
261		ks0dbg(("IPsec:  Push of KEYSOCK onto AH failed (err %d).\n",
262		    err));
263		(void) ldi_close(lh, FREAD|FWRITE, cr);
264		goto bail;
265	}
266	err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
267	    FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
268	if (err) {
269		ks0dbg(("IPsec:  PLINK of KEYSOCK/AH failed (err %d).\n", err));
270		(void) ldi_close(lh, FREAD|FWRITE, cr);
271		goto bail;
272	}
273	(void) ldi_close(lh, FREAD|FWRITE, cr);
274
275	/* PLINK KEYSOCK/ESP */
276	if (esp_present) {
277		err = ldi_open_by_name(IPSECESPDEV,
278		    FREAD|FWRITE, cr, &lh, li);
279		if (err) {
280			ks0dbg(("IPsec:  Open of ESP failed (err %d).\n", err));
281			goto bail;
282		}
283		err = ldi_ioctl(lh,
284		    I_PUSH, (intptr_t)KEYSOCK, FKIOCTL, cr, &rval);
285		if (err) {
286			ks0dbg(("IPsec:  "
287			    "Push of KEYSOCK onto ESP failed (err %d).\n",
288			    err));
289			(void) ldi_close(lh, FREAD|FWRITE, cr);
290			goto bail;
291		}
292		err = ldi_ioctl(ip6_lh, I_PLINK, (intptr_t)lh,
293		    FREAD+FWRITE+FNOCTTY+FKIOCTL, cr, &muxid);
294		if (err) {
295			ks0dbg(("IPsec:  "
296			    "PLINK of KEYSOCK/ESP failed (err %d).\n", err));
297			(void) ldi_close(lh, FREAD|FWRITE, cr);
298			goto bail;
299		}
300		(void) ldi_close(lh, FREAD|FWRITE, cr);
301	}
302
303bail:
304	keystack->keystack_plumbed = (err == 0) ? 1 : -1;
305	if (ip6_lh != NULL) {
306		(void) ldi_close(ip6_lh, FREAD|FWRITE, cr);
307	}
308	if (li != NULL)
309		ldi_ident_release(li);
310#ifdef NS_DEBUG
311	(void) printf("keysock_plumb_ipsec -> %d\n",
312	    keystack->keystack_plumbed);
313#endif
314	crfree(cr);
315	return (err);
316}
317
318/* ARGSUSED */
319static int
320keysock_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
321{
322	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
323	uint_t value;
324	keysock_t *ks = (keysock_t *)q->q_ptr;
325	keysock_stack_t	*keystack = ks->keysock_keystack;
326
327	mutex_enter(&keystack->keystack_param_lock);
328	value = keysockpa->keysock_param_value;
329	mutex_exit(&keystack->keystack_param_lock);
330
331	(void) mi_mpprintf(mp, "%u", value);
332	return (0);
333}
334
335/* This routine sets an NDD variable in a keysockparam_t structure. */
336/* ARGSUSED */
337static int
338keysock_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
339{
340	ulong_t	new_value;
341	keysockparam_t	*keysockpa = (keysockparam_t *)cp;
342	keysock_t *ks = (keysock_t *)q->q_ptr;
343	keysock_stack_t	*keystack = ks->keysock_keystack;
344
345	/* Convert the value from a string into a long integer. */
346	if (ddi_strtoul(value, NULL, 10, &new_value) != 0)
347		return (EINVAL);
348
349	mutex_enter(&keystack->keystack_param_lock);
350	/*
351	 * Fail the request if the new value does not lie within the
352	 * required bounds.
353	 */
354	if (new_value < keysockpa->keysock_param_min ||
355	    new_value > keysockpa->keysock_param_max) {
356		mutex_exit(&keystack->keystack_param_lock);
357		return (EINVAL);
358	}
359
360	/* Set the new value */
361	keysockpa->keysock_param_value = new_value;
362	mutex_exit(&keystack->keystack_param_lock);
363
364	return (0);
365}
366
367/*
368 * Initialize keysock at module load time
369 */
370boolean_t
371keysock_ddi_init(void)
372{
373	keysock_max_optsize = optcom_max_optsize(
374	    keysock_opt_obj.odb_opt_des_arr, keysock_opt_obj.odb_opt_arr_cnt);
375
376	keysock_vmem = vmem_create("keysock", (void *)1, MAXMIN, 1,
377	    NULL, NULL, NULL, 1, VM_SLEEP | VMC_IDENTIFIER);
378
379	/*
380	 * We want to be informed each time a stack is created or
381	 * destroyed in the kernel, so we can maintain the
382	 * set of keysock_stack_t's.
383	 */
384	netstack_register(NS_KEYSOCK, keysock_stack_init, NULL,
385	    keysock_stack_fini);
386
387	return (B_TRUE);
388}
389
390/*
391 * Walk through the param array specified registering each element with the
392 * named dispatch handler.
393 */
394static boolean_t
395keysock_param_register(IDP *ndp, keysockparam_t *ksp, int cnt)
396{
397	for (; cnt-- > 0; ksp++) {
398		if (ksp->keysock_param_name != NULL &&
399		    ksp->keysock_param_name[0]) {
400			if (!nd_load(ndp,
401			    ksp->keysock_param_name,
402			    keysock_param_get, keysock_param_set,
403			    (caddr_t)ksp)) {
404				nd_free(ndp);
405				return (B_FALSE);
406			}
407		}
408	}
409	return (B_TRUE);
410}
411
412/*
413 * Initialize keysock for one stack instance
414 */
415/* ARGSUSED */
416static void *
417keysock_stack_init(netstackid_t stackid, netstack_t *ns)
418{
419	keysock_stack_t	*keystack;
420	keysockparam_t *ksp;
421
422	keystack = (keysock_stack_t *)kmem_zalloc(sizeof (*keystack), KM_SLEEP);
423	keystack->keystack_netstack = ns;
424
425	keystack->keystack_acquire_seq = 0xffffffff;
426
427	ksp = (keysockparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
428	keystack->keystack_params = ksp;
429	bcopy(lcl_param_arr, ksp, sizeof (lcl_param_arr));
430
431	(void) keysock_param_register(&keystack->keystack_g_nd, ksp,
432	    A_CNT(lcl_param_arr));
433
434	mutex_init(&keystack->keystack_list_lock, NULL, MUTEX_DEFAULT, NULL);
435	mutex_init(&keystack->keystack_consumers_lock,
436	    NULL, MUTEX_DEFAULT, NULL);
437	mutex_init(&keystack->keystack_param_lock, NULL, MUTEX_DEFAULT, NULL);
438	return (keystack);
439}
440
441/*
442 * Free NDD variable space, and other destructors, for keysock.
443 */
444void
445keysock_ddi_destroy(void)
446{
447	netstack_unregister(NS_KEYSOCK);
448	vmem_destroy(keysock_vmem);
449}
450
451/*
452 * Remove one stack instance from keysock
453 */
454/* ARGSUSED */
455static void
456keysock_stack_fini(netstackid_t stackid, void *arg)
457{
458	keysock_stack_t *keystack = (keysock_stack_t *)arg;
459
460	nd_free(&keystack->keystack_g_nd);
461	kmem_free(keystack->keystack_params, sizeof (lcl_param_arr));
462	keystack->keystack_params = NULL;
463
464	mutex_destroy(&keystack->keystack_list_lock);
465	mutex_destroy(&keystack->keystack_consumers_lock);
466	mutex_destroy(&keystack->keystack_param_lock);
467
468	kmem_free(keystack, sizeof (*keystack));
469}
470
471/*
472 * Close routine for keysock.
473 */
474/* ARGSUSED */
475static int
476keysock_close(queue_t *q, int flags __unused, cred_t *credp __unused)
477{
478	keysock_t *ks;
479	keysock_consumer_t *kc;
480	void *ptr = q->q_ptr;
481	int size;
482	keysock_stack_t	*keystack;
483
484
485	qprocsoff(q);
486
487	/* Safe assumption. */
488	ASSERT(ptr != NULL);
489
490	if (WR(q)->q_next) {
491		kc = (keysock_consumer_t *)ptr;
492		keystack = kc->kc_keystack;
493
494		ks1dbg(keystack, ("Module close, removing a consumer (%d).\n",
495		    kc->kc_sa_type));
496		/*
497		 * Because of PERMOD open/close exclusive perimeter, I
498		 * can inspect KC_FLUSHING w/o locking down kc->kc_lock.
499		 */
500		if (kc->kc_flags & KC_FLUSHING) {
501			/*
502			 * If this decrement was the last one, send
503			 * down the next pending one, if any.
504			 *
505			 * With a PERMOD perimeter, the mutexes ops aren't
506			 * really necessary, but if we ever loosen up, we will
507			 * have this bit covered already.
508			 */
509			keystack->keystack_flushdump--;
510			if (keystack->keystack_flushdump == 0) {
511				/*
512				 * The flush/dump terminated by having a
513				 * consumer go away.  I need to send up to the
514				 * appropriate keysock all of the relevant
515				 * information.  Unfortunately, I don't
516				 * have that handy.
517				 */
518				ks0dbg(("Consumer went away while flushing or"
519				    " dumping.\n"));
520			}
521		}
522		size = sizeof (keysock_consumer_t);
523		mutex_enter(&keystack->keystack_consumers_lock);
524		keystack->keystack_consumers[kc->kc_sa_type] = NULL;
525		mutex_exit(&keystack->keystack_consumers_lock);
526		mutex_destroy(&kc->kc_lock);
527		netstack_rele(kc->kc_keystack->keystack_netstack);
528	} else {
529		ks = (keysock_t *)ptr;
530		keystack = ks->keysock_keystack;
531
532		ks3dbg(keystack,
533		    ("Driver close, PF_KEY socket is going away.\n"));
534		if ((ks->keysock_flags & KEYSOCK_EXTENDED) != 0)
535			atomic_dec_32(&keystack->keystack_num_extended);
536		size = sizeof (keysock_t);
537		mutex_enter(&keystack->keystack_list_lock);
538		*(ks->keysock_ptpn) = ks->keysock_next;
539		if (ks->keysock_next != NULL)
540			ks->keysock_next->keysock_ptpn = ks->keysock_ptpn;
541		mutex_exit(&keystack->keystack_list_lock);
542		mutex_destroy(&ks->keysock_lock);
543		vmem_free(keysock_vmem, (void *)(uintptr_t)ks->keysock_serial,
544		    1);
545		netstack_rele(ks->keysock_keystack->keystack_netstack);
546	}
547
548	/* Now I'm free. */
549	kmem_free(ptr, size);
550	return (0);
551}
552/*
553 * Open routine for keysock.
554 */
555/* ARGSUSED */
556static int
557keysock_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
558{
559	keysock_t *ks;
560	keysock_consumer_t *kc;
561	mblk_t *mp;
562	ipsec_info_t *ii;
563	netstack_t *ns;
564	keysock_stack_t *keystack;
565
566	if (secpolicy_ip_config(credp, B_FALSE) != 0) {
567		/* Privilege debugging will log the error */
568		return (EPERM);
569	}
570
571	if (q->q_ptr != NULL)
572		return (0);  /* Re-open of an already open instance. */
573
574	ns = netstack_find_by_cred(credp);
575	ASSERT(ns != NULL);
576	keystack = ns->netstack_keysock;
577	ASSERT(keystack != NULL);
578
579	ks3dbg(keystack, ("Entering keysock open.\n"));
580
581	if (keystack->keystack_plumbed < 1) {
582		netstack_t *ns = keystack->keystack_netstack;
583
584		keystack->keystack_plumbed = 0;
585#ifdef NS_DEBUG
586		printf("keysock_open(%d) - plumb\n",
587		    keystack->keystack_netstack->netstack_stackid);
588#endif
589		/*
590		 * Don't worry about ipsec_failure being true here.
591		 * (See ip.c).  An open of keysock should try and force
592		 * the issue.  Maybe it was a transient failure.
593		 */
594		ipsec_loader_loadnow(ns->netstack_ipsec);
595	}
596
597	if (sflag & MODOPEN) {
598		/* Initialize keysock_consumer state here. */
599		kc = kmem_zalloc(sizeof (keysock_consumer_t), KM_NOSLEEP);
600		if (kc == NULL) {
601			netstack_rele(keystack->keystack_netstack);
602			return (ENOMEM);
603		}
604		mutex_init(&kc->kc_lock, NULL, MUTEX_DEFAULT, 0);
605		kc->kc_rq = q;
606		kc->kc_wq = WR(q);
607
608		q->q_ptr = kc;
609		WR(q)->q_ptr = kc;
610
611		kc->kc_keystack = keystack;
612		qprocson(q);
613
614		/*
615		 * Send down initial message to whatever I was pushed on top
616		 * of asking for its consumer type.  The reply will set it.
617		 */
618
619		/* Allocate it. */
620		mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
621		if (mp == NULL) {
622			ks1dbg(keystack, (
623			    "keysock_open:  Cannot allocate KEYSOCK_HELLO.\n"));
624			/* Do I need to set these to null? */
625			q->q_ptr = NULL;
626			WR(q)->q_ptr = NULL;
627			mutex_destroy(&kc->kc_lock);
628			kmem_free(kc, sizeof (*kc));
629			netstack_rele(keystack->keystack_netstack);
630			return (ENOMEM);
631		}
632
633		/* If I allocated okay, putnext to what I was pushed atop. */
634		mp->b_wptr += sizeof (ipsec_info_t);
635		mp->b_datap->db_type = M_CTL;
636		ii = (ipsec_info_t *)mp->b_rptr;
637		ii->ipsec_info_type = KEYSOCK_HELLO;
638		/* Length only of type/len. */
639		ii->ipsec_info_len = sizeof (ii->ipsec_allu);
640		ks2dbg(keystack, ("Ready to putnext KEYSOCK_HELLO.\n"));
641		putnext(kc->kc_wq, mp);
642	} else {
643		minor_t ksminor;
644
645		/* Initialize keysock state. */
646
647		ks2dbg(keystack, ("Made it into PF_KEY socket open.\n"));
648
649		ksminor = (minor_t)(uintptr_t)
650		    vmem_alloc(keysock_vmem, 1, VM_NOSLEEP);
651		if (ksminor == 0) {
652			netstack_rele(keystack->keystack_netstack);
653			return (ENOMEM);
654		}
655		ks = kmem_zalloc(sizeof (keysock_t), KM_NOSLEEP);
656		if (ks == NULL) {
657			vmem_free(keysock_vmem, (void *)(uintptr_t)ksminor, 1);
658			netstack_rele(keystack->keystack_netstack);
659			return (ENOMEM);
660		}
661
662		mutex_init(&ks->keysock_lock, NULL, MUTEX_DEFAULT, 0);
663		ks->keysock_rq = q;
664		ks->keysock_wq = WR(q);
665		ks->keysock_state = TS_UNBND;
666		ks->keysock_serial = ksminor;
667
668		q->q_ptr = ks;
669		WR(q)->q_ptr = ks;
670		ks->keysock_keystack = keystack;
671
672		/*
673		 * The receive hiwat is only looked at on the stream head
674		 * queue.  Store in q_hiwat in order to return on SO_RCVBUF
675		 * getsockopts.
676		 */
677
678		q->q_hiwat = keystack->keystack_recv_hiwat;
679
680		/*
681		 * The transmit hiwat/lowat is only looked at on IP's queue.
682		 * Store in q_hiwat/q_lowat in order to return on
683		 * SO_SNDBUF/SO_SNDLOWAT getsockopts.
684		 */
685
686		WR(q)->q_hiwat = keystack->keystack_xmit_hiwat;
687		WR(q)->q_lowat = keystack->keystack_xmit_lowat;
688
689		*devp = makedevice(getmajor(*devp), ksminor);
690
691		/*
692		 * Thread keysock into the global keysock list.
693		 */
694		mutex_enter(&keystack->keystack_list_lock);
695		ks->keysock_next = keystack->keystack_list;
696		ks->keysock_ptpn = &keystack->keystack_list;
697		if (keystack->keystack_list != NULL) {
698			keystack->keystack_list->keysock_ptpn =
699			    &ks->keysock_next;
700		}
701		keystack->keystack_list = ks;
702		mutex_exit(&keystack->keystack_list_lock);
703
704		qprocson(q);
705		(void) proto_set_rx_hiwat(q, NULL,
706		    keystack->keystack_recv_hiwat);
707		/*
708		 * Wait outside the keysock module perimeter for IPsec
709		 * plumbing to be completed.  If it fails, keysock_close()
710		 * undoes everything we just did.
711		 */
712		if (!ipsec_loader_wait(q,
713		    keystack->keystack_netstack->netstack_ipsec)) {
714			(void) keysock_close(q, 0, credp);
715			return (EPFNOSUPPORT);
716		}
717	}
718
719	return (0);
720}
721
722/* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_wput(). */
723
724/*
725 * Copy relevant state bits.
726 */
727static void
728keysock_copy_info(struct T_info_ack *tap, keysock_t *ks)
729{
730	*tap = keysock_g_t_info_ack;
731	tap->CURRENT_state = ks->keysock_state;
732	tap->OPT_size = keysock_max_optsize;
733}
734
735/*
736 * This routine responds to T_CAPABILITY_REQ messages.  It is called by
737 * keysock_wput.  Much of the T_CAPABILITY_ACK information is copied from
738 * keysock_g_t_info_ack.  The current state of the stream is copied from
739 * keysock_state.
740 */
741static void
742keysock_capability_req(queue_t *q, mblk_t *mp)
743{
744	keysock_t *ks = (keysock_t *)q->q_ptr;
745	t_uscalar_t cap_bits1;
746	struct T_capability_ack	*tcap;
747
748	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
749
750	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
751	    mp->b_datap->db_type, T_CAPABILITY_ACK);
752	if (mp == NULL)
753		return;
754
755	tcap = (struct T_capability_ack *)mp->b_rptr;
756	tcap->CAP_bits1 = 0;
757
758	if (cap_bits1 & TC1_INFO) {
759		keysock_copy_info(&tcap->INFO_ack, ks);
760		tcap->CAP_bits1 |= TC1_INFO;
761	}
762
763	qreply(q, mp);
764}
765
766/*
767 * This routine responds to T_INFO_REQ messages. It is called by
768 * keysock_wput_other.
769 * Most of the T_INFO_ACK information is copied from keysock_g_t_info_ack.
770 * The current state of the stream is copied from keysock_state.
771 */
772static void
773keysock_info_req(queue_t *q, mblk_t *mp)
774{
775	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
776	    T_INFO_ACK);
777	if (mp == NULL)
778		return;
779	keysock_copy_info((struct T_info_ack *)mp->b_rptr,
780	    (keysock_t *)q->q_ptr);
781	qreply(q, mp);
782}
783
784/*
785 * keysock_err_ack. This routine creates a
786 * T_ERROR_ACK message and passes it
787 * upstream.
788 */
789static void
790keysock_err_ack(queue_t *q, mblk_t *mp, int t_error, int sys_error)
791{
792	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
793		qreply(q, mp);
794}
795
796/*
797 * This routine retrieves the current status of socket options.
798 * It returns the size of the option retrieved.
799 */
800/* ARGSUSED */
801int
802keysock_opt_get(queue_t *q, int level, int name, uchar_t *ptr)
803{
804	int *i1 = (int *)ptr;
805	keysock_t *ks = (keysock_t *)q->q_ptr;
806
807	switch (level) {
808	case SOL_SOCKET:
809		mutex_enter(&ks->keysock_lock);
810		switch (name) {
811		case SO_TYPE:
812			*i1 = SOCK_RAW;
813			break;
814		case SO_USELOOPBACK:
815			*i1 = (int)(!((ks->keysock_flags & KEYSOCK_NOLOOP) ==
816			    KEYSOCK_NOLOOP));
817			break;
818		/*
819		 * The following two items can be manipulated,
820		 * but changing them should do nothing.
821		 */
822		case SO_SNDBUF:
823			*i1 = (int)q->q_hiwat;
824			break;
825		case SO_RCVBUF:
826			*i1 = (int)(RD(q)->q_hiwat);
827			break;
828		}
829		mutex_exit(&ks->keysock_lock);
830		break;
831	default:
832		return (0);
833	}
834	return (sizeof (int));
835}
836
837/*
838 * This routine sets socket options.
839 */
840/* ARGSUSED */
841int
842keysock_opt_set(queue_t *q, uint_t mgmt_flags, int level,
843    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
844    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
845{
846	int *i1 = (int *)invalp, errno = 0;
847	keysock_t *ks = (keysock_t *)q->q_ptr;
848	keysock_stack_t	*keystack = ks->keysock_keystack;
849
850	switch (level) {
851	case SOL_SOCKET:
852		mutex_enter(&ks->keysock_lock);
853		switch (name) {
854		case SO_USELOOPBACK:
855			if (!(*i1))
856				ks->keysock_flags |= KEYSOCK_NOLOOP;
857			else ks->keysock_flags &= ~KEYSOCK_NOLOOP;
858			break;
859		case SO_SNDBUF:
860			if (*i1 > keystack->keystack_max_buf)
861				errno = ENOBUFS;
862			else q->q_hiwat = *i1;
863			break;
864		case SO_RCVBUF:
865			if (*i1 > keystack->keystack_max_buf) {
866				errno = ENOBUFS;
867			} else {
868				RD(q)->q_hiwat = *i1;
869				(void) proto_set_rx_hiwat(RD(q), NULL, *i1);
870			}
871			break;
872		default:
873			errno = EINVAL;
874		}
875		mutex_exit(&ks->keysock_lock);
876		break;
877	default:
878		errno = EINVAL;
879	}
880	return (errno);
881}
882
883/*
884 * Handle STREAMS ioctl copyin for getsockname() for both PF_KEY and
885 * PF_POLICY.
886 */
887void
888keysock_spdsock_wput_iocdata(queue_t *q, mblk_t *mp, sa_family_t family)
889{
890	mblk_t *mp1;
891	STRUCT_HANDLE(strbuf, sb);
892	/* What size of sockaddr do we need? */
893	const uint_t addrlen = sizeof (struct sockaddr);
894
895	/* We only handle TI_GET{MY,PEER}NAME (get{sock,peer}name()). */
896	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
897	case TI_GETMYNAME:
898	case TI_GETPEERNAME:
899		break;
900	default:
901		freemsg(mp);
902		return;
903	}
904
905	switch (mi_copy_state(q, mp, &mp1)) {
906	case -1:
907		return;
908	case MI_COPY_CASE(MI_COPY_IN, 1):
909		break;
910	case MI_COPY_CASE(MI_COPY_OUT, 1):
911		/*
912		 * The address has been copied out, so now
913		 * copyout the strbuf.
914		 */
915		mi_copyout(q, mp);
916		return;
917	case MI_COPY_CASE(MI_COPY_OUT, 2):
918		/*
919		 * The address and strbuf have been copied out.
920		 * We're done, so just acknowledge the original
921		 * M_IOCTL.
922		 */
923		mi_copy_done(q, mp, 0);
924		return;
925	default:
926		/*
927		 * Something strange has happened, so acknowledge
928		 * the original M_IOCTL with an EPROTO error.
929		 */
930		mi_copy_done(q, mp, EPROTO);
931		return;
932	}
933
934	/*
935	 * Now we have the strbuf structure for TI_GET{MY,PEER}NAME. Next we
936	 * copyout the requested address and then we'll copyout the strbuf.
937	 * Regardless of sockname or peername, we just return a sockaddr with
938	 * sa_family set.
939	 */
940	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
941	    (void *)mp1->b_rptr);
942
943	if (STRUCT_FGET(sb, maxlen) < addrlen) {
944		mi_copy_done(q, mp, EINVAL);
945		return;
946	}
947
948	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
949	if (mp1 == NULL)
950		return;
951
952	STRUCT_FSET(sb, len, addrlen);
953	((struct sockaddr *)mp1->b_wptr)->sa_family = family;
954	mp1->b_wptr += addrlen;
955	mi_copyout(q, mp);
956}
957
958/*
959 * Handle STREAMS messages.
960 */
961static void
962keysock_wput_other(queue_t *q, mblk_t *mp)
963{
964	struct iocblk *iocp;
965	int error;
966	keysock_t *ks = (keysock_t *)q->q_ptr;
967	keysock_stack_t	*keystack = ks->keysock_keystack;
968	cred_t		*cr;
969
970	switch (mp->b_datap->db_type) {
971	case M_PROTO:
972	case M_PCPROTO:
973		if ((mp->b_wptr - mp->b_rptr) < sizeof (long)) {
974			ks3dbg(keystack, (
975			    "keysock_wput_other: Not big enough M_PROTO\n"));
976			freemsg(mp);
977			return;
978		}
979		switch (((union T_primitives *)mp->b_rptr)->type) {
980		case T_CAPABILITY_REQ:
981			keysock_capability_req(q, mp);
982			break;
983		case T_INFO_REQ:
984			keysock_info_req(q, mp);
985			break;
986		case T_SVR4_OPTMGMT_REQ:
987		case T_OPTMGMT_REQ:
988			/*
989			 * All Solaris components should pass a db_credp
990			 * for this TPI message, hence we ASSERT.
991			 * But in case there is some other M_PROTO that looks
992			 * like a TPI message sent by some other kernel
993			 * component, we check and return an error.
994			 */
995			cr = msg_getcred(mp, NULL);
996			ASSERT(cr != NULL);
997			if (cr == NULL) {
998				keysock_err_ack(q, mp, TSYSERR, EINVAL);
999				return;
1000			}
1001			if (((union T_primitives *)mp->b_rptr)->type ==
1002			    T_SVR4_OPTMGMT_REQ) {
1003				svr4_optcom_req(q, mp, cr, &keysock_opt_obj);
1004			} else {
1005				tpi_optcom_req(q, mp, cr, &keysock_opt_obj);
1006			}
1007			break;
1008		case T_DATA_REQ:
1009		case T_EXDATA_REQ:
1010		case T_ORDREL_REQ:
1011			/* Illegal for keysock. */
1012			freemsg(mp);
1013			(void) putnextctl1(RD(q), M_ERROR, EPROTO);
1014			break;
1015		default:
1016			/* Not supported by keysock. */
1017			keysock_err_ack(q, mp, TNOTSUPPORT, 0);
1018			break;
1019		}
1020		return;
1021	case M_IOCDATA:
1022		keysock_spdsock_wput_iocdata(q, mp, PF_KEY);
1023		return;
1024	case M_IOCTL:
1025		iocp = (struct iocblk *)mp->b_rptr;
1026		error = EINVAL;
1027
1028		switch (iocp->ioc_cmd) {
1029		case TI_GETMYNAME:
1030		case TI_GETPEERNAME:
1031			/*
1032			 * For pfiles(1) observability with getsockname().
1033			 * See keysock_spdsock_wput_iocdata() for the rest of
1034			 * this.
1035			 */
1036			mi_copyin(q, mp, NULL,
1037			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
1038			return;
1039		case ND_SET:
1040		case ND_GET:
1041			if (nd_getset(q, keystack->keystack_g_nd, mp)) {
1042				qreply(q, mp);
1043				return;
1044			} else
1045				error = ENOENT;
1046			/* FALLTHRU */
1047		default:
1048			miocnak(q, mp, 0, error);
1049			return;
1050		}
1051	case M_FLUSH:
1052		if (*mp->b_rptr & FLUSHW) {
1053			flushq(q, FLUSHALL);
1054			*mp->b_rptr &= ~FLUSHW;
1055		}
1056		if (*mp->b_rptr & FLUSHR) {
1057			qreply(q, mp);
1058			return;
1059		}
1060		/* Else FALLTHRU */
1061	}
1062
1063	/* If fell through, just black-hole the message. */
1064	freemsg(mp);
1065}
1066
1067/*
1068 * Transmit a PF_KEY error message to the instance either pointed to
1069 * by ks, the instance with serial number serial, or more, depending.
1070 *
1071 * The faulty message (or a reasonable facsimile thereof) is in mp.
1072 * This function will free mp or recycle it for delivery, thereby causing
1073 * the stream head to free it.
1074 */
1075static void
1076keysock_error(keysock_t *ks, mblk_t *mp, int error, int diagnostic)
1077{
1078	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1079	keysock_stack_t	*keystack = ks->keysock_keystack;
1080
1081	ASSERT(mp->b_datap->db_type == M_DATA);
1082
1083	if (samsg->sadb_msg_type < SADB_GETSPI ||
1084	    samsg->sadb_msg_type > SADB_MAX)
1085		samsg->sadb_msg_type = SADB_RESERVED;
1086
1087	/*
1088	 * Strip out extension headers.
1089	 */
1090	ASSERT(mp->b_rptr + sizeof (*samsg) <= mp->b_datap->db_lim);
1091	mp->b_wptr = mp->b_rptr + sizeof (*samsg);
1092	samsg->sadb_msg_len = SADB_8TO64(sizeof (sadb_msg_t));
1093	samsg->sadb_msg_errno = (uint8_t)error;
1094	samsg->sadb_x_msg_diagnostic = (uint16_t)diagnostic;
1095
1096	keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE, keystack);
1097}
1098
1099/*
1100 * Pass down a message to a consumer.  Wrap it in KEYSOCK_IN, and copy
1101 * in the extv if passed in.
1102 */
1103static void
1104keysock_passdown(keysock_t *ks, mblk_t *mp, uint8_t satype, sadb_ext_t *extv[],
1105    boolean_t flushmsg)
1106{
1107	keysock_consumer_t *kc;
1108	mblk_t *wrapper;
1109	keysock_in_t *ksi;
1110	int i;
1111	keysock_stack_t	*keystack = ks->keysock_keystack;
1112
1113	wrapper = allocb(sizeof (ipsec_info_t), BPRI_HI);
1114	if (wrapper == NULL) {
1115		ks3dbg(keystack, ("keysock_passdown: allocb failed.\n"));
1116		if (extv[SADB_EXT_KEY_ENCRYPT] != NULL)
1117			bzero(extv[SADB_EXT_KEY_ENCRYPT],
1118			    SADB_64TO8(
1119			    extv[SADB_EXT_KEY_ENCRYPT]->sadb_ext_len));
1120		if (extv[SADB_EXT_KEY_AUTH] != NULL)
1121			bzero(extv[SADB_EXT_KEY_AUTH],
1122			    SADB_64TO8(
1123			    extv[SADB_EXT_KEY_AUTH]->sadb_ext_len));
1124		if (flushmsg) {
1125			ks0dbg((
1126			    "keysock: Downwards flush/dump message failed!\n"));
1127			/* If this is true, I hold the perimeter. */
1128			keystack->keystack_flushdump--;
1129		}
1130		freemsg(mp);
1131		return;
1132	}
1133
1134	wrapper->b_datap->db_type = M_CTL;
1135	ksi = (keysock_in_t *)wrapper->b_rptr;
1136	ksi->ks_in_type = KEYSOCK_IN;
1137	ksi->ks_in_len = sizeof (keysock_in_t);
1138	if (extv[SADB_EXT_ADDRESS_SRC] != NULL)
1139		ksi->ks_in_srctype = KS_IN_ADDR_UNKNOWN;
1140	else ksi->ks_in_srctype = KS_IN_ADDR_NOTTHERE;
1141	if (extv[SADB_EXT_ADDRESS_DST] != NULL)
1142		ksi->ks_in_dsttype = KS_IN_ADDR_UNKNOWN;
1143	else ksi->ks_in_dsttype = KS_IN_ADDR_NOTTHERE;
1144	for (i = 0; i <= SADB_EXT_MAX; i++)
1145		ksi->ks_in_extv[i] = extv[i];
1146	ksi->ks_in_serial = ks->keysock_serial;
1147	wrapper->b_wptr += sizeof (ipsec_info_t);
1148	wrapper->b_cont = mp;
1149
1150	/*
1151	 * Find the appropriate consumer where the message is passed down.
1152	 */
1153	kc = keystack->keystack_consumers[satype];
1154	if (kc == NULL) {
1155		freeb(wrapper);
1156		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1157		if (flushmsg) {
1158			ks0dbg((
1159			    "keysock: Downwards flush/dump message failed!\n"));
1160			/* If this is true, I hold the perimeter. */
1161			keystack->keystack_flushdump--;
1162		}
1163		return;
1164	}
1165
1166	/*
1167	 * NOTE: There used to be code in here to spin while a flush or
1168	 *	 dump finished.  Keysock now assumes that consumers have enough
1169	 *	 MT-savviness to deal with that.
1170	 */
1171
1172	/*
1173	 * Current consumers (AH and ESP) are guaranteed to return a
1174	 * FLUSH or DUMP message back, so when we reach here, we don't
1175	 * have to worry about keysock_flushdumps.
1176	 */
1177
1178	putnext(kc->kc_wq, wrapper);
1179}
1180
1181/*
1182 * High-level reality checking of extensions.
1183 */
1184static boolean_t
1185ext_check(sadb_ext_t *ext, keysock_stack_t *keystack)
1186{
1187	int i;
1188	uint64_t *lp;
1189	sadb_ident_t *id;
1190	char *idstr;
1191
1192	switch (ext->sadb_ext_type) {
1193	case SADB_EXT_ADDRESS_SRC:
1194	case SADB_EXT_ADDRESS_DST:
1195	case SADB_X_EXT_ADDRESS_INNER_SRC:
1196	case SADB_X_EXT_ADDRESS_INNER_DST:
1197		/* Check for at least enough addtl length for a sockaddr. */
1198		if (ext->sadb_ext_len <= SADB_8TO64(sizeof (sadb_address_t)))
1199			return (B_FALSE);
1200		break;
1201	case SADB_EXT_LIFETIME_HARD:
1202	case SADB_EXT_LIFETIME_SOFT:
1203	case SADB_EXT_LIFETIME_CURRENT:
1204		if (ext->sadb_ext_len != SADB_8TO64(sizeof (sadb_lifetime_t)))
1205			return (B_FALSE);
1206		break;
1207	case SADB_EXT_SPIRANGE:
1208		/* See if the SPI range is legit. */
1209		if (htonl(((sadb_spirange_t *)ext)->sadb_spirange_min) >
1210		    htonl(((sadb_spirange_t *)ext)->sadb_spirange_max))
1211			return (B_FALSE);
1212		break;
1213	case SADB_EXT_KEY_AUTH:
1214	case SADB_EXT_KEY_ENCRYPT:
1215		/* Key length check. */
1216		if (((sadb_key_t *)ext)->sadb_key_bits == 0)
1217			return (B_FALSE);
1218		/*
1219		 * Check to see if the key length (in bits) is less than the
1220		 * extension length (in 8-bits words).
1221		 */
1222		if ((roundup(SADB_1TO8(((sadb_key_t *)ext)->sadb_key_bits), 8) +
1223		    sizeof (sadb_key_t)) != SADB_64TO8(ext->sadb_ext_len)) {
1224			ks1dbg(keystack, (
1225			    "ext_check:  Key bits/length inconsistent.\n"));
1226			ks1dbg(keystack, ("%d bits, len is %d bytes.\n",
1227			    ((sadb_key_t *)ext)->sadb_key_bits,
1228			    SADB_64TO8(ext->sadb_ext_len)));
1229			return (B_FALSE);
1230		}
1231
1232		/* All-zeroes key check. */
1233		lp = (uint64_t *)(((char *)ext) + sizeof (sadb_key_t));
1234		for (i = 0;
1235		    i < (ext->sadb_ext_len - SADB_8TO64(sizeof (sadb_key_t)));
1236		    i++)
1237			if (lp[i] != 0)
1238				break;	/* Out of for loop. */
1239		/* If finished the loop naturally, it's an all zero key. */
1240		if (lp[i] == 0)
1241			return (B_FALSE);
1242		break;
1243	case SADB_EXT_IDENTITY_SRC:
1244	case SADB_EXT_IDENTITY_DST:
1245		/*
1246		 * Make sure the strings in these identities are
1247		 * null-terminated.  RFC 2367 underspecified how to handle
1248		 * such a case.  I "proactively" null-terminate the string
1249		 * at the last byte if it's not terminated sooner.
1250		 */
1251		id = (sadb_ident_t *)ext;
1252		i = SADB_64TO8(id->sadb_ident_len);
1253		i -= sizeof (sadb_ident_t);
1254		idstr = (char *)(id + 1);
1255		while (*idstr != '\0' && i > 0) {
1256			i--;
1257			idstr++;
1258		}
1259		if (i == 0) {
1260			/*
1261			 * I.e., if the bozo user didn't NULL-terminate the
1262			 * string...
1263			 */
1264			idstr--;
1265			*idstr = '\0';
1266		}
1267		break;
1268	}
1269	return (B_TRUE);	/* For now... */
1270}
1271
1272/* Return values for keysock_get_ext(). */
1273#define	KGE_OK	0
1274#define	KGE_DUP	1
1275#define	KGE_UNK	2
1276#define	KGE_LEN	3
1277#define	KGE_CHK	4
1278
1279/*
1280 * Parse basic extension headers and return in the passed-in pointer vector.
1281 * Return values include:
1282 *
1283 *	KGE_OK	Everything's nice and parsed out.
1284 *		If there are no extensions, place NULL in extv[0].
1285 *	KGE_DUP	There is a duplicate extension.
1286 *		First instance in appropriate bin.  First duplicate in
1287 *		extv[0].
1288 *	KGE_UNK	Unknown extension type encountered.  extv[0] contains
1289 *		unknown header.
1290 *	KGE_LEN	Extension length error.
1291 *	KGE_CHK	High-level reality check failed on specific extension.
1292 *
1293 * My apologies for some of the pointer arithmetic in here.  I'm thinking
1294 * like an assembly programmer, yet trying to make the compiler happy.
1295 */
1296static int
1297keysock_get_ext(sadb_ext_t *extv[], sadb_msg_t *basehdr, uint_t msgsize,
1298    keysock_stack_t *keystack)
1299{
1300	bzero(extv, sizeof (sadb_ext_t *) * (SADB_EXT_MAX + 1));
1301
1302	/* Use extv[0] as the "current working pointer". */
1303
1304	extv[0] = (sadb_ext_t *)(basehdr + 1);
1305
1306	while (extv[0] < (sadb_ext_t *)(((uint8_t *)basehdr) + msgsize)) {
1307		/* Check for unknown headers. */
1308		if (extv[0]->sadb_ext_type == 0 ||
1309		    extv[0]->sadb_ext_type > SADB_EXT_MAX)
1310			return (KGE_UNK);
1311
1312		/*
1313		 * Check length.  Use uint64_t because extlen is in units
1314		 * of 64-bit words.  If length goes beyond the msgsize,
1315		 * return an error.  (Zero length also qualifies here.)
1316		 */
1317		if (extv[0]->sadb_ext_len == 0 ||
1318		    (void *)((uint64_t *)extv[0] + extv[0]->sadb_ext_len) >
1319		    (void *)((uint8_t *)basehdr + msgsize))
1320			return (KGE_LEN);
1321
1322		/* Check for redundant headers. */
1323		if (extv[extv[0]->sadb_ext_type] != NULL)
1324			return (KGE_DUP);
1325
1326		/*
1327		 * Reality check the extension if possible at the keysock
1328		 * level.
1329		 */
1330		if (!ext_check(extv[0], keystack))
1331			return (KGE_CHK);
1332
1333		/* If I make it here, assign the appropriate bin. */
1334		extv[extv[0]->sadb_ext_type] = extv[0];
1335
1336		/* Advance pointer (See above for uint64_t ptr reasoning.) */
1337		extv[0] = (sadb_ext_t *)
1338		    ((uint64_t *)extv[0] + extv[0]->sadb_ext_len);
1339	}
1340
1341	/* Everything's cool. */
1342
1343	/*
1344	 * If extv[0] == NULL, then there are no extension headers in this
1345	 * message.  Ensure that this is the case.
1346	 */
1347	if (extv[0] == (sadb_ext_t *)(basehdr + 1))
1348		extv[0] = NULL;
1349
1350	return (KGE_OK);
1351}
1352
1353/*
1354 * qwriter() callback to handle flushes and dumps.  This routine will hold
1355 * the inner perimeter.
1356 */
1357void
1358keysock_do_flushdump(queue_t *q, mblk_t *mp)
1359{
1360	int i, start, finish;
1361	mblk_t *mp1 = NULL;
1362	keysock_t *ks = (keysock_t *)q->q_ptr;
1363	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1364	sadb_msg_t *samsg = (sadb_msg_t *)mp->b_rptr;
1365	keysock_stack_t	*keystack = ks->keysock_keystack;
1366
1367	/*
1368	 * I am guaranteed this will work.  I did the work in keysock_parse()
1369	 * already.
1370	 */
1371	(void) keysock_get_ext(extv, samsg, SADB_64TO8(samsg->sadb_msg_len),
1372	    keystack);
1373
1374	/*
1375	 * I hold the perimeter, therefore I don't need to use atomic ops.
1376	 */
1377	if (keystack->keystack_flushdump != 0) {
1378		/* XXX Should I instead use EBUSY? */
1379		/* XXX Or is there a way to queue these up? */
1380		keysock_error(ks, mp, ENOMEM, SADB_X_DIAGNOSTIC_NONE);
1381		return;
1382	}
1383
1384	if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1385		start = 0;
1386		finish = KEYSOCK_MAX_CONSUMERS - 1;
1387	} else {
1388		start = samsg->sadb_msg_satype;
1389		finish = samsg->sadb_msg_satype;
1390	}
1391
1392	/*
1393	 * Fill up keysock_flushdump with the number of outstanding dumps
1394	 * and/or flushes.
1395	 */
1396
1397	keystack->keystack_flushdump_errno = 0;
1398
1399	/*
1400	 * Okay, I hold the perimeter.  Eventually keysock_flushdump will
1401	 * contain the number of consumers with outstanding flush operations.
1402	 *
1403	 * SO, here's the plan:
1404	 *	* For each relevant consumer (Might be one, might be all)
1405	 *		* Twiddle on the FLUSHING flag.
1406	 *		* Pass down the FLUSH/DUMP message.
1407	 *
1408	 * When I see upbound FLUSH/DUMP messages, I will decrement the
1409	 * keysock_flushdump.  When I decrement it to 0, I will pass the
1410	 * FLUSH/DUMP message back up to the PF_KEY sockets.  Because I will
1411	 * pass down the right SA type to the consumer (either its own, or
1412	 * that of UNSPEC), the right one will be reflected from each consumer,
1413	 * and accordingly back to the socket.
1414	 */
1415
1416	mutex_enter(&keystack->keystack_consumers_lock);
1417	for (i = start; i <= finish; i++) {
1418		if (keystack->keystack_consumers[i] != NULL) {
1419			mp1 = copymsg(mp);
1420			if (mp1 == NULL) {
1421				ks0dbg(("SADB_FLUSH copymsg() failed.\n"));
1422				/*
1423				 * Error?  And what about outstanding
1424				 * flushes?  Oh, yeah, they get sucked up and
1425				 * the counter is decremented.  Consumers
1426				 * (see keysock_passdown()) are guaranteed
1427				 * to deliver back a flush request, even if
1428				 * it's an error.
1429				 */
1430				keysock_error(ks, mp, ENOMEM,
1431				    SADB_X_DIAGNOSTIC_NONE);
1432				return;
1433			}
1434			/*
1435			 * Because my entry conditions are met above, the
1436			 * following assertion should hold true.
1437			 */
1438			mutex_enter(&keystack->keystack_consumers[i]->kc_lock);
1439			ASSERT((keystack->keystack_consumers[i]->kc_flags &
1440			    KC_FLUSHING) == 0);
1441			keystack->keystack_consumers[i]->kc_flags |=
1442			    KC_FLUSHING;
1443			mutex_exit(&(keystack->keystack_consumers[i]->kc_lock));
1444			/* Always increment the number of flushes... */
1445			keystack->keystack_flushdump++;
1446			/* Guaranteed to return a message. */
1447			keysock_passdown(ks, mp1, i, extv, B_TRUE);
1448		} else if (start == finish) {
1449			/*
1450			 * In case where start == finish, and there's no
1451			 * consumer, should we force an error?  Yes.
1452			 */
1453			mutex_exit(&keystack->keystack_consumers_lock);
1454			keysock_error(ks, mp, EINVAL,
1455			    SADB_X_DIAGNOSTIC_UNKNOWN_SATYPE);
1456			return;
1457		}
1458	}
1459	mutex_exit(&keystack->keystack_consumers_lock);
1460
1461	if (keystack->keystack_flushdump == 0) {
1462		/*
1463		 * There were no consumers at all for this message.
1464		 * XXX For now return ESRCH.
1465		 */
1466		keysock_error(ks, mp, ESRCH, SADB_X_DIAGNOSTIC_NO_SADBS);
1467	} else {
1468		/* Otherwise, free the original message. */
1469		freemsg(mp);
1470	}
1471}
1472
1473/*
1474 * Get the right diagnostic for a duplicate.  Should probably use a static
1475 * table lookup.
1476 */
1477int
1478keysock_duplicate(int ext_type)
1479{
1480	int rc = 0;
1481
1482	switch (ext_type) {
1483	case SADB_EXT_ADDRESS_SRC:
1484		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SRC;
1485		break;
1486	case SADB_EXT_ADDRESS_DST:
1487		rc = SADB_X_DIAGNOSTIC_DUPLICATE_DST;
1488		break;
1489	case SADB_X_EXT_ADDRESS_INNER_SRC:
1490		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_SRC;
1491		break;
1492	case SADB_X_EXT_ADDRESS_INNER_DST:
1493		rc = SADB_X_DIAGNOSTIC_DUPLICATE_INNER_DST;
1494		break;
1495	case SADB_EXT_SA:
1496		rc = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
1497		break;
1498	case SADB_EXT_SPIRANGE:
1499		rc = SADB_X_DIAGNOSTIC_DUPLICATE_RANGE;
1500		break;
1501	case SADB_EXT_KEY_AUTH:
1502		rc = SADB_X_DIAGNOSTIC_DUPLICATE_AKEY;
1503		break;
1504	case SADB_EXT_KEY_ENCRYPT:
1505		rc = SADB_X_DIAGNOSTIC_DUPLICATE_EKEY;
1506		break;
1507	}
1508	return (rc);
1509}
1510
1511/*
1512 * Get the right diagnostic for a reality check failure.  Should probably use
1513 * a static table lookup.
1514 */
1515int
1516keysock_malformed(int ext_type)
1517{
1518	int rc = 0;
1519
1520	switch (ext_type) {
1521	case SADB_EXT_ADDRESS_SRC:
1522		rc = SADB_X_DIAGNOSTIC_MALFORMED_SRC;
1523		break;
1524	case SADB_EXT_ADDRESS_DST:
1525		rc = SADB_X_DIAGNOSTIC_MALFORMED_DST;
1526		break;
1527	case SADB_X_EXT_ADDRESS_INNER_SRC:
1528		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_SRC;
1529		break;
1530	case SADB_X_EXT_ADDRESS_INNER_DST:
1531		rc = SADB_X_DIAGNOSTIC_MALFORMED_INNER_DST;
1532		break;
1533	case SADB_EXT_SA:
1534		rc = SADB_X_DIAGNOSTIC_MALFORMED_SA;
1535		break;
1536	case SADB_EXT_SPIRANGE:
1537		rc = SADB_X_DIAGNOSTIC_MALFORMED_RANGE;
1538		break;
1539	case SADB_EXT_KEY_AUTH:
1540		rc = SADB_X_DIAGNOSTIC_MALFORMED_AKEY;
1541		break;
1542	case SADB_EXT_KEY_ENCRYPT:
1543		rc = SADB_X_DIAGNOSTIC_MALFORMED_EKEY;
1544		break;
1545	}
1546	return (rc);
1547}
1548
1549/*
1550 * Keysock massaging of an inverse ACQUIRE.  Consult policy,
1551 * and construct an appropriate response.
1552 */
1553static void
1554keysock_inverse_acquire(mblk_t *mp, sadb_msg_t *samsg, sadb_ext_t *extv[],
1555    keysock_t *ks)
1556{
1557	mblk_t *reply_mp;
1558	keysock_stack_t	*keystack = ks->keysock_keystack;
1559
1560	/*
1561	 * Reality check things...
1562	 */
1563	if (extv[SADB_EXT_ADDRESS_SRC] == NULL) {
1564		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_SRC);
1565		return;
1566	}
1567	if (extv[SADB_EXT_ADDRESS_DST] == NULL) {
1568		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_MISSING_DST);
1569		return;
1570	}
1571
1572	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] != NULL &&
1573	    extv[SADB_X_EXT_ADDRESS_INNER_DST] == NULL) {
1574		keysock_error(ks, mp, EINVAL,
1575		    SADB_X_DIAGNOSTIC_MISSING_INNER_DST);
1576		return;
1577	}
1578
1579	if (extv[SADB_X_EXT_ADDRESS_INNER_SRC] == NULL &&
1580	    extv[SADB_X_EXT_ADDRESS_INNER_DST] != NULL) {
1581		keysock_error(ks, mp, EINVAL,
1582		    SADB_X_DIAGNOSTIC_MISSING_INNER_SRC);
1583		return;
1584	}
1585
1586	reply_mp = ipsec_construct_inverse_acquire(samsg, extv,
1587	    keystack->keystack_netstack);
1588
1589	if (reply_mp != NULL) {
1590		freemsg(mp);
1591		keysock_passup(reply_mp, (sadb_msg_t *)reply_mp->b_rptr,
1592		    ks->keysock_serial, NULL, B_FALSE, keystack);
1593	} else {
1594		keysock_error(ks, mp, samsg->sadb_msg_errno,
1595		    samsg->sadb_x_msg_diagnostic);
1596	}
1597}
1598
1599/*
1600 * Spew an extended REGISTER down to the relevant consumers.
1601 */
1602static void
1603keysock_extended_register(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1604{
1605	sadb_x_ereg_t *ereg = (sadb_x_ereg_t *)extv[SADB_X_EXT_EREG];
1606	uint8_t *satypes, *fencepost;
1607	mblk_t *downmp;
1608	sadb_ext_t *downextv[SADB_EXT_MAX + 1];
1609	keysock_stack_t	*keystack = ks->keysock_keystack;
1610
1611	if (ks->keysock_registered[0] != 0 || ks->keysock_registered[1] != 0 ||
1612	    ks->keysock_registered[2] != 0 || ks->keysock_registered[3] != 0) {
1613		keysock_error(ks, mp, EBUSY, 0);
1614	}
1615
1616	ks->keysock_flags |= KEYSOCK_EXTENDED;
1617	if (ereg == NULL) {
1618		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1619	} else {
1620		ASSERT(mp->b_rptr + msgdsize(mp) == mp->b_wptr);
1621		fencepost = (uint8_t *)mp->b_wptr;
1622		satypes = ereg->sadb_x_ereg_satypes;
1623		while (*satypes != SADB_SATYPE_UNSPEC && satypes != fencepost) {
1624			downmp = copymsg(mp);
1625			if (downmp == NULL) {
1626				keysock_error(ks, mp, ENOMEM, 0);
1627				return;
1628			}
1629			/*
1630			 * Since we've made it here, keysock_get_ext will work!
1631			 */
1632			(void) keysock_get_ext(downextv,
1633			    (sadb_msg_t *)downmp->b_rptr, msgdsize(downmp),
1634			    keystack);
1635			keysock_passdown(ks, downmp, *satypes, downextv,
1636			    B_FALSE);
1637			++satypes;
1638		}
1639		freemsg(mp);
1640	}
1641
1642	/*
1643	 * Set global to indicate we prefer an extended ACQUIRE.
1644	 */
1645	atomic_inc_32(&keystack->keystack_num_extended);
1646}
1647
1648static void
1649keysock_delpair_all(keysock_t *ks, mblk_t *mp, sadb_ext_t *extv[])
1650{
1651	int i, start, finish;
1652	mblk_t *mp1 = NULL;
1653	keysock_stack_t *keystack = ks->keysock_keystack;
1654
1655	start = 0;
1656	finish = KEYSOCK_MAX_CONSUMERS - 1;
1657
1658	for (i = start; i <= finish; i++) {
1659		if (keystack->keystack_consumers[i] != NULL) {
1660			mp1 = copymsg(mp);
1661			if (mp1 == NULL) {
1662				keysock_error(ks, mp, ENOMEM,
1663				    SADB_X_DIAGNOSTIC_NONE);
1664				return;
1665			}
1666			keysock_passdown(ks, mp1, i, extv, B_FALSE);
1667		}
1668	}
1669}
1670
1671/*
1672 * Handle PF_KEY messages.
1673 */
1674static void
1675keysock_parse(queue_t *q, mblk_t *mp)
1676{
1677	sadb_msg_t *samsg;
1678	sadb_ext_t *extv[SADB_EXT_MAX + 1];
1679	keysock_t *ks = (keysock_t *)q->q_ptr;
1680	uint_t msgsize;
1681	uint8_t satype;
1682	keysock_stack_t	*keystack = ks->keysock_keystack;
1683
1684	/* Make sure I'm a PF_KEY socket.  (i.e. nothing's below me) */
1685	ASSERT(WR(q)->q_next == NULL);
1686
1687	samsg = (sadb_msg_t *)mp->b_rptr;
1688	ks2dbg(keystack, ("Received possible PF_KEY message, type %d.\n",
1689	    samsg->sadb_msg_type));
1690
1691	msgsize = SADB_64TO8(samsg->sadb_msg_len);
1692
1693	if (msgdsize(mp) != msgsize) {
1694		/*
1695		 * Message len incorrect w.r.t. actual size.  Send an error
1696		 * (EMSGSIZE).	It may be necessary to massage things a
1697		 * bit.	 For example, if the sadb_msg_type is hosed,
1698		 * I need to set it to SADB_RESERVED to get delivery to
1699		 * do the right thing.	Then again, maybe just letting
1700		 * the error delivery do the right thing.
1701		 */
1702		ks2dbg(keystack,
1703		    ("mblk (%lu) and base (%d) message sizes don't jibe.\n",
1704		    msgdsize(mp), msgsize));
1705		keysock_error(ks, mp, EMSGSIZE, SADB_X_DIAGNOSTIC_NONE);
1706		return;
1707	}
1708
1709	if (msgsize > (uint_t)(mp->b_wptr - mp->b_rptr)) {
1710		/* Get all message into one mblk. */
1711		if (pullupmsg(mp, -1) == 0) {
1712			/*
1713			 * Something screwy happened.
1714			 */
1715			ks3dbg(keystack,
1716			    ("keysock_parse: pullupmsg() failed.\n"));
1717			return;
1718		} else {
1719			samsg = (sadb_msg_t *)mp->b_rptr;
1720		}
1721	}
1722
1723	switch (keysock_get_ext(extv, samsg, msgsize, keystack)) {
1724	case KGE_DUP:
1725		/* Handle duplicate extension. */
1726		ks1dbg(keystack, ("Got duplicate extension of type %d.\n",
1727		    extv[0]->sadb_ext_type));
1728		keysock_error(ks, mp, EINVAL,
1729		    keysock_duplicate(extv[0]->sadb_ext_type));
1730		return;
1731	case KGE_UNK:
1732		/* Handle unknown extension. */
1733		ks1dbg(keystack, ("Got unknown extension of type %d.\n",
1734		    extv[0]->sadb_ext_type));
1735		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_EXT);
1736		return;
1737	case KGE_LEN:
1738		/* Length error. */
1739		ks1dbg(keystack,
1740		    ("Length %d on extension type %d overrun or 0.\n",
1741		    extv[0]->sadb_ext_len, extv[0]->sadb_ext_type));
1742		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_BAD_EXTLEN);
1743		return;
1744	case KGE_CHK:
1745		/* Reality check failed. */
1746		ks1dbg(keystack,
1747		    ("Reality check failed on extension type %d.\n",
1748		    extv[0]->sadb_ext_type));
1749		keysock_error(ks, mp, EINVAL,
1750		    keysock_malformed(extv[0]->sadb_ext_type));
1751		return;
1752	default:
1753		/* Default case is no errors. */
1754		break;
1755	}
1756
1757	switch (samsg->sadb_msg_type) {
1758	case SADB_REGISTER:
1759		/*
1760		 * There's a semantic weirdness in that a message OTHER than
1761		 * the return REGISTER message may be passed up if I set the
1762		 * registered bit BEFORE I pass it down.
1763		 *
1764		 * SOOOO, I'll not twiddle any registered bits until I see
1765		 * the upbound REGISTER (with a serial number in it).
1766		 */
1767		if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1768			/* Handle extended register here. */
1769			keysock_extended_register(ks, mp, extv);
1770			return;
1771		} else if (ks->keysock_flags & KEYSOCK_EXTENDED) {
1772			keysock_error(ks, mp, EBUSY, 0);
1773			return;
1774		}
1775		/* FALLTHRU */
1776	case SADB_GETSPI:
1777	case SADB_ADD:
1778	case SADB_UPDATE:
1779	case SADB_X_UPDATEPAIR:
1780	case SADB_DELETE:
1781	case SADB_X_DELPAIR:
1782	case SADB_GET:
1783		/*
1784		 * Pass down to appropriate consumer.
1785		 */
1786		if (samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC)
1787			keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1788			    B_FALSE);
1789		else keysock_error(ks, mp, EINVAL,
1790		    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1791		return;
1792	case SADB_X_DELPAIR_STATE:
1793		if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1794			keysock_delpair_all(ks, mp, extv);
1795		} else {
1796			keysock_passdown(ks, mp, samsg->sadb_msg_satype, extv,
1797			    B_FALSE);
1798		}
1799		return;
1800	case SADB_ACQUIRE:
1801		/*
1802		 * If I _receive_ an acquire, this means I should spread it
1803		 * out to registered sockets.  Unless there's an errno...
1804		 *
1805		 * Need ADDRESS, may have ID, SENS, and PROP, unless errno,
1806		 * in which case there should be NO extensions.
1807		 *
1808		 * Return to registered.
1809		 */
1810		if (samsg->sadb_msg_errno != 0) {
1811			satype = samsg->sadb_msg_satype;
1812			if (satype == SADB_SATYPE_UNSPEC) {
1813				if (!(ks->keysock_flags & KEYSOCK_EXTENDED)) {
1814					keysock_error(ks, mp, EINVAL,
1815					    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1816					return;
1817				}
1818				/*
1819				 * Reassign satype based on the first
1820				 * flags that KEYSOCK_SETREG says.
1821				 */
1822				while (satype <= SADB_SATYPE_MAX) {
1823					if (KEYSOCK_ISREG(ks, satype))
1824						break;
1825					satype++;
1826				}
1827				if (satype > SADB_SATYPE_MAX) {
1828					keysock_error(ks, mp, EBUSY, 0);
1829					return;
1830				}
1831			}
1832			keysock_passdown(ks, mp, satype, extv, B_FALSE);
1833		} else {
1834			if (samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC) {
1835				keysock_error(ks, mp, EINVAL,
1836				    SADB_X_DIAGNOSTIC_SATYPE_NEEDED);
1837			} else {
1838				keysock_passup(mp, samsg, 0, NULL, B_FALSE,
1839				    keystack);
1840			}
1841		}
1842		return;
1843	case SADB_EXPIRE:
1844		/*
1845		 * If someone sends this in, then send out to all senders.
1846		 * (Save maybe ESP or AH, I have to be careful here.)
1847		 *
1848		 * Need ADDRESS, may have ID and SENS.
1849		 *
1850		 * XXX for now this is unsupported.
1851		 */
1852		break;
1853	case SADB_FLUSH:
1854		/*
1855		 * Nuke all SAs.
1856		 *
1857		 * No extensions at all.  Return to all listeners.
1858		 *
1859		 * Question:	Should I hold a lock here to prevent
1860		 *		additions/deletions while flushing?
1861		 * Answer:	No.  (See keysock_passdown() for details.)
1862		 */
1863		if (extv[0] != NULL) {
1864			/*
1865			 * FLUSH messages shouldn't have extensions.
1866			 * Return EINVAL.
1867			 */
1868			ks2dbg(keystack, ("FLUSH message with extension.\n"));
1869			keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_NO_EXT);
1870			return;
1871		}
1872
1873		/* Passing down of DUMP/FLUSH messages are special. */
1874		qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1875		return;
1876	case SADB_DUMP:	 /* not used by normal applications */
1877		if ((extv[0] != NULL) &&
1878		    ((msgsize >
1879		    (sizeof (sadb_msg_t) + sizeof (sadb_x_edump_t))) ||
1880		    (extv[SADB_X_EXT_EDUMP] == NULL))) {
1881				keysock_error(ks, mp, EINVAL,
1882				    SADB_X_DIAGNOSTIC_NO_EXT);
1883				return;
1884		}
1885		qwriter(q, mp, keysock_do_flushdump, PERIM_INNER);
1886		return;
1887	case SADB_X_PROMISC:
1888		/*
1889		 * Promiscuous processing message.
1890		 */
1891		if (samsg->sadb_msg_satype == 0)
1892			ks->keysock_flags &= ~KEYSOCK_PROMISC;
1893		else
1894			ks->keysock_flags |= KEYSOCK_PROMISC;
1895		keysock_passup(mp, samsg, ks->keysock_serial, NULL, B_FALSE,
1896		    keystack);
1897		return;
1898	case SADB_X_INVERSE_ACQUIRE:
1899		keysock_inverse_acquire(mp, samsg, extv, ks);
1900		return;
1901	default:
1902		ks2dbg(keystack, ("Got unknown message type %d.\n",
1903		    samsg->sadb_msg_type));
1904		keysock_error(ks, mp, EINVAL, SADB_X_DIAGNOSTIC_UNKNOWN_MSG);
1905		return;
1906	}
1907
1908	/* As a placeholder... */
1909	ks0dbg(("keysock_parse():  Hit EOPNOTSUPP\n"));
1910	keysock_error(ks, mp, EOPNOTSUPP, SADB_X_DIAGNOSTIC_NONE);
1911}
1912
1913/*
1914 * wput routing for PF_KEY/keysock/whatever.  Unlike the routing socket,
1915 * I don't convert to ioctl()'s for IP.  I am the end-all driver as far
1916 * as PF_KEY sockets are concerned.  I do some conversion, but not as much
1917 * as IP/rts does.
1918 */
1919static int
1920keysock_wput(queue_t *q, mblk_t *mp)
1921{
1922	uchar_t *rptr = mp->b_rptr;
1923	mblk_t *mp1;
1924	keysock_t *ks;
1925	keysock_stack_t	*keystack;
1926
1927	if (WR(q)->q_next) {
1928		keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
1929		keystack = kc->kc_keystack;
1930
1931		ks3dbg(keystack, ("In keysock_wput\n"));
1932
1933		/*
1934		 * We shouldn't get writes on a consumer instance.
1935		 * But for now, just passthru.
1936		 */
1937		ks1dbg(keystack, ("Huh?  wput for an consumer instance (%d)?\n",
1938		    kc->kc_sa_type));
1939		putnext(q, mp);
1940		return (0);
1941	}
1942	ks = (keysock_t *)q->q_ptr;
1943	keystack = ks->keysock_keystack;
1944
1945	ks3dbg(keystack, ("In keysock_wput\n"));
1946
1947	switch (mp->b_datap->db_type) {
1948	case M_DATA:
1949		/*
1950		 * Silently discard.
1951		 */
1952		ks2dbg(keystack, ("raw M_DATA in keysock.\n"));
1953		freemsg(mp);
1954		return (0);
1955	case M_PROTO:
1956	case M_PCPROTO:
1957		if ((mp->b_wptr - rptr) >= sizeof (struct T_data_req)) {
1958			if (((union T_primitives *)rptr)->type == T_DATA_REQ) {
1959				if ((mp1 = mp->b_cont) == NULL) {
1960					/* No data after T_DATA_REQ. */
1961					ks2dbg(keystack,
1962					    ("No data after DATA_REQ.\n"));
1963					freemsg(mp);
1964					return (0);
1965				}
1966				freeb(mp);
1967				mp = mp1;
1968				ks2dbg(keystack, ("T_DATA_REQ\n"));
1969				break;	/* Out of switch. */
1970			}
1971		}
1972		/* FALLTHRU */
1973	default:
1974		ks3dbg(keystack, ("In default wput case (%d %d).\n",
1975		    mp->b_datap->db_type, ((union T_primitives *)rptr)->type));
1976		keysock_wput_other(q, mp);
1977		return (0);
1978	}
1979
1980	/* I now have a PF_KEY message in an M_DATA block, pointed to by mp. */
1981	keysock_parse(q, mp);
1982	return (0);
1983}
1984
1985/* BELOW THIS LINE ARE ROUTINES INCLUDING AND RELATED TO keysock_rput(). */
1986
1987/*
1988 * Called upon receipt of a KEYSOCK_HELLO_ACK to set up the appropriate
1989 * state vectors.
1990 */
1991static void
1992keysock_link_consumer(uint8_t satype, keysock_consumer_t *kc)
1993{
1994	keysock_t *ks;
1995	keysock_stack_t	*keystack = kc->kc_keystack;
1996
1997	mutex_enter(&keystack->keystack_consumers_lock);
1998	mutex_enter(&kc->kc_lock);
1999	if (keystack->keystack_consumers[satype] != NULL) {
2000		ks0dbg((
2001		    "Hmmmm, someone closed %d before the HELLO_ACK happened.\n",
2002		    satype));
2003		/*
2004		 * Perhaps updating the new below-me consumer with what I have
2005		 * so far would work too?
2006		 */
2007		mutex_exit(&kc->kc_lock);
2008		mutex_exit(&keystack->keystack_consumers_lock);
2009	} else {
2010		/* Add new below-me consumer. */
2011		keystack->keystack_consumers[satype] = kc;
2012
2013		kc->kc_flags = 0;
2014		kc->kc_sa_type = satype;
2015		mutex_exit(&kc->kc_lock);
2016		mutex_exit(&keystack->keystack_consumers_lock);
2017
2018		/* Scan the keysock list. */
2019		mutex_enter(&keystack->keystack_list_lock);
2020		for (ks = keystack->keystack_list; ks != NULL;
2021		    ks = ks->keysock_next) {
2022			if (KEYSOCK_ISREG(ks, satype)) {
2023				/*
2024				 * XXX Perhaps send an SADB_REGISTER down on
2025				 * the socket's behalf.
2026				 */
2027				ks1dbg(keystack,
2028				    ("Socket %u registered already for "
2029				    "new consumer.\n", ks->keysock_serial));
2030			}
2031		}
2032		mutex_exit(&keystack->keystack_list_lock);
2033	}
2034}
2035
2036/*
2037 * Generate a KEYSOCK_OUT_ERR message for my consumer.
2038 */
2039static void
2040keysock_out_err(keysock_consumer_t *kc, int ks_errno, mblk_t *mp)
2041{
2042	keysock_out_err_t *kse;
2043	mblk_t *imp;
2044	keysock_stack_t	*keystack = kc->kc_keystack;
2045
2046	imp = allocb(sizeof (ipsec_info_t), BPRI_HI);
2047	if (imp == NULL) {
2048		ks1dbg(keystack, ("keysock_out_err:  Can't alloc message.\n"));
2049		return;
2050	}
2051
2052	imp->b_datap->db_type = M_CTL;
2053	imp->b_wptr += sizeof (ipsec_info_t);
2054
2055	kse = (keysock_out_err_t *)imp->b_rptr;
2056	imp->b_cont = mp;
2057	kse->ks_err_type = KEYSOCK_OUT_ERR;
2058	kse->ks_err_len = sizeof (*kse);
2059	/* Is serial necessary? */
2060	kse->ks_err_serial = 0;
2061	kse->ks_err_errno = ks_errno;
2062
2063	/*
2064	 * XXX What else do I need to do here w.r.t. information
2065	 * to tell the consumer what caused this error?
2066	 *
2067	 * I believe the answer is the PF_KEY ACQUIRE (or other) message
2068	 * attached in mp, which is appended at the end.  I believe the
2069	 * db_ref won't matter here, because the PF_KEY message is only read
2070	 * for KEYSOCK_OUT_ERR.
2071	 */
2072
2073	putnext(kc->kc_wq, imp);
2074}
2075
2076/* XXX this is a hack errno. */
2077#define	EIPSECNOSA 255
2078
2079/*
2080 * Route message (pointed by mp, header in samsg) toward appropriate
2081 * sockets.  Assume the message's creator did its job correctly.
2082 *
2083 * This should be a function that is followed by a return in its caller.
2084 * The compiler _should_ be able to use tail-call optimizations to make the
2085 * large ## of parameters not a huge deal.
2086 */
2087static void
2088keysock_passup(mblk_t *mp, sadb_msg_t *samsg, minor_t serial,
2089    keysock_consumer_t *kc, boolean_t persistent, keysock_stack_t *keystack)
2090{
2091	keysock_t *ks;
2092	uint8_t satype = samsg->sadb_msg_satype;
2093	boolean_t toall = B_FALSE, allreg = B_FALSE, allereg = B_FALSE,
2094	    setalg = B_FALSE;
2095	mblk_t *mp1;
2096	int err = EIPSECNOSA;
2097
2098	/* Convert mp, which is M_DATA, into an M_PROTO of type T_DATA_IND */
2099	mp1 = allocb(sizeof (struct T_data_req), BPRI_HI);
2100	if (mp1 == NULL) {
2101		err = ENOMEM;
2102		goto error;
2103	}
2104	mp1->b_wptr += sizeof (struct T_data_req);
2105	((struct T_data_ind *)mp1->b_rptr)->PRIM_type = T_DATA_IND;
2106	((struct T_data_ind *)mp1->b_rptr)->MORE_flag = 0;
2107	mp1->b_datap->db_type = M_PROTO;
2108	mp1->b_cont = mp;
2109	mp = mp1;
2110
2111	switch (samsg->sadb_msg_type) {
2112	case SADB_FLUSH:
2113	case SADB_GETSPI:
2114	case SADB_UPDATE:
2115	case SADB_X_UPDATEPAIR:
2116	case SADB_ADD:
2117	case SADB_DELETE:
2118	case SADB_X_DELPAIR:
2119	case SADB_EXPIRE:
2120		/*
2121		 * These are most likely replies.  Don't worry about
2122		 * KEYSOCK_OUT_ERR handling.  Deliver to all sockets.
2123		 */
2124		ks3dbg(keystack,
2125		    ("Delivering normal message (%d) to all sockets.\n",
2126		    samsg->sadb_msg_type));
2127		toall = B_TRUE;
2128		break;
2129	case SADB_REGISTER:
2130		/*
2131		 * REGISTERs come up for one of three reasons:
2132		 *
2133		 *	1.) In response to a normal SADB_REGISTER
2134		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
2135		 *		    serial != 0)
2136		 *		Deliver to normal SADB_REGISTERed sockets.
2137		 *	2.) In response to an extended REGISTER
2138		 *		(samsg->sadb_msg_satype == SADB_SATYPE_UNSPEC)
2139		 *		Deliver to extended REGISTERed socket.
2140		 *	3.) Spontaneous algorithm changes
2141		 *		(samsg->sadb_msg_satype != SADB_SATYPE_UNSPEC &&
2142		 *		    serial == 0)
2143		 *		Deliver to REGISTERed sockets of all sorts.
2144		 */
2145		if (kc == NULL) {
2146			/* Here because of keysock_error() call. */
2147			ASSERT(samsg->sadb_msg_errno != 0);
2148			break;	/* Out of switch. */
2149		}
2150		ks3dbg(keystack, ("Delivering REGISTER.\n"));
2151		if (satype == SADB_SATYPE_UNSPEC) {
2152			/* REGISTER Reason #2 */
2153			allereg = B_TRUE;
2154			/*
2155			 * Rewhack SA type so PF_KEY socket holder knows what
2156			 * consumer generated this algorithm list.
2157			 */
2158			satype = kc->kc_sa_type;
2159			samsg->sadb_msg_satype = satype;
2160			setalg = B_TRUE;
2161		} else if (serial == 0) {
2162			/* REGISTER Reason #3 */
2163			allreg = B_TRUE;
2164			allereg = B_TRUE;
2165		} else {
2166			/* REGISTER Reason #1 */
2167			allreg = B_TRUE;
2168			setalg = B_TRUE;
2169		}
2170		break;
2171	case SADB_ACQUIRE:
2172		/*
2173		 * ACQUIREs are either extended (sadb_msg_satype == 0) or
2174		 * regular (sadb_msg_satype != 0).  And we're guaranteed
2175		 * that serial == 0 for an ACQUIRE.
2176		 */
2177		ks3dbg(keystack, ("Delivering ACQUIRE.\n"));
2178		allereg = (satype == SADB_SATYPE_UNSPEC);
2179		allreg = !allereg;
2180		/*
2181		 * Corner case - if we send a regular ACQUIRE and there's
2182		 * extended ones registered, don't send an error down to
2183		 * consumers if nobody's listening and prematurely destroy
2184		 * their ACQUIRE record.  This might be too hackish of a
2185		 * solution.
2186		 */
2187		if (allreg && keystack->keystack_num_extended > 0)
2188			err = 0;
2189		break;
2190	case SADB_X_PROMISC:
2191	case SADB_X_INVERSE_ACQUIRE:
2192	case SADB_DUMP:
2193	case SADB_GET:
2194	default:
2195		/*
2196		 * Deliver to the sender and promiscuous only.
2197		 */
2198		ks3dbg(keystack, ("Delivering sender/promisc only (%d).\n",
2199		    samsg->sadb_msg_type));
2200		break;
2201	}
2202
2203	mutex_enter(&keystack->keystack_list_lock);
2204	for (ks = keystack->keystack_list; ks != NULL; ks = ks->keysock_next) {
2205		/* Delivery loop. */
2206
2207		/*
2208		 * Check special keysock-setting cases (REGISTER replies)
2209		 * here.
2210		 */
2211		if (setalg && serial == ks->keysock_serial) {
2212			ASSERT(kc != NULL);
2213			ASSERT(kc->kc_sa_type == satype);
2214			KEYSOCK_SETREG(ks, satype);
2215		}
2216
2217		/*
2218		 * NOLOOP takes precedence over PROMISC.  So if you've set
2219		 * !SO_USELOOPBACK, don't expect to see any data...
2220		 */
2221		if (ks->keysock_flags & KEYSOCK_NOLOOP)
2222			continue;
2223
2224		/*
2225		 * Messages to all, or promiscuous sockets just GET the
2226		 * message.  Perform rules-type checking iff it's not for all
2227		 * listeners or the socket is in promiscuous mode.
2228		 *
2229		 * NOTE:Because of the (kc != NULL && ISREG()), make sure
2230		 *	extended ACQUIREs arrive off a consumer that is
2231		 *	part of the extended REGISTER set of consumers.
2232		 */
2233		if (serial != ks->keysock_serial &&
2234		    !toall &&
2235		    !(ks->keysock_flags & KEYSOCK_PROMISC) &&
2236		    !((ks->keysock_flags & KEYSOCK_EXTENDED) ?
2237		    allereg : allreg && kc != NULL &&
2238		    KEYSOCK_ISREG(ks, kc->kc_sa_type)))
2239			continue;
2240
2241		mp1 = dupmsg(mp);
2242		if (mp1 == NULL) {
2243			ks2dbg(keystack, (
2244			    "keysock_passup():  dupmsg() failed.\n"));
2245			mp1 = mp;
2246			mp = NULL;
2247			err = ENOMEM;
2248		}
2249
2250		/*
2251		 * At this point, we can deliver or attempt to deliver
2252		 * this message.  We're free of obligation to report
2253		 * no listening PF_KEY sockets.  So set err to 0.
2254		 */
2255		err = 0;
2256
2257		/*
2258		 * See if we canputnext(), as well as see if the message
2259		 * needs to be queued if we can't.
2260		 */
2261		if (!canputnext(ks->keysock_rq)) {
2262			if (persistent) {
2263				if (putq(ks->keysock_rq, mp1) == 0) {
2264					ks1dbg(keystack, (
2265					    "keysock_passup: putq failed.\n"));
2266				} else {
2267					continue;
2268				}
2269			}
2270			freemsg(mp1);
2271			continue;
2272		}
2273
2274		ks3dbg(keystack,
2275		    ("Putting to serial %d.\n", ks->keysock_serial));
2276		/*
2277		 * Unlike the specific keysock instance case, this
2278		 * will only hit for listeners, so we will only
2279		 * putnext() if we can.
2280		 */
2281		putnext(ks->keysock_rq, mp1);
2282		if (mp == NULL)
2283			break;	/* out of for loop. */
2284	}
2285	mutex_exit(&keystack->keystack_list_lock);
2286
2287error:
2288	if ((err != 0) && (kc != NULL)) {
2289		/*
2290		 * Generate KEYSOCK_OUT_ERR for consumer.
2291		 * Basically, I send this back if I have not been able to
2292		 * transmit (for whatever reason)
2293		 */
2294		ks1dbg(keystack,
2295		    ("keysock_passup():  No registered of type %d.\n",
2296		    satype));
2297		if (mp != NULL) {
2298			if (mp->b_datap->db_type == M_PROTO) {
2299				mp1 = mp;
2300				mp = mp->b_cont;
2301				freeb(mp1);
2302			}
2303			/*
2304			 * Do a copymsg() because people who get
2305			 * KEYSOCK_OUT_ERR may alter the message contents.
2306			 */
2307			mp1 = copymsg(mp);
2308			if (mp1 == NULL) {
2309				ks2dbg(keystack,
2310				    ("keysock_passup: copymsg() failed.\n"));
2311				mp1 = mp;
2312				mp = NULL;
2313			}
2314			keysock_out_err(kc, err, mp1);
2315		}
2316	}
2317
2318	/*
2319	 * XXX Blank the message somehow.  This is difficult because we don't
2320	 * know at this point if the message has db_ref > 1, etc.
2321	 *
2322	 * Optimally, keysock messages containing actual keying material would
2323	 * be allocated with esballoc(), with a zeroing free function.
2324	 */
2325	if (mp != NULL)
2326		freemsg(mp);
2327}
2328
2329/*
2330 * Keysock's read service procedure is there only for PF_KEY reply
2331 * messages that really need to reach the top.
2332 */
2333static int
2334keysock_rsrv(queue_t *q)
2335{
2336	mblk_t *mp;
2337
2338	while ((mp = getq(q)) != NULL) {
2339		if (canputnext(q)) {
2340			putnext(q, mp);
2341		} else {
2342			(void) putbq(q, mp);
2343			return (0);
2344		}
2345	}
2346	return (0);
2347}
2348
2349/*
2350 * The read procedure should only be invoked by a keysock consumer, like
2351 * ESP, AH, etc.  I should only see KEYSOCK_OUT and KEYSOCK_HELLO_ACK
2352 * messages on my read queues.
2353 */
2354static int
2355keysock_rput(queue_t *q, mblk_t *mp)
2356{
2357	keysock_consumer_t *kc = (keysock_consumer_t *)q->q_ptr;
2358	ipsec_info_t *ii;
2359	keysock_hello_ack_t *ksa;
2360	minor_t serial;
2361	mblk_t *mp1;
2362	sadb_msg_t *samsg;
2363	keysock_stack_t	*keystack = kc->kc_keystack;
2364
2365	/* Make sure I'm a consumer instance.  (i.e. something's below me) */
2366	ASSERT(WR(q)->q_next != NULL);
2367
2368	if (mp->b_datap->db_type != M_CTL) {
2369		/*
2370		 * Keysock should only see keysock consumer interface
2371		 * messages (see ipsec_info.h) on its read procedure.
2372		 * To be robust, however, putnext() up so the STREAM head can
2373		 * deal with it appropriately.
2374		 */
2375		ks1dbg(keystack,
2376		    ("Hmmm, a non M_CTL (%d, 0x%x) on keysock_rput.\n",
2377		    mp->b_datap->db_type, mp->b_datap->db_type));
2378		putnext(q, mp);
2379		return (0);
2380	}
2381
2382	ii = (ipsec_info_t *)mp->b_rptr;
2383
2384	switch (ii->ipsec_info_type) {
2385	case KEYSOCK_OUT:
2386		/*
2387		 * A consumer needs to pass a response message or an ACQUIRE
2388		 * UP.  I assume that the consumer has done the right
2389		 * thing w.r.t. message creation, etc.
2390		 */
2391		serial = ((keysock_out_t *)mp->b_rptr)->ks_out_serial;
2392		mp1 = mp->b_cont;	/* Get M_DATA portion. */
2393		freeb(mp);
2394		samsg = (sadb_msg_t *)mp1->b_rptr;
2395		if (samsg->sadb_msg_type == SADB_FLUSH ||
2396		    (samsg->sadb_msg_type == SADB_DUMP &&
2397		    samsg->sadb_msg_len == SADB_8TO64(sizeof (*samsg)))) {
2398			/*
2399			 * If I'm an end-of-FLUSH or an end-of-DUMP marker...
2400			 */
2401			ASSERT(keystack->keystack_flushdump != 0);
2402						/* Am I flushing? */
2403
2404			mutex_enter(&kc->kc_lock);
2405			kc->kc_flags &= ~KC_FLUSHING;
2406			mutex_exit(&kc->kc_lock);
2407
2408			if (samsg->sadb_msg_errno != 0)
2409				keystack->keystack_flushdump_errno =
2410				    samsg->sadb_msg_errno;
2411
2412			/*
2413			 * Lower the atomic "flushing" count.  If it's
2414			 * the last one, send up the end-of-{FLUSH,DUMP} to
2415			 * the appropriate PF_KEY socket.
2416			 */
2417			if (atomic_dec_32_nv(&keystack->keystack_flushdump) !=
2418			    0) {
2419				ks1dbg(keystack,
2420				    ("One flush/dump message back from %d,"
2421				    " more to go.\n", samsg->sadb_msg_satype));
2422				freemsg(mp1);
2423				return (0);
2424			}
2425
2426			samsg->sadb_msg_errno =
2427			    (uint8_t)keystack->keystack_flushdump_errno;
2428			if (samsg->sadb_msg_type == SADB_DUMP) {
2429				samsg->sadb_msg_seq = 0;
2430			}
2431		}
2432		keysock_passup(mp1, samsg, serial, kc,
2433		    (samsg->sadb_msg_type == SADB_DUMP), keystack);
2434		return (0);
2435	case KEYSOCK_HELLO_ACK:
2436		/* Aha, now we can link in the consumer! */
2437		ksa = (keysock_hello_ack_t *)ii;
2438		keysock_link_consumer(ksa->ks_hello_satype, kc);
2439		freemsg(mp);
2440		return (0);
2441	default:
2442		ks1dbg(keystack, ("Hmmm, an IPsec info I'm not used to, 0x%x\n",
2443		    ii->ipsec_info_type));
2444		putnext(q, mp);
2445	}
2446	return (0);
2447}
2448
2449/*
2450 * So we can avoid external linking problems....
2451 */
2452boolean_t
2453keysock_extended_reg(netstack_t *ns)
2454{
2455	keysock_stack_t	*keystack = ns->netstack_keysock;
2456
2457	return (keystack->keystack_num_extended != 0);
2458}
2459
2460uint32_t
2461keysock_next_seq(netstack_t *ns)
2462{
2463	keysock_stack_t	*keystack = ns->netstack_keysock;
2464
2465	return (atomic_dec_32_nv(&keystack->keystack_acquire_seq));
2466}
2467