1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25#include <sys/systm.h>
26#include <sys/sysmacros.h>
27#include <sys/cmn_err.h>
28#include <sys/disp.h>
29#include <sys/list.h>
30#include <sys/mutex.h>
31#include <sys/note.h>
32#include <sys/rwlock.h>
33#include <sys/stropts.h>
34#include <sys/taskq.h>
35#include <sys/socketvar.h>
36#include <fs/sockfs/sockcommon.h>
37#include <fs/sockfs/sockfilter_impl.h>
38
39/*
40 * Socket Filter Framework
41 *
42 * Socket filter entry (sof_entry_t):
43 *
 *   There exists one entry for each configured filter (done via soconfig(1M)),
 *   and they are all in sof_entry_list. In addition to the global list, each
 *   sockparams entry maintains a list of filters that are interested in that
 *   particular socket type. So the filter entry may be referenced by multiple
 *   sockparams. The set of sockparams referencing a filter may change as
 *   socket types are added and/or removed from the system. Both sof_entry_list
 *   and the sockparams lists are protected by sockconf_lock.
51 *
52 *   Each filter entry has a ref count which is incremented whenever a filter
53 *   is attached to a socket. An entry is marked SOFEF_CONDEMED when it is
54 *   unconfigured, which will result in the entry being freed when its ref
55 *   count reaches zero.
56 *
57 * Socket filter module (sof_module_t):
58 *
59 *   Modules are created by sof_register() and placed in sof_module_list,
60 *   which is protected by sof_module_lock. Each module has a reference count
61 *   that is incremented when a filter entry is using the module. A module
62 *   can be destroyed by sof_unregister() only when its ref count is zero.
63 *
64 * Socket filter instance (sof_instance_t):
65 *
66 *   Whenever a filter is attached to a socket (sonode), a new instance is
67 *   created. The socket is guaranteed to be single threaded when filters are
68 *   being attached/detached. The instance uses the sonode's so_lock for
69 *   protection.
70 *
71 *   The lifetime of an instance is the same as the socket it's attached to.
72 *
73 * How things link together:
74 *
75 *      sockparams.sp_{auto,prog}_filters -> sp_filter_t -> sp_filter_t
76 *      ^                                    |              |
77 *      |                                    |              |
78 *   sonode.so_filter_top -> sof_instance_t  |              |
79 *                                     |     |              |
80 *                                     v     v              v
81 *    sof_entry_list -> sof_entry_t -> sof_entry -> ... -> sof_entry_t
82 *                                     |
83 *                                     v
84 *           sof_module_list -> sof_module_t -> ... -> sof_module_t
85 */
86
static list_t 	sof_entry_list;		/* list of configured filters */

static list_t	sof_module_list;	/* list of loaded filter modules */
static kmutex_t	sof_module_lock;	/* protect the module list */

/* Backing storage and handle for the global "sockfilter" kstat */
static sof_kstat_t	sof_stat;
static kstat_t 		*sof_stat_ksp;

#ifdef DEBUG
static int socket_filter_debug = 0;
#endif

/*
 * A connection that has been deferred for more than `sof_defer_drop_time'
 * ticks can be dropped to make room for new connections. A connection that
 * is to be dropped is moved over to `sof_close_deferred_list' where it will
 * be closed by sof_close_deferred() (which is running on a taskq). Connections
 * will not be moved over to the close list if it grows larger than
 * `sof_close_deferred_max_backlog'.
 */
clock_t		sof_defer_drop_time = 3000;
uint_t		sof_close_deferred_max_backlog = 1000;

/*
 * State of the deferred close machinery. The list, the backlog counter and
 * the running flag are all updated while holding sof_close_deferred_lock.
 */
taskq_t		*sof_close_deferred_taskq;	/* runs sof_close_deferred() */
boolean_t	sof_close_deferred_running;	/* a drain is in progress */
uint_t		sof_close_deferred_backlog;	/* # of sonodes on the list */
list_t		sof_close_deferred_list;	/* sonodes waiting to be closed */
kmutex_t	sof_close_deferred_lock;

static void	sof_close_deferred(void *);

/* Filter module reference management */
static void		sof_module_rele(sof_module_t *);
static sof_module_t 	*sof_module_hold_by_name(const char *, const char *);

/* Filter entry helpers */
static int		sof_entry_load_module(sof_entry_t *);
static void 		sof_entry_hold(sof_entry_t *);
static void 		sof_entry_rele(sof_entry_t *);
static int 		sof_entry_kstat_create(sof_entry_t *);
static void 		sof_entry_kstat_destroy(sof_entry_t *);

/* Filter instance helpers */
static sof_instance_t 	*sof_instance_create(sof_entry_t *, struct sonode *);
static void		sof_instance_destroy(sof_instance_t *);
129
130static int
131sof_kstat_update(kstat_t *ksp, int rw)
132{
133	_NOTE(ARGUNUSED(ksp));
134
135	if (rw == KSTAT_WRITE)
136		return (EACCES);
137
138	sof_stat.sofks_defer_close_backlog.value.ui64 =
139	    sof_close_deferred_backlog;
140
141	return (0);
142}
143
144void
145sof_init(void)
146{
147	list_create(&sof_entry_list, sizeof (sof_entry_t),
148	    offsetof(sof_entry_t, sofe_node));
149	list_create(&sof_module_list, sizeof (sof_module_t),
150	    offsetof(sof_module_t, sofm_node));
151	list_create(&sof_close_deferred_list, sizeof (struct sonode),
152	    offsetof(struct sonode, so_acceptq_node));
153
154	sof_close_deferred_taskq = taskq_create("sof_close_deferred_taskq",
155	    1, minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE);
156	sof_close_deferred_running = B_FALSE;
157	sof_close_deferred_backlog = 0;
158
159	mutex_init(&sof_close_deferred_lock, NULL, MUTEX_DEFAULT, 0);
160	mutex_init(&sof_module_lock, NULL, MUTEX_DEFAULT, 0);
161
162	sof_stat_ksp = kstat_create("sockfs", 0, "sockfilter", "misc",
163	    KSTAT_TYPE_NAMED, sizeof (sof_kstat_t) / sizeof (kstat_named_t),
164	    KSTAT_FLAG_VIRTUAL);
165
166	if (sof_stat_ksp == NULL)
167		return;
168
169	kstat_named_init(&sof_stat.sofks_defer_closed, "defer_closed",
170	    KSTAT_DATA_UINT64);
171	kstat_named_init(&sof_stat.sofks_defer_close_backlog,
172	    "defer_close_backlog", KSTAT_DATA_UINT64);
173	kstat_named_init(&sof_stat.sofks_defer_close_failed_backlog_too_big,
174	    "defer_close_failed_backlog_too_big", KSTAT_DATA_UINT64);
175
176	sof_stat_ksp->ks_data = &sof_stat;
177	sof_stat_ksp->ks_update = sof_kstat_update;
178	kstat_install(sof_stat_ksp);
179}
180
181/*
182 * Process filter options.
183 */
184static int
185sof_setsockopt_impl(struct sonode *so, int option_name,
186    const void *optval, socklen_t optlen, struct cred *cr)
187{
188	struct sockparams *sp = so->so_sockparams;
189	sof_entry_t *ent = NULL;
190	sp_filter_t *fil;
191	sof_instance_t *inst;
192	sof_rval_t rval;
193	int error;
194
195	_NOTE(ARGUNUSED(optlen));
196
197	/*
198	 * Is the filter in a state where filters can be attached?
199	 */
200	if (!(so->so_state & SS_FILOP_OK))
201		return (EINVAL);
202
203	if (option_name == FIL_ATTACH) {
204		/*
205		 * Make sure there isn't already another instance of the
206		 * same filter attached to the socket.
207		 */
208		for (inst = so->so_filter_top; inst != NULL;
209		    inst = inst->sofi_next) {
210			if (strncmp(inst->sofi_filter->sofe_name,
211			    (const char *)optval, SOF_MAXNAMELEN) == 0)
212				return (EEXIST);
213		}
214		/* Look up the filter. */
215		rw_enter(&sockconf_lock, RW_READER);
216		for (fil = list_head(&sp->sp_prog_filters); fil != NULL;
217		    fil = list_next(&sp->sp_prog_filters, fil)) {
218			ent = fil->spf_filter;
219			ASSERT(ent->sofe_flags & SOFEF_PROG);
220
221			if (strncmp(ent->sofe_name, (const char *)optval,
222			    SOF_MAXNAMELEN) == 0)
223				break;
224		}
225		/* No such filter */
226		if (fil == NULL) {
227			rw_exit(&sockconf_lock);
228			return (ENOENT);
229		}
230		inst = sof_instance_create(ent, so);
231		rw_exit(&sockconf_lock);
232
233		/* Failed to create an instance; must be out of memory */
234		if (inst == NULL)
235			return (ENOMEM);
236
237		/*
238		 * This might be the first time the filter is being used,
239		 * so try to load the module if it's not already registered.
240		 */
241		if (ent->sofe_mod == NULL &&
242		    (error = sof_entry_load_module(ent)) != 0) {
243			sof_instance_destroy(inst);
244			return (error);
245		}
246
247		/* Module loaded OK, so there must be an ops vector */
248		ASSERT(ent->sofe_mod != NULL);
249		inst->sofi_ops = &ent->sofe_mod->sofm_ops;
250
251		SOF_STAT_ADD(inst, tot_active_attach, 1);
252		if (inst->sofi_ops->sofop_attach_active != NULL) {
253			rval = inst->sofi_ops->sofop_attach_active(
254			    (sof_handle_t)inst, so->so_family, so->so_type,
255			    so->so_protocol, cr, &inst->sofi_cookie);
256			if (rval != SOF_RVAL_CONTINUE) {
257				switch (rval) {
258				case SOF_RVAL_DETACH:
259					/*
260					 * Filter does not want to to attach.
261					 * An error is returned so the user
262					 * knows the request did not go
263					 * through.
264					 */
265					error = EINVAL;
266					break;
267				default:
268					SOF_STAT_ADD(inst, attach_failures, 1);
269					/* Not a valid rval for active attach */
270					ASSERT(rval != SOF_RVAL_DEFER);
271					error = sof_rval2errno(rval);
272					break;
273				}
274				sof_instance_destroy(inst);
275				return (error);
276			}
277		}
278		return (0);
279	} else if (option_name == FIL_DETACH) {
280		for (inst = so->so_filter_top; inst != NULL;
281		    inst = inst->sofi_next) {
282
283			ent = inst->sofi_filter;
284			if (strncmp(ent->sofe_name, (const char *)optval,
285			    SOF_MAXNAMELEN) == 0)
286				break;
287		}
288		if (inst == NULL)
289			return (ENXIO);
290
291		/* automatic filters cannot be detached */
292		if (inst->sofi_filter->sofe_flags & SOFEF_AUTO)
293			return (EINVAL);
294
295		if (inst->sofi_ops->sofop_detach != NULL)
296			inst->sofi_ops->sofop_detach((sof_handle_t)inst,
297			    inst->sofi_cookie, cr);
298		sof_instance_destroy(inst);
299
300		return (0);
301	} else {
302		return (EINVAL);
303	}
304}
305
306int
307sof_setsockopt(struct sonode *so, int option_name,
308    const void *optval, socklen_t optlen, struct cred *cr)
309{
310	int error;
311
312	/*
313	 * By grabbing the lock as a writer we ensure that no other socket
314	 * operations can start while the filter stack is being manipulated.
315	 *
316	 * We do a tryenter so that in case there is an active thread we
317	 * ask the caller to try again instead of blocking here until the
318	 * other thread is done (which could be indefinitely in case of recv).
319	 */
320	if (!rw_tryenter(&so->so_fallback_rwlock, RW_WRITER)) {
321		return (EAGAIN);
322	}
323
324	/* Bail out if a fallback has taken place */
325	if (so->so_state & SS_FALLBACK_COMP)
326		error = EINVAL;
327	else
328		error = sof_setsockopt_impl(so, option_name, optval,
329		    optlen, cr);
330	rw_exit(&so->so_fallback_rwlock);
331
332	return (error);
333}
334
335/*
336 * Get filter socket options.
337 */
338static int
339sof_getsockopt_impl(struct sonode *so, int option_name,
340    void *optval, socklen_t *optlenp, struct cred *cr)
341{
342	sof_instance_t *inst;
343	struct fil_info *fi;
344	socklen_t maxsz = *optlenp;
345	int i;
346	uint_t cnt;
347
348	_NOTE(ARGUNUSED(cr));
349
350	if (option_name == FIL_LIST) {
351		fi = (struct fil_info *)optval;
352
353		if (maxsz < sizeof (*fi))
354			return (EINVAL);
355
356		for (inst = so->so_filter_top, cnt = 0; inst != NULL;
357		    inst = inst->sofi_next)
358			cnt++;
359		for (inst = so->so_filter_top, i = 0;
360		    inst != NULL && (i+1) * sizeof (*fi) <= maxsz;
361		    inst = inst->sofi_next, i++) {
362			fi[i].fi_flags =
363			    (inst->sofi_filter->sofe_flags & SOFEF_AUTO) ?
364			    FILF_AUTO : FILF_PROG;
365			if (inst->sofi_flags & SOFIF_BYPASS)
366				fi[i].fi_flags |= FILF_BYPASS;
367			(void) strncpy(fi[i].fi_name,
368			    inst->sofi_filter->sofe_name, FILNAME_MAX);
369			ASSERT(cnt > 0);
370			fi[i].fi_pos = --cnt;
371		}
372		*optlenp = i * sizeof (*fi);
373		return (0);
374	} else {
375		return (EINVAL);
376	}
377}
378
379int
380sof_getsockopt(struct sonode *so, int option_name,
381    void *optval, socklen_t *optlenp, struct cred *cr)
382{
383	int error;
384
385	/*
386	 * The fallback lock is used here to serialize set and get
387	 * filter operations.
388	 */
389	rw_enter(&so->so_fallback_rwlock, RW_READER);
390	if (so->so_state & SS_FALLBACK_COMP)
391		error = EINVAL;
392	else
393		error = sof_getsockopt_impl(so, option_name, optval, optlenp,
394		    cr);
395	rw_exit(&so->so_fallback_rwlock);
396
397	return (error);
398}
399
/*
 * The socket `so' wants to inherit the filter stack from `pso'.
 *
 * For each filter instance on `pso' a new instance is created on `so' and
 * the filter's passive attach callback (if any) is invoked with the local
 * and foreign addresses of `so'. A filter may continue, defer the
 * connection, decline to attach, or fail the attach.
 *
 * Returns 0 if all went well or an errno otherwise: ENOMEM if memory could
 * not be allocated, the error from the address downcalls, or the errno the
 * filter's return value maps to. On failure, instances that were already
 * created are left attached to `so'.
 */
int
sof_sonode_inherit_filters(struct sonode *so, struct sonode *pso)
{
	sof_instance_t *inst, *pinst;
	sof_rval_t rval;
	int error;
	struct sockaddr_in6 laddrbuf, faddrbuf;
	struct sockaddr_in6 *laddr, *faddr;
	socklen_t laddrlen, faddrlen;

	/*
	 * Make sure there is enough room to retrieve the addresses. The
	 * on-stack buffers are used unless the protocol's maximum address
	 * length is larger, in which case both buffers are heap allocated.
	 */
	if (so->so_proto_props.sopp_maxaddrlen > sizeof (laddrbuf)) {
		laddr = kmem_zalloc(so->so_proto_props.sopp_maxaddrlen,
		    KM_NOSLEEP);
		if (laddr == NULL)
			return (ENOMEM);
		faddr = kmem_zalloc(so->so_proto_props.sopp_maxaddrlen,
		    KM_NOSLEEP);
		if (faddr == NULL) {
			kmem_free(laddr, so->so_proto_props.sopp_maxaddrlen);
			return (ENOMEM);
		}
		laddrlen = faddrlen = so->so_proto_props.sopp_maxaddrlen;
	} else {
		laddrlen = faddrlen = sizeof (laddrbuf);
		laddr = &laddrbuf;
		faddr = &faddrbuf;
	}

	/* Fetch the foreign and local addresses from the protocol */
	error = (*so->so_downcalls->sd_getpeername)
	    (so->so_proto_handle, (struct sockaddr *)faddr, &faddrlen, kcred);
	if (error != 0)
		goto out;
	error = (*so->so_downcalls->sd_getsockname)
	    (so->so_proto_handle, (struct sockaddr *)laddr, &laddrlen, kcred);
	if (error != 0)
		goto out;

	/*
	 * The stack is built bottom up. Filters are allowed to modify the
	 * foreign and local addresses during attach.
	 *
	 * NOTE(review): the walk stops at the first listener filter that is
	 * marked SOFIF_BYPASS rather than skipping over it, so any filters
	 * above a bypassed one are not inherited -- confirm this is intended.
	 */
	for (pinst = pso->so_filter_bottom;
	    pinst != NULL && !(pinst->sofi_flags & SOFIF_BYPASS);
	    pinst = pinst->sofi_prev) {
		inst = sof_instance_create(pinst->sofi_filter, so);
		if (inst == NULL) {
			error = ENOMEM;
			goto out;
		}
		/*
		 * The filter module must be loaded since it's already
		 * attached to the listener.
		 */
		ASSERT(pinst->sofi_ops != NULL);
		inst->sofi_ops = pinst->sofi_ops;

		SOF_STAT_ADD(inst, tot_passive_attach, 1);
		if (inst->sofi_ops->sofop_attach_passive != NULL) {
			rval = inst->sofi_ops->sofop_attach_passive(
			    (sof_handle_t)inst,
			    (sof_handle_t)pinst, pinst->sofi_cookie,
			    (struct sockaddr *)laddr, laddrlen,
			    (struct sockaddr *)faddr, faddrlen,
			    &inst->sofi_cookie);
			if (rval != SOF_RVAL_CONTINUE) {
				if (rval == SOF_RVAL_DEFER) {
					/*
					 * Mark both the instance and the
					 * socket as deferred and record when
					 * the deferral started.
					 */
					mutex_enter(&so->so_lock);
					inst->sofi_flags |= SOFIF_DEFER;
					so->so_state |= SS_FIL_DEFER;
					mutex_exit(&so->so_lock);
					so->so_filter_defertime =
					    ddi_get_lbolt();
					SOF_STAT_ADD(inst, ndeferred, 1);
				} else if (rval == SOF_RVAL_DETACH) {
					/* filter does not want to attach */
					sof_instance_destroy(inst);
				} else {
					SOF_STAT_ADD(inst, attach_failures, 1);
					error = sof_rval2errno(rval);
					/*
					 * Filters that called attached will be
					 * destroyed when the socket goes away,
					 * after detach is called.
					 */
					goto out;
				}
			}
		}
	}

out:
	/* Release the address buffers if they were heap allocated */
	if (laddr != &laddrbuf) {
		kmem_free(laddr, so->so_proto_props.sopp_maxaddrlen);
		kmem_free(faddr, so->so_proto_props.sopp_maxaddrlen);
	}
	return (error);
}
503
504/*
505 * Attach any automatic filters to sonode `so'. Returns 0 if all went well
506 * and an errno otherwise.
507 */
508int
509sof_sonode_autoattach_filters(struct sonode *so, cred_t *cr)
510{
511	struct sockparams *sp = so->so_sockparams;
512	sp_filter_t *fil;
513	sof_instance_t *inst;
514	sof_rval_t rval;
515	int error;
516
517	/*
518	 * A created instance is added to the top of the sonode's filter
519	 * stack, so traverse the config list in reverse order.
520	 */
521	rw_enter(&sockconf_lock, RW_READER);
522	for (fil = list_tail(&sp->sp_auto_filters);
523	    fil != NULL; fil = list_prev(&sp->sp_auto_filters, fil)) {
524		ASSERT(fil->spf_filter->sofe_flags & SOFEF_AUTO);
525		if (!sof_instance_create(fil->spf_filter, so)) {
526			rw_exit(&sockconf_lock);
527			error = ENOMEM; /* must have run out of memory */
528			goto free_all;
529		}
530	}
531	rw_exit(&sockconf_lock);
532
533	/*
534	 * Notify each filter that it's being attached.
535	 */
536	inst = so->so_filter_top;
537	while (inst != NULL) {
538		sof_entry_t *ent = inst->sofi_filter;
539		sof_instance_t *ninst = inst->sofi_next;
540
541		/*
542		 * This might be the first time the filter is being used,
543		 * so try to load the module if it's not already registered.
544		 */
545		if (ent->sofe_mod == NULL &&
546		    (error = sof_entry_load_module(ent)) != 0)
547			goto free_detached;
548
549		/* Module loaded OK, so there must be an ops vector */
550		ASSERT(ent->sofe_mod != NULL);
551		inst->sofi_ops = &ent->sofe_mod->sofm_ops;
552
553		SOF_STAT_ADD(inst, tot_active_attach, 1);
554		if (inst->sofi_ops->sofop_attach_active != NULL) {
555			rval = inst->sofi_ops->sofop_attach_active(
556			    (sof_handle_t)inst, so->so_family, so->so_type,
557			    so->so_protocol, cr, &inst->sofi_cookie);
558			if (rval != SOF_RVAL_CONTINUE) {
559				switch (rval) {
560				case SOF_RVAL_DETACH:
561					/* filter does not want to attach */
562					sof_instance_destroy(inst);
563					break;
564				default:
565					SOF_STAT_ADD(inst, attach_failures, 1);
566					/* Not a valid rval for active attach */
567					ASSERT(rval != SOF_RVAL_DEFER);
568					error = sof_rval2errno(rval);
569					goto free_detached;
570				}
571			}
572		}
573		inst = ninst;
574	}
575	return (0);
576
577free_all:
578	inst = so->so_filter_top;
579free_detached:
580	ASSERT(inst != NULL);
581	/*
582	 * Destroy all filters for which attach was not called. The other
583	 * filters will be destroyed (and detach called) when the socket
584	 * is freed.
585	 */
586	do {
587		sof_instance_t *t = inst->sofi_next;
588		sof_instance_destroy(inst);
589		inst = t;
590	} while (inst != NULL);
591
592	return (error);
593}
594
595/*
596 * Detaches and frees all filters attached to sonode `so'.
597 */
598void
599sof_sonode_cleanup(struct sonode *so)
600{
601	sof_instance_t *inst;
602
603	while ((inst = so->so_filter_top) != NULL) {
604		(inst->sofi_ops->sofop_detach)((sof_handle_t)inst,
605		    inst->sofi_cookie, kcred);
606		sof_instance_destroy(inst);
607	}
608}
609
610/*
611 * Notifies all active filters attached to `so' about the `event' and
612 * where `arg' is an event specific argument.
613 */
614void
615sof_sonode_notify_filters(struct sonode *so, sof_event_t event, uintptr_t arg)
616{
617	sof_instance_t *inst;
618
619	for (inst = so->so_filter_bottom; inst != NULL;
620	    inst = inst->sofi_prev) {
621		if (SOF_INTERESTED(inst, notify))
622			(inst->sofi_ops->sofop_notify)((sof_handle_t)inst,
623			    inst->sofi_cookie, event, arg);
624	}
625}
626
627/*
628 * The socket `so' is closing. Notify filters and make sure that there
629 * are no pending tx operations.
630 */
631void
632sof_sonode_closing(struct sonode *so)
633{
634	/*
635	 * Notify filters that the socket is being closed. It's OK for
636	 * filters to inject data.
637	 */
638	sof_sonode_notify_filters(so, SOF_EV_CLOSING, (uintptr_t)B_TRUE);
639
640	/*
641	 * Stop any future attempts to inject data, and wait for any
642	 * pending operations to complete. This has to be done to ensure
643	 * that no data is sent down to the protocol once a close
644	 * downcall has been made.
645	 */
646	mutex_enter(&so->so_lock);
647	so->so_state |= SS_FIL_STOP;
648	while (so->so_filter_tx > 0)
649		cv_wait(&so->so_closing_cv, &so->so_lock);
650	mutex_exit(&so->so_lock);
651}
652
653/*
654 * Called when socket `so' wants to get rid of a deferred connection.
655 * Returns TRUE if a connection was dropped.
656 */
657boolean_t
658sof_sonode_drop_deferred(struct sonode *so)
659{
660	struct sonode *def;
661	clock_t now = ddi_get_lbolt();
662
663	if (sof_close_deferred_backlog > sof_close_deferred_max_backlog) {
664		SOF_GLOBAL_STAT_BUMP(defer_close_failed_backlog_too_big);
665		return (B_FALSE);
666	}
667	mutex_enter(&so->so_acceptq_lock);
668	if ((def = list_head(&so->so_acceptq_defer)) != NULL &&
669	    (now - def->so_filter_defertime) > sof_defer_drop_time) {
670		list_remove(&so->so_acceptq_defer, def);
671		so->so_acceptq_len--;
672		mutex_exit(&so->so_acceptq_lock);
673		def->so_listener = NULL;
674	} else {
675		mutex_exit(&so->so_acceptq_lock);
676		return (B_FALSE);
677	}
678
679	mutex_enter(&sof_close_deferred_lock);
680	list_insert_tail(&sof_close_deferred_list, def);
681	sof_close_deferred_backlog++;
682	if (!sof_close_deferred_running) {
683		mutex_exit(&sof_close_deferred_lock);
684		(void) taskq_dispatch(sof_close_deferred_taskq,
685		    sof_close_deferred, NULL, TQ_NOSLEEP);
686	} else {
687		mutex_exit(&sof_close_deferred_lock);
688	}
689	return (B_TRUE);
690}
691
692/*
693 * Called from a taskq to close connections that have been deferred for
694 * too long.
695 */
696void
697sof_close_deferred(void *unused)
698{
699	struct sonode *drop;
700
701	_NOTE(ARGUNUSED(unused));
702
703	mutex_enter(&sof_close_deferred_lock);
704	if (!sof_close_deferred_running) {
705		sof_close_deferred_running = B_TRUE;
706		while ((drop =
707		    list_remove_head(&sof_close_deferred_list)) != NULL) {
708			sof_close_deferred_backlog--;
709			mutex_exit(&sof_close_deferred_lock);
710
711			SOF_GLOBAL_STAT_BUMP(defer_closed);
712			(void) socket_close(drop, 0, kcred);
713			socket_destroy(drop);
714
715			mutex_enter(&sof_close_deferred_lock);
716		}
717		sof_close_deferred_running = B_FALSE;
718		ASSERT(sof_close_deferred_backlog == 0);
719	}
720	mutex_exit(&sof_close_deferred_lock);
721}
722
723/*
724 * Creates a new filter instance from the entry `ent' and attaches
725 * it to the sonode `so'. On success, return a pointer to the created
726 * instance.
727 *
728 * The new instance will be placed on the top of the filter stack.
729 *
730 * The caller is responsible for assigning the instance's ops vector and
731 * calling the filter's attach callback.
732 *
733 * No locks are held while manipulating the sonode fields because we are
734 * guaranteed that this operation is serialized.
735 *
736 * We can be sure that the entry `ent' will not disappear, because the
737 * caller is either holding sockconf_lock (in case of an active open), or is
738 * already holding a reference (in case of a passive open, the listener has
739 * one).
740 */
741static sof_instance_t *
742sof_instance_create(sof_entry_t *ent, struct sonode *so)
743{
744	sof_instance_t *inst;
745
746	inst = kmem_zalloc(sizeof (sof_instance_t), KM_NOSLEEP);
747	if (inst == NULL)
748		return (NULL);
749	sof_entry_hold(ent);
750	inst->sofi_filter = ent;
751	inst->sofi_sonode = so;
752
753	inst->sofi_next = so->so_filter_top;
754	if (so->so_filter_top != NULL)
755		so->so_filter_top->sofi_prev = inst;
756	else
757		so->so_filter_bottom = inst;
758	so->so_filter_top = inst;
759	so->so_filter_active++;
760
761	return (inst);
762}
763/*
764 * Destroys the filter instance `inst' and unlinks it from the sonode.
765 *
766 * Any filter private state must be destroyed (via the detach callback)
767 * before the instance is destroyed.
768 */
769static void
770sof_instance_destroy(sof_instance_t *inst)
771{
772	struct sonode *so = inst->sofi_sonode;
773
774	ASSERT(inst->sofi_sonode != NULL);
775	ASSERT(inst->sofi_filter != NULL);
776	ASSERT(inst->sofi_prev != NULL || so->so_filter_top == inst);
777	ASSERT(inst->sofi_next != NULL || so->so_filter_bottom == inst);
778
779	if (inst->sofi_prev != NULL)
780		inst->sofi_prev->sofi_next = inst->sofi_next;
781	else
782		so->so_filter_top = inst->sofi_next;
783
784	if (inst->sofi_next != NULL)
785		inst->sofi_next->sofi_prev = inst->sofi_prev;
786	else
787		so->so_filter_bottom = inst->sofi_prev;
788
789	if (!(inst->sofi_flags & SOFIF_BYPASS)) {
790		ASSERT(so->so_filter_active > 0);
791		so->so_filter_active--;
792	}
793	if (inst->sofi_flags & SOFIF_DEFER)
794		SOF_STAT_ADD(inst, ndeferred, -1);
795	sof_entry_rele(inst->sofi_filter);
796	kmem_free(inst, sizeof (sof_instance_t));
797}
798
799static sof_entry_t *
800sof_entry_find(const char *name)
801{
802	sof_entry_t *ent;
803
804	for (ent = list_head(&sof_entry_list); ent != NULL;
805	    ent = list_next(&sof_entry_list, ent)) {
806		if (strncmp(ent->sofe_name, name, SOF_MAXNAMELEN) == 0)
807			return (ent);
808	}
809	return (NULL);
810}
811
812void
813sof_entry_free(sof_entry_t *ent)
814{
815	ASSERT(ent->sofe_refcnt == 0);
816	ASSERT(!list_link_active(&ent->sofe_node));
817
818	if (ent->sofe_hintarg != NULL) {
819		ASSERT(ent->sofe_hint == SOF_HINT_BEFORE ||
820		    ent->sofe_hint == SOF_HINT_AFTER);
821		kmem_free(ent->sofe_hintarg, strlen(ent->sofe_hintarg) + 1);
822		ent->sofe_hintarg = NULL;
823	}
824	if (ent->sofe_socktuple_cnt > 0) {
825		ASSERT(ent->sofe_socktuple != NULL);
826		kmem_free(ent->sofe_socktuple,
827		    sizeof (sof_socktuple_t) * ent->sofe_socktuple_cnt);
828		ent->sofe_socktuple = NULL;
829		ent->sofe_socktuple_cnt = 0;
830	}
831	sof_entry_kstat_destroy(ent);
832
833	mutex_destroy(&ent->sofe_lock);
834	kmem_free(ent, sizeof (sof_entry_t));
835}
836
837static int
838sof_entry_kstat_update(kstat_t *ksp, int rw)
839{
840	sof_entry_t *ent = ksp->ks_private;
841
842	if (rw == KSTAT_WRITE)
843		return (EACCES);
844
845	ent->sofe_kstat.sofek_nactive.value.ui64 = ent->sofe_refcnt;
846
847	return (0);
848}
849
850/*
851 * Create the kstat for filter entry `ent'.
852 */
853static int
854sof_entry_kstat_create(sof_entry_t *ent)
855{
856	char name[SOF_MAXNAMELEN + 7];
857
858	(void) snprintf(name, sizeof (name), "filter_%s", ent->sofe_name);
859	ent->sofe_ksp = kstat_create("sockfs", 0, name, "misc",
860	    KSTAT_TYPE_NAMED,
861	    sizeof (sof_entry_kstat_t) / sizeof (kstat_named_t),
862	    KSTAT_FLAG_VIRTUAL);
863
864	if (ent->sofe_ksp == NULL)
865		return (ENOMEM);
866
867	kstat_named_init(&ent->sofe_kstat.sofek_nactive, "nactive",
868	    KSTAT_DATA_UINT64);
869	kstat_named_init(&ent->sofe_kstat.sofek_tot_active_attach,
870	    "tot_active_attach", KSTAT_DATA_UINT64);
871	kstat_named_init(&ent->sofe_kstat.sofek_tot_passive_attach,
872	    "tot_passive_attach", KSTAT_DATA_UINT64);
873	kstat_named_init(&ent->sofe_kstat.sofek_ndeferred, "ndeferred",
874	    KSTAT_DATA_UINT64);
875	kstat_named_init(&ent->sofe_kstat.sofek_attach_failures,
876	    "attach_failures", KSTAT_DATA_UINT64);
877
878	ent->sofe_ksp->ks_data = &ent->sofe_kstat;
879	ent->sofe_ksp->ks_update = sof_entry_kstat_update;
880	ent->sofe_ksp->ks_private = ent;
881	kstat_install(ent->sofe_ksp);
882
883	return (0);
884}
885
886/*
887 * Destroys the kstat for filter entry `ent'.
888 */
889static void
890sof_entry_kstat_destroy(sof_entry_t *ent)
891{
892	if (ent->sofe_ksp != NULL) {
893		kstat_delete(ent->sofe_ksp);
894		ent->sofe_ksp = NULL;
895	}
896}
897
898static void
899sof_entry_hold(sof_entry_t *ent)
900{
901	mutex_enter(&ent->sofe_lock);
902	ent->sofe_refcnt++;
903	mutex_exit(&ent->sofe_lock);
904}
905
906/*
907 * Decrement the reference count for `ent'. The entry will
908 * drop its' reference on the filter module whenever its'
909 * ref count reaches zero.
910 */
911static void
912sof_entry_rele(sof_entry_t *ent)
913{
914	mutex_enter(&ent->sofe_lock);
915	if (--ent->sofe_refcnt == 0) {
916		sof_module_t *mod = ent->sofe_mod;
917		ent->sofe_mod = NULL;
918		if (ent->sofe_flags & SOFEF_CONDEMED) {
919			mutex_exit(&ent->sofe_lock);
920			sof_entry_free(ent);
921		} else {
922			mutex_exit(&ent->sofe_lock);
923		}
924		if (mod != NULL)
925			sof_module_rele(mod);
926	} else {
927		mutex_exit(&ent->sofe_lock);
928	}
929}
930
931/*
932 * Loads the module used by `ent'
933 */
934static int
935sof_entry_load_module(sof_entry_t *ent)
936{
937	sof_module_t *mod = sof_module_hold_by_name(ent->sofe_name,
938	    ent->sofe_modname);
939
940	if (mod == NULL)
941		return (EINVAL);
942
943	mutex_enter(&ent->sofe_lock);
944	/* Another thread might have already loaded the module */
945	ASSERT(ent->sofe_mod == mod || ent->sofe_mod == NULL);
946	if (ent->sofe_mod != NULL) {
947		mutex_exit(&ent->sofe_lock);
948		sof_module_rele(mod);
949	} else {
950		ent->sofe_mod = mod;
951		mutex_exit(&ent->sofe_lock);
952	}
953
954	return (0);
955}
956
957/*
958 * Add filter entry `ent' to the global list and attach it to all sockparam
959 * entries which the filter is interested in. Upon successful return the filter
960 * will be available for applications to use.
961 */
962int
963sof_entry_add(sof_entry_t *ent)
964{
965	int error;
966
967	/*
968	 * We hold sockconf_lock as a WRITER for the whole operation,
969	 * so all operations must be non-blocking.
970	 */
971	rw_enter(&sockconf_lock, RW_WRITER);
972	if (sof_entry_find(ent->sofe_name) != NULL) {
973		rw_exit(&sockconf_lock);
974		return (EEXIST);
975	}
976
977	/* The entry is unique; create the kstats */
978	if (sof_entry_kstat_create(ent) != 0) {
979		rw_exit(&sockconf_lock);
980		return (ENOMEM);
981	}
982
983	/*
984	 * Attach the filter to sockparams of interest.
985	 */
986	if ((error = sockparams_new_filter(ent)) != 0) {
987		sof_entry_kstat_destroy(ent);
988		rw_exit(&sockconf_lock);
989		return (error);
990	}
991	/*
992	 * Everything is OK; insert in global list.
993	 */
994	list_insert_tail(&sof_entry_list, ent);
995	rw_exit(&sockconf_lock);
996
997	return (0);
998}
999
1000/*
1001 * Removes the filter entry `ent' from global list and all sockparams.
1002 */
1003sof_entry_t *
1004sof_entry_remove_by_name(const char *name)
1005{
1006	sof_entry_t *ent;
1007
1008	rw_enter(&sockconf_lock, RW_WRITER);
1009	if ((ent = sof_entry_find(name)) == NULL) {
1010		rw_exit(&sockconf_lock);
1011		return (NULL);
1012	}
1013	list_remove(&sof_entry_list, ent);
1014	sockparams_filter_cleanup(ent);
1015	sof_entry_kstat_destroy(ent);
1016	rw_exit(&sockconf_lock);
1017
1018	return (ent);
1019}
1020
1021/*
1022 * Filter entry `ent' will process sockparams entry `sp' to determine whether
1023 * it should be attached to the sockparams. It should be called whenever a new
1024 * filter or sockparams is being added. Returns zero either if the filter is
1025 * not interested in the sockparams or if it successfully attached to the
1026 * sockparams. On failure an errno is returned.
1027 */
1028int
1029sof_entry_proc_sockparams(sof_entry_t *ent, struct sockparams *sp)
1030{
1031	uint_t i;
1032	sof_socktuple_t *t = ent->sofe_socktuple;
1033	sp_filter_t *new, *fil;
1034
1035	/* Only interested in non-TPI sockets */
1036	if (strcmp(sp->sp_smod_name, SOTPI_SMOD_NAME) == 0)
1037		return (0);
1038
1039	for (i = 0; i < ent->sofe_socktuple_cnt; i++) {
1040		if (t[i].sofst_family == sp->sp_family &&
1041		    t[i].sofst_type == sp->sp_type &&
1042		    t[i].sofst_protocol == sp->sp_protocol)
1043			break;
1044	}
1045	/* This filter is not interested in the sockparams entry */
1046	if (i == ent->sofe_socktuple_cnt)
1047		return (0);
1048
1049	new = kmem_zalloc(sizeof (sp_filter_t), KM_NOSLEEP);
1050	if (new == NULL)
1051		return (ENOMEM);
1052
1053	new->spf_filter = ent;
1054	if (ent->sofe_flags & SOFEF_PROG) {
1055		/* placement is irrelevant for programmatic filters */
1056		list_insert_head(&sp->sp_prog_filters, new);
1057		return (0);
1058	} else {
1059		ASSERT(ent->sofe_flags & SOFEF_AUTO);
1060		/*
1061		 * If the filter specifies a placement hint, then make sure
1062		 * it can be satisfied.
1063		 */
1064		switch (ent->sofe_hint) {
1065		case SOF_HINT_TOP:
1066			if ((fil = list_head(&sp->sp_auto_filters)) != NULL &&
1067			    fil->spf_filter->sofe_hint == SOF_HINT_TOP)
1068				break;
1069			list_insert_head(&sp->sp_auto_filters, new);
1070			return (0);
1071		case SOF_HINT_BOTTOM:
1072			if ((fil = list_tail(&sp->sp_auto_filters)) != NULL &&
1073			    fil->spf_filter->sofe_hint == SOF_HINT_BOTTOM)
1074				break;
1075			list_insert_tail(&sp->sp_auto_filters, new);
1076			return (0);
1077		case SOF_HINT_BEFORE:
1078		case SOF_HINT_AFTER:
1079			for (fil = list_head(&sp->sp_auto_filters);
1080			    fil != NULL;
1081			    fil = list_next(&sp->sp_auto_filters, fil)) {
1082				if (strncmp(ent->sofe_hintarg,
1083				    fil->spf_filter->sofe_name,
1084				    SOF_MAXNAMELEN) == 0)
1085				break;
1086			}
1087
1088			if (fil != NULL) {
1089				if (ent->sofe_hint == SOF_HINT_BEFORE) {
1090					if (fil->spf_filter->sofe_hint ==
1091					    SOF_HINT_TOP)
1092						break;
1093					list_insert_before(&sp->sp_auto_filters,
1094					    fil, new);
1095				} else {
1096					if (fil->spf_filter->sofe_hint ==
1097					    SOF_HINT_BOTTOM)
1098						break;
1099					list_insert_after(&sp->sp_auto_filters,
1100					    fil, new);
1101				}
1102				return (0);
1103			}
1104			/*FALLTHRU*/
1105		case SOF_HINT_NONE:
1106			/*
1107			 * Insert the new filter at the beginning as long as it
1108			 * does not violate a TOP hint, otherwise insert in the
1109			 * next suitable location.
1110			 */
1111			if ((fil = list_head(&sp->sp_auto_filters)) != NULL &&
1112			    fil->spf_filter->sofe_hint == SOF_HINT_TOP) {
1113				list_insert_after(&sp->sp_auto_filters, fil,
1114				    new);
1115			} else {
1116				list_insert_head(&sp->sp_auto_filters, new);
1117			}
1118			return (0);
1119		}
1120		/* Failed to insert the filter */
1121		kmem_free(new, sizeof (sp_filter_t));
1122		return (ENOSPC);
1123	}
1124}
1125
1126/*
1127 * Remove all filter entries attached to the sockparams entry `sp'.
1128 */
1129void
1130sof_sockparams_fini(struct sockparams *sp)
1131{
1132	sp_filter_t *fil;
1133
1134	ASSERT(!list_link_active(&sp->sp_node));
1135
1136	while ((fil = list_remove_head(&sp->sp_auto_filters)) != NULL)
1137		kmem_free(fil, sizeof (sp_filter_t));
1138	while ((fil = list_remove_head(&sp->sp_prog_filters)) != NULL)
1139		kmem_free(fil, sizeof (sp_filter_t));
1140}
1141
1142/*
1143 * A new sockparams is being added. Walk all filters and attach those that
1144 * are interested in the entry.
1145 *
1146 * It should be called when the sockparams entry is about to be made available
1147 * for use and while holding the sockconf_lock.
1148 */
1149int
1150sof_sockparams_init(struct sockparams *sp)
1151{
1152	sof_entry_t *ent;
1153
1154	ASSERT(RW_WRITE_HELD(&sockconf_lock));
1155
1156	for (ent = list_head(&sof_entry_list); ent != NULL;
1157	    ent = list_next(&sof_entry_list, ent)) {
1158		if (sof_entry_proc_sockparams(ent, sp) != 0) {
1159			sof_sockparams_fini(sp);
1160			return (ENOMEM);
1161		}
1162	}
1163	return (0);
1164}
1165
1166static sof_module_t *
1167sof_module_find(const char *name)
1168{
1169	sof_module_t *ent;
1170
1171	ASSERT(MUTEX_HELD(&sof_module_lock));
1172
1173	for (ent = list_head(&sof_module_list); ent != NULL;
1174	    ent = list_next(&sof_module_list, ent))
1175		if (strcmp(ent->sofm_name, name) == 0)
1176			return (ent);
1177	return (NULL);
1178}
1179
1180/*
1181 * Returns a pointer to a module identified by `name' with its ref count
1182 * bumped. An attempt to load the module is done if it's not found in the
1183 * global list.
1184 */
1185sof_module_t *
1186sof_module_hold_by_name(const char *name, const char *modname)
1187{
1188	ddi_modhandle_t handle = NULL;
1189	sof_module_t *mod = NULL;
1190	char *modpath;
1191	int error;
1192
1193	/*
1194	 * We'll go through the loop at most two times, which will only
1195	 * happen if the module needs to be loaded.
1196	 */
1197	for (;;) {
1198		mutex_enter(&sof_module_lock);
1199		mod = sof_module_find(name);
1200		if (mod != NULL || handle != NULL)
1201			break;
1202		mutex_exit(&sof_module_lock);
1203
1204		modpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1205		(void) snprintf(modpath, MAXPATHLEN, "%s/%s", SOF_MODPATH,
1206		    modname);
1207		handle = ddi_modopen(modpath, KRTLD_MODE_FIRST, &error);
1208		kmem_free(modpath, MAXPATHLEN);
1209		/* Failed to load, then bail */
1210		if (handle == NULL) {
1211			cmn_err(CE_WARN,
1212			    "Failed to load socket filter module: %s (err %d)",
1213			    modname, error);
1214			return (NULL);
1215		}
1216	}
1217	if (mod != NULL)
1218		mod->sofm_refcnt++;
1219	mutex_exit(&sof_module_lock);
1220
1221	if (handle != NULL) {
1222		(void) ddi_modclose(handle);
1223		/*
1224		 * The module was loaded, but the filter module could not be
1225		 * found. It's likely a misconfigured filter.
1226		 */
1227		if (mod == NULL) {
1228			cmn_err(CE_WARN,
1229			    "Socket filter module %s was loaded, but did not" \
1230			    "register. Filter %s is likely misconfigured.",
1231			    modname, name);
1232		}
1233	}
1234
1235	return (mod);
1236}
1237
1238void
1239sof_module_rele(sof_module_t *mod)
1240{
1241	mutex_enter(&sof_module_lock);
1242	mod->sofm_refcnt--;
1243	mutex_exit(&sof_module_lock);
1244}
1245
1246int
1247sof_rval2errno(sof_rval_t rval)
1248{
1249	if (rval > SOF_RVAL_CONTINUE) {
1250		return ((int)rval);
1251	} else {
1252#ifdef DEBUG
1253		if (socket_filter_debug)
1254			printf("sof_rval2errno: invalid rval '%d'\n", rval);
1255#endif
1256		return (EINVAL);
1257	}
1258}
1259
1260/*
1261 * Walk through all the filters attached to `so' and allow each filter
1262 * to process the data using its data_out callback. `mp' is a b_cont chain.
1263 *
1264 * Returns the processed mblk, or NULL if mblk was consumed. The mblk might
1265 * have been consumed as a result of an error, in which case `errp' is set to
1266 * the appropriate errno.
1267 */
1268mblk_t *
1269sof_filter_data_out_from(struct sonode *so, sof_instance_t *start,
1270    mblk_t *mp, struct nmsghdr *msg, cred_t *cr, int *errp)
1271{
1272	sof_instance_t *inst;
1273	sof_rval_t rval;
1274
1275	_NOTE(ARGUNUSED(so));
1276
1277	for (inst = start; inst != NULL; inst = inst->sofi_next) {
1278		if (!SOF_INTERESTED(inst, data_out))
1279			continue;
1280		mp = (inst->sofi_ops->sofop_data_out)((sof_handle_t)inst,
1281		    inst->sofi_cookie, mp, msg, cr, &rval);
1282		DTRACE_PROBE2(filter__data, (sof_instance_t), inst,
1283		    (mblk_t *), mp);
1284		if (mp == NULL) {
1285			*errp = sof_rval2errno(rval);
1286			break;
1287		}
1288	}
1289	return (mp);
1290}
1291
1292/*
1293 * Walk through all the filters attached to `so' and allow each filter
1294 * to process the data using its data_in_proc callback. `mp' is the start of
1295 * a possible b_next chain, and `lastmp' points to the last mblk in the chain.
1296 *
1297 * Returns the processed mblk, or NULL if all mblks in the chain were
1298 * consumed. `lastmp' is updated to point to the last mblk in the processed
1299 * chain.
1300 */
1301mblk_t *
1302sof_filter_data_in_proc(struct sonode *so, mblk_t *mp, mblk_t **lastmp)
1303{
1304	sof_instance_t *inst;
1305	size_t len = 0, orig = 0;
1306	ssize_t diff = 0;
1307	mblk_t *retmp = NULL, *tailmp, *nextmp;
1308
1309	*lastmp = NULL;
1310	do {
1311		nextmp = mp->b_next;
1312		mp->b_next = mp->b_prev = NULL;
1313		len = orig = msgdsize(mp);
1314		for (inst = so->so_filter_bottom; inst != NULL;
1315		    inst = inst->sofi_prev) {
1316			if (!SOF_INTERESTED(inst, data_in_proc))
1317				continue;
1318			mp = (inst->sofi_ops->sofop_data_in_proc)(
1319			    (sof_handle_t)inst, inst->sofi_cookie, mp,
1320			    kcred, &len);
1321			if (mp == NULL)
1322				break;
1323		}
1324		DTRACE_PROBE2(filter__data, (sof_instance_t), inst,
1325		    (mblk_t *), mp);
1326		diff += len - orig;
1327		if (mp == NULL)
1328			continue;
1329
1330		for (tailmp = mp; tailmp->b_cont != NULL;
1331		    tailmp = tailmp->b_cont)
1332			;
1333		mp->b_prev = tailmp;
1334
1335		if (*lastmp == NULL)
1336			retmp = mp;
1337		else
1338			(*lastmp)->b_next = mp;
1339		*lastmp = mp;
1340	} while ((mp = nextmp) != NULL);
1341
1342	/*
1343	 * The size of the chain has changed; make sure the rcv queue
1344	 * stays consistent and check if the flow control state should
1345	 * change.
1346	 */
1347	if (diff != 0) {
1348		DTRACE_PROBE2(filter__data__adjust__qlen,
1349		    (struct sonode *), so, (size_t), diff);
1350		mutex_enter(&so->so_lock);
1351		so->so_rcv_queued += diff;
1352		/* so_check_flow_control drops so_lock */
1353		(void) so_check_flow_control(so);
1354	}
1355
1356	return (retmp);
1357}
1358
1359int
1360sof_filter_bind(struct sonode *so, struct sockaddr *addr,
1361    socklen_t *addrlen, cred_t *cr)
1362{
1363	__SOF_FILTER_OP(so, bind, cr, addr, addrlen)
1364}
1365
1366int
1367sof_filter_listen(struct sonode *so, int *backlogp, cred_t *cr)
1368{
1369	__SOF_FILTER_OP(so, listen, cr, backlogp)
1370}
1371
1372int
1373sof_filter_connect(struct sonode *so, struct sockaddr *addr,
1374    socklen_t *addrlen, cred_t *cr)
1375{
1376	__SOF_FILTER_OP(so, connect, cr, addr, addrlen)
1377}
1378
1379int
1380sof_filter_accept(struct sonode *so, cred_t *cr)
1381{
1382	sof_instance_t *inst;
1383	sof_rval_t rval;
1384
1385	for (inst = so->so_filter_top; inst != NULL; inst = inst->sofi_next) {
1386		if (!SOF_INTERESTED(inst, accept))
1387			continue;
1388		rval = (inst->sofi_ops->sofop_accept)((sof_handle_t)inst,
1389		    inst->sofi_cookie, cr);
1390		DTRACE_PROBE2(filter__action, (sof_instance_t), inst,
1391		    (sof_rval_t), rval);
1392		if (rval != SOF_RVAL_CONTINUE) {
1393			ASSERT(rval != SOF_RVAL_RETURN);
1394			return (sof_rval2errno(rval));
1395		}
1396	}
1397	return (-1);
1398}
1399
1400int
1401sof_filter_shutdown(struct sonode *so, int *howp, cred_t *cr)
1402{
1403	__SOF_FILTER_OP(so, shutdown, cr, howp)
1404}
1405
1406int
1407sof_filter_getsockname(struct sonode *so, struct sockaddr *addr,
1408    socklen_t *addrlenp, cred_t *cr)
1409{
1410	__SOF_FILTER_OP(so, getsockname, cr, addr, addrlenp)
1411}
1412
1413int
1414sof_filter_getpeername(struct sonode *so, struct sockaddr *addr,
1415    socklen_t *addrlenp, cred_t *cr)
1416{
1417	__SOF_FILTER_OP(so, getpeername, cr, addr, addrlenp)
1418}
1419
1420int
1421sof_filter_setsockopt(struct sonode *so, int level, int option_name,
1422    void *optval, socklen_t *optlenp, cred_t *cr)
1423{
1424	__SOF_FILTER_OP(so, setsockopt, cr, level, option_name,
1425	    optval, optlenp)
1426}
1427
1428int
1429sof_filter_getsockopt(struct sonode *so, int level, int option_name,
1430    void *optval, socklen_t *optlenp, cred_t *cr)
1431{
1432	__SOF_FILTER_OP(so, getsockopt, cr, level, option_name,
1433	    optval, optlenp)
1434}
1435
1436int
1437sof_filter_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
1438    int32_t *rvalp, cred_t *cr)
1439{
1440	__SOF_FILTER_OP(so, ioctl, cr, cmd, arg, mode, rvalp)
1441}
1442
1443/*
1444 * sof_register(version, name, ops, flags)
1445 *
1446 * Register a socket filter identified by name `name' and which should use
1447 * the ops vector `ops' for event notification. `flags' should be set to 0.
1448 * On success 0 is returned, otherwise an errno is returned.
1449 */
1450int
1451sof_register(int version, const char *name, const sof_ops_t *ops, int flags)
1452{
1453	sof_module_t *mod;
1454
1455	_NOTE(ARGUNUSED(flags));
1456
1457	if (version != SOF_VERSION)
1458		return (EINVAL);
1459
1460	mod = kmem_zalloc(sizeof (sof_module_t), KM_SLEEP);
1461	mod->sofm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1462	(void) strcpy(mod->sofm_name, name);
1463	mod->sofm_ops = *ops;
1464
1465	mutex_enter(&sof_module_lock);
1466	if (sof_module_find(name) != NULL) {
1467		mutex_exit(&sof_module_lock);
1468		kmem_free(mod->sofm_name, strlen(mod->sofm_name) + 1);
1469		kmem_free(mod, sizeof (sof_module_t));
1470		return (EEXIST);
1471	}
1472	list_insert_tail(&sof_module_list, mod);
1473	mutex_exit(&sof_module_lock);
1474
1475	return (0);
1476}
1477
1478/*
1479 * sof_unregister(name)
1480 *
1481 * Try to unregister the socket filter identified by `name'. If the filter
1482 * is successfully unregistered, then 0 is returned, otherwise an errno is
1483 * returned.
1484 */
1485int
1486sof_unregister(const char *name)
1487{
1488	sof_module_t *mod;
1489
1490	mutex_enter(&sof_module_lock);
1491	mod = sof_module_find(name);
1492	if (mod != NULL) {
1493		if (mod->sofm_refcnt == 0) {
1494			list_remove(&sof_module_list, mod);
1495			mutex_exit(&sof_module_lock);
1496
1497			kmem_free(mod->sofm_name, strlen(mod->sofm_name) + 1);
1498			kmem_free(mod, sizeof (sof_module_t));
1499			return (0);
1500		} else {
1501			mutex_exit(&sof_module_lock);
1502			return (EBUSY);
1503		}
1504	}
1505	mutex_exit(&sof_module_lock);
1506
1507	return (ENXIO);
1508}
1509
1510/*
1511 * sof_newconn_ready(handle)
1512 *
1513 * The filter `handle` no longer wants to defer the socket it is attached
1514 * to. A newconn notification will be generated if there is no other filter
1515 * that wants the socket deferred.
1516 */
1517void
1518sof_newconn_ready(sof_handle_t handle)
1519{
1520	sof_instance_t *inst = (sof_instance_t *)handle;
1521	struct sonode *so = inst->sofi_sonode;
1522	struct sonode *pso = so->so_listener;
1523
1524	mutex_enter(&so->so_lock);
1525	if (!(inst->sofi_flags & SOFIF_DEFER)) {
1526		mutex_exit(&so->so_lock);
1527		return;
1528	}
1529	ASSERT(so->so_state & SS_FIL_DEFER);
1530	inst->sofi_flags &= ~SOFIF_DEFER;
1531	SOF_STAT_ADD(inst, ndeferred, -1);
1532
1533	/*
1534	 * Check if any other filter has deferred the socket. The last
1535	 * filter to remove its DEFER flag will be the one generating the
1536	 * wakeup.
1537	 */
1538	for (inst = so->so_filter_top; inst != NULL; inst = inst->sofi_next) {
1539		/* Still deferred; nothing to do */
1540		if (inst->sofi_flags & SOFIF_DEFER) {
1541			mutex_exit(&so->so_lock);
1542			return;
1543		}
1544	}
1545	so->so_state &= ~SS_FIL_DEFER;
1546	mutex_exit(&so->so_lock);
1547
1548	/*
1549	 * The socket is no longer deferred; move it over to the regular
1550	 * accept list and notify the user. However, it is possible that
1551	 * the socket is being dropped by sof_sonode_drop_deferred(), so
1552	 * first make sure the socket is on the deferred list.
1553	 */
1554	mutex_enter(&pso->so_acceptq_lock);
1555	if (!list_link_active(&so->so_acceptq_node)) {
1556		mutex_exit(&pso->so_acceptq_lock);
1557		return;
1558	}
1559	list_remove(&pso->so_acceptq_defer, so);
1560	list_insert_tail(&pso->so_acceptq_list, so);
1561	cv_signal(&pso->so_acceptq_cv);
1562	mutex_exit(&pso->so_acceptq_lock);
1563
1564	mutex_enter(&pso->so_lock);
1565	so_notify_newconn(pso);		/* so_notify_newconn drops the lock */
1566}
1567
1568/*
1569 * sof_bypass(handle)
1570 *
1571 * Stop generating callbacks for `handle'.
1572 */
1573void
1574sof_bypass(sof_handle_t handle)
1575{
1576	sof_instance_t *inst = (sof_instance_t *)handle;
1577	struct sonode *so = inst->sofi_sonode;
1578
1579	mutex_enter(&so->so_lock);
1580	if (!(inst->sofi_flags & SOFIF_BYPASS)) {
1581		inst->sofi_flags |= SOFIF_BYPASS;
1582		ASSERT(so->so_filter_active > 0);
1583		so->so_filter_active--;
1584	}
1585	mutex_exit(&so->so_lock);
1586}
1587
1588/*
1589 * sof_rcv_flowctrl(handle, enable)
1590 *
1591 * If `enable' is TRUE, then recv side flow control will be asserted for
1592 * the socket associated with `handle'. When `enable' is FALSE the filter
1593 * indicates that it no longer wants to assert flow control, however, the
1594 * condition will not be removed until there are no other filters asserting
1595 * flow control and there is space available in the receive buffer.
1596 */
1597void
1598sof_rcv_flowctrl(sof_handle_t handle, boolean_t enable)
1599{
1600	sof_instance_t *inst = (sof_instance_t *)handle;
1601	struct sonode *so = inst->sofi_sonode;
1602
1603	mutex_enter(&so->so_lock);
1604	if (enable) {
1605		inst->sofi_flags |= SOFIF_RCV_FLOWCTRL;
1606		so->so_flowctrld = B_TRUE;
1607		so->so_state |= SS_FIL_RCV_FLOWCTRL;
1608		mutex_exit(&so->so_lock);
1609	} else {
1610		inst->sofi_flags &= ~SOFIF_RCV_FLOWCTRL;
1611		for (inst = so->so_filter_top; inst != NULL;
1612		    inst = inst->sofi_next) {
1613			/* another filter is asserting flow control */
1614			if (inst->sofi_flags & SOFIF_RCV_FLOWCTRL) {
1615				mutex_exit(&so->so_lock);
1616				return;
1617			}
1618		}
1619		so->so_state &= ~SS_FIL_RCV_FLOWCTRL;
1620		/* so_check_flow_control drops so_lock */
1621		(void) so_check_flow_control(so);
1622	}
1623	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1624}
1625
1626/*
1627 * sof_snd_flowctrl(handle, enable)
1628 *
1629 * If `enable' is TRUE, then send side flow control will be asserted for
1630 * the socket associated with `handle'. When `enable' is FALSE the filter
1631 * indicates that is no longer wants to assert flow control, however, the
1632 * condition will not be removed until there are no other filters asserting
1633 * flow control and there are tx buffers available.
1634 */
1635void
1636sof_snd_flowctrl(sof_handle_t handle, boolean_t enable)
1637{
1638	sof_instance_t *inst = (sof_instance_t *)handle;
1639	struct sonode *so = inst->sofi_sonode;
1640
1641	mutex_enter(&so->so_lock);
1642	if (enable) {
1643		inst->sofi_flags |= SOFIF_SND_FLOWCTRL;
1644		so->so_state |= SS_FIL_SND_FLOWCTRL;
1645	} else {
1646		inst->sofi_flags &= ~SOFIF_SND_FLOWCTRL;
1647		for (inst = so->so_filter_top; inst != NULL;
1648		    inst = inst->sofi_next) {
1649			if (inst->sofi_flags & SOFIF_SND_FLOWCTRL) {
1650				mutex_exit(&so->so_lock);
1651				return;
1652			}
1653		}
1654		so->so_state &= ~SS_FIL_SND_FLOWCTRL;
1655		/*
1656		 * Wake up writer if the socket is no longer flow controlled.
1657		 */
1658		if (!SO_SND_FLOWCTRLD(so)) {
1659			/* so_notify_writable drops so_lock */
1660			so_notify_writable(so);
1661			return;
1662		}
1663	}
1664	mutex_exit(&so->so_lock);
1665}
1666
1667/*
1668 * sof_get_cookie(handle)
1669 *
1670 * Returns the cookie used by `handle'.
1671 */
1672void *
1673sof_get_cookie(sof_handle_t handle)
1674{
1675	return (((sof_instance_t *)handle)->sofi_cookie);
1676}
1677
1678/*
1679 * sof_cas_cookie(handle, old, new)
1680 *
1681 * Compare-and-swap the cookie used by `handle'.
1682 */
1683void *
1684sof_cas_cookie(sof_handle_t handle, void *old, void *new)
1685{
1686	sof_instance_t *inst = (sof_instance_t *)handle;
1687
1688	return (atomic_cas_ptr(&inst->sofi_cookie, old, new));
1689}
1690
1691/*
1692 * sof_inject_data_out(handle, mp, msg, flowctrld)
1693 *
1694 * Submit `mp' for transmission. `msg' cannot by NULL, and may contain
1695 * ancillary data and destination address. Returns 0 when successful
1696 * in which case `flowctrld' is updated. If flow controlled, no new data
1697 * should be injected until a SOF_EV_INJECT_DATA_OUT_OK event is observed.
1698 * In case of failure, an errno is returned.
1699 *
1700 * Filters that are lower in the stack than `handle' will see the data
1701 * before it is transmitted and may end up modifying or freeing the data.
1702 */
1703int
1704sof_inject_data_out(sof_handle_t handle, mblk_t *mp, struct nmsghdr *msg,
1705    boolean_t *flowctrld)
1706{
1707	sof_instance_t *inst = (sof_instance_t *)handle;
1708	struct sonode *so = inst->sofi_sonode;
1709	int error;
1710
1711	mutex_enter(&so->so_lock);
1712	if (so->so_state & SS_FIL_STOP) {
1713		mutex_exit(&so->so_lock);
1714		freemsg(mp);
1715		return (EPIPE);
1716	}
1717	so->so_filter_tx++;
1718	mutex_exit(&so->so_lock);
1719
1720	error = so_sendmblk_impl(inst->sofi_sonode, msg, FNONBLOCK,
1721	    kcred, &mp, inst->sofi_next, B_TRUE);
1722
1723	mutex_enter(&so->so_lock);
1724	ASSERT(so->so_filter_tx > 0);
1725	so->so_filter_tx--;
1726	if (so->so_state & SS_CLOSING)
1727		cv_signal(&so->so_closing_cv);
1728	mutex_exit(&so->so_lock);
1729
1730	if (mp != NULL)
1731		freemsg(mp);
1732
1733	if (error == ENOSPC) {
1734		*flowctrld = B_TRUE;
1735		error = 0;
1736	} else {
1737		*flowctrld = B_FALSE;
1738	}
1739
1740	return (error);
1741}
1742
1743/*
1744 * sof_inject_data_in(handle, mp, len, flag, flowctrld)
1745 *
1746 * Enqueue `mp' which contains `len' bytes of M_DATA onto the socket
1747 * associated with `handle'. `flags' should be set to 0. Returns 0 when
1748 * successful in which case `flowctrld' is updated. If flow controlled,
1749 * no new data should be injected until a SOF_EV_INJECT_DATA_IN_OK event
1750 * is observed.  In case of failure, an errno is returned.
1751 *
1752 * Filters that are higher in the stack than `handle' will see the data
1753 * before it is enqueued on the receive queue and may end up modifying or
1754 * freeing the data.
1755 */
1756int
1757sof_inject_data_in(sof_handle_t handle, mblk_t *mp, size_t len, int flags,
1758    boolean_t *flowctrld)
1759{
1760	sof_instance_t *inst = (sof_instance_t *)handle;
1761	ssize_t avail;
1762	int error = 0;
1763
1764	ASSERT(flags == 0);
1765	avail = so_queue_msg_impl(inst->sofi_sonode, mp, len, flags, &error,
1766	    NULL, inst->sofi_prev);
1767	/* fallback should never happen when there is an active filter */
1768	ASSERT(error != EOPNOTSUPP);
1769
1770	*flowctrld = (avail > 0) ? B_FALSE : B_TRUE;
1771	return (error);
1772}
1773
1774/*
1775 * sof_newconn_move(handle, newparent)
1776 *
1777 * Private interface only to be used by KSSL.
1778 *
1779 * Moves the socket associated with `handle' from its current listening
1780 * socket to the listener associated with `newparent'. The socket being
1781 * moved must be in a deferred state and it is up to the consumer of the
1782 * interface to ensure that the `newparent' does not go away while this
1783 * operation is pending.
1784 */
1785boolean_t
1786sof_newconn_move(sof_handle_t handle, sof_handle_t newparent)
1787{
1788	sof_instance_t *inst = (sof_instance_t *)handle;
1789	sof_instance_t *newpinst = (sof_instance_t *)newparent;
1790	struct sonode *so, *old, *new;
1791
1792	so = inst->sofi_sonode;
1793	ASSERT(so->so_state & SS_FIL_DEFER);
1794
1795	if (inst->sofi_next != NULL || inst->sofi_prev != NULL ||
1796	    !(so->so_state & SS_FIL_DEFER))
1797		return (B_FALSE);
1798
1799	old = so->so_listener;
1800	mutex_enter(&old->so_acceptq_lock);
1801	list_remove(&old->so_acceptq_defer, so);
1802	old->so_acceptq_len--;
1803	mutex_exit(&old->so_acceptq_lock);
1804
1805	new = newpinst->sofi_sonode;
1806	mutex_enter(&new->so_acceptq_lock);
1807	list_insert_tail(&new->so_acceptq_defer, so);
1808	new->so_acceptq_len++;
1809	mutex_exit(&new->so_acceptq_lock);
1810
1811	so->so_listener = new;
1812
1813	return (B_TRUE);
1814}
1815