1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
24 * Copyright 2016 Toomas Soome <tsoome@me.com>
25 */
26
27#include <sys/types.h>
28#include <sys/errno.h>
29#include <sys/stropts.h>
30#include <sys/debug.h>
31#include <sys/ddi.h>
32#include <sys/sunddi.h>
33#include <sys/vmem.h>
34#include <sys/cmn_err.h>
35#include <sys/callb.h>
36#include <sys/sysevent.h>
37#include <sys/sysevent_impl.h>
38#include <sys/sysevent/dev.h>
39#include <sys/modctl.h>
40#include <sys/lofi_impl.h>
41#include <sys/sysmacros.h>
42#include <sys/disp.h>
43#include <sys/autoconf.h>
44#include <sys/atomic.h>
45#include <sys/sdt.h>
46
47/* for doors */
48#include <sys/pathname.h>
49#include <sys/door.h>
50#include <sys/kmem.h>
51#include <sys/cpuvar.h>
52#include <sys/fs/snode.h>
53
54/*
55 * log_sysevent.c - Provides the interfaces for kernel event publication
56 *			to the sysevent event daemon (syseventd).
57 */
58
59/*
60 * Debug stuff
61 */
62static int log_event_debug = 0;
63#define	LOG_DEBUG(args)  if (log_event_debug) cmn_err args
64#ifdef DEBUG
65#define	LOG_DEBUG1(args)  if (log_event_debug > 1) cmn_err args
66#else
67#define	LOG_DEBUG1(args)
68#endif
69
70/*
71 * Local static vars
72 */
73/* queue of event buffers sent to syseventd */
74static log_eventq_t *log_eventq_sent = NULL;
75
76/*
77 * Count of event buffers in the queue
78 */
79int log_eventq_cnt = 0;
80
81/* queue of event buffers awaiting delivery to syseventd */
82static log_eventq_t *log_eventq_head = NULL;
83static log_eventq_t *log_eventq_tail = NULL;
84static uint64_t kernel_event_id = 0;
85static int encoding = NV_ENCODE_NATIVE;
86
87/* log event delivery flag */
88#define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
89#define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
90#define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */
91
92/*
93 * Tunable maximum event buffer queue size. Size depends on how many events
94 * the queue must hold when syseventd is not available, for example during
95 * system startup. Experience showed that more than 2000 events could be posted
96 * due to correctable memory errors.
97 */
98int logevent_max_q_sz = 5000;
99
100
101static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
102static char logevent_door_upcall_filename[MAXPATHLEN];
103
104static door_handle_t event_door = NULL;		/* Door for upcalls */
105static kmutex_t event_door_mutex;		/* To protect event_door */
106
107/*
108 * async thread-related variables
109 *
110 * eventq_head_mutex - synchronizes access to the kernel event queue
111 *
112 * eventq_sent_mutex - synchronizes access to the queue of event sents to
113 *			userlevel
114 *
115 * log_event_cv - condition variable signaled when an event has arrived or
116 *			userlevel ready to process event buffers
117 *
118 * async_thread - asynchronous event delivery thread to userlevel daemon.
119 *
120 * sysevent_upcall_status - status of the door upcall link
121 */
122static kmutex_t eventq_head_mutex;
123static kmutex_t eventq_sent_mutex;
124static kcondvar_t log_event_cv;
125static kthread_id_t async_thread = NULL;
126
127static kmutex_t event_qfull_mutex;
128static kcondvar_t event_qfull_cv;
129static int event_qfull_blocked = 0;
130
131static int sysevent_upcall_status = -1;
132static kmutex_t registered_channel_mutex;
133
134/*
135 * Indicates the syseventd daemon has begun taking events
136 */
137int sysevent_daemon_init = 0;
138
139/*
140 * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
141 * caused by the server process doing a forkall().  Since all threads
142 * but the thread actually doing the forkall() need to be quiesced,
143 * the fork may take some time.  The min/max pause are in units
144 * of clock ticks.
145 */
146#define	LOG_EVENT_MIN_PAUSE	8
147#define	LOG_EVENT_MAX_PAUSE	128
148
149static kmutex_t	event_pause_mutex;
150static kcondvar_t event_pause_cv;
151static int event_pause_state = 0;
152
153/* Cached device links for lofi. */
154lofi_nvl_t lofi_devlink_cache;
155
/*
 * log_event_busy_timeout - timeout(9F) callback scheduled by
 *	log_event_pause(); clears the pause flag and wakes the thread
 *	blocked in log_event_pause().
 */
/*ARGSUSED*/
static void
log_event_busy_timeout(void *arg)
{
	mutex_enter(&event_pause_mutex);
	event_pause_state = 0;
	cv_signal(&event_pause_cv);
	mutex_exit(&event_pause_mutex);
}
165
/*
 * log_event_pause - Block the caller for (at least) nticks clock ticks
 *	using a timeout(9F) callback.  If the timeout cannot be scheduled
 *	(id == 0), return without waiting.
 *
 *	NOTE(review): event_pause_state is written before event_pause_mutex
 *	is taken; this is only safe under the single-user assumption that
 *	the ASSERT below documents.
 */
static void
log_event_pause(int nticks)
{
	timeout_id_t id;

	/*
	 * Only one use of log_event_pause at a time
	 */
	ASSERT(event_pause_state == 0);

	event_pause_state = 1;
	id = timeout(log_event_busy_timeout, NULL, nticks);
	if (id != 0) {
		mutex_enter(&event_pause_mutex);
		/* log_event_busy_timeout() clears the flag and signals */
		while (event_pause_state)
			cv_wait(&event_pause_cv, &event_pause_mutex);
		mutex_exit(&event_pause_mutex);
	}
	event_pause_state = 0;
}
186
187
188/*
189 * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
190 * 			Check for rebinding errors
191 * 			This buffer is reused to by the syseventd door_return
192 *			to hold the result code
193 */
194static int
195log_event_upcall(log_event_upcall_arg_t *arg)
196{
197	int error;
198	size_t size;
199	sysevent_t *ev;
200	door_arg_t darg, save_arg;
201	int retry;
202	int neagain = 0;
203	int neintr = 0;
204	int nticks = LOG_EVENT_MIN_PAUSE;
205
206	/* Initialize door args */
207	ev = (sysevent_t *)&arg->buf;
208	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);
209
210	darg.rbuf = (char *)arg;
211	darg.data_ptr = (char *)arg;
212	darg.rsize = size;
213	darg.data_size = size;
214	darg.desc_ptr = NULL;
215	darg.desc_num = 0;
216
217	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
218	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));
219
220	save_arg = darg;
221	for (retry = 0; ; retry++) {
222
223		mutex_enter(&event_door_mutex);
224		if (event_door == NULL) {
225			mutex_exit(&event_door_mutex);
226
227			return (EBADF);
228		}
229
230		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
231		    SIZE_MAX, 0)) == 0) {
232			mutex_exit(&event_door_mutex);
233			break;
234		}
235
236		/*
237		 * EBADF is handled outside the switch below because we need to
238		 * hold event_door_mutex a bit longer
239		 */
240		if (error == EBADF) {
241			/* Server died */
242			door_ki_rele(event_door);
243			event_door = NULL;
244
245			mutex_exit(&event_door_mutex);
246			return (error);
247		}
248
249		mutex_exit(&event_door_mutex);
250
251		/*
252		 * The EBADF case is already handled above with event_door_mutex
253		 * held
254		 */
255		switch (error) {
256		case EINTR:
257			neintr++;
258			log_event_pause(2);
259			darg = save_arg;
260			break;
261		case EAGAIN:
262			/* cannot deliver upcall - process may be forking */
263			neagain++;
264			log_event_pause(nticks);
265			nticks <<= 1;
266			if (nticks > LOG_EVENT_MAX_PAUSE)
267				nticks = LOG_EVENT_MAX_PAUSE;
268			darg = save_arg;
269			break;
270		default:
271			cmn_err(CE_CONT,
272			    "log_event_upcall: door_ki_upcall error %d\n",
273			    error);
274			return (error);
275		}
276	}
277
278	if (neagain > 0 || neintr > 0) {
279		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
280		    neagain, neintr, nticks));
281	}
282
283	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
284	    "error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
285	    error, (void *)arg, (void *)darg.rbuf,
286	    (void *)darg.data_ptr,
287	    *((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));
288
289	if (!error) {
290		/*
291		 * upcall was successfully executed. Check return code.
292		 */
293		error = *((int *)(darg.rbuf));
294	}
295
296	return (error);
297}
298
299/*
300 * log_event_deliver - event delivery thread
301 *			Deliver all events on the event queue to syseventd.
302 *			If the daemon can not process events, stop event
303 *			delivery and wait for an indication from the
304 *			daemon to resume delivery.
305 *
306 *			Once all event buffers have been delivered, wait
307 *			until there are more to deliver.
308 */
309static void
310log_event_deliver()
311{
312	log_eventq_t *q;
313	int upcall_err;
314	callb_cpr_t cprinfo;
315
316	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
317	    "logevent");
318
319	/*
320	 * eventq_head_mutex is exited (released) when there are no more
321	 * events to process from the eventq in cv_wait().
322	 */
323	mutex_enter(&eventq_head_mutex);
324
325	for (;;) {
326		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
327		    (void *)log_eventq_head));
328
329		upcall_err = 0;
330		q = log_eventq_head;
331
332		while (q) {
333			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
334				upcall_err = EAGAIN;
335				break;
336			}
337
338			log_event_delivery = LOGEVENT_DELIVERY_OK;
339
340			/*
341			 * Release event queue lock during upcall to
342			 * syseventd
343			 */
344			mutex_exit(&eventq_head_mutex);
345			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
346				mutex_enter(&eventq_head_mutex);
347				break;
348			}
349
350			/*
351			 * We may be able to add entries to
352			 * the queue now.
353			 */
354			if (event_qfull_blocked > 0 &&
355			    log_eventq_cnt < logevent_max_q_sz) {
356				mutex_enter(&event_qfull_mutex);
357				if (event_qfull_blocked > 0) {
358					cv_signal(&event_qfull_cv);
359				}
360				mutex_exit(&event_qfull_mutex);
361			}
362
363			mutex_enter(&eventq_head_mutex);
364
365			/*
366			 * Daemon restart can cause entries to be moved from
367			 * the sent queue and put back on the event queue.
368			 * If this has occurred, replay event queue
369			 * processing from the new queue head.
370			 */
371			if (q != log_eventq_head) {
372				q = log_eventq_head;
373				LOG_DEBUG((CE_CONT, "log_event_deliver: "
374				    "door upcall/daemon restart race\n"));
375			} else {
376				log_eventq_t *next;
377
378				/*
379				 * Move the event to the sent queue when a
380				 * successful delivery has been made.
381				 */
382				mutex_enter(&eventq_sent_mutex);
383				next = q->next;
384				q->next = log_eventq_sent;
385				log_eventq_sent = q;
386				q = next;
387				log_eventq_head = q;
388				log_eventq_cnt--;
389				if (q == NULL) {
390					ASSERT(log_eventq_cnt == 0);
391					log_eventq_tail = NULL;
392				}
393				mutex_exit(&eventq_sent_mutex);
394			}
395		}
396
397		switch (upcall_err) {
398		case 0:
399			/*
400			 * Success. The queue is empty.
401			 */
402			sysevent_upcall_status = 0;
403			break;
404		case EAGAIN:
405			/*
406			 * Delivery is on hold (but functional).
407			 */
408			sysevent_upcall_status = 0;
409			/*
410			 * If the user has already signaled for delivery
411			 * resumption, continue.  Otherwise, we wait until
412			 * we are signaled to continue.
413			 */
414			if (log_event_delivery == LOGEVENT_DELIVERY_CONT)
415				continue;
416			log_event_delivery = LOGEVENT_DELIVERY_HOLD;
417
418			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
419			break;
420		default:
421			LOG_DEBUG((CE_CONT, "log_event_deliver: "
422			    "upcall err %d\n", upcall_err));
423			sysevent_upcall_status = upcall_err;
424			/*
425			 * Signal everyone waiting that transport is down
426			 */
427			if (event_qfull_blocked > 0) {
428				mutex_enter(&event_qfull_mutex);
429				if (event_qfull_blocked > 0) {
430					cv_broadcast(&event_qfull_cv);
431				}
432				mutex_exit(&event_qfull_mutex);
433			}
434			break;
435		}
436
437		CALLB_CPR_SAFE_BEGIN(&cprinfo);
438		cv_wait(&log_event_cv, &eventq_head_mutex);
439		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
440	}
441	/* NOTREACHED */
442}
443
444/*
445 * Set up the nvlist based data cache. User by lofi to find
446 * device name for mapped file.
447 */
448static void
449lofi_nvl_init(lofi_nvl_t *cache)
450{
451	mutex_init(&cache->ln_lock, NULL, MUTEX_DRIVER, NULL);
452	cv_init(&cache->ln_cv, NULL, CV_DRIVER, NULL);
453	(void) nvlist_alloc(&cache->ln_data, NV_UNIQUE_NAME, KM_SLEEP);
454}
455
456/*
457 * log_event_init - Allocate and initialize log_event data structures.
458 */
459void
460log_event_init()
461{
462	/* Set up devlink cache for lofi. */
463	lofi_nvl_init(&lofi_devlink_cache);
464
465	mutex_init(&event_door_mutex, NULL, MUTEX_DEFAULT, NULL);
466
467	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
468	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
469	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
470
471	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
472	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
473
474	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
475	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
476
477	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
478	sysevent_evc_init();
479}
480
481/*
482 * The following routines are used by kernel event publishers to
483 * allocate, append and free event buffers
484 */
485/*
486 * sysevent_alloc - Allocate new eventq struct.  This element contains
487 *			an event buffer that will be used in a subsequent
488 *			call to log_sysevent.
489 */
490sysevent_t *
491sysevent_alloc(char *class, char *subclass, char *pub, int flag)
492{
493	int payload_sz;
494	int class_sz, subclass_sz, pub_sz;
495	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
496	sysevent_t *ev;
497	log_eventq_t *q;
498
499	ASSERT(class != NULL);
500	ASSERT(subclass != NULL);
501	ASSERT(pub != NULL);
502
503	/*
504	 * Calculate and reserve space for the class, subclass and
505	 * publisher strings in the event buffer
506	 */
507	class_sz = strlen(class) + 1;
508	subclass_sz = strlen(subclass) + 1;
509	pub_sz = strlen(pub) + 1;
510
511	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
512	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
513
514	/* String sizes must be 64-bit aligned in the event buffer */
515	aligned_class_sz = SE_ALIGN(class_sz);
516	aligned_subclass_sz = SE_ALIGN(subclass_sz);
517	aligned_pub_sz = SE_ALIGN(pub_sz);
518
519	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
520	    (aligned_subclass_sz - sizeof (uint64_t)) +
521	    (aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
522
523	/*
524	 * Allocate event buffer plus additional sysevent queue
525	 * and payload overhead.
526	 */
527	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
528	if (q == NULL) {
529		return (NULL);
530	}
531
532	/* Initialize the event buffer data */
533	ev = (sysevent_t *)&q->arg.buf;
534	SE_VERSION(ev) = SYS_EVENT_VERSION;
535	bcopy(class, SE_CLASS_NAME(ev), class_sz);
536
537	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
538		+ aligned_class_sz;
539	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
540
541	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
542	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
543
544	SE_ATTR_PTR(ev) = UINT64_C(0);
545	SE_PAYLOAD_SZ(ev) = payload_sz;
546
547	return (ev);
548}
549
550/*
551 * sysevent_free - Free event buffer and any attribute data.
552 */
553void
554sysevent_free(sysevent_t *ev)
555{
556	log_eventq_t *q;
557	nvlist_t *nvl;
558
559	ASSERT(ev != NULL);
560	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
561	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
562
563	if (nvl != NULL) {
564		size_t size = 0;
565		(void) nvlist_size(nvl, &size, encoding);
566		SE_PAYLOAD_SZ(ev) -= size;
567		nvlist_free(nvl);
568	}
569	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
570}
571
572/*
573 * free_packed_event - Free packed event buffer
574 */
575static void
576free_packed_event(sysevent_t *ev)
577{
578	log_eventq_t *q;
579
580	ASSERT(ev != NULL);
581	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
582
583	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
584}
585
586/*
587 * sysevent_add_attr - Add new attribute element to an event attribute list
588 *			If attribute list is NULL, start a new list.
589 */
590int
591sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
592    sysevent_value_t *se_value, int flag)
593{
594	int error;
595	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
596
597	if (nvlp == NULL || se_value == NULL) {
598		return (SE_EINVAL);
599	}
600
601	/*
602	 * attr_sz is composed of the value data size + the name data size +
603	 * any header data.  64-bit aligned.
604	 */
605	if (strlen(name) >= MAX_ATTR_NAME) {
606		return (SE_EINVAL);
607	}
608
609	/*
610	 * Allocate nvlist
611	 */
612	if ((*nvlp == NULL) &&
613	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
614		return (SE_ENOMEM);
615
616	/* add the attribute */
617	switch (se_value->value_type) {
618	case SE_DATA_TYPE_BYTE:
619		error = nvlist_add_byte(*ev_attr_list, name,
620		    se_value->value.sv_byte);
621		break;
622	case SE_DATA_TYPE_INT16:
623		error = nvlist_add_int16(*ev_attr_list, name,
624		    se_value->value.sv_int16);
625		break;
626	case SE_DATA_TYPE_UINT16:
627		error = nvlist_add_uint16(*ev_attr_list, name,
628		    se_value->value.sv_uint16);
629		break;
630	case SE_DATA_TYPE_INT32:
631		error = nvlist_add_int32(*ev_attr_list, name,
632		    se_value->value.sv_int32);
633		break;
634	case SE_DATA_TYPE_UINT32:
635		error = nvlist_add_uint32(*ev_attr_list, name,
636		    se_value->value.sv_uint32);
637		break;
638	case SE_DATA_TYPE_INT64:
639		error = nvlist_add_int64(*ev_attr_list, name,
640		    se_value->value.sv_int64);
641		break;
642	case SE_DATA_TYPE_UINT64:
643		error = nvlist_add_uint64(*ev_attr_list, name,
644		    se_value->value.sv_uint64);
645		break;
646	case SE_DATA_TYPE_STRING:
647		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
648			return (SE_EINVAL);
649		error = nvlist_add_string(*ev_attr_list, name,
650		    se_value->value.sv_string);
651		break;
652	case SE_DATA_TYPE_BYTES:
653		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
654			return (SE_EINVAL);
655		error = nvlist_add_byte_array(*ev_attr_list, name,
656		    se_value->value.sv_bytes.data,
657		    se_value->value.sv_bytes.size);
658		break;
659	case SE_DATA_TYPE_TIME:
660		error = nvlist_add_hrtime(*ev_attr_list, name,
661		    se_value->value.sv_time);
662		break;
663	default:
664		return (SE_EINVAL);
665	}
666
667	return (error ? SE_ENOMEM : 0);
668}
669
670/*
671 * sysevent_free_attr - Free an attribute list not associated with an
672 *			event buffer.
673 */
674void
675sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
676{
677	nvlist_free((nvlist_t *)ev_attr_list);
678}
679
680/*
681 * sysevent_attach_attributes - Attach an attribute list to an event buffer.
682 *
683 *	This data will be re-packed into contiguous memory when the event
684 *	buffer is posted to log_sysevent.
685 */
686int
687sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
688{
689	size_t size = 0;
690
691	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
692		return (SE_EINVAL);
693	}
694
695	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
696	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
697	SE_PAYLOAD_SZ(ev) += size;
698	SE_FLAG(ev) = 0;
699
700	return (0);
701}
702
703/*
704 * sysevent_detach_attributes - Detach but don't free attribute list from the
705 *				event buffer.
706 */
707void
708sysevent_detach_attributes(sysevent_t *ev)
709{
710	size_t size = 0;
711	nvlist_t *nvl;
712
713	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
714		return;
715	}
716
717	SE_ATTR_PTR(ev) = UINT64_C(0);
718	(void) nvlist_size(nvl, &size, encoding);
719	SE_PAYLOAD_SZ(ev) -= size;
720	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
721}
722
723/*
724 * sysevent_attr_name - Get name of attribute
725 */
726char *
727sysevent_attr_name(sysevent_attr_t *attr)
728{
729	if (attr == NULL) {
730		return (NULL);
731	}
732
733	return (nvpair_name(attr));
734}
735
736/*
737 * sysevent_attr_type - Get type of attribute
738 */
739int
740sysevent_attr_type(sysevent_attr_t *attr)
741{
742	/*
743	 * The SE_DATA_TYPE_* are typedef'ed to be the
744	 * same value as DATA_TYPE_*
745	 */
746	return (nvpair_type((nvpair_t *)attr));
747}
748
749/*
750 * Repack event buffer into contiguous memory
751 */
752static sysevent_t *
753se_repack(sysevent_t *ev, int flag)
754{
755	size_t copy_len;
756	caddr_t attr;
757	size_t size;
758	uint64_t attr_offset;
759	sysevent_t *copy;
760	log_eventq_t *qcopy;
761	sysevent_attr_list_t *nvl;
762
763	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
764	qcopy = kmem_zalloc(copy_len, flag);
765	if (qcopy == NULL) {
766		return (NULL);
767	}
768	copy = (sysevent_t *)&qcopy->arg.buf;
769
770	/*
771	 * Copy event header, class, subclass and publisher names
772	 * Set the attribute offset (in number of bytes) to contiguous
773	 * memory after the header.
774	 */
775
776	attr_offset = SE_ATTR_OFF(ev);
777
778	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
779
780	bcopy(ev, copy, attr_offset);
781
782	/* Check if attribute list exists */
783	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
784		return (copy);
785	}
786
787	/*
788	 * Copy attribute data to contiguous memory
789	 */
790	attr = (char *)copy + attr_offset;
791	(void) nvlist_size(nvl, &size, encoding);
792	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
793		kmem_free(qcopy, copy_len);
794		return (NULL);
795	}
796	SE_ATTR_PTR(copy) = UINT64_C(0);
797	SE_FLAG(copy) = SE_PACKED_BUF;
798
799	return (copy);
800}
801
802/*
803 * The sysevent registration provides a persistent and reliable database
804 * for channel information for sysevent channel publishers and
805 * subscribers.
806 *
807 * A channel is created and maintained by the kernel upon the first
808 * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
809 * event subscription information is updated as publishers or subscribers
810 * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
811 * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
812 *
813 * For consistency, id's are assigned for every publisher or subscriber
814 * bound to a particular channel.  The id's are used to constrain resources
815 * and perform subscription lookup.
816 *
817 * Associated with each channel is a hashed list of the current subscriptions
818 * based upon event class and subclasses.  A subscription contains a class name,
819 * list of possible subclasses and an array of subscriber ids.  Subscriptions
820 * are updated for every SE_REGISTER or SE_UNREGISTER operation.
821 *
822 * Channels are closed once the last subscriber or publisher performs a
823 * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
824 * channel are freed upon last close.
825 *
826 * Locking:
827 *	Every operation to log_sysevent() is protected by a single lock,
828 *	registered_channel_mutex.  It is expected that the granularity of
829 *	a single lock is sufficient given the frequency that updates will
830 *	occur.
831 *
832 *	If this locking strategy proves to be too contentious, a per-hash
833 *	or per-channel locking strategy may be implemented.
834 */
835
836
#define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
					% CHAN_HASH_SZ)

/* Hash table of open channels, chained through scd_next. */
sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
/* Number of open channels; bounded via MAX_CHAN in open_channel(). */
static int channel_cnt;
static void remove_all_class(sysevent_channel_descriptor_t *chan,
	uint32_t sub_id);
844
/*
 * hash_func - 32-bit string hash (shift-and-fold, ELF-hash style):
 *	shift in 4 bits per character and fold the top nibble back into
 *	the low bits whenever it becomes non-zero.
 */
static uint32_t
hash_func(const char *s)
{
	uint32_t h = 0;
	uint32_t hibits;
	const char *p;

	for (p = s; *p != '\0'; p++) {
		h = (h << 4) + (uint32_t)*p;
		hibits = h & 0xf0000000;
		if (hibits != 0) {
			h ^= hibits >> 24;
			h ^= hibits;
		}
	}

	return (h);
}
863
864static sysevent_channel_descriptor_t *
865get_channel(char *channel_name)
866{
867	int hash_index;
868	sysevent_channel_descriptor_t *chan_list;
869
870	if (channel_name == NULL)
871		return (NULL);
872
873	/* Find channel descriptor */
874	hash_index = CHANN_HASH(channel_name);
875	chan_list = registered_channels[hash_index];
876	while (chan_list != NULL) {
877		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
878			break;
879		} else {
880			chan_list = chan_list->scd_next;
881		}
882	}
883
884	return (chan_list);
885}
886
887static class_lst_t *
888create_channel_registration(sysevent_channel_descriptor_t *chan,
889    char *event_class, int index)
890{
891	size_t class_len;
892	class_lst_t *c_list;
893
894	class_len = strlen(event_class) + 1;
895	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
896	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
897	bcopy(event_class, c_list->cl_name, class_len);
898
899	c_list->cl_subclass_list =
900	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
901	c_list->cl_subclass_list->sl_name =
902	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
903	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
904	    sizeof (EC_SUB_ALL));
905
906	c_list->cl_next = chan->scd_class_list_tbl[index];
907	chan->scd_class_list_tbl[index] = c_list;
908
909	return (c_list);
910}
911
912static void
913free_channel_registration(sysevent_channel_descriptor_t *chan)
914{
915	int i;
916	class_lst_t *clist, *next_clist;
917	subclass_lst_t *sclist, *next_sc;
918
919	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
920
921		clist = chan->scd_class_list_tbl[i];
922		while (clist != NULL) {
923			sclist = clist->cl_subclass_list;
924			while (sclist != NULL) {
925				kmem_free(sclist->sl_name,
926				    strlen(sclist->sl_name) + 1);
927				next_sc = sclist->sl_next;
928				kmem_free(sclist, sizeof (subclass_lst_t));
929				sclist = next_sc;
930			}
931			kmem_free(clist->cl_name,
932			    strlen(clist->cl_name) + 1);
933			next_clist = clist->cl_next;
934			kmem_free(clist, sizeof (class_lst_t));
935			clist = next_clist;
936		}
937	}
938	chan->scd_class_list_tbl[0] = NULL;
939}
940
/*
 * open_channel - Look up or create the channel descriptor for
 *	channel_name and take a reference on it.  Returns 0 on success,
 *	-1 on failure.
 *
 *	Ownership of channel_name: if the channel already exists the
 *	string is freed here; if a new channel is created the pointer is
 *	retained as scd_channel_name (freed later by close_channel()).
 *	On failure the caller keeps ownership.
 *
 *	NOTE(review): callers are presumed to hold
 *	registered_channel_mutex (see the Locking notes above) -- confirm
 *	at the call sites.
 */
static int
open_channel(char *channel_name)
{
	int hash_index;
	sysevent_channel_descriptor_t *chan, *chan_list;


	/* NOTE(review): '>' admits MAX_CHAN + 1 open channels. */
	if (channel_cnt > MAX_CHAN) {
		return (-1);
	}

	/* Find channel descriptor */
	hash_index = CHANN_HASH(channel_name);
	chan_list = registered_channels[hash_index];
	while (chan_list != NULL) {
		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
			chan_list->scd_ref_cnt++;
			kmem_free(channel_name, strlen(channel_name) + 1);
			return (0);
		} else {
			chan_list = chan_list->scd_next;
		}
	}


	/* New channel descriptor */
	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
	chan->scd_channel_name = channel_name;

	/*
	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
	 * Subscriber id 0 is never allocated, but is used as a reserved id
	 * by libsysevent
	 */
	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
		return (-1);
	}
	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
		vmem_destroy(chan->scd_subscriber_cache);
		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
		return (-1);
	}

	chan->scd_ref_cnt = 1;

	/* Slot 0 always carries the match-all (EC_ALL) registration. */
	(void) create_channel_registration(chan, EC_ALL, 0);

	if (registered_channels[hash_index] != NULL)
		chan->scd_next = registered_channels[hash_index];

	registered_channels[hash_index] = chan;

	++channel_cnt;

	return (0);
}
1002
/*
 * close_channel - Drop a reference on the named channel.  On last
 *	reference, tear down all registrations, the id arenas, the name
 *	string and the descriptor itself, and unhook it from the hash
 *	chain.
 */
static void
close_channel(char *channel_name)
{
	int hash_index;
	sysevent_channel_descriptor_t *chan, *prev_chan;

	/* Find channel descriptor */
	hash_index = CHANN_HASH(channel_name);
	prev_chan = chan = registered_channels[hash_index];

	while (chan != NULL) {
		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
			break;
		} else {
			prev_chan = chan;
			chan = chan->scd_next;
		}
	}

	if (chan == NULL)
		return;

	chan->scd_ref_cnt--;
	if (chan->scd_ref_cnt > 0)
		return;

	/*
	 * Last reference gone.  Note channel_name may alias
	 * scd_channel_name (see release_id()), which is freed below.
	 */
	free_channel_registration(chan);
	vmem_destroy(chan->scd_subscriber_cache);
	vmem_destroy(chan->scd_publisher_cache);
	kmem_free(chan->scd_channel_name,
	    strlen(chan->scd_channel_name) + 1);
	if (registered_channels[hash_index] == chan)
		registered_channels[hash_index] = chan->scd_next;
	else
		prev_chan->scd_next = chan->scd_next;
	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
	--channel_cnt;
}
1041
1042static id_t
1043bind_common(sysevent_channel_descriptor_t *chan, int type)
1044{
1045	id_t id;
1046
1047	if (type == SUBSCRIBER) {
1048		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1049		    VM_NOSLEEP | VM_NEXTFIT);
1050		if (id <= 0 || id > MAX_SUBSCRIBERS)
1051			return (0);
1052		chan->scd_subscriber_ids[id] = 1;
1053	} else {
1054		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1055		    VM_NOSLEEP | VM_NEXTFIT);
1056		if (id <= 0 || id > MAX_PUBLISHERS)
1057			return (0);
1058		chan->scd_publisher_ids[id] = 1;
1059	}
1060
1061	return (id);
1062}
1063
1064static int
1065unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1066{
1067	if (type == SUBSCRIBER) {
1068		if (id <= 0 || id > MAX_SUBSCRIBERS)
1069			return (0);
1070		if (chan->scd_subscriber_ids[id] == 0)
1071			return (0);
1072		(void) remove_all_class(chan, id);
1073		chan->scd_subscriber_ids[id] = 0;
1074		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1075	} else {
1076		if (id <= 0 || id > MAX_PUBLISHERS)
1077			return (0);
1078		if (chan->scd_publisher_ids[id] == 0)
1079			return (0);
1080		chan->scd_publisher_ids[id] = 0;
1081		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1082	}
1083
1084	return (1);
1085}
1086
/*
 * release_id - Unbind the given subscriber/publisher id and drop the
 *	channel reference it held; close_channel() tears the channel down
 *	when the last reference goes away.
 */
static void
release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
{
	if (unbind_common(chan, type, id))
		close_channel(chan->scd_channel_name);
}
1093
1094static subclass_lst_t *
1095find_subclass(class_lst_t *c_list, char *subclass)
1096{
1097	subclass_lst_t *sc_list;
1098
1099	if (c_list == NULL)
1100		return (NULL);
1101
1102	sc_list = c_list->cl_subclass_list;
1103
1104	while (sc_list != NULL) {
1105		if (strcmp(sc_list->sl_name, subclass) == 0) {
1106			return (sc_list);
1107		}
1108		sc_list = sc_list->sl_next;
1109	}
1110
1111	return (NULL);
1112}
1113
1114static void
1115insert_subclass(class_lst_t *c_list, char **subclass_names,
1116    int subclass_num, uint32_t sub_id)
1117{
1118	int i, subclass_sz;
1119	subclass_lst_t *sc_list;
1120
1121	for (i = 0; i < subclass_num; ++i) {
1122		if ((sc_list = find_subclass(c_list, subclass_names[i]))
1123		    != NULL) {
1124			sc_list->sl_num[sub_id] = 1;
1125		} else {
1126
1127			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1128			    KM_SLEEP);
1129			subclass_sz = strlen(subclass_names[i]) + 1;
1130			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1131			bcopy(subclass_names[i], sc_list->sl_name,
1132			    subclass_sz);
1133
1134			sc_list->sl_num[sub_id] = 1;
1135
1136			sc_list->sl_next = c_list->cl_subclass_list;
1137			c_list->cl_subclass_list = sc_list;
1138		}
1139	}
1140}
1141
1142static class_lst_t *
1143find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1144{
1145	class_lst_t *c_list;
1146
1147	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1148	while (c_list != NULL) {
1149		if (strcmp(class_name, c_list->cl_name) == 0)
1150			break;
1151		c_list = c_list->cl_next;
1152	}
1153
1154	return (c_list);
1155}
1156
1157static void
1158remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1159{
1160	int i;
1161	class_lst_t *c_list;
1162	subclass_lst_t *sc_list;
1163
1164	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1165
1166		c_list = chan->scd_class_list_tbl[i];
1167		while (c_list != NULL) {
1168			sc_list = c_list->cl_subclass_list;
1169			while (sc_list != NULL) {
1170				sc_list->sl_num[sub_id] = 0;
1171				sc_list = sc_list->sl_next;
1172			}
1173			c_list = c_list->cl_next;
1174		}
1175	}
1176}
1177
1178static void
1179remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1180    char *class_name)
1181{
1182	class_lst_t *c_list;
1183	subclass_lst_t *sc_list;
1184
1185	if (strcmp(class_name, EC_ALL) == 0) {
1186		remove_all_class(chan, sub_id);
1187		return;
1188	}
1189
1190	if ((c_list = find_class(chan, class_name)) == NULL) {
1191		return;
1192	}
1193
1194	sc_list = c_list->cl_subclass_list;
1195	while (sc_list != NULL) {
1196		sc_list->sl_num[sub_id] = 0;
1197		sc_list = sc_list->sl_next;
1198	}
1199}
1200
1201static int
1202insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1203    char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1204{
1205	class_lst_t *c_list;
1206
1207	if (strcmp(event_class, EC_ALL) == 0) {
1208		insert_subclass(chan->scd_class_list_tbl[0],
1209		    event_subclass_lst, 1, sub_id);
1210		return (0);
1211	}
1212
1213	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1214		return (-1);
1215
1216	/* New class, add to the registration cache */
1217	if ((c_list = find_class(chan, event_class)) == NULL) {
1218		c_list = create_channel_registration(chan, event_class,
1219		    CLASS_HASH(event_class));
1220	}
1221
1222	/* Update the subclass list */
1223	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1224
1225	return (0);
1226}
1227
1228static int
1229add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1230    char *nvlbuf, size_t nvlsize)
1231{
1232	uint_t num_elem;
1233	char *event_class;
1234	char **event_list;
1235	nvlist_t *nvl;
1236	nvpair_t *nvpair = NULL;
1237
1238	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1239		return (-1);
1240
1241	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1242		nvlist_free(nvl);
1243		return (-1);
1244	}
1245
1246	if ((event_class = nvpair_name(nvpair)) == NULL) {
1247		nvlist_free(nvl);
1248		return (-1);
1249	}
1250	if (nvpair_value_string_array(nvpair, &event_list,
1251	    &num_elem) != 0) {
1252		nvlist_free(nvl);
1253		return (-1);
1254	}
1255
1256	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1257		nvlist_free(nvl);
1258		return (-1);
1259	}
1260
1261	nvlist_free(nvl);
1262
1263	return (0);
1264}
1265
1266/*
1267 * get_registration - Return the requested class hash chain
1268 */
1269static int
1270get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1271    uint32_t *bufsz, uint32_t class_index)
1272{
1273	int num_classes = 0;
1274	char *nvlbuf = NULL;
1275	size_t nvlsize;
1276	nvlist_t *nvl;
1277	class_lst_t *clist;
1278	subclass_lst_t *sc_list;
1279
1280	if (class_index < 0 || class_index > CLASS_HASH_SZ)
1281		return (EINVAL);
1282
1283	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1284		return (ENOENT);
1285	}
1286
1287	if (nvlist_alloc(&nvl, 0, 0) != 0) {
1288		return (EFAULT);
1289	}
1290
1291	while (clist != NULL) {
1292		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1293		    != 0) {
1294			nvlist_free(nvl);
1295			return (EFAULT);
1296		}
1297
1298		sc_list = clist->cl_subclass_list;
1299		while (sc_list != NULL) {
1300			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1301			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1302				nvlist_free(nvl);
1303				return (EFAULT);
1304			}
1305			sc_list = sc_list->sl_next;
1306		}
1307		num_classes++;
1308		clist = clist->cl_next;
1309	}
1310
1311	if (num_classes == 0) {
1312		nvlist_free(nvl);
1313		return (ENOENT);
1314	}
1315
1316	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1317	    KM_SLEEP)
1318	    != 0) {
1319		nvlist_free(nvl);
1320		return (EFAULT);
1321	}
1322
1323	nvlist_free(nvl);
1324
1325	if (nvlsize > *bufsz) {
1326		kmem_free(nvlbuf, nvlsize);
1327		*bufsz = nvlsize;
1328		return (EAGAIN);
1329	}
1330
1331	bcopy(nvlbuf, databuf, nvlsize);
1332	kmem_free(nvlbuf, nvlsize);
1333
1334	return (0);
1335}
1336
1337/*
1338 * log_sysevent_register - Register event subscriber for a particular
1339 *		event channel.
1340 */
1341int
1342log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1343{
1344	int error = 0;
1345	char *kchannel, *databuf = NULL;
1346	size_t bufsz;
1347	se_pubsub_t kdata;
1348	sysevent_channel_descriptor_t *chan;
1349
1350	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1351		return (EFAULT);
1352	}
1353	if (kdata.ps_channel_name_len == 0) {
1354		return (EINVAL);
1355	}
1356	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1357	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1358		kmem_free(kchannel, kdata.ps_channel_name_len);
1359		return (EFAULT);
1360	}
1361	bufsz = kdata.ps_buflen;
1362	if (bufsz > 0) {
1363		databuf = kmem_alloc(bufsz, KM_SLEEP);
1364		if (copyin(udatabuf, databuf, bufsz) == -1) {
1365			kmem_free(kchannel, kdata.ps_channel_name_len);
1366			kmem_free(databuf, bufsz);
1367			return (EFAULT);
1368		}
1369	}
1370
1371	mutex_enter(&registered_channel_mutex);
1372	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1373	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
1374		chan = get_channel(kchannel);
1375		if (chan == NULL) {
1376			mutex_exit(&registered_channel_mutex);
1377			kmem_free(kchannel, kdata.ps_channel_name_len);
1378			if (bufsz > 0)
1379				kmem_free(databuf, bufsz);
1380			return (ENOENT);
1381		}
1382	}
1383
1384	switch (kdata.ps_op) {
1385	case SE_OPEN_REGISTRATION:
1386		if (open_channel(kchannel) != 0) {
1387			error = ENOMEM;
1388			if (bufsz > 0)
1389				kmem_free(databuf, bufsz);
1390			kmem_free(kchannel, kdata.ps_channel_name_len);
1391		}
1392
1393		mutex_exit(&registered_channel_mutex);
1394		return (error);
1395	case SE_CLOSE_REGISTRATION:
1396		close_channel(kchannel);
1397		break;
1398	case SE_BIND_REGISTRATION:
1399		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1400			error = EBUSY;
1401		break;
1402	case SE_UNBIND_REGISTRATION:
1403		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1404		break;
1405	case SE_REGISTER:
1406		if (bufsz == 0) {
1407			error = EINVAL;
1408			break;
1409		}
1410		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1411			error = EINVAL;
1412		break;
1413	case SE_UNREGISTER:
1414		if (bufsz == 0) {
1415			error = EINVAL;
1416			break;
1417		}
1418		remove_class(chan, kdata.ps_id, databuf);
1419		break;
1420	case SE_CLEANUP:
1421		/* Cleanup the indicated subscriber or publisher */
1422		release_id(chan, kdata.ps_type, kdata.ps_id);
1423		break;
1424	case SE_GET_REGISTRATION:
1425		error = get_registration(chan, databuf,
1426		    &kdata.ps_buflen, kdata.ps_id);
1427		break;
1428	default:
1429		error = ENOTSUP;
1430	}
1431
1432	mutex_exit(&registered_channel_mutex);
1433
1434	kmem_free(kchannel, kdata.ps_channel_name_len);
1435
1436	if (bufsz > 0) {
1437		if (copyout(databuf, udatabuf, bufsz) == -1)
1438			error = EFAULT;
1439		kmem_free(databuf, bufsz);
1440	}
1441
1442	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1443		return (EFAULT);
1444
1445	return (error);
1446}
1447
1448/*
1449 * log_sysevent_copyout_data - Copyout event data to userland.
1450 *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA)
1451 *			The buffer size is always sufficient.
1452 */
1453int
1454log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1455{
1456	int error = ENOENT;
1457	log_eventq_t *q;
1458	sysevent_t *ev;
1459	sysevent_id_t eid_copy;
1460
1461	/*
1462	 * Copy eid
1463	 */
1464	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1465		return (EFAULT);
1466	}
1467
1468	mutex_enter(&eventq_sent_mutex);
1469	q = log_eventq_sent;
1470
1471	/*
1472	 * Search for event buffer on the sent queue with matching
1473	 * event identifier
1474	 */
1475	while (q) {
1476		ev = (sysevent_t *)&q->arg.buf;
1477
1478		if (SE_TIME(ev) != eid_copy.eid_ts ||
1479		    SE_SEQ(ev) != eid_copy.eid_seq) {
1480			q = q->next;
1481			continue;
1482		}
1483
1484		if (ubuflen < SE_SIZE(ev)) {
1485			error = EFAULT;
1486			break;
1487		}
1488		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1489			error = EFAULT;
1490			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1491			    "0x%" PRIx64 " from queue: EFAULT\n",
1492			    eid->eid_seq));
1493		} else {
1494			error = 0;
1495		}
1496		break;
1497	}
1498
1499	mutex_exit(&eventq_sent_mutex);
1500
1501	return (error);
1502}
1503
1504/*
1505 * log_sysevent_free_data - Free kernel copy of the event buffer identified
1506 *			by eid (must have already been sent).  Called from
1507 *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
1508 */
1509int
1510log_sysevent_free_data(sysevent_id_t *eid)
1511{
1512	int error = ENOENT;
1513	sysevent_t *ev;
1514	log_eventq_t *q, *prev = NULL;
1515	sysevent_id_t eid_copy;
1516
1517	/*
1518	 * Copy eid
1519	 */
1520	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1521		return (EFAULT);
1522	}
1523
1524	mutex_enter(&eventq_sent_mutex);
1525	q = log_eventq_sent;
1526
1527	/*
1528	 * Look for the event to be freed on the sent queue.  Due to delayed
1529	 * processing of the event, it may not be on the sent queue yet.
1530	 * It is up to the user to retry the free operation to ensure that the
1531	 * event is properly freed.
1532	 */
1533	while (q) {
1534		ev = (sysevent_t *)&q->arg.buf;
1535
1536		if (SE_TIME(ev) != eid_copy.eid_ts ||
1537		    SE_SEQ(ev) != eid_copy.eid_seq) {
1538			prev = q;
1539			q = q->next;
1540			continue;
1541		}
1542		/*
1543		 * Take it out of log_eventq_sent and free it
1544		 */
1545		if (prev) {
1546			prev->next = q->next;
1547		} else {
1548			log_eventq_sent = q->next;
1549		}
1550		free_packed_event(ev);
1551		error = 0;
1552		break;
1553	}
1554
1555	mutex_exit(&eventq_sent_mutex);
1556
1557	return (error);
1558}
1559
1560/*
1561 * log_sysevent_flushq - Begin or resume event buffer delivery.  If neccessary,
1562 *			create log_event_deliver thread or wake it up
1563 */
1564/*ARGSUSED*/
1565void
1566log_sysevent_flushq(int cmd, uint_t flag)
1567{
1568	mutex_enter(&eventq_head_mutex);
1569
1570	/*
1571	 * Start the event delivery thread
1572	 * Mark the upcall status as active since we should
1573	 * now be able to begin emptying the queue normally.
1574	 */
1575	if (!async_thread) {
1576		sysevent_upcall_status = 0;
1577		sysevent_daemon_init = 1;
1578		setup_ddi_poststartup();
1579		async_thread = thread_create(NULL, 0, log_event_deliver,
1580		    NULL, 0, &p0, TS_RUN, minclsyspri);
1581	}
1582
1583	log_event_delivery = LOGEVENT_DELIVERY_CONT;
1584	cv_signal(&log_event_cv);
1585	mutex_exit(&eventq_head_mutex);
1586}
1587
1588/*
1589 * log_sysevent_filename - Called by syseventd via
1590 *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1591 *			to subsequently bind the event_door.
1592 *
1593 *			This routine is called everytime syseventd (re)starts
1594 *			and must therefore replay any events buffers that have
1595 *			been sent but not freed.
1596 *
1597 *			Event buffer delivery begins after a call to
1598 *			log_sysevent_flushq().
1599 */
1600int
1601log_sysevent_filename(char *file)
1602{
1603	mutex_enter(&event_door_mutex);
1604
1605	(void) strlcpy(logevent_door_upcall_filename, file,
1606	    sizeof (logevent_door_upcall_filename));
1607
1608	/* Unbind old event door */
1609	if (event_door != NULL)
1610		door_ki_rele(event_door);
1611	/* Establish door connection with user event daemon (syseventd) */
1612	if (door_ki_open(logevent_door_upcall_filename, &event_door) != 0)
1613		event_door = NULL;
1614
1615	mutex_exit(&event_door_mutex);
1616
1617	/*
1618	 * We are called when syseventd restarts. Move all sent, but
1619	 * not committed events from log_eventq_sent to log_eventq_head.
1620	 * Do it in proper order to maintain increasing event id.
1621	 */
1622	mutex_enter(&eventq_head_mutex);
1623
1624	mutex_enter(&eventq_sent_mutex);
1625	while (log_eventq_sent) {
1626		log_eventq_t *tmp = log_eventq_sent->next;
1627		log_eventq_sent->next = log_eventq_head;
1628		if (log_eventq_head == NULL) {
1629			ASSERT(log_eventq_cnt == 0);
1630			log_eventq_tail = log_eventq_sent;
1631			log_eventq_tail->next = NULL;
1632		} else if (log_eventq_head == log_eventq_tail) {
1633			ASSERT(log_eventq_cnt == 1);
1634			ASSERT(log_eventq_head->next == NULL);
1635			ASSERT(log_eventq_tail->next == NULL);
1636		}
1637		log_eventq_head = log_eventq_sent;
1638		log_eventq_sent = tmp;
1639		log_eventq_cnt++;
1640	}
1641	mutex_exit(&eventq_sent_mutex);
1642	mutex_exit(&eventq_head_mutex);
1643
1644	return (0);
1645}
1646
1647/*
1648 * queue_sysevent - queue an event buffer
1649 */
1650static int
1651queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1652{
1653	log_eventq_t *q;
1654
1655	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1656
1657	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1658
1659restart:
1660
1661	/* Max Q size exceeded */
1662	mutex_enter(&event_qfull_mutex);
1663	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1664		/*
1665		 * If queue full and transport down, return no transport
1666		 */
1667		if (sysevent_upcall_status != 0) {
1668			mutex_exit(&event_qfull_mutex);
1669			free_packed_event(ev);
1670			eid->eid_seq = UINT64_C(0);
1671			eid->eid_ts = INT64_C(0);
1672			return (SE_NO_TRANSPORT);
1673		}
1674		if (flag == SE_NOSLEEP) {
1675			mutex_exit(&event_qfull_mutex);
1676			free_packed_event(ev);
1677			eid->eid_seq = UINT64_C(0);
1678			eid->eid_ts = INT64_C(0);
1679			return (SE_EQSIZE);
1680		}
1681		event_qfull_blocked++;
1682		cv_wait(&event_qfull_cv, &event_qfull_mutex);
1683		event_qfull_blocked--;
1684		mutex_exit(&event_qfull_mutex);
1685		goto restart;
1686	}
1687	mutex_exit(&event_qfull_mutex);
1688
1689	mutex_enter(&eventq_head_mutex);
1690
1691	/* Time stamp and assign ID */
1692	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1693	    (uint64_t)1);
1694	SE_TIME(ev) = eid->eid_ts = gethrtime();
1695
1696	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1697	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1698
1699	/*
1700	 * Put event on eventq
1701	 */
1702	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1703	q->next = NULL;
1704	if (log_eventq_head == NULL) {
1705		ASSERT(log_eventq_cnt == 0);
1706		log_eventq_head = q;
1707		log_eventq_tail = q;
1708	} else {
1709		if (log_eventq_head == log_eventq_tail) {
1710			ASSERT(log_eventq_cnt == 1);
1711			ASSERT(log_eventq_head->next == NULL);
1712			ASSERT(log_eventq_tail->next == NULL);
1713		}
1714		log_eventq_tail->next = q;
1715		log_eventq_tail = q;
1716	}
1717	log_eventq_cnt++;
1718
1719	/* Signal event delivery thread */
1720	if (log_eventq_cnt == 1) {
1721		cv_signal(&log_event_cv);
1722	}
1723	mutex_exit(&eventq_head_mutex);
1724
1725	return (0);
1726}
1727
1728/*
1729 * log_sysevent - kernel system event logger.
1730 *
1731 * Returns SE_ENOMEM if buf allocation failed or SE_EQSIZE if the
1732 * maximum event queue size will be exceeded
1733 * Returns 0 for successfully queued event buffer
1734 */
1735int
1736log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1737{
1738	sysevent_t *ev_copy;
1739	int rval;
1740
1741	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1742	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1743
1744	ev_copy = se_repack(ev, flag);
1745	if (ev_copy == NULL) {
1746		ASSERT(flag == SE_NOSLEEP);
1747		return (SE_ENOMEM);
1748	}
1749	rval = queue_sysevent(ev_copy, eid, flag);
1750	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1751	    rval == SE_NO_TRANSPORT);
1752	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1753	return (rval);
1754}
1755
1756/*
1757 * Publish EC_DEV_ADD and EC_DEV_REMOVE events from devfsadm to lofi.
1758 * This interface is needed to pass device link names to the lofi driver,
1759 * to be returned via ioctl() to the lofiadm command.
1760 * The problem is, if lofiadm is executed in local zone, there is no
1761 * mechanism to announce the device name from the /dev tree back to lofiadm,
1762 * as sysevents are not accessible from local zone and devfsadmd is only
1763 * running in global zone.
1764 *
1765 * Delayed/missed events are not fatal for lofi, as the device name returned
1766 * to lofiadm is for information and can be re-queried with listing
1767 * mappings with lofiadm command.
1768 *
1769 * Once we have a better method, this interface should be reworked.
1770 */
1771static void
1772notify_lofi(sysevent_t *ev)
1773{
1774	nvlist_t *nvlist;
1775	char name[10], *class, *driver;
1776	int32_t instance;
1777
1778	class = sysevent_get_class_name(ev);
1779	if ((strcmp(EC_DEV_ADD, class) != 0) &&
1780	    (strcmp(EC_DEV_REMOVE, class) != 0)) {
1781		return;
1782	}
1783
1784	(void) sysevent_get_attr_list(ev, &nvlist);
1785	driver = fnvlist_lookup_string(nvlist, DEV_DRIVER_NAME);
1786	instance = fnvlist_lookup_int32(nvlist, DEV_INSTANCE);
1787
1788	/* We are only interested about lofi. */
1789	if (strcmp(driver, "lofi") != 0) {
1790		fnvlist_free(nvlist);
1791		return;
1792	}
1793
1794	/*
1795	 * insert or remove device info, then announce the change
1796	 * via cv_broadcast.
1797	 */
1798	(void) snprintf(name, sizeof (name), "%d", instance);
1799	mutex_enter(&lofi_devlink_cache.ln_lock);
1800	if (strcmp(class, EC_DEV_ADD) == 0) {
1801		fnvlist_add_nvlist(lofi_devlink_cache.ln_data, name, nvlist);
1802	} else {
1803		/* Can not use fnvlist_remove() as we can get ENOENT. */
1804		(void) nvlist_remove_all(lofi_devlink_cache.ln_data, name);
1805	}
1806	cv_broadcast(&lofi_devlink_cache.ln_cv);
1807	mutex_exit(&lofi_devlink_cache.ln_lock);
1808
1809	fnvlist_free(nvlist);
1810}
1811
1812/*
1813 * log_usr_sysevent - user system event logger
1814 *			Private to devfsadm and accessible only via
1815 *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1816 */
1817int
1818log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1819{
1820	int ret, copy_sz;
1821	sysevent_t *ev_copy;
1822	sysevent_id_t new_eid;
1823	log_eventq_t *qcopy;
1824
1825	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1826	    offsetof(log_event_upcall_arg_t, buf);
1827	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1828	ev_copy = (sysevent_t *)&qcopy->arg.buf;
1829
1830	/*
1831	 * Copy event
1832	 */
1833	if (copyin(ev, ev_copy, ev_size) == -1) {
1834		kmem_free(qcopy, copy_sz);
1835		return (EFAULT);
1836	}
1837
1838	notify_lofi(ev_copy);
1839
1840	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1841		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1842			return (EAGAIN);
1843		else
1844			return (EIO);
1845	}
1846
1847	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1848		return (EFAULT);
1849	}
1850
1851	return (0);
1852}
1853
1854
1855
1856int
1857ddi_log_sysevent(
1858	dev_info_t		*dip,
1859	char			*vendor,
1860	char			*class,
1861	char			*subclass,
1862	nvlist_t		*attr_list,
1863	sysevent_id_t		*eidp,
1864	int			sleep_flag)
1865{
1866	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
1867	char			pubstr[32];
1868	sysevent_t		*event;
1869	sysevent_id_t		eid;
1870	const char		*drvname;
1871	char			*publisher;
1872	int			se_flag;
1873	int			rval;
1874	int			n;
1875
1876	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1877		cmn_err(CE_NOTE, "!ddi_log_syevent: driver %s%d - cannot queue "
1878		    "event from interrupt context with sleep semantics\n",
1879		    ddi_driver_name(dip), ddi_get_instance(dip));
1880		return (DDI_ECONTEXT);
1881	}
1882
1883	drvname = ddi_driver_name(dip);
1884	n = strlen(vendor) + strlen(drvname) + 7;
1885	if (n < sizeof (pubstr)) {
1886		publisher = pubstr;
1887	} else {
1888		publisher = kmem_alloc(n,
1889		    (sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1890		if (publisher == NULL) {
1891			return (DDI_ENOMEM);
1892		}
1893	}
1894	(void) strcpy(publisher, vendor);
1895	(void) strcat(publisher, ":kern:");
1896	(void) strcat(publisher, drvname);
1897
1898	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1899	event = sysevent_alloc(class, subclass, publisher, se_flag);
1900
1901	if (publisher != pubstr) {
1902		kmem_free(publisher, n);
1903	}
1904
1905	if (event == NULL) {
1906		return (DDI_ENOMEM);
1907	}
1908
1909	if (list) {
1910		(void) sysevent_attach_attributes(event, list);
1911	}
1912
1913	rval = log_sysevent(event, se_flag, &eid);
1914	if (list) {
1915		sysevent_detach_attributes(event);
1916	}
1917	sysevent_free(event);
1918	if (rval == 0) {
1919		if (eidp) {
1920			eidp->eid_seq = eid.eid_seq;
1921			eidp->eid_ts = eid.eid_ts;
1922		}
1923		return (DDI_SUCCESS);
1924	}
1925	if (rval == SE_NO_TRANSPORT)
1926		return (DDI_ETRANSPORT);
1927
1928	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1929	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1930}
1931
1932uint64_t
1933log_sysevent_new_id(void)
1934{
1935	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1936}
1937