xref: /illumos-gate/usr/src/uts/common/os/log_sysevent.c (revision 323a81d93e2f58a7d62f6e523f9fddbc029d3d0b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/errno.h>
30 #include <sys/stropts.h>
31 #include <sys/debug.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/vmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/callb.h>
37 #include <sys/sysevent.h>
38 #include <sys/sysevent_impl.h>
39 #include <sys/modctl.h>
40 #include <sys/sysmacros.h>
41 #include <sys/disp.h>
42 #include <sys/autoconf.h>
43 #include <sys/atomic.h>
44 #include <sys/sdt.h>
45 
46 /* for doors */
47 #include <sys/pathname.h>
48 #include <sys/door.h>
49 #include <sys/kmem.h>
50 #include <sys/cpuvar.h>
51 #include <sys/fs/snode.h>
52 
53 /*
54  * log_sysevent.c - Provides the interfaces for kernel event publication
55  *			to the sysevent event daemon (syseventd).
56  */
57 
58 /*
59  * Debug stuff
60  */
61 static int log_event_debug = 0;
62 #define	LOG_DEBUG(args)  if (log_event_debug) cmn_err args
63 #ifdef DEBUG
64 #define	LOG_DEBUG1(args)  if (log_event_debug > 1) cmn_err args
65 #else
66 #define	LOG_DEBUG1(args)
67 #endif
68 
69 /*
70  * Local static vars
71  */
72 /* queue of event buffers sent to syseventd */
73 static log_eventq_t *log_eventq_sent = NULL;
74 
75 /*
76  * Count of event buffers in the queue
77  */
78 int log_eventq_cnt = 0;
79 
80 /* queue of event buffers awaiting delivery to syseventd */
81 static log_eventq_t *log_eventq_head = NULL;
82 static log_eventq_t *log_eventq_tail = NULL;
83 static uint64_t kernel_event_id = 0;
84 static int encoding = NV_ENCODE_NATIVE;
85 
86 /* log event delivery flag */
87 #define	LOGEVENT_DELIVERY_OK	0	/* OK to deliver event buffers */
88 #define	LOGEVENT_DELIVERY_CONT	1	/* Continue to deliver event buffers */
89 #define	LOGEVENT_DELIVERY_HOLD	2	/* Hold delivering of event buffers */
90 
91 /*
92  * Tunable maximum event buffer queue size. Size depends on how many events
93  * the queue must hold when syseventd is not available, for example during
94  * system startup. Experience showed that more than 2000 events could be posted
95  * due to correctable memory errors.
96  */
97 int logevent_max_q_sz = 5000;
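/*
 * Illustrative only: as a global kernel tunable this can be raised
 * persistently from /etc/system (the value shown is an arbitrary example):
 *
 *	set logevent_max_q_sz = 10000
 */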
98 
99 
100 static int log_event_delivery = LOGEVENT_DELIVERY_HOLD;
101 static char *logevent_door_upcall_filename = NULL;
102 static int logevent_door_upcall_filename_size;
103 
104 static door_handle_t event_door = NULL;		/* Door for upcalls */
105 
106 /*
107  * async thread-related variables
108  *
109  * eventq_head_mutex - synchronizes access to the kernel event queue
110  *
111  * eventq_sent_mutex - synchronizes access to the queue of events sent to
112  *			userlevel
113  *
114  * log_event_cv - condition variable signaled when an event has arrived or
115  *			userlevel is ready to process event buffers
116  *
117  * async_thread - asynchronous event delivery thread to userlevel daemon.
118  *
119  * sysevent_upcall_status - status of the door upcall link
120  */
121 static kmutex_t eventq_head_mutex;
122 static kmutex_t eventq_sent_mutex;
123 static kcondvar_t log_event_cv;
124 static kthread_id_t async_thread = NULL;
125 
126 static kmutex_t event_qfull_mutex;
127 static kcondvar_t event_qfull_cv;
128 static int event_qfull_blocked = 0;
129 
130 static int sysevent_upcall_status = -1;
131 static kmutex_t registered_channel_mutex;
132 
133 /*
134  * Indicates the syseventd daemon has begun taking events
135  */
136 int sysevent_daemon_init = 0;
137 
138 /*
139  * Back-off delay when door_ki_upcall returns EAGAIN.  Typically
140  * caused by the server process doing a forkall().  Since all threads
141  * but the thread actually doing the forkall() need to be quiesced,
142  * the fork may take some time.  The min/max pause are in units
143  * of clock ticks.
144  */
145 #define	LOG_EVENT_MIN_PAUSE	8
146 #define	LOG_EVENT_MAX_PAUSE	128
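/*
 * Worked example: with the doubling back-off in log_event_upcall() capped
 * at LOG_EVENT_MAX_PAUSE, successive EAGAIN retries pause for 8, 16, 32,
 * 64, 128, 128, ... clock ticks.
 */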
147 
148 static kmutex_t	event_pause_mutex;
149 static kcondvar_t event_pause_cv;
150 static int event_pause_state = 0;
151 
152 /*
153  * log_event_upcall_lookup - Establish door connection with user event
154  *				daemon (syseventd)
155  */
156 static int
157 log_event_upcall_lookup()
158 {
159 	int	error;
160 
161 	if (event_door) {	/* Release our previous hold (if any) */
162 		door_ki_rele(event_door);
163 	}
164 
165 	event_door = NULL;
166 
167 	/*
168 	 * Locate the door used for upcalls
169 	 */
170 	if ((error =
171 	    door_ki_open(logevent_door_upcall_filename, &event_door)) != 0) {
172 		return (error);
173 	}
174 
175 	return (0);
176 }
177 
178 
179 /*ARGSUSED*/
180 static void
181 log_event_busy_timeout(void *arg)
182 {
183 	mutex_enter(&event_pause_mutex);
184 	event_pause_state = 0;
185 	cv_signal(&event_pause_cv);
186 	mutex_exit(&event_pause_mutex);
187 }
188 
189 static void
190 log_event_pause(int nticks)
191 {
192 	timeout_id_t id;
193 
194 	/*
195 	 * Only one use of log_event_pause at a time
196 	 */
197 	ASSERT(event_pause_state == 0);
198 
199 	event_pause_state = 1;
200 	id = timeout(log_event_busy_timeout, NULL, nticks);
201 	if (id != 0) {
202 		mutex_enter(&event_pause_mutex);
203 		while (event_pause_state)
204 			cv_wait(&event_pause_cv, &event_pause_mutex);
205 		mutex_exit(&event_pause_mutex);
206 	}
207 	event_pause_state = 0;
208 }
209 
210 
211 /*
212  * log_event_upcall - Perform the upcall to syseventd for event buffer delivery.
213  * 			Check for rebinding errors.
214  * 			The event buffer is reused by the syseventd door_return
215  *			to hold the result code.
216  */
217 static int
218 log_event_upcall(log_event_upcall_arg_t *arg)
219 {
220 	int error;
221 	size_t size;
222 	sysevent_t *ev;
223 	door_arg_t darg, save_arg;
224 	int retry;
225 	int neagain = 0;
226 	int neintr = 0;
227 	int nticks = LOG_EVENT_MIN_PAUSE;
228 
229 	/* Initialize door args */
230 	ev = (sysevent_t *)&arg->buf;
231 	size = sizeof (log_event_upcall_arg_t) + SE_PAYLOAD_SZ(ev);
232 
233 	darg.rbuf = (char *)arg;
234 	darg.data_ptr = (char *)arg;
235 	darg.rsize = size;
236 	darg.data_size = size;
237 	darg.desc_ptr = NULL;
238 	darg.desc_num = 0;
239 
240 	if ((event_door == NULL) &&
241 	    ((error = log_event_upcall_lookup()) != 0)) {
242 		LOG_DEBUG((CE_CONT,
243 		    "log_event_upcall: event_door error (%d)\n", error));
244 
245 		return (error);
246 	}
247 
248 	LOG_DEBUG1((CE_CONT, "log_event_upcall: 0x%llx\n",
249 	    (longlong_t)SE_SEQ((sysevent_t *)&arg->buf)));
250 
251 	save_arg = darg;
252 	for (retry = 0; ; retry++) {
253 		if ((error = door_ki_upcall_limited(event_door, &darg, NULL,
254 		    SIZE_MAX, 0)) == 0) {
255 			break;
256 		}
257 		switch (error) {
258 		case EINTR:
259 			neintr++;
260 			log_event_pause(2);
261 			darg = save_arg;
262 			break;
263 		case EAGAIN:
264 			/* cannot deliver upcall - process may be forking */
265 			neagain++;
266 			log_event_pause(nticks);
267 			nticks <<= 1;
268 			if (nticks > LOG_EVENT_MAX_PAUSE)
269 				nticks = LOG_EVENT_MAX_PAUSE;
270 			darg = save_arg;
271 			break;
272 		case EBADF:
273 			LOG_DEBUG((CE_CONT, "log_event_upcall: rebinding\n"));
274 			/* Server may have died. Try rebinding */
275 			if ((error = log_event_upcall_lookup()) != 0) {
276 				LOG_DEBUG((CE_CONT,
277 				    "log_event_upcall: lookup error %d\n",
278 				    error));
279 				return (EBADF);
280 			}
281 			if (retry > 4) {
282 				LOG_DEBUG((CE_CONT,
283 					"log_event_upcall: ebadf\n"));
284 				return (EBADF);
285 			}
286 			LOG_DEBUG((CE_CONT, "log_event_upcall: "
287 				"retrying upcall after lookup\n"));
288 			darg = save_arg;
289 			break;
290 		default:
291 			cmn_err(CE_CONT,
292 			    "log_event_upcall: door_ki_upcall error %d\n",
293 			    error);
294 			return (error);
295 		}
296 	}
297 
298 	if (neagain > 0 || neintr > 0) {
299 		LOG_DEBUG((CE_CONT, "upcall: eagain=%d eintr=%d nticks=%d\n",
300 			neagain, neintr, nticks));
301 	}
302 
303 	LOG_DEBUG1((CE_CONT, "log_event_upcall:\n\t"
304 		"error=%d rptr1=%p rptr2=%p dptr2=%p ret1=%x ret2=%x\n",
305 		error, (void *)arg, (void *)darg.rbuf,
306 		(void *)darg.data_ptr,
307 		*((int *)(darg.rbuf)), *((int *)(darg.data_ptr))));
308 
309 	if (!error) {
310 		/*
311 		 * upcall was successfully executed. Check return code.
312 		 */
313 		error = *((int *)(darg.rbuf));
314 	}
315 
316 	return (error);
317 }
318 
319 /*
320  * log_event_deliver - event delivery thread
321  *			Deliver all events on the event queue to syseventd.
322  *			If the daemon cannot process events, stop event
323  *			delivery and wait for an indication from the
324  *			daemon to resume delivery.
325  *
326  *			Once all event buffers have been delivered, wait
327  *			until there are more to deliver.
328  */
329 static void
330 log_event_deliver()
331 {
332 	log_eventq_t *q;
333 	int upcall_err;
334 	callb_cpr_t cprinfo;
335 
336 	CALLB_CPR_INIT(&cprinfo, &eventq_head_mutex, callb_generic_cpr,
337 				"logevent");
338 
339 	/*
340 	 * eventq_head_mutex is exited (released) when there are no more
341 	 * events to process from the eventq in cv_wait().
342 	 */
343 	mutex_enter(&eventq_head_mutex);
344 
345 	for (;;) {
346 		LOG_DEBUG1((CE_CONT, "log_event_deliver: head = %p\n",
347 		    (void *)log_eventq_head));
348 
349 		upcall_err = 0;
350 		q = log_eventq_head;
351 
352 		while (q) {
353 			log_eventq_t *next;
354 
355 			/*
356 			 * Release event queue lock during upcall to
357 			 * syseventd
358 			 */
359 			if (log_event_delivery == LOGEVENT_DELIVERY_HOLD) {
360 				upcall_err = EAGAIN;
361 				break;
362 			}
363 
364 			mutex_exit(&eventq_head_mutex);
365 			if ((upcall_err = log_event_upcall(&q->arg)) != 0) {
366 				mutex_enter(&eventq_head_mutex);
367 				break;
368 			}
369 
370 			/*
371 			 * We may be able to add entries to
372 			 * the queue now.
373 			 */
374 			if (event_qfull_blocked > 0 &&
375 			    log_eventq_cnt < logevent_max_q_sz) {
376 				mutex_enter(&event_qfull_mutex);
377 				if (event_qfull_blocked > 0) {
378 					cv_signal(&event_qfull_cv);
379 				}
380 				mutex_exit(&event_qfull_mutex);
381 			}
382 
383 			mutex_enter(&eventq_head_mutex);
384 
385 			/*
386 			 * Daemon restart can cause entries to be moved from
387 			 * the sent queue and put back on the event queue.
388 			 * If this has occurred, replay event queue
389 			 * processing from the new queue head.
390 			 */
391 			if (q != log_eventq_head) {
392 				q = log_eventq_head;
393 				LOG_DEBUG((CE_CONT, "log_event_deliver: "
394 				    "door upcall/daemon restart race\n"));
395 			} else {
396 				/*
397 				 * Move the event to the sent queue when a
398 				 * successful delivery has been made.
399 				 */
400 				mutex_enter(&eventq_sent_mutex);
401 				next = q->next;
402 				q->next = log_eventq_sent;
403 				log_eventq_sent = q;
404 				q = next;
405 				log_eventq_head = q;
406 				log_eventq_cnt--;
407 				if (q == NULL) {
408 					ASSERT(log_eventq_cnt == 0);
409 					log_eventq_tail = NULL;
410 				}
411 				mutex_exit(&eventq_sent_mutex);
412 			}
413 		}
414 
415 		switch (upcall_err) {
416 		case 0:
417 			/*
418 			 * Success. The queue is empty.
419 			 */
420 			sysevent_upcall_status = 0;
421 			break;
422 		case EAGAIN:
423 			/*
424 			 * Delivery is on hold (but functional).
425 			 */
426 			sysevent_upcall_status = 0;
427 			/*
428 			 * If the user has already signaled for delivery
429 			 * resumption, continue.  Otherwise, we wait until
430 			 * we are signaled to continue.
431 			 */
432 			if (log_event_delivery == LOGEVENT_DELIVERY_CONT) {
433 				log_event_delivery = LOGEVENT_DELIVERY_OK;
434 				continue;
435 			} else {
436 				log_event_delivery = LOGEVENT_DELIVERY_HOLD;
437 			}
438 
439 			LOG_DEBUG1((CE_CONT, "log_event_deliver: EAGAIN\n"));
440 			break;
441 		default:
442 			LOG_DEBUG((CE_CONT, "log_event_deliver: "
443 				"upcall err %d\n", upcall_err));
444 			sysevent_upcall_status = upcall_err;
445 			/*
446 			 * Signal everyone waiting that transport is down
447 			 */
448 			if (event_qfull_blocked > 0) {
449 				mutex_enter(&event_qfull_mutex);
450 				if (event_qfull_blocked > 0) {
451 					cv_broadcast(&event_qfull_cv);
452 				}
453 				mutex_exit(&event_qfull_mutex);
454 			}
455 			break;
456 		}
457 
458 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
459 		cv_wait(&log_event_cv, &eventq_head_mutex);
460 		CALLB_CPR_SAFE_END(&cprinfo, &eventq_head_mutex);
461 	}
462 	/* NOTREACHED */
463 }
464 
465 /*
466  * log_event_init - Allocate and initialize log_event data structures.
467  */
468 void
469 log_event_init()
470 {
471 	mutex_init(&eventq_head_mutex, NULL, MUTEX_DEFAULT, NULL);
472 	mutex_init(&eventq_sent_mutex, NULL, MUTEX_DEFAULT, NULL);
473 	cv_init(&log_event_cv, NULL, CV_DEFAULT, NULL);
474 
475 	mutex_init(&event_qfull_mutex, NULL, MUTEX_DEFAULT, NULL);
476 	cv_init(&event_qfull_cv, NULL, CV_DEFAULT, NULL);
477 
478 	mutex_init(&event_pause_mutex, NULL, MUTEX_DEFAULT, NULL);
479 	cv_init(&event_pause_cv, NULL, CV_DEFAULT, NULL);
480 
481 	mutex_init(&registered_channel_mutex, NULL, MUTEX_DEFAULT, NULL);
482 	sysevent_evc_init();
483 }
484 
485 /*
486  * The following routines are used by kernel event publishers to
487  * allocate, append and free event buffers
488  */
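/*
 * A minimal sketch of the publisher flow these routines support; the
 * class, subclass, publisher and attribute names are illustrative only,
 * and error handling is omitted (compare ddi_log_sysevent() below):
 *
 *	sysevent_t *ev;
 *	sysevent_attr_list_t *attrs = NULL;
 *	sysevent_value_t se_val;
 *	sysevent_id_t eid;
 *
 *	ev = sysevent_alloc("class", "subclass", "vendor:kern:drv", SE_SLEEP);
 *	se_val.value_type = SE_DATA_TYPE_INT32;
 *	se_val.value.sv_int32 = 42;
 *	(void) sysevent_add_attr(&attrs, "attr-name", &se_val, SE_SLEEP);
 *	(void) sysevent_attach_attributes(ev, attrs);
 *	(void) log_sysevent(ev, SE_SLEEP, &eid);
 *	sysevent_detach_attributes(ev);
 *	sysevent_free(ev);
 */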
489 /*
490  * sysevent_alloc - Allocate new eventq struct.  This element contains
491  *			an event buffer that will be used in a subsequent
492  *			call to log_sysevent.
493  */
494 sysevent_t *
495 sysevent_alloc(char *class, char *subclass, char *pub, int flag)
496 {
497 	int payload_sz;
498 	int class_sz, subclass_sz, pub_sz;
499 	int aligned_class_sz, aligned_subclass_sz, aligned_pub_sz;
500 	sysevent_t *ev;
501 	log_eventq_t *q;
502 
503 	ASSERT(class != NULL);
504 	ASSERT(subclass != NULL);
505 	ASSERT(pub != NULL);
506 
507 	/*
508 	 * Calculate and reserve space for the class, subclass and
509 	 * publisher strings in the event buffer
510 	 */
511 	class_sz = strlen(class) + 1;
512 	subclass_sz = strlen(subclass) + 1;
513 	pub_sz = strlen(pub) + 1;
514 
515 	ASSERT((class_sz <= MAX_CLASS_LEN) && (subclass_sz
516 	    <= MAX_SUBCLASS_LEN) && (pub_sz <= MAX_PUB_LEN));
517 
518 	/* String sizes must be 64-bit aligned in the event buffer */
519 	aligned_class_sz = SE_ALIGN(class_sz);
520 	aligned_subclass_sz = SE_ALIGN(subclass_sz);
521 	aligned_pub_sz = SE_ALIGN(pub_sz);
522 
523 	payload_sz = (aligned_class_sz - sizeof (uint64_t)) +
524 		(aligned_subclass_sz - sizeof (uint64_t)) +
525 		(aligned_pub_sz - sizeof (uint64_t)) - sizeof (uint64_t);
526 
527 	/*
528 	 * Allocate event buffer plus additional sysevent queue
529 	 * and payload overhead.
530 	 */
531 	q = kmem_zalloc(sizeof (log_eventq_t) + payload_sz, flag);
532 	if (q == NULL) {
533 		return (NULL);
534 	}
535 
536 	/* Initialize the event buffer data */
537 	ev = (sysevent_t *)&q->arg.buf;
538 	SE_VERSION(ev) = SYS_EVENT_VERSION;
539 	bcopy(class, SE_CLASS_NAME(ev), class_sz);
540 
541 	SE_SUBCLASS_OFF(ev) = SE_ALIGN(offsetof(sysevent_impl_t, se_class_name))
542 		+ aligned_class_sz;
543 	bcopy(subclass, SE_SUBCLASS_NAME(ev), subclass_sz);
544 
545 	SE_PUB_OFF(ev) = SE_SUBCLASS_OFF(ev) + aligned_subclass_sz;
546 	bcopy(pub, SE_PUB_NAME(ev), pub_sz);
547 
548 	SE_ATTR_PTR(ev) = UINT64_C(0);
549 	SE_PAYLOAD_SZ(ev) = payload_sz;
550 
551 	return (ev);
552 }
553 
554 /*
555  * sysevent_free - Free event buffer and any attribute data.
556  */
557 void
558 sysevent_free(sysevent_t *ev)
559 {
560 	log_eventq_t *q;
561 	nvlist_t *nvl;
562 
563 	ASSERT(ev != NULL);
564 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
565 	nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev);
566 
567 	if (nvl != NULL) {
568 		size_t size = 0;
569 		(void) nvlist_size(nvl, &size, encoding);
570 		SE_PAYLOAD_SZ(ev) -= size;
571 		nvlist_free(nvl);
572 	}
573 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
574 }
575 
576 /*
577  * free_packed_event - Free packed event buffer
578  */
579 static void
580 free_packed_event(sysevent_t *ev)
581 {
582 	log_eventq_t *q;
583 
584 	ASSERT(ev != NULL);
585 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
586 
587 	kmem_free(q, sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev));
588 }
589 
590 /*
591  * sysevent_add_attr - Add new attribute element to an event attribute list
592  *			If attribute list is NULL, start a new list.
593  */
594 int
595 sysevent_add_attr(sysevent_attr_list_t **ev_attr_list, char *name,
596 	sysevent_value_t *se_value, int flag)
597 {
598 	int error;
599 	nvlist_t **nvlp = (nvlist_t **)ev_attr_list;
600 
601 	if (nvlp == NULL || se_value == NULL) {
602 		return (SE_EINVAL);
603 	}
604 
605 	/*
606 	 * attr_sz is composed of the value data size + the name data size +
607 	 * any header data.  64-bit aligned.
608 	 */
609 	if (strlen(name) >= MAX_ATTR_NAME) {
610 		return (SE_EINVAL);
611 	}
612 
613 	/*
614 	 * Allocate nvlist
615 	 */
616 	if ((*nvlp == NULL) &&
617 	    (nvlist_alloc(nvlp, NV_UNIQUE_NAME_TYPE, flag) != 0))
618 		return (SE_ENOMEM);
619 
620 	/* add the attribute */
621 	switch (se_value->value_type) {
622 	case SE_DATA_TYPE_BYTE:
623 		error = nvlist_add_byte(*ev_attr_list, name,
624 		    se_value->value.sv_byte);
625 		break;
626 	case SE_DATA_TYPE_INT16:
627 		error = nvlist_add_int16(*ev_attr_list, name,
628 		    se_value->value.sv_int16);
629 		break;
630 	case SE_DATA_TYPE_UINT16:
631 		error = nvlist_add_uint16(*ev_attr_list, name,
632 		    se_value->value.sv_uint16);
633 		break;
634 	case SE_DATA_TYPE_INT32:
635 		error = nvlist_add_int32(*ev_attr_list, name,
636 		    se_value->value.sv_int32);
637 		break;
638 	case SE_DATA_TYPE_UINT32:
639 		error = nvlist_add_uint32(*ev_attr_list, name,
640 		    se_value->value.sv_uint32);
641 		break;
642 	case SE_DATA_TYPE_INT64:
643 		error = nvlist_add_int64(*ev_attr_list, name,
644 		    se_value->value.sv_int64);
645 		break;
646 	case SE_DATA_TYPE_UINT64:
647 		error = nvlist_add_uint64(*ev_attr_list, name,
648 		    se_value->value.sv_uint64);
649 		break;
650 	case SE_DATA_TYPE_STRING:
651 		if (strlen((char *)se_value->value.sv_string) >= MAX_STRING_SZ)
652 			return (SE_EINVAL);
653 		error = nvlist_add_string(*ev_attr_list, name,
654 		    se_value->value.sv_string);
655 		break;
656 	case SE_DATA_TYPE_BYTES:
657 		if (se_value->value.sv_bytes.size > MAX_BYTE_ARRAY)
658 			return (SE_EINVAL);
659 		error = nvlist_add_byte_array(*ev_attr_list, name,
660 		    se_value->value.sv_bytes.data,
661 		    se_value->value.sv_bytes.size);
662 		break;
663 	case SE_DATA_TYPE_TIME:
664 		error = nvlist_add_hrtime(*ev_attr_list, name,
665 		    se_value->value.sv_time);
666 		break;
667 	default:
668 		return (SE_EINVAL);
669 	}
670 
671 	return (error ? SE_ENOMEM : 0);
672 }
673 
674 /*
675  * sysevent_free_attr - Free an attribute list not associated with an
676  *			event buffer.
677  */
678 void
679 sysevent_free_attr(sysevent_attr_list_t *ev_attr_list)
680 {
681 	nvlist_free((nvlist_t *)ev_attr_list);
682 }
683 
684 /*
685  * sysevent_attach_attributes - Attach an attribute list to an event buffer.
686  *
687  *	This data will be re-packed into contiguous memory when the event
688  *	buffer is posted to log_sysevent.
689  */
690 int
691 sysevent_attach_attributes(sysevent_t *ev, sysevent_attr_list_t *ev_attr_list)
692 {
693 	size_t size = 0;
694 
695 	if (SE_ATTR_PTR(ev) != UINT64_C(0)) {
696 		return (SE_EINVAL);
697 	}
698 
699 	SE_ATTR_PTR(ev) = (uintptr_t)ev_attr_list;
700 	(void) nvlist_size((nvlist_t *)ev_attr_list, &size, encoding);
701 	SE_PAYLOAD_SZ(ev) += size;
702 	SE_FLAG(ev) = 0;
703 
704 	return (0);
705 }
706 
707 /*
708  * sysevent_detach_attributes - Detach but don't free attribute list from the
709  *				event buffer.
710  */
711 void
712 sysevent_detach_attributes(sysevent_t *ev)
713 {
714 	size_t size = 0;
715 	nvlist_t *nvl;
716 
717 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
718 		return;
719 	}
720 
721 	SE_ATTR_PTR(ev) = UINT64_C(0);
722 	(void) nvlist_size(nvl, &size, encoding);
723 	SE_PAYLOAD_SZ(ev) -= size;
724 	ASSERT(SE_PAYLOAD_SZ(ev) >= 0);
725 }
726 
727 /*
728  * sysevent_attr_name - Get name of attribute
729  */
730 char *
731 sysevent_attr_name(sysevent_attr_t *attr)
732 {
733 	if (attr == NULL) {
734 		return (NULL);
735 	}
736 
737 	return (nvpair_name(attr));
738 }
739 
740 /*
741  * sysevent_attr_type - Get type of attribute
742  */
743 int
744 sysevent_attr_type(sysevent_attr_t *attr)
745 {
746 	/*
747 	 * The SE_DATA_TYPE_* are typedef'ed to be the
748 	 * same value as DATA_TYPE_*
749 	 */
750 	return (nvpair_type((nvpair_t *)attr));
751 }
752 
753 /*
754  * Repack event buffer into contiguous memory
755  */
756 static sysevent_t *
757 se_repack(sysevent_t *ev, int flag)
758 {
759 	size_t copy_len;
760 	caddr_t attr;
761 	size_t size;
762 	uint64_t attr_offset;
763 	sysevent_t *copy;
764 	log_eventq_t *qcopy;
765 	sysevent_attr_list_t *nvl;
766 
767 	copy_len = sizeof (log_eventq_t) + SE_PAYLOAD_SZ(ev);
768 	qcopy = kmem_zalloc(copy_len, flag);
769 	if (qcopy == NULL) {
770 		return (NULL);
771 	}
772 	copy = (sysevent_t *)&qcopy->arg.buf;
773 
774 	/*
775 	 * Copy event header, class, subclass and publisher names
776 	 * Set the attribute offset (in number of bytes) to contiguous
777 	 * memory after the header.
778 	 */
779 
780 	attr_offset = SE_ATTR_OFF(ev);
781 
782 	ASSERT((caddr_t)copy + attr_offset <= (caddr_t)copy + copy_len);
783 
784 	bcopy(ev, copy, attr_offset);
785 
786 	/* Check if attribute list exists */
787 	if ((nvl = (nvlist_t *)(uintptr_t)SE_ATTR_PTR(ev)) == NULL) {
788 		return (copy);
789 	}
790 
791 	/*
792 	 * Copy attribute data to contiguous memory
793 	 */
794 	attr = (char *)copy + attr_offset;
795 	(void) nvlist_size(nvl, &size, encoding);
796 	if (nvlist_pack(nvl, &attr, &size, encoding, flag) != 0) {
797 		kmem_free(qcopy, copy_len);
798 		return (NULL);
799 	}
800 	SE_ATTR_PTR(copy) = UINT64_C(0);
801 	SE_FLAG(copy) = SE_PACKED_BUF;
802 
803 	return (copy);
804 }
805 
806 /*
807  * The sysevent registration provides a persistent and reliable database
808  * for channel information for sysevent channel publishers and
809  * subscribers.
810  *
811  * A channel is created and maintained by the kernel upon the first
812  * SE_OPEN_REGISTRATION operation to log_sysevent_register().  Channel
813  * event subscription information is updated as publishers or subscribers
814  * perform subsequent operations (SE_BIND_REGISTRATION, SE_REGISTER,
815  * SE_UNREGISTER and SE_UNBIND_REGISTRATION).
816  *
817  * For consistency, ids are assigned for every publisher or subscriber
818  * bound to a particular channel.  The ids are used to constrain resources
819  * and perform subscription lookup.
820  *
821  * Associated with each channel is a hashed list of the current subscriptions
822  * based upon event class and subclasses.  A subscription contains a class name,
823  * list of possible subclasses and an array of subscriber ids.  Subscriptions
824  * are updated for every SE_REGISTER or SE_UNREGISTER operation.
825  *
826  * Channels are closed once the last subscriber or publisher performs a
827  * SE_CLOSE_REGISTRATION operation.  All resources associated with the named
828  * channel are freed upon last close.
829  *
830  * Locking:
831  *	Every operation to log_sysevent() is protected by a single lock,
832  *	registered_channel_mutex.  It is expected that the granularity of
833  *	a single lock is sufficient given the frequency that updates will
834  *	occur.
835  *
836  *	If this locking strategy proves to be too contentious, a per-hash
837  *	or per-channel locking strategy may be implemented.
838  */
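/*
 * Illustrative operation ordering for a single subscriber, driven from
 * userland through log_sysevent_register() below:
 *
 *	SE_OPEN_REGISTRATION	create or reference the named channel
 *	SE_BIND_REGISTRATION	obtain a subscriber id
 *	SE_REGISTER		subscribe to a class/subclass list
 *	SE_UNREGISTER		drop a class subscription
 *	SE_UNBIND_REGISTRATION	release the subscriber id
 *	SE_CLOSE_REGISTRATION	drop the channel reference; the last
 *				close frees the channel
 */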
839 
840 
841 #define	CHANN_HASH(channel_name)	(hash_func(channel_name) \
842 					% CHAN_HASH_SZ)
843 
844 sysevent_channel_descriptor_t *registered_channels[CHAN_HASH_SZ];
845 static int channel_cnt;
846 static void remove_all_class(sysevent_channel_descriptor_t *chan,
847 	uint32_t sub_id);
848 
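/*
 * Classic PJW/ELF-style string hash: shift in four bits per character
 * and fold the high nibble back into the low-order bits.
 */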
849 static uint32_t
850 hash_func(const char *s)
851 {
852 	uint32_t result = 0;
853 	uint_t g;
854 
855 	while (*s != '\0') {
856 		result <<= 4;
857 		result += (uint32_t)*s++;
858 		g = result & 0xf0000000;
859 		if (g != 0) {
860 			result ^= g >> 24;
861 			result ^= g;
862 		}
863 	}
864 
865 	return (result);
866 }
867 
868 static sysevent_channel_descriptor_t *
869 get_channel(char *channel_name)
870 {
871 	int hash_index;
872 	sysevent_channel_descriptor_t *chan_list;
873 
874 	if (channel_name == NULL)
875 		return (NULL);
876 
877 	/* Find channel descriptor */
878 	hash_index = CHANN_HASH(channel_name);
879 	chan_list = registered_channels[hash_index];
880 	while (chan_list != NULL) {
881 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
882 			break;
883 		} else {
884 			chan_list = chan_list->scd_next;
885 		}
886 	}
887 
888 	return (chan_list);
889 }
890 
891 static class_lst_t *
892 create_channel_registration(sysevent_channel_descriptor_t *chan,
893     char *event_class, int index)
894 {
895 	size_t class_len;
896 	class_lst_t *c_list;
897 
898 	class_len = strlen(event_class) + 1;
899 	c_list = kmem_zalloc(sizeof (class_lst_t), KM_SLEEP);
900 	c_list->cl_name = kmem_zalloc(class_len, KM_SLEEP);
901 	bcopy(event_class, c_list->cl_name, class_len);
902 
903 	c_list->cl_subclass_list =
904 	    kmem_zalloc(sizeof (subclass_lst_t), KM_SLEEP);
905 	c_list->cl_subclass_list->sl_name =
906 	    kmem_zalloc(sizeof (EC_SUB_ALL), KM_SLEEP);
907 	bcopy(EC_SUB_ALL, c_list->cl_subclass_list->sl_name,
908 	    sizeof (EC_SUB_ALL));
909 
910 	c_list->cl_next = chan->scd_class_list_tbl[index];
911 	chan->scd_class_list_tbl[index] = c_list;
912 
913 	return (c_list);
914 }
915 
916 static void
917 free_channel_registration(sysevent_channel_descriptor_t *chan)
918 {
919 	int i;
920 	class_lst_t *clist, *next_clist;
921 	subclass_lst_t *sclist, *next_sc;
922 
923 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
924 
925 		clist = chan->scd_class_list_tbl[i];
926 		while (clist != NULL) {
927 			sclist = clist->cl_subclass_list;
928 			while (sclist != NULL) {
929 				kmem_free(sclist->sl_name,
930 				    strlen(sclist->sl_name) + 1);
931 				next_sc = sclist->sl_next;
932 				kmem_free(sclist, sizeof (subclass_lst_t));
933 				sclist = next_sc;
934 			}
935 			kmem_free(clist->cl_name,
936 			    strlen(clist->cl_name) + 1);
937 			next_clist = clist->cl_next;
938 			kmem_free(clist, sizeof (class_lst_t));
939 			clist = next_clist;
940 		}
941 	}
942 	chan->scd_class_list_tbl[0] = NULL;
943 }
944 
945 static int
946 open_channel(char *channel_name)
947 {
948 	int hash_index;
949 	sysevent_channel_descriptor_t *chan, *chan_list;
950 
951 
952 	if (channel_cnt > MAX_CHAN) {
953 		return (-1);
954 	}
955 
956 	/* Find channel descriptor */
957 	hash_index = CHANN_HASH(channel_name);
958 	chan_list = registered_channels[hash_index];
959 	while (chan_list != NULL) {
960 		if (strcmp(chan_list->scd_channel_name, channel_name) == 0) {
961 			chan_list->scd_ref_cnt++;
962 			kmem_free(channel_name, strlen(channel_name) + 1);
963 			return (0);
964 		} else {
965 			chan_list = chan_list->scd_next;
966 		}
967 	}
968 
969 
970 	/* New channel descriptor */
971 	chan = kmem_zalloc(sizeof (sysevent_channel_descriptor_t), KM_SLEEP);
972 	chan->scd_channel_name = channel_name;
973 
974 	/*
975 	 * Create subscriber ids in the range [1, MAX_SUBSCRIBERS).
976 	 * Subscriber id 0 is never allocated, but is used as a reserved id
977 	 * by libsysevent
978 	 */
979 	if ((chan->scd_subscriber_cache = vmem_create(channel_name, (void *)1,
980 	    MAX_SUBSCRIBERS + 1, 1, NULL, NULL, NULL, 0,
981 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
982 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
983 		return (-1);
984 	}
985 	if ((chan->scd_publisher_cache = vmem_create(channel_name, (void *)1,
986 	    MAX_PUBLISHERS + 1, 1, NULL, NULL, NULL, 0,
987 	    VM_NOSLEEP | VMC_IDENTIFIER)) == NULL) {
988 		vmem_destroy(chan->scd_subscriber_cache);
989 		kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
990 		return (-1);
991 	}
992 
993 	chan->scd_ref_cnt = 1;
994 
995 	(void) create_channel_registration(chan, EC_ALL, 0);
996 
997 	if (registered_channels[hash_index] != NULL)
998 		chan->scd_next = registered_channels[hash_index];
999 
1000 	registered_channels[hash_index] = chan;
1001 
1002 	++channel_cnt;
1003 
1004 	return (0);
1005 }
1006 
1007 static void
1008 close_channel(char *channel_name)
1009 {
1010 	int hash_index;
1011 	sysevent_channel_descriptor_t *chan, *prev_chan;
1012 
1013 	/* Find channel descriptor */
1014 	hash_index = CHANN_HASH(channel_name);
1015 	prev_chan = chan = registered_channels[hash_index];
1016 
1017 	while (chan != NULL) {
1018 		if (strcmp(chan->scd_channel_name, channel_name) == 0) {
1019 			break;
1020 		} else {
1021 			prev_chan = chan;
1022 			chan = chan->scd_next;
1023 		}
1024 	}
1025 
1026 	if (chan == NULL)
1027 		return;
1028 
1029 	chan->scd_ref_cnt--;
1030 	if (chan->scd_ref_cnt > 0)
1031 		return;
1032 
1033 	free_channel_registration(chan);
1034 	vmem_destroy(chan->scd_subscriber_cache);
1035 	vmem_destroy(chan->scd_publisher_cache);
1036 	kmem_free(chan->scd_channel_name,
1037 	    strlen(chan->scd_channel_name) + 1);
1038 	if (registered_channels[hash_index] == chan)
1039 		registered_channels[hash_index] = chan->scd_next;
1040 	else
1041 		prev_chan->scd_next = chan->scd_next;
1042 	kmem_free(chan, sizeof (sysevent_channel_descriptor_t));
1043 	--channel_cnt;
1044 }
1045 
1046 static id_t
1047 bind_common(sysevent_channel_descriptor_t *chan, int type)
1048 {
1049 	id_t id;
1050 
1051 	if (type == SUBSCRIBER) {
1052 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_subscriber_cache, 1,
1053 		    VM_NOSLEEP | VM_NEXTFIT);
1054 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1055 			return (0);
1056 		chan->scd_subscriber_ids[id] = 1;
1057 	} else {
1058 		id = (id_t)(uintptr_t)vmem_alloc(chan->scd_publisher_cache, 1,
1059 		    VM_NOSLEEP | VM_NEXTFIT);
1060 		if (id <= 0 || id > MAX_PUBLISHERS)
1061 			return (0);
1062 		chan->scd_publisher_ids[id] = 1;
1063 	}
1064 
1065 	return (id);
1066 }
1067 
1068 static int
1069 unbind_common(sysevent_channel_descriptor_t *chan, int type, id_t id)
1070 {
1071 	if (type == SUBSCRIBER) {
1072 		if (id <= 0 || id > MAX_SUBSCRIBERS)
1073 			return (0);
1074 		if (chan->scd_subscriber_ids[id] == 0)
1075 			return (0);
1076 		(void) remove_all_class(chan, id);
1077 		chan->scd_subscriber_ids[id] = 0;
1078 		vmem_free(chan->scd_subscriber_cache, (void *)(uintptr_t)id, 1);
1079 	} else {
1080 		if (id <= 0 || id > MAX_PUBLISHERS)
1081 			return (0);
1082 		if (chan->scd_publisher_ids[id] == 0)
1083 			return (0);
1084 		chan->scd_publisher_ids[id] = 0;
1085 		vmem_free(chan->scd_publisher_cache, (void *)(uintptr_t)id, 1);
1086 	}
1087 
1088 	return (1);
1089 }
1090 
1091 static void
1092 release_id(sysevent_channel_descriptor_t *chan, int type, id_t id)
1093 {
1094 	if (unbind_common(chan, type, id))
1095 		close_channel(chan->scd_channel_name);
1096 }
1097 
1098 static subclass_lst_t *
1099 find_subclass(class_lst_t *c_list, char *subclass)
1100 {
1101 	subclass_lst_t *sc_list;
1102 
1103 	if (c_list == NULL)
1104 		return (NULL);
1105 
1106 	sc_list = c_list->cl_subclass_list;
1107 
1108 	while (sc_list != NULL) {
1109 		if (strcmp(sc_list->sl_name, subclass) == 0) {
1110 			return (sc_list);
1111 		}
1112 		sc_list = sc_list->sl_next;
1113 	}
1114 
1115 	return (NULL);
1116 }
1117 
1118 static void
1119 insert_subclass(class_lst_t *c_list, char **subclass_names,
1120 	int subclass_num, uint32_t sub_id)
1121 {
1122 	int i, subclass_sz;
1123 	subclass_lst_t *sc_list;
1124 
1125 	for (i = 0; i < subclass_num; ++i) {
1126 		if ((sc_list = find_subclass(c_list, subclass_names[i]))
1127 		    != NULL) {
1128 			sc_list->sl_num[sub_id] = 1;
1129 		} else {
1130 
1131 			sc_list = kmem_zalloc(sizeof (subclass_lst_t),
1132 			    KM_SLEEP);
1133 			subclass_sz = strlen(subclass_names[i]) + 1;
1134 			sc_list->sl_name = kmem_zalloc(subclass_sz, KM_SLEEP);
1135 			bcopy(subclass_names[i], sc_list->sl_name,
1136 			    subclass_sz);
1137 
1138 			sc_list->sl_num[sub_id] = 1;
1139 
1140 			sc_list->sl_next = c_list->cl_subclass_list;
1141 			c_list->cl_subclass_list = sc_list;
1142 		}
1143 	}
1144 }
1145 
1146 static class_lst_t *
1147 find_class(sysevent_channel_descriptor_t *chan, char *class_name)
1148 {
1149 	class_lst_t *c_list;
1150 
1151 	c_list = chan->scd_class_list_tbl[CLASS_HASH(class_name)];
1152 	while (c_list != NULL) {
1153 		if (strcmp(class_name, c_list->cl_name) == 0)
1154 			break;
1155 		c_list = c_list->cl_next;
1156 	}
1157 
1158 	return (c_list);
1159 }
1160 
1161 static void
1162 remove_all_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id)
1163 {
1164 	int i;
1165 	class_lst_t *c_list;
1166 	subclass_lst_t *sc_list;
1167 
1168 	for (i = 0; i <= CLASS_HASH_SZ; ++i) {
1169 
1170 		c_list = chan->scd_class_list_tbl[i];
1171 		while (c_list != NULL) {
1172 			sc_list = c_list->cl_subclass_list;
1173 			while (sc_list != NULL) {
1174 				sc_list->sl_num[sub_id] = 0;
1175 				sc_list = sc_list->sl_next;
1176 			}
1177 			c_list = c_list->cl_next;
1178 		}
1179 	}
1180 }
1181 
1182 static void
1183 remove_class(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1184 	char *class_name)
1185 {
1186 	class_lst_t *c_list;
1187 	subclass_lst_t *sc_list;
1188 
1189 	if (strcmp(class_name, EC_ALL) == 0) {
1190 		remove_all_class(chan, sub_id);
1191 		return;
1192 	}
1193 
1194 	if ((c_list = find_class(chan, class_name)) == NULL) {
1195 		return;
1196 	}
1197 
1198 	sc_list = c_list->cl_subclass_list;
1199 	while (sc_list != NULL) {
1200 		sc_list->sl_num[sub_id] = 0;
1201 		sc_list = sc_list->sl_next;
1202 	}
1203 }
1204 
1205 static int
1206 insert_class(sysevent_channel_descriptor_t *chan, char *event_class,
1207 	char **event_subclass_lst, int subclass_num, uint32_t sub_id)
1208 {
1209 	class_lst_t *c_list;
1210 
1211 	if (strcmp(event_class, EC_ALL) == 0) {
1212 		insert_subclass(chan->scd_class_list_tbl[0],
1213 		    event_subclass_lst, 1, sub_id);
1214 		return (0);
1215 	}
1216 
1217 	if (strlen(event_class) + 1 > MAX_CLASS_LEN)
1218 		return (-1);
1219 
1220 	/* New class, add to the registration cache */
1221 	if ((c_list = find_class(chan, event_class)) == NULL) {
1222 		c_list = create_channel_registration(chan, event_class,
1223 		    CLASS_HASH(event_class));
1224 	}
1225 
1226 	/* Update the subclass list */
1227 	insert_subclass(c_list, event_subclass_lst, subclass_num, sub_id);
1228 
1229 	return (0);
1230 }
1231 
1232 static int
1233 add_registration(sysevent_channel_descriptor_t *chan, uint32_t sub_id,
1234 	char *nvlbuf, size_t nvlsize)
1235 {
1236 	uint_t num_elem;
1237 	char *event_class;
1238 	char **event_list;
1239 	nvlist_t *nvl;
1240 	nvpair_t *nvpair = NULL;
1241 
1242 	if (nvlist_unpack(nvlbuf, nvlsize, &nvl, KM_SLEEP) != 0)
1243 		return (-1);
1244 
1245 	if ((nvpair = nvlist_next_nvpair(nvl, nvpair)) == NULL) {
1246 		nvlist_free(nvl);
1247 		return (-1);
1248 	}
1249 
1250 	if ((event_class = nvpair_name(nvpair)) == NULL) {
1251 		nvlist_free(nvl);
1252 		return (-1);
1253 	}
1254 	if (nvpair_value_string_array(nvpair, &event_list,
1255 	    &num_elem) != 0) {
1256 		nvlist_free(nvl);
1257 		return (-1);
1258 	}
1259 
1260 	if (insert_class(chan, event_class, event_list, num_elem, sub_id) < 0) {
1261 		nvlist_free(nvl);
1262 		return (-1);
1263 	}
1264 
1265 	nvlist_free(nvl);
1266 
1267 	return (0);
1268 }
1269 
1270 /*
1271  * get_registration - Return the requested class hash chain
1272  */
1273 static int
1274 get_registration(sysevent_channel_descriptor_t *chan, char *databuf,
1275 	uint32_t *bufsz, uint32_t class_index)
1276 {
1277 	int num_classes = 0;
1278 	char *nvlbuf = NULL;
1279 	size_t nvlsize;
1280 	nvlist_t *nvl;
1281 	class_lst_t *clist;
1282 	subclass_lst_t *sc_list;
1283 
1284 	if (class_index < 0 || class_index > CLASS_HASH_SZ)
1285 		return (EINVAL);
1286 
1287 	if ((clist = chan->scd_class_list_tbl[class_index]) == NULL) {
1288 		return (ENOENT);
1289 	}
1290 
1291 	if (nvlist_alloc(&nvl, 0, 0) != 0) {
1292 		return (EFAULT);
1293 	}
1294 
1295 	while (clist != NULL) {
1296 		if (nvlist_add_string(nvl, CLASS_NAME, clist->cl_name)
1297 		    != 0) {
1298 			nvlist_free(nvl);
1299 			return (EFAULT);
1300 		}
1301 
1302 		sc_list = clist->cl_subclass_list;
1303 		while (sc_list != NULL) {
1304 			if (nvlist_add_byte_array(nvl, sc_list->sl_name,
1305 			    sc_list->sl_num, MAX_SUBSCRIBERS) != 0) {
1306 				nvlist_free(nvl);
1307 				return (EFAULT);
1308 			}
1309 			sc_list = sc_list->sl_next;
1310 		}
1311 		num_classes++;
1312 		clist = clist->cl_next;
1313 	}
1314 
1315 	if (num_classes == 0) {
1316 		nvlist_free(nvl);
1317 		return (ENOENT);
1318 	}
1319 
1320 	if (nvlist_pack(nvl, &nvlbuf, &nvlsize, NV_ENCODE_NATIVE,
1321 	    KM_SLEEP)
1322 	    != 0) {
1323 		nvlist_free(nvl);
1324 		return (EFAULT);
1325 	}
1326 
1327 	nvlist_free(nvl);
1328 
1329 	if (nvlsize > *bufsz) {
1330 		kmem_free(nvlbuf, nvlsize);
1331 		*bufsz = nvlsize;
1332 		return (EAGAIN);
1333 	}
1334 
1335 	bcopy(nvlbuf, databuf, nvlsize);
1336 	kmem_free(nvlbuf, nvlsize);
1337 
1338 	return (0);
1339 }
1340 
1341 /*
1342  * log_sysevent_register - Register event subscriber for a particular
1343  *		event channel.
1344  */
1345 int
1346 log_sysevent_register(char *channel_name, char *udatabuf, se_pubsub_t *udata)
1347 {
1348 	int error = 0;
1349 	char *kchannel, *databuf = NULL;
1350 	size_t bufsz;
1351 	se_pubsub_t kdata;
1352 	sysevent_channel_descriptor_t *chan;
1353 
1354 	if (copyin(udata, &kdata, sizeof (se_pubsub_t)) == -1) {
1355 		return (EFAULT);
1356 	}
1357 	if (kdata.ps_channel_name_len == 0) {
1358 		return (EINVAL);
1359 	}
1360 	kchannel = kmem_alloc(kdata.ps_channel_name_len, KM_SLEEP);
1361 	if (copyin(channel_name, kchannel, kdata.ps_channel_name_len) == -1) {
1362 		kmem_free(kchannel, kdata.ps_channel_name_len);
1363 		return (EFAULT);
1364 	}
1365 	bufsz = kdata.ps_buflen;
1366 	if (bufsz > 0) {
1367 		databuf = kmem_alloc(bufsz, KM_SLEEP);
1368 		if (copyin(udatabuf, databuf, bufsz) == -1) {
1369 			kmem_free(kchannel, kdata.ps_channel_name_len);
1370 			kmem_free(databuf, bufsz);
1371 			return (EFAULT);
1372 		}
1373 	}
1374 
1375 	mutex_enter(&registered_channel_mutex);
1376 	if (kdata.ps_op != SE_OPEN_REGISTRATION &&
1377 	    kdata.ps_op != SE_CLOSE_REGISTRATION) {
1378 		chan = get_channel(kchannel);
1379 		if (chan == NULL) {
1380 			mutex_exit(&registered_channel_mutex);
1381 			kmem_free(kchannel, kdata.ps_channel_name_len);
1382 			if (bufsz > 0)
1383 				kmem_free(databuf, bufsz);
1384 			return (ENOENT);
1385 		}
1386 	}
1387 
1388 	switch (kdata.ps_op) {
1389 	case SE_OPEN_REGISTRATION:
1390 		if (open_channel(kchannel) != 0) {
1391 			error = ENOMEM;
1392 			if (bufsz > 0)
1393 				kmem_free(databuf, bufsz);
1394 			kmem_free(kchannel, kdata.ps_channel_name_len);
1395 		}
1396 
1397 		mutex_exit(&registered_channel_mutex);
1398 		return (error);
1399 	case SE_CLOSE_REGISTRATION:
1400 		close_channel(kchannel);
1401 		break;
1402 	case SE_BIND_REGISTRATION:
1403 		if ((kdata.ps_id = bind_common(chan, kdata.ps_type)) <= 0)
1404 			error = EBUSY;
1405 		break;
1406 	case SE_UNBIND_REGISTRATION:
1407 		(void) unbind_common(chan, kdata.ps_type, (id_t)kdata.ps_id);
1408 		break;
1409 	case SE_REGISTER:
1410 		if (bufsz == 0) {
1411 			error = EINVAL;
1412 			break;
1413 		}
1414 		if (add_registration(chan, kdata.ps_id, databuf, bufsz) == -1)
1415 			error = EINVAL;
1416 		break;
1417 	case SE_UNREGISTER:
1418 		if (bufsz == 0) {
1419 			error = EINVAL;
1420 			break;
1421 		}
1422 		remove_class(chan, kdata.ps_id, databuf);
1423 		break;
1424 	case SE_CLEANUP:
1425 		/* Cleanup the indicated subscriber or publisher */
1426 		release_id(chan, kdata.ps_type, kdata.ps_id);
1427 		break;
1428 	case SE_GET_REGISTRATION:
1429 		error = get_registration(chan, databuf,
1430 		    &kdata.ps_buflen, kdata.ps_id);
1431 		break;
1432 	default:
1433 		error = ENOTSUP;
1434 	}
1435 
1436 	mutex_exit(&registered_channel_mutex);
1437 
1438 	kmem_free(kchannel, kdata.ps_channel_name_len);
1439 
1440 	if (bufsz > 0) {
1441 		if (copyout(databuf, udatabuf, bufsz) == -1)
1442 			error = EFAULT;
1443 		kmem_free(databuf, bufsz);
1444 	}
1445 
1446 	if (copyout(&kdata, udata, sizeof (se_pubsub_t)) == -1)
1447 		return (EFAULT);
1448 
1449 	return (error);
1450 }
1451 
1452 /*
1453  * log_sysevent_copyout_data - Copyout event data to userland.
1454  *			This is called from modctl(MODEVENTS, MODEVENTS_GETDATA).
1455  *			The buffer size is always sufficient.
1456  */
1457 int
1458 log_sysevent_copyout_data(sysevent_id_t *eid, size_t ubuflen, caddr_t ubuf)
1459 {
1460 	int error = ENOENT;
1461 	log_eventq_t *q;
1462 	sysevent_t *ev;
1463 	sysevent_id_t eid_copy;
1464 
1465 	/*
1466 	 * Copy eid
1467 	 */
1468 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1469 		return (EFAULT);
1470 	}
1471 
1472 	mutex_enter(&eventq_sent_mutex);
1473 	q = log_eventq_sent;
1474 
1475 	/*
1476 	 * Search for event buffer on the sent queue with matching
1477 	 * event identifier
1478 	 */
1479 	while (q) {
1480 		ev = (sysevent_t *)&q->arg.buf;
1481 
1482 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1483 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1484 			q = q->next;
1485 			continue;
1486 		}
1487 
1488 		if (ubuflen < SE_SIZE(ev)) {
1489 			error = EFAULT;
1490 			break;
1491 		}
1492 		if (copyout(ev, ubuf, SE_SIZE(ev)) != 0) {
1493 			error = EFAULT;
1494 			LOG_DEBUG((CE_NOTE, "Unable to retrieve system event "
1495 			    "0x%" PRIx64 " from queue: EFAULT\n",
1496 			    eid->eid_seq));
1497 		} else {
1498 			error = 0;
1499 		}
1500 		break;
1501 	}
1502 
1503 	mutex_exit(&eventq_sent_mutex);
1504 
1505 	return (error);
1506 }
1507 
1508 /*
1509  * log_sysevent_free_data - Free kernel copy of the event buffer identified
1510  *			by eid (must have already been sent).  Called from
1511  *			modctl(MODEVENTS, MODEVENTS_FREEDATA).
1512  */
1513 int
1514 log_sysevent_free_data(sysevent_id_t *eid)
1515 {
1516 	int error = ENOENT;
1517 	sysevent_t *ev;
1518 	log_eventq_t *q, *prev = NULL;
1519 	sysevent_id_t eid_copy;
1520 
1521 	/*
1522 	 * Copy eid
1523 	 */
1524 	if (copyin(eid, &eid_copy, sizeof (sysevent_id_t)) == -1) {
1525 		return (EFAULT);
1526 	}
1527 
1528 	mutex_enter(&eventq_sent_mutex);
1529 	q = log_eventq_sent;
1530 
1531 	/*
1532 	 * Look for the event to be freed on the sent queue.  Due to delayed
1533 	 * processing of the event, it may not be on the sent queue yet.
1534 	 * It is up to the user to retry the free operation to ensure that the
1535 	 * event is properly freed.
1536 	 */
1537 	while (q) {
1538 		ev = (sysevent_t *)&q->arg.buf;
1539 
1540 		if (SE_TIME(ev) != eid_copy.eid_ts ||
1541 		    SE_SEQ(ev) != eid_copy.eid_seq) {
1542 			prev = q;
1543 			q = q->next;
1544 			continue;
1545 		}
1546 		/*
1547 		 * Take it out of log_eventq_sent and free it
1548 		 */
1549 		if (prev) {
1550 			prev->next = q->next;
1551 		} else {
1552 			log_eventq_sent = q->next;
1553 		}
1554 		free_packed_event(ev);
1555 		error = 0;
1556 		break;
1557 	}
1558 
1559 	mutex_exit(&eventq_sent_mutex);
1560 
1561 	return (error);
1562 }
1563 
1564 /*
1565  * log_sysevent_flushq - Begin or resume event buffer delivery.  If necessary,
1566  *			create the log_event_deliver thread or wake it up.
1567  */
1568 /*ARGSUSED*/
1569 void
1570 log_sysevent_flushq(int cmd, uint_t flag)
1571 {
1572 	mutex_enter(&eventq_head_mutex);
1573 
1574 	/*
1575 	 * Start the event delivery thread
1576 	 * Mark the upcall status as active since we should
1577 	 * now be able to begin emptying the queue normally.
1578 	 */
1579 	if (!async_thread) {
1580 		sysevent_upcall_status = 0;
1581 		sysevent_daemon_init = 1;
1582 		setup_ddi_poststartup();
1583 		async_thread = thread_create(NULL, 0, log_event_deliver,
1584 		    NULL, 0, &p0, TS_RUN, minclsyspri);
1585 	}
1586 
1587 	log_event_delivery = LOGEVENT_DELIVERY_CONT;
1588 	cv_signal(&log_event_cv);
1589 	mutex_exit(&eventq_head_mutex);
1590 }
1591 
1592 /*
1593  * log_sysevent_filename - Called by syseventd via
1594  *			modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME)
1595  *			to subsequently bind the event_door.
1596  *
1597  *			This routine is called every time syseventd (re)starts
1598  *			and must therefore replay any event buffers that have
1599  *			been sent but not freed.
1600  *
1601  *			Event buffer delivery begins after a call to
1602  *			log_sysevent_flushq().
1603  */
1604 int
1605 log_sysevent_filename(char *file)
1606 {
1607 	/*
1608 	 * Called serially by syseventd init code, no need to protect door
1609 	 * data.
1610 	 */
1611 	/* Unbind old event door */
1612 	if (logevent_door_upcall_filename) {
1613 		kmem_free(logevent_door_upcall_filename,
1614 			logevent_door_upcall_filename_size);
1615 		if (event_door) {
1616 			door_ki_rele(event_door);
1617 			event_door = NULL;
1618 		}
1619 	}
1620 	logevent_door_upcall_filename_size = strlen(file) + 1;
1621 	logevent_door_upcall_filename = kmem_alloc(
1622 		logevent_door_upcall_filename_size, KM_SLEEP);
1623 	(void) strcpy(logevent_door_upcall_filename, file);
1624 
1625 	/*
1626 	 * We are called when syseventd restarts. Move all sent, but
1627 	 * not committed events from log_eventq_sent to log_eventq_head.
1628 	 * Do it in proper order to maintain increasing event id.
1629 	 */
1630 	mutex_enter(&eventq_head_mutex);
1631 
1632 	mutex_enter(&eventq_sent_mutex);
1633 	while (log_eventq_sent) {
1634 		log_eventq_t *tmp = log_eventq_sent->next;
1635 		log_eventq_sent->next = log_eventq_head;
1636 		if (log_eventq_head == NULL) {
1637 			ASSERT(log_eventq_cnt == 0);
1638 			log_eventq_tail = log_eventq_sent;
1639 			log_eventq_tail->next = NULL;
1640 		} else if (log_eventq_head == log_eventq_tail) {
1641 			ASSERT(log_eventq_cnt == 1);
1642 			ASSERT(log_eventq_head->next == NULL);
1643 			ASSERT(log_eventq_tail->next == NULL);
1644 		}
1645 		log_eventq_head = log_eventq_sent;
1646 		log_eventq_sent = tmp;
1647 		log_eventq_cnt++;
1648 	}
1649 	mutex_exit(&eventq_sent_mutex);
1650 	mutex_exit(&eventq_head_mutex);
1651 
1652 	return (0);
1653 }
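/*
 * Illustrative syseventd (re)start handshake as seen from this file.  The
 * MODEVENTS_SET_DOOR_UPCALL_FILENAME sub-command is cited in the comment
 * above; the flush sub-command name is an assumption:
 *
 *	(void) modctl(MODEVENTS, MODEVENTS_SET_DOOR_UPCALL_FILENAME, door);
 *		-> log_sysevent_filename() rebinds and replays sent events
 *	(void) modctl(MODEVENTS, MODEVENTS_FLUSH);
 *		-> log_sysevent_flushq() starts or wakes log_event_deliver()
 */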
1654 
1655 /*
1656  * queue_sysevent - queue an event buffer
1657  */
1658 static int
1659 queue_sysevent(sysevent_t *ev, sysevent_id_t *eid, int flag)
1660 {
1661 	log_eventq_t *q;
1662 
1663 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1664 
1665 	DTRACE_SYSEVENT2(post, evch_bind_t *, NULL, sysevent_impl_t *, ev);
1666 
1667 restart:
1668 
1669 	/* Max Q size exceeded */
1670 	mutex_enter(&event_qfull_mutex);
1671 	if (sysevent_daemon_init && log_eventq_cnt >= logevent_max_q_sz) {
1672 		/*
1673 		 * If queue full and transport down, return no transport
1674 		 */
1675 		if (sysevent_upcall_status != 0) {
1676 			mutex_exit(&event_qfull_mutex);
1677 			free_packed_event(ev);
1678 			eid->eid_seq = UINT64_C(0);
1679 			eid->eid_ts = INT64_C(0);
1680 			return (SE_NO_TRANSPORT);
1681 		}
1682 		if (flag == SE_NOSLEEP) {
1683 			mutex_exit(&event_qfull_mutex);
1684 			free_packed_event(ev);
1685 			eid->eid_seq = UINT64_C(0);
1686 			eid->eid_ts = INT64_C(0);
1687 			return (SE_EQSIZE);
1688 		}
1689 		event_qfull_blocked++;
1690 		cv_wait(&event_qfull_cv, &event_qfull_mutex);
1691 		event_qfull_blocked--;
1692 		mutex_exit(&event_qfull_mutex);
1693 		goto restart;
1694 	}
1695 	mutex_exit(&event_qfull_mutex);
1696 
1697 	mutex_enter(&eventq_head_mutex);
1698 
1699 	/* Time stamp and assign ID */
1700 	SE_SEQ(ev) = eid->eid_seq = atomic_add_64_nv(&kernel_event_id,
1701 		(uint64_t)1);
1702 	SE_TIME(ev) = eid->eid_ts = gethrtime();
1703 
1704 	LOG_DEBUG1((CE_CONT, "log_sysevent: class=%d type=%d id=0x%llx\n",
1705 	    SE_CLASS(ev), SE_SUBCLASS(ev), (longlong_t)SE_SEQ(ev)));
1706 
1707 	/*
1708 	 * Put event on eventq
1709 	 */
1710 	q = (log_eventq_t *)((caddr_t)ev - offsetof(log_eventq_t, arg.buf));
1711 	q->next = NULL;
1712 	if (log_eventq_head == NULL) {
1713 		ASSERT(log_eventq_cnt == 0);
1714 		log_eventq_head = q;
1715 		log_eventq_tail = q;
1716 	} else {
1717 		if (log_eventq_head == log_eventq_tail) {
1718 			ASSERT(log_eventq_cnt == 1);
1719 			ASSERT(log_eventq_head->next == NULL);
1720 			ASSERT(log_eventq_tail->next == NULL);
1721 		}
1722 		log_eventq_tail->next = q;
1723 		log_eventq_tail = q;
1724 	}
1725 	log_eventq_cnt++;
1726 
1727 	/* Signal event delivery thread */
1728 	if (log_eventq_cnt == 1) {
1729 		cv_signal(&log_event_cv);
1730 	}
1731 	mutex_exit(&eventq_head_mutex);
1732 
1733 	return (0);
1734 }
1735 
1736 /*
1737  * log_sysevent - kernel system event logger.
1738  *
1739  * Returns 0 for a successfully queued event buffer, SE_ENOMEM if buf
1740  * allocation failed, SE_EQSIZE if the maximum event queue size would be
1741  * exceeded, or SE_NO_TRANSPORT if the queue is full and transport is down.
1742  */
1743 int
1744 log_sysevent(sysevent_t *ev, int flag, sysevent_id_t *eid)
1745 {
1746 	sysevent_t *ev_copy;
1747 	int rval;
1748 
1749 	ASSERT(flag == SE_SLEEP || flag == SE_NOSLEEP);
1750 	ASSERT(!(flag == SE_SLEEP && servicing_interrupt()));
1751 
1752 	ev_copy = se_repack(ev, flag);
1753 	if (ev_copy == NULL) {
1754 		ASSERT(flag == SE_NOSLEEP);
1755 		return (SE_ENOMEM);
1756 	}
1757 	rval = queue_sysevent(ev_copy, eid, flag);
1758 	ASSERT(rval == 0 || rval == SE_ENOMEM || rval == SE_EQSIZE ||
1759 		rval == SE_NO_TRANSPORT);
1760 	ASSERT(!(flag == SE_SLEEP && (rval == SE_EQSIZE || rval == SE_ENOMEM)));
1761 	return (rval);
1762 }
1763 
1764 /*
1765  * log_usr_sysevent - user system event logger
1766  *			Private to devfsadm and accessible only via
1767  *			modctl(MODEVENTS, MODEVENTS_POST_EVENT)
1768  */
1769 int
1770 log_usr_sysevent(sysevent_t *ev, int ev_size, sysevent_id_t *eid)
1771 {
1772 	int ret, copy_sz;
1773 	sysevent_t *ev_copy;
1774 	sysevent_id_t new_eid;
1775 	log_eventq_t *qcopy;
1776 
1777 	copy_sz = ev_size + offsetof(log_eventq_t, arg) +
1778 		offsetof(log_event_upcall_arg_t, buf);
1779 	qcopy = kmem_zalloc(copy_sz, KM_SLEEP);
1780 	ev_copy = (sysevent_t *)&qcopy->arg.buf;
1781 
1782 	/*
1783 	 * Copy event
1784 	 */
1785 	if (copyin(ev, ev_copy, ev_size) == -1) {
1786 		kmem_free(qcopy, copy_sz);
1787 		return (EFAULT);
1788 	}
1789 
1790 	if ((ret = queue_sysevent(ev_copy, &new_eid, SE_NOSLEEP)) != 0) {
1791 		if (ret == SE_ENOMEM || ret == SE_EQSIZE)
1792 			return (EAGAIN);
1793 		else
1794 			return (EIO);
1795 	}
1796 
1797 	if (copyout(&new_eid, eid, sizeof (sysevent_id_t)) == -1) {
1798 		return (EFAULT);
1799 	}
1800 
1801 	return (0);
1802 }
1803 
1804 
1805 
1806 int
1807 ddi_log_sysevent(
1808 	dev_info_t		*dip,
1809 	char			*vendor,
1810 	char			*class,
1811 	char			*subclass,
1812 	nvlist_t		*attr_list,
1813 	sysevent_id_t		*eidp,
1814 	int			sleep_flag)
1815 {
1816 	sysevent_attr_list_t	*list = (sysevent_attr_list_t *)attr_list;
1817 	char			pubstr[32];
1818 	sysevent_t		*event;
1819 	sysevent_id_t		eid;
1820 	const char		*drvname;
1821 	char			*publisher;
1822 	int			se_flag;
1823 	int			rval;
1824 	int			n;
1825 
1826 	if (sleep_flag == DDI_SLEEP && servicing_interrupt()) {
1827 		cmn_err(CE_NOTE, "!ddi_log_sysevent: driver %s%d - cannot queue "
1828 			"event from interrupt context with sleep semantics\n",
1829 			ddi_driver_name(dip), ddi_get_instance(dip));
1830 		return (DDI_ECONTEXT);
1831 	}
1832 
1833 	drvname = ddi_driver_name(dip);
1834 	n = strlen(vendor) + strlen(drvname) + 7;
1835 	if (n < sizeof (pubstr)) {
1836 		publisher = pubstr;
1837 	} else {
1838 		publisher = kmem_alloc(n,
1839 			(sleep_flag == DDI_SLEEP) ? KM_SLEEP : KM_NOSLEEP);
1840 		if (publisher == NULL) {
1841 			return (DDI_ENOMEM);
1842 		}
1843 	}
1844 	(void) strcpy(publisher, vendor);
1845 	(void) strcat(publisher, ":kern:");
1846 	(void) strcat(publisher, drvname);
1847 
1848 	se_flag = (sleep_flag == DDI_SLEEP) ? SE_SLEEP : SE_NOSLEEP;
1849 	event = sysevent_alloc(class, subclass, publisher, se_flag);
1850 
1851 	if (publisher != pubstr) {
1852 		kmem_free(publisher, n);
1853 	}
1854 
1855 	if (event == NULL) {
1856 		return (DDI_ENOMEM);
1857 	}
1858 
1859 	if (list) {
1860 		(void) sysevent_attach_attributes(event, list);
1861 	}
1862 
1863 	rval = log_sysevent(event, se_flag, &eid);
1864 	if (list) {
1865 		sysevent_detach_attributes(event);
1866 	}
1867 	sysevent_free(event);
1868 	if (rval == 0) {
1869 		if (eidp) {
1870 			eidp->eid_seq = eid.eid_seq;
1871 			eidp->eid_ts = eid.eid_ts;
1872 		}
1873 		return (DDI_SUCCESS);
1874 	}
1875 	if (rval == SE_NO_TRANSPORT)
1876 		return (DDI_ETRANSPORT);
1877 
1878 	ASSERT(rval == SE_ENOMEM || rval == SE_EQSIZE);
1879 	return ((rval == SE_ENOMEM) ? DDI_ENOMEM : DDI_EBUSY);
1880 }
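/*
 * A minimal driver-side sketch of ddi_log_sysevent() use; the class,
 * subclass and attribute names are hypothetical and error handling is
 * abbreviated:
 *
 *	nvlist_t *attrs;
 *	if (nvlist_alloc(&attrs, NV_UNIQUE_NAME, KM_SLEEP) == 0) {
 *		(void) nvlist_add_uint32(attrs, "temperature", 60);
 *		if (ddi_log_sysevent(dip, DDI_VENDOR_SUNW, "class_name",
 *		    "subclass_name", attrs, NULL, DDI_SLEEP) != DDI_SUCCESS)
 *			cmn_err(CE_WARN, "!failed to log system event");
 *		nvlist_free(attrs);
 *	}
 */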
1881 
1882 uint64_t
1883 log_sysevent_new_id()
1884 {
1885 	return (atomic_add_64_nv(&kernel_event_id, (uint64_t)1));
1886 }
1887