/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * Fault Management Architecture (FMA) Resource and Protocol Support
 *
 * The routines contained herein provide services to support kernel subsystems
 * in publishing fault management telemetry (see PSARC 2002/412 and 2003/089).
 *
 * Name-Value Pair Lists
 *
 * The embodiment of an FMA protocol element (event, fmri or authority) is a
 * name-value pair list (nvlist_t).  FMA-specific nvlist constructor and
 * destructor functions, fm_nvlist_create() and fm_nvlist_destroy(), are used
 * to create an nvpair list using custom allocators.  Callers may choose to
 * allocate either from the kernel memory allocator, or from a preallocated
 * buffer, useful in constrained contexts like high-level interrupt routines.
 *
 * Protocol Event and FMRI Construction
 *
 * Convenience routines are provided to construct nvlist events according to
 * the FMA Event Protocol and Naming Schema specification for ereports and
 * FMRIs for the dev, cpu, hc, mem, legacy hc and de schemes.
 *
 * ENA Manipulation
 *
 * Routines to generate ENA formats 0, 1 and 2 are available as well as
 * routines to increment formats 1 and 2.  Individual fields within the
 * ENA are extractable via fm_ena_time_get(), fm_ena_id_get(),
 * fm_ena_format_get() and fm_ena_generation_get().
 */

#include <sys/types.h>
#include <sys/time.h>
#include <sys/list.h>
#include <sys/nvpair.h>
#include <sys/cmn_err.h>
#include <sys/sysmacros.h>
#include <sys/sunddi.h>
#include <sys/systeminfo.h>
#include <sys/fm/util.h>
#include <sys/fm/protocol.h>
#include <sys/kstat.h>
#include <sys/zfs_context.h>
#ifdef _KERNEL
#include <sys/atomic.h>
#include <sys/condvar.h>
#include <sys/console.h>
#include <sys/time.h>
#include <sys/zfs_ioctl.h>

int zfs_zevent_len_max = 0;
int zfs_zevent_cols = 80;
int zfs_zevent_console = 0;

static int zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;

/* Num events rate limited since the last time zfs_zevent_next() was called */
static uint64_t ratelimit_dropped = 0;

/*
 * The EID (Event IDentifier) is used to uniquely tag a zevent when it is
 * posted.  The posted EIDs are monotonically increasing but not persistent.
 * They will be reset to the initial value (1) each time the kernel module is
 * loaded.
 */
static uint64_t zevent_eid = 0;

static kmutex_t zevent_lock;
static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */

/*
 * Common fault management kstats to record event generation failures
 */

struct erpt_kstat {
	kstat_named_t	erpt_dropped;		/* num erpts dropped on post */
	kstat_named_t	erpt_set_failed;	/* num erpt set failures */
	kstat_named_t	fmri_set_failed;	/* num fmri set failures */
	kstat_named_t	payload_set_failed;	/* num payload set failures */
	kstat_named_t	erpt_duplicates;	/* num duplicate erpts */
};

static struct erpt_kstat erpt_kstat_data = {
	{ "erpt-dropped", KSTAT_DATA_UINT64 },
	{ "erpt-set-failed", KSTAT_DATA_UINT64 },
	{ "fmri-set-failed", KSTAT_DATA_UINT64 },
	{ "payload-set-failed", KSTAT_DATA_UINT64 },
	{ "erpt-duplicates", KSTAT_DATA_UINT64 }
};

kstat_t *fm_ksp;

#ifdef _KERNEL

/*
 * Formatting utility function for fm_nvprintr.  We attempt to wrap chunks of
 * output so they aren't split across console lines, and return the end column.
 */
/*PRINTFLIKE4*/
static int
fm_printf(int depth, int c, int cols, const char *format, ...)
{
	va_list ap;
	int width;
	char c1;

	va_start(ap, format);
	width = vsnprintf(&c1, sizeof (c1), format, ap);
	va_end(ap);

	if (c + width >= cols) {
		console_printf("\n");
		c = 0;
		if (format[0] != ' ' && depth > 0) {
			console_printf(" ");
			c++;
		}
	}

	va_start(ap, format);
	console_vprintf(format, ap);
	va_end(ap);

	return ((c + width) % cols);
}

/*
 * Recursively print an nvlist in the specified column width and return the
 * column we end up in.  This function is called recursively by fm_nvprint(),
 * below.  We generically format the entire nvpair using hexadecimal
 * integers and strings, and elide any integer arrays.  Arrays are basically
 * used for cache dumps right now, so we suppress them so as not to overwhelm
 * the amount of console output we produce at panic time.  This can be further
 * enhanced as FMA technology grows based upon the needs of consumers.  All
 * FMA telemetry is logged using the dump device transport, so the console
 * output serves only as a fallback in case this procedure is unsuccessful.
 */
static int
fm_nvprintr(nvlist_t *nvl, int d, int c, int cols)
{
	nvpair_t *nvp;

	for (nvp = nvlist_next_nvpair(nvl, NULL);
	    nvp != NULL; nvp = nvlist_next_nvpair(nvl, nvp)) {

		data_type_t type = nvpair_type(nvp);
		const char *name = nvpair_name(nvp);

		boolean_t b;
		uint8_t i8;
		uint16_t i16;
		uint32_t i32;
		uint64_t i64;
		char *str;
		nvlist_t *cnv;

		if (strcmp(name, FM_CLASS) == 0)
			continue; /* already printed by caller */

		c = fm_printf(d, c, cols, " %s=", name);

		switch (type) {
		case DATA_TYPE_BOOLEAN:
			c = fm_printf(d + 1, c, cols, " 1");
			break;

		case DATA_TYPE_BOOLEAN_VALUE:
			(void) nvpair_value_boolean_value(nvp, &b);
			c = fm_printf(d + 1, c, cols, b ? "1" : "0");
			break;

		case DATA_TYPE_BYTE:
			(void) nvpair_value_byte(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT8:
			(void) nvpair_value_int8(nvp, (void *)&i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_UINT8:
			(void) nvpair_value_uint8(nvp, &i8);
			c = fm_printf(d + 1, c, cols, "0x%x", i8);
			break;

		case DATA_TYPE_INT16:
			(void) nvpair_value_int16(nvp, (void *)&i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_UINT16:
			(void) nvpair_value_uint16(nvp, &i16);
			c = fm_printf(d + 1, c, cols, "0x%x", i16);
			break;

		case DATA_TYPE_INT32:
			(void) nvpair_value_int32(nvp, (void *)&i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_UINT32:
			(void) nvpair_value_uint32(nvp, &i32);
			c = fm_printf(d + 1, c, cols, "0x%x", i32);
			break;

		case DATA_TYPE_INT64:
			(void) nvpair_value_int64(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_UINT64:
			(void) nvpair_value_uint64(nvp, &i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_HRTIME:
			(void) nvpair_value_hrtime(nvp, (void *)&i64);
			c = fm_printf(d + 1, c, cols, "0x%llx",
			    (u_longlong_t)i64);
			break;

		case DATA_TYPE_STRING:
			(void) nvpair_value_string(nvp, &str);
			c = fm_printf(d + 1, c, cols, "\"%s\"",
			    str ? str : "<NULL>");
			break;

		case DATA_TYPE_NVLIST:
			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist(nvp, &cnv);
			c = fm_nvprintr(cnv, d + 1, c, cols);
			c = fm_printf(d + 1, c, cols, " ]");
			break;

		case DATA_TYPE_NVLIST_ARRAY: {
			nvlist_t **val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[");
			(void) nvpair_value_nvlist_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++) {
				c = fm_nvprintr(val[i], d + 1, c, cols);
			}
			c = fm_printf(d + 1, c, cols, " ]");
			}
			break;

		case DATA_TYPE_INT8_ARRAY: {
			int8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT8_ARRAY: {
			uint8_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint8_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_INT16_ARRAY: {
			int16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT16_ARRAY: {
			uint16_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint16_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_INT32_ARRAY: {
			int32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT32_ARRAY: {
			uint32_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint32_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_INT64_ARRAY: {
			int64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_int64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_UINT64_ARRAY: {
			uint64_t *val;
			uint_t i, nelem;

			c = fm_printf(d + 1, c, cols, "[ ");
			(void) nvpair_value_uint64_array(nvp, &val, &nelem);
			for (i = 0; i < nelem; i++)
				c = fm_printf(d + 1, c, cols, "0x%llx ",
				    (u_longlong_t)val[i]);

			c = fm_printf(d + 1, c, cols, "]");
			break;
			}

		case DATA_TYPE_STRING_ARRAY:
		case DATA_TYPE_BOOLEAN_ARRAY:
		case DATA_TYPE_BYTE_ARRAY:
			c = fm_printf(d + 1, c, cols, "[...]");
			break;

		case DATA_TYPE_UNKNOWN:
		case DATA_TYPE_DONTCARE:
			c = fm_printf(d + 1, c, cols, "<unknown>");
			break;
		}
	}

	return (c);
}

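/*
 * Print the FM_CLASS member of nvl, if present, followed by the remaining
 * members via fm_nvprintr(), wrapping output at zfs_zevent_cols columns.
 */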
void
fm_nvprint(nvlist_t *nvl)
{
	char *class;
	int c = 0;

	console_printf("\n");

	if (nvlist_lookup_string(nvl, FM_CLASS, &class) == 0)
		c = fm_printf(0, c, zfs_zevent_cols, "%s", class);

	if (fm_nvprintr(nvl, 0, c, zfs_zevent_cols) != 0)
		console_printf("\n");

	console_printf("\n");
}

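/*
 * Allocate a new zevent_t and initialize its list of zfs_zevent_t references
 * (per-open-file cursors) which may point at it.
 */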
static zevent_t *
zfs_zevent_alloc(void)
{
	zevent_t *ev;

	ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);

	list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
	    offsetof(zfs_zevent_t, ze_node));
	list_link_init(&ev->ev_node);

	return (ev);
}

static void
zfs_zevent_free(zevent_t *ev)
{
	/* Run provided cleanup callback */
	ev->ev_cb(ev->ev_nvl, ev->ev_detector);

	list_destroy(&ev->ev_ze_list);
	kmem_free(ev, sizeof (zevent_t));
}

static void
zfs_zevent_drain(zevent_t *ev)
{
	zfs_zevent_t *ze;

	ASSERT(MUTEX_HELD(&zevent_lock));
	list_remove(&zevent_list, ev);

	/* Remove references to this event in all private file data */
	while ((ze = list_head(&ev->ev_ze_list)) != NULL) {
		list_remove(&ev->ev_ze_list, ze);
		ze->ze_zevent = NULL;
		ze->ze_dropped++;
	}

	zfs_zevent_free(ev);
}

void
zfs_zevent_drain_all(int *count)
{
	zevent_t *ev;

	mutex_enter(&zevent_lock);
	while ((ev = list_head(&zevent_list)) != NULL)
		zfs_zevent_drain(ev);

	*count = zevent_len_cur;
	zevent_len_cur = 0;
	mutex_exit(&zevent_lock);
}

/*
 * New zevents are inserted at the head.  If the maximum queue
 * length is exceeded a zevent will be drained from the tail.
 * As part of this any user space processes which currently have
 * a reference to this zevent_t in their private data will have
 * this reference set to NULL.
 */
static void
zfs_zevent_insert(zevent_t *ev)
{
	ASSERT(MUTEX_HELD(&zevent_lock));
	list_insert_head(&zevent_list, ev);

	if (zevent_len_cur >= zfs_zevent_len_max)
		zfs_zevent_drain(list_tail(&zevent_list));
	else
		zevent_len_cur++;
}

/*
 * Post a zevent. The cb will be called when nvl and detector are no longer
 * needed, i.e.:
 * - An error happened and a zevent can't be posted. In this case, cb is called
 *   before zfs_zevent_post() returns.
 * - The event is being drained and freed.
 */
int
zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
{
	inode_timespec_t tv;
	int64_t tv_array[2];
	uint64_t eid;
	size_t nvl_size = 0;
	zevent_t *ev;
	int error;

	ASSERT(cb != NULL);

	gethrestime(&tv);
	tv_array[0] = tv.tv_sec;
	tv_array[1] = tv.tv_nsec;

	error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	eid = atomic_inc_64_nv(&zevent_eid);
	error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		goto out;
	}

	error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
	if (error) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		goto out;
	}

	if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = EOVERFLOW;
		goto out;
	}

	if (zfs_zevent_console)
		fm_nvprint(nvl);

	ev = zfs_zevent_alloc();
	if (ev == NULL) {
		atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
		error = ENOMEM;
		goto out;
	}

	ev->ev_nvl = nvl;
	ev->ev_detector = detector;
	ev->ev_cb = cb;
	ev->ev_eid = eid;

	mutex_enter(&zevent_lock);
	zfs_zevent_insert(ev);
	cv_broadcast(&zevent_cv);
	mutex_exit(&zevent_lock);

out:
	if (error)
		cb(nvl, detector);

	return (error);
}
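
/*
 * Illustrative usage sketch (my_cleanup_cb is a hypothetical callback name;
 * real callers supply their own): the callback simply releases nvl and
 * detector once the event has been drained or posting has failed.
 *
 *	static void
 *	my_cleanup_cb(nvlist_t *nvl, nvlist_t *detector)
 *	{
 *		fm_nvlist_destroy(nvl, FM_NVA_FREE);
 *		if (detector != NULL)
 *			fm_nvlist_destroy(detector, FM_NVA_FREE);
 *	}
 *
 *	(void) zfs_zevent_post(nvl, detector, my_cleanup_cb);
 */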
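/* Bump the erpt-duplicates kstat when a duplicate ereport is detected */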
void
zfs_zevent_track_duplicate(void)
{
	atomic_inc_64(&erpt_kstat_data.erpt_duplicates.value.ui64);
}

static int
zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
{
	*ze = zfsdev_get_state(minor, ZST_ZEVENT);
	if (*ze == NULL)
		return (SET_ERROR(EBADF));

	return (0);
}

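/*
 * Translate a file descriptor for the zfs device into its per-open zevent
 * state.  A successful hold must be released with zfs_zevent_fd_rele().
 */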
int
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
	int error;

	error = zfsdev_getminor(fd, minorp);
	if (error == 0)
		error = zfs_zevent_minor_to_state(*minorp, ze);

	if (error)
		zfs_zevent_fd_rele(fd);

	return (error);
}

void
zfs_zevent_fd_rele(int fd)
{
	zfs_file_put(fd);
}

/*
 * Get the next zevent in the stream and place a copy in 'event'.  This
 * may fail with ENOMEM if the encoded nvlist size exceeds the passed
 * 'event_size'.  In this case the stream pointer is not advanced and
 * 'event_size' is set to the minimum required buffer size.
 */
int
zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
    uint64_t *dropped)
{
	zevent_t *ev;
	size_t size;
	int error = 0;

	mutex_enter(&zevent_lock);
	if (ze->ze_zevent == NULL) {
		/* A new stream starts at the beginning/tail */
		ev = list_tail(&zevent_list);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	} else {
		/*
		 * An existing stream continues with the next element;
		 * remove ourselves from the wait list of the previous element.
		 */
		ev = list_prev(&zevent_list, ze->ze_zevent);
		if (ev == NULL) {
			error = ENOENT;
			goto out;
		}
	}

	VERIFY(nvlist_size(ev->ev_nvl, &size, NV_ENCODE_NATIVE) == 0);
	if (size > *event_size) {
		*event_size = size;
		error = ENOMEM;
		goto out;
	}

	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);

	ze->ze_zevent = ev;
	list_insert_head(&ev->ev_ze_list, ze);
	(void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
	*dropped = ze->ze_dropped;

#ifdef _KERNEL
	/* Include events dropped due to rate limiting */
	*dropped += ratelimit_dropped;
	ratelimit_dropped = 0;
#endif
	ze->ze_dropped = 0;
out:
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * Wait in an interruptible state for any new events.
 */
int
zfs_zevent_wait(zfs_zevent_t *ze)
{
	int error = EAGAIN;

	mutex_enter(&zevent_lock);
	zevent_waiters++;

	while (error == EAGAIN) {
		if (zevent_flags & ZEVENT_SHUTDOWN) {
			error = SET_ERROR(ESHUTDOWN);
			break;
		}

		error = cv_wait_sig(&zevent_cv, &zevent_lock);
		if (signal_pending(current)) {
			error = SET_ERROR(EINTR);
			break;
		} else if (!list_is_empty(&zevent_list)) {
			error = 0;
			continue;
		} else {
			error = EAGAIN;
		}
	}

	zevent_waiters--;
	mutex_exit(&zevent_lock);

	return (error);
}

/*
 * The caller may seek to a specific EID by passing that EID.  If the EID
 * is still available in the posted list of events the cursor is positioned
 * there.  Otherwise ENOENT is returned and the cursor is not moved.
 *
 * There are two reserved EIDs which may be passed and will never fail.
 * ZEVENT_SEEK_START positions the cursor at the start of the list, and
 * ZEVENT_SEEK_END positions the cursor at the end of the list.
 */
int
zfs_zevent_seek(zfs_zevent_t *ze, uint64_t eid)
{
	zevent_t *ev;
	int error = 0;

	mutex_enter(&zevent_lock);

	if (eid == ZEVENT_SEEK_START) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ze->ze_zevent = NULL;
		goto out;
	}

	if (eid == ZEVENT_SEEK_END) {
		if (ze->ze_zevent)
			list_remove(&ze->ze_zevent->ev_ze_list, ze);

		ev = list_head(&zevent_list);
		if (ev) {
			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
		} else {
			ze->ze_zevent = NULL;
		}

		goto out;
	}

	for (ev = list_tail(&zevent_list); ev != NULL;
	    ev = list_prev(&zevent_list, ev)) {
		if (ev->ev_eid == eid) {
			if (ze->ze_zevent)
				list_remove(&ze->ze_zevent->ev_ze_list, ze);

			ze->ze_zevent = ev;
			list_insert_head(&ev->ev_ze_list, ze);
			break;
		}
	}

	if (ev == NULL)
		error = ENOENT;

out:
	mutex_exit(&zevent_lock);

	return (error);
}

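/*
 * Allocate and initialize the per-open zevent state used to track a
 * consumer's position in the event stream.
 */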
void
zfs_zevent_init(zfs_zevent_t **zep)
{
	zfs_zevent_t *ze;

	ze = *zep = kmem_zalloc(sizeof (zfs_zevent_t), KM_SLEEP);
	list_link_init(&ze->ze_node);
}

void
zfs_zevent_destroy(zfs_zevent_t *ze)
{
	mutex_enter(&zevent_lock);
	if (ze->ze_zevent)
		list_remove(&ze->ze_zevent->ev_ze_list, ze);
	mutex_exit(&zevent_lock);

	kmem_free(ze, sizeof (zfs_zevent_t));
}
#endif /* _KERNEL */

/*
 * Wrappers for FM nvlist allocators
 */
/* ARGSUSED */
static void *
i_fm_alloc(nv_alloc_t *nva, size_t size)
{
	return (kmem_zalloc(size, KM_SLEEP));
}

/* ARGSUSED */
static void
i_fm_free(nv_alloc_t *nva, void *buf, size_t size)
{
	kmem_free(buf, size);
}

const nv_alloc_ops_t fm_mem_alloc_ops = {
	.nv_ao_init = NULL,
	.nv_ao_fini = NULL,
	.nv_ao_alloc = i_fm_alloc,
	.nv_ao_free = i_fm_free,
	.nv_ao_reset = NULL
};

/*
 * Create and initialize a new nv_alloc_t for a fixed buffer, buf.  A pointer
 * to the newly allocated nv_alloc_t structure is returned upon success or NULL
 * is returned to indicate that the nv_alloc structure could not be created.
 */
nv_alloc_t *
fm_nva_xcreate(char *buf, size_t bufsz)
{
	nv_alloc_t *nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

	if (bufsz == 0 || nv_alloc_init(nvhdl, nv_fixed_ops, buf, bufsz) != 0) {
		kmem_free(nvhdl, sizeof (nv_alloc_t));
		return (NULL);
	}

	return (nvhdl);
}

/*
 * Destroy a previously allocated nv_alloc structure.  The fixed buffer
 * associated with nva must be freed by the caller.
 */
void
fm_nva_xdestroy(nv_alloc_t *nva)
{
	nv_alloc_fini(nva);
	kmem_free(nva, sizeof (nv_alloc_t));
}

/*
 * Create a new nv list.  A pointer to a new nv list structure is returned
 * upon success or NULL is returned to indicate that the structure could
 * not be created.  The newly created nv list is created and managed by the
 * operations installed in nva.  If nva is NULL, the default FMA nva
 * operations are installed and used.
 *
 * When called from the kernel and nva == NULL, this function must be called
 * from passive kernel context with no locks held that can prevent a
 * sleeping memory allocation from occurring.  Otherwise, this function may
 * be called from other kernel contexts as long as a valid nva created via
 * fm_nva_xcreate() is supplied.
 */
nvlist_t *
fm_nvlist_create(nv_alloc_t *nva)
{
	int hdl_alloced = 0;
	nvlist_t *nvl;
	nv_alloc_t *nvhdl;

	if (nva == NULL) {
		nvhdl = kmem_zalloc(sizeof (nv_alloc_t), KM_SLEEP);

		if (nv_alloc_init(nvhdl, &fm_mem_alloc_ops, NULL, 0) != 0) {
			kmem_free(nvhdl, sizeof (nv_alloc_t));
			return (NULL);
		}
		hdl_alloced = 1;
	} else {
		nvhdl = nva;
	}

	if (nvlist_xalloc(&nvl, NV_UNIQUE_NAME, nvhdl) != 0) {
		if (hdl_alloced) {
			nv_alloc_fini(nvhdl);
			kmem_free(nvhdl, sizeof (nv_alloc_t));
		}
		return (NULL);
	}

	return (nvl);
}

/*
 * Destroy a previously allocated nvlist structure.  flag indicates whether
 * or not the associated nva structure should be freed (FM_NVA_FREE) or
 * retained (FM_NVA_RETAIN).  Retaining the nv alloc structure allows
 * it to be re-used for future nvlist creation operations.
 */
void
fm_nvlist_destroy(nvlist_t *nvl, int flag)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(nvl);

	nvlist_free(nvl);

	if (nva != NULL) {
		if (flag == FM_NVA_FREE)
			fm_nva_xdestroy(nva);
	}
}

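/*
 * Add the (name, type, [nelem,] value) tuples from ap to payload until a
 * NULL name is encountered.  Returns zero on success, the first error
 * returned by the underlying nvlist_add_*() call, or EINVAL for an
 * unrecognized type.
 */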
int
i_fm_payload_set(nvlist_t *payload, const char *name, va_list ap)
{
	int nelem, ret = 0;
	data_type_t type;

	while (ret == 0 && name != NULL) {
		type = va_arg(ap, data_type_t);
		switch (type) {
		case DATA_TYPE_BYTE:
			ret = nvlist_add_byte(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_BYTE_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_byte_array(payload, name,
			    va_arg(ap, uchar_t *), nelem);
			break;
		case DATA_TYPE_BOOLEAN_VALUE:
			ret = nvlist_add_boolean_value(payload, name,
			    va_arg(ap, boolean_t));
			break;
		case DATA_TYPE_BOOLEAN_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_boolean_array(payload, name,
			    va_arg(ap, boolean_t *), nelem);
			break;
		case DATA_TYPE_INT8:
			ret = nvlist_add_int8(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int8_array(payload, name,
			    va_arg(ap, int8_t *), nelem);
			break;
		case DATA_TYPE_UINT8:
			ret = nvlist_add_uint8(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT8_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint8_array(payload, name,
			    va_arg(ap, uint8_t *), nelem);
			break;
		case DATA_TYPE_INT16:
			ret = nvlist_add_int16(payload, name,
			    va_arg(ap, int));
			break;
		case DATA_TYPE_INT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int16_array(payload, name,
			    va_arg(ap, int16_t *), nelem);
			break;
		case DATA_TYPE_UINT16:
			ret = nvlist_add_uint16(payload, name,
			    va_arg(ap, uint_t));
			break;
		case DATA_TYPE_UINT16_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint16_array(payload, name,
			    va_arg(ap, uint16_t *), nelem);
			break;
		case DATA_TYPE_INT32:
			ret = nvlist_add_int32(payload, name,
			    va_arg(ap, int32_t));
			break;
		case DATA_TYPE_INT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int32_array(payload, name,
			    va_arg(ap, int32_t *), nelem);
			break;
		case DATA_TYPE_UINT32:
			ret = nvlist_add_uint32(payload, name,
			    va_arg(ap, uint32_t));
			break;
		case DATA_TYPE_UINT32_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint32_array(payload, name,
			    va_arg(ap, uint32_t *), nelem);
			break;
		case DATA_TYPE_INT64:
			ret = nvlist_add_int64(payload, name,
			    va_arg(ap, int64_t));
			break;
		case DATA_TYPE_INT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_int64_array(payload, name,
			    va_arg(ap, int64_t *), nelem);
			break;
		case DATA_TYPE_UINT64:
			ret = nvlist_add_uint64(payload, name,
			    va_arg(ap, uint64_t));
			break;
		case DATA_TYPE_UINT64_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_uint64_array(payload, name,
			    va_arg(ap, uint64_t *), nelem);
			break;
		case DATA_TYPE_STRING:
			ret = nvlist_add_string(payload, name,
			    va_arg(ap, char *));
			break;
		case DATA_TYPE_STRING_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_string_array(payload, name,
			    va_arg(ap, char **), nelem);
			break;
		case DATA_TYPE_NVLIST:
			ret = nvlist_add_nvlist(payload, name,
			    va_arg(ap, nvlist_t *));
			break;
		case DATA_TYPE_NVLIST_ARRAY:
			nelem = va_arg(ap, int);
			ret = nvlist_add_nvlist_array(payload, name,
			    va_arg(ap, nvlist_t **), nelem);
			break;
		default:
			ret = EINVAL;
		}

		name = va_arg(ap, char *);
	}
	return (ret);
}

void
fm_payload_set(nvlist_t *payload, ...)
{
	int ret;
	const char *name;
	va_list ap;

	va_start(ap, payload);
	name = va_arg(ap, char *);
	ret = i_fm_payload_set(payload, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
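
/*
 * Illustrative call (the member names and variables are hypothetical); the
 * varargs are (name, type, value) tuples terminated by a NULL name:
 *
 *	fm_payload_set(ereport,
 *	    "pool", DATA_TYPE_STRING, poolname,
 *	    "vdev_guid", DATA_TYPE_UINT64, vdev_guid,
 *	    NULL);
 */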

/*
 * Set-up and validate the members of an ereport event according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	class			string		ereport
 *	version			uint8_t		0
 *	ena			uint64_t	<ena>
 *	detector		nvlist_t	<detector>
 *	ereport-payload		nvlist_t	<var args>
 *
 * We don't actually add a 'version' member to the payload.  Really,
 * the version quoted to us by our caller is that of the category 1
 * "ereport" event class (and we require FM_EREPORT_VERS0) but
 * the payload version of the actual leaf class event under construction
 * may be something else.  Callers should supply a version in the varargs,
 * or (better) we could take two version arguments - one for the
 * ereport category 1 classification (expect FM_EREPORT_VERS0) and one
 * for the leaf class.
 */
void
fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
    uint64_t ena, const nvlist_t *detector, ...)
{
	char ereport_class[FM_MAX_CLASS];
	const char *name;
	va_list ap;
	int ret;

	if (version != FM_EREPORT_VERS0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
	    FM_EREPORT_CLASS, erpt_class);
	if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
	    (nvlist_t *)detector) != 0) {
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
	}

	va_start(ap, detector);
	name = va_arg(ap, const char *);
	ret = i_fm_payload_set(ereport, name, ap);
	va_end(ap);

	if (ret)
		atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}

/*
 * Set-up and validate the members of an hc fmri according to:
 *
 *	Member name		Type		Value
 *	===================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	hc-name			string		<name>
 *	hc-id			string		<id>
 *
 * Note that auth and hc-id are optional members.
 */

#define	HC_MAXPAIRS	20
#define	HC_MAXNAMELEN	50

static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
	if (version != FM_HC_SCHEME_VERSION) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
	    nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return (0);
	}

	return (1);
}

void
fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	va_list ap;
	int i;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = 0; i < npairs; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];

		(void) snprintf(idstr, sizeof (idstr), "%u", id);

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
	va_end(ap);

	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);

	for (i = 0; i < npairs; i++)
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

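/*
 * As fm_fmri_hc_set(), but the hc-list begins with the hc-name/hc-id pairs
 * copied from the bboard FMRI; the varargs pairs are appended after them.
 */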
void
fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
    nvlist_t *snvl, nvlist_t *bboard, int npairs, ...)
{
	nv_alloc_t *nva = nvlist_lookup_nv_alloc(fmri);
	nvlist_t *pairs[HC_MAXPAIRS];
	nvlist_t **hcl;
	uint_t n;
	int i, j;
	va_list ap;
	char *hcname, *hcid;

	if (!fm_fmri_hc_set_common(fmri, version, auth))
		return;

	/*
	 * copy the bboard nvpairs to the pairs array
	 */
	if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
	    != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < n; i++) {
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
		    &hcname) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
		if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}

		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, hcname) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, hcid) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}

	/*
	 * create the pairs from passed in pairs
	 */
	npairs = MIN(npairs, HC_MAXPAIRS);

	va_start(ap, npairs);
	for (i = n; i < npairs + n; i++) {
		const char *name = va_arg(ap, const char *);
		uint32_t id = va_arg(ap, uint32_t);
		char idstr[11];
		(void) snprintf(idstr, sizeof (idstr), "%u", id);
		pairs[i] = fm_nvlist_create(nva);
		if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
		    nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
			for (j = 0; j <= i; j++) {
				if (pairs[j] != NULL)
					fm_nvlist_destroy(pairs[j],
					    FM_NVA_RETAIN);
			}
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
	va_end(ap);

	/*
	 * Create the fmri hc list
	 */
	if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
	    npairs + n) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	for (i = 0; i < npairs + n; i++) {
		fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
	}

	if (snvl != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
			return;
		}
	}
}

/*
 * Set-up and validate the members of a dev fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	devpath			string		<devpath>
 *	[devid]			string		<devid>
 *	[target-port-l0id]	string		<target-port-lun0-id>
 *
 * Note that auth and devid are optional members.
 */
void
fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
    const char *devpath, const char *devid, const char *tpl0)
{
	int err = 0;

	if (version != DEV_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	err |= nvlist_add_uint8(fmri_dev, FM_VERSION, version);
	err |= nvlist_add_string(fmri_dev, FM_FMRI_SCHEME, FM_FMRI_SCHEME_DEV);

	if (auth != NULL) {
		err |= nvlist_add_nvlist(fmri_dev, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth);
	}

	err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_PATH, devpath);

	if (devid != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_ID, devid);

	if (tpl0 != NULL)
		err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);

	if (err)
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}

/*
 * Set-up and validate the members of a cpu fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>
 *	cpuid			uint32_t	<cpu_id>
 *	cpumask			uint8_t		<cpu_mask>
 *	serial			uint64_t	<serial_id>
 *
 * Note that auth, cpumask, serial are optional members.
 */
void
fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
    uint32_t cpu_id, uint8_t *cpu_maskp, const char *serial_idp)
{
	uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;

	if (version < CPU_SCHEME_VERSION1) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
	    FM_FMRI_SCHEME_CPU) != 0) {
		atomic_inc_64(failedp);
		return;
	}

	if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
	    (nvlist_t *)auth) != 0)
		atomic_inc_64(failedp);

	if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
		atomic_inc_64(failedp);

	if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
	    *cpu_maskp) != 0)
		atomic_inc_64(failedp);

	if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
	    FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
		atomic_inc_64(failedp);
}

/*
 * Set-up and validate the members of a mem fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	auth			nvlist_t	<auth>		[optional]
 *	unum			string		<unum>
 *	serial			string		<serial>	[optional*]
 *	offset			uint64_t	<offset>	[optional]
 *
 *	* serial is required if offset is present
 */
void
fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
    const char *unum, const char *serial, uint64_t offset)
{
	if (version != MEM_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (!serial && (offset != (uint64_t)-1)) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (auth != NULL) {
		if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
		    (nvlist_t *)auth) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}

	if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (serial != NULL) {
		if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
		    (char **)&serial, 1) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
		if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
		    FM_FMRI_MEM_OFFSET, offset) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

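/*
 * Set-up and validate the members of a zfs fmri according to:
 *
 *	Member name		Type		Value
 *	====================================================
 *	version			uint8_t		0
 *	pool			uint64_t	<pool_guid>
 *	vdev			uint64_t	<vdev_guid>
 *
 * Note that vdev is an optional member, added only when vdev_guid is
 * non-zero.
 */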
void
fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
    uint64_t vdev_guid)
{
	if (version != ZFS_SCHEME_VERSION0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
		return;
	}

	if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
		atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
	}

	if (vdev_guid != 0) {
		if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
			atomic_inc_64(
			    &erpt_kstat_data.fmri_set_failed.value.ui64);
		}
	}
}

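/*
 * Increment the generation field of a format 1 or format 2 ENA; an
 * unrecognized format yields 0.
 */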
uint64_t
fm_ena_increment(uint64_t ena)
{
	uint64_t new_ena;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		new_ena = ena + (1 << ENA_FMT1_GEN_SHFT);
		break;
	case FM_ENA_FMT2:
		new_ena = ena + (1 << ENA_FMT2_GEN_SHFT);
		break;
	default:
		new_ena = 0;
	}

	return (new_ena);
}

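/*
 * Compose an ENA for the given cpu and timestamp.  For format 1 the ENA
 * encodes the format, cpuid and timestamp (gethrtime() is substituted when
 * timestamp is zero); format 2 encodes only the format and timestamp.
 */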
uint64_t
fm_ena_generate_cpu(uint64_t timestamp, processorid_t cpuid, uchar_t format)
{
	uint64_t ena = 0;

	switch (format) {
	case FM_ENA_FMT1:
		if (timestamp) {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((timestamp << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		} else {
			ena = (uint64_t)((format & ENA_FORMAT_MASK) |
			    ((cpuid << ENA_FMT1_CPUID_SHFT) &
			    ENA_FMT1_CPUID_MASK) |
			    ((gethrtime() << ENA_FMT1_TIME_SHFT) &
			    ENA_FMT1_TIME_MASK));
		}
		break;
	case FM_ENA_FMT2:
		ena = (uint64_t)((format & ENA_FORMAT_MASK) |
		    ((timestamp << ENA_FMT2_TIME_SHFT) & ENA_FMT2_TIME_MASK));
		break;
	default:
		break;
	}

	return (ena);
}

uint64_t
fm_ena_generate(uint64_t timestamp, uchar_t format)
{
	uint64_t ena;

	kpreempt_disable();
	ena = fm_ena_generate_cpu(timestamp, getcpuid(), format);
	kpreempt_enable();

	return (ena);
}

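/*
 * Accessors for the generation, format, id and time fields of an ENA.
 * The generation, id and time fields of an unrecognized format decode
 * as zero.
 */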
uint64_t
fm_ena_generation_get(uint64_t ena)
{
	uint64_t gen;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		gen = (ena & ENA_FMT1_GEN_MASK) >> ENA_FMT1_GEN_SHFT;
		break;
	case FM_ENA_FMT2:
		gen = (ena & ENA_FMT2_GEN_MASK) >> ENA_FMT2_GEN_SHFT;
		break;
	default:
		gen = 0;
		break;
	}

	return (gen);
}

uchar_t
fm_ena_format_get(uint64_t ena)
{
	return (ENA_FORMAT(ena));
}

uint64_t
fm_ena_id_get(uint64_t ena)
{
	uint64_t id;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		id = (ena & ENA_FMT1_ID_MASK) >> ENA_FMT1_ID_SHFT;
		break;
	case FM_ENA_FMT2:
		id = (ena & ENA_FMT2_ID_MASK) >> ENA_FMT2_ID_SHFT;
		break;
	default:
		id = 0;
	}

	return (id);
}

uint64_t
fm_ena_time_get(uint64_t ena)
{
	uint64_t time;

	switch (ENA_FORMAT(ena)) {
	case FM_ENA_FMT1:
		time = (ena & ENA_FMT1_TIME_MASK) >> ENA_FMT1_TIME_SHFT;
		break;
	case FM_ENA_FMT2:
		time = (ena & ENA_FMT2_TIME_MASK) >> ENA_FMT2_TIME_SHFT;
		break;
	default:
		time = 0;
	}

	return (time);
}

#ifdef _KERNEL
/*
 * Helper function to increment ereport dropped count.  Used by the event
 * rate limiting code to give feedback to the user about how many events were
 * rate limited by including them in the 'dropped' count.
 */
void
fm_erpt_dropped_increment(void)
{
	atomic_inc_64(&ratelimit_dropped);
}

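/*
 * Initialize the zevent subsystem: size the event queue, create and install
 * the fm/misc kstat, and set up the zevent list, lock and condition variable.
 */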
void
fm_init(void)
{
	zevent_len_cur = 0;
	zevent_flags = 0;

	if (zfs_zevent_len_max == 0)
		zfs_zevent_len_max = ERPT_MAX_ERRS * MAX(max_ncpus, 4);

	/* Initialize zevent allocation and generation kstats */
	fm_ksp = kstat_create("zfs", 0, "fm", "misc", KSTAT_TYPE_NAMED,
	    sizeof (struct erpt_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);

	if (fm_ksp != NULL) {
		fm_ksp->ks_data = &erpt_kstat_data;
		kstat_install(fm_ksp);
	} else {
		cmn_err(CE_NOTE, "failed to create fm/misc kstat\n");
	}

	mutex_init(&zevent_lock, NULL, MUTEX_DEFAULT, NULL);
	list_create(&zevent_list, sizeof (zevent_t),
	    offsetof(zevent_t, ev_node));
	cv_init(&zevent_cv, NULL, CV_DEFAULT, NULL);

	zfs_ereport_init();
}

void
fm_fini(void)
{
	int count;

	zfs_ereport_fini();

	zfs_zevent_drain_all(&count);

	mutex_enter(&zevent_lock);
	cv_broadcast(&zevent_cv);

	zevent_flags |= ZEVENT_SHUTDOWN;
	while (zevent_waiters > 0) {
		mutex_exit(&zevent_lock);
		schedule();
		mutex_enter(&zevent_lock);
	}
	mutex_exit(&zevent_lock);

	cv_destroy(&zevent_cv);
	list_destroy(&zevent_list);
	mutex_destroy(&zevent_lock);

	if (fm_ksp != NULL) {
		kstat_delete(fm_ksp);
		fm_ksp = NULL;
	}
}
#endif /* _KERNEL */

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, len_max, INT, ZMOD_RW,
	"Max event queue length");

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, cols, INT, ZMOD_RW,
	"Max event column width");

ZFS_MODULE_PARAM(zfs_zevent, zfs_zevent_, console, INT, ZMOD_RW,
	"Log events to the console");