xref: /illumos-gate/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c (revision 4263d13f00c9691fa14620eff82abef795be0693)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2012 by Delphix. All rights reserved.
24  */
25 
26 /*
27  * ZFS syseventd module.
28  *
29  * The purpose of this module is to identify when devices are added to the
30  * system, and appropriately online or replace the affected vdevs.
31  *
32  * When a device is added to the system:
33  *
34  * 	1. Search for any vdevs whose devid matches that of the newly added
35  *	   device.
36  *
37  * 	2. If no vdevs are found, then search for any vdevs whose devfs path
38  *	   matches that of the new device.
39  *
40  *	3. If no vdevs match by either method, then ignore the event.
41  *
42  * 	4. Attempt to online the device with a flag to indicate that it should
43  *	   be unspared when resilvering completes.  If this succeeds, then the
44  *	   same device was inserted and we should continue normally.
45  *
46  *	5. If the pool does not have the 'autoreplace' property set, attempt to
47  *	   online the device again without the unspare flag, which will
48  *	   generate a FMA fault.
49  *
50  *	6. If the pool has the 'autoreplace' property set, and the matching vdev
51  *	   is a whole disk, then label the new disk and attempt a 'zpool
52  *	   replace'.
53  *
54  * The module responds to EC_DEV_ADD events for both disks and lofi devices,
55  * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
56  * indicates that a device failed to open during pool load, but the autoreplace
57  * property was set.  In this case, we deferred the associated FMA fault until
58  * our module had a chance to process the autoreplace logic.  If the device
59  * could not be replaced, then the second online attempt will trigger the FMA
60  * fault that we skipped earlier.
61  */
62 
63 #include <alloca.h>
64 #include <devid.h>
65 #include <fcntl.h>
66 #include <libnvpair.h>
67 #include <libsysevent.h>
68 #include <libzfs.h>
69 #include <limits.h>
70 #include <stdlib.h>
71 #include <string.h>
72 #include <syslog.h>
73 #include <sys/list.h>
74 #include <sys/sunddi.h>
75 #include <sys/sysevent/eventdefs.h>
76 #include <sys/sysevent/dev.h>
77 #include <thread_pool.h>
78 #include <unistd.h>
79 #include "syseventd.h"
80 
/*
 * Architecture-specific name fragments.
 *
 * PHYS_PATH is the minor-node suffix that devid_iter() appends to a
 * "/devices<devpath>" string in order to open a known minor node.
 *
 * RAW_SLICE is not referenced by the code visible in this file; presumably it
 * names the slice/partition that covers the whole raw disk -- TODO confirm.
 */
#if defined(__i386) || defined(__amd64)
#define	PHYS_PATH	":q"
#define	RAW_SLICE	"p0"
#elif defined(__sparc)
#define	PHYS_PATH	":c"
#define	RAW_SLICE	"s2"
#else
#error Unknown architecture
#endif
90 
/*
 * Callback invoked by zfs_iter_vdev() for every vdev that matches the search
 * criteria.  Arguments are the pool handle, the matching vdev's nvlist and
 * the dd_isdisk flag from the search descriptor.
 */
typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);

/* libzfs handle; opened in slm_init() and released in slm_fini(). */
libzfs_handle_t *g_zfshdl;
/* List of unavailpool_t entries, filled in by zfs_enum_pools(). */
list_t g_pool_list;
/* Thread pool created by zfs_enum_pools() when g_pool_list is non-empty. */
tpool_t *g_tpool;
/* Set to B_TRUE by zfs_enum_pools() once the pool enumeration has finished. */
boolean_t g_enumeration_done;
/* Id of the thread running zfs_enum_pools(); joined in slm_fini(). */
thread_t g_zfs_tid;
98 
/*
 * One entry of g_pool_list: a pool whose top-level state was below
 * VDEV_STATE_DEGRADED when zfs_unavail_pool() examined it.  The entry owns
 * the pool handle until it is closed by zfs_enable_ds() or slm_fini().
 */
typedef struct unavailpool {
	zpool_handle_t	*uap_zhp;	/* open handle for the pool */
	list_node_t	uap_node;	/* linkage on g_pool_list */
} unavailpool_t;
103 
104 int
105 zfs_toplevel_state(zpool_handle_t *zhp)
106 {
107 	nvlist_t *nvroot;
108 	vdev_stat_t *vs;
109 	unsigned int c;
110 
111 	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
112 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
113 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
114 	    (uint64_t **)&vs, &c) == 0);
115 	return (vs->vs_state);
116 }
117 
118 static int
119 zfs_unavail_pool(zpool_handle_t *zhp, void *data)
120 {
121 	if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
122 		unavailpool_t *uap;
123 		uap = malloc(sizeof (unavailpool_t));
124 		uap->uap_zhp = zhp;
125 		list_insert_tail((list_t *)data, uap);
126 	} else {
127 		zpool_close(zhp);
128 	}
129 	return (0);
130 }
131 
132 /*
133  * The device associated with the given vdev (either by devid or physical path)
134  * has been added to the system.  If 'isdisk' is set, then we only attempt a
135  * replacement if it's a whole disk.  This also implies that we should label the
136  * disk first.
137  *
138  * First, we attempt to online the device (making sure to undo any spare
139  * operation when finished).  If this succeeds, then we're done.  If it fails,
140  * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
141  * but that the label was not what we expected.  If the 'autoreplace' property
142  * is not set, then we relabel the disk (if specified), and attempt a 'zpool
143  * replace'.  If the online is successful, but the new state is something else
144  * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
145  * race, and we should avoid attempting to relabel the disk.
146  */
147 static void
148 zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
149 {
150 	char *path;
151 	vdev_state_t newstate;
152 	nvlist_t *nvroot, *newvd;
153 	uint64_t wholedisk = 0ULL;
154 	char *physpath = NULL;
155 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
156 	size_t len;
157 
158 	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
159 		return;
160 
161 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
162 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
163 
164 	/*
165 	 * We should have a way to online a device by guid.  With the current
166 	 * interface, we are forced to chop off the 's0' for whole disks.
167 	 */
168 	(void) strlcpy(fullpath, path, sizeof (fullpath));
169 	if (wholedisk)
170 		fullpath[strlen(fullpath) - 2] = '\0';
171 
172 	/*
173 	 * Attempt to online the device.  It would be nice to online this by
174 	 * GUID, but the current interface only supports lookup by path.
175 	 */
176 	if (zpool_vdev_online(zhp, fullpath,
177 	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
178 	    (newstate == VDEV_STATE_HEALTHY || newstate == VDEV_STATE_DEGRADED))
179 		return;
180 
181 	/*
182 	 * If the pool doesn't have the autoreplace property set, then attempt a
183 	 * true online (without the unspare flag), which will trigger a FMA
184 	 * fault.
185 	 */
186 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
187 	    (isdisk && !wholedisk)) {
188 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
189 		    &newstate);
190 		return;
191 	}
192 
193 	if (isdisk) {
194 		/*
195 		 * If this is a request to label a whole disk, then attempt to
196 		 * write out the label.  Before we can label the disk, we need
197 		 * access to a raw node.  Ideally, we'd like to walk the devinfo
198 		 * tree and find a raw node from the corresponding parent node.
199 		 * This is overly complicated, and since we know how we labeled
200 		 * this device in the first place, we know it's save to switch
201 		 * from /dev/dsk to /dev/rdsk and append the backup slice.
202 		 *
203 		 * If any part of this process fails, then do a force online to
204 		 * trigger a ZFS fault for the device (and any hot spare
205 		 * replacement).
206 		 */
207 		if (strncmp(path, "/dev/dsk/", 9) != 0) {
208 			(void) zpool_vdev_online(zhp, fullpath,
209 			    ZFS_ONLINE_FORCEFAULT, &newstate);
210 			return;
211 		}
212 
213 		(void) strlcpy(rawpath, path + 9, sizeof (rawpath));
214 		len = strlen(rawpath);
215 		rawpath[len - 2] = '\0';
216 
217 		if (zpool_label_disk(g_zfshdl, zhp, rawpath) != 0) {
218 			(void) zpool_vdev_online(zhp, fullpath,
219 			    ZFS_ONLINE_FORCEFAULT, &newstate);
220 			return;
221 		}
222 	}
223 
224 	/*
225 	 * Cosntruct the root vdev to pass to zpool_vdev_attach().  While adding
226 	 * the entire vdev structure is harmless, we construct a reduced set of
227 	 * path/physpath/wholedisk to keep it simple.
228 	 */
229 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
230 		return;
231 
232 	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
233 		nvlist_free(nvroot);
234 		return;
235 	}
236 
237 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
238 	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
239 	    (physpath != NULL && nvlist_add_string(newvd,
240 	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
241 	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
242 	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
243 	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
244 	    1) != 0) {
245 		nvlist_free(newvd);
246 		nvlist_free(nvroot);
247 		return;
248 	}
249 
250 	nvlist_free(newvd);
251 
252 	(void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
253 
254 	nvlist_free(nvroot);
255 
256 }
257 
/*
 * Utility functions to find a vdev matching given criteria.
 *
 * A dev_data_t describes one search.  When dd_vdev_guid is non-zero the vdev
 * is matched by guid; otherwise the string stored under the nvlist key
 * dd_prop is compared against dd_compare (see zfs_iter_vdev()).
 */
typedef struct dev_data {
	const char		*dd_compare;	/* string to match against */
	const char		*dd_prop;	/* nvlist key holding the string */
	zfs_process_func_t	dd_func;	/* called for each matching vdev */
	boolean_t		dd_found;	/* result returned by *_iter() */
	boolean_t		dd_isdisk;	/* passed through to dd_func */
	uint64_t		dd_pool_guid;	/* 0 means "any pool" */
	uint64_t		dd_vdev_guid;	/* non-zero selects guid match */
} dev_data_t;
270 
271 static void
272 zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
273 {
274 	dev_data_t *dp = data;
275 	char *path;
276 	uint_t c, children;
277 	nvlist_t **child;
278 	size_t len;
279 	uint64_t guid;
280 
281 	/*
282 	 * First iterate over any children.
283 	 */
284 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
285 	    &child, &children) == 0) {
286 		for (c = 0; c < children; c++)
287 			zfs_iter_vdev(zhp, child[c], data);
288 		return;
289 	}
290 
291 	if (dp->dd_vdev_guid != 0) {
292 		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
293 		    &guid) != 0 || guid != dp->dd_vdev_guid)
294 			return;
295 	} else {
296 		len = strlen(dp->dd_compare);
297 
298 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
299 		    strncmp(dp->dd_compare, path, len) != 0)
300 			return;
301 
302 		/*
303 		 * Normally, we want to have an exact match for the comparison
304 		 * string.  However, we allow substring matches in the following
305 		 * cases:
306 		 *
307 		 * 	<path>:		This is a devpath, and the target is one
308 		 * 			of its children.
309 		 *
310 		 * 	<path/>		This is a devid for a whole disk, and
311 		 * 			the target is one of its children.
312 		 */
313 		if (path[len] != '\0' && path[len] != ':' &&
314 		    path[len - 1] != '/')
315 			return;
316 	}
317 
318 	(dp->dd_func)(zhp, nvl, dp->dd_isdisk);
319 }
320 
321 void
322 zfs_enable_ds(void *arg)
323 {
324 	unavailpool_t *pool = (unavailpool_t *)arg;
325 
326 	(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
327 	zpool_close(pool->uap_zhp);
328 	free(pool);
329 }
330 
331 static int
332 zfs_iter_pool(zpool_handle_t *zhp, void *data)
333 {
334 	nvlist_t *config, *nvl;
335 	dev_data_t *dp = data;
336 	uint64_t pool_guid;
337 	unavailpool_t *pool;
338 
339 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
340 		if (dp->dd_pool_guid == 0 ||
341 		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
342 		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
343 			(void) nvlist_lookup_nvlist(config,
344 			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
345 			zfs_iter_vdev(zhp, nvl, data);
346 		}
347 	}
348 	if (g_enumeration_done)  {
349 		for (pool = list_head(&g_pool_list); pool != NULL;
350 		    pool = list_next(&g_pool_list, pool)) {
351 
352 			if (strcmp(zpool_get_name(zhp),
353 			    zpool_get_name(pool->uap_zhp)))
354 				continue;
355 			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
356 				list_remove(&g_pool_list, pool);
357 				(void) tpool_dispatch(g_tpool, zfs_enable_ds,
358 				    pool);
359 				break;
360 			}
361 		}
362 	}
363 
364 	zpool_close(zhp);
365 	return (0);
366 }
367 
368 /*
369  * Given a physical device path, iterate over all (pool, vdev) pairs which
370  * correspond to the given path.
371  */
372 static boolean_t
373 devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
374 {
375 	dev_data_t data = { 0 };
376 
377 	data.dd_compare = devpath;
378 	data.dd_func = func;
379 	data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
380 	data.dd_found = B_FALSE;
381 	data.dd_isdisk = wholedisk;
382 
383 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
384 
385 	return (data.dd_found);
386 }
387 
388 /*
389  * Given a /devices path, lookup the corresponding devid for each minor node,
390  * and find any vdevs with matching devids.  Doing this straight up would be
391  * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
392  * the fact that each devid ends with "/<minornode>".  Once we find any valid
393  * minor node, we chop off the portion after the last slash, and then search for
394  * matching vdevs, which is O(vdevs in system).
395  */
396 static boolean_t
397 devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
398 {
399 	size_t len = strlen(devpath) + sizeof ("/devices") +
400 	    sizeof (PHYS_PATH) - 1;
401 	char *fullpath;
402 	int fd;
403 	ddi_devid_t devid;
404 	char *devidstr, *fulldevid;
405 	dev_data_t data = { 0 };
406 
407 	/*
408 	 * Try to open a known minor node.
409 	 */
410 	fullpath = alloca(len);
411 	(void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
412 	if ((fd = open(fullpath, O_RDONLY)) < 0)
413 		return (B_FALSE);
414 
415 	/*
416 	 * Determine the devid as a string, with no trailing slash for the minor
417 	 * node.
418 	 */
419 	if (devid_get(fd, &devid) != 0) {
420 		(void) close(fd);
421 		return (B_FALSE);
422 	}
423 	(void) close(fd);
424 
425 	if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
426 		devid_free(devid);
427 		return (B_FALSE);
428 	}
429 
430 	len = strlen(devidstr) + 2;
431 	fulldevid = alloca(len);
432 	(void) snprintf(fulldevid, len, "%s/", devidstr);
433 
434 	data.dd_compare = fulldevid;
435 	data.dd_func = func;
436 	data.dd_prop = ZPOOL_CONFIG_DEVID;
437 	data.dd_found = B_FALSE;
438 	data.dd_isdisk = wholedisk;
439 
440 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
441 
442 	devid_str_free(devidstr);
443 	devid_free(devid);
444 
445 	return (data.dd_found);
446 }
447 
448 /*
449  * This function is called when we receive a devfs add event.  This can be
450  * either a disk event or a lofi event, and the behavior is slightly different
451  * depending on which it is.
452  */
453 static int
454 zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
455 {
456 	char *devpath, *devname;
457 	char path[PATH_MAX], realpath[PATH_MAX];
458 	char *colon, *raw;
459 	int ret;
460 
461 	/*
462 	 * The main unit of operation is the physical device path.  For disks,
463 	 * this is the device node, as all minor nodes are affected.  For lofi
464 	 * devices, this includes the minor path.  Unfortunately, this isn't
465 	 * represented in the DEV_PHYS_PATH for various reasons.
466 	 */
467 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
468 		return (-1);
469 
470 	/*
471 	 * If this is a lofi device, then also get the minor instance name.
472 	 * Unfortunately, the current payload doesn't include an easy way to get
473 	 * this information.  So we cheat by resolving the 'dev_name' (which
474 	 * refers to the raw device) and taking the portion between ':(*),raw'.
475 	 */
476 	(void) strlcpy(realpath, devpath, sizeof (realpath));
477 	if (is_lofi) {
478 		if (nvlist_lookup_string(nvl, DEV_NAME,
479 		    &devname) == 0 &&
480 		    (ret = resolvepath(devname, path,
481 		    sizeof (path))) > 0) {
482 			path[ret] = '\0';
483 			colon = strchr(path, ':');
484 			if (colon != NULL)
485 				raw = strstr(colon + 1, ",raw");
486 			if (colon != NULL && raw != NULL) {
487 				*raw = '\0';
488 				(void) snprintf(realpath,
489 				    sizeof (realpath), "%s%s",
490 				    devpath, colon);
491 				*raw = ',';
492 			}
493 		}
494 	}
495 
496 	/*
497 	 * Iterate over all vdevs with a matching devid, and then those with a
498 	 * matching /devices path.  For disks, we only want to pay attention to
499 	 * vdevs marked as whole disks.  For lofi, we don't care (because we're
500 	 * matching an exact minor name).
501 	 */
502 	if (!devid_iter(realpath, zfs_process_add, !is_lofi))
503 		(void) devpath_iter(realpath, zfs_process_add, !is_lofi);
504 
505 	return (0);
506 }
507 
508 /*
509  * Called when we receive a VDEV_CHECK event, which indicates a device could not
510  * be opened during initial pool open, but the autoreplace property was set on
511  * the pool.  In this case, we treat it as if it were an add event.
512  */
513 static int
514 zfs_deliver_check(nvlist_t *nvl)
515 {
516 	dev_data_t data = { 0 };
517 
518 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
519 	    &data.dd_pool_guid) != 0 ||
520 	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
521 	    &data.dd_vdev_guid) != 0)
522 		return (0);
523 
524 	data.dd_isdisk = B_TRUE;
525 	data.dd_func = zfs_process_add;
526 
527 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
528 
529 	return (0);
530 }
531 
/*
 * Prefix that zfs_deliver_dle() requires on the event's physical path and
 * strips before searching the pools.
 */
#define	DEVICE_PREFIX	"/devices"
533 
534 static int
535 zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
536 {
537 	char *devname = data;
538 	boolean_t avail_spare, l2cache;
539 	vdev_state_t newstate;
540 	nvlist_t *tgt;
541 
542 	syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
543 	    devname, zpool_get_name(zhp));
544 
545 	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
546 	    &avail_spare, &l2cache, NULL)) != NULL) {
547 		char *path, fullpath[MAXPATHLEN];
548 		uint64_t wholedisk = 0ULL;
549 
550 		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
551 		    &path) == 0);
552 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
553 		    &wholedisk) == 0);
554 
555 		(void) strlcpy(fullpath, path, sizeof (fullpath));
556 		if (wholedisk) {
557 			fullpath[strlen(fullpath) - 2] = '\0';
558 
559 			/*
560 			 * We need to reopen the pool associated with this
561 			 * device so that the kernel can update the size
562 			 * of the expanded device.
563 			 */
564 			(void) zpool_reopen(zhp);
565 		}
566 
567 		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
568 			syseventd_print(9, "zfsdle_vdev_online: setting device"
569 			    " device %s to ONLINE state in pool %s.\n",
570 			    fullpath, zpool_get_name(zhp));
571 			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
572 				(void) zpool_vdev_online(zhp, fullpath, 0,
573 				    &newstate);
574 		}
575 		zpool_close(zhp);
576 		return (1);
577 	}
578 	zpool_close(zhp);
579 	return (0);
580 }
581 
582 int
583 zfs_deliver_dle(nvlist_t *nvl)
584 {
585 	char *devname;
586 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
587 		syseventd_print(9, "zfs_deliver_event: no physpath\n");
588 		return (-1);
589 	}
590 	if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
591 		syseventd_print(9, "zfs_deliver_event: invalid "
592 		    "device '%s'", devname);
593 		return (-1);
594 	}
595 
596 	/*
597 	 * We try to find the device using the physical
598 	 * path that has been supplied. We need to strip off
599 	 * the /devices prefix before starting our search.
600 	 */
601 	devname += strlen(DEVICE_PREFIX);
602 	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
603 		syseventd_print(9, "zfs_deliver_event: device '%s' not"
604 		    " found\n", devname);
605 		return (1);
606 	}
607 	return (0);
608 }
609 
610 
611 /*ARGSUSED*/
612 static int
613 zfs_deliver_event(sysevent_t *ev, int unused)
614 {
615 	const char *class = sysevent_get_class_name(ev);
616 	const char *subclass = sysevent_get_subclass_name(ev);
617 	nvlist_t *nvl;
618 	int ret;
619 	boolean_t is_lofi, is_check, is_dle = B_FALSE;
620 
621 	if (strcmp(class, EC_DEV_ADD) == 0) {
622 		/*
623 		 * We're mainly interested in disk additions, but we also listen
624 		 * for new lofi devices, to allow for simplified testing.
625 		 */
626 		if (strcmp(subclass, ESC_DISK) == 0)
627 			is_lofi = B_FALSE;
628 		else if (strcmp(subclass, ESC_LOFI) == 0)
629 			is_lofi = B_TRUE;
630 		else
631 			return (0);
632 
633 		is_check = B_FALSE;
634 	} else if (strcmp(class, EC_ZFS) == 0 &&
635 	    strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
636 		/*
637 		 * This event signifies that a device failed to open during pool
638 		 * load, but the 'autoreplace' property was set, so we should
639 		 * pretend it's just been added.
640 		 */
641 		is_check = B_TRUE;
642 	} else if (strcmp(class, EC_DEV_STATUS) == 0 &&
643 	    strcmp(subclass, ESC_DEV_DLE) == 0) {
644 		is_dle = B_TRUE;
645 	} else {
646 		return (0);
647 	}
648 
649 	if (sysevent_get_attr_list(ev, &nvl) != 0)
650 		return (-1);
651 
652 	if (is_dle)
653 		ret = zfs_deliver_dle(nvl);
654 	else if (is_check)
655 		ret = zfs_deliver_check(nvl);
656 	else
657 		ret = zfs_deliver_add(nvl, is_lofi);
658 
659 	nvlist_free(nvl);
660 	return (ret);
661 }
662 
663 /*ARGSUSED*/
664 void *
665 zfs_enum_pools(void *arg)
666 {
667 	(void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
668 	if (!list_is_empty(&g_pool_list))
669 		g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
670 		    0, NULL);
671 	g_enumeration_done = B_TRUE;
672 	return (NULL);
673 }
674 
/*
 * Module operations vector returned to syseventd by slm_init().  The last
 * member is the event delivery routine.
 * NOTE(review): the literal 10 is presumably the delivery retry limit --
 * confirm against the slm_mod_ops definition in syseventd.h.
 */
static struct slm_mod_ops zfs_mod_ops = {
	SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
};
678 
679 struct slm_mod_ops *
680 slm_init()
681 {
682 	if ((g_zfshdl = libzfs_init()) == NULL)
683 		return (NULL);
684 	/*
685 	 * collect a list of unavailable pools (asynchronously,
686 	 * since this can take a while)
687 	 */
688 	list_create(&g_pool_list, sizeof (struct unavailpool),
689 	    offsetof(struct unavailpool, uap_node));
690 	if (thr_create(NULL, 0, zfs_enum_pools, NULL, 0, &g_zfs_tid) != 0)
691 		return (NULL);
692 	return (&zfs_mod_ops);
693 }
694 
695 void
696 slm_fini()
697 {
698 	unavailpool_t *pool;
699 
700 	if (g_tpool != NULL) {
701 		tpool_wait(g_tpool);
702 		tpool_destroy(g_tpool);
703 	}
704 	while ((pool = (list_head(&g_pool_list))) != NULL) {
705 		list_remove(&g_pool_list, pool);
706 		zpool_close(pool->uap_zhp);
707 		free(pool);
708 	}
709 	(void) thr_join(g_zfs_tid, NULL, NULL);
710 	list_destroy(&g_pool_list);
711 	libzfs_fini(g_zfshdl);
712 }
713