1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2012 by Delphix. All rights reserved.
24 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
25 */
26
27/*
28 * ZFS syseventd module.
29 *
30 * The purpose of this module is to identify when devices are added to the
31 * system, and appropriately online or replace the affected vdevs.
32 *
33 * When a device is added to the system:
34 *
35 * 	1. Search for any vdevs whose devid matches that of the newly added
36 *	   device.
37 *
38 * 	2. If no vdevs are found, then search for any vdevs whose devfs path
39 *	   matches that of the new device.
40 *
41 *	3. If no vdevs match by either method, then ignore the event.
42 *
43 * 	4. Attempt to online the device with a flag to indicate that it should
44 *	   be unspared when resilvering completes.  If this succeeds, then the
45 *	   same device was inserted and we should continue normally.
46 *
47 *	5. If the pool does not have the 'autoreplace' property set, attempt to
48 *	   online the device again without the unspare flag, which will
49 *	   generate a FMA fault.
50 *
51 *	6. If the pool has the 'autoreplace' property set, and the matching vdev
52 *	   is a whole disk, then label the new disk and attempt a 'zpool
53 *	   replace'.
54 *
55 * The module responds to EC_DEV_ADD events for both disks and lofi devices,
56 * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
57 * indicates that a device failed to open during pool load, but the autoreplace
58 * property was set.  In this case, we deferred the associated FMA fault until
59 * our module had a chance to process the autoreplace logic.  If the device
60 * could not be replaced, then the second online attempt will trigger the FMA
61 * fault that we skipped earlier.
62 */
63
64#include <alloca.h>
65#include <devid.h>
66#include <fcntl.h>
67#include <libnvpair.h>
68#include <libsysevent.h>
69#include <libzfs.h>
70#include <limits.h>
71#include <stdlib.h>
72#include <string.h>
73#include <syslog.h>
74#include <sys/list.h>
75#include <sys/sunddi.h>
76#include <sys/sysevent/eventdefs.h>
77#include <sys/sysevent/dev.h>
78#include <thread_pool.h>
79#include <unistd.h>
80#include "syseventd.h"
81
/*
 * Per-architecture device name suffixes.  PHYS_PATH is the minor-node suffix
 * that devid_iter() appends to a /devices path in order to open the device.
 * RAW_SLICE is not referenced in the code visible here; presumably it names
 * the matching raw whole-disk slice (TODO confirm).
 */
#if defined(__sparc)
#define	PHYS_PATH	":c"
#define	RAW_SLICE	"s2"
#elif defined(__i386) || defined(__amd64)
#define	PHYS_PATH	":q"
#define	RAW_SLICE	"p0"
#else
#error Unknown architecture
#endif
91
92typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);
93
94libzfs_handle_t *g_zfshdl;
95list_t g_pool_list;
96tpool_t *g_tpool;
97boolean_t g_enumeration_done;
98thread_t g_zfs_tid;
99
100typedef struct unavailpool {
101	zpool_handle_t	*uap_zhp;
102	list_node_t	uap_node;
103} unavailpool_t;
104
105int
106zfs_toplevel_state(zpool_handle_t *zhp)
107{
108	nvlist_t *nvroot;
109	vdev_stat_t *vs;
110	unsigned int c;
111
112	verify(nvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
113	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
114	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_VDEV_STATS,
115	    (uint64_t **)&vs, &c) == 0);
116	return (vs->vs_state);
117}
118
119static int
120zfs_unavail_pool(zpool_handle_t *zhp, void *data)
121{
122	if (zfs_toplevel_state(zhp) < VDEV_STATE_DEGRADED) {
123		unavailpool_t *uap;
124		uap = malloc(sizeof (unavailpool_t));
125		uap->uap_zhp = zhp;
126		list_insert_tail((list_t *)data, uap);
127	} else {
128		zpool_close(zhp);
129	}
130	return (0);
131}
132
133/*
134 * The device associated with the given vdev (either by devid or physical path)
135 * has been added to the system.  If 'isdisk' is set, then we only attempt a
136 * replacement if it's a whole disk.  This also implies that we should label the
137 * disk first.
138 *
139 * First, we attempt to online the device (making sure to undo any spare
140 * operation when finished).  If this succeeds, then we're done.  If it fails,
141 * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
 * but that the label was not what we expected.  If the 'autoreplace' property
 * is set, then we relabel the disk (if specified), and attempt a 'zpool
 * replace'.  If the online is successful, but the new state is something else
145 * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
146 * race, and we should avoid attempting to relabel the disk.
147 */
148static void
149zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
150{
151	char *path;
152	vdev_state_t newstate;
153	nvlist_t *nvroot, *newvd;
154	uint64_t wholedisk = 0ULL;
155	uint64_t offline = 0ULL;
156	char *physpath = NULL;
157	char rawpath[PATH_MAX], fullpath[PATH_MAX];
158	zpool_boot_label_t boot_type;
159	uint64_t boot_size;
160	size_t len;
161
162	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
163		return;
164
165	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
166	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
167	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_OFFLINE, &offline);
168
169	/*
170	 * We should have a way to online a device by guid.  With the current
171	 * interface, we are forced to chop off the 's0' for whole disks.
172	 */
173	(void) strlcpy(fullpath, path, sizeof (fullpath));
174	if (wholedisk)
175		fullpath[strlen(fullpath) - 2] = '\0';
176
177	/*
178	 * Attempt to online the device.  It would be nice to online this by
179	 * GUID, but the current interface only supports lookup by path.
180	 */
181	if (offline ||
182	    (zpool_vdev_online(zhp, fullpath,
183	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
184	    (newstate == VDEV_STATE_HEALTHY ||
185	    newstate == VDEV_STATE_DEGRADED)))
186		return;
187
188	/*
189	 * If the pool doesn't have the autoreplace property set, then attempt a
190	 * true online (without the unspare flag), which will trigger a FMA
191	 * fault.
192	 */
193	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
194	    (isdisk && !wholedisk)) {
195		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
196		    &newstate);
197		return;
198	}
199
200	if (isdisk) {
201		/*
202		 * If this is a request to label a whole disk, then attempt to
203		 * write out the label.  Before we can label the disk, we need
204		 * access to a raw node.  Ideally, we'd like to walk the devinfo
205		 * tree and find a raw node from the corresponding parent node.
206		 * This is overly complicated, and since we know how we labeled
207		 * this device in the first place, we know it's save to switch
208		 * from /dev/dsk to /dev/rdsk and append the backup slice.
209		 *
210		 * If any part of this process fails, then do a force online to
211		 * trigger a ZFS fault for the device (and any hot spare
212		 * replacement).
213		 */
214		if (strncmp(path, ZFS_DISK_ROOTD,
215		    strlen(ZFS_DISK_ROOTD)) != 0) {
216			(void) zpool_vdev_online(zhp, fullpath,
217			    ZFS_ONLINE_FORCEFAULT, &newstate);
218			return;
219		}
220
221		(void) strlcpy(rawpath, path + 9, sizeof (rawpath));
222		len = strlen(rawpath);
223		rawpath[len - 2] = '\0';
224
225		if (zpool_is_bootable(zhp))
226			boot_type = ZPOOL_COPY_BOOT_LABEL;
227		else
228			boot_type = ZPOOL_NO_BOOT_LABEL;
229
230		boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL);
231		if (zpool_label_disk(g_zfshdl, zhp, rawpath,
232		    boot_type, boot_size, NULL) != 0) {
233			(void) zpool_vdev_online(zhp, fullpath,
234			    ZFS_ONLINE_FORCEFAULT, &newstate);
235			return;
236		}
237	}
238
239	/*
240	 * Cosntruct the root vdev to pass to zpool_vdev_attach().  While adding
241	 * the entire vdev structure is harmless, we construct a reduced set of
242	 * path/physpath/wholedisk to keep it simple.
243	 */
244	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
245		return;
246
247	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
248		nvlist_free(nvroot);
249		return;
250	}
251
252	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
253	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
254	    (physpath != NULL && nvlist_add_string(newvd,
255	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
256	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
257	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
258	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
259	    1) != 0) {
260		nvlist_free(newvd);
261		nvlist_free(nvroot);
262		return;
263	}
264
265	nvlist_free(newvd);
266
267	(void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
268
269	nvlist_free(nvroot);
270
271}
272
273/*
274 * Utility functions to find a vdev matching given criteria.
275 */
276typedef struct dev_data {
277	const char		*dd_compare;
278	const char		*dd_prop;
279	zfs_process_func_t	dd_func;
280	boolean_t		dd_found;
281	boolean_t		dd_isdisk;
282	uint64_t		dd_pool_guid;
283	uint64_t		dd_vdev_guid;
284} dev_data_t;
285
286static void
287zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
288{
289	dev_data_t *dp = data;
290	char *path;
291	uint_t c, children;
292	nvlist_t **child;
293	size_t len;
294	uint64_t guid;
295
296	/*
297	 * First iterate over any children.
298	 */
299	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
300	    &child, &children) == 0) {
301		for (c = 0; c < children; c++)
302			zfs_iter_vdev(zhp, child[c], data);
303		return;
304	}
305
306	if (dp->dd_vdev_guid != 0) {
307		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
308		    &guid) != 0 || guid != dp->dd_vdev_guid)
309			return;
310	} else if (dp->dd_compare != NULL) {
311		len = strlen(dp->dd_compare);
312
313		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
314		    strncmp(dp->dd_compare, path, len) != 0)
315			return;
316
317		/*
318		 * Normally, we want to have an exact match for the comparison
319		 * string.  However, we allow substring matches in the following
320		 * cases:
321		 *
322		 * 	<path>:		This is a devpath, and the target is one
323		 * 			of its children.
324		 *
325		 * 	<path/>		This is a devid for a whole disk, and
326		 * 			the target is one of its children.
327		 */
328		if (path[len] != '\0' && path[len] != ':' &&
329		    path[len - 1] != '/')
330			return;
331	}
332
333	(dp->dd_func)(zhp, nvl, dp->dd_isdisk);
334}
335
336void
337zfs_enable_ds(void *arg)
338{
339	unavailpool_t *pool = (unavailpool_t *)arg;
340
341	(void) zpool_enable_datasets(pool->uap_zhp, NULL, 0);
342	zpool_close(pool->uap_zhp);
343	free(pool);
344}
345
346static int
347zfs_iter_pool(zpool_handle_t *zhp, void *data)
348{
349	nvlist_t *config, *nvl;
350	dev_data_t *dp = data;
351	uint64_t pool_guid;
352	unavailpool_t *pool;
353
354	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
355		if (dp->dd_pool_guid == 0 ||
356		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
357		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
358			(void) nvlist_lookup_nvlist(config,
359			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
360			zfs_iter_vdev(zhp, nvl, data);
361		}
362	}
363	if (g_enumeration_done)  {
364		for (pool = list_head(&g_pool_list); pool != NULL;
365		    pool = list_next(&g_pool_list, pool)) {
366
367			if (strcmp(zpool_get_name(zhp),
368			    zpool_get_name(pool->uap_zhp)))
369				continue;
370			if (zfs_toplevel_state(zhp) >= VDEV_STATE_DEGRADED) {
371				list_remove(&g_pool_list, pool);
372				(void) tpool_dispatch(g_tpool, zfs_enable_ds,
373				    pool);
374				break;
375			}
376		}
377	}
378
379	zpool_close(zhp);
380	return (0);
381}
382
383/*
384 * Given a physical device path, iterate over all (pool, vdev) pairs which
385 * correspond to the given path.
386 */
387static boolean_t
388devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
389{
390	dev_data_t data = { 0 };
391
392	data.dd_compare = devpath;
393	data.dd_func = func;
394	data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
395	data.dd_found = B_FALSE;
396	data.dd_isdisk = wholedisk;
397
398	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
399
400	return (data.dd_found);
401}
402
403/*
404 * Given a /devices path, lookup the corresponding devid for each minor node,
405 * and find any vdevs with matching devids.  Doing this straight up would be
406 * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
407 * the fact that each devid ends with "/<minornode>".  Once we find any valid
408 * minor node, we chop off the portion after the last slash, and then search for
409 * matching vdevs, which is O(vdevs in system).
410 */
411static boolean_t
412devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
413{
414	size_t len = strlen(devpath) + sizeof ("/devices") +
415	    sizeof (PHYS_PATH) - 1;
416	char *fullpath;
417	int fd;
418	ddi_devid_t devid;
419	char *devidstr, *fulldevid;
420	dev_data_t data = { 0 };
421
422	/*
423	 * Try to open a known minor node.
424	 */
425	fullpath = alloca(len);
426	(void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
427	if ((fd = open(fullpath, O_RDONLY)) < 0)
428		return (B_FALSE);
429
430	/*
431	 * Determine the devid as a string, with no trailing slash for the minor
432	 * node.
433	 */
434	if (devid_get(fd, &devid) != 0) {
435		(void) close(fd);
436		return (B_FALSE);
437	}
438	(void) close(fd);
439
440	if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
441		devid_free(devid);
442		return (B_FALSE);
443	}
444
445	len = strlen(devidstr) + 2;
446	fulldevid = alloca(len);
447	(void) snprintf(fulldevid, len, "%s/", devidstr);
448
449	data.dd_compare = fulldevid;
450	data.dd_func = func;
451	data.dd_prop = ZPOOL_CONFIG_DEVID;
452	data.dd_found = B_FALSE;
453	data.dd_isdisk = wholedisk;
454
455	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
456
457	devid_str_free(devidstr);
458	devid_free(devid);
459
460	return (data.dd_found);
461}
462
463/*
464 * This function is called when we receive a devfs add event.  This can be
465 * either a disk event or a lofi event, and the behavior is slightly different
466 * depending on which it is.
467 */
468static int
469zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
470{
471	char *devpath, *devname;
472	char path[PATH_MAX], realpath[PATH_MAX];
473	char *colon, *raw;
474	int ret;
475
476	/*
477	 * The main unit of operation is the physical device path.  For disks,
478	 * this is the device node, as all minor nodes are affected.  For lofi
479	 * devices, this includes the minor path.  Unfortunately, this isn't
480	 * represented in the DEV_PHYS_PATH for various reasons.
481	 */
482	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
483		return (-1);
484
485	/*
486	 * If this is a lofi device, then also get the minor instance name.
487	 * Unfortunately, the current payload doesn't include an easy way to get
488	 * this information.  So we cheat by resolving the 'dev_name' (which
489	 * refers to the raw device) and taking the portion between ':(*),raw'.
490	 */
491	(void) strlcpy(realpath, devpath, sizeof (realpath));
492	if (is_lofi) {
493		if (nvlist_lookup_string(nvl, DEV_NAME,
494		    &devname) == 0 &&
495		    (ret = resolvepath(devname, path,
496		    sizeof (path))) > 0) {
497			path[ret] = '\0';
498			colon = strchr(path, ':');
499			if (colon != NULL)
500				raw = strstr(colon + 1, ",raw");
501			if (colon != NULL && raw != NULL) {
502				*raw = '\0';
503				(void) snprintf(realpath,
504				    sizeof (realpath), "%s%s",
505				    devpath, colon);
506				*raw = ',';
507			}
508		}
509	}
510
511	/*
512	 * Iterate over all vdevs with a matching devid, and then those with a
513	 * matching /devices path.  For disks, we only want to pay attention to
514	 * vdevs marked as whole disks.  For lofi, we don't care (because we're
515	 * matching an exact minor name).
516	 */
517	if (!devid_iter(realpath, zfs_process_add, !is_lofi))
518		(void) devpath_iter(realpath, zfs_process_add, !is_lofi);
519
520	return (0);
521}
522
523/*
524 * Called when we receive a VDEV_CHECK event, which indicates a device could not
525 * be opened during initial pool open, but the autoreplace property was set on
526 * the pool.  In this case, we treat it as if it were an add event.
527 */
528static int
529zfs_deliver_check(nvlist_t *nvl)
530{
531	dev_data_t data = { 0 };
532
533	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
534	    &data.dd_pool_guid) != 0 ||
535	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
536	    &data.dd_vdev_guid) != 0 ||
537	    data.dd_vdev_guid == 0)
538		return (0);
539
540	data.dd_isdisk = B_TRUE;
541	data.dd_func = zfs_process_add;
542
543	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
544
545	return (0);
546}
547
/*
 * Prefix that zfs_deliver_dle() requires on, and strips from, the physical
 * path delivered with an event.
 */
#define	DEVICE_PREFIX	"/devices"
549
550static int
551zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
552{
553	char *devname = data;
554	boolean_t avail_spare, l2cache;
555	vdev_state_t newstate;
556	nvlist_t *tgt;
557
558	syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
559	    devname, zpool_get_name(zhp));
560
561	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
562	    &avail_spare, &l2cache, NULL)) != NULL) {
563		char *path, fullpath[MAXPATHLEN];
564		uint64_t wholedisk = 0ULL;
565
566		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
567		    &path) == 0);
568		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
569		    &wholedisk) == 0);
570
571		(void) strlcpy(fullpath, path, sizeof (fullpath));
572		if (wholedisk) {
573			fullpath[strlen(fullpath) - 2] = '\0';
574
575			/*
576			 * We need to reopen the pool associated with this
577			 * device so that the kernel can update the size
578			 * of the expanded device.
579			 */
580			(void) zpool_reopen(zhp);
581		}
582
583		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
584			syseventd_print(9, "zfsdle_vdev_online: setting device"
585			    " device %s to ONLINE state in pool %s.\n",
586			    fullpath, zpool_get_name(zhp));
587			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
588				(void) zpool_vdev_online(zhp, fullpath, 0,
589				    &newstate);
590		}
591		zpool_close(zhp);
592		return (1);
593	}
594	zpool_close(zhp);
595	return (0);
596}
597
598/*
599 * This function is called for each vdev of a pool for which any of the
 * following events was received:
601 *  - ESC_ZFS_vdev_add
602 *  - ESC_ZFS_vdev_attach
603 *  - ESC_ZFS_vdev_clear
604 *  - ESC_ZFS_vdev_online
605 *  - ESC_ZFS_pool_create
606 *  - ESC_ZFS_pool_import
607 * It will update the vdevs FRU property if it is out of date.
608 */
609/*ARGSUSED2*/
610static void
611zfs_update_vdev_fru(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
612{
613	char *devpath, *cptr, *oldfru = NULL;
614	const char *newfru;
615	uint64_t vdev_guid;
616
617	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_GUID, &vdev_guid);
618	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &devpath);
619	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_FRU, &oldfru);
620
621	/* remove :<slice> from devpath */
622	cptr = strrchr(devpath, ':');
623	if (cptr != NULL)
624		*cptr = '\0';
625
626	newfru = libzfs_fru_lookup(g_zfshdl, devpath);
627	if (newfru == NULL) {
628		syseventd_print(9, "zfs_update_vdev_fru: no FRU for %s\n",
629		    devpath);
630		return;
631	}
632
633	/* do nothing if the FRU hasn't changed */
634	if (oldfru != NULL && libzfs_fru_compare(g_zfshdl, oldfru, newfru)) {
635		syseventd_print(9, "zfs_update_vdev_fru: FRU unchanged\n");
636		return;
637	}
638
639	syseventd_print(9, "zfs_update_vdev_fru: devpath = %s\n", devpath);
640	syseventd_print(9, "zfs_update_vdev_fru: FRU = %s\n", newfru);
641
642	(void) zpool_fru_set(zhp, vdev_guid, newfru);
643}
644
645/*
646 * This function handles the following events:
647 *  - ESC_ZFS_vdev_add
648 *  - ESC_ZFS_vdev_attach
649 *  - ESC_ZFS_vdev_clear
650 *  - ESC_ZFS_vdev_online
651 *  - ESC_ZFS_pool_create
652 *  - ESC_ZFS_pool_import
653 * It will iterate over the pool vdevs to update the FRU property.
654 */
655int
656zfs_deliver_update(nvlist_t *nvl)
657{
658	dev_data_t dd = { 0 };
659	char *pname;
660	zpool_handle_t *zhp;
661	nvlist_t *config, *vdev;
662
663	if (nvlist_lookup_string(nvl, "pool_name", &pname) != 0) {
664		syseventd_print(9, "zfs_deliver_update: no pool name\n");
665		return (-1);
666	}
667
668	/*
669	 * If this event was triggered by a pool export or destroy we cannot
670	 * open the pool. This is not an error, just return 0 as we don't care
671	 * about these events.
672	 */
673	zhp = zpool_open_canfail(g_zfshdl, pname);
674	if (zhp == NULL)
675		return (0);
676
677	config = zpool_get_config(zhp, NULL);
678	if (config == NULL) {
679		syseventd_print(9, "zfs_deliver_update: "
680		    "failed to get pool config for %s\n", pname);
681		zpool_close(zhp);
682		return (-1);
683	}
684
685	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &vdev) != 0) {
686		syseventd_print(0, "zfs_deliver_update: "
687		    "failed to get vdev tree for %s\n", pname);
688		zpool_close(zhp);
689		return (-1);
690	}
691
692	libzfs_fru_refresh(g_zfshdl);
693
694	dd.dd_func = zfs_update_vdev_fru;
695	zfs_iter_vdev(zhp, vdev, &dd);
696
697	zpool_close(zhp);
698	return (0);
699}
700
701int
702zfs_deliver_dle(nvlist_t *nvl)
703{
704	char *devname;
705	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
706		syseventd_print(9, "zfs_deliver_event: no physpath\n");
707		return (-1);
708	}
709	if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
710		syseventd_print(9, "zfs_deliver_event: invalid "
711		    "device '%s'", devname);
712		return (-1);
713	}
714
715	/*
716	 * We try to find the device using the physical
717	 * path that has been supplied. We need to strip off
718	 * the /devices prefix before starting our search.
719	 */
720	devname += strlen(DEVICE_PREFIX);
721	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
722		syseventd_print(9, "zfs_deliver_event: device '%s' not"
723		    " found\n", devname);
724		return (1);
725	}
726	return (0);
727}
728
729
730/*ARGSUSED*/
731static int
732zfs_deliver_event(sysevent_t *ev, int unused)
733{
734	const char *class = sysevent_get_class_name(ev);
735	const char *subclass = sysevent_get_subclass_name(ev);
736	nvlist_t *nvl;
737	int ret;
738	boolean_t is_lofi = B_FALSE, is_check = B_FALSE;
739	boolean_t is_dle = B_FALSE, is_update = B_FALSE;
740
741	if (strcmp(class, EC_DEV_ADD) == 0) {
742		/*
743		 * We're mainly interested in disk additions, but we also listen
744		 * for new lofi devices, to allow for simplified testing.
745		 */
746		if (strcmp(subclass, ESC_DISK) == 0)
747			is_lofi = B_FALSE;
748		else if (strcmp(subclass, ESC_LOFI) == 0)
749			is_lofi = B_TRUE;
750		else
751			return (0);
752
753		is_check = B_FALSE;
754	} else if (strcmp(class, EC_ZFS) == 0) {
755		if (strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
756			/*
757			 * This event signifies that a device failed to open
758			 * during pool load, but the 'autoreplace' property was
759			 * set, so we should pretend it's just been added.
760			 */
761			is_check = B_TRUE;
762		} else if ((strcmp(subclass, ESC_ZFS_VDEV_ADD) == 0) ||
763		    (strcmp(subclass, ESC_ZFS_VDEV_ATTACH) == 0) ||
764		    (strcmp(subclass, ESC_ZFS_VDEV_CLEAR) == 0) ||
765		    (strcmp(subclass, ESC_ZFS_VDEV_ONLINE) == 0) ||
766		    (strcmp(subclass, ESC_ZFS_POOL_CREATE) == 0) ||
767		    (strcmp(subclass, ESC_ZFS_POOL_IMPORT) == 0)) {
768			/*
769			 * When we receive these events we check the pool
770			 * configuration and update the vdev FRUs if necessary.
771			 */
772			is_update = B_TRUE;
773		}
774	} else if (strcmp(class, EC_DEV_STATUS) == 0 &&
775	    strcmp(subclass, ESC_DEV_DLE) == 0) {
776		is_dle = B_TRUE;
777	} else {
778		return (0);
779	}
780
781	if (sysevent_get_attr_list(ev, &nvl) != 0)
782		return (-1);
783
784	if (is_dle)
785		ret = zfs_deliver_dle(nvl);
786	else if (is_update)
787		ret = zfs_deliver_update(nvl);
788	else if (is_check)
789		ret = zfs_deliver_check(nvl);
790	else
791		ret = zfs_deliver_add(nvl, is_lofi);
792
793	nvlist_free(nvl);
794	return (ret);
795}
796
797/*ARGSUSED*/
798void *
799zfs_enum_pools(void *arg)
800{
801	(void) zpool_iter(g_zfshdl, zfs_unavail_pool, (void *)&g_pool_list);
802	if (!list_is_empty(&g_pool_list))
803		g_tpool = tpool_create(1, sysconf(_SC_NPROCESSORS_ONLN),
804		    0, NULL);
805	g_enumeration_done = B_TRUE;
806	return (NULL);
807}
808
809static struct slm_mod_ops zfs_mod_ops = {
810	SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
811};
812
813struct slm_mod_ops *
814slm_init()
815{
816	if ((g_zfshdl = libzfs_init()) == NULL)
817		return (NULL);
818	/*
819	 * collect a list of unavailable pools (asynchronously,
820	 * since this can take a while)
821	 */
822	list_create(&g_pool_list, sizeof (struct unavailpool),
823	    offsetof(struct unavailpool, uap_node));
824	if (thr_create(NULL, 0, zfs_enum_pools, NULL, 0, &g_zfs_tid) != 0)
825		return (NULL);
826	return (&zfs_mod_ops);
827}
828
829void
830slm_fini()
831{
832	unavailpool_t *pool;
833
834	(void) thr_join(g_zfs_tid, NULL, NULL);
835	if (g_tpool != NULL) {
836		tpool_wait(g_tpool);
837		tpool_destroy(g_tpool);
838	}
839	while ((pool = (list_head(&g_pool_list))) != NULL) {
840		list_remove(&g_pool_list, pool);
841		zpool_close(pool->uap_zhp);
842		free(pool);
843	}
844	list_destroy(&g_pool_list);
845	libzfs_fini(g_zfshdl);
846}
847