xref: /illumos-gate/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c (revision b01c3b58f7eb7fb570f606f96f130fb9b2018b49)
13d7072f8Seschrock /*
23d7072f8Seschrock  * CDDL HEADER START
33d7072f8Seschrock  *
43d7072f8Seschrock  * The contents of this file are subject to the terms of the
53d7072f8Seschrock  * Common Development and Distribution License (the "License").
63d7072f8Seschrock  * You may not use this file except in compliance with the License.
73d7072f8Seschrock  *
83d7072f8Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93d7072f8Seschrock  * or http://www.opensolaris.org/os/licensing.
103d7072f8Seschrock  * See the License for the specific language governing permissions
113d7072f8Seschrock  * and limitations under the License.
123d7072f8Seschrock  *
133d7072f8Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
143d7072f8Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153d7072f8Seschrock  * If applicable, add the following below this CDDL HEADER, with the
163d7072f8Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
173d7072f8Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
183d7072f8Seschrock  *
193d7072f8Seschrock  * CDDL HEADER END
203d7072f8Seschrock  */
213d7072f8Seschrock /*
22*b01c3b58Seschrock  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
233d7072f8Seschrock  * Use is subject to license terms.
243d7072f8Seschrock  */
253d7072f8Seschrock 
263d7072f8Seschrock #pragma ident	"%Z%%M%	%I%	%E% SMI"
273d7072f8Seschrock 
283d7072f8Seschrock /*
293d7072f8Seschrock  * ZFS syseventd module.
303d7072f8Seschrock  *
313d7072f8Seschrock  * The purpose of this module is to identify when devices are added to the
323d7072f8Seschrock  * system, and appropriately online or replace the affected vdevs.
333d7072f8Seschrock  *
343d7072f8Seschrock  * When a device is added to the system:
353d7072f8Seschrock  *
363d7072f8Seschrock  * 	1. Search for any vdevs whose devid matches that of the newly added
373d7072f8Seschrock  *	   device.
383d7072f8Seschrock  *
393d7072f8Seschrock  * 	2. If no vdevs are found, then search for any vdevs whose devfs path
403d7072f8Seschrock  *	   matches that of the new device.
413d7072f8Seschrock  *
423d7072f8Seschrock  *	3. If no vdevs match by either method, then ignore the event.
433d7072f8Seschrock  *
443d7072f8Seschrock  * 	4. Attempt to online the device with a flag to indicate that it should
453d7072f8Seschrock  *	   be unspared when resilvering completes.  If this succeeds, then the
463d7072f8Seschrock  *	   same device was inserted and we should continue normally.
473d7072f8Seschrock  *
483d7072f8Seschrock  *	5. If the pool does not have the 'autoreplace' property set, attempt to
493d7072f8Seschrock  *	   online the device again without the unspare flag, which will
503d7072f8Seschrock  *	   generate a FMA fault.
513d7072f8Seschrock  *
523d7072f8Seschrock  *	6. If the pool has the 'autoreplace' property set, and the matching vdev
533d7072f8Seschrock  *	   is a whole disk, then label the new disk and attempt a 'zpool
543d7072f8Seschrock  *	   replace'.
553d7072f8Seschrock  *
563d7072f8Seschrock  * The module responds to EC_DEV_ADD events for both disks and lofi devices,
573d7072f8Seschrock  * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
583d7072f8Seschrock  * indicates that a device failed to open during pool load, but the autoreplace
593d7072f8Seschrock  * property was set.  In this case, we deferred the associated FMA fault until
603d7072f8Seschrock  * our module had a chance to process the autoreplace logic.  If the device
613d7072f8Seschrock  * could not be replaced, then the second online attempt will trigger the FMA
623d7072f8Seschrock  * fault that we skipped earlier.
633d7072f8Seschrock  */
643d7072f8Seschrock 
653d7072f8Seschrock #include <alloca.h>
663d7072f8Seschrock #include <devid.h>
673d7072f8Seschrock #include <fcntl.h>
683d7072f8Seschrock #include <libnvpair.h>
693d7072f8Seschrock #include <libsysevent.h>
703d7072f8Seschrock #include <libzfs.h>
713d7072f8Seschrock #include <limits.h>
723d7072f8Seschrock #include <stdlib.h>
733d7072f8Seschrock #include <string.h>
743d7072f8Seschrock #include <syslog.h>
753d7072f8Seschrock #include <sys/sunddi.h>
763d7072f8Seschrock #include <sys/sysevent/eventdefs.h>
773d7072f8Seschrock #include <sys/sysevent/dev.h>
783d7072f8Seschrock #include <unistd.h>
793d7072f8Seschrock 
803d7072f8Seschrock #if defined(__i386) || defined(__amd64)
813d7072f8Seschrock #define	PHYS_PATH	":q"
823d7072f8Seschrock #define	RAW_SLICE	"p0"
833d7072f8Seschrock #elif defined(__sparc)
843d7072f8Seschrock #define	PHYS_PATH	":c"
853d7072f8Seschrock #define	RAW_SLICE	"s2"
863d7072f8Seschrock #else
873d7072f8Seschrock #error Unknown architecture
883d7072f8Seschrock #endif
893d7072f8Seschrock 
903d7072f8Seschrock typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);
913d7072f8Seschrock 
923d7072f8Seschrock libzfs_handle_t *g_zfshdl;
933d7072f8Seschrock 
943d7072f8Seschrock /*
953d7072f8Seschrock  * The device associated with the given vdev (either by devid or physical path)
963d7072f8Seschrock  * has been added to the system.  If 'isdisk' is set, then we only attempt a
973d7072f8Seschrock  * replacement if it's a whole disk.  This also implies that we should label the
983d7072f8Seschrock  * disk first.
993d7072f8Seschrock  *
1003d7072f8Seschrock  * First, we attempt to online the device (making sure to undo any spare
1013d7072f8Seschrock  * operation when finished).  If this succeeds, then we're done.  If it fails,
1023d7072f8Seschrock  * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
1033d7072f8Seschrock  * but that the label was not what we expected.  If the 'autoreplace' property
1043d7072f8Seschrock  * is not set, then we relabel the disk (if specified), and attempt a 'zpool
1053d7072f8Seschrock  * replace'.  If the online is successful, but the new state is something else
1063d7072f8Seschrock  * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
1073d7072f8Seschrock  * race, and we should avoid attempting to relabel the disk.
1083d7072f8Seschrock  */
1093d7072f8Seschrock static void
1103d7072f8Seschrock zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
1113d7072f8Seschrock {
1123d7072f8Seschrock 	char *path;
1133d7072f8Seschrock 	vdev_state_t newstate;
1143d7072f8Seschrock 	nvlist_t *nvroot, *newvd;
1153d7072f8Seschrock 	uint64_t wholedisk = 0ULL;
1163d7072f8Seschrock 	char *devid = NULL;
1173d7072f8Seschrock 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
1183d7072f8Seschrock 	size_t len;
1193d7072f8Seschrock 
1203d7072f8Seschrock 	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
1213d7072f8Seschrock 		return;
1223d7072f8Seschrock 
1233d7072f8Seschrock 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_DEVID, &devid);
1243d7072f8Seschrock 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
1253d7072f8Seschrock 
1263d7072f8Seschrock 	/*
1273d7072f8Seschrock 	 * We should have a way to online a device by guid.  With the current
1283d7072f8Seschrock 	 * interface, we are forced to chop off the 's0' for whole disks.
1293d7072f8Seschrock 	 */
1303d7072f8Seschrock 	(void) strlcpy(fullpath, path, sizeof (fullpath));
1313d7072f8Seschrock 	if (wholedisk)
1323d7072f8Seschrock 		fullpath[strlen(fullpath) - 2] = '\0';
1333d7072f8Seschrock 
1343d7072f8Seschrock 	/*
1353d7072f8Seschrock 	 * Attempt to online the device.  It would be nice to online this by
1363d7072f8Seschrock 	 * GUID, but the current interface only supports lookup by path.
1373d7072f8Seschrock 	 */
1383d7072f8Seschrock 	if (zpool_vdev_online(zhp, fullpath,
1393d7072f8Seschrock 	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
1403d7072f8Seschrock 	    newstate != VDEV_STATE_CANT_OPEN)
1413d7072f8Seschrock 		return;
1423d7072f8Seschrock 
1433d7072f8Seschrock 	/*
1443d7072f8Seschrock 	 * If the pool doesn't have the autoreplace property set, then attempt a
1453d7072f8Seschrock 	 * true online (without the unspare flag), which will trigger a FMA
1463d7072f8Seschrock 	 * fault.
1473d7072f8Seschrock 	 */
148990b4856Slling 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
1493d7072f8Seschrock 	    (isdisk && !wholedisk)) {
1503d7072f8Seschrock 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
1513d7072f8Seschrock 		    &newstate);
1523d7072f8Seschrock 		return;
1533d7072f8Seschrock 	}
1543d7072f8Seschrock 
1553d7072f8Seschrock 	if (isdisk) {
1563d7072f8Seschrock 		/*
1573d7072f8Seschrock 		 * If this is a request to label a whole disk, then attempt to
1583d7072f8Seschrock 		 * write out the label.  Before we can label the disk, we need
1593d7072f8Seschrock 		 * access to a raw node.  Ideally, we'd like to walk the devinfo
1603d7072f8Seschrock 		 * tree and find a raw node from the corresponding parent node.
1613d7072f8Seschrock 		 * This is overly complicated, and since we know how we labeled
1623d7072f8Seschrock 		 * this device in the first place, we know it's save to switch
1633d7072f8Seschrock 		 * from /dev/dsk to /dev/rdsk and append the backup slice.
1643d7072f8Seschrock 		 */
1653d7072f8Seschrock 		if (strncmp(path, "/dev/dsk/", 9) != 0)
1663d7072f8Seschrock 			return;
1673d7072f8Seschrock 
1683d7072f8Seschrock 		(void) strlcpy(rawpath, path + 9, sizeof (rawpath));
1693d7072f8Seschrock 		len = strlen(rawpath);
1703d7072f8Seschrock 		rawpath[len - 2] = '\0';
1713d7072f8Seschrock 
1723d7072f8Seschrock 		if (zpool_label_disk(g_zfshdl, zhp, rawpath) != 0)
1733d7072f8Seschrock 			return;
1743d7072f8Seschrock 	}
1753d7072f8Seschrock 
1763d7072f8Seschrock 	/*
1773d7072f8Seschrock 	 * Cosntruct the root vdev to pass to zpool_vdev_attach().  While adding
1783d7072f8Seschrock 	 * the entire vdev structure is harmless, we construct a reduced set of
1793d7072f8Seschrock 	 * path/devid/wholedisk to keep it simple.
1803d7072f8Seschrock 	 */
1813d7072f8Seschrock 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
1823d7072f8Seschrock 		return;
1833d7072f8Seschrock 
1843d7072f8Seschrock 	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1853d7072f8Seschrock 		nvlist_free(nvroot);
1863d7072f8Seschrock 		return;
1873d7072f8Seschrock 	}
1883d7072f8Seschrock 
1893d7072f8Seschrock 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
1903d7072f8Seschrock 	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
1913d7072f8Seschrock 	    (devid && nvlist_add_string(newvd, ZPOOL_CONFIG_DEVID,
1923d7072f8Seschrock 	    devid) != 0) ||
1933d7072f8Seschrock 	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
1943d7072f8Seschrock 	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
1953d7072f8Seschrock 	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
1963d7072f8Seschrock 	    1) != 0) {
1973d7072f8Seschrock 		nvlist_free(newvd);
1983d7072f8Seschrock 		nvlist_free(nvroot);
1993d7072f8Seschrock 		return;
2003d7072f8Seschrock 	}
2013d7072f8Seschrock 
2023d7072f8Seschrock 	nvlist_free(newvd);
2033d7072f8Seschrock 
2043d7072f8Seschrock 	(void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
2053d7072f8Seschrock 
2063d7072f8Seschrock 	nvlist_free(nvroot);
2073d7072f8Seschrock 
2083d7072f8Seschrock }
2093d7072f8Seschrock 
2103d7072f8Seschrock /*
2113d7072f8Seschrock  * Utility functions to find a vdev matching given criteria.
2123d7072f8Seschrock  */
2133d7072f8Seschrock typedef struct dev_data {
2143d7072f8Seschrock 	const char		*dd_compare;
2153d7072f8Seschrock 	const char		*dd_prop;
2163d7072f8Seschrock 	zfs_process_func_t	dd_func;
2173d7072f8Seschrock 	boolean_t		dd_found;
2183d7072f8Seschrock 	boolean_t		dd_isdisk;
2193d7072f8Seschrock 	uint64_t		dd_pool_guid;
2203d7072f8Seschrock 	uint64_t		dd_vdev_guid;
2213d7072f8Seschrock } dev_data_t;
2223d7072f8Seschrock 
2233d7072f8Seschrock static void
2243d7072f8Seschrock zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
2253d7072f8Seschrock {
2263d7072f8Seschrock 	dev_data_t *dp = data;
2273d7072f8Seschrock 	char *path;
2283d7072f8Seschrock 	uint_t c, children;
2293d7072f8Seschrock 	nvlist_t **child;
230*b01c3b58Seschrock 	size_t len;
2313d7072f8Seschrock 	uint64_t guid;
2323d7072f8Seschrock 
2333d7072f8Seschrock 	/*
2343d7072f8Seschrock 	 * First iterate over any children.
2353d7072f8Seschrock 	 */
2363d7072f8Seschrock 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
2373d7072f8Seschrock 	    &child, &children) == 0) {
2383d7072f8Seschrock 		for (c = 0; c < children; c++)
2393d7072f8Seschrock 			zfs_iter_vdev(zhp, child[c], data);
2403d7072f8Seschrock 		return;
2413d7072f8Seschrock 	}
2423d7072f8Seschrock 
2433d7072f8Seschrock 	if (dp->dd_vdev_guid != 0) {
2443d7072f8Seschrock 		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
2453d7072f8Seschrock 		    &guid) != 0 || guid != dp->dd_vdev_guid)
2463d7072f8Seschrock 			return;
2473d7072f8Seschrock 	} else {
248*b01c3b58Seschrock 		len = strlen(dp->dd_compare);
249*b01c3b58Seschrock 
2503d7072f8Seschrock 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
2513d7072f8Seschrock 		    strncmp(dp->dd_compare, path, len) != 0)
2523d7072f8Seschrock 			return;
2533d7072f8Seschrock 
2543d7072f8Seschrock 		/*
2553d7072f8Seschrock 		 * Normally, we want to have an exact match for the comparison
2563d7072f8Seschrock 		 * string.  However, we allow substring matches in the following
2573d7072f8Seschrock 		 * cases:
2583d7072f8Seschrock 		 *
2593d7072f8Seschrock 		 * 	<path>:		This is a devpath, and the target is one
2603d7072f8Seschrock 		 * 			of its children.
2613d7072f8Seschrock 		 *
2623d7072f8Seschrock 		 * 	<path/>		This is a devid for a whole disk, and
2633d7072f8Seschrock 		 * 			the target is one of its children.
2643d7072f8Seschrock 		 */
2653d7072f8Seschrock 		if (path[len] != '\0' && path[len] != ':' &&
2663d7072f8Seschrock 		    path[len - 1] != '/')
2673d7072f8Seschrock 			return;
2683d7072f8Seschrock 	}
2693d7072f8Seschrock 
2703d7072f8Seschrock 	(dp->dd_func)(zhp, nvl, dp->dd_isdisk);
2713d7072f8Seschrock }
2723d7072f8Seschrock 
2733d7072f8Seschrock static int
2743d7072f8Seschrock zfs_iter_pool(zpool_handle_t *zhp, void *data)
2753d7072f8Seschrock {
2763d7072f8Seschrock 	nvlist_t *config, *nvl;
2773d7072f8Seschrock 	dev_data_t *dp = data;
2783d7072f8Seschrock 	uint64_t pool_guid;
2793d7072f8Seschrock 
2803d7072f8Seschrock 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
2813d7072f8Seschrock 		if (dp->dd_pool_guid == 0 ||
2823d7072f8Seschrock 		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
2833d7072f8Seschrock 		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
2843d7072f8Seschrock 			(void) nvlist_lookup_nvlist(config,
2853d7072f8Seschrock 			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
2863d7072f8Seschrock 			zfs_iter_vdev(zhp, nvl, data);
2873d7072f8Seschrock 		}
2883d7072f8Seschrock 	}
2893d7072f8Seschrock 
2903d7072f8Seschrock 	zpool_close(zhp);
2913d7072f8Seschrock 	return (0);
2923d7072f8Seschrock }
2933d7072f8Seschrock 
2943d7072f8Seschrock /*
2953d7072f8Seschrock  * Given a physical device path, iterate over all (pool, vdev) pairs which
2963d7072f8Seschrock  * correspond to the given path.
2973d7072f8Seschrock  */
2983d7072f8Seschrock static boolean_t
2993d7072f8Seschrock devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
3003d7072f8Seschrock {
3013d7072f8Seschrock 	dev_data_t data = { 0 };
3023d7072f8Seschrock 
3033d7072f8Seschrock 	data.dd_compare = devpath;
3043d7072f8Seschrock 	data.dd_func = func;
3053d7072f8Seschrock 	data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
3063d7072f8Seschrock 	data.dd_found = B_FALSE;
3073d7072f8Seschrock 	data.dd_isdisk = wholedisk;
3083d7072f8Seschrock 
3093d7072f8Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
3103d7072f8Seschrock 
3113d7072f8Seschrock 	return (data.dd_found);
3123d7072f8Seschrock }
3133d7072f8Seschrock 
3143d7072f8Seschrock /*
3153d7072f8Seschrock  * Given a /devices path, lookup the corresponding devid for each minor node,
3163d7072f8Seschrock  * and find any vdevs with matching devids.  Doing this straight up would be
3173d7072f8Seschrock  * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
3183d7072f8Seschrock  * the fact that each devid ends with "/<minornode>".  Once we find any valid
3193d7072f8Seschrock  * minor node, we chop off the portion after the last slash, and then search for
3203d7072f8Seschrock  * matching vdevs, which is O(vdevs in system).
3213d7072f8Seschrock  */
3223d7072f8Seschrock static boolean_t
3233d7072f8Seschrock devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
3243d7072f8Seschrock {
3253d7072f8Seschrock 	size_t len = strlen(devpath) + sizeof ("/devices") +
3263d7072f8Seschrock 	    sizeof (PHYS_PATH) - 1;
3273d7072f8Seschrock 	char *fullpath;
3283d7072f8Seschrock 	int fd;
3293d7072f8Seschrock 	ddi_devid_t devid;
3303d7072f8Seschrock 	char *devidstr, *fulldevid;
3313d7072f8Seschrock 	dev_data_t data = { 0 };
3323d7072f8Seschrock 
3333d7072f8Seschrock 	/*
3343d7072f8Seschrock 	 * Try to open a known minor node.
3353d7072f8Seschrock 	 */
3363d7072f8Seschrock 	fullpath = alloca(len);
3373d7072f8Seschrock 	(void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
3383d7072f8Seschrock 	if ((fd = open(fullpath, O_RDONLY)) < 0)
3393d7072f8Seschrock 		return (B_FALSE);
3403d7072f8Seschrock 
3413d7072f8Seschrock 	/*
3423d7072f8Seschrock 	 * Determine the devid as a string, with no trailing slash for the minor
3433d7072f8Seschrock 	 * node.
3443d7072f8Seschrock 	 */
3453d7072f8Seschrock 	if (devid_get(fd, &devid) != 0) {
3463d7072f8Seschrock 		(void) close(fd);
3473d7072f8Seschrock 		return (B_FALSE);
3483d7072f8Seschrock 	}
3493d7072f8Seschrock 	(void) close(fd);
3503d7072f8Seschrock 
3513d7072f8Seschrock 	if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
3523d7072f8Seschrock 		devid_free(devid);
3533d7072f8Seschrock 		return (B_FALSE);
3543d7072f8Seschrock 	}
3553d7072f8Seschrock 
3563d7072f8Seschrock 	len = strlen(devidstr) + 2;
3573d7072f8Seschrock 	fulldevid = alloca(len);
3583d7072f8Seschrock 	(void) snprintf(fulldevid, len, "%s/", devidstr);
3593d7072f8Seschrock 
3603d7072f8Seschrock 	data.dd_compare = fulldevid;
3613d7072f8Seschrock 	data.dd_func = func;
3623d7072f8Seschrock 	data.dd_prop = ZPOOL_CONFIG_DEVID;
3633d7072f8Seschrock 	data.dd_found = B_FALSE;
3643d7072f8Seschrock 	data.dd_isdisk = wholedisk;
3653d7072f8Seschrock 
3663d7072f8Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
3673d7072f8Seschrock 
3683d7072f8Seschrock 	devid_str_free(devidstr);
3693d7072f8Seschrock 
3703d7072f8Seschrock 	return (data.dd_found);
3713d7072f8Seschrock }
3723d7072f8Seschrock 
3733d7072f8Seschrock /*
3743d7072f8Seschrock  * This function is called when we receive a devfs add event.  This can be
3753d7072f8Seschrock  * either a disk event or a lofi event, and the behavior is slightly different
3763d7072f8Seschrock  * depending on which it is.
3773d7072f8Seschrock  */
3783d7072f8Seschrock static int
3793d7072f8Seschrock zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
3803d7072f8Seschrock {
3813d7072f8Seschrock 	char *devpath, *devname;
3823d7072f8Seschrock 	char path[PATH_MAX], realpath[PATH_MAX];
3833d7072f8Seschrock 	char *colon, *raw;
3843d7072f8Seschrock 	int ret;
3853d7072f8Seschrock 
3863d7072f8Seschrock 	/*
3873d7072f8Seschrock 	 * The main unit of operation is the physical device path.  For disks,
3883d7072f8Seschrock 	 * this is the device node, as all minor nodes are affected.  For lofi
3893d7072f8Seschrock 	 * devices, this includes the minor path.  Unfortunately, this isn't
3903d7072f8Seschrock 	 * represented in the DEV_PHYS_PATH for various reasons.
3913d7072f8Seschrock 	 */
3923d7072f8Seschrock 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
3933d7072f8Seschrock 		return (-1);
3943d7072f8Seschrock 
3953d7072f8Seschrock 	/*
3963d7072f8Seschrock 	 * If this is a lofi device, then also get the minor instance name.
3973d7072f8Seschrock 	 * Unfortunately, the current payload doesn't include an easy way to get
3983d7072f8Seschrock 	 * this information.  So we cheat by resolving the 'dev_name' (which
3993d7072f8Seschrock 	 * refers to the raw device) and taking the portion between ':(*),raw'.
4003d7072f8Seschrock 	 */
4013d7072f8Seschrock 	(void) strlcpy(realpath, devpath, sizeof (realpath));
4023d7072f8Seschrock 	if (is_lofi) {
4033d7072f8Seschrock 		if (nvlist_lookup_string(nvl, DEV_NAME,
4043d7072f8Seschrock 		    &devname) == 0 &&
4053d7072f8Seschrock 		    (ret = resolvepath(devname, path,
4063d7072f8Seschrock 		    sizeof (path))) > 0) {
4073d7072f8Seschrock 			path[ret] = '\0';
4083d7072f8Seschrock 			colon = strchr(path, ':');
4093d7072f8Seschrock 			if (colon != NULL)
4103d7072f8Seschrock 				raw = strstr(colon + 1, ",raw");
4113d7072f8Seschrock 			if (colon != NULL && raw != NULL) {
4123d7072f8Seschrock 				*raw = '\0';
4133d7072f8Seschrock 				(void) snprintf(realpath,
4143d7072f8Seschrock 				    sizeof (realpath), "%s%s",
4153d7072f8Seschrock 				    devpath, colon);
4163d7072f8Seschrock 				*raw = ',';
4173d7072f8Seschrock 			}
4183d7072f8Seschrock 		}
4193d7072f8Seschrock 	}
4203d7072f8Seschrock 
4213d7072f8Seschrock 	/*
4223d7072f8Seschrock 	 * Iterate over all vdevs with a matching devid, and then those with a
4233d7072f8Seschrock 	 * matching /devices path.  For disks, we only want to pay attention to
4243d7072f8Seschrock 	 * vdevs marked as whole disks.  For lofi, we don't care (because we're
4253d7072f8Seschrock 	 * matching an exact minor name).
4263d7072f8Seschrock 	 */
4273d7072f8Seschrock 	if (!devid_iter(realpath, zfs_process_add, !is_lofi))
4283d7072f8Seschrock 		(void) devpath_iter(realpath, zfs_process_add, !is_lofi);
4293d7072f8Seschrock 
4303d7072f8Seschrock 	return (0);
4313d7072f8Seschrock }
4323d7072f8Seschrock 
4333d7072f8Seschrock /*
4343d7072f8Seschrock  * Called when we receive a VDEV_CHECK event, which indicates a device could not
4353d7072f8Seschrock  * be opened during initial pool open, but the autoreplace property was set on
4363d7072f8Seschrock  * the pool.  In this case, we treat it as if it were an add event.
4373d7072f8Seschrock  */
4383d7072f8Seschrock static int
4393d7072f8Seschrock zfs_deliver_check(nvlist_t *nvl)
4403d7072f8Seschrock {
4413d7072f8Seschrock 	dev_data_t data = { 0 };
4423d7072f8Seschrock 
4433d7072f8Seschrock 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
4443d7072f8Seschrock 	    &data.dd_pool_guid) != 0 ||
4453d7072f8Seschrock 	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
4463d7072f8Seschrock 	    &data.dd_vdev_guid) != 0)
4473d7072f8Seschrock 		return (0);
4483d7072f8Seschrock 
4493d7072f8Seschrock 	data.dd_isdisk = B_TRUE;
4503d7072f8Seschrock 	data.dd_func = zfs_process_add;
4513d7072f8Seschrock 
4523d7072f8Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
4533d7072f8Seschrock 
4543d7072f8Seschrock 	return (0);
4553d7072f8Seschrock }
4563d7072f8Seschrock 
4573d7072f8Seschrock /*ARGSUSED*/
4583d7072f8Seschrock static int
4593d7072f8Seschrock zfs_deliver_event(sysevent_t *ev, int unused)
4603d7072f8Seschrock {
4613d7072f8Seschrock 	const char *class = sysevent_get_class_name(ev);
4623d7072f8Seschrock 	const char *subclass = sysevent_get_subclass_name(ev);
4633d7072f8Seschrock 	nvlist_t *nvl;
4643d7072f8Seschrock 	int ret;
4653d7072f8Seschrock 	boolean_t is_lofi, is_check;
4663d7072f8Seschrock 
4673d7072f8Seschrock 	if (strcmp(class, EC_DEV_ADD) == 0) {
4683d7072f8Seschrock 		/*
4693d7072f8Seschrock 		 * We're mainly interested in disk additions, but we also listen
4703d7072f8Seschrock 		 * for new lofi devices, to allow for simplified testing.
4713d7072f8Seschrock 		 */
4723d7072f8Seschrock 		if (strcmp(subclass, ESC_DISK) == 0)
4733d7072f8Seschrock 			is_lofi = B_FALSE;
4743d7072f8Seschrock 		else if (strcmp(subclass, ESC_LOFI) == 0)
4753d7072f8Seschrock 			is_lofi = B_TRUE;
4763d7072f8Seschrock 		else
4773d7072f8Seschrock 			return (0);
4783d7072f8Seschrock 
4793d7072f8Seschrock 		is_check = B_FALSE;
4803d7072f8Seschrock 	} else if (strcmp(class, EC_ZFS) == 0 &&
4813d7072f8Seschrock 	    strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
4823d7072f8Seschrock 		/*
4833d7072f8Seschrock 		 * This event signifies that a device failed to open during pool
4843d7072f8Seschrock 		 * load, but the 'autoreplace' property was set, so we should
4853d7072f8Seschrock 		 * pretend it's just been added.
4863d7072f8Seschrock 		 */
4873d7072f8Seschrock 		is_check = B_TRUE;
4883d7072f8Seschrock 	} else {
4893d7072f8Seschrock 		return (0);
4903d7072f8Seschrock 	}
4913d7072f8Seschrock 
4923d7072f8Seschrock 	if (sysevent_get_attr_list(ev, &nvl) != 0)
4933d7072f8Seschrock 		return (-1);
4943d7072f8Seschrock 
4953d7072f8Seschrock 	if (is_check)
4963d7072f8Seschrock 		ret = zfs_deliver_check(nvl);
4973d7072f8Seschrock 	else
4983d7072f8Seschrock 		ret = zfs_deliver_add(nvl, is_lofi);
4993d7072f8Seschrock 
5003d7072f8Seschrock 
5013d7072f8Seschrock 	nvlist_free(nvl);
5023d7072f8Seschrock 	return (ret);
5033d7072f8Seschrock }
5043d7072f8Seschrock 
5053d7072f8Seschrock static struct slm_mod_ops zfs_mod_ops = {
5063d7072f8Seschrock 	SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
5073d7072f8Seschrock };
5083d7072f8Seschrock 
5093d7072f8Seschrock struct slm_mod_ops *
5103d7072f8Seschrock slm_init()
5113d7072f8Seschrock {
5123d7072f8Seschrock 	if ((g_zfshdl = libzfs_init()) == NULL)
5133d7072f8Seschrock 		return (NULL);
5143d7072f8Seschrock 
5153d7072f8Seschrock 	return (&zfs_mod_ops);
5163d7072f8Seschrock }
5173d7072f8Seschrock 
5183d7072f8Seschrock void
5193d7072f8Seschrock slm_fini()
5203d7072f8Seschrock {
5213d7072f8Seschrock }
522