xref: /illumos-gate/usr/src/cmd/syseventd/modules/zfs_mod/zfs_mod.c (revision b98131cff90a91303826565dacf89c46a422e6c5)
13d7072f8Seschrock /*
23d7072f8Seschrock  * CDDL HEADER START
33d7072f8Seschrock  *
43d7072f8Seschrock  * The contents of this file are subject to the terms of the
53d7072f8Seschrock  * Common Development and Distribution License (the "License").
63d7072f8Seschrock  * You may not use this file except in compliance with the License.
73d7072f8Seschrock  *
83d7072f8Seschrock  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
93d7072f8Seschrock  * or http://www.opensolaris.org/os/licensing.
103d7072f8Seschrock  * See the License for the specific language governing permissions
113d7072f8Seschrock  * and limitations under the License.
123d7072f8Seschrock  *
133d7072f8Seschrock  * When distributing Covered Code, include this CDDL HEADER in each
143d7072f8Seschrock  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
153d7072f8Seschrock  * If applicable, add the following below this CDDL HEADER, with the
163d7072f8Seschrock  * fields enclosed by brackets "[]" replaced with your own identifying
173d7072f8Seschrock  * information: Portions Copyright [yyyy] [name of copyright owner]
183d7072f8Seschrock  *
193d7072f8Seschrock  * CDDL HEADER END
203d7072f8Seschrock  */
213d7072f8Seschrock /*
22*b98131cfSEric Taylor  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
233d7072f8Seschrock  */
243d7072f8Seschrock 
253d7072f8Seschrock /*
263d7072f8Seschrock  * ZFS syseventd module.
273d7072f8Seschrock  *
283d7072f8Seschrock  * The purpose of this module is to identify when devices are added to the
293d7072f8Seschrock  * system, and appropriately online or replace the affected vdevs.
303d7072f8Seschrock  *
313d7072f8Seschrock  * When a device is added to the system:
323d7072f8Seschrock  *
333d7072f8Seschrock  * 	1. Search for any vdevs whose devid matches that of the newly added
343d7072f8Seschrock  *	   device.
353d7072f8Seschrock  *
363d7072f8Seschrock  * 	2. If no vdevs are found, then search for any vdevs whose devfs path
373d7072f8Seschrock  *	   matches that of the new device.
383d7072f8Seschrock  *
393d7072f8Seschrock  *	3. If no vdevs match by either method, then ignore the event.
403d7072f8Seschrock  *
413d7072f8Seschrock  * 	4. Attempt to online the device with a flag to indicate that it should
423d7072f8Seschrock  *	   be unspared when resilvering completes.  If this succeeds, then the
433d7072f8Seschrock  *	   same device was inserted and we should continue normally.
443d7072f8Seschrock  *
453d7072f8Seschrock  *	5. If the pool does not have the 'autoreplace' property set, attempt to
463d7072f8Seschrock  *	   online the device again without the unspare flag, which will
473d7072f8Seschrock  *	   generate a FMA fault.
483d7072f8Seschrock  *
493d7072f8Seschrock  *	6. If the pool has the 'autoreplace' property set, and the matching vdev
503d7072f8Seschrock  *	   is a whole disk, then label the new disk and attempt a 'zpool
513d7072f8Seschrock  *	   replace'.
523d7072f8Seschrock  *
533d7072f8Seschrock  * The module responds to EC_DEV_ADD events for both disks and lofi devices,
543d7072f8Seschrock  * with the latter used for testing.  The special ESC_ZFS_VDEV_CHECK event
553d7072f8Seschrock  * indicates that a device failed to open during pool load, but the autoreplace
563d7072f8Seschrock  * property was set.  In this case, we deferred the associated FMA fault until
573d7072f8Seschrock  * our module had a chance to process the autoreplace logic.  If the device
583d7072f8Seschrock  * could not be replaced, then the second online attempt will trigger the FMA
593d7072f8Seschrock  * fault that we skipped earlier.
603d7072f8Seschrock  */
613d7072f8Seschrock 
623d7072f8Seschrock #include <alloca.h>
633d7072f8Seschrock #include <devid.h>
643d7072f8Seschrock #include <fcntl.h>
653d7072f8Seschrock #include <libnvpair.h>
663d7072f8Seschrock #include <libsysevent.h>
673d7072f8Seschrock #include <libzfs.h>
683d7072f8Seschrock #include <limits.h>
693d7072f8Seschrock #include <stdlib.h>
703d7072f8Seschrock #include <string.h>
713d7072f8Seschrock #include <syslog.h>
723d7072f8Seschrock #include <sys/sunddi.h>
733d7072f8Seschrock #include <sys/sysevent/eventdefs.h>
743d7072f8Seschrock #include <sys/sysevent/dev.h>
753d7072f8Seschrock #include <unistd.h>
76*b98131cfSEric Taylor #include "syseventd.h"
773d7072f8Seschrock 
/*
 * Architecture-specific device naming.  PHYS_PATH is the minor node suffix
 * appended to a /devices path when this module opens a disk.  RAW_SLICE is
 * not referenced in this part of the file; presumably it names the slice
 * that covers the whole disk -- confirm before relying on it.
 */
#if defined(__sparc)
#define	PHYS_PATH	":c"
#define	RAW_SLICE	"s2"
#elif defined(__i386) || defined(__amd64)
#define	PHYS_PATH	":q"
#define	RAW_SLICE	"p0"
#else
#error Unknown architecture
#endif
873d7072f8Seschrock 
883d7072f8Seschrock typedef void (*zfs_process_func_t)(zpool_handle_t *, nvlist_t *, boolean_t);
893d7072f8Seschrock 
903d7072f8Seschrock libzfs_handle_t *g_zfshdl;
913d7072f8Seschrock 
923d7072f8Seschrock /*
933d7072f8Seschrock  * The device associated with the given vdev (either by devid or physical path)
943d7072f8Seschrock  * has been added to the system.  If 'isdisk' is set, then we only attempt a
953d7072f8Seschrock  * replacement if it's a whole disk.  This also implies that we should label the
963d7072f8Seschrock  * disk first.
973d7072f8Seschrock  *
983d7072f8Seschrock  * First, we attempt to online the device (making sure to undo any spare
993d7072f8Seschrock  * operation when finished).  If this succeeds, then we're done.  If it fails,
1003d7072f8Seschrock  * and the new state is VDEV_CANT_OPEN, it indicates that the device was opened,
1013d7072f8Seschrock  * but that the label was not what we expected.  If the 'autoreplace' property
1023d7072f8Seschrock  * is not set, then we relabel the disk (if specified), and attempt a 'zpool
1033d7072f8Seschrock  * replace'.  If the online is successful, but the new state is something else
1043d7072f8Seschrock  * (REMOVED or FAULTED), it indicates that we're out of sync or in some sort of
1053d7072f8Seschrock  * race, and we should avoid attempting to relabel the disk.
1063d7072f8Seschrock  */
1073d7072f8Seschrock static void
1083d7072f8Seschrock zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t isdisk)
1093d7072f8Seschrock {
1103d7072f8Seschrock 	char *path;
1113d7072f8Seschrock 	vdev_state_t newstate;
1123d7072f8Seschrock 	nvlist_t *nvroot, *newvd;
1133d7072f8Seschrock 	uint64_t wholedisk = 0ULL;
114bf82a41bSeschrock 	char *physpath = NULL;
1153d7072f8Seschrock 	char rawpath[PATH_MAX], fullpath[PATH_MAX];
1163d7072f8Seschrock 	size_t len;
1173d7072f8Seschrock 
1183d7072f8Seschrock 	if (nvlist_lookup_string(vdev, ZPOOL_CONFIG_PATH, &path) != 0)
1193d7072f8Seschrock 		return;
1203d7072f8Seschrock 
121bf82a41bSeschrock 	(void) nvlist_lookup_string(vdev, ZPOOL_CONFIG_PHYS_PATH, &physpath);
1223d7072f8Seschrock 	(void) nvlist_lookup_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);
1233d7072f8Seschrock 
1243d7072f8Seschrock 	/*
1253d7072f8Seschrock 	 * We should have a way to online a device by guid.  With the current
1263d7072f8Seschrock 	 * interface, we are forced to chop off the 's0' for whole disks.
1273d7072f8Seschrock 	 */
1283d7072f8Seschrock 	(void) strlcpy(fullpath, path, sizeof (fullpath));
1293d7072f8Seschrock 	if (wholedisk)
1303d7072f8Seschrock 		fullpath[strlen(fullpath) - 2] = '\0';
1313d7072f8Seschrock 
1323d7072f8Seschrock 	/*
1333d7072f8Seschrock 	 * Attempt to online the device.  It would be nice to online this by
1343d7072f8Seschrock 	 * GUID, but the current interface only supports lookup by path.
1353d7072f8Seschrock 	 */
1363d7072f8Seschrock 	if (zpool_vdev_online(zhp, fullpath,
1373d7072f8Seschrock 	    ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE, &newstate) == 0 &&
138bf82a41bSeschrock 	    (newstate == VDEV_STATE_HEALTHY || newstate == VDEV_STATE_DEGRADED))
1393d7072f8Seschrock 		return;
1403d7072f8Seschrock 
1413d7072f8Seschrock 	/*
1423d7072f8Seschrock 	 * If the pool doesn't have the autoreplace property set, then attempt a
1433d7072f8Seschrock 	 * true online (without the unspare flag), which will trigger a FMA
1443d7072f8Seschrock 	 * fault.
1453d7072f8Seschrock 	 */
146990b4856Slling 	if (!zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOREPLACE, NULL) ||
1473d7072f8Seschrock 	    (isdisk && !wholedisk)) {
1483d7072f8Seschrock 		(void) zpool_vdev_online(zhp, fullpath, ZFS_ONLINE_FORCEFAULT,
1493d7072f8Seschrock 		    &newstate);
1503d7072f8Seschrock 		return;
1513d7072f8Seschrock 	}
1523d7072f8Seschrock 
1533d7072f8Seschrock 	if (isdisk) {
1543d7072f8Seschrock 		/*
1553d7072f8Seschrock 		 * If this is a request to label a whole disk, then attempt to
1563d7072f8Seschrock 		 * write out the label.  Before we can label the disk, we need
1573d7072f8Seschrock 		 * access to a raw node.  Ideally, we'd like to walk the devinfo
1583d7072f8Seschrock 		 * tree and find a raw node from the corresponding parent node.
1593d7072f8Seschrock 		 * This is overly complicated, and since we know how we labeled
1603d7072f8Seschrock 		 * this device in the first place, we know it's save to switch
1613d7072f8Seschrock 		 * from /dev/dsk to /dev/rdsk and append the backup slice.
162c5904d13Seschrock 		 *
163c5904d13Seschrock 		 * If any part of this process fails, then do a force online to
164c5904d13Seschrock 		 * trigger a ZFS fault for the device (and any hot spare
165c5904d13Seschrock 		 * replacement).
1663d7072f8Seschrock 		 */
167c5904d13Seschrock 		if (strncmp(path, "/dev/dsk/", 9) != 0) {
168c5904d13Seschrock 			(void) zpool_vdev_online(zhp, fullpath,
169c5904d13Seschrock 			    ZFS_ONLINE_FORCEFAULT, &newstate);
1703d7072f8Seschrock 			return;
171c5904d13Seschrock 		}
1723d7072f8Seschrock 
1733d7072f8Seschrock 		(void) strlcpy(rawpath, path + 9, sizeof (rawpath));
1743d7072f8Seschrock 		len = strlen(rawpath);
1753d7072f8Seschrock 		rawpath[len - 2] = '\0';
1763d7072f8Seschrock 
177c5904d13Seschrock 		if (zpool_label_disk(g_zfshdl, zhp, rawpath) != 0) {
178c5904d13Seschrock 			(void) zpool_vdev_online(zhp, fullpath,
179c5904d13Seschrock 			    ZFS_ONLINE_FORCEFAULT, &newstate);
1803d7072f8Seschrock 			return;
181c5904d13Seschrock 		}
1823d7072f8Seschrock 	}
1833d7072f8Seschrock 
1843d7072f8Seschrock 	/*
1853d7072f8Seschrock 	 * Cosntruct the root vdev to pass to zpool_vdev_attach().  While adding
1863d7072f8Seschrock 	 * the entire vdev structure is harmless, we construct a reduced set of
187bf82a41bSeschrock 	 * path/physpath/wholedisk to keep it simple.
1883d7072f8Seschrock 	 */
1893d7072f8Seschrock 	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0)
1903d7072f8Seschrock 		return;
1913d7072f8Seschrock 
1923d7072f8Seschrock 	if (nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1933d7072f8Seschrock 		nvlist_free(nvroot);
1943d7072f8Seschrock 		return;
1953d7072f8Seschrock 	}
1963d7072f8Seschrock 
1973d7072f8Seschrock 	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK) != 0 ||
1983d7072f8Seschrock 	    nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0 ||
199bf82a41bSeschrock 	    (physpath != NULL && nvlist_add_string(newvd,
200bf82a41bSeschrock 	    ZPOOL_CONFIG_PHYS_PATH, physpath) != 0) ||
2013d7072f8Seschrock 	    nvlist_add_uint64(newvd, ZPOOL_CONFIG_WHOLE_DISK, wholedisk) != 0 ||
2023d7072f8Seschrock 	    nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0 ||
2033d7072f8Seschrock 	    nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN, &newvd,
2043d7072f8Seschrock 	    1) != 0) {
2053d7072f8Seschrock 		nvlist_free(newvd);
2063d7072f8Seschrock 		nvlist_free(nvroot);
2073d7072f8Seschrock 		return;
2083d7072f8Seschrock 	}
2093d7072f8Seschrock 
2103d7072f8Seschrock 	nvlist_free(newvd);
2113d7072f8Seschrock 
2123d7072f8Seschrock 	(void) zpool_vdev_attach(zhp, fullpath, path, nvroot, B_TRUE);
2133d7072f8Seschrock 
2143d7072f8Seschrock 	nvlist_free(nvroot);
2153d7072f8Seschrock 
2163d7072f8Seschrock }
2173d7072f8Seschrock 
2183d7072f8Seschrock /*
2193d7072f8Seschrock  * Utility functions to find a vdev matching given criteria.
2203d7072f8Seschrock  */
2213d7072f8Seschrock typedef struct dev_data {
2223d7072f8Seschrock 	const char		*dd_compare;
2233d7072f8Seschrock 	const char		*dd_prop;
2243d7072f8Seschrock 	zfs_process_func_t	dd_func;
2253d7072f8Seschrock 	boolean_t		dd_found;
2263d7072f8Seschrock 	boolean_t		dd_isdisk;
2273d7072f8Seschrock 	uint64_t		dd_pool_guid;
2283d7072f8Seschrock 	uint64_t		dd_vdev_guid;
2293d7072f8Seschrock } dev_data_t;
2303d7072f8Seschrock 
2313d7072f8Seschrock static void
2323d7072f8Seschrock zfs_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *data)
2333d7072f8Seschrock {
2343d7072f8Seschrock 	dev_data_t *dp = data;
2353d7072f8Seschrock 	char *path;
2363d7072f8Seschrock 	uint_t c, children;
2373d7072f8Seschrock 	nvlist_t **child;
238b01c3b58Seschrock 	size_t len;
2393d7072f8Seschrock 	uint64_t guid;
2403d7072f8Seschrock 
2413d7072f8Seschrock 	/*
2423d7072f8Seschrock 	 * First iterate over any children.
2433d7072f8Seschrock 	 */
2443d7072f8Seschrock 	if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
2453d7072f8Seschrock 	    &child, &children) == 0) {
2463d7072f8Seschrock 		for (c = 0; c < children; c++)
2473d7072f8Seschrock 			zfs_iter_vdev(zhp, child[c], data);
2483d7072f8Seschrock 		return;
2493d7072f8Seschrock 	}
2503d7072f8Seschrock 
2513d7072f8Seschrock 	if (dp->dd_vdev_guid != 0) {
2523d7072f8Seschrock 		if (nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
2533d7072f8Seschrock 		    &guid) != 0 || guid != dp->dd_vdev_guid)
2543d7072f8Seschrock 			return;
2553d7072f8Seschrock 	} else {
256b01c3b58Seschrock 		len = strlen(dp->dd_compare);
257b01c3b58Seschrock 
2583d7072f8Seschrock 		if (nvlist_lookup_string(nvl, dp->dd_prop, &path) != 0 ||
2593d7072f8Seschrock 		    strncmp(dp->dd_compare, path, len) != 0)
2603d7072f8Seschrock 			return;
2613d7072f8Seschrock 
2623d7072f8Seschrock 		/*
2633d7072f8Seschrock 		 * Normally, we want to have an exact match for the comparison
2643d7072f8Seschrock 		 * string.  However, we allow substring matches in the following
2653d7072f8Seschrock 		 * cases:
2663d7072f8Seschrock 		 *
2673d7072f8Seschrock 		 * 	<path>:		This is a devpath, and the target is one
2683d7072f8Seschrock 		 * 			of its children.
2693d7072f8Seschrock 		 *
2703d7072f8Seschrock 		 * 	<path/>		This is a devid for a whole disk, and
2713d7072f8Seschrock 		 * 			the target is one of its children.
2723d7072f8Seschrock 		 */
2733d7072f8Seschrock 		if (path[len] != '\0' && path[len] != ':' &&
2743d7072f8Seschrock 		    path[len - 1] != '/')
2753d7072f8Seschrock 			return;
2763d7072f8Seschrock 	}
2773d7072f8Seschrock 
2783d7072f8Seschrock 	(dp->dd_func)(zhp, nvl, dp->dd_isdisk);
2793d7072f8Seschrock }
2803d7072f8Seschrock 
2813d7072f8Seschrock static int
2823d7072f8Seschrock zfs_iter_pool(zpool_handle_t *zhp, void *data)
2833d7072f8Seschrock {
2843d7072f8Seschrock 	nvlist_t *config, *nvl;
2853d7072f8Seschrock 	dev_data_t *dp = data;
2863d7072f8Seschrock 	uint64_t pool_guid;
2873d7072f8Seschrock 
2883d7072f8Seschrock 	if ((config = zpool_get_config(zhp, NULL)) != NULL) {
2893d7072f8Seschrock 		if (dp->dd_pool_guid == 0 ||
2903d7072f8Seschrock 		    (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
2913d7072f8Seschrock 		    &pool_guid) == 0 && pool_guid == dp->dd_pool_guid)) {
2923d7072f8Seschrock 			(void) nvlist_lookup_nvlist(config,
2933d7072f8Seschrock 			    ZPOOL_CONFIG_VDEV_TREE, &nvl);
2943d7072f8Seschrock 			zfs_iter_vdev(zhp, nvl, data);
2953d7072f8Seschrock 		}
2963d7072f8Seschrock 	}
2973d7072f8Seschrock 
2983d7072f8Seschrock 	zpool_close(zhp);
2993d7072f8Seschrock 	return (0);
3003d7072f8Seschrock }
3013d7072f8Seschrock 
3023d7072f8Seschrock /*
3033d7072f8Seschrock  * Given a physical device path, iterate over all (pool, vdev) pairs which
3043d7072f8Seschrock  * correspond to the given path.
3053d7072f8Seschrock  */
3063d7072f8Seschrock static boolean_t
3073d7072f8Seschrock devpath_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
3083d7072f8Seschrock {
3093d7072f8Seschrock 	dev_data_t data = { 0 };
3103d7072f8Seschrock 
3113d7072f8Seschrock 	data.dd_compare = devpath;
3123d7072f8Seschrock 	data.dd_func = func;
3133d7072f8Seschrock 	data.dd_prop = ZPOOL_CONFIG_PHYS_PATH;
3143d7072f8Seschrock 	data.dd_found = B_FALSE;
3153d7072f8Seschrock 	data.dd_isdisk = wholedisk;
3163d7072f8Seschrock 
3173d7072f8Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
3183d7072f8Seschrock 
3193d7072f8Seschrock 	return (data.dd_found);
3203d7072f8Seschrock }
3213d7072f8Seschrock 
3223d7072f8Seschrock /*
3233d7072f8Seschrock  * Given a /devices path, lookup the corresponding devid for each minor node,
3243d7072f8Seschrock  * and find any vdevs with matching devids.  Doing this straight up would be
3253d7072f8Seschrock  * rather inefficient, O(minor nodes * vdevs in system), so we take advantage of
3263d7072f8Seschrock  * the fact that each devid ends with "/<minornode>".  Once we find any valid
3273d7072f8Seschrock  * minor node, we chop off the portion after the last slash, and then search for
3283d7072f8Seschrock  * matching vdevs, which is O(vdevs in system).
3293d7072f8Seschrock  */
3303d7072f8Seschrock static boolean_t
3313d7072f8Seschrock devid_iter(const char *devpath, zfs_process_func_t func, boolean_t wholedisk)
3323d7072f8Seschrock {
3333d7072f8Seschrock 	size_t len = strlen(devpath) + sizeof ("/devices") +
3343d7072f8Seschrock 	    sizeof (PHYS_PATH) - 1;
3353d7072f8Seschrock 	char *fullpath;
3363d7072f8Seschrock 	int fd;
3373d7072f8Seschrock 	ddi_devid_t devid;
3383d7072f8Seschrock 	char *devidstr, *fulldevid;
3393d7072f8Seschrock 	dev_data_t data = { 0 };
3403d7072f8Seschrock 
3413d7072f8Seschrock 	/*
3423d7072f8Seschrock 	 * Try to open a known minor node.
3433d7072f8Seschrock 	 */
3443d7072f8Seschrock 	fullpath = alloca(len);
3453d7072f8Seschrock 	(void) snprintf(fullpath, len, "/devices%s%s", devpath, PHYS_PATH);
3463d7072f8Seschrock 	if ((fd = open(fullpath, O_RDONLY)) < 0)
3473d7072f8Seschrock 		return (B_FALSE);
3483d7072f8Seschrock 
3493d7072f8Seschrock 	/*
3503d7072f8Seschrock 	 * Determine the devid as a string, with no trailing slash for the minor
3513d7072f8Seschrock 	 * node.
3523d7072f8Seschrock 	 */
3533d7072f8Seschrock 	if (devid_get(fd, &devid) != 0) {
3543d7072f8Seschrock 		(void) close(fd);
3553d7072f8Seschrock 		return (B_FALSE);
3563d7072f8Seschrock 	}
3573d7072f8Seschrock 	(void) close(fd);
3583d7072f8Seschrock 
3593d7072f8Seschrock 	if ((devidstr = devid_str_encode(devid, NULL)) == NULL) {
3603d7072f8Seschrock 		devid_free(devid);
3613d7072f8Seschrock 		return (B_FALSE);
3623d7072f8Seschrock 	}
3633d7072f8Seschrock 
3643d7072f8Seschrock 	len = strlen(devidstr) + 2;
3653d7072f8Seschrock 	fulldevid = alloca(len);
3663d7072f8Seschrock 	(void) snprintf(fulldevid, len, "%s/", devidstr);
3673d7072f8Seschrock 
3683d7072f8Seschrock 	data.dd_compare = fulldevid;
3693d7072f8Seschrock 	data.dd_func = func;
3703d7072f8Seschrock 	data.dd_prop = ZPOOL_CONFIG_DEVID;
3713d7072f8Seschrock 	data.dd_found = B_FALSE;
3723d7072f8Seschrock 	data.dd_isdisk = wholedisk;
3733d7072f8Seschrock 
3743d7072f8Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
3753d7072f8Seschrock 
3763d7072f8Seschrock 	devid_str_free(devidstr);
3773d7072f8Seschrock 
3783d7072f8Seschrock 	return (data.dd_found);
3793d7072f8Seschrock }
3803d7072f8Seschrock 
3813d7072f8Seschrock /*
3823d7072f8Seschrock  * This function is called when we receive a devfs add event.  This can be
3833d7072f8Seschrock  * either a disk event or a lofi event, and the behavior is slightly different
3843d7072f8Seschrock  * depending on which it is.
3853d7072f8Seschrock  */
3863d7072f8Seschrock static int
3873d7072f8Seschrock zfs_deliver_add(nvlist_t *nvl, boolean_t is_lofi)
3883d7072f8Seschrock {
3893d7072f8Seschrock 	char *devpath, *devname;
3903d7072f8Seschrock 	char path[PATH_MAX], realpath[PATH_MAX];
3913d7072f8Seschrock 	char *colon, *raw;
3923d7072f8Seschrock 	int ret;
3933d7072f8Seschrock 
3943d7072f8Seschrock 	/*
3953d7072f8Seschrock 	 * The main unit of operation is the physical device path.  For disks,
3963d7072f8Seschrock 	 * this is the device node, as all minor nodes are affected.  For lofi
3973d7072f8Seschrock 	 * devices, this includes the minor path.  Unfortunately, this isn't
3983d7072f8Seschrock 	 * represented in the DEV_PHYS_PATH for various reasons.
3993d7072f8Seschrock 	 */
4003d7072f8Seschrock 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devpath) != 0)
4013d7072f8Seschrock 		return (-1);
4023d7072f8Seschrock 
4033d7072f8Seschrock 	/*
4043d7072f8Seschrock 	 * If this is a lofi device, then also get the minor instance name.
4053d7072f8Seschrock 	 * Unfortunately, the current payload doesn't include an easy way to get
4063d7072f8Seschrock 	 * this information.  So we cheat by resolving the 'dev_name' (which
4073d7072f8Seschrock 	 * refers to the raw device) and taking the portion between ':(*),raw'.
4083d7072f8Seschrock 	 */
4093d7072f8Seschrock 	(void) strlcpy(realpath, devpath, sizeof (realpath));
4103d7072f8Seschrock 	if (is_lofi) {
4113d7072f8Seschrock 		if (nvlist_lookup_string(nvl, DEV_NAME,
4123d7072f8Seschrock 		    &devname) == 0 &&
4133d7072f8Seschrock 		    (ret = resolvepath(devname, path,
4143d7072f8Seschrock 		    sizeof (path))) > 0) {
4153d7072f8Seschrock 			path[ret] = '\0';
4163d7072f8Seschrock 			colon = strchr(path, ':');
4173d7072f8Seschrock 			if (colon != NULL)
4183d7072f8Seschrock 				raw = strstr(colon + 1, ",raw");
4193d7072f8Seschrock 			if (colon != NULL && raw != NULL) {
4203d7072f8Seschrock 				*raw = '\0';
4213d7072f8Seschrock 				(void) snprintf(realpath,
4223d7072f8Seschrock 				    sizeof (realpath), "%s%s",
4233d7072f8Seschrock 				    devpath, colon);
4243d7072f8Seschrock 				*raw = ',';
4253d7072f8Seschrock 			}
4263d7072f8Seschrock 		}
4273d7072f8Seschrock 	}
4283d7072f8Seschrock 
4293d7072f8Seschrock 	/*
4303d7072f8Seschrock 	 * Iterate over all vdevs with a matching devid, and then those with a
4313d7072f8Seschrock 	 * matching /devices path.  For disks, we only want to pay attention to
4323d7072f8Seschrock 	 * vdevs marked as whole disks.  For lofi, we don't care (because we're
4333d7072f8Seschrock 	 * matching an exact minor name).
4343d7072f8Seschrock 	 */
4353d7072f8Seschrock 	if (!devid_iter(realpath, zfs_process_add, !is_lofi))
4363d7072f8Seschrock 		(void) devpath_iter(realpath, zfs_process_add, !is_lofi);
4373d7072f8Seschrock 
4383d7072f8Seschrock 	return (0);
4393d7072f8Seschrock }
4403d7072f8Seschrock 
4413d7072f8Seschrock /*
4423d7072f8Seschrock  * Called when we receive a VDEV_CHECK event, which indicates a device could not
4433d7072f8Seschrock  * be opened during initial pool open, but the autoreplace property was set on
4443d7072f8Seschrock  * the pool.  In this case, we treat it as if it were an add event.
4453d7072f8Seschrock  */
4463d7072f8Seschrock static int
4473d7072f8Seschrock zfs_deliver_check(nvlist_t *nvl)
4483d7072f8Seschrock {
4493d7072f8Seschrock 	dev_data_t data = { 0 };
4503d7072f8Seschrock 
4513d7072f8Seschrock 	if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID,
4523d7072f8Seschrock 	    &data.dd_pool_guid) != 0 ||
4533d7072f8Seschrock 	    nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID,
4543d7072f8Seschrock 	    &data.dd_vdev_guid) != 0)
4553d7072f8Seschrock 		return (0);
4563d7072f8Seschrock 
4573d7072f8Seschrock 	data.dd_isdisk = B_TRUE;
4583d7072f8Seschrock 	data.dd_func = zfs_process_add;
4593d7072f8Seschrock 
4603d7072f8Seschrock 	(void) zpool_iter(g_zfshdl, zfs_iter_pool, &data);
4613d7072f8Seschrock 
4623d7072f8Seschrock 	return (0);
4633d7072f8Seschrock }
4643d7072f8Seschrock 
465*b98131cfSEric Taylor #define	DEVICE_PREFIX	"/devices"
466*b98131cfSEric Taylor 
467*b98131cfSEric Taylor static int
468*b98131cfSEric Taylor zfsdle_vdev_online(zpool_handle_t *zhp, void *data)
469*b98131cfSEric Taylor {
470*b98131cfSEric Taylor 	char *devname = data;
471*b98131cfSEric Taylor 	boolean_t avail_spare, l2cache;
472*b98131cfSEric Taylor 	vdev_state_t newstate;
473*b98131cfSEric Taylor 	nvlist_t *tgt;
474*b98131cfSEric Taylor 
475*b98131cfSEric Taylor 	syseventd_print(9, "zfsdle_vdev_online: searching for %s in pool %s\n",
476*b98131cfSEric Taylor 	    devname, zpool_get_name(zhp));
477*b98131cfSEric Taylor 
478*b98131cfSEric Taylor 	if ((tgt = zpool_find_vdev_by_physpath(zhp, devname,
479*b98131cfSEric Taylor 	    &avail_spare, &l2cache, NULL)) != NULL) {
480*b98131cfSEric Taylor 		char *path, fullpath[MAXPATHLEN];
481*b98131cfSEric Taylor 		uint64_t wholedisk = 0ULL;
482*b98131cfSEric Taylor 
483*b98131cfSEric Taylor 		verify(nvlist_lookup_string(tgt, ZPOOL_CONFIG_PATH,
484*b98131cfSEric Taylor 		    &path) == 0);
485*b98131cfSEric Taylor 		verify(nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_WHOLE_DISK,
486*b98131cfSEric Taylor 		    &wholedisk) == 0);
487*b98131cfSEric Taylor 
488*b98131cfSEric Taylor 		(void) strlcpy(fullpath, path, sizeof (fullpath));
489*b98131cfSEric Taylor 		if (wholedisk)
490*b98131cfSEric Taylor 			fullpath[strlen(fullpath) - 2] = '\0';
491*b98131cfSEric Taylor 
492*b98131cfSEric Taylor 		if (zpool_get_prop_int(zhp, ZPOOL_PROP_AUTOEXPAND, NULL)) {
493*b98131cfSEric Taylor 			syseventd_print(9, "zfsdle_vdev_online: setting device"
494*b98131cfSEric Taylor 			    " device %s to ONLINE state in pool %s.\n",
495*b98131cfSEric Taylor 			    fullpath, zpool_get_name(zhp));
496*b98131cfSEric Taylor 			if (zpool_get_state(zhp) != POOL_STATE_UNAVAIL)
497*b98131cfSEric Taylor 				(void) zpool_vdev_online(zhp, fullpath, 0,
498*b98131cfSEric Taylor 				    &newstate);
499*b98131cfSEric Taylor 		}
500*b98131cfSEric Taylor 		return (1);
501*b98131cfSEric Taylor 	}
502*b98131cfSEric Taylor 	return (0);
503*b98131cfSEric Taylor }
504*b98131cfSEric Taylor 
505*b98131cfSEric Taylor int
506*b98131cfSEric Taylor zfs_deliver_dle(nvlist_t *nvl)
507*b98131cfSEric Taylor {
508*b98131cfSEric Taylor 	char *devname;
509*b98131cfSEric Taylor 	if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &devname) != 0) {
510*b98131cfSEric Taylor 		syseventd_print(9, "zfs_deliver_event: no physpath\n");
511*b98131cfSEric Taylor 		return (-1);
512*b98131cfSEric Taylor 	}
513*b98131cfSEric Taylor 	if (strncmp(devname, DEVICE_PREFIX, strlen(DEVICE_PREFIX)) != 0) {
514*b98131cfSEric Taylor 		syseventd_print(9, "zfs_deliver_event: invalid "
515*b98131cfSEric Taylor 		    "device '%s'", devname);
516*b98131cfSEric Taylor 		return (-1);
517*b98131cfSEric Taylor 	}
518*b98131cfSEric Taylor 
519*b98131cfSEric Taylor 	/*
520*b98131cfSEric Taylor 	 * We try to find the device using the physical
521*b98131cfSEric Taylor 	 * path that has been supplied. We need to strip off
522*b98131cfSEric Taylor 	 * the /devices prefix before starting our search.
523*b98131cfSEric Taylor 	 */
524*b98131cfSEric Taylor 	devname += strlen(DEVICE_PREFIX);
525*b98131cfSEric Taylor 	if (zpool_iter(g_zfshdl, zfsdle_vdev_online, devname) != 1) {
526*b98131cfSEric Taylor 		syseventd_print(9, "zfs_deliver_event: device '%s' not"
527*b98131cfSEric Taylor 		    " found\n", devname);
528*b98131cfSEric Taylor 		return (1);
529*b98131cfSEric Taylor 	}
530*b98131cfSEric Taylor 	nvlist_free(nvl);
531*b98131cfSEric Taylor 	return (0);
532*b98131cfSEric Taylor }
533*b98131cfSEric Taylor 
534*b98131cfSEric Taylor 
5353d7072f8Seschrock /*ARGSUSED*/
5363d7072f8Seschrock static int
5373d7072f8Seschrock zfs_deliver_event(sysevent_t *ev, int unused)
5383d7072f8Seschrock {
5393d7072f8Seschrock 	const char *class = sysevent_get_class_name(ev);
5403d7072f8Seschrock 	const char *subclass = sysevent_get_subclass_name(ev);
5413d7072f8Seschrock 	nvlist_t *nvl;
5423d7072f8Seschrock 	int ret;
543*b98131cfSEric Taylor 	boolean_t is_lofi, is_check, is_dle = B_FALSE;
5443d7072f8Seschrock 
5453d7072f8Seschrock 	if (strcmp(class, EC_DEV_ADD) == 0) {
5463d7072f8Seschrock 		/*
5473d7072f8Seschrock 		 * We're mainly interested in disk additions, but we also listen
5483d7072f8Seschrock 		 * for new lofi devices, to allow for simplified testing.
5493d7072f8Seschrock 		 */
5503d7072f8Seschrock 		if (strcmp(subclass, ESC_DISK) == 0)
5513d7072f8Seschrock 			is_lofi = B_FALSE;
5523d7072f8Seschrock 		else if (strcmp(subclass, ESC_LOFI) == 0)
5533d7072f8Seschrock 			is_lofi = B_TRUE;
5543d7072f8Seschrock 		else
5553d7072f8Seschrock 			return (0);
5563d7072f8Seschrock 
5573d7072f8Seschrock 		is_check = B_FALSE;
5583d7072f8Seschrock 	} else if (strcmp(class, EC_ZFS) == 0 &&
5593d7072f8Seschrock 	    strcmp(subclass, ESC_ZFS_VDEV_CHECK) == 0) {
5603d7072f8Seschrock 		/*
5613d7072f8Seschrock 		 * This event signifies that a device failed to open during pool
5623d7072f8Seschrock 		 * load, but the 'autoreplace' property was set, so we should
5633d7072f8Seschrock 		 * pretend it's just been added.
5643d7072f8Seschrock 		 */
5653d7072f8Seschrock 		is_check = B_TRUE;
566*b98131cfSEric Taylor 	} else if (strcmp(class, EC_DEV_STATUS) == 0 &&
567*b98131cfSEric Taylor 	    strcmp(subclass, ESC_DEV_DLE) == 0) {
568*b98131cfSEric Taylor 		is_dle = B_TRUE;
5693d7072f8Seschrock 	} else {
5703d7072f8Seschrock 		return (0);
5713d7072f8Seschrock 	}
5723d7072f8Seschrock 
5733d7072f8Seschrock 	if (sysevent_get_attr_list(ev, &nvl) != 0)
5743d7072f8Seschrock 		return (-1);
5753d7072f8Seschrock 
576*b98131cfSEric Taylor 	if (is_dle)
577*b98131cfSEric Taylor 		ret = zfs_deliver_dle(nvl);
578*b98131cfSEric Taylor 	else if (is_check)
5793d7072f8Seschrock 		ret = zfs_deliver_check(nvl);
5803d7072f8Seschrock 	else
5813d7072f8Seschrock 		ret = zfs_deliver_add(nvl, is_lofi);
5823d7072f8Seschrock 
5833d7072f8Seschrock 	nvlist_free(nvl);
5843d7072f8Seschrock 	return (ret);
5853d7072f8Seschrock }
5863d7072f8Seschrock 
5873d7072f8Seschrock static struct slm_mod_ops zfs_mod_ops = {
5883d7072f8Seschrock 	SE_MAJOR_VERSION, SE_MINOR_VERSION, 10, zfs_deliver_event
5893d7072f8Seschrock };
5903d7072f8Seschrock 
5913d7072f8Seschrock struct slm_mod_ops *
5923d7072f8Seschrock slm_init()
5933d7072f8Seschrock {
5943d7072f8Seschrock 	if ((g_zfshdl = libzfs_init()) == NULL)
5953d7072f8Seschrock 		return (NULL);
5963d7072f8Seschrock 
5973d7072f8Seschrock 	return (&zfs_mod_ops);
5983d7072f8Seschrock }
5993d7072f8Seschrock 
6003d7072f8Seschrock void
6013d7072f8Seschrock slm_fini()
6023d7072f8Seschrock {
6033d7072f8Seschrock }
604