199653d4eSeschrock /* 299653d4eSeschrock * CDDL HEADER START 399653d4eSeschrock * 499653d4eSeschrock * The contents of this file are subject to the terms of the 599653d4eSeschrock * Common Development and Distribution License (the "License"). 699653d4eSeschrock * You may not use this file except in compliance with the License. 799653d4eSeschrock * 899653d4eSeschrock * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 999653d4eSeschrock * or http://www.opensolaris.org/os/licensing. 1099653d4eSeschrock * See the License for the specific language governing permissions 1199653d4eSeschrock * and limitations under the License. 1299653d4eSeschrock * 1399653d4eSeschrock * When distributing Covered Code, include this CDDL HEADER in each 1499653d4eSeschrock * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 1599653d4eSeschrock * If applicable, add the following below this CDDL HEADER, with the 1699653d4eSeschrock * fields enclosed by brackets "[]" replaced with your own identifying 1799653d4eSeschrock * information: Portions Copyright [yyyy] [name of copyright owner] 1899653d4eSeschrock * 1999653d4eSeschrock * CDDL HEADER END 2099653d4eSeschrock */ 2199653d4eSeschrock /* 22cbf75e67SStephen Hanson * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 2399653d4eSeschrock * Use is subject to license terms. 2499653d4eSeschrock */ 2599653d4eSeschrock 2699653d4eSeschrock /* 2799653d4eSeschrock * The ZFS retire agent is responsible for managing hot spares across all pools. 283d7072f8Seschrock * When we see a device fault or a device removal, we try to open the associated 293d7072f8Seschrock * pool and look for any hot spares. We iterate over any available hot spares 303d7072f8Seschrock * and attempt a 'zpool replace' for each one. 313d7072f8Seschrock * 323d7072f8Seschrock * For vdevs diagnosed as faulty, the agent is also responsible for proactively 333d7072f8Seschrock * marking the vdev FAULTY (for I/O errors) or DEGRADED (for checksum errors). 3499653d4eSeschrock */ 3599653d4eSeschrock 3699653d4eSeschrock #include <fm/fmd_api.h> 3799653d4eSeschrock #include <sys/fs/zfs.h> 3899653d4eSeschrock #include <sys/fm/protocol.h> 3999653d4eSeschrock #include <sys/fm/fs/zfs.h> 4099653d4eSeschrock #include <libzfs.h> 41069f55e2SEric Schrock #include <fm/libtopo.h> 423d7072f8Seschrock #include <string.h> 4399653d4eSeschrock 44069f55e2SEric Schrock typedef struct zfs_retire_repaired { 45069f55e2SEric Schrock struct zfs_retire_repaired *zrr_next; 46069f55e2SEric Schrock uint64_t zrr_pool; 47069f55e2SEric Schrock uint64_t zrr_vdev; 48069f55e2SEric Schrock } zfs_retire_repaired_t; 49069f55e2SEric Schrock 50069f55e2SEric Schrock typedef struct zfs_retire_data { 51069f55e2SEric Schrock libzfs_handle_t *zrd_hdl; 52069f55e2SEric Schrock zfs_retire_repaired_t *zrd_repaired; 53069f55e2SEric Schrock } zfs_retire_data_t; 54069f55e2SEric Schrock 55069f55e2SEric Schrock static void 56069f55e2SEric Schrock zfs_retire_clear_data(fmd_hdl_t *hdl, zfs_retire_data_t *zdp) 57069f55e2SEric Schrock { 58069f55e2SEric Schrock zfs_retire_repaired_t *zrp; 59069f55e2SEric Schrock 60069f55e2SEric Schrock while ((zrp = zdp->zrd_repaired) != NULL) { 61069f55e2SEric Schrock zdp->zrd_repaired = zrp->zrr_next; 62069f55e2SEric Schrock fmd_hdl_free(hdl, zrp, sizeof (zfs_retire_repaired_t)); 63069f55e2SEric Schrock } 64069f55e2SEric Schrock } 65069f55e2SEric Schrock 6699653d4eSeschrock /* 6799653d4eSeschrock * Find a pool with a matching GUID. 6899653d4eSeschrock */ 6999653d4eSeschrock typedef struct find_cbdata { 7099653d4eSeschrock uint64_t cb_guid; 71069f55e2SEric Schrock const char *cb_fru; 7299653d4eSeschrock zpool_handle_t *cb_zhp; 73069f55e2SEric Schrock nvlist_t *cb_vdev; 7499653d4eSeschrock } find_cbdata_t; 7599653d4eSeschrock 7699653d4eSeschrock static int 7799653d4eSeschrock find_pool(zpool_handle_t *zhp, void *data) 7899653d4eSeschrock { 7999653d4eSeschrock find_cbdata_t *cbp = data; 8099653d4eSeschrock 81990b4856Slling if (cbp->cb_guid == 82990b4856Slling zpool_get_prop_int(zhp, ZPOOL_PROP_GUID, NULL)) { 8399653d4eSeschrock cbp->cb_zhp = zhp; 8499653d4eSeschrock return (1); 8599653d4eSeschrock } 8699653d4eSeschrock 8799653d4eSeschrock zpool_close(zhp); 8899653d4eSeschrock return (0); 8999653d4eSeschrock } 9099653d4eSeschrock 9199653d4eSeschrock /* 9299653d4eSeschrock * Find a vdev within a tree with a matching GUID. 9399653d4eSeschrock */ 9499653d4eSeschrock static nvlist_t * 95069f55e2SEric Schrock find_vdev(libzfs_handle_t *zhdl, nvlist_t *nv, const char *search_fru, 96069f55e2SEric Schrock uint64_t search_guid) 9799653d4eSeschrock { 9899653d4eSeschrock uint64_t guid; 9999653d4eSeschrock nvlist_t **child; 10099653d4eSeschrock uint_t c, children; 10199653d4eSeschrock nvlist_t *ret; 102069f55e2SEric Schrock char *fru; 103069f55e2SEric Schrock 104069f55e2SEric Schrock if (search_fru != NULL) { 105069f55e2SEric Schrock if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &fru) == 0 && 106069f55e2SEric Schrock libzfs_fru_compare(zhdl, fru, search_fru)) 107069f55e2SEric Schrock return (nv); 108069f55e2SEric Schrock } else { 109069f55e2SEric Schrock if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 && 110069f55e2SEric Schrock guid == search_guid) 111069f55e2SEric Schrock return (nv); 112069f55e2SEric Schrock } 11399653d4eSeschrock 11499653d4eSeschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, 11599653d4eSeschrock &child, &children) != 0) 11699653d4eSeschrock return (NULL); 11799653d4eSeschrock 11899653d4eSeschrock for (c = 0; c < children; c++) { 119069f55e2SEric Schrock if ((ret = find_vdev(zhdl, child[c], search_fru, 120069f55e2SEric Schrock search_guid)) != NULL) 12199653d4eSeschrock return (ret); 12299653d4eSeschrock } 12399653d4eSeschrock 124c5904d13Seschrock if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE, 125c5904d13Seschrock &child, &children) != 0) 126c5904d13Seschrock return (NULL); 127c5904d13Seschrock 128c5904d13Seschrock for (c = 0; c < children; c++) { 129069f55e2SEric Schrock if ((ret = find_vdev(zhdl, child[c], search_fru, 130069f55e2SEric Schrock search_guid)) != NULL) 131c5904d13Seschrock return (ret); 132c5904d13Seschrock } 133c5904d13Seschrock 13499653d4eSeschrock return (NULL); 13599653d4eSeschrock } 13699653d4eSeschrock 1373d7072f8Seschrock /* 1383d7072f8Seschrock * Given a (pool, vdev) GUID pair, find the matching pool and vdev. 1393d7072f8Seschrock */ 1403d7072f8Seschrock static zpool_handle_t * 1413d7072f8Seschrock find_by_guid(libzfs_handle_t *zhdl, uint64_t pool_guid, uint64_t vdev_guid, 1423d7072f8Seschrock nvlist_t **vdevp) 1433d7072f8Seschrock { 1443d7072f8Seschrock find_cbdata_t cb; 1453d7072f8Seschrock zpool_handle_t *zhp; 1463d7072f8Seschrock nvlist_t *config, *nvroot; 1473d7072f8Seschrock 1483d7072f8Seschrock /* 1493d7072f8Seschrock * Find the corresponding pool and make sure the vdev still exists. 1503d7072f8Seschrock */ 1513d7072f8Seschrock cb.cb_guid = pool_guid; 1523d7072f8Seschrock if (zpool_iter(zhdl, find_pool, &cb) != 1) 1533d7072f8Seschrock return (NULL); 1543d7072f8Seschrock 1553d7072f8Seschrock zhp = cb.cb_zhp; 1563d7072f8Seschrock config = zpool_get_config(zhp, NULL); 1573d7072f8Seschrock if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 1583d7072f8Seschrock &nvroot) != 0) { 1593d7072f8Seschrock zpool_close(zhp); 1603d7072f8Seschrock return (NULL); 1613d7072f8Seschrock } 1623d7072f8Seschrock 163069f55e2SEric Schrock if (vdev_guid != 0) { 164069f55e2SEric Schrock if ((*vdevp = find_vdev(zhdl, nvroot, NULL, 165069f55e2SEric Schrock vdev_guid)) == NULL) { 166069f55e2SEric Schrock zpool_close(zhp); 167069f55e2SEric Schrock return (NULL); 168069f55e2SEric Schrock } 1693d7072f8Seschrock } 1703d7072f8Seschrock 1713d7072f8Seschrock return (zhp); 1723d7072f8Seschrock } 1733d7072f8Seschrock 174069f55e2SEric Schrock static int 175069f55e2SEric Schrock search_pool(zpool_handle_t *zhp, void *data) 176069f55e2SEric Schrock { 177069f55e2SEric Schrock find_cbdata_t *cbp = data; 178069f55e2SEric Schrock nvlist_t *config; 179069f55e2SEric Schrock nvlist_t *nvroot; 180069f55e2SEric Schrock 181069f55e2SEric Schrock config = zpool_get_config(zhp, NULL); 182069f55e2SEric Schrock if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 183069f55e2SEric Schrock &nvroot) != 0) { 184069f55e2SEric Schrock zpool_close(zhp); 185069f55e2SEric Schrock return (0); 186069f55e2SEric Schrock } 187069f55e2SEric Schrock 188069f55e2SEric Schrock if ((cbp->cb_vdev = find_vdev(zpool_get_handle(zhp), nvroot, 189069f55e2SEric Schrock cbp->cb_fru, 0)) != NULL) { 190069f55e2SEric Schrock cbp->cb_zhp = zhp; 191069f55e2SEric Schrock return (1); 192069f55e2SEric Schrock } 193069f55e2SEric Schrock 194069f55e2SEric Schrock zpool_close(zhp); 195069f55e2SEric Schrock return (0); 196069f55e2SEric Schrock } 197069f55e2SEric Schrock 198069f55e2SEric Schrock /* 199069f55e2SEric Schrock * Given a FRU FMRI, find the matching pool and vdev. 200069f55e2SEric Schrock */ 201069f55e2SEric Schrock static zpool_handle_t * 202069f55e2SEric Schrock find_by_fru(libzfs_handle_t *zhdl, const char *fru, nvlist_t **vdevp) 203069f55e2SEric Schrock { 204069f55e2SEric Schrock find_cbdata_t cb; 205069f55e2SEric Schrock 206069f55e2SEric Schrock cb.cb_fru = fru; 207069f55e2SEric Schrock cb.cb_zhp = NULL; 208069f55e2SEric Schrock if (zpool_iter(zhdl, search_pool, &cb) != 1) 209069f55e2SEric Schrock return (NULL); 210069f55e2SEric Schrock 211069f55e2SEric Schrock *vdevp = cb.cb_vdev; 212069f55e2SEric Schrock return (cb.cb_zhp); 213069f55e2SEric Schrock } 214069f55e2SEric Schrock 2153d7072f8Seschrock /* 2163d7072f8Seschrock * Given a vdev, attempt to replace it with every known spare until one 2173d7072f8Seschrock * succeeds. 2183d7072f8Seschrock */ 2193d7072f8Seschrock static void 220069f55e2SEric Schrock replace_with_spare(fmd_hdl_t *hdl, zpool_handle_t *zhp, nvlist_t *vdev) 2213d7072f8Seschrock { 2223d7072f8Seschrock nvlist_t *config, *nvroot, *replacement; 2233d7072f8Seschrock nvlist_t **spares; 2243d7072f8Seschrock uint_t s, nspares; 2253d7072f8Seschrock char *dev_name; 2263d7072f8Seschrock 2273d7072f8Seschrock config = zpool_get_config(zhp, NULL); 2283d7072f8Seschrock if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 2293d7072f8Seschrock &nvroot) != 0) 2303d7072f8Seschrock return; 2313d7072f8Seschrock 2323d7072f8Seschrock /* 2333d7072f8Seschrock * Find out if there are any hot spares available in the pool. 2343d7072f8Seschrock */ 2353d7072f8Seschrock if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, 2363d7072f8Seschrock &spares, &nspares) != 0) 2373d7072f8Seschrock return; 2383d7072f8Seschrock 239069f55e2SEric Schrock replacement = fmd_nvl_alloc(hdl, FMD_SLEEP); 2403d7072f8Seschrock 241069f55e2SEric Schrock (void) nvlist_add_string(replacement, ZPOOL_CONFIG_TYPE, 242069f55e2SEric Schrock VDEV_TYPE_ROOT); 2433d7072f8Seschrock 24488ecc943SGeorge Wilson dev_name = zpool_vdev_name(NULL, zhp, vdev, B_FALSE); 2453d7072f8Seschrock 2463d7072f8Seschrock /* 2473d7072f8Seschrock * Try to replace each spare, ending when we successfully 2483d7072f8Seschrock * replace it. 2493d7072f8Seschrock */ 2503d7072f8Seschrock for (s = 0; s < nspares; s++) { 2513d7072f8Seschrock char *spare_name; 2523d7072f8Seschrock 2533d7072f8Seschrock if (nvlist_lookup_string(spares[s], ZPOOL_CONFIG_PATH, 2543d7072f8Seschrock &spare_name) != 0) 2553d7072f8Seschrock continue; 2563d7072f8Seschrock 257069f55e2SEric Schrock (void) nvlist_add_nvlist_array(replacement, 258069f55e2SEric Schrock ZPOOL_CONFIG_CHILDREN, &spares[s], 1); 2593d7072f8Seschrock 2603d7072f8Seschrock if (zpool_vdev_attach(zhp, dev_name, spare_name, 2613d7072f8Seschrock replacement, B_TRUE) == 0) 2623d7072f8Seschrock break; 2633d7072f8Seschrock } 2643d7072f8Seschrock 2653d7072f8Seschrock free(dev_name); 2663d7072f8Seschrock nvlist_free(replacement); 2673d7072f8Seschrock } 2683d7072f8Seschrock 269069f55e2SEric Schrock /* 270069f55e2SEric Schrock * Repair this vdev if we had diagnosed a 'fault.fs.zfs.device' and 271069f55e2SEric Schrock * ASRU is now usable. ZFS has found the device to be present and 272069f55e2SEric Schrock * functioning. 273069f55e2SEric Schrock */ 274069f55e2SEric Schrock /*ARGSUSED*/ 275069f55e2SEric Schrock void 276069f55e2SEric Schrock zfs_vdev_repair(fmd_hdl_t *hdl, nvlist_t *nvl) 277069f55e2SEric Schrock { 278069f55e2SEric Schrock zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 279069f55e2SEric Schrock zfs_retire_repaired_t *zrp; 280069f55e2SEric Schrock uint64_t pool_guid, vdev_guid; 281069f55e2SEric Schrock nvlist_t *asru; 282069f55e2SEric Schrock 283069f55e2SEric Schrock if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 284069f55e2SEric Schrock &pool_guid) != 0 || nvlist_lookup_uint64(nvl, 285069f55e2SEric Schrock FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, &vdev_guid) != 0) 286069f55e2SEric Schrock return; 287069f55e2SEric Schrock 288069f55e2SEric Schrock /* 289069f55e2SEric Schrock * Before checking the state of the ASRU, go through and see if we've 290069f55e2SEric Schrock * already made an attempt to repair this ASRU. This list is cleared 291069f55e2SEric Schrock * whenever we receive any kind of list event, and is designed to 292069f55e2SEric Schrock * prevent us from generating a feedback loop when we attempt repairs 293069f55e2SEric Schrock * against a faulted pool. The problem is that checking the unusable 294069f55e2SEric Schrock * state of the ASRU can involve opening the pool, which can post 295069f55e2SEric Schrock * statechange events but otherwise leave the pool in the faulted 296069f55e2SEric Schrock * state. This list allows us to detect when a statechange event is 297069f55e2SEric Schrock * due to our own request. 298069f55e2SEric Schrock */ 299069f55e2SEric Schrock for (zrp = zdp->zrd_repaired; zrp != NULL; zrp = zrp->zrr_next) { 300069f55e2SEric Schrock if (zrp->zrr_pool == pool_guid && 301069f55e2SEric Schrock zrp->zrr_vdev == vdev_guid) 302069f55e2SEric Schrock return; 303069f55e2SEric Schrock } 304069f55e2SEric Schrock 305069f55e2SEric Schrock asru = fmd_nvl_alloc(hdl, FMD_SLEEP); 306069f55e2SEric Schrock 307069f55e2SEric Schrock (void) nvlist_add_uint8(asru, FM_VERSION, ZFS_SCHEME_VERSION0); 308069f55e2SEric Schrock (void) nvlist_add_string(asru, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS); 309069f55e2SEric Schrock (void) nvlist_add_uint64(asru, FM_FMRI_ZFS_POOL, pool_guid); 310069f55e2SEric Schrock (void) nvlist_add_uint64(asru, FM_FMRI_ZFS_VDEV, vdev_guid); 311069f55e2SEric Schrock 312069f55e2SEric Schrock /* 313069f55e2SEric Schrock * We explicitly check for the unusable state here to make sure we 314069f55e2SEric Schrock * aren't responding to a transient state change. As part of opening a 315069f55e2SEric Schrock * vdev, it's possible to see the 'statechange' event, only to be 316069f55e2SEric Schrock * followed by a vdev failure later. If we don't check the current 317069f55e2SEric Schrock * state of the vdev (or pool) before marking it repaired, then we risk 318069f55e2SEric Schrock * generating spurious repair events followed immediately by the same 319069f55e2SEric Schrock * diagnosis. 320069f55e2SEric Schrock * 321069f55e2SEric Schrock * This assumes that the ZFS scheme code associated unusable (i.e. 322069f55e2SEric Schrock * isolated) with its own definition of faulty state. In the case of a 323069f55e2SEric Schrock * DEGRADED leaf vdev (due to checksum errors), this is not the case. 324069f55e2SEric Schrock * This works, however, because the transient state change is not 325069f55e2SEric Schrock * posted in this case. This could be made more explicit by not 326069f55e2SEric Schrock * relying on the scheme's unusable callback and instead directly 327069f55e2SEric Schrock * checking the vdev state, where we could correctly account for 328069f55e2SEric Schrock * DEGRADED state. 329069f55e2SEric Schrock */ 330069f55e2SEric Schrock if (!fmd_nvl_fmri_unusable(hdl, asru) && fmd_nvl_fmri_has_fault(hdl, 331069f55e2SEric Schrock asru, FMD_HAS_FAULT_ASRU, NULL)) { 332069f55e2SEric Schrock topo_hdl_t *thp; 333069f55e2SEric Schrock char *fmri = NULL; 334069f55e2SEric Schrock int err; 335069f55e2SEric Schrock 336069f55e2SEric Schrock thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION); 337069f55e2SEric Schrock if (topo_fmri_nvl2str(thp, asru, &fmri, &err) == 0) 338eb04386eSEric Schrock (void) fmd_repair_asru(hdl, fmri); 339069f55e2SEric Schrock fmd_hdl_topo_rele(hdl, thp); 340069f55e2SEric Schrock 341069f55e2SEric Schrock topo_hdl_strfree(thp, fmri); 342069f55e2SEric Schrock } 343069f55e2SEric Schrock 344069f55e2SEric Schrock zrp = fmd_hdl_alloc(hdl, sizeof (zfs_retire_repaired_t), FMD_SLEEP); 345069f55e2SEric Schrock zrp->zrr_next = zdp->zrd_repaired; 346069f55e2SEric Schrock zrp->zrr_pool = pool_guid; 347069f55e2SEric Schrock zrp->zrr_vdev = vdev_guid; 348069f55e2SEric Schrock zdp->zrd_repaired = zrp; 349069f55e2SEric Schrock } 350069f55e2SEric Schrock 35199653d4eSeschrock /*ARGSUSED*/ 35299653d4eSeschrock static void 35399653d4eSeschrock zfs_retire_recv(fmd_hdl_t *hdl, fmd_event_t *ep, nvlist_t *nvl, 35499653d4eSeschrock const char *class) 35599653d4eSeschrock { 35699653d4eSeschrock uint64_t pool_guid, vdev_guid; 35799653d4eSeschrock zpool_handle_t *zhp; 358069f55e2SEric Schrock nvlist_t *resource, *fault, *fru; 3593d7072f8Seschrock nvlist_t **faults; 3603d7072f8Seschrock uint_t f, nfaults; 361069f55e2SEric Schrock zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 362069f55e2SEric Schrock libzfs_handle_t *zhdl = zdp->zrd_hdl; 3633d7072f8Seschrock boolean_t fault_device, degrade_device; 3643d7072f8Seschrock boolean_t is_repair; 365069f55e2SEric Schrock char *scheme, *fmri; 3663d7072f8Seschrock nvlist_t *vdev; 36725c6ff4bSstephh char *uuid; 36825c6ff4bSstephh int repair_done = 0; 369cbf75e67SStephen Hanson boolean_t retire; 370069f55e2SEric Schrock boolean_t is_disk; 371069f55e2SEric Schrock vdev_aux_t aux; 372069f55e2SEric Schrock topo_hdl_t *thp; 373069f55e2SEric Schrock int err; 3743d7072f8Seschrock 3753d7072f8Seschrock /* 3763d7072f8Seschrock * If this is a resource notifying us of device removal, then simply 3773d7072f8Seschrock * check for an available spare and continue. 3783d7072f8Seschrock */ 3793d7072f8Seschrock if (strcmp(class, "resource.fs.zfs.removed") == 0) { 3803d7072f8Seschrock if (nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, 3813d7072f8Seschrock &pool_guid) != 0 || 3823d7072f8Seschrock nvlist_lookup_uint64(nvl, FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, 3833d7072f8Seschrock &vdev_guid) != 0) 3843d7072f8Seschrock return; 3853d7072f8Seschrock 3863d7072f8Seschrock if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 3873d7072f8Seschrock &vdev)) == NULL) 3883d7072f8Seschrock return; 3893d7072f8Seschrock 3903d7072f8Seschrock if (fmd_prop_get_int32(hdl, "spare_on_remove")) 391069f55e2SEric Schrock replace_with_spare(hdl, zhp, vdev); 3923d7072f8Seschrock zpool_close(zhp); 3933d7072f8Seschrock return; 3943d7072f8Seschrock } 3953d7072f8Seschrock 396cbf75e67SStephen Hanson if (strcmp(class, FM_LIST_RESOLVED_CLASS) == 0) 397cbf75e67SStephen Hanson return; 398cbf75e67SStephen Hanson 399069f55e2SEric Schrock if (strcmp(class, "resource.fs.zfs.statechange") == 0 || 400069f55e2SEric Schrock strcmp(class, 401069f55e2SEric Schrock "resource.sysevent.EC_zfs.ESC_ZFS_vdev_remove") == 0) { 402069f55e2SEric Schrock zfs_vdev_repair(hdl, nvl); 403069f55e2SEric Schrock return; 404069f55e2SEric Schrock } 405069f55e2SEric Schrock 406069f55e2SEric Schrock zfs_retire_clear_data(hdl, zdp); 407069f55e2SEric Schrock 40825c6ff4bSstephh if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0) 4093d7072f8Seschrock is_repair = B_TRUE; 4103d7072f8Seschrock else 4113d7072f8Seschrock is_repair = B_FALSE; 41299653d4eSeschrock 41399653d4eSeschrock /* 4143d7072f8Seschrock * We subscribe to zfs faults as well as all repair events. 41599653d4eSeschrock */ 41699653d4eSeschrock if (nvlist_lookup_nvlist_array(nvl, FM_SUSPECT_FAULT_LIST, 41799653d4eSeschrock &faults, &nfaults) != 0) 41899653d4eSeschrock return; 41999653d4eSeschrock 42099653d4eSeschrock for (f = 0; f < nfaults; f++) { 4213d7072f8Seschrock fault = faults[f]; 4223d7072f8Seschrock 4233d7072f8Seschrock fault_device = B_FALSE; 4243d7072f8Seschrock degrade_device = B_FALSE; 425069f55e2SEric Schrock is_disk = B_FALSE; 42699653d4eSeschrock 427cbf75e67SStephen Hanson if (nvlist_lookup_boolean_value(fault, FM_SUSPECT_RETIRE, 428cbf75e67SStephen Hanson &retire) == 0 && retire == 0) 429cbf75e67SStephen Hanson continue; 430cbf75e67SStephen Hanson 43199653d4eSeschrock /* 4323d7072f8Seschrock * While we subscribe to fault.fs.zfs.*, we only take action 4333d7072f8Seschrock * for faults targeting a specific vdev (open failure or SERD 434069f55e2SEric Schrock * failure). We also subscribe to fault.io.* events, so that 435069f55e2SEric Schrock * faulty disks will be faulted in the ZFS configuration. 43699653d4eSeschrock */ 437069f55e2SEric Schrock if (fmd_nvl_class_match(hdl, fault, "fault.fs.zfs.vdev.io")) { 4383d7072f8Seschrock fault_device = B_TRUE; 439069f55e2SEric Schrock } else if (fmd_nvl_class_match(hdl, fault, 440069f55e2SEric Schrock "fault.fs.zfs.vdev.checksum")) { 4413d7072f8Seschrock degrade_device = B_TRUE; 442069f55e2SEric Schrock } else if (fmd_nvl_class_match(hdl, fault, 443069f55e2SEric Schrock "fault.fs.zfs.device")) { 4443d7072f8Seschrock fault_device = B_FALSE; 445069f55e2SEric Schrock } else if (fmd_nvl_class_match(hdl, fault, "fault.io.*")) { 446069f55e2SEric Schrock is_disk = B_TRUE; 447069f55e2SEric Schrock fault_device = B_TRUE; 448069f55e2SEric Schrock } else { 44999653d4eSeschrock continue; 450069f55e2SEric Schrock } 45199653d4eSeschrock 452069f55e2SEric Schrock if (is_disk) { 453069f55e2SEric Schrock /* 454069f55e2SEric Schrock * This is a disk fault. Lookup the FRU, convert it to 455069f55e2SEric Schrock * an FMRI string, and attempt to find a matching vdev. 456069f55e2SEric Schrock */ 457069f55e2SEric Schrock if (nvlist_lookup_nvlist(fault, FM_FAULT_FRU, 458069f55e2SEric Schrock &fru) != 0 || 459069f55e2SEric Schrock nvlist_lookup_string(fru, FM_FMRI_SCHEME, 460069f55e2SEric Schrock &scheme) != 0) 461069f55e2SEric Schrock continue; 462069f55e2SEric Schrock 463069f55e2SEric Schrock if (strcmp(scheme, FM_FMRI_SCHEME_HC) != 0) 464069f55e2SEric Schrock continue; 465069f55e2SEric Schrock 466069f55e2SEric Schrock thp = fmd_hdl_topo_hold(hdl, TOPO_VERSION); 467069f55e2SEric Schrock if (topo_fmri_nvl2str(thp, fru, &fmri, &err) != 0) { 468069f55e2SEric Schrock fmd_hdl_topo_rele(hdl, thp); 469069f55e2SEric Schrock continue; 470069f55e2SEric Schrock } 471069f55e2SEric Schrock 472069f55e2SEric Schrock zhp = find_by_fru(zhdl, fmri, &vdev); 473069f55e2SEric Schrock topo_hdl_strfree(thp, fmri); 474069f55e2SEric Schrock fmd_hdl_topo_rele(hdl, thp); 475069f55e2SEric Schrock 476069f55e2SEric Schrock if (zhp == NULL) 477069f55e2SEric Schrock continue; 478069f55e2SEric Schrock 479069f55e2SEric Schrock (void) nvlist_lookup_uint64(vdev, 480069f55e2SEric Schrock ZPOOL_CONFIG_GUID, &vdev_guid); 481069f55e2SEric Schrock aux = VDEV_AUX_EXTERNAL; 482069f55e2SEric Schrock } else { 483069f55e2SEric Schrock /* 484069f55e2SEric Schrock * This is a ZFS fault. Lookup the resource, and 485069f55e2SEric Schrock * attempt to find the matching vdev. 486069f55e2SEric Schrock */ 487069f55e2SEric Schrock if (nvlist_lookup_nvlist(fault, FM_FAULT_RESOURCE, 488069f55e2SEric Schrock &resource) != 0 || 489069f55e2SEric Schrock nvlist_lookup_string(resource, FM_FMRI_SCHEME, 490069f55e2SEric Schrock &scheme) != 0) 491069f55e2SEric Schrock continue; 492069f55e2SEric Schrock 493069f55e2SEric Schrock if (strcmp(scheme, FM_FMRI_SCHEME_ZFS) != 0) 494069f55e2SEric Schrock continue; 495069f55e2SEric Schrock 496069f55e2SEric Schrock if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_POOL, 497069f55e2SEric Schrock &pool_guid) != 0) 498069f55e2SEric Schrock continue; 499069f55e2SEric Schrock 500069f55e2SEric Schrock if (nvlist_lookup_uint64(resource, FM_FMRI_ZFS_VDEV, 501069f55e2SEric Schrock &vdev_guid) != 0) { 502069f55e2SEric Schrock if (is_repair) 503069f55e2SEric Schrock vdev_guid = 0; 504069f55e2SEric Schrock else 505069f55e2SEric Schrock continue; 506069f55e2SEric Schrock } 507069f55e2SEric Schrock 508069f55e2SEric Schrock if ((zhp = find_by_guid(zhdl, pool_guid, vdev_guid, 509069f55e2SEric Schrock &vdev)) == NULL) 510069f55e2SEric Schrock continue; 511069f55e2SEric Schrock 512069f55e2SEric Schrock aux = VDEV_AUX_ERR_EXCEEDED; 513069f55e2SEric Schrock } 51499653d4eSeschrock 515069f55e2SEric Schrock if (vdev_guid == 0) { 516069f55e2SEric Schrock /* 517069f55e2SEric Schrock * For pool-level repair events, clear the entire pool. 518069f55e2SEric Schrock */ 519*468c413aSTim Haley (void) zpool_clear(zhp, NULL, NULL); 520069f55e2SEric Schrock zpool_close(zhp); 52199653d4eSeschrock continue; 522069f55e2SEric Schrock } 52399653d4eSeschrock 5243d7072f8Seschrock /* 5253d7072f8Seschrock * If this is a repair event, then mark the vdev as repaired and 5263d7072f8Seschrock * continue. 5273d7072f8Seschrock */ 5283d7072f8Seschrock if (is_repair) { 52925c6ff4bSstephh repair_done = 1; 5303d7072f8Seschrock (void) zpool_vdev_clear(zhp, vdev_guid); 53199653d4eSeschrock zpool_close(zhp); 53299653d4eSeschrock continue; 53399653d4eSeschrock } 53499653d4eSeschrock 53599653d4eSeschrock /* 5363d7072f8Seschrock * Actively fault the device if needed. 53799653d4eSeschrock */ 5383d7072f8Seschrock if (fault_device) 539069f55e2SEric Schrock (void) zpool_vdev_fault(zhp, vdev_guid, aux); 5403d7072f8Seschrock if (degrade_device) 541069f55e2SEric Schrock (void) zpool_vdev_degrade(zhp, vdev_guid, aux); 54299653d4eSeschrock 5433d7072f8Seschrock /* 5443d7072f8Seschrock * Attempt to substitute a hot spare. 5453d7072f8Seschrock */ 546069f55e2SEric Schrock replace_with_spare(hdl, zhp, vdev); 54799653d4eSeschrock zpool_close(zhp); 54899653d4eSeschrock } 54925c6ff4bSstephh 55025c6ff4bSstephh if (strcmp(class, FM_LIST_REPAIRED_CLASS) == 0 && repair_done && 55125c6ff4bSstephh nvlist_lookup_string(nvl, FM_SUSPECT_UUID, &uuid) == 0) 55225c6ff4bSstephh fmd_case_uuresolved(hdl, uuid); 55399653d4eSeschrock } 55499653d4eSeschrock 55599653d4eSeschrock static const fmd_hdl_ops_t fmd_ops = { 55699653d4eSeschrock zfs_retire_recv, /* fmdo_recv */ 55799653d4eSeschrock NULL, /* fmdo_timeout */ 55899653d4eSeschrock NULL, /* fmdo_close */ 55999653d4eSeschrock NULL, /* fmdo_stats */ 56099653d4eSeschrock NULL, /* fmdo_gc */ 56199653d4eSeschrock }; 56299653d4eSeschrock 56399653d4eSeschrock static const fmd_prop_t fmd_props[] = { 5643d7072f8Seschrock { "spare_on_remove", FMD_TYPE_BOOL, "true" }, 56599653d4eSeschrock { NULL, 0, NULL } 56699653d4eSeschrock }; 56799653d4eSeschrock 56899653d4eSeschrock static const fmd_hdl_info_t fmd_info = { 56999653d4eSeschrock "ZFS Retire Agent", "1.0", &fmd_ops, fmd_props 57099653d4eSeschrock }; 57199653d4eSeschrock 57299653d4eSeschrock void 57399653d4eSeschrock _fmd_init(fmd_hdl_t *hdl) 57499653d4eSeschrock { 575069f55e2SEric Schrock zfs_retire_data_t *zdp; 57699653d4eSeschrock libzfs_handle_t *zhdl; 57799653d4eSeschrock 57899653d4eSeschrock if ((zhdl = libzfs_init()) == NULL) 57999653d4eSeschrock return; 58099653d4eSeschrock 58199653d4eSeschrock if (fmd_hdl_register(hdl, FMD_API_VERSION, &fmd_info) != 0) { 58299653d4eSeschrock libzfs_fini(zhdl); 58399653d4eSeschrock return; 58499653d4eSeschrock } 58599653d4eSeschrock 586069f55e2SEric Schrock zdp = fmd_hdl_zalloc(hdl, sizeof (zfs_retire_data_t), FMD_SLEEP); 587069f55e2SEric Schrock zdp->zrd_hdl = zhdl; 588069f55e2SEric Schrock 589069f55e2SEric Schrock fmd_hdl_setspecific(hdl, zdp); 59099653d4eSeschrock } 59199653d4eSeschrock 59299653d4eSeschrock void 59399653d4eSeschrock _fmd_fini(fmd_hdl_t *hdl) 59499653d4eSeschrock { 595069f55e2SEric Schrock zfs_retire_data_t *zdp = fmd_hdl_getspecific(hdl); 59699653d4eSeschrock 597069f55e2SEric Schrock if (zdp != NULL) { 598069f55e2SEric Schrock zfs_retire_clear_data(hdl, zdp); 599069f55e2SEric Schrock libzfs_fini(zdp->zrd_hdl); 600069f55e2SEric Schrock fmd_hdl_free(hdl, zdp, sizeof (zfs_retire_data_t)); 601069f55e2SEric Schrock } 60299653d4eSeschrock } 603