1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 2232b87932Sek * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens /* 29fa9e4066Sahrens * This file contains the functions which analyze the status of a pool. This 30fa9e4066Sahrens * include both the status of an active pool, as well as the status exported 31fa9e4066Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32fa9e4066Sahrens * the pool. This status is independent (to a certain degree) from the state of 333d7072f8Seschrock * the pool. A pool's state describes only whether or not it is capable of 34fa9e4066Sahrens * providing the necessary fault tolerance for data. The status describes the 35fa9e4066Sahrens * overall status of devices. A pool that is online can still have a device 36fa9e4066Sahrens * that is experiencing errors. 37fa9e4066Sahrens * 38fa9e4066Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 39fa9e4066Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 40fa9e4066Sahrens * is left up to the caller, depending on whether it is a live pool or an 41fa9e4066Sahrens * import. 42fa9e4066Sahrens */ 43fa9e4066Sahrens 44fa9e4066Sahrens #include <libzfs.h> 45fa9e4066Sahrens #include <string.h> 4695173954Sek #include <unistd.h> 47fa9e4066Sahrens #include "libzfs_impl.h" 48fa9e4066Sahrens 49fa9e4066Sahrens /* 503d7072f8Seschrock * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines 51fa9e4066Sahrens * in libzfs.h. Note that there are some status results which go past the end 52fa9e4066Sahrens * of this table, and hence have no associated message ID. 53fa9e4066Sahrens */ 5495173954Sek static char *zfs_msgid_table[] = { 55fa9e4066Sahrens "ZFS-8000-14", 56fa9e4066Sahrens "ZFS-8000-2Q", 57fa9e4066Sahrens "ZFS-8000-3C", 58fa9e4066Sahrens "ZFS-8000-4J", 59fa9e4066Sahrens "ZFS-8000-5E", 60fa9e4066Sahrens "ZFS-8000-6X", 61fa9e4066Sahrens "ZFS-8000-72", 62fa9e4066Sahrens "ZFS-8000-8A", 63fa9e4066Sahrens "ZFS-8000-9P", 6495173954Sek "ZFS-8000-A5", 6532b87932Sek "ZFS-8000-EY", 6632b87932Sek "ZFS-8000-HC", 67*b87f3af3Sperrin "ZFS-8000-JQ", 68*b87f3af3Sperrin "ZFS-8000-K4", 69fa9e4066Sahrens }; 70fa9e4066Sahrens 7195173954Sek #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 72fa9e4066Sahrens 73fa9e4066Sahrens /* ARGSUSED */ 74fa9e4066Sahrens static int 75fa9e4066Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 76fa9e4066Sahrens { 77fa9e4066Sahrens return (state == VDEV_STATE_CANT_OPEN && 78fa9e4066Sahrens aux == VDEV_AUX_OPEN_FAILED); 79fa9e4066Sahrens } 80fa9e4066Sahrens 813d7072f8Seschrock /* ARGSUSED */ 823d7072f8Seschrock static int 833d7072f8Seschrock vdev_faulted(uint64_t state, uint64_t aux, uint64_t errs) 843d7072f8Seschrock { 853d7072f8Seschrock return (state == VDEV_STATE_FAULTED); 863d7072f8Seschrock } 873d7072f8Seschrock 88fa9e4066Sahrens /* ARGSUSED */ 89fa9e4066Sahrens static int 90fa9e4066Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 91fa9e4066Sahrens { 923d7072f8Seschrock return (state == VDEV_STATE_DEGRADED || errs != 0); 93fa9e4066Sahrens } 94fa9e4066Sahrens 95fa9e4066Sahrens /* ARGSUSED */ 96fa9e4066Sahrens static int 97fa9e4066Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 98fa9e4066Sahrens { 99fa9e4066Sahrens return (state == VDEV_STATE_CANT_OPEN); 100fa9e4066Sahrens } 101fa9e4066Sahrens 102fa9e4066Sahrens /* ARGSUSED */ 103fa9e4066Sahrens static int 104fa9e4066Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 105fa9e4066Sahrens { 106fa9e4066Sahrens return (state == VDEV_STATE_OFFLINE); 107fa9e4066Sahrens } 108fa9e4066Sahrens 109fa9e4066Sahrens /* 110fa9e4066Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 111fa9e4066Sahrens */ 11299653d4eSeschrock static boolean_t 113fa9e4066Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 114fa9e4066Sahrens { 115fa9e4066Sahrens nvlist_t **child; 116fa9e4066Sahrens vdev_stat_t *vs; 117fa9e4066Sahrens uint_t c, children; 118fa9e4066Sahrens char *type; 119fa9e4066Sahrens 120fa9e4066Sahrens /* 121fa9e4066Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 122fa9e4066Sahrens * the process of repairing any such errors, and don't want to call them 123fa9e4066Sahrens * out again. We'll pick up the fact that a resilver is happening 124fa9e4066Sahrens * later. 125fa9e4066Sahrens */ 126fa9e4066Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 127fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 12899653d4eSeschrock return (B_FALSE); 129fa9e4066Sahrens 130fa9e4066Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 131fa9e4066Sahrens &children) == 0) { 132fa9e4066Sahrens for (c = 0; c < children; c++) 133fa9e4066Sahrens if (find_vdev_problem(child[c], func)) 13499653d4eSeschrock return (B_TRUE); 135fa9e4066Sahrens } else { 136fa9e4066Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 137fa9e4066Sahrens (uint64_t **)&vs, &c) == 0); 138fa9e4066Sahrens 139fa9e4066Sahrens if (func(vs->vs_state, vs->vs_aux, 140fa9e4066Sahrens vs->vs_read_errors + 141fa9e4066Sahrens vs->vs_write_errors + 142fa9e4066Sahrens vs->vs_checksum_errors)) 14399653d4eSeschrock return (B_TRUE); 144fa9e4066Sahrens } 145fa9e4066Sahrens 14699653d4eSeschrock return (B_FALSE); 147fa9e4066Sahrens } 148fa9e4066Sahrens 149fa9e4066Sahrens /* 150fa9e4066Sahrens * Active pool health status. 151fa9e4066Sahrens * 152fa9e4066Sahrens * To determine the status for a pool, we make several passes over the config, 153fa9e4066Sahrens * picking the most egregious error we find. In order of importance, we do the 154fa9e4066Sahrens * following: 155fa9e4066Sahrens * 156fa9e4066Sahrens * - Check for a complete and valid configuration 1573d7072f8Seschrock * - Look for any faulted or missing devices in a non-replicated config 158fa9e4066Sahrens * - Check for any data errors 1593d7072f8Seschrock * - Check for any faulted or missing devices in a replicated config 160ea8dc4b6Seschrock * - Look for any devices showing errors 161fa9e4066Sahrens * - Check for any resilvering devices 162fa9e4066Sahrens * 163fa9e4066Sahrens * There can obviously be multiple errors within a single pool, so this routine 164fa9e4066Sahrens * only picks the most damaging of all the current errors to report. 165fa9e4066Sahrens */ 166fa9e4066Sahrens static zpool_status_t 16732b87932Sek check_status(zpool_handle_t *zhp, nvlist_t *config, boolean_t isimport) 168fa9e4066Sahrens { 169fa9e4066Sahrens nvlist_t *nvroot; 170fa9e4066Sahrens vdev_stat_t *vs; 171fa9e4066Sahrens uint_t vsc; 172ea8dc4b6Seschrock uint64_t nerr; 173eaca9bbdSeschrock uint64_t version; 17495173954Sek uint64_t stateval; 17595173954Sek uint64_t hostid = 0; 176fa9e4066Sahrens 177eaca9bbdSeschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 178eaca9bbdSeschrock &version) == 0); 179fa9e4066Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 180fa9e4066Sahrens &nvroot) == 0); 181fa9e4066Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 182fa9e4066Sahrens (uint64_t **)&vs, &vsc) == 0); 18395173954Sek verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 18495173954Sek &stateval) == 0); 18595173954Sek (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 18695173954Sek 18795173954Sek /* 18895173954Sek * Pool last accessed by another system. 18995173954Sek */ 19095173954Sek if (hostid != 0 && (unsigned long)hostid != gethostid() && 19195173954Sek stateval == POOL_STATE_ACTIVE) 19295173954Sek return (ZPOOL_STATUS_HOSTID_MISMATCH); 193fa9e4066Sahrens 194eaca9bbdSeschrock /* 195eaca9bbdSeschrock * Newer on-disk version. 196eaca9bbdSeschrock */ 197eaca9bbdSeschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 198eaca9bbdSeschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 199eaca9bbdSeschrock return (ZPOOL_STATUS_VERSION_NEWER); 200eaca9bbdSeschrock 201fa9e4066Sahrens /* 202fa9e4066Sahrens * Check that the config is complete. 203fa9e4066Sahrens */ 204fa9e4066Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 205ea8dc4b6Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 206fa9e4066Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 207fa9e4066Sahrens 20832b87932Sek /* 20932b87932Sek * Pool has experienced failed I/O. 21032b87932Sek */ 21132b87932Sek if (stateval == POOL_STATE_IO_FAILURE) { 21232b87932Sek zpool_handle_t *tmp_zhp = NULL; 21332b87932Sek libzfs_handle_t *hdl = NULL; 21432b87932Sek char property[ZPOOL_MAXPROPLEN]; 21532b87932Sek char *failmode = NULL; 21632b87932Sek 21732b87932Sek if (zhp == NULL) { 21832b87932Sek char *poolname; 21932b87932Sek 22032b87932Sek verify(nvlist_lookup_string(config, 22132b87932Sek ZPOOL_CONFIG_POOL_NAME, &poolname) == 0); 22232b87932Sek if ((hdl = libzfs_init()) == NULL) 22332b87932Sek return (ZPOOL_STATUS_IO_FAILURE_WAIT); 22432b87932Sek tmp_zhp = zpool_open_canfail(hdl, poolname); 22532b87932Sek if (tmp_zhp == NULL) { 22632b87932Sek libzfs_fini(hdl); 22732b87932Sek return (ZPOOL_STATUS_IO_FAILURE_WAIT); 22832b87932Sek } 22932b87932Sek } 23032b87932Sek if (zpool_get_prop(zhp ? zhp : tmp_zhp, ZPOOL_PROP_FAILUREMODE, 23132b87932Sek property, sizeof (property), NULL) == 0) 23232b87932Sek failmode = property; 23332b87932Sek if (tmp_zhp != NULL) 23432b87932Sek zpool_close(tmp_zhp); 23532b87932Sek if (hdl != NULL) 23632b87932Sek libzfs_fini(hdl); 23732b87932Sek if (failmode == NULL) 23832b87932Sek return (ZPOOL_STATUS_IO_FAILURE_WAIT); 23932b87932Sek 24032b87932Sek if (strncmp(failmode, "continue", strlen("continue")) == 0) 24132b87932Sek return (ZPOOL_STATUS_IO_FAILURE_CONTINUE); 24232b87932Sek else 24332b87932Sek return (ZPOOL_STATUS_IO_FAILURE_WAIT); 24432b87932Sek } 24532b87932Sek 246*b87f3af3Sperrin /* 247*b87f3af3Sperrin * Could not read a log. 248*b87f3af3Sperrin */ 249*b87f3af3Sperrin if (vs->vs_state == VDEV_STATE_CANT_OPEN && 250*b87f3af3Sperrin vs->vs_aux == VDEV_AUX_BAD_LOG) { 251*b87f3af3Sperrin return (ZPOOL_STATUS_BAD_LOG); 252*b87f3af3Sperrin } 253*b87f3af3Sperrin 254fa9e4066Sahrens /* 2553d7072f8Seschrock * Bad devices in non-replicated config. 256fa9e4066Sahrens */ 2573d7072f8Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 2583d7072f8Seschrock find_vdev_problem(nvroot, vdev_faulted)) 2593d7072f8Seschrock return (ZPOOL_STATUS_FAULTED_DEV_NR); 2603d7072f8Seschrock 261ea8dc4b6Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 262ea8dc4b6Seschrock find_vdev_problem(nvroot, vdev_missing)) 263ea8dc4b6Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 264ea8dc4b6Seschrock 265ea8dc4b6Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 266ea8dc4b6Seschrock find_vdev_problem(nvroot, vdev_broken)) 267ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 268ea8dc4b6Seschrock 269ea8dc4b6Seschrock /* 270ea8dc4b6Seschrock * Corrupted pool metadata 271ea8dc4b6Seschrock */ 272ea8dc4b6Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 273ea8dc4b6Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 274ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_POOL); 275fa9e4066Sahrens 276fa9e4066Sahrens /* 277ea8dc4b6Seschrock * Persistent data errors. 278fa9e4066Sahrens */ 279ea8dc4b6Seschrock if (!isimport) { 280ea8dc4b6Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 281ea8dc4b6Seschrock &nerr) == 0 && nerr != 0) 282ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_DATA); 283fa9e4066Sahrens } 284fa9e4066Sahrens 285ea8dc4b6Seschrock /* 286ea8dc4b6Seschrock * Missing devices in a replicated config. 287ea8dc4b6Seschrock */ 2883d7072f8Seschrock if (find_vdev_problem(nvroot, vdev_faulted)) 2893d7072f8Seschrock return (ZPOOL_STATUS_FAULTED_DEV_R); 290ea8dc4b6Seschrock if (find_vdev_problem(nvroot, vdev_missing)) 291ea8dc4b6Seschrock return (ZPOOL_STATUS_MISSING_DEV_R); 292ea8dc4b6Seschrock if (find_vdev_problem(nvroot, vdev_broken)) 293ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 294ea8dc4b6Seschrock 295fa9e4066Sahrens /* 296fa9e4066Sahrens * Devices with errors 297fa9e4066Sahrens */ 298fa9e4066Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 299fa9e4066Sahrens return (ZPOOL_STATUS_FAILING_DEV); 300fa9e4066Sahrens 301fa9e4066Sahrens /* 302fa9e4066Sahrens * Offlined devices 303fa9e4066Sahrens */ 304fa9e4066Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 305fa9e4066Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 306fa9e4066Sahrens 307fa9e4066Sahrens /* 308fa9e4066Sahrens * Currently resilvering 309fa9e4066Sahrens */ 310fa9e4066Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 311fa9e4066Sahrens return (ZPOOL_STATUS_RESILVERING); 312fa9e4066Sahrens 313fa9e4066Sahrens /* 314eaca9bbdSeschrock * Outdated, but usable, version 315fa9e4066Sahrens */ 316e7437265Sahrens if (version < SPA_VERSION) 317eaca9bbdSeschrock return (ZPOOL_STATUS_VERSION_OLDER); 318fa9e4066Sahrens 319fa9e4066Sahrens return (ZPOOL_STATUS_OK); 320fa9e4066Sahrens } 321fa9e4066Sahrens 322fa9e4066Sahrens zpool_status_t 323fa9e4066Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 324fa9e4066Sahrens { 32532b87932Sek zpool_status_t ret = check_status(zhp, zhp->zpool_config, B_FALSE); 326fa9e4066Sahrens 327fa9e4066Sahrens if (ret >= NMSGID) 328fa9e4066Sahrens *msgid = NULL; 329fa9e4066Sahrens else 3303d7072f8Seschrock *msgid = zfs_msgid_table[ret]; 331fa9e4066Sahrens 332fa9e4066Sahrens return (ret); 333fa9e4066Sahrens } 334fa9e4066Sahrens 335fa9e4066Sahrens zpool_status_t 336fa9e4066Sahrens zpool_import_status(nvlist_t *config, char **msgid) 337fa9e4066Sahrens { 33832b87932Sek zpool_status_t ret = check_status(NULL, config, B_TRUE); 339fa9e4066Sahrens 340fa9e4066Sahrens if (ret >= NMSGID) 341fa9e4066Sahrens *msgid = NULL; 342fa9e4066Sahrens else 34395173954Sek *msgid = zfs_msgid_table[ret]; 344fa9e4066Sahrens 345fa9e4066Sahrens return (ret); 346fa9e4066Sahrens } 347