1fa9e4066Sahrens /* 2fa9e4066Sahrens * CDDL HEADER START 3fa9e4066Sahrens * 4fa9e4066Sahrens * The contents of this file are subject to the terms of the 5ea8dc4b6Seschrock * Common Development and Distribution License (the "License"). 6ea8dc4b6Seschrock * You may not use this file except in compliance with the License. 7fa9e4066Sahrens * 8fa9e4066Sahrens * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9fa9e4066Sahrens * or http://www.opensolaris.org/os/licensing. 10fa9e4066Sahrens * See the License for the specific language governing permissions 11fa9e4066Sahrens * and limitations under the License. 12fa9e4066Sahrens * 13fa9e4066Sahrens * When distributing Covered Code, include this CDDL HEADER in each 14fa9e4066Sahrens * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15fa9e4066Sahrens * If applicable, add the following below this CDDL HEADER, with the 16fa9e4066Sahrens * fields enclosed by brackets "[]" replaced with your own identifying 17fa9e4066Sahrens * information: Portions Copyright [yyyy] [name of copyright owner] 18fa9e4066Sahrens * 19fa9e4066Sahrens * CDDL HEADER END 20fa9e4066Sahrens */ 21fa9e4066Sahrens /* 22*95173954Sek * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23fa9e4066Sahrens * Use is subject to license terms. 24fa9e4066Sahrens */ 25fa9e4066Sahrens 26fa9e4066Sahrens #pragma ident "%Z%%M% %I% %E% SMI" 27fa9e4066Sahrens 28fa9e4066Sahrens /* 29fa9e4066Sahrens * This file contains the functions which analyze the status of a pool. This 30fa9e4066Sahrens * include both the status of an active pool, as well as the status exported 31fa9e4066Sahrens * pools. Returns one of the ZPOOL_STATUS_* defines describing the status of 32fa9e4066Sahrens * the pool. This status is independent (to a certain degree) from the state of 33fa9e4066Sahrens * the pool. A pool's state descsribes only whether or not it is capable of 34fa9e4066Sahrens * providing the necessary fault tolerance for data. The status describes the 35fa9e4066Sahrens * overall status of devices. A pool that is online can still have a device 36fa9e4066Sahrens * that is experiencing errors. 37fa9e4066Sahrens * 38fa9e4066Sahrens * Only a subset of the possible faults can be detected using 'zpool status', 39fa9e4066Sahrens * and not all possible errors correspond to a FMA message ID. The explanation 40fa9e4066Sahrens * is left up to the caller, depending on whether it is a live pool or an 41fa9e4066Sahrens * import. 42fa9e4066Sahrens */ 43fa9e4066Sahrens 44fa9e4066Sahrens #include <libzfs.h> 45fa9e4066Sahrens #include <string.h> 46*95173954Sek #include <unistd.h> 47fa9e4066Sahrens #include "libzfs_impl.h" 48fa9e4066Sahrens 49fa9e4066Sahrens /* 50fa9e4066Sahrens * Message ID table. This must be kep in sync with the ZPOOL_STATUS_* defines 51fa9e4066Sahrens * in libzfs.h. Note that there are some status results which go past the end 52fa9e4066Sahrens * of this table, and hence have no associated message ID. 53fa9e4066Sahrens */ 54*95173954Sek static char *zfs_msgid_table[] = { 55fa9e4066Sahrens "ZFS-8000-14", 56fa9e4066Sahrens "ZFS-8000-2Q", 57fa9e4066Sahrens "ZFS-8000-3C", 58fa9e4066Sahrens "ZFS-8000-4J", 59fa9e4066Sahrens "ZFS-8000-5E", 60fa9e4066Sahrens "ZFS-8000-6X", 61fa9e4066Sahrens "ZFS-8000-72", 62fa9e4066Sahrens "ZFS-8000-8A", 63fa9e4066Sahrens "ZFS-8000-9P", 64*95173954Sek "ZFS-8000-A5", 65*95173954Sek "ZFS-8000-EY" 66fa9e4066Sahrens }; 67fa9e4066Sahrens 68ea8dc4b6Seschrock /* 69ea8dc4b6Seschrock * If the pool is active, a certain class of static errors is overridden by the 70ea8dc4b6Seschrock * faults as analayzed by FMA. These faults have separate knowledge articles, 71ea8dc4b6Seschrock * and the article referred to by 'zpool status' must match that indicated by 72ea8dc4b6Seschrock * the syslog error message. We override missing data as well as corrupt pool. 73ea8dc4b6Seschrock */ 74*95173954Sek static char *zfs_msgid_table_active[] = { 75ea8dc4b6Seschrock "ZFS-8000-14", 76ea8dc4b6Seschrock "ZFS-8000-D3", /* overridden */ 77ea8dc4b6Seschrock "ZFS-8000-D3", /* overridden */ 78ea8dc4b6Seschrock "ZFS-8000-4J", 79ea8dc4b6Seschrock "ZFS-8000-5E", 80ea8dc4b6Seschrock "ZFS-8000-6X", 81ea8dc4b6Seschrock "ZFS-8000-CS", /* overridden */ 82ea8dc4b6Seschrock "ZFS-8000-8A", 83ea8dc4b6Seschrock "ZFS-8000-9P", 84ea8dc4b6Seschrock "ZFS-8000-CS", /* overridden */ 85ea8dc4b6Seschrock }; 86ea8dc4b6Seschrock 87*95173954Sek #define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0])) 88fa9e4066Sahrens 89fa9e4066Sahrens /* ARGSUSED */ 90fa9e4066Sahrens static int 91fa9e4066Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 92fa9e4066Sahrens { 93fa9e4066Sahrens return (state == VDEV_STATE_CANT_OPEN && 94fa9e4066Sahrens aux == VDEV_AUX_OPEN_FAILED); 95fa9e4066Sahrens } 96fa9e4066Sahrens 97fa9e4066Sahrens /* ARGSUSED */ 98fa9e4066Sahrens static int 99fa9e4066Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 100fa9e4066Sahrens { 101fa9e4066Sahrens return (errs != 0); 102fa9e4066Sahrens } 103fa9e4066Sahrens 104fa9e4066Sahrens /* ARGSUSED */ 105fa9e4066Sahrens static int 106fa9e4066Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 107fa9e4066Sahrens { 108fa9e4066Sahrens return (state == VDEV_STATE_CANT_OPEN); 109fa9e4066Sahrens } 110fa9e4066Sahrens 111fa9e4066Sahrens /* ARGSUSED */ 112fa9e4066Sahrens static int 113fa9e4066Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 114fa9e4066Sahrens { 115fa9e4066Sahrens return (state == VDEV_STATE_OFFLINE); 116fa9e4066Sahrens } 117fa9e4066Sahrens 118fa9e4066Sahrens /* 119fa9e4066Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 120fa9e4066Sahrens */ 12199653d4eSeschrock static boolean_t 122fa9e4066Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 123fa9e4066Sahrens { 124fa9e4066Sahrens nvlist_t **child; 125fa9e4066Sahrens vdev_stat_t *vs; 126fa9e4066Sahrens uint_t c, children; 127fa9e4066Sahrens char *type; 128fa9e4066Sahrens 129fa9e4066Sahrens /* 130fa9e4066Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 131fa9e4066Sahrens * the process of repairing any such errors, and don't want to call them 132fa9e4066Sahrens * out again. We'll pick up the fact that a resilver is happening 133fa9e4066Sahrens * later. 134fa9e4066Sahrens */ 135fa9e4066Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 136fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 13799653d4eSeschrock return (B_FALSE); 138fa9e4066Sahrens 139fa9e4066Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 140fa9e4066Sahrens &children) == 0) { 141fa9e4066Sahrens for (c = 0; c < children; c++) 142fa9e4066Sahrens if (find_vdev_problem(child[c], func)) 14399653d4eSeschrock return (B_TRUE); 144fa9e4066Sahrens } else { 145fa9e4066Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 146fa9e4066Sahrens (uint64_t **)&vs, &c) == 0); 147fa9e4066Sahrens 148fa9e4066Sahrens if (func(vs->vs_state, vs->vs_aux, 149fa9e4066Sahrens vs->vs_read_errors + 150fa9e4066Sahrens vs->vs_write_errors + 151fa9e4066Sahrens vs->vs_checksum_errors)) 15299653d4eSeschrock return (B_TRUE); 153fa9e4066Sahrens } 154fa9e4066Sahrens 15599653d4eSeschrock return (B_FALSE); 156fa9e4066Sahrens } 157fa9e4066Sahrens 158fa9e4066Sahrens /* 159fa9e4066Sahrens * Active pool health status. 160fa9e4066Sahrens * 161fa9e4066Sahrens * To determine the status for a pool, we make several passes over the config, 162fa9e4066Sahrens * picking the most egregious error we find. In order of importance, we do the 163fa9e4066Sahrens * following: 164fa9e4066Sahrens * 165fa9e4066Sahrens * - Check for a complete and valid configuration 166ea8dc4b6Seschrock * - Look for any missing devices in a non-replicated config 167fa9e4066Sahrens * - Check for any data errors 168ea8dc4b6Seschrock * - Check for any missing devices in a replicated config 169ea8dc4b6Seschrock * - Look for any devices showing errors 170fa9e4066Sahrens * - Check for any resilvering devices 171fa9e4066Sahrens * 172fa9e4066Sahrens * There can obviously be multiple errors within a single pool, so this routine 173fa9e4066Sahrens * only picks the most damaging of all the current errors to report. 174fa9e4066Sahrens */ 175fa9e4066Sahrens static zpool_status_t 17699653d4eSeschrock check_status(nvlist_t *config, boolean_t isimport) 177fa9e4066Sahrens { 178fa9e4066Sahrens nvlist_t *nvroot; 179fa9e4066Sahrens vdev_stat_t *vs; 180fa9e4066Sahrens uint_t vsc; 181ea8dc4b6Seschrock uint64_t nerr; 182eaca9bbdSeschrock uint64_t version; 183*95173954Sek uint64_t stateval; 184*95173954Sek uint64_t hostid = 0; 185fa9e4066Sahrens 186eaca9bbdSeschrock verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, 187eaca9bbdSeschrock &version) == 0); 188fa9e4066Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 189fa9e4066Sahrens &nvroot) == 0); 190fa9e4066Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 191fa9e4066Sahrens (uint64_t **)&vs, &vsc) == 0); 192*95173954Sek verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, 193*95173954Sek &stateval) == 0); 194*95173954Sek (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid); 195*95173954Sek 196*95173954Sek /* 197*95173954Sek * Pool last accessed by another system. 198*95173954Sek */ 199*95173954Sek if (hostid != 0 && (unsigned long)hostid != gethostid() && 200*95173954Sek stateval == POOL_STATE_ACTIVE) 201*95173954Sek return (ZPOOL_STATUS_HOSTID_MISMATCH); 202fa9e4066Sahrens 203eaca9bbdSeschrock /* 204eaca9bbdSeschrock * Newer on-disk version. 205eaca9bbdSeschrock */ 206eaca9bbdSeschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 207eaca9bbdSeschrock vs->vs_aux == VDEV_AUX_VERSION_NEWER) 208eaca9bbdSeschrock return (ZPOOL_STATUS_VERSION_NEWER); 209eaca9bbdSeschrock 210fa9e4066Sahrens /* 211fa9e4066Sahrens * Check that the config is complete. 212fa9e4066Sahrens */ 213fa9e4066Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 214ea8dc4b6Seschrock vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) 215fa9e4066Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 216fa9e4066Sahrens 217fa9e4066Sahrens /* 218ea8dc4b6Seschrock * Missing devices in non-replicated config. 219fa9e4066Sahrens */ 220ea8dc4b6Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 221ea8dc4b6Seschrock find_vdev_problem(nvroot, vdev_missing)) 222ea8dc4b6Seschrock return (ZPOOL_STATUS_MISSING_DEV_NR); 223ea8dc4b6Seschrock 224ea8dc4b6Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 225ea8dc4b6Seschrock find_vdev_problem(nvroot, vdev_broken)) 226ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 227ea8dc4b6Seschrock 228ea8dc4b6Seschrock /* 229ea8dc4b6Seschrock * Corrupted pool metadata 230ea8dc4b6Seschrock */ 231ea8dc4b6Seschrock if (vs->vs_state == VDEV_STATE_CANT_OPEN && 232ea8dc4b6Seschrock vs->vs_aux == VDEV_AUX_CORRUPT_DATA) 233ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_POOL); 234fa9e4066Sahrens 235fa9e4066Sahrens /* 236ea8dc4b6Seschrock * Persistent data errors. 237fa9e4066Sahrens */ 238ea8dc4b6Seschrock if (!isimport) { 239ea8dc4b6Seschrock if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT, 240ea8dc4b6Seschrock &nerr) == 0 && nerr != 0) 241ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_DATA); 242fa9e4066Sahrens } 243fa9e4066Sahrens 244ea8dc4b6Seschrock /* 245ea8dc4b6Seschrock * Missing devices in a replicated config. 246ea8dc4b6Seschrock */ 247ea8dc4b6Seschrock if (find_vdev_problem(nvroot, vdev_missing)) 248ea8dc4b6Seschrock return (ZPOOL_STATUS_MISSING_DEV_R); 249ea8dc4b6Seschrock if (find_vdev_problem(nvroot, vdev_broken)) 250ea8dc4b6Seschrock return (ZPOOL_STATUS_CORRUPT_LABEL_R); 251ea8dc4b6Seschrock 252fa9e4066Sahrens /* 253fa9e4066Sahrens * Devices with errors 254fa9e4066Sahrens */ 255fa9e4066Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 256fa9e4066Sahrens return (ZPOOL_STATUS_FAILING_DEV); 257fa9e4066Sahrens 258fa9e4066Sahrens /* 259fa9e4066Sahrens * Offlined devices 260fa9e4066Sahrens */ 261fa9e4066Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 262fa9e4066Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 263fa9e4066Sahrens 264fa9e4066Sahrens /* 265fa9e4066Sahrens * Currently resilvering 266fa9e4066Sahrens */ 267fa9e4066Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 268fa9e4066Sahrens return (ZPOOL_STATUS_RESILVERING); 269fa9e4066Sahrens 270fa9e4066Sahrens /* 271eaca9bbdSeschrock * Outdated, but usable, version 272fa9e4066Sahrens */ 273eaca9bbdSeschrock if (version < ZFS_VERSION) 274eaca9bbdSeschrock return (ZPOOL_STATUS_VERSION_OLDER); 275fa9e4066Sahrens 276fa9e4066Sahrens return (ZPOOL_STATUS_OK); 277fa9e4066Sahrens } 278fa9e4066Sahrens 279fa9e4066Sahrens zpool_status_t 280fa9e4066Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 281fa9e4066Sahrens { 28299653d4eSeschrock zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE); 283fa9e4066Sahrens 284fa9e4066Sahrens if (ret >= NMSGID) 285fa9e4066Sahrens *msgid = NULL; 286fa9e4066Sahrens else 287*95173954Sek *msgid = zfs_msgid_table_active[ret]; 288fa9e4066Sahrens 289fa9e4066Sahrens return (ret); 290fa9e4066Sahrens } 291fa9e4066Sahrens 292fa9e4066Sahrens zpool_status_t 293fa9e4066Sahrens zpool_import_status(nvlist_t *config, char **msgid) 294fa9e4066Sahrens { 29599653d4eSeschrock zpool_status_t ret = check_status(config, B_TRUE); 296fa9e4066Sahrens 297fa9e4066Sahrens if (ret >= NMSGID) 298fa9e4066Sahrens *msgid = NULL; 299fa9e4066Sahrens else 300*95173954Sek *msgid = zfs_msgid_table[ret]; 301fa9e4066Sahrens 302fa9e4066Sahrens return (ret); 303fa9e4066Sahrens } 304