/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License"). You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * This file contains the functions which analyze the status of a pool. This
 * includes both the status of an active pool, as well as the status of
 * exported pools. Returns one of the ZPOOL_STATUS_* defines describing the
 * status of the pool. This status is independent (to a certain degree) from
 * the state of the pool. A pool's state describes only whether or not it is
 * capable of providing the necessary fault tolerance for data. The status
 * describes the overall status of devices. A pool that is online can still
 * have a device that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to an FMA message ID. The explanation
 * is left up to the caller, depending on whether it is a live pool or an
 * import.
 */

#include <libzfs.h>
#include <string.h>
#include "libzfs_impl.h"

/*
 * Message ID table. This must be kept in sync with the ZPOOL_STATUS_* defines
 * in libzfs.h. Note that there are some status results which go past the end
 * of this table, and hence have no associated message ID.
53*fa9e4066Sahrens */ 54*fa9e4066Sahrens static char *msgid_table[] = { 55*fa9e4066Sahrens "ZFS-8000-14", 56*fa9e4066Sahrens "ZFS-8000-2Q", 57*fa9e4066Sahrens "ZFS-8000-3C", 58*fa9e4066Sahrens "ZFS-8000-4J", 59*fa9e4066Sahrens "ZFS-8000-5E", 60*fa9e4066Sahrens "ZFS-8000-6X", 61*fa9e4066Sahrens "ZFS-8000-72", 62*fa9e4066Sahrens "ZFS-8000-8A", 63*fa9e4066Sahrens "ZFS-8000-9P", 64*fa9e4066Sahrens "ZFS-8000-A5" 65*fa9e4066Sahrens }; 66*fa9e4066Sahrens 67*fa9e4066Sahrens #define NMSGID (sizeof (msgid_table) / sizeof (msgid_table[0])) 68*fa9e4066Sahrens 69*fa9e4066Sahrens /* ARGSUSED */ 70*fa9e4066Sahrens static int 71*fa9e4066Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs) 72*fa9e4066Sahrens { 73*fa9e4066Sahrens return (state == VDEV_STATE_CANT_OPEN && 74*fa9e4066Sahrens aux == VDEV_AUX_OPEN_FAILED); 75*fa9e4066Sahrens } 76*fa9e4066Sahrens 77*fa9e4066Sahrens /* ARGSUSED */ 78*fa9e4066Sahrens static int 79*fa9e4066Sahrens vdev_errors(uint64_t state, uint64_t aux, uint64_t errs) 80*fa9e4066Sahrens { 81*fa9e4066Sahrens return (errs != 0); 82*fa9e4066Sahrens } 83*fa9e4066Sahrens 84*fa9e4066Sahrens /* ARGSUSED */ 85*fa9e4066Sahrens static int 86*fa9e4066Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs) 87*fa9e4066Sahrens { 88*fa9e4066Sahrens return (state == VDEV_STATE_CANT_OPEN); 89*fa9e4066Sahrens } 90*fa9e4066Sahrens 91*fa9e4066Sahrens /* ARGSUSED */ 92*fa9e4066Sahrens static int 93*fa9e4066Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs) 94*fa9e4066Sahrens { 95*fa9e4066Sahrens return (state == VDEV_STATE_OFFLINE); 96*fa9e4066Sahrens } 97*fa9e4066Sahrens 98*fa9e4066Sahrens /* 99*fa9e4066Sahrens * Detect if any leaf devices that have seen errors or could not be opened. 
100*fa9e4066Sahrens */ 101*fa9e4066Sahrens static int 102*fa9e4066Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t)) 103*fa9e4066Sahrens { 104*fa9e4066Sahrens nvlist_t **child; 105*fa9e4066Sahrens vdev_stat_t *vs; 106*fa9e4066Sahrens uint_t c, children; 107*fa9e4066Sahrens char *type; 108*fa9e4066Sahrens 109*fa9e4066Sahrens /* 110*fa9e4066Sahrens * Ignore problems within a 'replacing' vdev, since we're presumably in 111*fa9e4066Sahrens * the process of repairing any such errors, and don't want to call them 112*fa9e4066Sahrens * out again. We'll pick up the fact that a resilver is happening 113*fa9e4066Sahrens * later. 114*fa9e4066Sahrens */ 115*fa9e4066Sahrens verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0); 116*fa9e4066Sahrens if (strcmp(type, VDEV_TYPE_REPLACING) == 0) 117*fa9e4066Sahrens return (FALSE); 118*fa9e4066Sahrens 119*fa9e4066Sahrens if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child, 120*fa9e4066Sahrens &children) == 0) { 121*fa9e4066Sahrens for (c = 0; c < children; c++) 122*fa9e4066Sahrens if (find_vdev_problem(child[c], func)) 123*fa9e4066Sahrens return (TRUE); 124*fa9e4066Sahrens } else { 125*fa9e4066Sahrens verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS, 126*fa9e4066Sahrens (uint64_t **)&vs, &c) == 0); 127*fa9e4066Sahrens 128*fa9e4066Sahrens if (func(vs->vs_state, vs->vs_aux, 129*fa9e4066Sahrens vs->vs_read_errors + 130*fa9e4066Sahrens vs->vs_write_errors + 131*fa9e4066Sahrens vs->vs_checksum_errors)) 132*fa9e4066Sahrens return (TRUE); 133*fa9e4066Sahrens } 134*fa9e4066Sahrens 135*fa9e4066Sahrens return (FALSE); 136*fa9e4066Sahrens } 137*fa9e4066Sahrens 138*fa9e4066Sahrens /* 139*fa9e4066Sahrens * Active pool health status. 140*fa9e4066Sahrens * 141*fa9e4066Sahrens * To determine the status for a pool, we make several passes over the config, 142*fa9e4066Sahrens * picking the most egregious error we find. 
In order of importance, we do the 143*fa9e4066Sahrens * following: 144*fa9e4066Sahrens * 145*fa9e4066Sahrens * - Check for a complete and valid configuration 146*fa9e4066Sahrens * - Look for any missing devices 147*fa9e4066Sahrens * - Look for any devices showing errors 148*fa9e4066Sahrens * - Check for any data errors 149*fa9e4066Sahrens * - Check for any resilvering devices 150*fa9e4066Sahrens * 151*fa9e4066Sahrens * There can obviously be multiple errors within a single pool, so this routine 152*fa9e4066Sahrens * only picks the most damaging of all the current errors to report. 153*fa9e4066Sahrens */ 154*fa9e4066Sahrens static zpool_status_t 155*fa9e4066Sahrens check_status(nvlist_t *config, int isimport) 156*fa9e4066Sahrens { 157*fa9e4066Sahrens nvlist_t *nvroot; 158*fa9e4066Sahrens vdev_stat_t *vs; 159*fa9e4066Sahrens uint_t vsc; 160*fa9e4066Sahrens 161*fa9e4066Sahrens verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, 162*fa9e4066Sahrens &nvroot) == 0); 163*fa9e4066Sahrens verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS, 164*fa9e4066Sahrens (uint64_t **)&vs, &vsc) == 0); 165*fa9e4066Sahrens 166*fa9e4066Sahrens /* 167*fa9e4066Sahrens * Check that the config is complete. 168*fa9e4066Sahrens */ 169*fa9e4066Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN && 170*fa9e4066Sahrens vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) { 171*fa9e4066Sahrens return (ZPOOL_STATUS_BAD_GUID_SUM); 172*fa9e4066Sahrens } 173*fa9e4066Sahrens 174*fa9e4066Sahrens /* 175*fa9e4066Sahrens * Missing devices 176*fa9e4066Sahrens */ 177*fa9e4066Sahrens if (find_vdev_problem(nvroot, vdev_missing)) { 178*fa9e4066Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN) 179*fa9e4066Sahrens return (ZPOOL_STATUS_MISSING_DEV_NR); 180*fa9e4066Sahrens else 181*fa9e4066Sahrens return (ZPOOL_STATUS_MISSING_DEV_R); 182*fa9e4066Sahrens } 183*fa9e4066Sahrens 184*fa9e4066Sahrens /* 185*fa9e4066Sahrens * Devices with corrupted labels. 
186*fa9e4066Sahrens */ 187*fa9e4066Sahrens if (find_vdev_problem(nvroot, vdev_broken)) { 188*fa9e4066Sahrens if (vs->vs_state == VDEV_STATE_CANT_OPEN) 189*fa9e4066Sahrens return (ZPOOL_STATUS_CORRUPT_LABEL_NR); 190*fa9e4066Sahrens else 191*fa9e4066Sahrens return (ZPOOL_STATUS_CORRUPT_LABEL_R); 192*fa9e4066Sahrens } 193*fa9e4066Sahrens 194*fa9e4066Sahrens /* 195*fa9e4066Sahrens * Devices with errors 196*fa9e4066Sahrens */ 197*fa9e4066Sahrens if (!isimport && find_vdev_problem(nvroot, vdev_errors)) 198*fa9e4066Sahrens return (ZPOOL_STATUS_FAILING_DEV); 199*fa9e4066Sahrens 200*fa9e4066Sahrens /* 201*fa9e4066Sahrens * Offlined devices 202*fa9e4066Sahrens */ 203*fa9e4066Sahrens if (find_vdev_problem(nvroot, vdev_offlined)) 204*fa9e4066Sahrens return (ZPOOL_STATUS_OFFLINE_DEV); 205*fa9e4066Sahrens 206*fa9e4066Sahrens /* 207*fa9e4066Sahrens * Currently resilvering 208*fa9e4066Sahrens */ 209*fa9e4066Sahrens if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER) 210*fa9e4066Sahrens return (ZPOOL_STATUS_RESILVERING); 211*fa9e4066Sahrens 212*fa9e4066Sahrens /* 213*fa9e4066Sahrens * We currently have no way to detect the following errors: 214*fa9e4066Sahrens * 215*fa9e4066Sahrens * CORRUPT_CACHE 216*fa9e4066Sahrens * VERSION_MISMATCH 217*fa9e4066Sahrens * CORRUPT_POOL 218*fa9e4066Sahrens * CORRUPT_DATA 219*fa9e4066Sahrens */ 220*fa9e4066Sahrens 221*fa9e4066Sahrens return (ZPOOL_STATUS_OK); 222*fa9e4066Sahrens } 223*fa9e4066Sahrens 224*fa9e4066Sahrens zpool_status_t 225*fa9e4066Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid) 226*fa9e4066Sahrens { 227*fa9e4066Sahrens zpool_status_t ret = check_status(zhp->zpool_config, FALSE); 228*fa9e4066Sahrens 229*fa9e4066Sahrens if (ret >= NMSGID) 230*fa9e4066Sahrens *msgid = NULL; 231*fa9e4066Sahrens else 232*fa9e4066Sahrens *msgid = msgid_table[ret]; 233*fa9e4066Sahrens 234*fa9e4066Sahrens return (ret); 235*fa9e4066Sahrens } 236*fa9e4066Sahrens 237*fa9e4066Sahrens zpool_status_t 238*fa9e4066Sahrens 
zpool_import_status(nvlist_t *config, char **msgid) 239*fa9e4066Sahrens { 240*fa9e4066Sahrens zpool_status_t ret = check_status(config, TRUE); 241*fa9e4066Sahrens 242*fa9e4066Sahrens if (ret >= NMSGID) 243*fa9e4066Sahrens *msgid = NULL; 244*fa9e4066Sahrens else 245*fa9e4066Sahrens *msgid = msgid_table[ret]; 246*fa9e4066Sahrens 247*fa9e4066Sahrens return (ret); 248*fa9e4066Sahrens } 249