1*fa9e4066Sahrens /*
2*fa9e4066Sahrens  * CDDL HEADER START
3*fa9e4066Sahrens  *
4*fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5*fa9e4066Sahrens  * Common Development and Distribution License, Version 1.0 only
6*fa9e4066Sahrens  * (the "License").  You may not use this file except in compliance
7*fa9e4066Sahrens  * with the License.
8*fa9e4066Sahrens  *
9*fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
11*fa9e4066Sahrens  * See the License for the specific language governing permissions
12*fa9e4066Sahrens  * and limitations under the License.
13*fa9e4066Sahrens  *
14*fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
15*fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
17*fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
18*fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
19*fa9e4066Sahrens  *
20*fa9e4066Sahrens  * CDDL HEADER END
21*fa9e4066Sahrens  */
22*fa9e4066Sahrens /*
23*fa9e4066Sahrens  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*fa9e4066Sahrens  * Use is subject to license terms.
25*fa9e4066Sahrens  */
26*fa9e4066Sahrens 
27*fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*fa9e4066Sahrens 
29*fa9e4066Sahrens /*
30*fa9e4066Sahrens  * This file contains the functions which analyze the status of a pool.  This
31*fa9e4066Sahrens  * includes both the status of an active pool and the status of exported
32*fa9e4066Sahrens  * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
33*fa9e4066Sahrens  * the pool.  This status is independent (to a certain degree) from the state of
34*fa9e4066Sahrens  * the pool.  A pool's state describes only whether or not it is capable of
35*fa9e4066Sahrens  * providing the necessary fault tolerance for data.  The status describes the
36*fa9e4066Sahrens  * overall status of devices.  A pool that is online can still have a device
37*fa9e4066Sahrens  * that is experiencing errors.
38*fa9e4066Sahrens  *
39*fa9e4066Sahrens  * Only a subset of the possible faults can be detected using 'zpool status',
40*fa9e4066Sahrens  * and not all possible errors correspond to a FMA message ID.  The explanation
41*fa9e4066Sahrens  * is left up to the caller, depending on whether it is a live pool or an
42*fa9e4066Sahrens  * import.
43*fa9e4066Sahrens  */
44*fa9e4066Sahrens 
45*fa9e4066Sahrens #include <libzfs.h>
46*fa9e4066Sahrens #include <string.h>
47*fa9e4066Sahrens #include "libzfs_impl.h"
48*fa9e4066Sahrens 
/*
 * Message ID table, indexed by zpool_status_t value.  This must be kept in
 * sync with the ZPOOL_STATUS_* defines in libzfs.h.  Note that there are some
 * status results which go past the end of this table, and hence have no
 * associated message ID.
 *
 * The array itself is const so that the table cannot be modified at run time;
 * the element type remains 'char *' so that entries can be handed out through
 * a 'char **' out-parameter without a cast.
 */
static char *const msgid_table[] = {
	"ZFS-8000-14",
	"ZFS-8000-2Q",
	"ZFS-8000-3C",
	"ZFS-8000-4J",
	"ZFS-8000-5E",
	"ZFS-8000-6X",
	"ZFS-8000-72",
	"ZFS-8000-8A",
	"ZFS-8000-9P",
	"ZFS-8000-A5"
};

/* Number of entries in msgid_table. */
#define	NMSGID	(sizeof (msgid_table) / sizeof (msgid_table[0]))
68*fa9e4066Sahrens 
69*fa9e4066Sahrens /* ARGSUSED */
70*fa9e4066Sahrens static int
71*fa9e4066Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
72*fa9e4066Sahrens {
73*fa9e4066Sahrens 	return (state == VDEV_STATE_CANT_OPEN &&
74*fa9e4066Sahrens 	    aux == VDEV_AUX_OPEN_FAILED);
75*fa9e4066Sahrens }
76*fa9e4066Sahrens 
/*
 * Predicate for find_vdev_problem(): returns 1 when the supplied error count
 * is nonzero and 0 otherwise.  'state' and 'aux' are not consulted.
 */
/* ARGSUSED */
static int
vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
{
	if (errs == 0)
		return (0);

	return (1);
}
83*fa9e4066Sahrens 
84*fa9e4066Sahrens /* ARGSUSED */
85*fa9e4066Sahrens static int
86*fa9e4066Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
87*fa9e4066Sahrens {
88*fa9e4066Sahrens 	return (state == VDEV_STATE_CANT_OPEN);
89*fa9e4066Sahrens }
90*fa9e4066Sahrens 
91*fa9e4066Sahrens /* ARGSUSED */
92*fa9e4066Sahrens static int
93*fa9e4066Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
94*fa9e4066Sahrens {
95*fa9e4066Sahrens 	return (state == VDEV_STATE_OFFLINE);
96*fa9e4066Sahrens }
97*fa9e4066Sahrens 
98*fa9e4066Sahrens /*
99*fa9e4066Sahrens  * Detect if any leaf devices that have seen errors or could not be opened.
100*fa9e4066Sahrens  */
101*fa9e4066Sahrens static int
102*fa9e4066Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
103*fa9e4066Sahrens {
104*fa9e4066Sahrens 	nvlist_t **child;
105*fa9e4066Sahrens 	vdev_stat_t *vs;
106*fa9e4066Sahrens 	uint_t c, children;
107*fa9e4066Sahrens 	char *type;
108*fa9e4066Sahrens 
109*fa9e4066Sahrens 	/*
110*fa9e4066Sahrens 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
111*fa9e4066Sahrens 	 * the process of repairing any such errors, and don't want to call them
112*fa9e4066Sahrens 	 * out again.  We'll pick up the fact that a resilver is happening
113*fa9e4066Sahrens 	 * later.
114*fa9e4066Sahrens 	 */
115*fa9e4066Sahrens 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
116*fa9e4066Sahrens 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
117*fa9e4066Sahrens 		return (FALSE);
118*fa9e4066Sahrens 
119*fa9e4066Sahrens 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
120*fa9e4066Sahrens 	    &children) == 0) {
121*fa9e4066Sahrens 		for (c = 0; c < children; c++)
122*fa9e4066Sahrens 			if (find_vdev_problem(child[c], func))
123*fa9e4066Sahrens 				return (TRUE);
124*fa9e4066Sahrens 	} else {
125*fa9e4066Sahrens 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
126*fa9e4066Sahrens 		    (uint64_t **)&vs, &c) == 0);
127*fa9e4066Sahrens 
128*fa9e4066Sahrens 		if (func(vs->vs_state, vs->vs_aux,
129*fa9e4066Sahrens 		    vs->vs_read_errors +
130*fa9e4066Sahrens 		    vs->vs_write_errors +
131*fa9e4066Sahrens 		    vs->vs_checksum_errors))
132*fa9e4066Sahrens 			return (TRUE);
133*fa9e4066Sahrens 	}
134*fa9e4066Sahrens 
135*fa9e4066Sahrens 	return (FALSE);
136*fa9e4066Sahrens }
137*fa9e4066Sahrens 
138*fa9e4066Sahrens /*
139*fa9e4066Sahrens  * Active pool health status.
140*fa9e4066Sahrens  *
141*fa9e4066Sahrens  * To determine the status for a pool, we make several passes over the config,
142*fa9e4066Sahrens  * picking the most egregious error we find.  In order of importance, we do the
143*fa9e4066Sahrens  * following:
144*fa9e4066Sahrens  *
145*fa9e4066Sahrens  *	- Check for a complete and valid configuration
146*fa9e4066Sahrens  *	- Look for any missing devices
147*fa9e4066Sahrens  *	- Look for any devices showing errors
148*fa9e4066Sahrens  *	- Check for any data errors
149*fa9e4066Sahrens  *	- Check for any resilvering devices
150*fa9e4066Sahrens  *
151*fa9e4066Sahrens  * There can obviously be multiple errors within a single pool, so this routine
152*fa9e4066Sahrens  * only picks the most damaging of all the current errors to report.
153*fa9e4066Sahrens  */
154*fa9e4066Sahrens static zpool_status_t
155*fa9e4066Sahrens check_status(nvlist_t *config, int isimport)
156*fa9e4066Sahrens {
157*fa9e4066Sahrens 	nvlist_t *nvroot;
158*fa9e4066Sahrens 	vdev_stat_t *vs;
159*fa9e4066Sahrens 	uint_t vsc;
160*fa9e4066Sahrens 
161*fa9e4066Sahrens 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
162*fa9e4066Sahrens 	    &nvroot) == 0);
163*fa9e4066Sahrens 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
164*fa9e4066Sahrens 	    (uint64_t **)&vs, &vsc) == 0);
165*fa9e4066Sahrens 
166*fa9e4066Sahrens 	/*
167*fa9e4066Sahrens 	 * Check that the config is complete.
168*fa9e4066Sahrens 	 */
169*fa9e4066Sahrens 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
170*fa9e4066Sahrens 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM) {
171*fa9e4066Sahrens 		return (ZPOOL_STATUS_BAD_GUID_SUM);
172*fa9e4066Sahrens 	}
173*fa9e4066Sahrens 
174*fa9e4066Sahrens 	/*
175*fa9e4066Sahrens 	 * Missing devices
176*fa9e4066Sahrens 	 */
177*fa9e4066Sahrens 	if (find_vdev_problem(nvroot, vdev_missing)) {
178*fa9e4066Sahrens 		if (vs->vs_state == VDEV_STATE_CANT_OPEN)
179*fa9e4066Sahrens 			return (ZPOOL_STATUS_MISSING_DEV_NR);
180*fa9e4066Sahrens 		else
181*fa9e4066Sahrens 			return (ZPOOL_STATUS_MISSING_DEV_R);
182*fa9e4066Sahrens 	}
183*fa9e4066Sahrens 
184*fa9e4066Sahrens 	/*
185*fa9e4066Sahrens 	 * Devices with corrupted labels.
186*fa9e4066Sahrens 	 */
187*fa9e4066Sahrens 	if (find_vdev_problem(nvroot, vdev_broken)) {
188*fa9e4066Sahrens 		if (vs->vs_state == VDEV_STATE_CANT_OPEN)
189*fa9e4066Sahrens 			return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
190*fa9e4066Sahrens 		else
191*fa9e4066Sahrens 			return (ZPOOL_STATUS_CORRUPT_LABEL_R);
192*fa9e4066Sahrens 	}
193*fa9e4066Sahrens 
194*fa9e4066Sahrens 	/*
195*fa9e4066Sahrens 	 * Devices with errors
196*fa9e4066Sahrens 	 */
197*fa9e4066Sahrens 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
198*fa9e4066Sahrens 		return (ZPOOL_STATUS_FAILING_DEV);
199*fa9e4066Sahrens 
200*fa9e4066Sahrens 	/*
201*fa9e4066Sahrens 	 * Offlined devices
202*fa9e4066Sahrens 	 */
203*fa9e4066Sahrens 	if (find_vdev_problem(nvroot, vdev_offlined))
204*fa9e4066Sahrens 		return (ZPOOL_STATUS_OFFLINE_DEV);
205*fa9e4066Sahrens 
206*fa9e4066Sahrens 	/*
207*fa9e4066Sahrens 	 * Currently resilvering
208*fa9e4066Sahrens 	 */
209*fa9e4066Sahrens 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
210*fa9e4066Sahrens 		return (ZPOOL_STATUS_RESILVERING);
211*fa9e4066Sahrens 
212*fa9e4066Sahrens 	/*
213*fa9e4066Sahrens 	 * We currently have no way to detect the following errors:
214*fa9e4066Sahrens 	 *
215*fa9e4066Sahrens 	 * 	CORRUPT_CACHE
216*fa9e4066Sahrens 	 * 	VERSION_MISMATCH
217*fa9e4066Sahrens 	 * 	CORRUPT_POOL
218*fa9e4066Sahrens 	 * 	CORRUPT_DATA
219*fa9e4066Sahrens 	 */
220*fa9e4066Sahrens 
221*fa9e4066Sahrens 	return (ZPOOL_STATUS_OK);
222*fa9e4066Sahrens }
223*fa9e4066Sahrens 
224*fa9e4066Sahrens zpool_status_t
225*fa9e4066Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
226*fa9e4066Sahrens {
227*fa9e4066Sahrens 	zpool_status_t ret = check_status(zhp->zpool_config, FALSE);
228*fa9e4066Sahrens 
229*fa9e4066Sahrens 	if (ret >= NMSGID)
230*fa9e4066Sahrens 		*msgid = NULL;
231*fa9e4066Sahrens 	else
232*fa9e4066Sahrens 		*msgid = msgid_table[ret];
233*fa9e4066Sahrens 
234*fa9e4066Sahrens 	return (ret);
235*fa9e4066Sahrens }
236*fa9e4066Sahrens 
237*fa9e4066Sahrens zpool_status_t
238*fa9e4066Sahrens zpool_import_status(nvlist_t *config, char **msgid)
239*fa9e4066Sahrens {
240*fa9e4066Sahrens 	zpool_status_t ret = check_status(config, TRUE);
241*fa9e4066Sahrens 
242*fa9e4066Sahrens 	if (ret >= NMSGID)
243*fa9e4066Sahrens 		*msgid = NULL;
244*fa9e4066Sahrens 	else
245*fa9e4066Sahrens 		*msgid = msgid_table[ret];
246*fa9e4066Sahrens 
247*fa9e4066Sahrens 	return (ret);
248*fa9e4066Sahrens }
249