1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
22*95173954Sek  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens #pragma ident	"%Z%%M%	%I%	%E% SMI"
27fa9e4066Sahrens 
/*
 * This file contains the functions which analyze the status of a pool.  This
 * includes both the status of an active pool and the status of exported
 * pools.  Returns one of the ZPOOL_STATUS_* defines describing the status of
 * the pool.  This status is independent (to a certain degree) from the state of
 * the pool.  A pool's state describes only whether or not it is capable of
 * providing the necessary fault tolerance for data.  The status describes the
 * overall status of devices.  A pool that is online can still have a device
 * that is experiencing errors.
 *
 * Only a subset of the possible faults can be detected using 'zpool status',
 * and not all possible errors correspond to a FMA message ID.  The explanation
 * is left up to the caller, depending on whether it is a live pool or an
 * import.
 */
43fa9e4066Sahrens 
44fa9e4066Sahrens #include <libzfs.h>
45fa9e4066Sahrens #include <string.h>
46*95173954Sek #include <unistd.h>
47fa9e4066Sahrens #include "libzfs_impl.h"
48fa9e4066Sahrens 
/*
 * Message ID table used when reporting the status of a pool being imported.
 * This must be kept in sync with the ZPOOL_STATUS_* defines in libzfs.h,
 * because the status value is used directly as the index.  Note that there
 * are some status results which go past the end of this table, and hence
 * have no associated message ID.
 */
static char *zfs_msgid_table[] = {
	"ZFS-8000-14",
	"ZFS-8000-2Q",
	"ZFS-8000-3C",
	"ZFS-8000-4J",
	"ZFS-8000-5E",
	"ZFS-8000-6X",
	"ZFS-8000-72",
	"ZFS-8000-8A",
	"ZFS-8000-9P",
	"ZFS-8000-A5",
	"ZFS-8000-EY"
};

/*
 * If the pool is active, a certain class of static errors is overridden by the
 * faults as analyzed by FMA.  These faults have separate knowledge articles,
 * and the article referred to by 'zpool status' must match that indicated by
 * the syslog error message.  We override missing data as well as corrupt pool.
 *
 * This table is indexed by the same status values as zfs_msgid_table and is
 * bounded by the same NMSGID, so it must always have exactly as many entries.
 * Entries that are not marked as overridden repeat the ID from
 * zfs_msgid_table.
 */
static char *zfs_msgid_table_active[] = {
	"ZFS-8000-14",
	"ZFS-8000-D3",		/* overridden */
	"ZFS-8000-D3",		/* overridden */
	"ZFS-8000-4J",
	"ZFS-8000-5E",
	"ZFS-8000-6X",
	"ZFS-8000-CS",		/* overridden */
	"ZFS-8000-8A",
	"ZFS-8000-9P",
	"ZFS-8000-CS",		/* overridden */
	"ZFS-8000-EY"
};

/*
 * Number of status values that have a message ID.  Any status greater than or
 * equal to this has no message ID.
 */
#define	NMSGID	(sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))

/*
 * Compile-time check that the two tables have the same number of entries.  The
 * array size below becomes negative (a compile error) if they ever differ,
 * which would otherwise let a valid status index run off the shorter table.
 */
typedef char zfs_msgid_tables_same_size[
    (sizeof (zfs_msgid_table_active) == sizeof (zfs_msgid_table)) ? 1 : -1];
88fa9e4066Sahrens 
89fa9e4066Sahrens /* ARGSUSED */
90fa9e4066Sahrens static int
91fa9e4066Sahrens vdev_missing(uint64_t state, uint64_t aux, uint64_t errs)
92fa9e4066Sahrens {
93fa9e4066Sahrens 	return (state == VDEV_STATE_CANT_OPEN &&
94fa9e4066Sahrens 	    aux == VDEV_AUX_OPEN_FAILED);
95fa9e4066Sahrens }
96fa9e4066Sahrens 
/*
 * Predicate for find_vdev_problem(): non-zero when the vdev has a non-zero
 * error count.  The state and auxiliary state are not consulted.
 */
/* ARGSUSED */
static int
vdev_errors(uint64_t state, uint64_t aux, uint64_t errs)
{
	if (errs == 0)
		return (0);

	return (1);
}
103fa9e4066Sahrens 
104fa9e4066Sahrens /* ARGSUSED */
105fa9e4066Sahrens static int
106fa9e4066Sahrens vdev_broken(uint64_t state, uint64_t aux, uint64_t errs)
107fa9e4066Sahrens {
108fa9e4066Sahrens 	return (state == VDEV_STATE_CANT_OPEN);
109fa9e4066Sahrens }
110fa9e4066Sahrens 
111fa9e4066Sahrens /* ARGSUSED */
112fa9e4066Sahrens static int
113fa9e4066Sahrens vdev_offlined(uint64_t state, uint64_t aux, uint64_t errs)
114fa9e4066Sahrens {
115fa9e4066Sahrens 	return (state == VDEV_STATE_OFFLINE);
116fa9e4066Sahrens }
117fa9e4066Sahrens 
118fa9e4066Sahrens /*
119fa9e4066Sahrens  * Detect if any leaf devices that have seen errors or could not be opened.
120fa9e4066Sahrens  */
12199653d4eSeschrock static boolean_t
122fa9e4066Sahrens find_vdev_problem(nvlist_t *vdev, int (*func)(uint64_t, uint64_t, uint64_t))
123fa9e4066Sahrens {
124fa9e4066Sahrens 	nvlist_t **child;
125fa9e4066Sahrens 	vdev_stat_t *vs;
126fa9e4066Sahrens 	uint_t c, children;
127fa9e4066Sahrens 	char *type;
128fa9e4066Sahrens 
129fa9e4066Sahrens 	/*
130fa9e4066Sahrens 	 * Ignore problems within a 'replacing' vdev, since we're presumably in
131fa9e4066Sahrens 	 * the process of repairing any such errors, and don't want to call them
132fa9e4066Sahrens 	 * out again.  We'll pick up the fact that a resilver is happening
133fa9e4066Sahrens 	 * later.
134fa9e4066Sahrens 	 */
135fa9e4066Sahrens 	verify(nvlist_lookup_string(vdev, ZPOOL_CONFIG_TYPE, &type) == 0);
136fa9e4066Sahrens 	if (strcmp(type, VDEV_TYPE_REPLACING) == 0)
13799653d4eSeschrock 		return (B_FALSE);
138fa9e4066Sahrens 
139fa9e4066Sahrens 	if (nvlist_lookup_nvlist_array(vdev, ZPOOL_CONFIG_CHILDREN, &child,
140fa9e4066Sahrens 	    &children) == 0) {
141fa9e4066Sahrens 		for (c = 0; c < children; c++)
142fa9e4066Sahrens 			if (find_vdev_problem(child[c], func))
14399653d4eSeschrock 				return (B_TRUE);
144fa9e4066Sahrens 	} else {
145fa9e4066Sahrens 		verify(nvlist_lookup_uint64_array(vdev, ZPOOL_CONFIG_STATS,
146fa9e4066Sahrens 		    (uint64_t **)&vs, &c) == 0);
147fa9e4066Sahrens 
148fa9e4066Sahrens 		if (func(vs->vs_state, vs->vs_aux,
149fa9e4066Sahrens 		    vs->vs_read_errors +
150fa9e4066Sahrens 		    vs->vs_write_errors +
151fa9e4066Sahrens 		    vs->vs_checksum_errors))
15299653d4eSeschrock 			return (B_TRUE);
153fa9e4066Sahrens 	}
154fa9e4066Sahrens 
15599653d4eSeschrock 	return (B_FALSE);
156fa9e4066Sahrens }
157fa9e4066Sahrens 
158fa9e4066Sahrens /*
159fa9e4066Sahrens  * Active pool health status.
160fa9e4066Sahrens  *
161fa9e4066Sahrens  * To determine the status for a pool, we make several passes over the config,
162fa9e4066Sahrens  * picking the most egregious error we find.  In order of importance, we do the
163fa9e4066Sahrens  * following:
164fa9e4066Sahrens  *
165fa9e4066Sahrens  *	- Check for a complete and valid configuration
166ea8dc4b6Seschrock  *	- Look for any missing devices in a non-replicated config
167fa9e4066Sahrens  *	- Check for any data errors
168ea8dc4b6Seschrock  *	- Check for any missing devices in a replicated config
169ea8dc4b6Seschrock  *	- Look for any devices showing errors
170fa9e4066Sahrens  *	- Check for any resilvering devices
171fa9e4066Sahrens  *
172fa9e4066Sahrens  * There can obviously be multiple errors within a single pool, so this routine
173fa9e4066Sahrens  * only picks the most damaging of all the current errors to report.
174fa9e4066Sahrens  */
175fa9e4066Sahrens static zpool_status_t
17699653d4eSeschrock check_status(nvlist_t *config, boolean_t isimport)
177fa9e4066Sahrens {
178fa9e4066Sahrens 	nvlist_t *nvroot;
179fa9e4066Sahrens 	vdev_stat_t *vs;
180fa9e4066Sahrens 	uint_t vsc;
181ea8dc4b6Seschrock 	uint64_t nerr;
182eaca9bbdSeschrock 	uint64_t version;
183*95173954Sek 	uint64_t stateval;
184*95173954Sek 	uint64_t hostid = 0;
185fa9e4066Sahrens 
186eaca9bbdSeschrock 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
187eaca9bbdSeschrock 	    &version) == 0);
188fa9e4066Sahrens 	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
189fa9e4066Sahrens 	    &nvroot) == 0);
190fa9e4066Sahrens 	verify(nvlist_lookup_uint64_array(nvroot, ZPOOL_CONFIG_STATS,
191fa9e4066Sahrens 	    (uint64_t **)&vs, &vsc) == 0);
192*95173954Sek 	verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE,
193*95173954Sek 	    &stateval) == 0);
194*95173954Sek 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
195*95173954Sek 
196*95173954Sek 	/*
197*95173954Sek 	 * Pool last accessed by another system.
198*95173954Sek 	 */
199*95173954Sek 	if (hostid != 0 && (unsigned long)hostid != gethostid() &&
200*95173954Sek 	    stateval == POOL_STATE_ACTIVE)
201*95173954Sek 		return (ZPOOL_STATUS_HOSTID_MISMATCH);
202fa9e4066Sahrens 
203eaca9bbdSeschrock 	/*
204eaca9bbdSeschrock 	 * Newer on-disk version.
205eaca9bbdSeschrock 	 */
206eaca9bbdSeschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
207eaca9bbdSeschrock 	    vs->vs_aux == VDEV_AUX_VERSION_NEWER)
208eaca9bbdSeschrock 		return (ZPOOL_STATUS_VERSION_NEWER);
209eaca9bbdSeschrock 
210fa9e4066Sahrens 	/*
211fa9e4066Sahrens 	 * Check that the config is complete.
212fa9e4066Sahrens 	 */
213fa9e4066Sahrens 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
214ea8dc4b6Seschrock 	    vs->vs_aux == VDEV_AUX_BAD_GUID_SUM)
215fa9e4066Sahrens 		return (ZPOOL_STATUS_BAD_GUID_SUM);
216fa9e4066Sahrens 
217fa9e4066Sahrens 	/*
218ea8dc4b6Seschrock 	 * Missing devices in non-replicated config.
219fa9e4066Sahrens 	 */
220ea8dc4b6Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
221ea8dc4b6Seschrock 	    find_vdev_problem(nvroot, vdev_missing))
222ea8dc4b6Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_NR);
223ea8dc4b6Seschrock 
224ea8dc4b6Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
225ea8dc4b6Seschrock 	    find_vdev_problem(nvroot, vdev_broken))
226ea8dc4b6Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_NR);
227ea8dc4b6Seschrock 
228ea8dc4b6Seschrock 	/*
229ea8dc4b6Seschrock 	 * Corrupted pool metadata
230ea8dc4b6Seschrock 	 */
231ea8dc4b6Seschrock 	if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
232ea8dc4b6Seschrock 	    vs->vs_aux == VDEV_AUX_CORRUPT_DATA)
233ea8dc4b6Seschrock 		return (ZPOOL_STATUS_CORRUPT_POOL);
234fa9e4066Sahrens 
235fa9e4066Sahrens 	/*
236ea8dc4b6Seschrock 	 * Persistent data errors.
237fa9e4066Sahrens 	 */
238ea8dc4b6Seschrock 	if (!isimport) {
239ea8dc4b6Seschrock 		if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_ERRCOUNT,
240ea8dc4b6Seschrock 		    &nerr) == 0 && nerr != 0)
241ea8dc4b6Seschrock 			return (ZPOOL_STATUS_CORRUPT_DATA);
242fa9e4066Sahrens 	}
243fa9e4066Sahrens 
244ea8dc4b6Seschrock 	/*
245ea8dc4b6Seschrock 	 * Missing devices in a replicated config.
246ea8dc4b6Seschrock 	 */
247ea8dc4b6Seschrock 	if (find_vdev_problem(nvroot, vdev_missing))
248ea8dc4b6Seschrock 		return (ZPOOL_STATUS_MISSING_DEV_R);
249ea8dc4b6Seschrock 	if (find_vdev_problem(nvroot, vdev_broken))
250ea8dc4b6Seschrock 		return (ZPOOL_STATUS_CORRUPT_LABEL_R);
251ea8dc4b6Seschrock 
252fa9e4066Sahrens 	/*
253fa9e4066Sahrens 	 * Devices with errors
254fa9e4066Sahrens 	 */
255fa9e4066Sahrens 	if (!isimport && find_vdev_problem(nvroot, vdev_errors))
256fa9e4066Sahrens 		return (ZPOOL_STATUS_FAILING_DEV);
257fa9e4066Sahrens 
258fa9e4066Sahrens 	/*
259fa9e4066Sahrens 	 * Offlined devices
260fa9e4066Sahrens 	 */
261fa9e4066Sahrens 	if (find_vdev_problem(nvroot, vdev_offlined))
262fa9e4066Sahrens 		return (ZPOOL_STATUS_OFFLINE_DEV);
263fa9e4066Sahrens 
264fa9e4066Sahrens 	/*
265fa9e4066Sahrens 	 * Currently resilvering
266fa9e4066Sahrens 	 */
267fa9e4066Sahrens 	if (!vs->vs_scrub_complete && vs->vs_scrub_type == POOL_SCRUB_RESILVER)
268fa9e4066Sahrens 		return (ZPOOL_STATUS_RESILVERING);
269fa9e4066Sahrens 
270fa9e4066Sahrens 	/*
271eaca9bbdSeschrock 	 * Outdated, but usable, version
272fa9e4066Sahrens 	 */
273eaca9bbdSeschrock 	if (version < ZFS_VERSION)
274eaca9bbdSeschrock 		return (ZPOOL_STATUS_VERSION_OLDER);
275fa9e4066Sahrens 
276fa9e4066Sahrens 	return (ZPOOL_STATUS_OK);
277fa9e4066Sahrens }
278fa9e4066Sahrens 
279fa9e4066Sahrens zpool_status_t
280fa9e4066Sahrens zpool_get_status(zpool_handle_t *zhp, char **msgid)
281fa9e4066Sahrens {
28299653d4eSeschrock 	zpool_status_t ret = check_status(zhp->zpool_config, B_FALSE);
283fa9e4066Sahrens 
284fa9e4066Sahrens 	if (ret >= NMSGID)
285fa9e4066Sahrens 		*msgid = NULL;
286fa9e4066Sahrens 	else
287*95173954Sek 		*msgid = zfs_msgid_table_active[ret];
288fa9e4066Sahrens 
289fa9e4066Sahrens 	return (ret);
290fa9e4066Sahrens }
291fa9e4066Sahrens 
292fa9e4066Sahrens zpool_status_t
293fa9e4066Sahrens zpool_import_status(nvlist_t *config, char **msgid)
294fa9e4066Sahrens {
29599653d4eSeschrock 	zpool_status_t ret = check_status(config, B_TRUE);
296fa9e4066Sahrens 
297fa9e4066Sahrens 	if (ret >= NMSGID)
298fa9e4066Sahrens 		*msgid = NULL;
299fa9e4066Sahrens 	else
300*95173954Sek 		*msgid = zfs_msgid_table[ret];
301fa9e4066Sahrens 
302fa9e4066Sahrens 	return (ret);
303fa9e4066Sahrens }
304