spa.c revision 99653d4ee642c6528e88224f12409a5f23060994
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * This file contains all the routines used when modifying on-disk SPA state.
 * This includes opening, importing, destroying, exporting a pool, and syncing a
 * pool.
 */

#include <sys/zfs_context.h>
#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dmu.h>
#include <sys/dmu_tx.h>
#include <sys/zap.h>
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
#include <sys/dmu_traverse.h>
#include <sys/unique.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_prop.h>
#include <sys/fs/zfs.h>
#include <sys/callb.h>

/*
 * ==========================================================================
 * SPA state manipulation (open/create/destroy/import/export)
 * ==========================================================================
 */

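/*
 * AVL comparison function for the error-list trees created below
 * (spa_errlist_last and spa_errlist_scrub): orders spa_error_entry_t
 * nodes by a byte-wise comparison of their bookmarks.
 */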
static int
spa_error_entry_compare(const void *a, const void *b)
{
	spa_error_entry_t *sa = (spa_error_entry_t *)a;
	spa_error_entry_t *sb = (spa_error_entry_t *)b;
	int ret;

	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
	    sizeof (zbookmark_t));

	if (ret < 0)
		return (-1);
	else if (ret > 0)
		return (1);
	else
		return (0);
}

/*
 * Utility function which retrieves copies of the current logs and
 * re-initializes them in the process.
 */
void
spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
{
	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));

	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Activate an uninitialized pool.
 */
static void
spa_activate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;

	spa->spa_normal_class = metaslab_class_create();

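	/*
	 * Create an issue taskq and an interrupt taskq for each zio type.
	 */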
	for (t = 0; t < ZIO_TYPES; t++) {
		spa->spa_zio_issue_taskq[t] = taskq_create("spa_zio_issue",
		    8, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
		spa->spa_zio_intr_taskq[t] = taskq_create("spa_zio_intr",
		    8, maxclsyspri, 50, INT_MAX,
		    TASKQ_PREPOPULATE);
	}

	rw_init(&spa->spa_traverse_lock, NULL, RW_DEFAULT, NULL);

	list_create(&spa->spa_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}

/*
 * Opposite of spa_activate().
 */
static void
spa_deactivate(spa_t *spa)
{
	int t;

	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);

	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_dirty_list);

	rw_destroy(&spa->spa_traverse_lock);

	for (t = 0; t < ZIO_TYPES; t++) {
		taskq_destroy(spa->spa_zio_issue_taskq[t]);
		taskq_destroy(spa->spa_zio_intr_taskq[t]);
		spa->spa_zio_issue_taskq[t] = NULL;
		spa->spa_zio_intr_taskq[t] = NULL;
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}

/*
 * Verify a pool configuration, and construct the vdev tree appropriately.  This
 * will create all the necessary vdevs in the appropriate layout, with each vdev
 * in the CLOSED state.  This will prep the pool before open/creation/import.
 * All vdev validation is done by the vdev_alloc() routine.
 */
static int
spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
    uint_t id, int atype)
{
	nvlist_t **child;
	uint_t c, children;
	int error;

	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
		return (error);

	if ((*vdp)->vdev_ops->vdev_op_leaf)
		return (0);

	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
	    &child, &children) != 0) {
		vdev_free(*vdp);
		*vdp = NULL;
		return (EINVAL);
	}

	for (c = 0; c < children; c++) {
		vdev_t *vd;
		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
		    atype)) != 0) {
			vdev_free(*vdp);
			*vdp = NULL;
			return (error);
		}
	}

	ASSERT(*vdp != NULL);

	return (0);
}

/*
 * Opposite of spa_load().
 */
static void
spa_unload(spa_t *spa)
{
	int i;

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding prefetch I/O to complete.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa_config_exit(spa, FTAG);

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

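	/*
	 * Free any cached spare vdevs, the array that holds them, and the
	 * cached spare configuration.
	 */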
	for (i = 0; i < spa->spa_nspares; i++)
		vdev_free(spa->spa_spares[i]);
	if (spa->spa_spares) {
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));
		spa->spa_spares = NULL;
	}
	if (spa->spa_sparelist) {
		nvlist_free(spa->spa_sparelist);
		spa->spa_sparelist = NULL;
	}

	spa->spa_async_suspended = 0;
}

/*
 * Load (or re-load) the current list of vdevs describing the active spares for
 * this pool.  When this is called, we have some form of basic information in
 * 'spa_sparelist'.  We parse this into vdevs, try to open them, and then
 * re-generate a more complete list including status information.
 */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_nspares; i++) {
		vdev_close(spa->spa_spares[i]);
		vdev_free(spa->spa_spares[i]);
	}
	if (spa->spa_spares)
		kmem_free(spa->spa_spares, spa->spa_nspares * sizeof (void *));

	if (spa->spa_sparelist == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_sparelist,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_nspares = (int)nspares;
	spa->spa_spares = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.
	 */
	spa->spa_spares = kmem_alloc(nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++) {
		vdev_t *vd;

		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares[i] = vd;

		if (vdev_open(vd) != 0)
			continue;

		vd->vdev_top = vd;
		(void) vdev_validate_spare(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_nspares * sizeof (void *), KM_SLEEP);
	for (i = 0; i < spa->spa_nspares; i++)
		spares[i] = vdev_config_generate(spa, spa->spa_spares[i],
		    B_TRUE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_sparelist, ZPOOL_CONFIG_SPARES,
	    spares, spa->spa_nspares) == 0);
	for (i = 0; i < spa->spa_nspares; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_nspares * sizeof (void *));
}

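/*
 * Read a packed nvlist out of the given MOS object: the object's bonus
 * buffer holds the packed size, and the object data holds the packed
 * nvlist itself, which is unpacked into '*value'.
 */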
static int
load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
{
	dmu_buf_t *db;
	char *packed = NULL;
	size_t nvsize = 0;
	int error;
	*value = NULL;

	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	nvsize = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);

	packed = kmem_alloc(nvsize, KM_SLEEP);
	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed);
	if (error == 0)
		error = nvlist_unpack(packed, nvsize, value, 0);
	kmem_free(packed, nvsize);

	return (error);
}

/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
 */
static int
spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
{
	int error = 0;
	nvlist_t *nvroot = NULL;
	vdev_t *rvd;
	uberblock_t *ub = &spa->spa_uberblock;
	uint64_t config_cache_txg = spa->spa_config_txg;
	uint64_t pool_guid;
	uint64_t version;
	zio_t *zio;

	spa->spa_load_state = state;

	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Versioning wasn't explicitly added to the label until later, so if
	 * it's not present treat it as the initial version.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
		version = ZFS_VERSION_INITIAL;

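	/*
	 * Pull out the pool txg recorded in the config, if present.
	 */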
	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &spa->spa_config_txg);

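	/*
	 * When importing a pool, fail if a pool with the same guid already
	 * exists.
	 */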
	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
	    spa_guid_exists(pool_guid, 0)) {
		error = EEXIST;
		goto out;
	}

	/*
	 * Parse the configuration into a vdev tree.  We explicitly set the
	 * value that will be returned by spa_version() since parsing the
	 * configuration requires knowing the version number.
	 */
	spa_config_enter(spa, RW_WRITER, FTAG);
	spa->spa_ubsync.ub_version = version;
	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
	spa_config_exit(spa, FTAG);

	if (error != 0)
		goto out;

	ASSERT(spa->spa_root_vdev == rvd);
	ASSERT(spa_guid(spa) == pool_guid);

	/*
	 * Try to open all vdevs, loading each label in the process.
	 */
	if (vdev_open(rvd) != 0) {
		error = ENXIO;
		goto out;
	}

	/*
	 * Validate the labels for all leaf vdevs.  We need to grab the config
	 * lock because all label I/O is done with the ZIO_FLAG_CONFIG_HELD
	 * flag.
	 */
	spa_config_enter(spa, RW_READER, FTAG);
	error = vdev_validate(rvd);
	spa_config_exit(spa, FTAG);

	if (error != 0) {
		error = EBADF;
		goto out;
	}

	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	/*
	 * Find the best uberblock.
	 */
	bzero(ub, sizeof (uberblock_t));

	zio = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
	vdev_uberblock_load(zio, rvd, ub);
	error = zio_wait(zio);

	/*
	 * If we weren't able to find a single valid uberblock, return failure.
	 */
	if (ub->ub_txg == 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = ENXIO;
		goto out;
	}

	/*
	 * If the pool is newer than the code, we can't open it.
	 */
	if (ub->ub_version > ZFS_VERSION) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_VERSION_NEWER);
		error = ENOTSUP;