xref: /illumos-gate/usr/src/uts/common/fs/zfs/spa.c (revision 80eb36f241abf8c076119fb4c49a55fd61ebc710)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
2199653d4eSeschrock 
22fa9e4066Sahrens /*
23379c004dSEric Schrock  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24fa9e4066Sahrens  * Use is subject to license terms.
25fa9e4066Sahrens  */
26fa9e4066Sahrens 
27fa9e4066Sahrens /*
28fa9e4066Sahrens  * This file contains all the routines used when modifying on-disk SPA state.
29fa9e4066Sahrens  * This includes opening, importing, destroying, exporting a pool, and syncing a
30fa9e4066Sahrens  * pool.
31fa9e4066Sahrens  */
32fa9e4066Sahrens 
33fa9e4066Sahrens #include <sys/zfs_context.h>
34ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h>
35fa9e4066Sahrens #include <sys/spa_impl.h>
36fa9e4066Sahrens #include <sys/zio.h>
37fa9e4066Sahrens #include <sys/zio_checksum.h>
38fa9e4066Sahrens #include <sys/dmu.h>
39fa9e4066Sahrens #include <sys/dmu_tx.h>
40fa9e4066Sahrens #include <sys/zap.h>
41fa9e4066Sahrens #include <sys/zil.h>
42b24ab676SJeff Bonwick #include <sys/ddt.h>
43fa9e4066Sahrens #include <sys/vdev_impl.h>
44fa9e4066Sahrens #include <sys/metaslab.h>
4588ecc943SGeorge Wilson #include <sys/metaslab_impl.h>
46fa9e4066Sahrens #include <sys/uberblock_impl.h>
47fa9e4066Sahrens #include <sys/txg.h>
48fa9e4066Sahrens #include <sys/avl.h>
49fa9e4066Sahrens #include <sys/dmu_traverse.h>
50b1b8ab34Slling #include <sys/dmu_objset.h>
51fa9e4066Sahrens #include <sys/unique.h>
52fa9e4066Sahrens #include <sys/dsl_pool.h>
53b1b8ab34Slling #include <sys/dsl_dataset.h>
54fa9e4066Sahrens #include <sys/dsl_dir.h>
55fa9e4066Sahrens #include <sys/dsl_prop.h>
56b1b8ab34Slling #include <sys/dsl_synctask.h>
57fa9e4066Sahrens #include <sys/fs/zfs.h>
58fa94a07fSbrendan #include <sys/arc.h>
59fa9e4066Sahrens #include <sys/callb.h>
6095173954Sek #include <sys/systeminfo.h>
61e7cbe64fSgw #include <sys/spa_boot.h>
62573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h>
63fa9e4066Sahrens 
645679c89fSjv #ifdef	_KERNEL
655679c89fSjv #include <sys/zone.h>
66dedec472SJack Meng #include <sys/bootprops.h>
675679c89fSjv #endif	/* _KERNEL */
685679c89fSjv 
69990b4856Slling #include "zfs_prop.h"
70b7b97454Sperrin #include "zfs_comutil.h"
71990b4856Slling 
/*
 * Taskq sizing modes: zti_value is interpreted according to zti_mode.
 */
enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_tune,			/* fill from zio_taskq_tune_* */
	zti_mode_null,			/* don't create a taskq */
	zti_nmodes
};

/* Shorthand initializers for zio_taskq_info_t table entries below. */
#define	ZTI_FIX(n)	{ zti_mode_fixed, (n) }
#define	ZTI_PCT(n)	{ zti_mode_online_percent, (n) }
#define	ZTI_TUNE	{ zti_mode_tune, 0 }
#define	ZTI_NULL	{ zti_mode_null, 0 }

#define	ZTI_ONE		ZTI_FIX(1)

/* One (mode, value) pair per (zio type, taskq type) slot. */
typedef struct zio_taskq_info {
	enum zti_modes zti_mode;
	uint_t zti_value;
} zio_taskq_info_t;

/* Name suffixes used when constructing taskq names in spa_activate(). */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
		"issue", "issue_high", "intr", "intr_high"
};
952e0c549eSJonathan Adams 
96*80eb36f2SGeorge Wilson /*
97*80eb36f2SGeorge Wilson  * Define the taskq threads for the following I/O types:
98*80eb36f2SGeorge Wilson  * 	NULL, READ, WRITE, FREE, CLAIM, and IOCTL
99*80eb36f2SGeorge Wilson  */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* NULL */
	{ ZTI_FIX(8),	ZTI_NULL,	ZTI_TUNE,	ZTI_NULL },	/* READ */
	{ ZTI_TUNE,	ZTI_FIX(5),	ZTI_FIX(8),	ZTI_FIX(5) },	/* WRITE */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* FREE */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* CLAIM */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },	/* IOCTL */
};

/* Defaults substituted for ZTI_TUNE entries at spa_activate() time. */
enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent;
uint_t zio_taskq_tune_value = 80;	/* #threads = 80% of # online CPUs */
1122e0c549eSJonathan Adams 
113990b4856Slling static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
11489a89ebfSlling static boolean_t spa_has_active_shared_spare(spa_t *spa);
115990b4856Slling 
116990b4856Slling /*
117990b4856Slling  * ==========================================================================
118990b4856Slling  * SPA properties routines
119990b4856Slling  * ==========================================================================
120990b4856Slling  */
121990b4856Slling 
122990b4856Slling /*
123990b4856Slling  * Add a (source=src, propname=propval) list to an nvlist.
124990b4856Slling  */
1259d82f4f6Slling static void
126990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
127990b4856Slling     uint64_t intval, zprop_source_t src)
128990b4856Slling {
129990b4856Slling 	const char *propname = zpool_prop_to_name(prop);
130990b4856Slling 	nvlist_t *propval;
131990b4856Slling 
1329d82f4f6Slling 	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1339d82f4f6Slling 	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
134990b4856Slling 
1359d82f4f6Slling 	if (strval != NULL)
1369d82f4f6Slling 		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
1379d82f4f6Slling 	else
1389d82f4f6Slling 		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
139990b4856Slling 
1409d82f4f6Slling 	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
141990b4856Slling 	nvlist_free(propval);
142990b4856Slling }
143990b4856Slling 
144990b4856Slling /*
145990b4856Slling  * Get property values from the spa configuration.
146990b4856Slling  */
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
	uint64_t size;
	uint64_t alloc;
	uint64_t cap, version;
	zprop_source_t src = ZPROP_SRC_NONE;
	spa_config_dirent_t *dp;

	/* Caller must already hold the props lock. */
	ASSERT(MUTEX_HELD(&spa->spa_props_lock));

	/*
	 * Space accounting and vdev-derived properties are only available
	 * once a root vdev exists (i.e. not during early pool creation).
	 */
	if (spa->spa_root_vdev != NULL) {
		alloc = metaslab_class_get_alloc(spa_normal_class(spa));
		size = metaslab_class_get_space(spa_normal_class(spa));
		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
		    size - alloc, src);

		/* Guard against division by zero for an empty class. */
		cap = (size == 0) ? 0 : (alloc * 100 / size);
		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
		    ddt_get_pool_dedup_ratio(spa), src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
		    spa->spa_root_vdev->vdev_state, src);

		/*
		 * Report the version source as default only when it still
		 * matches the compiled-in default.
		 */
		version = spa_version(spa);
		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
			src = ZPROP_SRC_DEFAULT;
		else
			src = ZPROP_SRC_LOCAL;
		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
	}

	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);

	if (spa->spa_root != NULL)
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
		    0, ZPROP_SRC_LOCAL);

	/*
	 * Only report a cachefile property when it deviates from the
	 * default cache path: "none" for no cache, or an explicit path.
	 */
	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
		if (dp->scd_path == NULL) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    "none", 0, ZPROP_SRC_LOCAL);
		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
		}
	}
}
200990b4856Slling 
201990b4856Slling /*
202990b4856Slling  * Get zpool property values.
203990b4856Slling  */
int
spa_prop_get(spa_t *spa, nvlist_t **nvp)
{
	objset_t *mos = spa->spa_meta_objset;
	zap_cursor_t zc;
	zap_attribute_t za;
	int err;

	/* Allocate the result nvlist; the caller owns and frees it. */
	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);

	mutex_enter(&spa->spa_props_lock);

	/*
	 * Get properties from the spa config.
	 */
	spa_prop_get_config(spa, nvp);

	/* If no pool property object, no more prop to get. */
	if (spa->spa_pool_props_object == 0) {
		mutex_exit(&spa->spa_props_lock);
		return (0);
	}

	/*
	 * Get properties from the MOS pool property object.
	 */
	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
	    zap_cursor_advance(&zc)) {
		uint64_t intval = 0;
		char *strval = NULL;
		zprop_source_t src = ZPROP_SRC_DEFAULT;
		zpool_prop_t prop;

		/* Skip ZAP entries that don't map to a known pool property. */
		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
			continue;

		switch (za.za_integer_length) {
		case 8:
			/* integer property */
			if (za.za_first_integer !=
			    zpool_prop_default_numeric(prop))
				src = ZPROP_SRC_LOCAL;

			if (prop == ZPOOL_PROP_BOOTFS) {
				dsl_pool_t *dp;
				dsl_dataset_t *ds = NULL;

				/*
				 * Translate the stored dataset object number
				 * into its name for reporting.
				 *
				 * NOTE(review): on hold failure this breaks
				 * out of the switch only; err is overwritten
				 * by the next zap_cursor_retrieve(), so the
				 * error is effectively dropped — confirm
				 * whether that is intentional.
				 */
				dp = spa_get_dsl(spa);
				rw_enter(&dp->dp_config_rwlock, RW_READER);
				if (err = dsl_dataset_hold_obj(dp,
				    za.za_first_integer, FTAG, &ds)) {
					rw_exit(&dp->dp_config_rwlock);
					break;
				}

				strval = kmem_alloc(
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
				    KM_SLEEP);
				dsl_dataset_name(ds, strval);
				dsl_dataset_rele(ds, FTAG);
				rw_exit(&dp->dp_config_rwlock);
			} else {
				strval = NULL;
				intval = za.za_first_integer;
			}

			spa_prop_add_list(*nvp, prop, strval, intval, src);

			if (strval != NULL)
				kmem_free(strval,
				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);

			break;

		case 1:
			/* string property */
			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
			err = zap_lookup(mos, spa->spa_pool_props_object,
			    za.za_name, 1, za.za_num_integers, strval);
			if (err) {
				kmem_free(strval, za.za_num_integers);
				break;
			}
			spa_prop_add_list(*nvp, prop, strval, 0, src);
			kmem_free(strval, za.za_num_integers);
			break;

		default:
			break;
		}
	}
	zap_cursor_fini(&zc);
	mutex_exit(&spa->spa_props_lock);
/* NOTE(review): label appears to be vestigial — nothing jumps to it. */
out:
	/*
	 * The loop always exits with err != 0; ENOENT simply means the
	 * cursor reached the end of the ZAP and is not an error.
	 */
	if (err && err != ENOENT) {
		nvlist_free(*nvp);
		*nvp = NULL;
		return (err);
	}

	return (0);
}
307990b4856Slling 
308990b4856Slling /*
309990b4856Slling  * Validate the given pool properties nvlist and modify the list
310990b4856Slling  * for the property values to be set.
311990b4856Slling  */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	/* Walk every pair in the caller's list; stop at the first error. */
	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Version may only move forward, up to SPA_VERSION. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
		case ZPOOL_PROP_LISTSNAPS:
		case ZPOOL_PROP_AUTOEXPAND:
			/* Boolean properties: only 0 or 1 are valid. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			/*
			 * If the pool version is less than SPA_VERSION_BOOTFS,
			 * or the pool is still being created (version == 0),
			 * the bootfs property cannot be set.
			 */
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Make sure the vdev config is bootable
			 */
			if (!vdev_is_bootable(spa->spa_root_vdev)) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Remember to rewrite the bootfs string entry as an
			 * object number after the loop (see below).
			 */
			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				uint64_t compress;

				/* Empty string clears bootfs to the default. */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_hold(strval, FTAG, &os))
					break;

				/* Must be ZPL and not gzip compressed. */

				if (dmu_objset_type(os) != DMU_OST_ZFS) {
					error = ENOTSUP;
				} else if ((error = dsl_prop_get_integer(strval,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    &compress, NULL)) == 0 &&
				    !BOOTFS_COMPRESS_VALID(compress)) {
					error = ENOTSUP;
				} else {
					objnum = dmu_objset_id(os);
				}
				dmu_objset_rele(os, FTAG);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_suspended(spa)) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* "" (use default) and "none" are always accepted. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* Anything else must be an absolute path. */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			/* Reject paths ending in "/", "/." or "/..". */
			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;

		case ZPOOL_PROP_DEDUPDITTO:
			if (spa_version(spa) < SPA_VERSION_DEDUP)
				error = ENOTSUP;
			else
				error = nvpair_value_uint64(elem, &intval);
			/* 0 disables; otherwise enforce the minimum. */
			if (error == 0 &&
			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/*
	 * Replace the user-supplied bootfs dataset name with the object
	 * number we resolved above, which is what gets stored on disk.
	 */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}
472990b4856Slling 
473379c004dSEric Schrock void
474379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
475379c004dSEric Schrock {
476379c004dSEric Schrock 	char *cachefile;
477379c004dSEric Schrock 	spa_config_dirent_t *dp;
478379c004dSEric Schrock 
479379c004dSEric Schrock 	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
480379c004dSEric Schrock 	    &cachefile) != 0)
481379c004dSEric Schrock 		return;
482379c004dSEric Schrock 
483379c004dSEric Schrock 	dp = kmem_alloc(sizeof (spa_config_dirent_t),
484379c004dSEric Schrock 	    KM_SLEEP);
485379c004dSEric Schrock 
486379c004dSEric Schrock 	if (cachefile[0] == '\0')
487379c004dSEric Schrock 		dp->scd_path = spa_strdup(spa_config_path);
488379c004dSEric Schrock 	else if (strcmp(cachefile, "none") == 0)
489379c004dSEric Schrock 		dp->scd_path = NULL;
490379c004dSEric Schrock 	else
491379c004dSEric Schrock 		dp->scd_path = spa_strdup(cachefile);
492379c004dSEric Schrock 
493379c004dSEric Schrock 	list_insert_head(&spa->spa_config_list, dp);
494379c004dSEric Schrock 	if (need_sync)
495379c004dSEric Schrock 		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
496379c004dSEric Schrock }
497379c004dSEric Schrock 
498990b4856Slling int
499990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp)
500990b4856Slling {
501990b4856Slling 	int error;
502379c004dSEric Schrock 	nvpair_t *elem;
503379c004dSEric Schrock 	boolean_t need_sync = B_FALSE;
504379c004dSEric Schrock 	zpool_prop_t prop;
505990b4856Slling 
506990b4856Slling 	if ((error = spa_prop_validate(spa, nvp)) != 0)
507990b4856Slling 		return (error);
508990b4856Slling 
509379c004dSEric Schrock 	elem = NULL;
510379c004dSEric Schrock 	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
511379c004dSEric Schrock 		if ((prop = zpool_name_to_prop(
512379c004dSEric Schrock 		    nvpair_name(elem))) == ZPROP_INVAL)
513379c004dSEric Schrock 			return (EINVAL);
514379c004dSEric Schrock 
515379c004dSEric Schrock 		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
516379c004dSEric Schrock 			continue;
517379c004dSEric Schrock 
518379c004dSEric Schrock 		need_sync = B_TRUE;
519379c004dSEric Schrock 		break;
520379c004dSEric Schrock 	}
521379c004dSEric Schrock 
522379c004dSEric Schrock 	if (need_sync)
523379c004dSEric Schrock 		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
524379c004dSEric Schrock 		    spa, nvp, 3));
525379c004dSEric Schrock 	else
526379c004dSEric Schrock 		return (0);
527990b4856Slling }
528990b4856Slling 
529990b4856Slling /*
530990b4856Slling  * If the bootfs property value is dsobj, clear it.
531990b4856Slling  */
532990b4856Slling void
533990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
534990b4856Slling {
535990b4856Slling 	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
536990b4856Slling 		VERIFY(zap_remove(spa->spa_meta_objset,
537990b4856Slling 		    spa->spa_pool_props_object,
538990b4856Slling 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
539990b4856Slling 		spa->spa_bootfs = 0;
540990b4856Slling 	}
541990b4856Slling }
542990b4856Slling 
543fa9e4066Sahrens /*
544fa9e4066Sahrens  * ==========================================================================
545fa9e4066Sahrens  * SPA state manipulation (open/create/destroy/import/export)
546fa9e4066Sahrens  * ==========================================================================
547fa9e4066Sahrens  */
548fa9e4066Sahrens 
549ea8dc4b6Seschrock static int
550ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b)
551ea8dc4b6Seschrock {
552ea8dc4b6Seschrock 	spa_error_entry_t *sa = (spa_error_entry_t *)a;
553ea8dc4b6Seschrock 	spa_error_entry_t *sb = (spa_error_entry_t *)b;
554ea8dc4b6Seschrock 	int ret;
555ea8dc4b6Seschrock 
556ea8dc4b6Seschrock 	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
557ea8dc4b6Seschrock 	    sizeof (zbookmark_t));
558ea8dc4b6Seschrock 
559ea8dc4b6Seschrock 	if (ret < 0)
560ea8dc4b6Seschrock 		return (-1);
561ea8dc4b6Seschrock 	else if (ret > 0)
562ea8dc4b6Seschrock 		return (1);
563ea8dc4b6Seschrock 	else
564ea8dc4b6Seschrock 		return (0);
565ea8dc4b6Seschrock }
566ea8dc4b6Seschrock 
567ea8dc4b6Seschrock /*
568ea8dc4b6Seschrock  * Utility function which retrieves copies of the current logs and
569ea8dc4b6Seschrock  * re-initializes them in the process.
570ea8dc4b6Seschrock  */
571ea8dc4b6Seschrock void
572ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
573ea8dc4b6Seschrock {
574ea8dc4b6Seschrock 	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));
575ea8dc4b6Seschrock 
576ea8dc4b6Seschrock 	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
577ea8dc4b6Seschrock 	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));
578ea8dc4b6Seschrock 
579ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_scrub,
580ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
581ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
582ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_last,
583ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
584ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
585ea8dc4b6Seschrock }
586ea8dc4b6Seschrock 
587fa9e4066Sahrens /*
588fa9e4066Sahrens  * Activate an uninitialized pool.
589fa9e4066Sahrens  */
590fa9e4066Sahrens static void
5918ad4d6ddSJeff Bonwick spa_activate(spa_t *spa, int mode)
592fa9e4066Sahrens {
593fa9e4066Sahrens 	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);
594fa9e4066Sahrens 
595fa9e4066Sahrens 	spa->spa_state = POOL_STATE_ACTIVE;
5968ad4d6ddSJeff Bonwick 	spa->spa_mode = mode;
597fa9e4066Sahrens 
59888ecc943SGeorge Wilson 	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
59988ecc943SGeorge Wilson 	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);
600fa9e4066Sahrens 
601e14bb325SJeff Bonwick 	for (int t = 0; t < ZIO_TYPES; t++) {
602e14bb325SJeff Bonwick 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
603*80eb36f2SGeorge Wilson 			const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
604*80eb36f2SGeorge Wilson 			enum zti_modes mode = ztip->zti_mode;
605*80eb36f2SGeorge Wilson 			uint_t value = ztip->zti_value;
6062e0c549eSJonathan Adams 			char name[32];
6072e0c549eSJonathan Adams 
6082e0c549eSJonathan Adams 			(void) snprintf(name, sizeof (name),
609*80eb36f2SGeorge Wilson 			    "%s_%s", zio_type_name[t], zio_taskq_types[q]);
6102e0c549eSJonathan Adams 
6112e0c549eSJonathan Adams 			if (mode == zti_mode_tune) {
6122e0c549eSJonathan Adams 				mode = zio_taskq_tune_mode;
6132e0c549eSJonathan Adams 				value = zio_taskq_tune_value;
6142e0c549eSJonathan Adams 				if (mode == zti_mode_tune)
6152e0c549eSJonathan Adams 					mode = zti_mode_online_percent;
6162e0c549eSJonathan Adams 			}
6172e0c549eSJonathan Adams 
6182e0c549eSJonathan Adams 			switch (mode) {
6192e0c549eSJonathan Adams 			case zti_mode_fixed:
6202e0c549eSJonathan Adams 				ASSERT3U(value, >=, 1);
6212e0c549eSJonathan Adams 				value = MAX(value, 1);
6222e0c549eSJonathan Adams 
6232e0c549eSJonathan Adams 				spa->spa_zio_taskq[t][q] = taskq_create(name,
6242e0c549eSJonathan Adams 				    value, maxclsyspri, 50, INT_MAX,
6252e0c549eSJonathan Adams 				    TASKQ_PREPOPULATE);
6262e0c549eSJonathan Adams 				break;
6272e0c549eSJonathan Adams 
6282e0c549eSJonathan Adams 			case zti_mode_online_percent:
6292e0c549eSJonathan Adams 				spa->spa_zio_taskq[t][q] = taskq_create(name,
6302e0c549eSJonathan Adams 				    value, maxclsyspri, 50, INT_MAX,
6312e0c549eSJonathan Adams 				    TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);
6322e0c549eSJonathan Adams 				break;
6332e0c549eSJonathan Adams 
634*80eb36f2SGeorge Wilson 			case zti_mode_null:
635*80eb36f2SGeorge Wilson 				spa->spa_zio_taskq[t][q] = NULL;
636*80eb36f2SGeorge Wilson 				break;
637*80eb36f2SGeorge Wilson 
6382e0c549eSJonathan Adams 			case zti_mode_tune:
6392e0c549eSJonathan Adams 			default:
6402e0c549eSJonathan Adams 				panic("unrecognized mode for "
6412e0c549eSJonathan Adams 				    "zio_taskqs[%u]->zti_nthreads[%u] (%u:%u) "
6422e0c549eSJonathan Adams 				    "in spa_activate()",
6432e0c549eSJonathan Adams 				    t, q, mode, value);
6442e0c549eSJonathan Adams 				break;
6452e0c549eSJonathan Adams 			}
646e14bb325SJeff Bonwick 		}
647fa9e4066Sahrens 	}
648fa9e4066Sahrens 
649e14bb325SJeff Bonwick 	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
650e14bb325SJeff Bonwick 	    offsetof(vdev_t, vdev_config_dirty_node));
651e14bb325SJeff Bonwick 	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
652e14bb325SJeff Bonwick 	    offsetof(vdev_t, vdev_state_dirty_node));
653fa9e4066Sahrens 
654fa9e4066Sahrens 	txg_list_create(&spa->spa_vdev_txg_list,
655fa9e4066Sahrens 	    offsetof(struct vdev, vdev_txg_node));
656ea8dc4b6Seschrock 
657ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_scrub,
658ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
659ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
660ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_last,
661ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
662ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
663fa9e4066Sahrens }
664fa9e4066Sahrens 
665fa9e4066Sahrens /*
666fa9e4066Sahrens  * Opposite of spa_activate().
667fa9e4066Sahrens  */
static void
spa_deactivate(spa_t *spa)
{
	/* The pool must be quiesced and torn down before deactivation. */
	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);
	ASSERT(spa->spa_async_zio_root == NULL);
	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_config_dirty_list);
	list_destroy(&spa->spa_state_dirty_list);

	/* Destroy only the taskqs spa_activate() actually created. */
	for (int t = 0; t < ZIO_TYPES; t++) {
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			if (spa->spa_zio_taskq[t][q] != NULL)
				taskq_destroy(spa->spa_zio_taskq[t][q]);
			spa->spa_zio_taskq[t][q] = NULL;
		}
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;
}
707fa9e4066Sahrens 
708fa9e4066Sahrens /*
709fa9e4066Sahrens  * Verify a pool configuration, and construct the vdev tree appropriately.  This
710fa9e4066Sahrens  * will create all the necessary vdevs in the appropriate layout, with each vdev
711fa9e4066Sahrens  * in the CLOSED state.  This will prep the pool before open/creation/import.
712fa9e4066Sahrens  * All vdev validation is done by the vdev_alloc() routine.
713fa9e4066Sahrens  */
71499653d4eSeschrock static int
71599653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
71699653d4eSeschrock     uint_t id, int atype)
717fa9e4066Sahrens {
718fa9e4066Sahrens 	nvlist_t **child;
719573ca77eSGeorge Wilson 	uint_t children;
72099653d4eSeschrock 	int error;
721fa9e4066Sahrens 
72299653d4eSeschrock 	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
72399653d4eSeschrock 		return (error);
724fa9e4066Sahrens 
72599653d4eSeschrock 	if ((*vdp)->vdev_ops->vdev_op_leaf)
72699653d4eSeschrock 		return (0);
727fa9e4066Sahrens 
728e14bb325SJeff Bonwick 	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
729e14bb325SJeff Bonwick 	    &child, &children);
730e14bb325SJeff Bonwick 
731e14bb325SJeff Bonwick 	if (error == ENOENT)
732e14bb325SJeff Bonwick 		return (0);
733e14bb325SJeff Bonwick 
734e14bb325SJeff Bonwick 	if (error) {
73599653d4eSeschrock 		vdev_free(*vdp);
73699653d4eSeschrock 		*vdp = NULL;
73799653d4eSeschrock 		return (EINVAL);
738fa9e4066Sahrens 	}
739fa9e4066Sahrens 
740573ca77eSGeorge Wilson 	for (int c = 0; c < children; c++) {
74199653d4eSeschrock 		vdev_t *vd;
74299653d4eSeschrock 		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
74399653d4eSeschrock 		    atype)) != 0) {
74499653d4eSeschrock 			vdev_free(*vdp);
74599653d4eSeschrock 			*vdp = NULL;
74699653d4eSeschrock 			return (error);
747fa9e4066Sahrens 		}
748fa9e4066Sahrens 	}
749fa9e4066Sahrens 
75099653d4eSeschrock 	ASSERT(*vdp != NULL);
75199653d4eSeschrock 
75299653d4eSeschrock 	return (0);
753fa9e4066Sahrens }
754fa9e4066Sahrens 
755fa9e4066Sahrens /*
756fa9e4066Sahrens  * Opposite of spa_load().
757fa9e4066Sahrens  */
758fa9e4066Sahrens static void
759fa9e4066Sahrens spa_unload(spa_t *spa)
760fa9e4066Sahrens {
76199653d4eSeschrock 	int i;
76299653d4eSeschrock 
763e14bb325SJeff Bonwick 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
764e14bb325SJeff Bonwick 
765ea8dc4b6Seschrock 	/*
766ea8dc4b6Seschrock 	 * Stop async tasks.
767ea8dc4b6Seschrock 	 */
768ea8dc4b6Seschrock 	spa_async_suspend(spa);
769ea8dc4b6Seschrock 
770fa9e4066Sahrens 	/*
771fa9e4066Sahrens 	 * Stop syncing.
772fa9e4066Sahrens 	 */
773fa9e4066Sahrens 	if (spa->spa_sync_on) {
774fa9e4066Sahrens 		txg_sync_stop(spa->spa_dsl_pool);
775fa9e4066Sahrens 		spa->spa_sync_on = B_FALSE;
776fa9e4066Sahrens 	}
777fa9e4066Sahrens 
778fa9e4066Sahrens 	/*
779e14bb325SJeff Bonwick 	 * Wait for any outstanding async I/O to complete.
780fa9e4066Sahrens 	 */
78154d692b7SGeorge Wilson 	if (spa->spa_async_zio_root != NULL) {
78254d692b7SGeorge Wilson 		(void) zio_wait(spa->spa_async_zio_root);
78354d692b7SGeorge Wilson 		spa->spa_async_zio_root = NULL;
78454d692b7SGeorge Wilson 	}
785fa9e4066Sahrens 
786fa9e4066Sahrens 	/*
787fa9e4066Sahrens 	 * Close the dsl pool.
788fa9e4066Sahrens 	 */
789fa9e4066Sahrens 	if (spa->spa_dsl_pool) {
790fa9e4066Sahrens 		dsl_pool_close(spa->spa_dsl_pool);
791fa9e4066Sahrens 		spa->spa_dsl_pool = NULL;
792fa9e4066Sahrens 	}
793fa9e4066Sahrens 
794b24ab676SJeff Bonwick 	ddt_unload(spa);
795b24ab676SJeff Bonwick 
7968ad4d6ddSJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
7978ad4d6ddSJeff Bonwick 
7988ad4d6ddSJeff Bonwick 	/*
7998ad4d6ddSJeff Bonwick 	 * Drop and purge level 2 cache
8008ad4d6ddSJeff Bonwick 	 */
8018ad4d6ddSJeff Bonwick 	spa_l2cache_drop(spa);
8028ad4d6ddSJeff Bonwick 
803fa9e4066Sahrens 	/*
804fa9e4066Sahrens 	 * Close all vdevs.
805fa9e4066Sahrens 	 */
8060e34b6a7Sbonwick 	if (spa->spa_root_vdev)
807fa9e4066Sahrens 		vdev_free(spa->spa_root_vdev);
8080e34b6a7Sbonwick 	ASSERT(spa->spa_root_vdev == NULL);
809ea8dc4b6Seschrock 
810fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
811fa94a07fSbrendan 		vdev_free(spa->spa_spares.sav_vdevs[i]);
812fa94a07fSbrendan 	if (spa->spa_spares.sav_vdevs) {
813fa94a07fSbrendan 		kmem_free(spa->spa_spares.sav_vdevs,
814fa94a07fSbrendan 		    spa->spa_spares.sav_count * sizeof (void *));
815fa94a07fSbrendan 		spa->spa_spares.sav_vdevs = NULL;
81699653d4eSeschrock 	}
817fa94a07fSbrendan 	if (spa->spa_spares.sav_config) {
818fa94a07fSbrendan 		nvlist_free(spa->spa_spares.sav_config);
819fa94a07fSbrendan 		spa->spa_spares.sav_config = NULL;
820fa94a07fSbrendan 	}
8212ce8af81SEric Schrock 	spa->spa_spares.sav_count = 0;
822fa94a07fSbrendan 
823fa94a07fSbrendan 	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
824fa94a07fSbrendan 		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
825fa94a07fSbrendan 	if (spa->spa_l2cache.sav_vdevs) {
826fa94a07fSbrendan 		kmem_free(spa->spa_l2cache.sav_vdevs,
827fa94a07fSbrendan 		    spa->spa_l2cache.sav_count * sizeof (void *));
828fa94a07fSbrendan 		spa->spa_l2cache.sav_vdevs = NULL;
829fa94a07fSbrendan 	}
830fa94a07fSbrendan 	if (spa->spa_l2cache.sav_config) {
831fa94a07fSbrendan 		nvlist_free(spa->spa_l2cache.sav_config);
832fa94a07fSbrendan 		spa->spa_l2cache.sav_config = NULL;
83399653d4eSeschrock 	}
8342ce8af81SEric Schrock 	spa->spa_l2cache.sav_count = 0;
83599653d4eSeschrock 
836ea8dc4b6Seschrock 	spa->spa_async_suspended = 0;
8378ad4d6ddSJeff Bonwick 
8388ad4d6ddSJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
839fa9e4066Sahrens }
840fa9e4066Sahrens 
84199653d4eSeschrock /*
84299653d4eSeschrock  * Load (or re-load) the current list of vdevs describing the active spares for
84399653d4eSeschrock  * this pool.  When this is called, we have some form of basic information in
844fa94a07fSbrendan  * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
845fa94a07fSbrendan  * then re-generate a more complete list including status information.
84699653d4eSeschrock  */
84799653d4eSeschrock static void
84899653d4eSeschrock spa_load_spares(spa_t *spa)
84999653d4eSeschrock {
85099653d4eSeschrock 	nvlist_t **spares;
85199653d4eSeschrock 	uint_t nspares;
85299653d4eSeschrock 	int i;
85339c23413Seschrock 	vdev_t *vd, *tvd;
85499653d4eSeschrock 
855e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
856e14bb325SJeff Bonwick 
85799653d4eSeschrock 	/*
85899653d4eSeschrock 	 * First, close and free any existing spare vdevs.
85999653d4eSeschrock 	 */
860fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
861fa94a07fSbrendan 		vd = spa->spa_spares.sav_vdevs[i];
86239c23413Seschrock 
86339c23413Seschrock 		/* Undo the call to spa_activate() below */
864c5904d13Seschrock 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
865c5904d13Seschrock 		    B_FALSE)) != NULL && tvd->vdev_isspare)
86639c23413Seschrock 			spa_spare_remove(tvd);
86739c23413Seschrock 		vdev_close(vd);
86839c23413Seschrock 		vdev_free(vd);
86999653d4eSeschrock 	}
87039c23413Seschrock 
871fa94a07fSbrendan 	if (spa->spa_spares.sav_vdevs)
872fa94a07fSbrendan 		kmem_free(spa->spa_spares.sav_vdevs,
873fa94a07fSbrendan 		    spa->spa_spares.sav_count * sizeof (void *));
87499653d4eSeschrock 
875fa94a07fSbrendan 	if (spa->spa_spares.sav_config == NULL)
87699653d4eSeschrock 		nspares = 0;
87799653d4eSeschrock 	else
878fa94a07fSbrendan 		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
87999653d4eSeschrock 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
88099653d4eSeschrock 
881fa94a07fSbrendan 	spa->spa_spares.sav_count = (int)nspares;
882fa94a07fSbrendan 	spa->spa_spares.sav_vdevs = NULL;
88399653d4eSeschrock 
88499653d4eSeschrock 	if (nspares == 0)
88599653d4eSeschrock 		return;
88699653d4eSeschrock 
88799653d4eSeschrock 	/*
88899653d4eSeschrock 	 * Construct the array of vdevs, opening them to get status in the
88939c23413Seschrock 	 * process.   For each spare, there is potentially two different vdev_t
89039c23413Seschrock 	 * structures associated with it: one in the list of spares (used only
89139c23413Seschrock 	 * for basic validation purposes) and one in the active vdev
89239c23413Seschrock 	 * configuration (if it's spared in).  During this phase we open and
89339c23413Seschrock 	 * validate each vdev on the spare list.  If the vdev also exists in the
89439c23413Seschrock 	 * active configuration, then we also mark this vdev as an active spare.
89599653d4eSeschrock 	 */
896fa94a07fSbrendan 	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
897fa94a07fSbrendan 	    KM_SLEEP);
898fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++) {
89999653d4eSeschrock 		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
90099653d4eSeschrock 		    VDEV_ALLOC_SPARE) == 0);
90199653d4eSeschrock 		ASSERT(vd != NULL);
90299653d4eSeschrock 
903fa94a07fSbrendan 		spa->spa_spares.sav_vdevs[i] = vd;
90499653d4eSeschrock 
905c5904d13Seschrock 		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
906c5904d13Seschrock 		    B_FALSE)) != NULL) {
90739c23413Seschrock 			if (!tvd->vdev_isspare)
90839c23413Seschrock 				spa_spare_add(tvd);
90939c23413Seschrock 
91039c23413Seschrock 			/*
91139c23413Seschrock 			 * We only mark the spare active if we were successfully
91239c23413Seschrock 			 * able to load the vdev.  Otherwise, importing a pool
91339c23413Seschrock 			 * with a bad active spare would result in strange
91439c23413Seschrock 			 * behavior, because multiple pool would think the spare
91539c23413Seschrock 			 * is actively in use.
91639c23413Seschrock 			 *
91739c23413Seschrock 			 * There is a vulnerability here to an equally bizarre
91839c23413Seschrock 			 * circumstance, where a dead active spare is later
91939c23413Seschrock 			 * brought back to life (onlined or otherwise).  Given
92039c23413Seschrock 			 * the rarity of this scenario, and the extra complexity
92139c23413Seschrock 			 * it adds, we ignore the possibility.
92239c23413Seschrock 			 */
92339c23413Seschrock 			if (!vdev_is_dead(tvd))
92439c23413Seschrock 				spa_spare_activate(tvd);
92539c23413Seschrock 		}
92639c23413Seschrock 
927e14bb325SJeff Bonwick 		vd->vdev_top = vd;
9286809eb4eSEric Schrock 		vd->vdev_aux = &spa->spa_spares;
929e14bb325SJeff Bonwick 
93099653d4eSeschrock 		if (vdev_open(vd) != 0)
93199653d4eSeschrock 			continue;
93299653d4eSeschrock 
933fa94a07fSbrendan 		if (vdev_validate_aux(vd) == 0)
934fa94a07fSbrendan 			spa_spare_add(vd);
93599653d4eSeschrock 	}
93699653d4eSeschrock 
93799653d4eSeschrock 	/*
93899653d4eSeschrock 	 * Recompute the stashed list of spares, with status information
93999653d4eSeschrock 	 * this time.
94099653d4eSeschrock 	 */
941fa94a07fSbrendan 	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
94299653d4eSeschrock 	    DATA_TYPE_NVLIST_ARRAY) == 0);
94399653d4eSeschrock 
944fa94a07fSbrendan 	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
945fa94a07fSbrendan 	    KM_SLEEP);
946fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
947fa94a07fSbrendan 		spares[i] = vdev_config_generate(spa,
948fa94a07fSbrendan 		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
949fa94a07fSbrendan 	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
950fa94a07fSbrendan 	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
951fa94a07fSbrendan 	for (i = 0; i < spa->spa_spares.sav_count; i++)
95299653d4eSeschrock 		nvlist_free(spares[i]);
953fa94a07fSbrendan 	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
954fa94a07fSbrendan }
955fa94a07fSbrendan 
956fa94a07fSbrendan /*
957fa94a07fSbrendan  * Load (or re-load) the current list of vdevs describing the active l2cache for
958fa94a07fSbrendan  * this pool.  When this is called, we have some form of basic information in
959fa94a07fSbrendan  * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
960fa94a07fSbrendan  * then re-generate a more complete list including status information.
961fa94a07fSbrendan  * Devices which are already active have their details maintained, and are
962fa94a07fSbrendan  * not re-opened.
963fa94a07fSbrendan  */
964fa94a07fSbrendan static void
965fa94a07fSbrendan spa_load_l2cache(spa_t *spa)
966fa94a07fSbrendan {
967fa94a07fSbrendan 	nvlist_t **l2cache;
968fa94a07fSbrendan 	uint_t nl2cache;
969fa94a07fSbrendan 	int i, j, oldnvdevs;
970573ca77eSGeorge Wilson 	uint64_t guid;
971fa94a07fSbrendan 	vdev_t *vd, **oldvdevs, **newvdevs;
972fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
973fa94a07fSbrendan 
974e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
975e14bb325SJeff Bonwick 
976fa94a07fSbrendan 	if (sav->sav_config != NULL) {
977fa94a07fSbrendan 		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
978fa94a07fSbrendan 		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
979fa94a07fSbrendan 		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
980fa94a07fSbrendan 	} else {
981fa94a07fSbrendan 		nl2cache = 0;
982fa94a07fSbrendan 	}
983fa94a07fSbrendan 
984fa94a07fSbrendan 	oldvdevs = sav->sav_vdevs;
985fa94a07fSbrendan 	oldnvdevs = sav->sav_count;
986fa94a07fSbrendan 	sav->sav_vdevs = NULL;
987fa94a07fSbrendan 	sav->sav_count = 0;
988fa94a07fSbrendan 
989fa94a07fSbrendan 	/*
990fa94a07fSbrendan 	 * Process new nvlist of vdevs.
991fa94a07fSbrendan 	 */
992fa94a07fSbrendan 	for (i = 0; i < nl2cache; i++) {
993fa94a07fSbrendan 		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
994fa94a07fSbrendan 		    &guid) == 0);
995fa94a07fSbrendan 
996fa94a07fSbrendan 		newvdevs[i] = NULL;
997fa94a07fSbrendan 		for (j = 0; j < oldnvdevs; j++) {
998fa94a07fSbrendan 			vd = oldvdevs[j];
999fa94a07fSbrendan 			if (vd != NULL && guid == vd->vdev_guid) {
1000fa94a07fSbrendan 				/*
1001fa94a07fSbrendan 				 * Retain previous vdev for add/remove ops.
1002fa94a07fSbrendan 				 */
1003fa94a07fSbrendan 				newvdevs[i] = vd;
1004fa94a07fSbrendan 				oldvdevs[j] = NULL;
1005fa94a07fSbrendan 				break;
1006fa94a07fSbrendan 			}
1007fa94a07fSbrendan 		}
1008fa94a07fSbrendan 
1009fa94a07fSbrendan 		if (newvdevs[i] == NULL) {
1010fa94a07fSbrendan 			/*
1011fa94a07fSbrendan 			 * Create new vdev
1012fa94a07fSbrendan 			 */
1013fa94a07fSbrendan 			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
1014fa94a07fSbrendan 			    VDEV_ALLOC_L2CACHE) == 0);
1015fa94a07fSbrendan 			ASSERT(vd != NULL);
1016fa94a07fSbrendan 			newvdevs[i] = vd;
1017fa94a07fSbrendan 
1018fa94a07fSbrendan 			/*
1019fa94a07fSbrendan 			 * Commit this vdev as an l2cache device,
1020fa94a07fSbrendan 			 * even if it fails to open.
1021fa94a07fSbrendan 			 */
1022fa94a07fSbrendan 			spa_l2cache_add(vd);
1023fa94a07fSbrendan 
1024c5904d13Seschrock 			vd->vdev_top = vd;
1025c5904d13Seschrock 			vd->vdev_aux = sav;
1026c5904d13Seschrock 
1027c5904d13Seschrock 			spa_l2cache_activate(vd);
1028c5904d13Seschrock 
1029fa94a07fSbrendan 			if (vdev_open(vd) != 0)
1030fa94a07fSbrendan 				continue;
1031fa94a07fSbrendan 
1032fa94a07fSbrendan 			(void) vdev_validate_aux(vd);
1033fa94a07fSbrendan 
1034573ca77eSGeorge Wilson 			if (!vdev_is_dead(vd))
1035573ca77eSGeorge Wilson 				l2arc_add_vdev(spa, vd);
1036fa94a07fSbrendan 		}
1037fa94a07fSbrendan 	}
1038fa94a07fSbrendan 
1039fa94a07fSbrendan 	/*
1040fa94a07fSbrendan 	 * Purge vdevs that were dropped
1041fa94a07fSbrendan 	 */
1042fa94a07fSbrendan 	for (i = 0; i < oldnvdevs; i++) {
1043fa94a07fSbrendan 		uint64_t pool;
1044fa94a07fSbrendan 
1045fa94a07fSbrendan 		vd = oldvdevs[i];
1046fa94a07fSbrendan 		if (vd != NULL) {
10478ad4d6ddSJeff Bonwick 			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
10488ad4d6ddSJeff Bonwick 			    pool != 0ULL && l2arc_vdev_present(vd))
1049fa94a07fSbrendan 				l2arc_remove_vdev(vd);
1050fa94a07fSbrendan 			(void) vdev_close(vd);
1051fa94a07fSbrendan 			spa_l2cache_remove(vd);
1052fa94a07fSbrendan 		}
1053fa94a07fSbrendan 	}
1054fa94a07fSbrendan 
1055fa94a07fSbrendan 	if (oldvdevs)
1056fa94a07fSbrendan 		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
1057fa94a07fSbrendan 
1058fa94a07fSbrendan 	if (sav->sav_config == NULL)
1059fa94a07fSbrendan 		goto out;
1060fa94a07fSbrendan 
1061fa94a07fSbrendan 	sav->sav_vdevs = newvdevs;
1062fa94a07fSbrendan 	sav->sav_count = (int)nl2cache;
1063fa94a07fSbrendan 
1064fa94a07fSbrendan 	/*
1065fa94a07fSbrendan 	 * Recompute the stashed list of l2cache devices, with status
1066fa94a07fSbrendan 	 * information this time.
1067fa94a07fSbrendan 	 */
1068fa94a07fSbrendan 	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
1069fa94a07fSbrendan 	    DATA_TYPE_NVLIST_ARRAY) == 0);
1070fa94a07fSbrendan 
1071fa94a07fSbrendan 	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
1072fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
1073fa94a07fSbrendan 		l2cache[i] = vdev_config_generate(spa,
1074fa94a07fSbrendan 		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
1075fa94a07fSbrendan 	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
1076fa94a07fSbrendan 	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
1077fa94a07fSbrendan out:
1078fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
1079fa94a07fSbrendan 		nvlist_free(l2cache[i]);
1080fa94a07fSbrendan 	if (sav->sav_count)
1081fa94a07fSbrendan 		kmem_free(l2cache, sav->sav_count * sizeof (void *));
108299653d4eSeschrock }
108399653d4eSeschrock 
108499653d4eSeschrock static int
108599653d4eSeschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
108699653d4eSeschrock {
108799653d4eSeschrock 	dmu_buf_t *db;
108899653d4eSeschrock 	char *packed = NULL;
108999653d4eSeschrock 	size_t nvsize = 0;
109099653d4eSeschrock 	int error;
109199653d4eSeschrock 	*value = NULL;
109299653d4eSeschrock 
109399653d4eSeschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
109499653d4eSeschrock 	nvsize = *(uint64_t *)db->db_data;
109599653d4eSeschrock 	dmu_buf_rele(db, FTAG);
109699653d4eSeschrock 
109799653d4eSeschrock 	packed = kmem_alloc(nvsize, KM_SLEEP);
10987bfdf011SNeil Perrin 	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
10997bfdf011SNeil Perrin 	    DMU_READ_PREFETCH);
110099653d4eSeschrock 	if (error == 0)
110199653d4eSeschrock 		error = nvlist_unpack(packed, nvsize, value, 0);
110299653d4eSeschrock 	kmem_free(packed, nvsize);
110399653d4eSeschrock 
110499653d4eSeschrock 	return (error);
110599653d4eSeschrock }
110699653d4eSeschrock 
11073d7072f8Seschrock /*
11083d7072f8Seschrock  * Checks to see if the given vdev could not be opened, in which case we post a
11093d7072f8Seschrock  * sysevent to notify the autoreplace code that the device has been removed.
11103d7072f8Seschrock  */
11113d7072f8Seschrock static void
11123d7072f8Seschrock spa_check_removed(vdev_t *vd)
11133d7072f8Seschrock {
1114573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++)
11153d7072f8Seschrock 		spa_check_removed(vd->vdev_child[c]);
11163d7072f8Seschrock 
11173d7072f8Seschrock 	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
11183d7072f8Seschrock 		zfs_post_autoreplace(vd->vdev_spa, vd);
11193d7072f8Seschrock 		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
11203d7072f8Seschrock 	}
11213d7072f8Seschrock }
11223d7072f8Seschrock 
1123e6ca193dSGeorge Wilson /*
1124e6ca193dSGeorge Wilson  * Load the slog device state from the config object since it's possible
1125e6ca193dSGeorge Wilson  * that the label does not contain the most up-to-date information.
1126e6ca193dSGeorge Wilson  */
1127e6ca193dSGeorge Wilson void
112888ecc943SGeorge Wilson spa_load_log_state(spa_t *spa, nvlist_t *nv)
1129e6ca193dSGeorge Wilson {
113088ecc943SGeorge Wilson 	vdev_t *ovd, *rvd = spa->spa_root_vdev;
1131e6ca193dSGeorge Wilson 
113288ecc943SGeorge Wilson 	/*
113388ecc943SGeorge Wilson 	 * Load the original root vdev tree from the passed config.
113488ecc943SGeorge Wilson 	 */
113588ecc943SGeorge Wilson 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
113688ecc943SGeorge Wilson 	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
1137e6ca193dSGeorge Wilson 
113888ecc943SGeorge Wilson 	for (int c = 0; c < rvd->vdev_children; c++) {
113988ecc943SGeorge Wilson 		vdev_t *cvd = rvd->vdev_child[c];
114088ecc943SGeorge Wilson 		if (cvd->vdev_islog)
114188ecc943SGeorge Wilson 			vdev_load_log_state(cvd, ovd->vdev_child[c]);
1142e6ca193dSGeorge Wilson 	}
114388ecc943SGeorge Wilson 	vdev_free(ovd);
114488ecc943SGeorge Wilson 	spa_config_exit(spa, SCL_ALL, FTAG);
1145e6ca193dSGeorge Wilson }
1146e6ca193dSGeorge Wilson 
1147b87f3af3Sperrin /*
1148b87f3af3Sperrin  * Check for missing log devices
1149b87f3af3Sperrin  */
1150b87f3af3Sperrin int
1151b87f3af3Sperrin spa_check_logs(spa_t *spa)
1152b87f3af3Sperrin {
1153b87f3af3Sperrin 	switch (spa->spa_log_state) {
1154b87f3af3Sperrin 	case SPA_LOG_MISSING:
1155b87f3af3Sperrin 		/* need to recheck in case slog has been restored */
1156b87f3af3Sperrin 	case SPA_LOG_UNKNOWN:
1157b87f3af3Sperrin 		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
1158b87f3af3Sperrin 		    DS_FIND_CHILDREN)) {
1159b87f3af3Sperrin 			spa->spa_log_state = SPA_LOG_MISSING;
1160b87f3af3Sperrin 			return (1);
1161b87f3af3Sperrin 		}
1162b87f3af3Sperrin 		break;
1163b87f3af3Sperrin 	}
1164b87f3af3Sperrin 	return (0);
1165b87f3af3Sperrin }
1166b87f3af3Sperrin 
1167b693757aSEric Schrock static void
1168b693757aSEric Schrock spa_aux_check_removed(spa_aux_vdev_t *sav)
1169b693757aSEric Schrock {
1170b24ab676SJeff Bonwick 	for (int i = 0; i < sav->sav_count; i++)
1171b693757aSEric Schrock 		spa_check_removed(sav->sav_vdevs[i]);
1172b693757aSEric Schrock }
1173b693757aSEric Schrock 
1174b24ab676SJeff Bonwick void
1175b24ab676SJeff Bonwick spa_claim_notify(zio_t *zio)
1176b24ab676SJeff Bonwick {
1177b24ab676SJeff Bonwick 	spa_t *spa = zio->io_spa;
1178b24ab676SJeff Bonwick 
1179b24ab676SJeff Bonwick 	if (zio->io_error)
1180b24ab676SJeff Bonwick 		return;
1181b24ab676SJeff Bonwick 
1182b24ab676SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
1183b24ab676SJeff Bonwick 	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
1184b24ab676SJeff Bonwick 		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
1185b24ab676SJeff Bonwick 	mutex_exit(&spa->spa_props_lock);
1186b24ab676SJeff Bonwick }
1187b24ab676SJeff Bonwick 
1188468c413aSTim Haley typedef struct spa_load_error {
1189468c413aSTim Haley 	uint64_t	sle_metadata_count;
1190468c413aSTim Haley 	uint64_t	sle_data_count;
1191468c413aSTim Haley } spa_load_error_t;
1192468c413aSTim Haley 
1193468c413aSTim Haley static void
1194468c413aSTim Haley spa_load_verify_done(zio_t *zio)
1195468c413aSTim Haley {
1196468c413aSTim Haley 	blkptr_t *bp = zio->io_bp;
1197468c413aSTim Haley 	spa_load_error_t *sle = zio->io_private;
1198468c413aSTim Haley 	dmu_object_type_t type = BP_GET_TYPE(bp);
1199468c413aSTim Haley 	int error = zio->io_error;
1200468c413aSTim Haley 
1201468c413aSTim Haley 	if (error) {
1202468c413aSTim Haley 		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
1203468c413aSTim Haley 		    type != DMU_OT_INTENT_LOG)
1204468c413aSTim Haley 			atomic_add_64(&sle->sle_metadata_count, 1);
1205468c413aSTim Haley 		else
1206468c413aSTim Haley 			atomic_add_64(&sle->sle_data_count, 1);
1207468c413aSTim Haley 	}
1208468c413aSTim Haley 	zio_data_buf_free(zio->io_data, zio->io_size);
1209468c413aSTim Haley }
1210468c413aSTim Haley 
1211468c413aSTim Haley /*ARGSUSED*/
1212468c413aSTim Haley static int
1213b24ab676SJeff Bonwick spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
1214b24ab676SJeff Bonwick     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1215468c413aSTim Haley {
1216468c413aSTim Haley 	if (bp != NULL) {
1217468c413aSTim Haley 		zio_t *rio = arg;
1218468c413aSTim Haley 		size_t size = BP_GET_PSIZE(bp);
1219468c413aSTim Haley 		void *data = zio_data_buf_alloc(size);
1220468c413aSTim Haley 
1221468c413aSTim Haley 		zio_nowait(zio_read(rio, spa, bp, data, size,
1222468c413aSTim Haley 		    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
1223468c413aSTim Haley 		    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
1224468c413aSTim Haley 		    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
1225468c413aSTim Haley 	}
1226468c413aSTim Haley 	return (0);
1227468c413aSTim Haley }
1228468c413aSTim Haley 
1229468c413aSTim Haley static int
1230468c413aSTim Haley spa_load_verify(spa_t *spa)
1231468c413aSTim Haley {
1232468c413aSTim Haley 	zio_t *rio;
1233468c413aSTim Haley 	spa_load_error_t sle = { 0 };
1234468c413aSTim Haley 	zpool_rewind_policy_t policy;
1235468c413aSTim Haley 	boolean_t verify_ok = B_FALSE;
1236468c413aSTim Haley 	int error;
1237468c413aSTim Haley 
1238468c413aSTim Haley 	rio = zio_root(spa, NULL, &sle,
1239468c413aSTim Haley 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
1240468c413aSTim Haley 
1241bbfd46c4SJeff Bonwick 	error = traverse_pool(spa, spa->spa_verify_min_txg,
1242bbfd46c4SJeff Bonwick 	    TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
1243468c413aSTim Haley 
1244468c413aSTim Haley 	(void) zio_wait(rio);
1245468c413aSTim Haley 
1246468c413aSTim Haley 	zpool_get_rewind_policy(spa->spa_config, &policy);
1247468c413aSTim Haley 
1248468c413aSTim Haley 	spa->spa_load_meta_errors = sle.sle_metadata_count;
1249468c413aSTim Haley 	spa->spa_load_data_errors = sle.sle_data_count;
1250468c413aSTim Haley 
1251468c413aSTim Haley 	if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta &&
1252468c413aSTim Haley 	    sle.sle_data_count <= policy.zrp_maxdata) {
1253468c413aSTim Haley 		verify_ok = B_TRUE;
1254468c413aSTim Haley 		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
1255468c413aSTim Haley 		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
1256a33cae98STim Haley 	} else {
1257a33cae98STim Haley 		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
1258468c413aSTim Haley 	}
1259468c413aSTim Haley 
1260468c413aSTim Haley 	if (error) {
1261468c413aSTim Haley 		if (error != ENXIO && error != EIO)
1262468c413aSTim Haley 			error = EIO;
1263468c413aSTim Haley 		return (error);
1264468c413aSTim Haley 	}
1265468c413aSTim Haley 
1266468c413aSTim Haley 	return (verify_ok ? 0 : EIO);
1267468c413aSTim Haley }
1268468c413aSTim Haley 
1269fa9e4066Sahrens /*
1270fa9e4066Sahrens  * Load an existing storage pool, using the pool's builtin spa_config as a
1271ea8dc4b6Seschrock  * source of configuration information.
1272fa9e4066Sahrens  */
1273fa9e4066Sahrens static int
1274468c413aSTim Haley spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
1275fa9e4066Sahrens {
1276fa9e4066Sahrens 	int error = 0;
127788ecc943SGeorge Wilson 	nvlist_t *nvconfig, *nvroot = NULL;
1278fa9e4066Sahrens 	vdev_t *rvd;
1279fa9e4066Sahrens 	uberblock_t *ub = &spa->spa_uberblock;
12800373e76bSbonwick 	uint64_t config_cache_txg = spa->spa_config_txg;
1281fa9e4066Sahrens 	uint64_t pool_guid;
128299653d4eSeschrock 	uint64_t version;
12833d7072f8Seschrock 	uint64_t autoreplace = 0;
12848ad4d6ddSJeff Bonwick 	int orig_mode = spa->spa_mode;
1285b87f3af3Sperrin 	char *ereport = FM_EREPORT_ZFS_POOL;
1286468c413aSTim Haley 	nvlist_t *config = spa->spa_config;
1287fa9e4066Sahrens 
12888ad4d6ddSJeff Bonwick 	/*
12898ad4d6ddSJeff Bonwick 	 * If this is an untrusted config, access the pool in read-only mode.
12908ad4d6ddSJeff Bonwick 	 * This prevents things like resilvering recently removed devices.
12918ad4d6ddSJeff Bonwick 	 */
12928ad4d6ddSJeff Bonwick 	if (!mosconfig)
12938ad4d6ddSJeff Bonwick 		spa->spa_mode = FREAD;
12948ad4d6ddSJeff Bonwick 
1295e14bb325SJeff Bonwick 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
1296e14bb325SJeff Bonwick 
1297ea8dc4b6Seschrock 	spa->spa_load_state = state;
12980373e76bSbonwick 
1299fa9e4066Sahrens 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
1300a9926bf0Sbonwick 	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
1301ea8dc4b6Seschrock 		error = EINVAL;
1302ea8dc4b6Seschrock 		goto out;
1303ea8dc4b6Seschrock 	}
1304fa9e4066Sahrens 
130599653d4eSeschrock 	/*
130699653d4eSeschrock 	 * Versioning wasn't explicitly added to the label until later, so if
130799653d4eSeschrock 	 * it's not present treat it as the initial version.
130899653d4eSeschrock 	 */
130999653d4eSeschrock 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
1310e7437265Sahrens 		version = SPA_VERSION_INITIAL;
131199653d4eSeschrock 
1312a9926bf0Sbonwick 	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
1313a9926bf0Sbonwick 	    &spa->spa_config_txg);
1314a9926bf0Sbonwick 
13150373e76bSbonwick 	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
1316ea8dc4b6Seschrock 	    spa_guid_exists(pool_guid, 0)) {
1317ea8dc4b6Seschrock 		error = EEXIST;
1318ea8dc4b6Seschrock 		goto out;
1319ea8dc4b6Seschrock 	}
1320fa9e4066Sahrens 
1321b5989ec7Seschrock 	spa->spa_load_guid = pool_guid;
1322b5989ec7Seschrock 
132354d692b7SGeorge Wilson 	/*
132454d692b7SGeorge Wilson 	 * Create "The Godfather" zio to hold all async IOs
132554d692b7SGeorge Wilson 	 */
132625f89ee2SJeff Bonwick 	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
132725f89ee2SJeff Bonwick 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
132854d692b7SGeorge Wilson 
1329fa9e4066Sahrens 	/*
133099653d4eSeschrock 	 * Parse the configuration into a vdev tree.  We explicitly set the
133199653d4eSeschrock 	 * value that will be returned by spa_version() since parsing the
133299653d4eSeschrock 	 * configuration requires knowing the version number.
1333fa9e4066Sahrens 	 */
1334e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
133599653d4eSeschrock 	spa->spa_ubsync.ub_version = version;
133699653d4eSeschrock 	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
1337e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
1338fa9e4066Sahrens 
133999653d4eSeschrock 	if (error != 0)
1340ea8dc4b6Seschrock 		goto out;
1341fa9e4066Sahrens 
13420e34b6a7Sbonwick 	ASSERT(spa->spa_root_vdev == rvd);
1343fa9e4066Sahrens 	ASSERT(spa_guid(spa) == pool_guid);
1344fa9e4066Sahrens 
1345fa9e4066Sahrens 	/*
1346fa9e4066Sahrens 	 * Try to open all vdevs, loading each label in the process.
1347fa9e4066Sahrens 	 */
1348e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
13490bf246f5Smc 	error = vdev_open(rvd);
1350e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
13510bf246f5Smc 	if (error != 0)
1352ea8dc4b6Seschrock 		goto out;
1353fa9e4066Sahrens 
1354560e6e96Seschrock 	/*
135577e3a39cSMark J Musante 	 * We need to validate the vdev labels against the configuration that
135677e3a39cSMark J Musante 	 * we have in hand, which is dependent on the setting of mosconfig. If
135777e3a39cSMark J Musante 	 * mosconfig is true then we're validating the vdev labels based on
135877e3a39cSMark J Musante 	 * that config. Otherwise, we're validating against the cached config
135977e3a39cSMark J Musante 	 * (zpool.cache) that was read when we loaded the zfs module, and then
136077e3a39cSMark J Musante 	 * later we will recursively call spa_load() and validate against
136177e3a39cSMark J Musante 	 * the vdev config.
1362560e6e96Seschrock 	 */
136377e3a39cSMark J Musante 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
136477e3a39cSMark J Musante 	error = vdev_validate(rvd);
136577e3a39cSMark J Musante 	spa_config_exit(spa, SCL_ALL, FTAG);
136677e3a39cSMark J Musante 	if (error != 0)
136777e3a39cSMark J Musante 		goto out;
1368560e6e96Seschrock 
1369560e6e96Seschrock 	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
1370560e6e96Seschrock 		error = ENXIO;
1371560e6e96Seschrock 		goto out;
1372560e6e96Seschrock 	}
1373560e6e96Seschrock 
1374fa9e4066Sahrens 	/*
1375fa9e4066Sahrens 	 * Find the best uberblock.
1376fa9e4066Sahrens 	 */
1377e14bb325SJeff Bonwick 	vdev_uberblock_load(NULL, rvd, ub);
1378fa9e4066Sahrens 
1379fa9e4066Sahrens 	/*
1380fa9e4066Sahrens 	 * If we weren't able to find a single valid uberblock, return failure.
1381fa9e4066Sahrens 	 */
1382fa9e4066Sahrens 	if (ub->ub_txg == 0) {
1383eaca9bbdSeschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1384eaca9bbdSeschrock 		    VDEV_AUX_CORRUPT_DATA);
1385ea8dc4b6Seschrock 		error = ENXIO;
1386ea8dc4b6Seschrock 		goto out;
1387ea8dc4b6Seschrock 	}
1388ea8dc4b6Seschrock 
1389ea8dc4b6Seschrock 	/*
1390ea8dc4b6Seschrock 	 * If the pool is newer than the code, we can't open it.
1391ea8dc4b6Seschrock 	 */
1392e7437265Sahrens 	if (ub->ub_version > SPA_VERSION) {
1393eaca9bbdSeschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1394eaca9bbdSeschrock 		    VDEV_AUX_VERSION_NEWER);
1395ea8dc4b6Seschrock 		error = ENOTSUP;
1396ea8dc4b6Seschrock 		goto out;
1397fa9e4066Sahrens 	}
1398fa9e4066Sahrens 
1399fa9e4066Sahrens 	/*
1400fa9e4066Sahrens 	 * If the vdev guid sum doesn't match the uberblock, we have an
1401fa9e4066Sahrens 	 * incomplete configuration.
1402fa9e4066Sahrens 	 */
1403ecc2d604Sbonwick 	if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
1404ea8dc4b6Seschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1405ea8dc4b6Seschrock 		    VDEV_AUX_BAD_GUID_SUM);
1406ea8dc4b6Seschrock 		error = ENXIO;
1407ea8dc4b6Seschrock 		goto out;
1408fa9e4066Sahrens 	}
1409fa9e4066Sahrens 
1410fa9e4066Sahrens 	/*
1411fa9e4066Sahrens 	 * Initialize internal SPA structures.
1412fa9e4066Sahrens 	 */
1413fa9e4066Sahrens 	spa->spa_state = POOL_STATE_ACTIVE;
1414fa9e4066Sahrens 	spa->spa_ubsync = spa->spa_uberblock;
1415468c413aSTim Haley 	spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
1416468c413aSTim Haley 	    TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE;
1417468c413aSTim Haley 	spa->spa_first_txg = spa->spa_last_ubsync_txg ?
1418468c413aSTim Haley 	    spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
1419b24ab676SJeff Bonwick 	spa->spa_claim_max_txg = spa->spa_first_txg;
1420b24ab676SJeff Bonwick 
1421ea8dc4b6Seschrock 	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
1422ea8dc4b6Seschrock 	if (error) {
1423ea8dc4b6Seschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1424ea8dc4b6Seschrock 		    VDEV_AUX_CORRUPT_DATA);
1425468c413aSTim Haley 		error = EIO;
1426ea8dc4b6Seschrock 		goto out;
1427ea8dc4b6Seschrock 	}
1428fa9e4066Sahrens 	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
1429fa9e4066Sahrens 
1430ea8dc4b6Seschrock 	if (zap_lookup(spa->spa_meta_objset,
1431fa9e4066Sahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
1432ea8dc4b6Seschrock 	    sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
1433ea8dc4b6Seschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1434ea8dc4b6Seschrock 		    VDEV_AUX_CORRUPT_DATA);
1435ea8dc4b6Seschrock 		error = EIO;
1436ea8dc4b6Seschrock 		goto out;
1437ea8dc4b6Seschrock 	}
1438fa9e4066Sahrens 
143988ecc943SGeorge Wilson 	if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) {
144088ecc943SGeorge Wilson 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
144188ecc943SGeorge Wilson 		    VDEV_AUX_CORRUPT_DATA);
144288ecc943SGeorge Wilson 		error = EIO;
144388ecc943SGeorge Wilson 		goto out;
144488ecc943SGeorge Wilson 	}
144588ecc943SGeorge Wilson 
1446fa9e4066Sahrens 	if (!mosconfig) {
144795173954Sek 		uint64_t hostid;
1448fa9e4066Sahrens 
144988ecc943SGeorge Wilson 		if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
145077650510SLin Ling 		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
145195173954Sek 			char *hostname;
145295173954Sek 			unsigned long myhostid = 0;
145395173954Sek 
145488ecc943SGeorge Wilson 			VERIFY(nvlist_lookup_string(nvconfig,
145595173954Sek 			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
145695173954Sek 
14575679c89fSjv #ifdef	_KERNEL
14585679c89fSjv 			myhostid = zone_get_hostid(NULL);
14595679c89fSjv #else	/* _KERNEL */
14605679c89fSjv 			/*
14615679c89fSjv 			 * We're emulating the system's hostid in userland, so
14625679c89fSjv 			 * we can't use zone_get_hostid().
14635679c89fSjv 			 */
146495173954Sek 			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
14655679c89fSjv #endif	/* _KERNEL */
146617194a52Slling 			if (hostid != 0 && myhostid != 0 &&
14675679c89fSjv 			    hostid != myhostid) {
146895173954Sek 				cmn_err(CE_WARN, "pool '%s' could not be "
146995173954Sek 				    "loaded as it was last accessed by "
147077650510SLin Ling 				    "another system (host: %s hostid: 0x%lx). "
147195173954Sek 				    "See: http://www.sun.com/msg/ZFS-8000-EY",
1472e14bb325SJeff Bonwick 				    spa_name(spa), hostname,
147395173954Sek 				    (unsigned long)hostid);
147495173954Sek 				error = EBADF;
147595173954Sek 				goto out;
147695173954Sek 			}
147795173954Sek 		}
147895173954Sek 
147988ecc943SGeorge Wilson 		spa_config_set(spa, nvconfig);
1480fa9e4066Sahrens 		spa_unload(spa);
1481fa9e4066Sahrens 		spa_deactivate(spa);
14828ad4d6ddSJeff Bonwick 		spa_activate(spa, orig_mode);
1483fa9e4066Sahrens 
1484468c413aSTim Haley 		return (spa_load(spa, state, B_TRUE));
1485fa9e4066Sahrens 	}
1486fa9e4066Sahrens 
1487ea8dc4b6Seschrock 	if (zap_lookup(spa->spa_meta_objset,
1488fa9e4066Sahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
1489b24ab676SJeff Bonwick 	    sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) {
1490ea8dc4b6Seschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1491ea8dc4b6Seschrock 		    VDEV_AUX_CORRUPT_DATA);
1492ea8dc4b6Seschrock 		error = EIO;
1493ea8dc4b6Seschrock 		goto out;
1494ea8dc4b6Seschrock 	}
1495fa9e4066Sahrens 
149699653d4eSeschrock 	/*
149799653d4eSeschrock 	 * Load the bit that tells us to use the new accounting function
149899653d4eSeschrock 	 * (raid-z deflation).  If we have an older pool, this will not
149999653d4eSeschrock 	 * be present.
150099653d4eSeschrock 	 */
150199653d4eSeschrock 	error = zap_lookup(spa->spa_meta_objset,
150299653d4eSeschrock 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
150399653d4eSeschrock 	    sizeof (uint64_t), 1, &spa->spa_deflate);
150499653d4eSeschrock 	if (error != 0 && error != ENOENT) {
150599653d4eSeschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
150699653d4eSeschrock 		    VDEV_AUX_CORRUPT_DATA);
150799653d4eSeschrock 		error = EIO;
150899653d4eSeschrock 		goto out;
150999653d4eSeschrock 	}
151099653d4eSeschrock 
1511fa9e4066Sahrens 	/*
1512ea8dc4b6Seschrock 	 * Load the persistent error log.  If we have an older pool, this will
1513ea8dc4b6Seschrock 	 * not be present.
1514fa9e4066Sahrens 	 */
1515ea8dc4b6Seschrock 	error = zap_lookup(spa->spa_meta_objset,
1516ea8dc4b6Seschrock 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
1517ea8dc4b6Seschrock 	    sizeof (uint64_t), 1, &spa->spa_errlog_last);
1518d80c45e0Sbonwick 	if (error != 0 && error != ENOENT) {
1519ea8dc4b6Seschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1520ea8dc4b6Seschrock 		    VDEV_AUX_CORRUPT_DATA);
1521ea8dc4b6Seschrock 		error = EIO;
1522ea8dc4b6Seschrock 		goto out;
1523ea8dc4b6Seschrock 	}
1524ea8dc4b6Seschrock 
1525ea8dc4b6Seschrock 	error = zap_lookup(spa->spa_meta_objset,
1526ea8dc4b6Seschrock 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
1527ea8dc4b6Seschrock 	    sizeof (uint64_t), 1, &spa->spa_errlog_scrub);
1528ea8dc4b6Seschrock 	if (error != 0 && error != ENOENT) {
1529ea8dc4b6Seschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1530ea8dc4b6Seschrock 		    VDEV_AUX_CORRUPT_DATA);
1531ea8dc4b6Seschrock 		error = EIO;
1532ea8dc4b6Seschrock 		goto out;
1533ea8dc4b6Seschrock 	}
1534ea8dc4b6Seschrock 
153506eeb2adSek 	/*
153606eeb2adSek 	 * Load the history object.  If we have an older pool, this
153706eeb2adSek 	 * will not be present.
153806eeb2adSek 	 */
153906eeb2adSek 	error = zap_lookup(spa->spa_meta_objset,
154006eeb2adSek 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY,
154106eeb2adSek 	    sizeof (uint64_t), 1, &spa->spa_history);
154206eeb2adSek 	if (error != 0 && error != ENOENT) {
154306eeb2adSek 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
154406eeb2adSek 		    VDEV_AUX_CORRUPT_DATA);
154506eeb2adSek 		error = EIO;
154606eeb2adSek 		goto out;
154706eeb2adSek 	}
154806eeb2adSek 
154999653d4eSeschrock 	/*
155099653d4eSeschrock 	 * Load any hot spares for this pool.
155199653d4eSeschrock 	 */
155299653d4eSeschrock 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
1553fa94a07fSbrendan 	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object);
155499653d4eSeschrock 	if (error != 0 && error != ENOENT) {
155599653d4eSeschrock 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
155699653d4eSeschrock 		    VDEV_AUX_CORRUPT_DATA);
155799653d4eSeschrock 		error = EIO;
155899653d4eSeschrock 		goto out;
155999653d4eSeschrock 	}
156099653d4eSeschrock 	if (error == 0) {
1561e7437265Sahrens 		ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
1562fa94a07fSbrendan 		if (load_nvlist(spa, spa->spa_spares.sav_object,
1563fa94a07fSbrendan 		    &spa->spa_spares.sav_config) != 0) {
156499653d4eSeschrock 			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
156599653d4eSeschrock 			    VDEV_AUX_CORRUPT_DATA);
156699653d4eSeschrock 			error = EIO;
156799653d4eSeschrock 			goto out;
156899653d4eSeschrock 		}
156999653d4eSeschrock 
1570e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
157199653d4eSeschrock 		spa_load_spares(spa);
1572e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
157399653d4eSeschrock 	}
157499653d4eSeschrock 
1575fa94a07fSbrendan 	/*
1576fa94a07fSbrendan 	 * Load any level 2 ARC devices for this pool.
1577fa94a07fSbrendan 	 */
1578fa94a07fSbrendan 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
1579fa94a07fSbrendan 	    DMU_POOL_L2CACHE, sizeof (uint64_t), 1,
1580fa94a07fSbrendan 	    &spa->spa_l2cache.sav_object);
1581fa94a07fSbrendan 	if (error != 0 && error != ENOENT) {
1582fa94a07fSbrendan 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1583fa94a07fSbrendan 		    VDEV_AUX_CORRUPT_DATA);
1584fa94a07fSbrendan 		error = EIO;
1585fa94a07fSbrendan 		goto out;
1586fa94a07fSbrendan 	}
1587fa94a07fSbrendan 	if (error == 0) {
1588fa94a07fSbrendan 		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
1589fa94a07fSbrendan 		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
1590fa94a07fSbrendan 		    &spa->spa_l2cache.sav_config) != 0) {
1591fa94a07fSbrendan 			vdev_set_state(rvd, B_TRUE,
1592fa94a07fSbrendan 			    VDEV_STATE_CANT_OPEN,
1593fa94a07fSbrendan 			    VDEV_AUX_CORRUPT_DATA);
1594fa94a07fSbrendan 			error = EIO;
1595fa94a07fSbrendan 			goto out;
1596fa94a07fSbrendan 		}
1597fa94a07fSbrendan 
1598e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
1599fa94a07fSbrendan 		spa_load_l2cache(spa);
1600e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
1601fa94a07fSbrendan 	}
1602fa94a07fSbrendan 
1603990b4856Slling 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
1604ecd6cf80Smarks 
1605b1b8ab34Slling 	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
1606b1b8ab34Slling 	    DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);
1607b1b8ab34Slling 
1608b1b8ab34Slling 	if (error && error != ENOENT) {
1609b1b8ab34Slling 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1610b1b8ab34Slling 		    VDEV_AUX_CORRUPT_DATA);
1611b1b8ab34Slling 		error = EIO;
1612b1b8ab34Slling 		goto out;
1613b1b8ab34Slling 	}
1614b1b8ab34Slling 
1615b1b8ab34Slling 	if (error == 0) {
1616b1b8ab34Slling 		(void) zap_lookup(spa->spa_meta_objset,
1617b1b8ab34Slling 		    spa->spa_pool_props_object,
16183d7072f8Seschrock 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
1619b1b8ab34Slling 		    sizeof (uint64_t), 1, &spa->spa_bootfs);
16203d7072f8Seschrock 		(void) zap_lookup(spa->spa_meta_objset,
16213d7072f8Seschrock 		    spa->spa_pool_props_object,
16223d7072f8Seschrock 		    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE),
16233d7072f8Seschrock 		    sizeof (uint64_t), 1, &autoreplace);
1624b693757aSEric Schrock 		spa->spa_autoreplace = (autoreplace != 0);
1625ecd6cf80Smarks 		(void) zap_lookup(spa->spa_meta_objset,
1626ecd6cf80Smarks 		    spa->spa_pool_props_object,
1627ecd6cf80Smarks 		    zpool_prop_to_name(ZPOOL_PROP_DELEGATION),
1628ecd6cf80Smarks 		    sizeof (uint64_t), 1, &spa->spa_delegation);
16290a4e9518Sgw 		(void) zap_lookup(spa->spa_meta_objset,
16300a4e9518Sgw 		    spa->spa_pool_props_object,
16310a4e9518Sgw 		    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
16320a4e9518Sgw 		    sizeof (uint64_t), 1, &spa->spa_failmode);
1633573ca77eSGeorge Wilson 		(void) zap_lookup(spa->spa_meta_objset,
1634573ca77eSGeorge Wilson 		    spa->spa_pool_props_object,
1635573ca77eSGeorge Wilson 		    zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND),
1636573ca77eSGeorge Wilson 		    sizeof (uint64_t), 1, &spa->spa_autoexpand);
1637b24ab676SJeff Bonwick 		(void) zap_lookup(spa->spa_meta_objset,
1638b24ab676SJeff Bonwick 		    spa->spa_pool_props_object,
1639b24ab676SJeff Bonwick 		    zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO),
1640b24ab676SJeff Bonwick 		    sizeof (uint64_t), 1, &spa->spa_dedup_ditto);
1641b1b8ab34Slling 	}
1642b1b8ab34Slling 
16433d7072f8Seschrock 	/*
16443d7072f8Seschrock 	 * If the 'autoreplace' property is set, then post a resource notifying
16453d7072f8Seschrock 	 * the ZFS DE that it should not issue any faults for unopenable
16463d7072f8Seschrock 	 * devices.  We also iterate over the vdevs, and post a sysevent for any
16473d7072f8Seschrock 	 * unopenable vdevs so that the normal autoreplace handler can take
16483d7072f8Seschrock 	 * over.
16493d7072f8Seschrock 	 */
1650b693757aSEric Schrock 	if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) {
16513d7072f8Seschrock 		spa_check_removed(spa->spa_root_vdev);
1652b693757aSEric Schrock 		/*
1653b693757aSEric Schrock 		 * For the import case, this is done in spa_import(), because
1654b693757aSEric Schrock 		 * at this point we're using the spare definitions from
1655b693757aSEric Schrock 		 * the MOS config, not necessarily from the userland config.
1656b693757aSEric Schrock 		 */
1657b693757aSEric Schrock 		if (state != SPA_LOAD_IMPORT) {
1658b693757aSEric Schrock 			spa_aux_check_removed(&spa->spa_spares);
1659b693757aSEric Schrock 			spa_aux_check_removed(&spa->spa_l2cache);
1660b693757aSEric Schrock 		}
1661b693757aSEric Schrock 	}
16623d7072f8Seschrock 
1663ea8dc4b6Seschrock 	/*
1664560e6e96Seschrock 	 * Load the vdev state for all toplevel vdevs.
1665ea8dc4b6Seschrock 	 */
1666560e6e96Seschrock 	vdev_load(rvd);
16670373e76bSbonwick 
1668fa9e4066Sahrens 	/*
1669fa9e4066Sahrens 	 * Propagate the leaf DTLs we just loaded all the way up the tree.
1670fa9e4066Sahrens 	 */
1671e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
1672fa9e4066Sahrens 	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
1673e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
1674fa9e4066Sahrens 
1675fa9e4066Sahrens 	/*
1676fa9e4066Sahrens 	 * Check the state of the root vdev.  If it can't be opened, it
1677fa9e4066Sahrens 	 * indicates one or more toplevel vdevs are faulted.
1678fa9e4066Sahrens 	 */
1679ea8dc4b6Seschrock 	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
1680ea8dc4b6Seschrock 		error = ENXIO;
1681ea8dc4b6Seschrock 		goto out;
1682ea8dc4b6Seschrock 	}
1683fa9e4066Sahrens 
1684b24ab676SJeff Bonwick 	/*
1685b24ab676SJeff Bonwick 	 * Load the DDTs (dedup tables).
1686b24ab676SJeff Bonwick 	 */
1687b24ab676SJeff Bonwick 	error = ddt_load(spa);
1688b24ab676SJeff Bonwick 	if (error != 0) {
1689b24ab676SJeff Bonwick 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1690b24ab676SJeff Bonwick 		    VDEV_AUX_CORRUPT_DATA);
1691b24ab676SJeff Bonwick 		error = EIO;
1692b24ab676SJeff Bonwick 		goto out;
1693b24ab676SJeff Bonwick 	}
1694b24ab676SJeff Bonwick 
1695485bbbf5SGeorge Wilson 	spa_update_dspace(spa);
1696485bbbf5SGeorge Wilson 
1697468c413aSTim Haley 	if (state != SPA_LOAD_TRYIMPORT) {
1698468c413aSTim Haley 		error = spa_load_verify(spa);
1699468c413aSTim Haley 		if (error) {
1700468c413aSTim Haley 			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1701468c413aSTim Haley 			    VDEV_AUX_CORRUPT_DATA);
1702468c413aSTim Haley 			goto out;
1703468c413aSTim Haley 		}
1704468c413aSTim Haley 	}
1705468c413aSTim Haley 
1706b24ab676SJeff Bonwick 	/*
1707b24ab676SJeff Bonwick 	 * Load the intent log state and check log integrity.
1708b24ab676SJeff Bonwick 	 */
1709b24ab676SJeff Bonwick 	VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
1710b24ab676SJeff Bonwick 	    &nvroot) == 0);
1711b24ab676SJeff Bonwick 	spa_load_log_state(spa, nvroot);
1712b24ab676SJeff Bonwick 	nvlist_free(nvconfig);
1713b24ab676SJeff Bonwick 
1714b24ab676SJeff Bonwick 	if (spa_check_logs(spa)) {
1715b24ab676SJeff Bonwick 		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
1716b24ab676SJeff Bonwick 		    VDEV_AUX_BAD_LOG);
1717b24ab676SJeff Bonwick 		error = ENXIO;
1718b24ab676SJeff Bonwick 		ereport = FM_EREPORT_ZFS_LOG_REPLAY;
1719b24ab676SJeff Bonwick 		goto out;
1720b24ab676SJeff Bonwick 	}
1721b24ab676SJeff Bonwick 
1722468c413aSTim Haley 	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
1723468c413aSTim Haley 	    spa->spa_load_max_txg == UINT64_MAX)) {
17245dabedeeSbonwick 		dmu_tx_t *tx;
17250373e76bSbonwick 		int need_update = B_FALSE;
17268ad4d6ddSJeff Bonwick 
17278ad4d6ddSJeff Bonwick 		ASSERT(state != SPA_LOAD_TRYIMPORT);
17285dabedeeSbonwick 
17290373e76bSbonwick 		/*
17300373e76bSbonwick 		 * Claim log blocks that haven't been committed yet.
17310373e76bSbonwick 		 * This must all happen in a single txg.
1732b24ab676SJeff Bonwick 		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
1733b24ab676SJeff Bonwick 		 * invoked from zil_claim_log_block()'s i/o done callback.
1734468c413aSTim Haley 		 * Price of rollback is that we abandon the log.
17350373e76bSbonwick 		 */
1736b24ab676SJeff Bonwick 		spa->spa_claiming = B_TRUE;
1737b24ab676SJeff Bonwick 
17385dabedeeSbonwick 		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
1739fa9e4066Sahrens 		    spa_first_txg(spa));
1740e14bb325SJeff Bonwick 		(void) dmu_objset_find(spa_name(spa),
17410b69c2f0Sahrens 		    zil_claim, tx, DS_FIND_CHILDREN);
1742fa9e4066Sahrens 		dmu_tx_commit(tx);
1743fa9e4066Sahrens 
1744b24ab676SJeff Bonwick 		spa->spa_claiming = B_FALSE;
1745b24ab676SJeff Bonwick 
1746e6ca193dSGeorge Wilson 		spa->spa_log_state = SPA_LOG_GOOD;
1747fa9e4066Sahrens 		spa->spa_sync_on = B_TRUE;
1748fa9e4066Sahrens 		txg_sync_start(spa->spa_dsl_pool);
1749fa9e4066Sahrens 
1750fa9e4066Sahrens 		/*
1751b24ab676SJeff Bonwick 		 * Wait for all claims to sync.  We sync up to the highest
1752b24ab676SJeff Bonwick 		 * claimed log block birth time so that claimed log blocks
1753b24ab676SJeff Bonwick 		 * don't appear to be from the future.  spa_claim_max_txg
1754b24ab676SJeff Bonwick 		 * will have been set for us by either zil_check_log_chain()
1755b24ab676SJeff Bonwick 		 * (invoked from spa_check_logs()) or zil_claim() above.
1756fa9e4066Sahrens 		 */
1757b24ab676SJeff Bonwick 		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
17580e34b6a7Sbonwick 
17590e34b6a7Sbonwick 		/*
17600373e76bSbonwick 		 * If the config cache is stale, or we have uninitialized
17610373e76bSbonwick 		 * metaslabs (see spa_vdev_add()), then update the config.
1762bc758434SLin Ling 		 *
1763bc758434SLin Ling 		 * If spa_load_verbatim is true, trust the current
1764bc758434SLin Ling 		 * in-core spa_config and update the disk labels.
17650e34b6a7Sbonwick 		 */
17660373e76bSbonwick 		if (config_cache_txg != spa->spa_config_txg ||
1767468c413aSTim Haley 		    state == SPA_LOAD_IMPORT || spa->spa_load_verbatim ||
1768468c413aSTim Haley 		    state == SPA_LOAD_RECOVER)
17690373e76bSbonwick 			need_update = B_TRUE;
17700373e76bSbonwick 
17718ad4d6ddSJeff Bonwick 		for (int c = 0; c < rvd->vdev_children; c++)
17720373e76bSbonwick 			if (rvd->vdev_child[c]->vdev_ms_array == 0)
17730373e76bSbonwick 				need_update = B_TRUE;
17740e34b6a7Sbonwick 
17750e34b6a7Sbonwick 		/*
		 * Update the config cache asynchronously in case we're the
17770373e76bSbonwick 		 * root pool, in which case the config cache isn't writable yet.
17780e34b6a7Sbonwick 		 */
17790373e76bSbonwick 		if (need_update)
17800373e76bSbonwick 			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
17818ad4d6ddSJeff Bonwick 
17828ad4d6ddSJeff Bonwick 		/*
17838ad4d6ddSJeff Bonwick 		 * Check all DTLs to see if anything needs resilvering.
17848ad4d6ddSJeff Bonwick 		 */
17858ad4d6ddSJeff Bonwick 		if (vdev_resilver_needed(rvd, NULL, NULL))
17868ad4d6ddSJeff Bonwick 			spa_async_request(spa, SPA_ASYNC_RESILVER);
1787503ad85cSMatthew Ahrens 
1788503ad85cSMatthew Ahrens 		/*
1789503ad85cSMatthew Ahrens 		 * Delete any inconsistent datasets.
1790503ad85cSMatthew Ahrens 		 */
1791503ad85cSMatthew Ahrens 		(void) dmu_objset_find(spa_name(spa),
1792503ad85cSMatthew Ahrens 		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
1793ca45db41SChris Kirby 
1794ca45db41SChris Kirby 		/*
1795ca45db41SChris Kirby 		 * Clean up any stale temporary dataset userrefs.
1796ca45db41SChris Kirby 		 */
1797ca45db41SChris Kirby 		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
1798fa9e4066Sahrens 	}
1799fa9e4066Sahrens 
1800ea8dc4b6Seschrock 	error = 0;
1801ea8dc4b6Seschrock out:
1802468c413aSTim Haley 
1803088f3894Sahrens 	spa->spa_minref = refcount_count(&spa->spa_refcount);
180499653d4eSeschrock 	if (error && error != EBADF)
1805b87f3af3Sperrin 		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
1806ea8dc4b6Seschrock 	spa->spa_load_state = SPA_LOAD_NONE;
1807ea8dc4b6Seschrock 	spa->spa_ena = 0;
1808ea8dc4b6Seschrock 
1809ea8dc4b6Seschrock 	return (error);
1810fa9e4066Sahrens }
1811fa9e4066Sahrens 
1812468c413aSTim Haley static int
1813468c413aSTim Haley spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
1814468c413aSTim Haley {
1815468c413aSTim Haley 	spa_unload(spa);
1816468c413aSTim Haley 	spa_deactivate(spa);
1817468c413aSTim Haley 
1818468c413aSTim Haley 	spa->spa_load_max_txg--;
1819468c413aSTim Haley 
1820468c413aSTim Haley 	spa_activate(spa, spa_mode_global);
1821468c413aSTim Haley 	spa_async_suspend(spa);
1822468c413aSTim Haley 
1823468c413aSTim Haley 	return (spa_load(spa, state, mosconfig));
1824468c413aSTim Haley }
1825468c413aSTim Haley 
/*
 * Load the pool at the most recent usable txg, rewinding through older
 * uberblocks when the initial load fails and the caller's rewind policy
 * permits.  'max_request' bounds the newest txg we may load; 'extreme'
 * allows rewinding past the deferred-free safety window.  On a failed or
 * rewound load the generated config (with rewind data attached) is
 * installed so userland can report what a rewound import would look like.
 */
static int
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
    uint64_t max_request, boolean_t extreme)
{
	nvlist_t *config = NULL;
	int load_error, rewind_error;
	uint64_t safe_rollback_txg;
	uint64_t min_txg;

	/*
	 * An explicit recovery of a known txg bounds the load by that txg
	 * and discards the intent log; otherwise honor max_request.
	 */
	if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
		spa->spa_load_max_txg = spa->spa_load_txg;
		spa->spa_log_state = SPA_LOG_CLEAR;
	} else {
		spa->spa_load_max_txg = max_request;
	}

	load_error = rewind_error = spa_load(spa, state, mosconfig);
	if (load_error == 0)
		return (0);

	/*
	 * The first attempt failed; capture the resulting state so that
	 * userland can see per-vdev status even if we rewind below.
	 */
	if (spa->spa_root_vdev != NULL)
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	/* Remember the newest txg we gave up on before rewinding. */
	spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
	spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;

	/* specific txg requested */
	if (spa->spa_load_max_txg != UINT64_MAX && !extreme) {
		nvlist_free(config);
		return (load_error);
	}

	/* Price of rolling back is discarding txgs, including log */
	if (state == SPA_LOAD_RECOVER)
		spa->spa_log_state = SPA_LOG_CLEAR;

	spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE;

	/*
	 * Walk backwards through older txgs until the load succeeds or we
	 * drop below the rewind floor.  Only extreme rewind may cross the
	 * TXG_DEFER_SIZE safety window; once we do, flag it so subsequent
	 * loads know (spa_extreme_rewind is consumed by spa_load()).
	 */
	min_txg = extreme ? TXG_INITIAL : safe_rollback_txg;
	while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) {
		if (spa->spa_load_max_txg < safe_rollback_txg)
			spa->spa_extreme_rewind = B_TRUE;
		rewind_error = spa_load_retry(spa, state, mosconfig);
	}

	/* Annotate the saved config with what a rewind would recover. */
	if (config)
		spa_rewind_data_to_nvlist(spa, config);

	spa->spa_extreme_rewind = B_FALSE;
	spa->spa_load_max_txg = UINT64_MAX;

	/*
	 * Install the generated config unless this was a successful
	 * recovery.  NOTE(review): on the successful-recovery path config
	 * is neither installed nor freed here — presumably owned elsewhere;
	 * confirm there is no nvlist leak.
	 */
	if (config && (rewind_error || state != SPA_LOAD_RECOVER))
		spa_config_set(spa, config);

	return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
}
1883468c413aSTim Haley 
1884fa9e4066Sahrens /*
1885fa9e4066Sahrens  * Pool Open/Import
1886fa9e4066Sahrens  *
1887fa9e4066Sahrens  * The import case is identical to an open except that the configuration is sent
1888fa9e4066Sahrens  * down from userland, instead of grabbed from the configuration cache.  For the
1889fa9e4066Sahrens  * case of an open, the pool configuration will exist in the
18903d7072f8Seschrock  * POOL_STATE_UNINITIALIZED state.
1891fa9e4066Sahrens  *
1892fa9e4066Sahrens  * The stats information (gen/count/ustats) is used to gather vdev statistics at
1893fa9e4066Sahrens  * the same time open the pool, without having to keep around the spa_t in some
1894fa9e4066Sahrens  * ambiguous state.
1895fa9e4066Sahrens  */
/*
 * Common implementation for spa_open() and spa_open_rewind(): look up the
 * named pool, loading it from disk (honoring the rewind policy in
 * 'nvpolicy') if it is not yet initialized, and return a referenced spa_t
 * in *spapp.  If 'config' is non-NULL, a generated (or cached, on
 * failure) pool config is returned there as well.  Returns 0 or an errno.
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
    nvlist_t **config)
{
	spa_t *spa;
	boolean_t norewind;
	boolean_t extreme;
	zpool_rewind_policy_t policy;
	spa_load_state_t state = SPA_LOAD_OPEN;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	/* Decode the caller's rewind request (NULL policy => defaults). */
	zpool_get_rewind_policy(nvpolicy, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;
	norewind = (policy.zrp_request == ZPOOL_NO_REWIND);
	extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0);

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa, spa_mode_global);

		/*
		 * If the last open failed and the caller forbids rewind,
		 * fail fast with the cached error (and cached config).
		 */
		if (spa->spa_last_open_failed && norewind) {
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config,
				    config, KM_SLEEP) == 0);
			spa_deactivate(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (spa->spa_last_open_failed);
		}

		/* A plain open starts with no rewind bookkeeping. */
		if (state != SPA_LOAD_RECOVER)
			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
		    extreme);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config, config,
				    KM_SLEEP) == 0);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = error;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		}

	}

	spa_open_ref(spa, tag);


	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	/*
	 * Success: clear the failure/rewind state.  Only done when we took
	 * the namespace lock ourselves, i.e. not on the recursive path.
	 */
	if (locked) {
		spa->spa_last_open_failed = 0;
		spa->spa_last_ubsync_txg = 0;
		spa->spa_load_txg = 0;
		mutex_exit(&spa_namespace_lock);
	}

	*spapp = spa;

	return (0);
}
2007fa9e4066Sahrens 
/*
 * Open a pool with an explicit rewind policy (see spa_open_common()).
 */
int
spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy,
    nvlist_t **config)
{
	return (spa_open_common(name, spapp, tag, policy, config));
}
2014468c413aSTim Haley 
2015fa9e4066Sahrens int
2016fa9e4066Sahrens spa_open(const char *name, spa_t **spapp, void *tag)
2017fa9e4066Sahrens {
2018468c413aSTim Haley 	return (spa_open_common(name, spapp, tag, NULL, NULL));
2019fa9e4066Sahrens }
2020fa9e4066Sahrens 
2021ea8dc4b6Seschrock /*
2022ea8dc4b6Seschrock  * Lookup the given spa_t, incrementing the inject count in the process,
2023ea8dc4b6Seschrock  * preventing it from being exported or destroyed.
2024ea8dc4b6Seschrock  */
2025ea8dc4b6Seschrock spa_t *
2026ea8dc4b6Seschrock spa_inject_addref(char *name)
2027ea8dc4b6Seschrock {
2028ea8dc4b6Seschrock 	spa_t *spa;
2029ea8dc4b6Seschrock 
2030ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
2031ea8dc4b6Seschrock 	if ((spa = spa_lookup(name)) == NULL) {
2032ea8dc4b6Seschrock 		mutex_exit(&spa_namespace_lock);
2033ea8dc4b6Seschrock 		return (NULL);
2034ea8dc4b6Seschrock 	}
2035ea8dc4b6Seschrock 	spa->spa_inject_ref++;
2036ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
2037ea8dc4b6Seschrock 
2038ea8dc4b6Seschrock 	return (spa);
2039ea8dc4b6Seschrock }
2040ea8dc4b6Seschrock 
2041ea8dc4b6Seschrock void
2042ea8dc4b6Seschrock spa_inject_delref(spa_t *spa)
2043ea8dc4b6Seschrock {
2044ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
2045ea8dc4b6Seschrock 	spa->spa_inject_ref--;
2046ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
2047ea8dc4b6Seschrock }
2048ea8dc4b6Seschrock 
/*
 * Add spares device information to the nvlist.
 *
 * Copies the spare vdev array from spa->spa_spares.sav_config into the
 * ZPOOL_CONFIG_VDEV_TREE of 'config', then walks the copied entries and
 * flags any spare that is currently active in another pool as
 * CANT_OPEN/SPARED so consumers of the config see its true status.
 */
static void
spa_add_spares(spa_t *spa, nvlist_t *config)
{
	nvlist_t **spares;
	uint_t i, nspares;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_stat_t *vs;
	uint_t vsc;
	uint64_t pool;

	/* Caller must hold SCL_CONFIG so the spare list is stable. */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_spares.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nvspares) == 0);
	if (nspares != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		/*
		 * Re-look-up the array we just added so that the loop below
		 * mutates the copy owned by 'config', not the original
		 * sav_config entries.
		 */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

		/*
		 * Go through and find any spares which have since been
		 * repurposed as an active spare.  If this is the case, update
		 * their status appropriately.
		 */
		for (i = 0; i < nspares; i++) {
			VERIFY(nvlist_lookup_uint64(spares[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			/* pool != 0 indicates the spare is in use elsewhere. */
			if (spa_spare_exists(guid, &pool, NULL) &&
			    pool != 0ULL) {
				VERIFY(nvlist_lookup_uint64_array(
				    spares[i], ZPOOL_CONFIG_STATS,
				    (uint64_t **)&vs, &vsc) == 0);
				vs->vs_state = VDEV_STATE_CANT_OPEN;
				vs->vs_aux = VDEV_AUX_SPARED;
			}
		}
	}
}
209799653d4eSeschrock 
/*
 * Add l2cache device information to the nvlist, including vdev stats.
 *
 * Copies the l2cache vdev array from spa->spa_l2cache.sav_config into the
 * ZPOOL_CONFIG_VDEV_TREE of 'config', then refreshes each copied entry's
 * ZPOOL_CONFIG_STATS from the live in-core vdev.
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	/* Caller must hold SCL_CONFIG so the cache device list is stable. */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		/*
		 * Re-look-up the array we just added so we update the copy
		 * owned by 'config', not the original sav_config entries.
		 */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/* Match the config entry to its in-core vdev by GUID. */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}
2151fa94a07fSbrendan 
/*
 * Retrieve the pool's configuration (into *config) and, if 'altroot' is
 * non-NULL, its alternate root path (up to 'buflen' bytes).  The error
 * from spa_open_common() is returned; a config may still be produced for
 * a pool that failed to open.  The altroot is fetched even for faulted
 * pools by consulting the namespace directly.
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, NULL, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			/* Report the failmode if the pool I/O is suspended. */
			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	/* Drop the config lock and the open reference taken above. */
	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}
2210fa9e4066Sahrens 
221199653d4eSeschrock /*
2212fa94a07fSbrendan  * Validate that the auxiliary device array is well formed.  We must have an
2213fa94a07fSbrendan  * array of nvlists, each which describes a valid leaf vdev.  If this is an
2214fa94a07fSbrendan  * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
2215fa94a07fSbrendan  * specified, as long as they are well-formed.
221699653d4eSeschrock  */
221799653d4eSeschrock static int
2218fa94a07fSbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
2219fa94a07fSbrendan     spa_aux_vdev_t *sav, const char *config, uint64_t version,
2220fa94a07fSbrendan     vdev_labeltype_t label)
222199653d4eSeschrock {
2222fa94a07fSbrendan 	nvlist_t **dev;
2223fa94a07fSbrendan 	uint_t i, ndev;
222499653d4eSeschrock 	vdev_t *vd;
222599653d4eSeschrock 	int error;
222699653d4eSeschrock 
2227e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
2228e14bb325SJeff Bonwick 
222999653d4eSeschrock 	/*
2230fa94a07fSbrendan 	 * It's acceptable to have no devs specified.
223199653d4eSeschrock 	 */
2232fa94a07fSbrendan 	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
223399653d4eSeschrock 		return (0);
223499653d4eSeschrock 
2235fa94a07fSbrendan 	if (ndev == 0)
223699653d4eSeschrock 		return (EINVAL);
223799653d4eSeschrock 
223899653d4eSeschrock 	/*
2239fa94a07fSbrendan 	 * Make sure the pool is formatted with a version that supports this
2240fa94a07fSbrendan 	 * device type.
224199653d4eSeschrock 	 */
2242fa94a07fSbrendan 	if (spa_version(spa) < version)
224399653d4eSeschrock 		return (ENOTSUP);
224499653d4eSeschrock 
224539c23413Seschrock 	/*
2246fa94a07fSbrendan 	 * Set the pending device list so we correctly handle device in-use
224739c23413Seschrock 	 * checking.
224839c23413Seschrock 	 */
2249fa94a07fSbrendan 	sav->sav_pending = dev;
2250fa94a07fSbrendan 	sav->sav_npending = ndev;
225139c23413Seschrock 
2252fa94a07fSbrendan 	for (i = 0; i < ndev; i++) {
2253fa94a07fSbrendan 		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
225499653d4eSeschrock 		    mode)) != 0)
225539c23413Seschrock 			goto out;
225699653d4eSeschrock 
225799653d4eSeschrock 		if (!vd->vdev_ops->vdev_op_leaf) {
225899653d4eSeschrock 			vdev_free(vd);
225939c23413Seschrock 			error = EINVAL;
226039c23413Seschrock 			goto out;
226199653d4eSeschrock 		}
226299653d4eSeschrock 
2263fa94a07fSbrendan 		/*
2264e14bb325SJeff Bonwick 		 * The L2ARC currently only supports disk devices in
2265e14bb325SJeff Bonwick 		 * kernel context.  For user-level testing, we allow it.
2266fa94a07fSbrendan 		 */
2267e14bb325SJeff Bonwick #ifdef _KERNEL
2268fa94a07fSbrendan 		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
2269fa94a07fSbrendan 		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
2270fa94a07fSbrendan 			error = ENOTBLK;
2271fa94a07fSbrendan 			goto out;
2272fa94a07fSbrendan 		}
2273e14bb325SJeff Bonwick #endif
227499653d4eSeschrock 		vd->vdev_top = vd;
227599653d4eSeschrock 
227639c23413Seschrock 		if ((error = vdev_open(vd)) == 0 &&
2277fa94a07fSbrendan 		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
2278fa94a07fSbrendan 			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
227939c23413Seschrock 			    vd->vdev_guid) == 0);
228039c23413Seschrock 		}
228199653d4eSeschrock 
228299653d4eSeschrock 		vdev_free(vd);
228339c23413Seschrock 
2284fa94a07fSbrendan 		if (error &&
2285fa94a07fSbrendan 		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
228639c23413Seschrock 			goto out;
228739c23413Seschrock 		else
228839c23413Seschrock 			error = 0;
228999653d4eSeschrock 	}
229099653d4eSeschrock 
229139c23413Seschrock out:
2292fa94a07fSbrendan 	sav->sav_pending = NULL;
2293fa94a07fSbrendan 	sav->sav_npending = 0;
229439c23413Seschrock 	return (error);
229599653d4eSeschrock }
229699653d4eSeschrock 
2297fa94a07fSbrendan static int
2298fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
2299fa94a07fSbrendan {
2300fa94a07fSbrendan 	int error;
2301fa94a07fSbrendan 
2302e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
2303e14bb325SJeff Bonwick 
2304fa94a07fSbrendan 	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2305fa94a07fSbrendan 	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
2306fa94a07fSbrendan 	    VDEV_LABEL_SPARE)) != 0) {
2307fa94a07fSbrendan 		return (error);
2308fa94a07fSbrendan 	}
2309fa94a07fSbrendan 
2310fa94a07fSbrendan 	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2311fa94a07fSbrendan 	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
2312fa94a07fSbrendan 	    VDEV_LABEL_L2CACHE));
2313fa94a07fSbrendan }
2314fa94a07fSbrendan 
/*
 * Install 'devs' (an array of 'ndevs' device nvlists) as the 'config'
 * array of the given aux-vdev group.  If a config already exists, the new
 * devices are appended to the existing ones; otherwise a fresh config
 * nvlist is allocated.  Ownership of 'devs' remains with the caller.
 */
static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatentating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);

		/* Build a combined array of duplicated nvlists. */
		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		/* Remove the old array before installing the combined one. */
		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		/*
		 * nvlist_add_nvlist_array copies its input, so free our
		 * temporary duplicates and the scratch array.
		 */
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}
2360fa94a07fSbrendan 
/*
 * Stop and drop level 2 ARC devices
 *
 * For every cache vdev in the pool: detach it from the ARC if it is
 * registered there, remove it from the global l2cache list, clear its
 * stats, and close it.
 */
void
spa_l2cache_drop(spa_t *spa)
{
	vdev_t *vd;
	int i;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	for (i = 0; i < sav->sav_count; i++) {
		uint64_t pool;

		vd = sav->sav_vdevs[i];
		ASSERT(vd != NULL);

		/* Only detach from the ARC if it is actually registered. */
		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
		    pool != 0ULL && l2arc_vdev_present(vd))
			l2arc_remove_vdev(vd);
		if (vd->vdev_isl2cache)
			spa_l2cache_remove(vd);
		vdev_clear_stats(vd);
		(void) vdev_close(vd);
	}
}
2386fa94a07fSbrendan 
/*
 * Pool Creation
 *
 * Create a new pool named 'pool' with the vdev layout in 'nvroot', the
 * optional pool properties in 'props', and the optional dataset-creation
 * properties in 'zplprops'.  'history_str' (if non-NULL) is logged as the
 * creating command.  Returns 0 on success, EEXIST if the name is taken,
 * or the first error from property validation or vdev setup.  The entire
 * operation runs under spa_namespace_lock.
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    const char *history_str, nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, NULL, altroot);
	spa_activate(spa, spa_mode_global);

	/* Validate the requested properties before doing any real work. */
	if (props && (error = spa_prop_validate(spa, props))) {
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/* Default to the current SPA version if none was requested. */
	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
	    &version) != 0)
		version = SPA_VERSION;
	ASSERT(version <= SPA_VERSION);

	spa->spa_first_txg = txg;
	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	/* A pool with no allocatable devices is not usable. */
	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = EINVAL;

	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_metaslab_set_size(rvd->vdev_child[c]);
			vdev_expand(rvd->vdev_child[c], txg);
		}
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	/* Tear everything back down on any vdev setup failure. */
	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/* Create the DSL pool; its meta-objset holds all pool metadata. */
	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	/*
	 * Create DDTs (dedup tables).
	 */
	ddt_create(spa);

	spa_update_dspace(spa);

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bplist object.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset,
	    1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset,
	    spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bplist");
	}

	/*
	 * Create the pool's history object.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);

	if (props != NULL) {
		spa_configfile_set(spa, props, B_FALSE);
		spa_sync_props(spa, props, CRED(), tx);
	}

	dmu_tx_commit(tx);

	/* Start the sync thread for this pool. */
	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	spa_config_sync(spa, B_FALSE, B_TRUE);

	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
	spa_history_log_version(spa, LOG_POOL_CREATE);

	/*
	 * Remember the baseline reference count; references above this
	 * level indicate outstanding opens.
	 */
	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}
2601fa9e4066Sahrens 
2602e7cbe64fSgw #ifdef _KERNEL
/*
 * Get the root pool information from the root disk, then import the root pool
 * during the system boot up time.
 */
extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);

/*
 * Read the vdev label from the given boot device and turn it into a pool
 * configuration whose vdev tree is rooted at a synthetic "root" vdev with
 * the labeled device as its only child.  On success the pool GUID is
 * stored in *guid and the config is returned; returns NULL if the label
 * cannot be read.
 */
static nvlist_t *
spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
{
	nvlist_t *config;
	nvlist_t *nvtop, *nvroot;
	uint64_t pgid;

	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
		return (NULL);

	/*
	 * Add this top-level vdev to the child array.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pgid) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);

	/*
	 * Put this pool's top-level vdevs into a root vdev.
	 */
	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &nvtop, 1) == 0);

	/*
	 * Replace the existing vdev_tree with the new root vdev in
	 * this pool's configuration (remove the old, add the new).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);
	return (config);
}
2647e7cbe64fSgw 
/*
 * Walk the vdev tree and see if we can find a device with "better"
 * configuration. A configuration is "better" if the label on that
 * device has a more recent txg.
 *
 * Recursive depth-first walk rooted at 'vd'; *avd and *txg track the best
 * candidate found so far and its label txg.  Leaves whose label cannot be
 * read are silently skipped.
 */
static void
spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
{
	for (int c = 0; c < vd->vdev_children; c++)
		spa_alt_rootvdev(vd->vdev_child[c], avd, txg)

	if (vd->vdev_ops->vdev_op_leaf) {
		nvlist_t *label;
		uint64_t label_txg;

		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
		    &label) != 0)
			return;

		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
		    &label_txg) == 0);

		/*
		 * Do we have a better boot device?
		 */
		if (label_txg > *txg) {
			*txg = label_txg;
			*avd = vd;
		}
		nvlist_free(label);
	}
}
2680051aabe6Staylor 
2681e7cbe64fSgw /*
2682e7cbe64fSgw  * Import a root pool.
2683e7cbe64fSgw  *
2684051aabe6Staylor  * For x86. devpath_list will consist of devid and/or physpath name of
2685051aabe6Staylor  * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
2686051aabe6Staylor  * The GRUB "findroot" command will return the vdev we should boot.
2687e7cbe64fSgw  *
2688e7cbe64fSgw  * For Sparc, devpath_list consists the physpath name of the booting device
2689e7cbe64fSgw  * no matter the rootpool is a single device pool or a mirrored pool.
2690e7cbe64fSgw  * e.g.
2691e7cbe64fSgw  *	"/pci@1f,0/ide@d/disk@0,0:a"
2692e7cbe64fSgw  */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/*
			 * iscsi boot: retry after translating devpath to the
			 * physical path of the iSCSI boot device.
			 */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	/*
	 * Pull the pool name and last-synced txg out of the label config.
	 * Note pname points into 'config', so it must not be used after
	 * nvlist_free(config) below.
	 */
	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, config, NULL);
	spa->spa_is_root = B_TRUE;
	spa->spa_load_verbatim = B_TRUE;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		/*
		 * NOTE(review): on this path the spa added above remains in
		 * the namespace (unlike the 'out' path below, which also
		 * leaves it); presumably intentional for root-pool boot —
		 * confirm there is no stale-spa hazard here.
		 */
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device: a leaf whose label
	 * records a txg newer than the one we booted from.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		/*
		 * NOTE(review): assumes the active spare is always child 1
		 * of the spare vdev — confirm against the spare-vdev layout.
		 */
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->vdev_child[1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/*
	 * The temporary vdev tree built from the label is always torn down;
	 * the real tree is constructed later when the pool is opened.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}
280221ecdf64SLin Ling 
2803e7cbe64fSgw #endif
2804e7cbe64fSgw 
2805e7cbe64fSgw /*
28066809eb4eSEric Schrock  * Take a pool and insert it into the namespace as if it had been loaded at
28076809eb4eSEric Schrock  * boot.
2808e7cbe64fSgw  */
2809e7cbe64fSgw int
28106809eb4eSEric Schrock spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
2811e7cbe64fSgw {
28126809eb4eSEric Schrock 	spa_t *spa;
2813468c413aSTim Haley 	zpool_rewind_policy_t policy;
28146809eb4eSEric Schrock 	char *altroot = NULL;
28156809eb4eSEric Schrock 
28166809eb4eSEric Schrock 	mutex_enter(&spa_namespace_lock);
28176809eb4eSEric Schrock 	if (spa_lookup(pool) != NULL) {
28186809eb4eSEric Schrock 		mutex_exit(&spa_namespace_lock);
28196809eb4eSEric Schrock 		return (EEXIST);
28206809eb4eSEric Schrock 	}
28216809eb4eSEric Schrock 
28226809eb4eSEric Schrock 	(void) nvlist_lookup_string(props,
28236809eb4eSEric Schrock 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
2824468c413aSTim Haley 	spa = spa_add(pool, config, altroot);
28256809eb4eSEric Schrock 
2826468c413aSTim Haley 	zpool_get_rewind_policy(config, &policy);
2827468c413aSTim Haley 	spa->spa_load_max_txg = policy.zrp_txg;
28284f0f5e5bSVictor Latushkin 
2829468c413aSTim Haley 	spa->spa_load_verbatim = B_TRUE;
28306809eb4eSEric Schrock 
28316809eb4eSEric Schrock 	if (props != NULL)
28326809eb4eSEric Schrock 		spa_configfile_set(spa, props, B_FALSE);
28336809eb4eSEric Schrock 
28346809eb4eSEric Schrock 	spa_config_sync(spa, B_FALSE, B_TRUE);
28356809eb4eSEric Schrock 
28366809eb4eSEric Schrock 	mutex_exit(&spa_namespace_lock);
2837c8e1f6d2SMark J Musante 	spa_history_log_version(spa, LOG_POOL_IMPORT);
28386809eb4eSEric Schrock 
28396809eb4eSEric Schrock 	return (0);
2840e7cbe64fSgw }
2841e7cbe64fSgw 
28426809eb4eSEric Schrock /*
28436809eb4eSEric Schrock  * Import a non-root pool into the system.
28446809eb4eSEric Schrock  */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/* A caller-requested rewind switches us into recovery mode. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Create and initialize the spa structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);
	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 * (Resumed below only once the load and property set succeed.)
	 */
	spa_async_suspend(spa);

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0));

	/*
	 * Propagate anything learned about failing or best txgs
	 * back to caller
	 */
	spa_rewind_data_to_nvlist(spa, config);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/*
	 * Validate the aux devices named in the user-supplied vdev tree;
	 * skipped (via the error checks) if the load itself already failed.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/*
	 * Any failure up to this point (load, aux validation, or property
	 * set on a writeable pool) tears the pool back down completely.
	 */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}
3000c5904d13Seschrock 
3001c5904d13Seschrock 
3002fa9e4066Sahrens /*
3003fa9e4066Sahrens  * This (illegal) pool name is used when temporarily importing a spa_t in order
3004fa9e4066Sahrens  * to get the vdev stats associated with the imported devices.
3005fa9e4066Sahrens  */
3006fa9e4066Sahrens #define	TRYIMPORT_NAME	"$import"
3007fa9e4066Sahrens 
/*
 * Probe-import 'tryconfig' under the reserved name TRYIMPORT_NAME and
 * return the resulting config (with spares, l2cache, and bootfs info
 * added), or NULL if the supplied config lacks a pool name/state or the
 * vdev tree could not even be parsed.  The temporary spa is always torn
 * down before returning.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
	spa_activate(spa, FREAD);	/* read-only: we never write here */

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, SPA_LOAD_TRYIMPORT, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		/* Report the real pool name, not TRYIMPORT_NAME. */
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Replace the leading "$import" component of
				 * the dataset name with the real pool name.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* The try-import spa is always temporary; tear it down now. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}
3097fa9e4066Sahrens 
3098fa9e4066Sahrens /*
3099fa9e4066Sahrens  * Pool export/destroy
3100fa9e4066Sahrens  *
3101fa9e4066Sahrens  * The act of destroying or exporting a pool is very simple.  We make sure there
3102fa9e4066Sahrens  * is no more pending I/O and any references to the pool are gone.  Then, we
3103fa9e4066Sahrens  * update the pool state and sync all the labels to disk, removing the
3104394ab0cbSGeorge Wilson  * configuration from the cache afterwards. If the 'hardforce' flag is set, then
3105394ab0cbSGeorge Wilson  * we don't sync the labels or remove the configuration cache.
3106fa9e4066Sahrens  */
/*
 * Common body for pool destroy, export, and reset.  'new_state' selects
 * the operation (POOL_STATE_DESTROYED / EXPORTED / UNINITIALIZED); on
 * success the prior config is duplicated into *oldconfig if requested.
 * Returns 0, or EROFS/ENOENT/EBUSY/EXDEV on failure.
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	/*
	 * NOTE(review): this event is posted for export and reset as well
	 * as destroy — presumably intentional, but confirm consumers of
	 * ESC_ZFS_POOL_DESTROY tolerate that.
	 */
	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the caller a copy of the config before it goes away. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}
3205fa9e4066Sahrens 
3206fa9e4066Sahrens /*
3207fa9e4066Sahrens  * Destroy a storage pool.
3208fa9e4066Sahrens  */
3209fa9e4066Sahrens int
3210fa9e4066Sahrens spa_destroy(char *pool)
3211fa9e4066Sahrens {
3212394ab0cbSGeorge Wilson 	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
3213394ab0cbSGeorge Wilson 	    B_FALSE, B_FALSE));
3214fa9e4066Sahrens }
3215fa9e4066Sahrens 
3216fa9e4066Sahrens /*
3217fa9e4066Sahrens  * Export a storage pool.
3218fa9e4066Sahrens  */
3219fa9e4066Sahrens int
3220394ab0cbSGeorge Wilson spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
3221394ab0cbSGeorge Wilson     boolean_t hardforce)
3222fa9e4066Sahrens {
3223394ab0cbSGeorge Wilson 	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
3224394ab0cbSGeorge Wilson 	    force, hardforce));
3225fa9e4066Sahrens }
3226fa9e4066Sahrens 
3227ea8dc4b6Seschrock /*
3228ea8dc4b6Seschrock  * Similar to spa_export(), this unloads the spa_t without actually removing it
3229ea8dc4b6Seschrock  * from the namespace in any way.
3230ea8dc4b6Seschrock  */
3231ea8dc4b6Seschrock int
3232ea8dc4b6Seschrock spa_reset(char *pool)
3233ea8dc4b6Seschrock {
323489a89ebfSlling 	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
3235394ab0cbSGeorge Wilson 	    B_FALSE, B_FALSE));
3236ea8dc4b6Seschrock }
3237ea8dc4b6Seschrock 
3238fa9e4066Sahrens /*
3239fa9e4066Sahrens  * ==========================================================================
3240fa9e4066Sahrens  * Device manipulation
3241fa9e4066Sahrens  * ==========================================================================
3242fa9e4066Sahrens  */
3243fa9e4066Sahrens 
3244fa9e4066Sahrens /*
32458654d025Sperrin  * Add a device to a storage pool.
3246fa9e4066Sahrens  */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	/* Parse the caller's nvroot into a temporary vdev tree 'vd'. */
	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* The request must add at least one vdev, spare, or cache device. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 * (If no hole is found, 'id' ends at rvd->vdev_children,
		 * i.e. the new top-level vdev is appended.)
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	/* Install any new spares and l2cache devices. */
	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}
3343fa9e4066Sahrens 
3344fa9e4066Sahrens /*
3345fa9e4066Sahrens  * Attach a device to a mirror.  The arguments are the path to any device
3346fa9e4066Sahrens  * in the mirror, and the nvroot for the new device.  If the path specifies
3347fa9e4066Sahrens  * a device that is not mirrored, we automatically insert the mirror vdev.
3348fa9e4066Sahrens  *
3349fa9e4066Sahrens  * If 'replacing' is specified, the new device is intended to replace the
3350fa9e4066Sahrens  * existing device; in this case the two devices are made into their own
33513d7072f8Seschrock  * mirror using the 'replacing' vdev, which is functionally identical to
3352fa9e4066Sahrens  * the mirror vdev (it actually reuses all the same ops) but has a few
3353fa9e4066Sahrens  * extra rules: you can't attach to it after it's been created, and upon
3354fa9e4066Sahrens  * completion of resilvering, the first disk (the one being replaced)
3355fa9e4066Sahrens  * is automatically detached.
3356fa9e4066Sahrens  */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	/*
	 * Take the vdev-modification locks; 'txg' is the transaction group
	 * in which this change will commit.  Every exit path below must go
	 * through spa_vdev_exit() to release them.
	 */
	txg = spa_vdev_enter(spa);

	/* 'guid' must name an existing leaf vdev in this pool. */
	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	/*
	 * Parse 'nvroot'; it must describe exactly one new leaf vdev.
	 */
	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		/* +5 leaves room for "/old" plus the terminating NUL. */
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	/* The new device inherits the old device's creation txg. */
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/* Snapshot the path strings now; they are used after spa_vdev_exit(). */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	/* Record the attach/replace in the pool history, after dropping locks. */
	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(),  "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}
3536fa9e4066Sahrens 
3537fa9e4066Sahrens /*
3538fa9e4066Sahrens  * Detach a device from a mirror or replacing vdev.
3539fa9e4066Sahrens  * If 'replace_done' is specified, only detach if the parent
3540fa9e4066Sahrens  * is a replacing vdev.
3541fa9e4066Sahrens  */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	size_t len;

	/*
	 * 'guid' identifies the leaf vdev to detach.  'pguid', if nonzero,
	 * must match the current parent's guid (see the race discussion
	 * below).  Returns 0 or an errno; all paths release the locks taken
	 * here via spa_vdev_exit().
	 */
	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).  If the user decides to cancel
	 * the replace by detaching C, the expected behavior is to end up
	 * M(A,B).  But suppose that right after deciding to detach C,
	 * the replacement of B completes.  We would have M(A,C), and then
	 * ask to detach C, which would leave us with just A -- not what
	 * the user wanted.  To prevent this, we make sure that the
	 * parent/child relationship hasn't changed -- in this example,
	 * that C's parent is still the replacing vdev R.
	 */
	if (pvd->vdev_guid != pguid && pguid != 0)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If replace_done is specified, only remove this device if it's
	 * the first child of a replacing vdev.  For the 'spare' vdev, either
	 * disk can be removed.
	 */
	if (replace_done) {
		if (pvd->vdev_ops == &vdev_replacing_ops) {
			if (vd->vdev_id != 0)
				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		} else if (pvd->vdev_ops != &vdev_spare_ops) {
			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		}
	}

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If this device has the only valid copy of some data,
	 * we cannot safely detach it.
	 */
	if (vdev_dtl_required(vd))
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	ASSERT(pvd->vdev_children >= 2);

	/*
	 * If we are detaching the second disk from a replacing vdev, then
	 * check to see if we changed the original vdev's path to have "/old"
	 * at the end in spa_vdev_attach().  If so, undo that change now.
	 */
	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
	    pvd->vdev_child[0]->vdev_path != NULL &&
	    pvd->vdev_child[1]->vdev_path != NULL) {
		ASSERT(pvd->vdev_child[1] == vd);
		cvd = pvd->vdev_child[0];
		len = strlen(vd->vdev_path);
		if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
		    strcmp(cvd->vdev_path + len, "/old") == 0) {
			spa_strfree(cvd->vdev_path);
			cvd->vdev_path = spa_strdup(vd->vdev_path);
		}
	}

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
	/* (error deliberately not checked here -- see comment above) */

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[0];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.
	 * We must do this before vdev_remove_parent(), because that can
	 * change the GUID if it creates a new toplevel GUID.  For a similar
	 * reason, we must remove the spare now, in the same txg as the detach;
	 * otherwise someone could attach a new sibling, change the GUID, and
	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1)
		vdev_remove_parent(cvd);

	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the 'autoexpand' property is set on the pool then automatically
	 * try to expand the size of the pool. For example if the device we
	 * just detached was smaller than the others, it may be possible to
	 * add metaslabs (i.e. grow the pool). We need to reopen the vdev
	 * first so that we can obtain the updated sizes of the leaf vdevs.
	 */
	if (spa->spa_autoexpand) {
		vdev_reopen(tvd);
		vdev_expand(tvd, txg);
	}

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *myspa = spa;
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;
			if (spa == myspa)
				continue;
			/*
			 * Hold a reference so the pool persists while we drop
			 * the namespace lock around the removal.
			 */
			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}
375299653d4eSeschrock 
3753e14bb325SJeff Bonwick static nvlist_t *
3754e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
375599653d4eSeschrock {
3756e14bb325SJeff Bonwick 	for (int i = 0; i < count; i++) {
3757e14bb325SJeff Bonwick 		uint64_t guid;
375899653d4eSeschrock 
3759e14bb325SJeff Bonwick 		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
3760e14bb325SJeff Bonwick 		    &guid) == 0);
376199653d4eSeschrock 
3762e14bb325SJeff Bonwick 		if (guid == target_guid)
3763e14bb325SJeff Bonwick 			return (nvpp[i]);
376499653d4eSeschrock 	}
376599653d4eSeschrock 
3766e14bb325SJeff Bonwick 	return (NULL);
3767fa94a07fSbrendan }
3768fa94a07fSbrendan 
3769e14bb325SJeff Bonwick static void
3770e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
3771e14bb325SJeff Bonwick 	nvlist_t *dev_to_remove)
3772fa94a07fSbrendan {
3773e14bb325SJeff Bonwick 	nvlist_t **newdev = NULL;
3774fa94a07fSbrendan 
3775e14bb325SJeff Bonwick 	if (count > 1)
3776e14bb325SJeff Bonwick 		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);
3777fa94a07fSbrendan 
3778e14bb325SJeff Bonwick 	for (int i = 0, j = 0; i < count; i++) {
3779e14bb325SJeff Bonwick 		if (dev[i] == dev_to_remove)
3780e14bb325SJeff Bonwick 			continue;
3781e14bb325SJeff Bonwick 		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
3782fa94a07fSbrendan 	}
3783fa94a07fSbrendan 
3784e14bb325SJeff Bonwick 	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
3785e14bb325SJeff Bonwick 	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);
3786fa94a07fSbrendan 
3787e14bb325SJeff Bonwick 	for (int i = 0; i < count - 1; i++)
3788e14bb325SJeff Bonwick 		nvlist_free(newdev[i]);
3789fa94a07fSbrendan 
3790e14bb325SJeff Bonwick 	if (count > 1)
3791e14bb325SJeff Bonwick 		kmem_free(newdev, (count - 1) * sizeof (void *));
3792fa94a07fSbrendan }
3793fa94a07fSbrendan 
379488ecc943SGeorge Wilson /*
379588ecc943SGeorge Wilson  * Removing a device from the vdev namespace requires several steps
379688ecc943SGeorge Wilson  * and can take a significant amount of time.  As a result we use
379788ecc943SGeorge Wilson  * the spa_vdev_config_[enter/exit] functions which allow us to
379888ecc943SGeorge Wilson  * grab and release the spa_config_lock while still holding the namespace
379988ecc943SGeorge Wilson  * lock.  During each step the configuration is synced out.
380088ecc943SGeorge Wilson  */
380188ecc943SGeorge Wilson 
380288ecc943SGeorge Wilson /*
380388ecc943SGeorge Wilson  * Evacuate the device.
380488ecc943SGeorge Wilson  */
int
spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
{
	int error = 0;
	uint64_t txg;

	/*
	 * 'vd' must be a top-level vdev.  The caller holds the namespace
	 * lock but not the config lock (asserted below).  Returns 0 on
	 * success or an errno from the evacuation attempt.
	 */
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
	ASSERT(vd == vd->vdev_top);

	/*
	 * Evacuate the device.  We don't hold the config lock as writer
	 * since we need to do I/O but we do keep the
	 * spa_namespace_lock held.  Once this completes the device
	 * should no longer have any blocks allocated on it.
	 */
	if (vd->vdev_islog) {
		error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
		    NULL, DS_FIND_CHILDREN);
	} else {
		error = ENOTSUP;	/* until we have bp rewrite */
	}

	/* Let any pending changes sync before acting on the result. */
	txg_wait_synced(spa_get_dsl(spa), 0);

	if (error)
		return (error);

	/*
	 * The evacuation succeeded.  Remove any remaining MOS metadata
	 * associated with this vdev, and wait for these changes to sync.
	 */
	txg = spa_vdev_config_enter(spa);
	vd->vdev_removing = B_TRUE;
	vdev_dirty(vd, 0, NULL, txg);
	vdev_config_dirty(vd);
	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);

	return (0);
}
384588ecc943SGeorge Wilson 
384688ecc943SGeorge Wilson /*
384788ecc943SGeorge Wilson  * Complete the removal by cleaning up the namespace.
384888ecc943SGeorge Wilson  */
void
spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd)
{
	vdev_t *rvd = spa->spa_root_vdev;
	uint64_t id = vd->vdev_id;
	boolean_t last_vdev = (id == (rvd->vdev_children - 1));

	/*
	 * 'vd' must be a top-level vdev; the caller holds the namespace
	 * lock and all of the config lock as writer (asserted below).
	 */
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
	ASSERT(vd == vd->vdev_top);

	/* Best-effort label wipe; failure is deliberately ignored. */
	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/* Detach vd from the dirty lists before freeing it. */
	if (list_link_active(&vd->vdev_state_dirty_node))
		vdev_state_clean(vd);
	if (list_link_active(&vd->vdev_config_dirty_node))
		vdev_config_clean(vd);

	vdev_free(vd);

	/*
	 * If vd was the last child we can simply shrink the array;
	 * otherwise install a hole vdev in its slot so the remaining
	 * children keep their ids.
	 */
	if (last_vdev) {
		vdev_compact_children(rvd);
	} else {
		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
		vdev_add_child(rvd, vd);
	}
	vdev_config_dirty(rvd);

	/*
	 * Reassess the health of our root vdev.
	 */
	vdev_reopen(rvd);
}
388288ecc943SGeorge Wilson 
3883fa94a07fSbrendan /*
3884fa94a07fSbrendan  * Remove a device from the pool.  Currently, this supports removing only hot
388588ecc943SGeorge Wilson  * spares, slogs, and level 2 ARC devices.
3886fa94a07fSbrendan  */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	metaslab_group_t *mg;
	nvlist_t **spares, **l2cache, *nv;
	uint64_t txg = 0;
	uint_t nspares, nl2cache;
	int error = 0;
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	/*
	 * The caller may already hold spa_namespace_lock; only enter (and
	 * later exit) the vdev config when it doesn't.
	 */
	if (!locked)
		txg = spa_vdev_enter(spa);

	/* NULL here means 'guid' is not an active vdev in this pool. */
	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = EBUSY;
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL && vd->vdev_islog) {
		ASSERT(!locked);
		ASSERT(vd == vd->vdev_top);

		/*
		 * XXX - Once we have bp-rewrite this should
		 * become the common case.
		 */

		mg = vd->vdev_mg;

		/*
		 * Stop allocating from this vdev.
		 */
		metaslab_group_passivate(mg);

		/*
		 * Wait for the youngest allocations and frees to sync,
		 * and then wait for the deferral of those frees to finish.
		 */
		spa_vdev_config_exit(spa, NULL,
		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);

		/*
		 * Attempt to evacuate the vdev.
		 */
		error = spa_vdev_remove_evacuate(spa, vd);

		txg = spa_vdev_config_enter(spa);

		/*
		 * If we couldn't evacuate the vdev, unwind.
		 */
		if (error) {
			metaslab_group_activate(mg);
			return (spa_vdev_exit(spa, NULL, txg, error));
		}

		/*
		 * Clean up the vdev namespace.
		 */
		spa_vdev_remove_from_namespace(spa, vd);

	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}
3990fa9e4066Sahrens 
3991fa9e4066Sahrens /*
39923d7072f8Seschrock  * Find any device that's done replacing, or a vdev marked 'unspare' that's
39933d7072f8Seschrock  * currently spared, so we can detach it.
3994fa9e4066Sahrens  */
3995ea8dc4b6Seschrock static vdev_t *
39963d7072f8Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd)
3997fa9e4066Sahrens {
3998ea8dc4b6Seschrock 	vdev_t *newvd, *oldvd;
3999fa9e4066Sahrens 
4000573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++) {
40013d7072f8Seschrock 		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
4002ea8dc4b6Seschrock 		if (oldvd != NULL)
4003ea8dc4b6Seschrock 			return (oldvd);
4004ea8dc4b6Seschrock 	}
4005fa9e4066Sahrens 
40063d7072f8Seschrock 	/*
40073d7072f8Seschrock 	 * Check for a completed replacement.
40083d7072f8Seschrock 	 */
4009fa9e4066Sahrens 	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
4010ea8dc4b6Seschrock 		oldvd = vd->vdev_child[0];
4011ea8dc4b6Seschrock 		newvd = vd->vdev_child[1];
4012ea8dc4b6Seschrock 
40138ad4d6ddSJeff Bonwick 		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
40148ad4d6ddSJeff Bonwick 		    !vdev_dtl_required(oldvd))
4015ea8dc4b6Seschrock 			return (oldvd);
4016fa9e4066Sahrens 	}
4017ea8dc4b6Seschrock 
40183d7072f8Seschrock 	/*
40193d7072f8Seschrock 	 * Check for a completed resilver with the 'unspare' flag set.
40203d7072f8Seschrock 	 */
40213d7072f8Seschrock 	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
40223d7072f8Seschrock 		newvd = vd->vdev_child[0];
40233d7072f8Seschrock 		oldvd = vd->vdev_child[1];
40243d7072f8Seschrock 
40253d7072f8Seschrock 		if (newvd->vdev_unspare &&
40268ad4d6ddSJeff Bonwick 		    vdev_dtl_empty(newvd, DTL_MISSING) &&
40278ad4d6ddSJeff Bonwick 		    !vdev_dtl_required(oldvd)) {
40283d7072f8Seschrock 			newvd->vdev_unspare = 0;
40293d7072f8Seschrock 			return (oldvd);
40303d7072f8Seschrock 		}
40313d7072f8Seschrock 	}
40323d7072f8Seschrock 
4033ea8dc4b6Seschrock 	return (NULL);
4034fa9e4066Sahrens }
4035fa9e4066Sahrens 
/*
 * Detach every device found by spa_vdev_resilver_done_hunt() -- i.e.
 * vdevs whose replacement or 'unspare' resilver has completed.  The
 * guids of the vdev, its parent, and grandparent are captured before
 * dropping SCL_ALL, since spa_vdev_detach() takes its own locks and
 * the vdev topology may change while the lock is released.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			ASSERT(ppvd->vdev_children == 2);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		/*
		 * Drop the lock for the detach calls (spa_vdev_detach()
		 * enters the config locks itself); bail out entirely if
		 * either detach fails.
		 */
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		/* Re-enter and rescan the tree from the top. */
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}
4071fa9e4066Sahrens 
4072c67d9675Seschrock /*
4073b3388e4fSEric Taylor  * Update the stored path or FRU for this vdev.
4074c67d9675Seschrock  */
4075c67d9675Seschrock int
40766809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
40776809eb4eSEric Schrock     boolean_t ispath)
4078c67d9675Seschrock {
4079c5904d13Seschrock 	vdev_t *vd;
4080c67d9675Seschrock 
4081b3388e4fSEric Taylor 	spa_vdev_state_enter(spa, SCL_ALL);
4082c67d9675Seschrock 
40836809eb4eSEric Schrock 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
4084b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOENT));
4085c67d9675Seschrock 
40860e34b6a7Sbonwick 	if (!vd->vdev_ops->vdev_op_leaf)
4087b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
40880e34b6a7Sbonwick 
40896809eb4eSEric Schrock 	if (ispath) {
40906809eb4eSEric Schrock 		spa_strfree(vd->vdev_path);
40916809eb4eSEric Schrock 		vd->vdev_path = spa_strdup(value);
40926809eb4eSEric Schrock 	} else {
40936809eb4eSEric Schrock 		if (vd->vdev_fru != NULL)
40946809eb4eSEric Schrock 			spa_strfree(vd->vdev_fru);
40956809eb4eSEric Schrock 		vd->vdev_fru = spa_strdup(value);
40966809eb4eSEric Schrock 	}
4097c67d9675Seschrock 
4098b3388e4fSEric Taylor 	return (spa_vdev_state_exit(spa, vd, 0));
4099c67d9675Seschrock }
4100c67d9675Seschrock 
41016809eb4eSEric Schrock int
41026809eb4eSEric Schrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
41036809eb4eSEric Schrock {
41046809eb4eSEric Schrock 	return (spa_vdev_set_common(spa, guid, newpath, B_TRUE));
41056809eb4eSEric Schrock }
41066809eb4eSEric Schrock 
41076809eb4eSEric Schrock int
41086809eb4eSEric Schrock spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
41096809eb4eSEric Schrock {
41106809eb4eSEric Schrock 	return (spa_vdev_set_common(spa, guid, newfru, B_FALSE));
41116809eb4eSEric Schrock }
41126809eb4eSEric Schrock 
4113fa9e4066Sahrens /*
4114fa9e4066Sahrens  * ==========================================================================
4115fa9e4066Sahrens  * SPA Scrubbing
4116fa9e4066Sahrens  * ==========================================================================
4117fa9e4066Sahrens  */
4118fa9e4066Sahrens 
4119ea8dc4b6Seschrock int
4120088f3894Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type)
4121fa9e4066Sahrens {
4122e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
4123bb8b5132Sek 
4124fa9e4066Sahrens 	if ((uint_t)type >= POOL_SCRUB_TYPES)
4125fa9e4066Sahrens 		return (ENOTSUP);
4126fa9e4066Sahrens 
4127fa9e4066Sahrens 	/*
4128088f3894Sahrens 	 * If a resilver was requested, but there is no DTL on a
4129088f3894Sahrens 	 * writeable leaf device, we have nothing to do.
4130fa9e4066Sahrens 	 */
4131088f3894Sahrens 	if (type == POOL_SCRUB_RESILVER &&
4132088f3894Sahrens 	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
4133088f3894Sahrens 		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
4134ea8dc4b6Seschrock 		return (0);
4135ea8dc4b6Seschrock 	}
4136fa9e4066Sahrens 
4137088f3894Sahrens 	if (type == POOL_SCRUB_EVERYTHING &&
4138088f3894Sahrens 	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
4139088f3894Sahrens 	    spa->spa_dsl_pool->dp_scrub_isresilver)
4140088f3894Sahrens 		return (EBUSY);
4141fa9e4066Sahrens 
4142088f3894Sahrens 	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
4143088f3894Sahrens 		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
4144088f3894Sahrens 	} else if (type == POOL_SCRUB_NONE) {
4145088f3894Sahrens 		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
4146ea8dc4b6Seschrock 	} else {
4147088f3894Sahrens 		return (EINVAL);
4148fa9e4066Sahrens 	}
4149fa9e4066Sahrens }
4150fa9e4066Sahrens 
4151ea8dc4b6Seschrock /*
4152ea8dc4b6Seschrock  * ==========================================================================
4153ea8dc4b6Seschrock  * SPA async task processing
4154ea8dc4b6Seschrock  * ==========================================================================
4155ea8dc4b6Seschrock  */
4156ea8dc4b6Seschrock 
4157ea8dc4b6Seschrock static void
41583d7072f8Seschrock spa_async_remove(spa_t *spa, vdev_t *vd)
4159fa9e4066Sahrens {
416049cf58c0SBrendan Gregg - Sun Microsystems 	if (vd->vdev_remove_wanted) {
416149cf58c0SBrendan Gregg - Sun Microsystems 		vd->vdev_remove_wanted = 0;
416249cf58c0SBrendan Gregg - Sun Microsystems 		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);
41631d713200SEric Schrock 
41641d713200SEric Schrock 		/*
41651d713200SEric Schrock 		 * We want to clear the stats, but we don't want to do a full
41661d713200SEric Schrock 		 * vdev_clear() as that will cause us to throw away
41671d713200SEric Schrock 		 * degraded/faulted state as well as attempt to reopen the
41681d713200SEric Schrock 		 * device, all of which is a waste.
41691d713200SEric Schrock 		 */
41701d713200SEric Schrock 		vd->vdev_stat.vs_read_errors = 0;
41711d713200SEric Schrock 		vd->vdev_stat.vs_write_errors = 0;
41721d713200SEric Schrock 		vd->vdev_stat.vs_checksum_errors = 0;
41731d713200SEric Schrock 
4174e14bb325SJeff Bonwick 		vdev_state_dirty(vd->vdev_top);
4175ea8dc4b6Seschrock 	}
417649cf58c0SBrendan Gregg - Sun Microsystems 
4177e14bb325SJeff Bonwick 	for (int c = 0; c < vd->vdev_children; c++)
417849cf58c0SBrendan Gregg - Sun Microsystems 		spa_async_remove(spa, vd->vdev_child[c]);
4179ea8dc4b6Seschrock }
4180fa9e4066Sahrens 
4181e14bb325SJeff Bonwick static void
4182e14bb325SJeff Bonwick spa_async_probe(spa_t *spa, vdev_t *vd)
4183e14bb325SJeff Bonwick {
4184e14bb325SJeff Bonwick 	if (vd->vdev_probe_wanted) {
4185e14bb325SJeff Bonwick 		vd->vdev_probe_wanted = 0;
4186e14bb325SJeff Bonwick 		vdev_reopen(vd);	/* vdev_open() does the actual probe */
4187e14bb325SJeff Bonwick 	}
4188e14bb325SJeff Bonwick 
4189e14bb325SJeff Bonwick 	for (int c = 0; c < vd->vdev_children; c++)
4190e14bb325SJeff Bonwick 		spa_async_probe(spa, vd->vdev_child[c]);
4191e14bb325SJeff Bonwick }
4192e14bb325SJeff Bonwick 
4193573ca77eSGeorge Wilson static void
4194573ca77eSGeorge Wilson spa_async_autoexpand(spa_t *spa, vdev_t *vd)
4195573ca77eSGeorge Wilson {
4196573ca77eSGeorge Wilson 	sysevent_id_t eid;
4197573ca77eSGeorge Wilson 	nvlist_t *attr;
4198573ca77eSGeorge Wilson 	char *physpath;
4199573ca77eSGeorge Wilson 
4200573ca77eSGeorge Wilson 	if (!spa->spa_autoexpand)
4201573ca77eSGeorge Wilson 		return;
4202573ca77eSGeorge Wilson 
4203573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++) {
4204573ca77eSGeorge Wilson 		vdev_t *cvd = vd->vdev_child[c];
4205573ca77eSGeorge Wilson 		spa_async_autoexpand(spa, cvd);
4206573ca77eSGeorge Wilson 	}
4207573ca77eSGeorge Wilson 
4208573ca77eSGeorge Wilson 	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
4209573ca77eSGeorge Wilson 		return;
4210573ca77eSGeorge Wilson 
4211573ca77eSGeorge Wilson 	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4212573ca77eSGeorge Wilson 	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
4213573ca77eSGeorge Wilson 
4214573ca77eSGeorge Wilson 	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4215573ca77eSGeorge Wilson 	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
4216573ca77eSGeorge Wilson 
4217573ca77eSGeorge Wilson 	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
4218573ca77eSGeorge Wilson 	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
4219573ca77eSGeorge Wilson 
4220573ca77eSGeorge Wilson 	nvlist_free(attr);
4221573ca77eSGeorge Wilson 	kmem_free(physpath, MAXPATHLEN);
4222573ca77eSGeorge Wilson }
4223573ca77eSGeorge Wilson 
/*
 * Worker for the async task mechanism: atomically claims all pending
 * SPA_ASYNC_* task bits, services each requested task in a fixed
 * order, then announces completion on spa_async_cv and exits.
 * Created by spa_async_dispatch(); spa_async_suspend() waits for
 * spa_async_thread to become NULL.
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	/* Snapshot and clear the pending task bits under the lock. */
	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t old_space, new_space;

		mutex_enter(&spa_namespace_lock);
		old_space = metaslab_class_get_space(spa_normal_class(spa));
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		new_space = metaslab_class_get_space(spa_normal_class(spa));
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (new_space != old_space) {
			spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
			    spa, NULL, CRED(),
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), new_space, new_space - old_space);
		}
	}

	/*
	 * See if any devices need to be marked REMOVED.  This covers
	 * the main vdev tree plus the l2cache and spare aux vdevs.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_remove(spa, spa->spa_root_vdev);
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/* Autoexpand events are skipped while pool I/O is suspended. */
	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}
4309ea8dc4b6Seschrock 
/*
 * Prevent new async work from being dispatched and wait for any
 * in-flight async thread to finish.  Suspensions nest (the counter is
 * incremented here and decremented by spa_async_resume()); each call
 * must be balanced by a matching resume.
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	/* spa_async_thread() broadcasts spa_async_cv when it exits. */
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}
4319ea8dc4b6Seschrock 
/*
 * Undo one level of spa_async_suspend().  Does not itself dispatch
 * any pending work; tasks accumulated while suspended run on a later
 * spa_async_dispatch().
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}
4328ea8dc4b6Seschrock 
4329ea8dc4b6Seschrock static void
4330ea8dc4b6Seschrock spa_async_dispatch(spa_t *spa)
4331ea8dc4b6Seschrock {
4332ea8dc4b6Seschrock 	mutex_enter(&spa->spa_async_lock);
4333ea8dc4b6Seschrock 	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
43340373e76bSbonwick 	    spa->spa_async_thread == NULL &&
43350373e76bSbonwick 	    rootdir != NULL && !vn_is_readonly(rootdir))
4336ea8dc4b6Seschrock 		spa->spa_async_thread = thread_create(NULL, 0,
4337ea8dc4b6Seschrock 		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
4338ea8dc4b6Seschrock 	mutex_exit(&spa->spa_async_lock);
4339ea8dc4b6Seschrock }
4340ea8dc4b6Seschrock 
/*
 * Post one or more SPA_ASYNC_* task bits for the async thread to
 * service.  The actual work is started later by spa_async_dispatch().
 */
void
spa_async_request(spa_t *spa, int task)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}
4348fa9e4066Sahrens 
4349fa9e4066Sahrens /*
4350fa9e4066Sahrens  * ==========================================================================
4351fa9e4066Sahrens  * SPA syncing routines
4352fa9e4066Sahrens  * ==========================================================================
4353fa9e4066Sahrens  */
/*
 * Free every block recorded on the deferred bplist 'bpl' (each must
 * have been born before 'txg'), empty the list, then pre-dirty the
 * backing object so the coming sync converges faster.
 */
static void
spa_sync_deferred_bplist(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx, uint64_t txg)
{
	blkptr_t blk;
	uint64_t itor = 0;
	uint8_t c = 1;	/* arbitrary nonzero byte used for the pre-dirty */

	while (bplist_iterate(bpl, &itor, &blk) == 0) {
		ASSERT(blk.blk_birth < txg);
		zio_free(spa, txg, &blk);
	}

	bplist_vacate(bpl, tx);

	/*
	 * Pre-dirty the first block so we sync to convergence faster.
	 * (Usually only the first block is needed.)
	 */
	dmu_write(bpl->bpl_mos, spa->spa_deferred_bplist_obj, 0, 1, &c, tx);
}
4374b24ab676SJeff Bonwick 
/*
 * Callback: issue an asynchronous free of 'bp' in the current txg as
 * a child of the parent zio passed in 'arg', inheriting its flags.
 */
static void
spa_sync_free(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	zio_t *zio = arg;

	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
	    zio->io_flags));
}
4383fa9e4066Sahrens 
4384fa9e4066Sahrens static void
438599653d4eSeschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
4386fa9e4066Sahrens {
4387fa9e4066Sahrens 	char *packed = NULL;
4388f7991ba4STim Haley 	size_t bufsize;
4389fa9e4066Sahrens 	size_t nvsize = 0;
4390fa9e4066Sahrens 	dmu_buf_t *db;
4391fa9e4066Sahrens 
439299653d4eSeschrock 	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
4393fa9e4066Sahrens 
4394f7991ba4STim Haley 	/*
4395f7991ba4STim Haley 	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
4396f7991ba4STim Haley 	 * information.  This avoids the dbuf_will_dirty() path and
4397f7991ba4STim Haley 	 * saves us a pre-read to get data we don't actually care about.
4398f7991ba4STim Haley 	 */
4399f7991ba4STim Haley 	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
4400f7991ba4STim Haley 	packed = kmem_alloc(bufsize, KM_SLEEP);
4401fa9e4066Sahrens 
440299653d4eSeschrock 	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
4403ea8dc4b6Seschrock 	    KM_SLEEP) == 0);
4404f7991ba4STim Haley 	bzero(packed + nvsize, bufsize - nvsize);
4405fa9e4066Sahrens 
4406f7991ba4STim Haley 	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
4407fa9e4066Sahrens 
4408f7991ba4STim Haley 	kmem_free(packed, bufsize);
4409fa9e4066Sahrens 
441099653d4eSeschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
4411fa9e4066Sahrens 	dmu_buf_will_dirty(db, tx);
4412fa9e4066Sahrens 	*(uint64_t *)db->db_data = nvsize;
4413ea8dc4b6Seschrock 	dmu_buf_rele(db, FTAG);
4414fa9e4066Sahrens }
4415fa9e4066Sahrens 
/*
 * Sync an aux vdev array (spares or l2cache) out to the MOS.
 * 'config' is the nvlist array name the vdev configs are stored
 * under, and 'entry' the pool directory key that records the
 * packed-nvlist object.  No-op unless sav->sav_sync is set; the flag
 * is cleared once the update is written.
 */
static void
spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
    const char *config, const char *entry)
{
	nvlist_t *nvroot;
	nvlist_t **list;
	int i;

	if (!sav->sav_sync)
		return;

	/*
	 * Update the MOS nvlist describing the list of available devices.
	 * spa_validate_aux() will have already made sure this nvlist is
	 * valid and the vdevs are labeled appropriately.
	 */
	if (sav->sav_object == 0) {
		/* First sync: create the object and register it. */
		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
		    &sav->sav_object, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	if (sav->sav_count == 0) {
		/* An empty array still gets written explicitly. */
		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
	} else {
		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
		for (i = 0; i < sav->sav_count; i++)
			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
			    B_FALSE, B_FALSE, B_TRUE);
		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
		    sav->sav_count) == 0);
		for (i = 0; i < sav->sav_count; i++)
			nvlist_free(list[i]);
		kmem_free(list, sav->sav_count * sizeof (void *));
	}

	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
	nvlist_free(nvroot);

	sav->sav_sync = B_FALSE;
}
446199653d4eSeschrock 
446299653d4eSeschrock static void
446399653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
446499653d4eSeschrock {
446599653d4eSeschrock 	nvlist_t *config;
446699653d4eSeschrock 
4467e14bb325SJeff Bonwick 	if (list_is_empty(&spa->spa_config_dirty_list))
446899653d4eSeschrock 		return;
446999653d4eSeschrock 
4470e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4471e14bb325SJeff Bonwick 
4472e14bb325SJeff Bonwick 	config = spa_config_generate(spa, spa->spa_root_vdev,
4473e14bb325SJeff Bonwick 	    dmu_tx_get_txg(tx), B_FALSE);
4474e14bb325SJeff Bonwick 
4475e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_STATE, FTAG);
447699653d4eSeschrock 
447799653d4eSeschrock 	if (spa->spa_config_syncing)
447899653d4eSeschrock 		nvlist_free(spa->spa_config_syncing);
447999653d4eSeschrock 	spa->spa_config_syncing = config;
448099653d4eSeschrock 
448199653d4eSeschrock 	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
448299653d4eSeschrock }
448399653d4eSeschrock 
4484990b4856Slling /*
4485990b4856Slling  * Set zpool properties.
4486990b4856Slling  */
/*
 * Sync-task callback that sets zpool properties: arg1 is the spa_t,
 * arg2 an nvlist of property name/value pairs.  Persistent properties
 * are written to the pool props ZAP object in the MOS (created lazily
 * on first use); selected values are also mirrored into in-core spa_t
 * fields.  Each set is logged to pool history unless this is pool
 * creation (tx_txg == TXG_INITIAL).
 */
static void
spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
{
	spa_t *spa = arg1;
	objset_t *mos = spa->spa_meta_objset;
	nvlist_t *nvp = arg2;
	nvpair_t *elem;
	uint64_t intval;
	char *strval;
	zpool_prop_t prop;
	const char *propname;
	zprop_type_t proptype;

	mutex_enter(&spa->spa_props_lock);

	elem = NULL;
	while ((elem = nvlist_next_nvpair(nvp, elem))) {
		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
		case ZPOOL_PROP_VERSION:
			/*
			 * Only set version for non-zpool-creation cases
			 * (set/import). spa_create() needs special care
			 * for version setting.
			 */
			if (tx->tx_txg != TXG_INITIAL) {
				VERIFY(nvpair_value_uint64(elem,
				    &intval) == 0);
				/* Versions may only move forward. */
				ASSERT(intval <= SPA_VERSION);
				ASSERT(intval >= spa_version(spa));
				spa->spa_uberblock.ub_version = intval;
				vdev_config_dirty(spa->spa_root_vdev);
			}
			break;

		case ZPOOL_PROP_ALTROOT:
			/*
			 * 'altroot' is a non-persistent property. It should
			 * have been set temporarily at creation or import time.
			 */
			ASSERT(spa->spa_root != NULL);
			break;

		case ZPOOL_PROP_CACHEFILE:
			/*
			 * 'cachefile' is also a non-persistent property.
			 */
			break;
		default:
			/*
			 * Set pool property values in the poolprops mos object.
			 */
			if (spa->spa_pool_props_object == 0) {
				/* Lazily create the props ZAP object. */
				VERIFY((spa->spa_pool_props_object =
				    zap_create(mos, DMU_OT_POOL_PROPS,
				    DMU_OT_NONE, 0, tx)) > 0);

				VERIFY(zap_update(mos,
				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
				    8, 1, &spa->spa_pool_props_object, tx)
				    == 0);
			}

			/* normalize the property name */
			propname = zpool_prop_to_name(prop);
			proptype = zpool_prop_get_type(prop);

			if (nvpair_type(elem) == DATA_TYPE_STRING) {
				ASSERT(proptype == PROP_TYPE_STRING);
				VERIFY(nvpair_value_string(elem, &strval) == 0);
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    1, strlen(strval) + 1, strval, tx) == 0);

			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
				VERIFY(nvpair_value_uint64(elem, &intval) == 0);

				if (proptype == PROP_TYPE_INDEX) {
					const char *unused;
					/* Validate the index value. */
					VERIFY(zpool_prop_index_to_string(
					    prop, intval, &unused) == 0);
				}
				VERIFY(zap_update(mos,
				    spa->spa_pool_props_object, propname,
				    8, 1, &intval, tx) == 0);
			} else {
				ASSERT(0); /* not allowed */
			}

			/* Mirror selected values into the in-core spa_t. */
			switch (prop) {
			case ZPOOL_PROP_DELEGATION:
				spa->spa_delegation = intval;
				break;
			case ZPOOL_PROP_BOOTFS:
				spa->spa_bootfs = intval;
				break;
			case ZPOOL_PROP_FAILUREMODE:
				spa->spa_failmode = intval;
				break;
			case ZPOOL_PROP_AUTOEXPAND:
				spa->spa_autoexpand = intval;
				spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
				break;
			case ZPOOL_PROP_DEDUPDITTO:
				spa->spa_dedup_ditto = intval;
				break;
			default:
				break;
			}
		}

		/* log internal history if this is not a zpool create */
		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
		    tx->tx_txg != TXG_INITIAL) {
			spa_history_internal_log(LOG_POOL_PROPSET,
			    spa, tx, cr, "%s %lld %s",
			    nvpair_name(elem), intval, spa_name(spa));
		}
	}

	mutex_exit(&spa->spa_props_lock);
}
4608b1b8ab34Slling 
/*
 * Sync the specified transaction group.  New blocks may be dirtied as
 * part of the process, so we iterate until it converges.
 *
 * Called once per txg by the txg sync thread.  On return, all dirty
 * state for 'txg' has been written out and the uberblock committed.
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	/* Frees deferred from the previous txg (persistent on-disk list). */
	bplist_t *defer_bpl = &spa->spa_deferred_bplist;
	/* Blocks freed during this txg; indexed by txg like other per-txg state. */
	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int error;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	/* Open the on-disk deferred-free list so we can append/flush below. */
	VERIFY(0 == bplist_open(defer_bpl, mos, spa->spa_deferred_bplist_obj));

	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		/* Scan top-level vdevs; break early on any non-default ratio. */
		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	/* One-time on-disk upgrades, triggered the first txg past each version. */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	/*
	 * If anything has changed in this txg, push the deferred frees
	 * from the previous txg.  If not, leave them alone so that we
	 * don't generate work on an otherwise idle system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg))
		spa_sync_deferred_bplist(spa, defer_bpl, tx, txg);

	/*
	 * Iterate to convergence.  Each sync pass may dirty more of the MOS
	 * (e.g. by allocating/freeing blocks), so repeat until it stays clean.
	 */
	do {
		int pass = ++spa->spa_sync_pass;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		/*
		 * In early passes, free blocks immediately; in later passes,
		 * defer them to the next txg to bound the work per pass.
		 */
		if (pass <= SYNC_PASS_DEFERRED_FREE) {
			zio_t *zio = zio_root(spa, NULL, NULL, 0);
			bplist_sync(free_bpl, spa_sync_free, zio, tx);
			VERIFY(zio_wait(zio) == 0);
		} else {
			bplist_sync(free_bpl, bplist_enqueue_cb, defer_bpl, tx);
		}

		ddt_sync(spa, txg);

		/* Write out metaslab/space-map state for every dirty vdev. */
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
			vdev_sync(vd, txg);

	} while (dmu_objset_is_dirty(mos, txg));

	ASSERT(free_bpl->bpl_queue == NULL);

	bplist_close(defer_bpl);

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			/*
			 * Pick up to SPA_DVAS_PER_BP healthy top-level vdevs,
			 * starting at a random child so the load spreads out.
			 * Skip holes (no metaslab array) and log devices.
			 */
			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			/* Retry once with B_TRUE on failure (see vdev_config_sync). */
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/* Label writes failed even on retry: suspend I/O and wait. */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	/* The just-written uberblock becomes the last-synced uberblock. */
	spa->spa_ubsync = spa->spa_uberblock;

	dsl_pool_sync_done(dp, txg);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	spa_update_dspace(spa);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
	ASSERT(defer_bpl->bpl_queue == NULL);
	ASSERT(free_bpl->bpl_queue == NULL);

	spa->spa_sync_pass = 0;

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	spa_handle_ignored_writes(spa);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}
4838fa9e4066Sahrens 
4839fa9e4066Sahrens /*
4840fa9e4066Sahrens  * Sync all pools.  We don't want to hold the namespace lock across these
4841fa9e4066Sahrens  * operations, so we take a reference on the spa_t and drop the lock during the
4842fa9e4066Sahrens  * sync.
4843fa9e4066Sahrens  */
4844fa9e4066Sahrens void
4845fa9e4066Sahrens spa_sync_allpools(void)
4846fa9e4066Sahrens {
4847fa9e4066Sahrens 	spa_t *spa = NULL;
4848fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
4849fa9e4066Sahrens 	while ((spa = spa_next(spa)) != NULL) {
4850e14bb325SJeff Bonwick 		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
4851fa9e4066Sahrens 			continue;
4852fa9e4066Sahrens 		spa_open_ref(spa, FTAG);
4853fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
4854fa9e4066Sahrens 		txg_wait_synced(spa_get_dsl(spa), 0);
4855fa9e4066Sahrens 		mutex_enter(&spa_namespace_lock);
4856fa9e4066Sahrens 		spa_close(spa, FTAG);
4857fa9e4066Sahrens 	}
4858fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
4859fa9e4066Sahrens }
4860fa9e4066Sahrens 
4861fa9e4066Sahrens /*
4862fa9e4066Sahrens  * ==========================================================================
4863fa9e4066Sahrens  * Miscellaneous routines
4864fa9e4066Sahrens  * ==========================================================================
4865fa9e4066Sahrens  */
4866fa9e4066Sahrens 
4867fa9e4066Sahrens /*
4868fa9e4066Sahrens  * Remove all pools in the system.
4869fa9e4066Sahrens  */
4870fa9e4066Sahrens void
4871fa9e4066Sahrens spa_evict_all(void)
4872fa9e4066Sahrens {
4873fa9e4066Sahrens 	spa_t *spa;
4874fa9e4066Sahrens 
4875fa9e4066Sahrens 	/*
4876fa9e4066Sahrens 	 * Remove all cached state.  All pools should be closed now,
4877fa9e4066Sahrens 	 * so every spa in the AVL tree should be unreferenced.
4878fa9e4066Sahrens 	 */
4879fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
4880fa9e4066Sahrens 	while ((spa = spa_next(NULL)) != NULL) {
4881fa9e4066Sahrens 		/*
4882ea8dc4b6Seschrock 		 * Stop async tasks.  The async thread may need to detach
4883ea8dc4b6Seschrock 		 * a device that's been replaced, which requires grabbing
4884ea8dc4b6Seschrock 		 * spa_namespace_lock, so we must drop it here.
4885fa9e4066Sahrens 		 */
4886fa9e4066Sahrens 		spa_open_ref(spa, FTAG);
4887fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
4888ea8dc4b6Seschrock 		spa_async_suspend(spa);
4889fa9e4066Sahrens 		mutex_enter(&spa_namespace_lock);
4890fa9e4066Sahrens 		spa_close(spa, FTAG);
4891fa9e4066Sahrens 
4892fa9e4066Sahrens 		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
4893fa9e4066Sahrens 			spa_unload(spa);
4894fa9e4066Sahrens 			spa_deactivate(spa);
4895fa9e4066Sahrens 		}
4896fa9e4066Sahrens 		spa_remove(spa);
4897fa9e4066Sahrens 	}
4898fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
4899fa9e4066Sahrens }
4900ea8dc4b6Seschrock 
4901ea8dc4b6Seschrock vdev_t *
49026809eb4eSEric Schrock spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
4903ea8dc4b6Seschrock {
4904c5904d13Seschrock 	vdev_t *vd;
4905c5904d13Seschrock 	int i;
4906c5904d13Seschrock 
4907c5904d13Seschrock 	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
4908c5904d13Seschrock 		return (vd);
4909c5904d13Seschrock 
49106809eb4eSEric Schrock 	if (aux) {
4911c5904d13Seschrock 		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
4912c5904d13Seschrock 			vd = spa->spa_l2cache.sav_vdevs[i];
49136809eb4eSEric Schrock 			if (vd->vdev_guid == guid)
49146809eb4eSEric Schrock 				return (vd);
49156809eb4eSEric Schrock 		}
49166809eb4eSEric Schrock 
49176809eb4eSEric Schrock 		for (i = 0; i < spa->spa_spares.sav_count; i++) {
49186809eb4eSEric Schrock 			vd = spa->spa_spares.sav_vdevs[i];
4919c5904d13Seschrock 			if (vd->vdev_guid == guid)
4920c5904d13Seschrock 				return (vd);
4921c5904d13Seschrock 		}
4922c5904d13Seschrock 	}
4923c5904d13Seschrock 
4924c5904d13Seschrock 	return (NULL);
4925ea8dc4b6Seschrock }
4926eaca9bbdSeschrock 
/*
 * Upgrade the pool's on-disk format to 'version'.  Dirties the root vdev
 * config and waits for the change to be synced out, so on return the new
 * version is committed to disk.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	/* Dirty the config so the new version goes out with the next sync. */
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	/* Block until the version bump is on disk. */
	txg_wait_synced(spa_get_dsl(spa), 0);
}
494799653d4eSeschrock 
494899653d4eSeschrock boolean_t
494999653d4eSeschrock spa_has_spare(spa_t *spa, uint64_t guid)
495099653d4eSeschrock {
495199653d4eSeschrock 	int i;
495239c23413Seschrock 	uint64_t spareguid;
4953fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_spares;
495499653d4eSeschrock 
4955fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
4956fa94a07fSbrendan 		if (sav->sav_vdevs[i]->vdev_guid == guid)
495799653d4eSeschrock 			return (B_TRUE);
495899653d4eSeschrock 
4959fa94a07fSbrendan 	for (i = 0; i < sav->sav_npending; i++) {
4960fa94a07fSbrendan 		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
4961fa94a07fSbrendan 		    &spareguid) == 0 && spareguid == guid)
496239c23413Seschrock 			return (B_TRUE);
496339c23413Seschrock 	}
496439c23413Seschrock 
496599653d4eSeschrock 	return (B_FALSE);
4966eaca9bbdSeschrock }
4967b1b8ab34Slling 
496889a89ebfSlling /*
496989a89ebfSlling  * Check if a pool has an active shared spare device.
497089a89ebfSlling  * Note: reference count of an active spare is 2, as a spare and as a replace
497189a89ebfSlling  */
497289a89ebfSlling static boolean_t
497389a89ebfSlling spa_has_active_shared_spare(spa_t *spa)
497489a89ebfSlling {
497589a89ebfSlling 	int i, refcnt;
497689a89ebfSlling 	uint64_t pool;
497789a89ebfSlling 	spa_aux_vdev_t *sav = &spa->spa_spares;
497889a89ebfSlling 
497989a89ebfSlling 	for (i = 0; i < sav->sav_count; i++) {
498089a89ebfSlling 		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
498189a89ebfSlling 		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
498289a89ebfSlling 		    refcnt > 2)
498389a89ebfSlling 			return (B_TRUE);
498489a89ebfSlling 	}
498589a89ebfSlling 
498689a89ebfSlling 	return (B_FALSE);
498789a89ebfSlling }
498889a89ebfSlling 
/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t		*ev;
	sysevent_attr_list_t	*attr = NULL;
	sysevent_value_t	value;
	sysevent_id_t		eid;

	/* SE_SLEEP: allocation may block rather than fail. */
	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	/* Attach the pool name and guid to every event. */
	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* vdev attributes are optional -- only added when a vdev was given. */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	/*
	 * On successful attach the event takes ownership of the attribute
	 * list; clear 'attr' so the cleanup path below doesn't free it twice.
	 */
	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	attr = NULL;

	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}
5046