xref: /illumos-gate/usr/src/uts/common/fs/zfs/spa.c (revision 35a5a3587fd94b666239c157d3722745250ccbd7)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
2199653d4eSeschrock 
22fa9e4066Sahrens /*
23379c004dSEric Schrock  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24fa9e4066Sahrens  * Use is subject to license terms.
25fa9e4066Sahrens  */
26fa9e4066Sahrens 
27fa9e4066Sahrens /*
28fa9e4066Sahrens  * This file contains all the routines used when modifying on-disk SPA state.
29fa9e4066Sahrens  * This includes opening, importing, destroying, exporting a pool, and syncing a
30fa9e4066Sahrens  * pool.
31fa9e4066Sahrens  */
32fa9e4066Sahrens 
33fa9e4066Sahrens #include <sys/zfs_context.h>
34ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h>
35fa9e4066Sahrens #include <sys/spa_impl.h>
36fa9e4066Sahrens #include <sys/zio.h>
37fa9e4066Sahrens #include <sys/zio_checksum.h>
38fa9e4066Sahrens #include <sys/dmu.h>
39fa9e4066Sahrens #include <sys/dmu_tx.h>
40fa9e4066Sahrens #include <sys/zap.h>
41fa9e4066Sahrens #include <sys/zil.h>
42b24ab676SJeff Bonwick #include <sys/ddt.h>
43fa9e4066Sahrens #include <sys/vdev_impl.h>
44fa9e4066Sahrens #include <sys/metaslab.h>
4588ecc943SGeorge Wilson #include <sys/metaslab_impl.h>
46fa9e4066Sahrens #include <sys/uberblock_impl.h>
47fa9e4066Sahrens #include <sys/txg.h>
48fa9e4066Sahrens #include <sys/avl.h>
49fa9e4066Sahrens #include <sys/dmu_traverse.h>
50b1b8ab34Slling #include <sys/dmu_objset.h>
51fa9e4066Sahrens #include <sys/unique.h>
52fa9e4066Sahrens #include <sys/dsl_pool.h>
53b1b8ab34Slling #include <sys/dsl_dataset.h>
54fa9e4066Sahrens #include <sys/dsl_dir.h>
55fa9e4066Sahrens #include <sys/dsl_prop.h>
56b1b8ab34Slling #include <sys/dsl_synctask.h>
57fa9e4066Sahrens #include <sys/fs/zfs.h>
58fa94a07fSbrendan #include <sys/arc.h>
59fa9e4066Sahrens #include <sys/callb.h>
6095173954Sek #include <sys/systeminfo.h>
61e7cbe64fSgw #include <sys/spa_boot.h>
62573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h>
63fa9e4066Sahrens 
645679c89fSjv #ifdef	_KERNEL
65dedec472SJack Meng #include <sys/bootprops.h>
66*35a5a358SJonathan Adams #include <sys/callb.h>
67*35a5a358SJonathan Adams #include <sys/cpupart.h>
68*35a5a358SJonathan Adams #include <sys/pool.h>
69*35a5a358SJonathan Adams #include <sys/sysdc.h>
70*35a5a358SJonathan Adams #include <sys/zone.h>
715679c89fSjv #endif	/* _KERNEL */
725679c89fSjv 
73990b4856Slling #include "zfs_prop.h"
74b7b97454Sperrin #include "zfs_comutil.h"
75990b4856Slling 
/*
 * Sizing modes for the per-I/O-type zio taskqs.  Each zio_taskq_info_t
 * pairs one of these modes with a value whose meaning depends on the mode.
 */
typedef enum zti_modes {
	zti_mode_fixed,			/* value is # of threads (min 1) */
	zti_mode_online_percent,	/* value is % of online CPUs */
	zti_mode_batch,			/* cpu-intensive; value is ignored */
	zti_mode_null,			/* don't create a taskq */
	zti_nmodes
} zti_modes_t;

/* Initializer shorthands for the zio_taskqs table below. */
#define	ZTI_FIX(n)	{ zti_mode_fixed, (n) }
#define	ZTI_PCT(n)	{ zti_mode_online_percent, (n) }
#define	ZTI_BATCH	{ zti_mode_batch, 0 }
#define	ZTI_NULL	{ zti_mode_null, 0 }

#define	ZTI_ONE		ZTI_FIX(1)

typedef struct zio_taskq_info {
	enum zti_modes zti_mode;	/* how zti_value is interpreted */
	uint_t zti_value;		/* thread count or percentage */
} zio_taskq_info_t;

/* Name suffixes for the four taskq flavors of each I/O type. */
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
	"issue", "issue_high", "intr", "intr_high"
};

/*
 * Define the taskq threads for the following I/O types:
 * 	NULL, READ, WRITE, FREE, CLAIM, and IOCTL
 */
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_FIX(8),	ZTI_NULL,	ZTI_BATCH,	ZTI_NULL },
	{ ZTI_BATCH,	ZTI_FIX(5),	ZTI_FIX(8),	ZTI_FIX(5) },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
};

static void spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);

/* Tunables controlling zio taskq sizing and scheduling (see spa_taskq_create). */
uint_t		zio_taskq_batch_pct = 100;	/* 1 thread per cpu in pset */
id_t		zio_taskq_psrset_bind = PS_NONE;
boolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
uint_t		zio_taskq_basedc = 80;		/* base duty cycle */

boolean_t	spa_create_process = B_TRUE;	/* no process ==> no sysdc */

/*
 * This (illegal) pool name is used when temporarily importing a spa_t in order
 * to get the vdev stats associated with the imported devices.
 */
#define	TRYIMPORT_NAME	"$import"
129*35a5a358SJonathan Adams 
130990b4856Slling /*
131990b4856Slling  * ==========================================================================
132990b4856Slling  * SPA properties routines
133990b4856Slling  * ==========================================================================
134990b4856Slling  */
135990b4856Slling 
136990b4856Slling /*
137990b4856Slling  * Add a (source=src, propname=propval) list to an nvlist.
138990b4856Slling  */
1399d82f4f6Slling static void
140990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
141990b4856Slling     uint64_t intval, zprop_source_t src)
142990b4856Slling {
143990b4856Slling 	const char *propname = zpool_prop_to_name(prop);
144990b4856Slling 	nvlist_t *propval;
145990b4856Slling 
1469d82f4f6Slling 	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1479d82f4f6Slling 	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
148990b4856Slling 
1499d82f4f6Slling 	if (strval != NULL)
1509d82f4f6Slling 		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
1519d82f4f6Slling 	else
1529d82f4f6Slling 		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
153990b4856Slling 
1549d82f4f6Slling 	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
155990b4856Slling 	nvlist_free(propval);
156990b4856Slling }
157990b4856Slling 
/*
 * Get property values from the spa configuration.
 *
 * Appends the in-core (non-MOS) pool properties to *nvp.  Caller must
 * hold spa_props_lock; *nvp must already be an allocated nvlist.
 */
static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
	uint64_t size;
	uint64_t alloc;
	uint64_t cap, version;
	zprop_source_t src = ZPROP_SRC_NONE;
	spa_config_dirent_t *dp;

	ASSERT(MUTEX_HELD(&spa->spa_props_lock));

	/*
	 * Space accounting and state properties only exist once the vdev
	 * tree has been constructed.
	 */
	if (spa->spa_root_vdev != NULL) {
		alloc = metaslab_class_get_alloc(spa_normal_class(spa));
		size = metaslab_class_get_space(spa_normal_class(spa));
		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
		    size - alloc, src);

		/* Percent-used, guarding against division by zero. */
		cap = (size == 0) ? 0 : (alloc * 100 / size);
		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
		    ddt_get_pool_dedup_ratio(spa), src);

		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
		    spa->spa_root_vdev->vdev_state, src);

		/*
		 * Version is "default" only if it matches the current
		 * default; otherwise it was set locally.
		 */
		version = spa_version(spa);
		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
			src = ZPROP_SRC_DEFAULT;
		else
			src = ZPROP_SRC_LOCAL;
		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
	}

	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);

	if (spa->spa_root != NULL)
		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
		    0, ZPROP_SRC_LOCAL);

	/*
	 * Report cachefile only when it differs from the default path:
	 * NULL scd_path means caching is disabled ("none").
	 */
	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
		if (dp->scd_path == NULL) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    "none", 0, ZPROP_SRC_LOCAL);
		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
		}
	}
}
214990b4856Slling 
215990b4856Slling /*
216990b4856Slling  * Get zpool property values.
217990b4856Slling  */
218990b4856Slling int
219990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp)
220990b4856Slling {
221b24ab676SJeff Bonwick 	objset_t *mos = spa->spa_meta_objset;
222990b4856Slling 	zap_cursor_t zc;
223990b4856Slling 	zap_attribute_t za;
224990b4856Slling 	int err;
225990b4856Slling 
2269d82f4f6Slling 	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
227990b4856Slling 
228e14bb325SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);
229e14bb325SJeff Bonwick 
230990b4856Slling 	/*
231990b4856Slling 	 * Get properties from the spa config.
232990b4856Slling 	 */
2339d82f4f6Slling 	spa_prop_get_config(spa, nvp);
234990b4856Slling 
235990b4856Slling 	/* If no pool property object, no more prop to get. */
236990b4856Slling 	if (spa->spa_pool_props_object == 0) {
237990b4856Slling 		mutex_exit(&spa->spa_props_lock);
238990b4856Slling 		return (0);
239990b4856Slling 	}
240990b4856Slling 
241990b4856Slling 	/*
242990b4856Slling 	 * Get properties from the MOS pool property object.
243990b4856Slling 	 */
244990b4856Slling 	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
245990b4856Slling 	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
246990b4856Slling 	    zap_cursor_advance(&zc)) {
247990b4856Slling 		uint64_t intval = 0;
248990b4856Slling 		char *strval = NULL;
249990b4856Slling 		zprop_source_t src = ZPROP_SRC_DEFAULT;
250990b4856Slling 		zpool_prop_t prop;
251990b4856Slling 
252990b4856Slling 		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
253990b4856Slling 			continue;
254990b4856Slling 
255990b4856Slling 		switch (za.za_integer_length) {
256990b4856Slling 		case 8:
257990b4856Slling 			/* integer property */
258990b4856Slling 			if (za.za_first_integer !=
259990b4856Slling 			    zpool_prop_default_numeric(prop))
260990b4856Slling 				src = ZPROP_SRC_LOCAL;
261990b4856Slling 
262990b4856Slling 			if (prop == ZPOOL_PROP_BOOTFS) {
263990b4856Slling 				dsl_pool_t *dp;
264990b4856Slling 				dsl_dataset_t *ds = NULL;
265990b4856Slling 
266990b4856Slling 				dp = spa_get_dsl(spa);
267990b4856Slling 				rw_enter(&dp->dp_config_rwlock, RW_READER);
268745cd3c5Smaybee 				if (err = dsl_dataset_hold_obj(dp,
269745cd3c5Smaybee 				    za.za_first_integer, FTAG, &ds)) {
270990b4856Slling 					rw_exit(&dp->dp_config_rwlock);
271990b4856Slling 					break;
272990b4856Slling 				}
273990b4856Slling 
274990b4856Slling 				strval = kmem_alloc(
275990b4856Slling 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
276990b4856Slling 				    KM_SLEEP);
277990b4856Slling 				dsl_dataset_name(ds, strval);
278745cd3c5Smaybee 				dsl_dataset_rele(ds, FTAG);
279990b4856Slling 				rw_exit(&dp->dp_config_rwlock);
280990b4856Slling 			} else {
281990b4856Slling 				strval = NULL;
282990b4856Slling 				intval = za.za_first_integer;
283990b4856Slling 			}
284990b4856Slling 
2859d82f4f6Slling 			spa_prop_add_list(*nvp, prop, strval, intval, src);
286990b4856Slling 
287990b4856Slling 			if (strval != NULL)
288990b4856Slling 				kmem_free(strval,
289990b4856Slling 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
290990b4856Slling 
291990b4856Slling 			break;
292990b4856Slling 
293990b4856Slling 		case 1:
294990b4856Slling 			/* string property */
295990b4856Slling 			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
296990b4856Slling 			err = zap_lookup(mos, spa->spa_pool_props_object,
297990b4856Slling 			    za.za_name, 1, za.za_num_integers, strval);
298990b4856Slling 			if (err) {
299990b4856Slling 				kmem_free(strval, za.za_num_integers);
300990b4856Slling 				break;
301990b4856Slling 			}
3029d82f4f6Slling 			spa_prop_add_list(*nvp, prop, strval, 0, src);
303990b4856Slling 			kmem_free(strval, za.za_num_integers);
304990b4856Slling 			break;
305990b4856Slling 
306990b4856Slling 		default:
307990b4856Slling 			break;
308990b4856Slling 		}
309990b4856Slling 	}
310990b4856Slling 	zap_cursor_fini(&zc);
311990b4856Slling 	mutex_exit(&spa->spa_props_lock);
312990b4856Slling out:
313990b4856Slling 	if (err && err != ENOENT) {
314990b4856Slling 		nvlist_free(*nvp);
3159d82f4f6Slling 		*nvp = NULL;
316990b4856Slling 		return (err);
317990b4856Slling 	}
318990b4856Slling 
319990b4856Slling 	return (0);
320990b4856Slling }
321990b4856Slling 
/*
 * Validate the given pool properties nvlist and modify the list
 * for the property values to be set.
 *
 * Returns 0 if every property in 'props' is acceptable, or an errno on
 * the first invalid one.  As a side effect, a bootfs value given by
 * name is rewritten in 'props' as the dataset's object number.
 */
static int
spa_prop_validate(spa_t *spa, nvlist_t *props)
{
	nvpair_t *elem;
	int error = 0, reset_bootfs = 0;
	uint64_t objnum;

	elem = NULL;
	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
		zpool_prop_t prop;
		char *propname, *strval;
		uint64_t intval;
		objset_t *os;
		char *slash;

		propname = nvpair_name(elem);

		/* Unknown property names are rejected outright. */
		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
			return (EINVAL);

		switch (prop) {
		case ZPOOL_PROP_VERSION:
			/* Version may only move forward, up to SPA_VERSION. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error &&
			    (intval < spa_version(spa) || intval > SPA_VERSION))
				error = EINVAL;
			break;

		case ZPOOL_PROP_DELEGATION:
		case ZPOOL_PROP_AUTOREPLACE:
		case ZPOOL_PROP_LISTSNAPS:
		case ZPOOL_PROP_AUTOEXPAND:
			/* Boolean properties: only 0 or 1 is valid. */
			error = nvpair_value_uint64(elem, &intval);
			if (!error && intval > 1)
				error = EINVAL;
			break;

		case ZPOOL_PROP_BOOTFS:
			/*
			 * If the pool version is less than SPA_VERSION_BOOTFS,
			 * or the pool is still being created (version == 0),
			 * the bootfs property cannot be set.
			 */
			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
				error = ENOTSUP;
				break;
			}

			/*
			 * Make sure the vdev config is bootable
			 */
			if (!vdev_is_bootable(spa->spa_root_vdev)) {
				error = ENOTSUP;
				break;
			}

			reset_bootfs = 1;

			error = nvpair_value_string(elem, &strval);

			if (!error) {
				uint64_t compress;

				/* Empty name clears bootfs to its default. */
				if (strval == NULL || strval[0] == '\0') {
					objnum = zpool_prop_default_numeric(
					    ZPOOL_PROP_BOOTFS);
					break;
				}

				if (error = dmu_objset_hold(strval, FTAG, &os))
					break;

				/* Must be ZPL and not gzip compressed. */

				if (dmu_objset_type(os) != DMU_OST_ZFS) {
					error = ENOTSUP;
				} else if ((error = dsl_prop_get_integer(strval,
				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
				    &compress, NULL)) == 0 &&
				    !BOOTFS_COMPRESS_VALID(compress)) {
					error = ENOTSUP;
				} else {
					objnum = dmu_objset_id(os);
				}
				dmu_objset_rele(os, FTAG);
			}
			break;

		case ZPOOL_PROP_FAILUREMODE:
			error = nvpair_value_uint64(elem, &intval);
			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
			    intval > ZIO_FAILURE_MODE_PANIC))
				error = EINVAL;

			/*
			 * This is a special case which only occurs when
			 * the pool has completely failed. This allows
			 * the user to change the in-core failmode property
			 * without syncing it out to disk (I/Os might
			 * currently be blocked). We do this by returning
			 * EIO to the caller (spa_prop_set) to trick it
			 * into thinking we encountered a property validation
			 * error.
			 */
			if (!error && spa_suspended(spa)) {
				spa->spa_failmode = intval;
				error = EIO;
			}
			break;

		case ZPOOL_PROP_CACHEFILE:
			if ((error = nvpair_value_string(elem, &strval)) != 0)
				break;

			/* "" (default) and "none" are always acceptable. */
			if (strval[0] == '\0')
				break;

			if (strcmp(strval, "none") == 0)
				break;

			/* Otherwise it must be an absolute path ... */
			if (strval[0] != '/') {
				error = EINVAL;
				break;
			}

			/* ... whose final component is a usable file name. */
			slash = strrchr(strval, '/');
			ASSERT(slash != NULL);

			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
			    strcmp(slash, "/..") == 0)
				error = EINVAL;
			break;

		case ZPOOL_PROP_DEDUPDITTO:
			/* Requires a pool version with dedup support. */
			if (spa_version(spa) < SPA_VERSION_DEDUP)
				error = ENOTSUP;
			else
				error = nvpair_value_uint64(elem, &intval);
			if (error == 0 &&
			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
				error = EINVAL;
			break;
		}

		if (error)
			break;
	}

	/*
	 * Replace the bootfs name the user supplied with the dataset's
	 * object number, which is what gets stored in the MOS.
	 */
	if (!error && reset_bootfs) {
		error = nvlist_remove(props,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);

		if (!error) {
			error = nvlist_add_uint64(props,
			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
		}
	}

	return (error);
}
486990b4856Slling 
487379c004dSEric Schrock void
488379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
489379c004dSEric Schrock {
490379c004dSEric Schrock 	char *cachefile;
491379c004dSEric Schrock 	spa_config_dirent_t *dp;
492379c004dSEric Schrock 
493379c004dSEric Schrock 	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
494379c004dSEric Schrock 	    &cachefile) != 0)
495379c004dSEric Schrock 		return;
496379c004dSEric Schrock 
497379c004dSEric Schrock 	dp = kmem_alloc(sizeof (spa_config_dirent_t),
498379c004dSEric Schrock 	    KM_SLEEP);
499379c004dSEric Schrock 
500379c004dSEric Schrock 	if (cachefile[0] == '\0')
501379c004dSEric Schrock 		dp->scd_path = spa_strdup(spa_config_path);
502379c004dSEric Schrock 	else if (strcmp(cachefile, "none") == 0)
503379c004dSEric Schrock 		dp->scd_path = NULL;
504379c004dSEric Schrock 	else
505379c004dSEric Schrock 		dp->scd_path = spa_strdup(cachefile);
506379c004dSEric Schrock 
507379c004dSEric Schrock 	list_insert_head(&spa->spa_config_list, dp);
508379c004dSEric Schrock 	if (need_sync)
509379c004dSEric Schrock 		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
510379c004dSEric Schrock }
511379c004dSEric Schrock 
512990b4856Slling int
513990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp)
514990b4856Slling {
515990b4856Slling 	int error;
516379c004dSEric Schrock 	nvpair_t *elem;
517379c004dSEric Schrock 	boolean_t need_sync = B_FALSE;
518379c004dSEric Schrock 	zpool_prop_t prop;
519990b4856Slling 
520990b4856Slling 	if ((error = spa_prop_validate(spa, nvp)) != 0)
521990b4856Slling 		return (error);
522990b4856Slling 
523379c004dSEric Schrock 	elem = NULL;
524379c004dSEric Schrock 	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
525379c004dSEric Schrock 		if ((prop = zpool_name_to_prop(
526379c004dSEric Schrock 		    nvpair_name(elem))) == ZPROP_INVAL)
527379c004dSEric Schrock 			return (EINVAL);
528379c004dSEric Schrock 
529379c004dSEric Schrock 		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
530379c004dSEric Schrock 			continue;
531379c004dSEric Schrock 
532379c004dSEric Schrock 		need_sync = B_TRUE;
533379c004dSEric Schrock 		break;
534379c004dSEric Schrock 	}
535379c004dSEric Schrock 
536379c004dSEric Schrock 	if (need_sync)
537379c004dSEric Schrock 		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
538379c004dSEric Schrock 		    spa, nvp, 3));
539379c004dSEric Schrock 	else
540379c004dSEric Schrock 		return (0);
541990b4856Slling }
542990b4856Slling 
543990b4856Slling /*
544990b4856Slling  * If the bootfs property value is dsobj, clear it.
545990b4856Slling  */
546990b4856Slling void
547990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
548990b4856Slling {
549990b4856Slling 	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
550990b4856Slling 		VERIFY(zap_remove(spa->spa_meta_objset,
551990b4856Slling 		    spa->spa_pool_props_object,
552990b4856Slling 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
553990b4856Slling 		spa->spa_bootfs = 0;
554990b4856Slling 	}
555990b4856Slling }
556990b4856Slling 
557fa9e4066Sahrens /*
558fa9e4066Sahrens  * ==========================================================================
559fa9e4066Sahrens  * SPA state manipulation (open/create/destroy/import/export)
560fa9e4066Sahrens  * ==========================================================================
561fa9e4066Sahrens  */
562fa9e4066Sahrens 
563ea8dc4b6Seschrock static int
564ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b)
565ea8dc4b6Seschrock {
566ea8dc4b6Seschrock 	spa_error_entry_t *sa = (spa_error_entry_t *)a;
567ea8dc4b6Seschrock 	spa_error_entry_t *sb = (spa_error_entry_t *)b;
568ea8dc4b6Seschrock 	int ret;
569ea8dc4b6Seschrock 
570ea8dc4b6Seschrock 	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
571ea8dc4b6Seschrock 	    sizeof (zbookmark_t));
572ea8dc4b6Seschrock 
573ea8dc4b6Seschrock 	if (ret < 0)
574ea8dc4b6Seschrock 		return (-1);
575ea8dc4b6Seschrock 	else if (ret > 0)
576ea8dc4b6Seschrock 		return (1);
577ea8dc4b6Seschrock 	else
578ea8dc4b6Seschrock 		return (0);
579ea8dc4b6Seschrock }
580ea8dc4b6Seschrock 
581ea8dc4b6Seschrock /*
582ea8dc4b6Seschrock  * Utility function which retrieves copies of the current logs and
583ea8dc4b6Seschrock  * re-initializes them in the process.
584ea8dc4b6Seschrock  */
585ea8dc4b6Seschrock void
586ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
587ea8dc4b6Seschrock {
588ea8dc4b6Seschrock 	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));
589ea8dc4b6Seschrock 
590ea8dc4b6Seschrock 	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
591ea8dc4b6Seschrock 	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));
592ea8dc4b6Seschrock 
593ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_scrub,
594ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
595ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
596ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_last,
597ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
598ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
599ea8dc4b6Seschrock }
600ea8dc4b6Seschrock 
/*
 * Create one zio taskq for the pool, sized according to 'mode' and
 * 'value' (see zti_modes_t).  Returns NULL for zti_mode_null.  When the
 * sysdc tunable is on and the pool has its own process, threads are
 * created under the SDC scheduling class; otherwise they are plain
 * taskq threads bound to spa_proc.
 */
static taskq_t *
spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
    uint_t value)
{
	uint_t flags = TASKQ_PREPOPULATE;
	boolean_t batch = B_FALSE;

	switch (mode) {
	case zti_mode_null:
		return (NULL);		/* no taskq needed */

	case zti_mode_fixed:
		ASSERT3U(value, >=, 1);
		value = MAX(value, 1);	/* defensive clamp on non-DEBUG */
		break;

	case zti_mode_batch:
		/* cpu-intensive work: scale with the CPUs in the pset */
		batch = B_TRUE;
		flags |= TASKQ_THREADS_CPU_PCT;
		value = zio_taskq_batch_pct;
		break;

	case zti_mode_online_percent:
		flags |= TASKQ_THREADS_CPU_PCT;
		break;

	default:
		panic("unrecognized mode for %s taskq (%u:%u) in "
		    "spa_activate()",
		    name, mode, value);
		break;
	}

	/*
	 * spa_proc == &p0 means no dedicated pool process was created,
	 * in which case SDC cannot be used.
	 */
	if (zio_taskq_sysdc && spa->spa_proc != &p0) {
		if (batch)
			flags |= TASKQ_DC_BATCH;

		return (taskq_create_sysdc(name, value, 50, INT_MAX,
		    spa->spa_proc, zio_taskq_basedc, flags));
	}
	return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
	    spa->spa_proc, flags));
}
644*35a5a358SJonathan Adams 
645*35a5a358SJonathan Adams static void
646*35a5a358SJonathan Adams spa_create_zio_taskqs(spa_t *spa)
647*35a5a358SJonathan Adams {
648e14bb325SJeff Bonwick 	for (int t = 0; t < ZIO_TYPES; t++) {
649e14bb325SJeff Bonwick 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
65080eb36f2SGeorge Wilson 			const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
65180eb36f2SGeorge Wilson 			enum zti_modes mode = ztip->zti_mode;
65280eb36f2SGeorge Wilson 			uint_t value = ztip->zti_value;
6532e0c549eSJonathan Adams 			char name[32];
6542e0c549eSJonathan Adams 
6552e0c549eSJonathan Adams 			(void) snprintf(name, sizeof (name),
65680eb36f2SGeorge Wilson 			    "%s_%s", zio_type_name[t], zio_taskq_types[q]);
6572e0c549eSJonathan Adams 
658*35a5a358SJonathan Adams 			spa->spa_zio_taskq[t][q] =
659*35a5a358SJonathan Adams 			    spa_taskq_create(spa, name, mode, value);
660*35a5a358SJonathan Adams 		}
661*35a5a358SJonathan Adams 	}
662*35a5a358SJonathan Adams }
663*35a5a358SJonathan Adams 
664*35a5a358SJonathan Adams #ifdef _KERNEL
665*35a5a358SJonathan Adams static void
666*35a5a358SJonathan Adams spa_thread(void *arg)
667*35a5a358SJonathan Adams {
668*35a5a358SJonathan Adams 	callb_cpr_t cprinfo;
6692e0c549eSJonathan Adams 
670*35a5a358SJonathan Adams 	spa_t *spa = arg;
671*35a5a358SJonathan Adams 	user_t *pu = PTOU(curproc);
6722e0c549eSJonathan Adams 
673*35a5a358SJonathan Adams 	CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr,
674*35a5a358SJonathan Adams 	    spa->spa_name);
6752e0c549eSJonathan Adams 
676*35a5a358SJonathan Adams 	ASSERT(curproc != &p0);
677*35a5a358SJonathan Adams 	(void) snprintf(pu->u_psargs, sizeof (pu->u_psargs),
678*35a5a358SJonathan Adams 	    "zpool-%s", spa->spa_name);
679*35a5a358SJonathan Adams 	(void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm));
6802e0c549eSJonathan Adams 
681*35a5a358SJonathan Adams 	/* bind this thread to the requested psrset */
682*35a5a358SJonathan Adams 	if (zio_taskq_psrset_bind != PS_NONE) {
683*35a5a358SJonathan Adams 		pool_lock();
684*35a5a358SJonathan Adams 		mutex_enter(&cpu_lock);
685*35a5a358SJonathan Adams 		mutex_enter(&pidlock);
686*35a5a358SJonathan Adams 		mutex_enter(&curproc->p_lock);
68780eb36f2SGeorge Wilson 
688*35a5a358SJonathan Adams 		if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind,
689*35a5a358SJonathan Adams 		    0, NULL, NULL) == 0)  {
690*35a5a358SJonathan Adams 			curthread->t_bind_pset = zio_taskq_psrset_bind;
691*35a5a358SJonathan Adams 		} else {
692*35a5a358SJonathan Adams 			cmn_err(CE_WARN,
693*35a5a358SJonathan Adams 			    "Couldn't bind process for zfs pool \"%s\" to "
694*35a5a358SJonathan Adams 			    "pset %d\n", spa->spa_name, zio_taskq_psrset_bind);
695*35a5a358SJonathan Adams 		}
696*35a5a358SJonathan Adams 
697*35a5a358SJonathan Adams 		mutex_exit(&curproc->p_lock);
698*35a5a358SJonathan Adams 		mutex_exit(&pidlock);
699*35a5a358SJonathan Adams 		mutex_exit(&cpu_lock);
700*35a5a358SJonathan Adams 		pool_unlock();
701*35a5a358SJonathan Adams 	}
702*35a5a358SJonathan Adams 
703*35a5a358SJonathan Adams 	if (zio_taskq_sysdc) {
704*35a5a358SJonathan Adams 		sysdc_thread_enter(curthread, 100, 0);
705*35a5a358SJonathan Adams 	}
706*35a5a358SJonathan Adams 
707*35a5a358SJonathan Adams 	spa->spa_proc = curproc;
708*35a5a358SJonathan Adams 	spa->spa_did = curthread->t_did;
709*35a5a358SJonathan Adams 
710*35a5a358SJonathan Adams 	spa_create_zio_taskqs(spa);
711*35a5a358SJonathan Adams 
712*35a5a358SJonathan Adams 	mutex_enter(&spa->spa_proc_lock);
713*35a5a358SJonathan Adams 	ASSERT(spa->spa_proc_state == SPA_PROC_CREATED);
714*35a5a358SJonathan Adams 
715*35a5a358SJonathan Adams 	spa->spa_proc_state = SPA_PROC_ACTIVE;
716*35a5a358SJonathan Adams 	cv_broadcast(&spa->spa_proc_cv);
717*35a5a358SJonathan Adams 
718*35a5a358SJonathan Adams 	CALLB_CPR_SAFE_BEGIN(&cprinfo);
719*35a5a358SJonathan Adams 	while (spa->spa_proc_state == SPA_PROC_ACTIVE)
720*35a5a358SJonathan Adams 		cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
721*35a5a358SJonathan Adams 	CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock);
722*35a5a358SJonathan Adams 
723*35a5a358SJonathan Adams 	ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE);
724*35a5a358SJonathan Adams 	spa->spa_proc_state = SPA_PROC_GONE;
725*35a5a358SJonathan Adams 	spa->spa_proc = &p0;
726*35a5a358SJonathan Adams 	cv_broadcast(&spa->spa_proc_cv);
727*35a5a358SJonathan Adams 	CALLB_CPR_EXIT(&cprinfo);	/* drops spa_proc_lock */
728*35a5a358SJonathan Adams 
729*35a5a358SJonathan Adams 	mutex_enter(&curproc->p_lock);
730*35a5a358SJonathan Adams 	lwp_exit();
731*35a5a358SJonathan Adams }
732*35a5a358SJonathan Adams #endif
733*35a5a358SJonathan Adams 
734*35a5a358SJonathan Adams /*
735*35a5a358SJonathan Adams  * Activate an uninitialized pool.
736*35a5a358SJonathan Adams  */
static void
spa_activate(spa_t *spa, int mode)
{
	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_mode = mode;

	/* Allocation classes: one for normal data, one for the intent log. */
	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);

	/* Try to create a covering process */
	mutex_enter(&spa->spa_proc_lock);
	ASSERT(spa->spa_proc_state == SPA_PROC_NONE);
	ASSERT(spa->spa_proc == &p0);
	spa->spa_did = 0;

	/* Only create a process if we're going to be around a while. */
	if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
		if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
		    NULL, 0) == 0) {
			/*
			 * Handshake with spa_thread(): wait under
			 * spa_proc_lock for it to move CREATED -> ACTIVE
			 * and publish spa_proc/spa_did.
			 */
			spa->spa_proc_state = SPA_PROC_CREATED;
			while (spa->spa_proc_state == SPA_PROC_CREATED) {
				cv_wait(&spa->spa_proc_cv,
				    &spa->spa_proc_lock);
			}
			ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
			ASSERT(spa->spa_proc != &p0);
			ASSERT(spa->spa_did != 0);
		} else {
			/* newproc() failed; fall through to in-kernel taskqs */
#ifdef _KERNEL
			cmn_err(CE_WARN,
			    "Couldn't create process for zfs pool \"%s\"\n",
			    spa->spa_name);
#endif
		}
	}
	mutex_exit(&spa->spa_proc_lock);

	/* If we didn't create a process, we need to create our taskqs. */
	if (spa->spa_proc == &p0) {
		spa_create_zio_taskqs(spa);
	}

	/* Per-pool bookkeeping: dirty-vdev lists, txg list, error AVLs. */
	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_config_dirty_node));
	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_state_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}
796fa9e4066Sahrens 
797fa9e4066Sahrens /*
798fa9e4066Sahrens  * Opposite of spa_activate().
799fa9e4066Sahrens  */
static void
spa_deactivate(spa_t *spa)
{
	/*
	 * Callers must already have torn down the sync thread, dsl pool,
	 * vdev tree, and async zio root (see spa_unload()).
	 */
	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);
	ASSERT(spa->spa_async_zio_root == NULL);
	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_config_dirty_list);
	list_destroy(&spa->spa_state_dirty_list);

	/* Tear down all zio taskqs created by spa_create_zio_taskqs(). */
	for (int t = 0; t < ZIO_TYPES; t++) {
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			if (spa->spa_zio_taskq[t][q] != NULL)
				taskq_destroy(spa->spa_zio_taskq[t][q]);
			spa->spa_zio_taskq[t][q] = NULL;
		}
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;

	/*
	 * If a covering process exists (spa_thread), ask it to exit and
	 * wait for the DEACTIVATE -> GONE transition under spa_proc_lock.
	 */
	mutex_enter(&spa->spa_proc_lock);
	if (spa->spa_proc_state != SPA_PROC_NONE) {
		ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
		spa->spa_proc_state = SPA_PROC_DEACTIVATE;
		cv_broadcast(&spa->spa_proc_cv);
		while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) {
			ASSERT(spa->spa_proc != &p0);
			cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
		}
		ASSERT(spa->spa_proc_state == SPA_PROC_GONE);
		spa->spa_proc_state = SPA_PROC_NONE;
	}
	ASSERT(spa->spa_proc == &p0);
	mutex_exit(&spa->spa_proc_lock);

	/*
	 * We want to make sure spa_thread() has actually exited the ZFS
	 * module, so that the module can't be unloaded out from underneath
	 * it.
	 */
	if (spa->spa_did != 0) {
		thread_join(spa->spa_did);
		spa->spa_did = 0;
	}
}
864fa9e4066Sahrens 
865fa9e4066Sahrens /*
866fa9e4066Sahrens  * Verify a pool configuration, and construct the vdev tree appropriately.  This
867fa9e4066Sahrens  * will create all the necessary vdevs in the appropriate layout, with each vdev
868fa9e4066Sahrens  * in the CLOSED state.  This will prep the pool before open/creation/import.
869fa9e4066Sahrens  * All vdev validation is done by the vdev_alloc() routine.
870fa9e4066Sahrens  */
87199653d4eSeschrock static int
87299653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
87399653d4eSeschrock     uint_t id, int atype)
874fa9e4066Sahrens {
875fa9e4066Sahrens 	nvlist_t **child;
876573ca77eSGeorge Wilson 	uint_t children;
87799653d4eSeschrock 	int error;
878fa9e4066Sahrens 
87999653d4eSeschrock 	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
88099653d4eSeschrock 		return (error);
881fa9e4066Sahrens 
88299653d4eSeschrock 	if ((*vdp)->vdev_ops->vdev_op_leaf)
88399653d4eSeschrock 		return (0);
884fa9e4066Sahrens 
885e14bb325SJeff Bonwick 	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
886e14bb325SJeff Bonwick 	    &child, &children);
887e14bb325SJeff Bonwick 
888e14bb325SJeff Bonwick 	if (error == ENOENT)
889e14bb325SJeff Bonwick 		return (0);
890e14bb325SJeff Bonwick 
891e14bb325SJeff Bonwick 	if (error) {
89299653d4eSeschrock 		vdev_free(*vdp);
89399653d4eSeschrock 		*vdp = NULL;
89499653d4eSeschrock 		return (EINVAL);
895fa9e4066Sahrens 	}
896fa9e4066Sahrens 
897573ca77eSGeorge Wilson 	for (int c = 0; c < children; c++) {
89899653d4eSeschrock 		vdev_t *vd;
89999653d4eSeschrock 		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
90099653d4eSeschrock 		    atype)) != 0) {
90199653d4eSeschrock 			vdev_free(*vdp);
90299653d4eSeschrock 			*vdp = NULL;
90399653d4eSeschrock 			return (error);
904fa9e4066Sahrens 		}
905fa9e4066Sahrens 	}
906fa9e4066Sahrens 
90799653d4eSeschrock 	ASSERT(*vdp != NULL);
90899653d4eSeschrock 
90999653d4eSeschrock 	return (0);
910fa9e4066Sahrens }
911fa9e4066Sahrens 
912fa9e4066Sahrens /*
913fa9e4066Sahrens  * Opposite of spa_load().
914fa9e4066Sahrens  */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
	}

	ddt_unload(spa);

	/* Take all config locks as writer for the vdev teardown below. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the spare vdevs and their bookkeeping (array + config). */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}
	spa->spa_spares.sav_count = 0;

	/* Likewise for the l2cache vdevs. */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}
	spa->spa_l2cache.sav_count = 0;

	spa->spa_async_suspended = 0;

	spa_config_exit(spa, SCL_ALL, FTAG);
}
997fa9e4066Sahrens 
99899653d4eSeschrock /*
99999653d4eSeschrock  * Load (or re-load) the current list of vdevs describing the active spares for
100099653d4eSeschrock  * this pool.  When this is called, we have some form of basic information in
1001fa94a07fSbrendan  * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
1002fa94a07fSbrendan  * then re-generate a more complete list including status information.
100399653d4eSeschrock  */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL && tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.   For each spare, there are potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		/* Check whether this spare is also in the active config. */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pools would think the
			 * spare is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* Spares are their own top-level vdevs in the aux config. */
		vd->vdev_top = vd;
		vd->vdev_aux = &spa->spa_spares;

		if (vdev_open(vd) != 0)
			continue;

		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, B_TRUE, B_FALSE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}
1112fa94a07fSbrendan 
1113fa94a07fSbrendan /*
1114fa94a07fSbrendan  * Load (or re-load) the current list of vdevs describing the active l2cache for
1115fa94a07fSbrendan  * this pool.  When this is called, we have some form of basic information in
1116fa94a07fSbrendan  * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
1117fa94a07fSbrendan  * then re-generate a more complete list including status information.
1118fa94a07fSbrendan  * Devices which are already active have their details maintained, and are
1119fa94a07fSbrendan  * not re-opened.
1120fa94a07fSbrendan  */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * NOTE: if sav_config is NULL, nl2cache stays 0 and newvdevs is
	 * never allocated or read -- the main loop below doesn't execute.
	 */
	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
	}

	/* Detach the old list; surviving vdevs are moved to newvdevs. */
	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			/* Only hand healthy devices to the L2ARC. */
			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	/*
	 * If there was no config, sav_count is still 0, so the cleanup
	 * loops after "out:" never touch the uninitialized l2cache array.
	 */
	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, B_FALSE, B_TRUE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}
124099653d4eSeschrock 
124199653d4eSeschrock static int
124299653d4eSeschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
124399653d4eSeschrock {
124499653d4eSeschrock 	dmu_buf_t *db;
124599653d4eSeschrock 	char *packed = NULL;
124699653d4eSeschrock 	size_t nvsize = 0;
124799653d4eSeschrock 	int error;
124899653d4eSeschrock 	*value = NULL;
124999653d4eSeschrock 
125099653d4eSeschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
125199653d4eSeschrock 	nvsize = *(uint64_t *)db->db_data;
125299653d4eSeschrock 	dmu_buf_rele(db, FTAG);
125399653d4eSeschrock 
125499653d4eSeschrock 	packed = kmem_alloc(nvsize, KM_SLEEP);
12557bfdf011SNeil Perrin 	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
12567bfdf011SNeil Perrin 	    DMU_READ_PREFETCH);
125799653d4eSeschrock 	if (error == 0)
125899653d4eSeschrock 		error = nvlist_unpack(packed, nvsize, value, 0);
125999653d4eSeschrock 	kmem_free(packed, nvsize);
126099653d4eSeschrock 
126199653d4eSeschrock 	return (error);
126299653d4eSeschrock }
126399653d4eSeschrock 
12643d7072f8Seschrock /*
12653d7072f8Seschrock  * Checks to see if the given vdev could not be opened, in which case we post a
12663d7072f8Seschrock  * sysevent to notify the autoreplace code that the device has been removed.
12673d7072f8Seschrock  */
12683d7072f8Seschrock static void
12693d7072f8Seschrock spa_check_removed(vdev_t *vd)
12703d7072f8Seschrock {
1271573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++)
12723d7072f8Seschrock 		spa_check_removed(vd->vdev_child[c]);
12733d7072f8Seschrock 
12743d7072f8Seschrock 	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
12753d7072f8Seschrock 		zfs_post_autoreplace(vd->vdev_spa, vd);
12763d7072f8Seschrock 		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
12773d7072f8Seschrock 	}
12783d7072f8Seschrock }
12793d7072f8Seschrock 
1280e6ca193dSGeorge Wilson /*
1281e6ca193dSGeorge Wilson  * Load the slog device state from the config object since it's possible
1282e6ca193dSGeorge Wilson  * that the label does not contain the most up-to-date information.
1283e6ca193dSGeorge Wilson  */
void
spa_load_log_state(spa_t *spa, nvlist_t *nv)
{
	vdev_t *ovd, *rvd = spa->spa_root_vdev;

	/*
	 * Load the original root vdev tree from the passed config.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	VERIFY(spa_config_parse(spa, &ovd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);

	/*
	 * NOTE(review): this indexes ovd->vdev_child[c] with the current
	 * tree's child index -- it assumes 'nv' describes the same
	 * top-level vdev layout as the in-core tree; confirm at callers.
	 */
	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *cvd = rvd->vdev_child[c];
		if (cvd->vdev_islog)
			vdev_load_log_state(cvd, ovd->vdev_child[c]);
	}
	/* The parsed copy is only needed for the transfer above. */
	vdev_free(ovd);
	spa_config_exit(spa, SCL_ALL, FTAG);
}
1303e6ca193dSGeorge Wilson 
1304b87f3af3Sperrin /*
1305b87f3af3Sperrin  * Check for missing log devices
1306b87f3af3Sperrin  */
1307b87f3af3Sperrin int
1308b87f3af3Sperrin spa_check_logs(spa_t *spa)
1309b87f3af3Sperrin {
1310b87f3af3Sperrin 	switch (spa->spa_log_state) {
1311b87f3af3Sperrin 	case SPA_LOG_MISSING:
1312b87f3af3Sperrin 		/* need to recheck in case slog has been restored */
1313b87f3af3Sperrin 	case SPA_LOG_UNKNOWN:
1314b87f3af3Sperrin 		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
1315b87f3af3Sperrin 		    DS_FIND_CHILDREN)) {
1316b87f3af3Sperrin 			spa->spa_log_state = SPA_LOG_MISSING;
1317b87f3af3Sperrin 			return (1);
1318b87f3af3Sperrin 		}
1319b87f3af3Sperrin 		break;
1320b87f3af3Sperrin 	}
1321b87f3af3Sperrin 	return (0);
1322b87f3af3Sperrin }
1323b87f3af3Sperrin 
1324b693757aSEric Schrock static void
1325b693757aSEric Schrock spa_aux_check_removed(spa_aux_vdev_t *sav)
1326b693757aSEric Schrock {
1327b24ab676SJeff Bonwick 	for (int i = 0; i < sav->sav_count; i++)
1328b693757aSEric Schrock 		spa_check_removed(sav->sav_vdevs[i]);
1329b693757aSEric Schrock }
1330b693757aSEric Schrock 
1331b24ab676SJeff Bonwick void
1332b24ab676SJeff Bonwick spa_claim_notify(zio_t *zio)
1333b24ab676SJeff Bonwick {
1334b24ab676SJeff Bonwick 	spa_t *spa = zio->io_spa;
1335b24ab676SJeff Bonwick 
1336b24ab676SJeff Bonwick 	if (zio->io_error)
1337b24ab676SJeff Bonwick 		return;
1338b24ab676SJeff Bonwick 
1339b24ab676SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
1340b24ab676SJeff Bonwick 	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
1341b24ab676SJeff Bonwick 		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
1342b24ab676SJeff Bonwick 	mutex_exit(&spa->spa_props_lock);
1343b24ab676SJeff Bonwick }
1344b24ab676SJeff Bonwick 
/*
 * Error tallies accumulated while verifying pool contents during load
 * (see spa_load_verify_done()); updated atomically from zio callbacks.
 */
typedef struct spa_load_error {
	uint64_t	sle_metadata_count;	/* read errors on metadata */
	uint64_t	sle_data_count;		/* read errors on plain data */
} spa_load_error_t;
1349468c413aSTim Haley 
1350468c413aSTim Haley static void
1351468c413aSTim Haley spa_load_verify_done(zio_t *zio)
1352468c413aSTim Haley {
1353468c413aSTim Haley 	blkptr_t *bp = zio->io_bp;
1354468c413aSTim Haley 	spa_load_error_t *sle = zio->io_private;
1355468c413aSTim Haley 	dmu_object_type_t type = BP_GET_TYPE(bp);
1356468c413aSTim Haley 	int error = zio->io_error;
1357468c413aSTim Haley 
1358468c413aSTim Haley 	if (error) {
1359468c413aSTim Haley 		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
1360468c413aSTim Haley 		    type != DMU_OT_INTENT_LOG)
1361468c413aSTim Haley 			atomic_add_64(&sle->sle_metadata_count, 1);
1362468c413aSTim Haley 		else
1363468c413aSTim Haley 			atomic_add_64(&sle->sle_data_count, 1);
1364468c413aSTim Haley 	}
1365468c413aSTim Haley 	zio_data_buf_free(zio->io_data, zio->io_size);
1366468c413aSTim Haley }
1367468c413aSTim Haley 
1368468c413aSTim Haley /*ARGSUSED*/
1369468c413aSTim Haley static int
1370b24ab676SJeff Bonwick spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
1371b24ab676SJeff Bonwick     const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1372468c413aSTim Haley {
1373468c413aSTim Haley 	if (bp != NULL) {
1374468c413aSTim Haley 		zio_t *rio = arg;
1375468c413aSTim Haley 		size_t size = BP_GET_PSIZE(bp);
1376468c413aSTim Haley 		void *data = zio_data_buf_alloc(size);
1377468c413aSTim Haley 
1378468c413aSTim Haley 		zio_nowait(zio_read(rio, spa, bp, data, size,
1379468c413aSTim Haley 		    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
1380468c413aSTim Haley 		    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
1381468c413aSTim Haley 		    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
1382468c413aSTim Haley 	}
1383468c413aSTim Haley 	return (0);
1384468c413aSTim Haley }
1385468c413aSTim Haley 
/*
 * Read-verify the pool from spa_verify_min_txg onward and decide whether
 * this uberblock is acceptable under the pool's rewind policy.  Metadata
 * and data read-error counts are gathered by spa_load_verify_done() and
 * recorded in spa_load_meta_errors/spa_load_data_errors.  Returns 0 when
 * the error counts are within policy limits, EIO/ENXIO otherwise.
 */
static int
spa_load_verify(spa_t *spa)
{
	zio_t *rio;
	spa_load_error_t sle = { 0 };
	zpool_rewind_policy_t policy;
	boolean_t verify_ok = B_FALSE;
	int error;

	/* Root zio that parents all verification reads; &sle rides io_private. */
	rio = zio_root(spa, NULL, &sle,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

	error = traverse_pool(spa, spa->spa_verify_min_txg,
	    TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);

	/* Wait for every read issued by spa_load_verify_cb() to complete. */
	(void) zio_wait(rio);

	zpool_get_rewind_policy(spa->spa_config, &policy);

	spa->spa_load_meta_errors = sle.sle_metadata_count;
	spa->spa_load_data_errors = sle.sle_data_count;

	/*
	 * Within policy limits: accept this txg as the load point and
	 * remember its timestamp.  Otherwise cap spa_load_max_txg so a
	 * subsequent rewind attempt tries an older uberblock.
	 */
	if (!error && sle.sle_metadata_count <= policy.zrp_maxmeta &&
	    sle.sle_data_count <= policy.zrp_maxdata) {
		verify_ok = B_TRUE;
		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;
	} else {
		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	}

	if (error) {
		/* Normalize traversal errors other than ENXIO/EIO to EIO. */
		if (error != ENXIO && error != EIO)
			error = EIO;
		return (error);
	}

	return (verify_ok ? 0 : EIO);
}
1425468c413aSTim Haley 
/*
 * Load an existing storage pool, using the pool's builtin spa_config as a
 * source of configuration information.
 *
 * 'state' identifies why the load is happening (open, import, tryimport,
 * recover); 'mosconfig' indicates whether spa_config is the trusted copy
 * read back from the MOS.  When mosconfig is false the pool is accessed
 * read-only, the trusted config is fetched from the MOS, and spa_load()
 * re-invokes itself with that config (see the !mosconfig block below).
 * Returns 0 on success or an errno; on failure (other than EBADF) an FMA
 * ereport is posted from the 'out' path.
 */
static int
spa_load(spa_t *spa, spa_load_state_t state, int mosconfig)
{
	int error = 0;
	nvlist_t *nvconfig, *nvroot = NULL;
	vdev_t *rvd;
	uberblock_t *ub = &spa->spa_uberblock;
	uint64_t config_cache_txg = spa->spa_config_txg;
	uint64_t pool_guid;
	uint64_t version;
	uint64_t autoreplace = 0;
	int orig_mode = spa->spa_mode;
	char *ereport = FM_EREPORT_ZFS_POOL;
	nvlist_t *config = spa->spa_config;

	/*
	 * If this is an untrusted config, access the pool in read-only mode.
	 * This prevents things like resilvering recently removed devices.
	 */
	if (!mosconfig)
		spa->spa_mode = FREAD;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	spa->spa_load_state = state;

	/* The config must name a vdev tree and a pool guid. */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot) ||
	    nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid)) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Versioning wasn't explicitly added to the label until later, so if
	 * it's not present treat it as the initial version.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION, &version) != 0)
		version = SPA_VERSION_INITIAL;

	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &spa->spa_config_txg);

	/* Refuse to import a pool whose guid is already active. */
	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
	    spa_guid_exists(pool_guid, 0)) {
		error = EEXIST;
		goto out;
	}

	spa->spa_load_guid = pool_guid;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Parse the configuration into a vdev tree.  We explicitly set the
	 * value that will be returned by spa_version() since parsing the
	 * configuration requires knowing the version number.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	spa->spa_ubsync.ub_version = version;
	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_LOAD);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0)
		goto out;

	ASSERT(spa->spa_root_vdev == rvd);
	ASSERT(spa_guid(spa) == pool_guid);

	/*
	 * Try to open all vdevs, loading each label in the process.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = vdev_open(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error != 0)
		goto out;

	/*
	 * We need to validate the vdev labels against the configuration that
	 * we have in hand, which is dependent on the setting of mosconfig. If
	 * mosconfig is true then we're validating the vdev labels based on
	 * that config. Otherwise, we're validating against the cached config
	 * (zpool.cache) that was read when we loaded the zfs module, and then
	 * later we will recursively call spa_load() and validate against
	 * the vdev config.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = vdev_validate(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error != 0)
		goto out;

	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	/*
	 * Find the best uberblock.
	 */
	vdev_uberblock_load(NULL, rvd, ub);

	/*
	 * If we weren't able to find a single valid uberblock, return failure.
	 */
	if (ub->ub_txg == 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = ENXIO;
		goto out;
	}

	/*
	 * If the pool is newer than the code, we can't open it.
	 */
	if (ub->ub_version > SPA_VERSION) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_VERSION_NEWER);
		error = ENOTSUP;
		goto out;
	}

	/*
	 * If the vdev guid sum doesn't match the uberblock, we have an
	 * incomplete configuration.
	 */
	if (rvd->vdev_guid_sum != ub->ub_guid_sum && mosconfig) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_BAD_GUID_SUM);
		error = ENXIO;
		goto out;
	}

	/*
	 * Initialize internal SPA structures.  spa_verify_min_txg and
	 * spa_first_txg account for a possible rewind target set by
	 * spa_load_best() (spa_last_ubsync_txg / spa_extreme_rewind).
	 */
	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_ubsync = spa->spa_uberblock;
	spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
	    TXG_INITIAL : spa_last_synced_txg(spa) - TXG_DEFER_SIZE;
	spa->spa_first_txg = spa->spa_last_ubsync_txg ?
	    spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
	spa->spa_claim_max_txg = spa->spa_first_txg;

	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
	if (error) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;

	if (zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/* Read the trusted copy of the config out of the MOS. */
	if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (!mosconfig) {
		uint64_t hostid;

		/*
		 * Refuse to load a pool that was last written by a different
		 * host (hostid mismatch), unless this is the root pool.
		 */
		if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
			char *hostname;
			unsigned long myhostid = 0;

			VERIFY(nvlist_lookup_string(nvconfig,
			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);

#ifdef	_KERNEL
			myhostid = zone_get_hostid(NULL);
#else	/* _KERNEL */
			/*
			 * We're emulating the system's hostid in userland, so
			 * we can't use zone_get_hostid().
			 */
			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
#endif	/* _KERNEL */
			if (hostid != 0 && myhostid != 0 &&
			    hostid != myhostid) {
				cmn_err(CE_WARN, "pool '%s' could not be "
				    "loaded as it was last accessed by "
				    "another system (host: %s hostid: 0x%lx). "
				    "See: http://www.sun.com/msg/ZFS-8000-EY",
				    spa_name(spa), hostname,
				    (unsigned long)hostid);
				error = EBADF;
				goto out;
			}
		}

		/*
		 * Adopt the MOS config and reload the pool with it; this
		 * recursion happens at most once since we pass B_TRUE.
		 */
		spa_config_set(spa, nvconfig);
		spa_unload(spa);
		spa_deactivate(spa);
		spa_activate(spa, orig_mode);

		return (spa_load(spa, state, B_TRUE));
	}

	if (zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
	    sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj) != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the bit that tells us to use the new accounting function
	 * (raid-z deflation).  If we have an older pool, this will not
	 * be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
	    sizeof (uint64_t), 1, &spa->spa_deflate);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the persistent error log.  If we have an older pool, this will
	 * not be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_LAST,
	    sizeof (uint64_t), 1, &spa->spa_errlog_last);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_ERRLOG_SCRUB,
	    sizeof (uint64_t), 1, &spa->spa_errlog_scrub);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load the history object.  If we have an older pool, this
	 * will not be present.
	 */
	error = zap_lookup(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_HISTORY,
	    sizeof (uint64_t), 1, &spa->spa_history);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	/*
	 * Load any hot spares for this pool.
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_SPARES, sizeof (uint64_t), 1, &spa->spa_spares.sav_object);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	if (error == 0) {
		ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
		if (load_nvlist(spa, spa->spa_spares.sav_object,
		    &spa->spa_spares.sav_config) != 0) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
	}

	/*
	 * Load any level 2 ARC devices for this pool.
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_L2CACHE, sizeof (uint64_t), 1,
	    &spa->spa_l2cache.sav_object);
	if (error != 0 && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}
	if (error == 0) {
		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
		    &spa->spa_l2cache.sav_config) != 0) {
			vdev_set_state(rvd, B_TRUE,
			    VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			error = EIO;
			goto out;
		}

		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
	}

	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);

	/*
	 * Load pool properties from the MOS; each individual property is
	 * optional (lookups below ignore failure), but a damaged props
	 * object is fatal.
	 */
	error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_PROPS, sizeof (uint64_t), 1, &spa->spa_pool_props_object);

	if (error && error != ENOENT) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	if (error == 0) {
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS),
		    sizeof (uint64_t), 1, &spa->spa_bootfs);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_AUTOREPLACE),
		    sizeof (uint64_t), 1, &autoreplace);
		spa->spa_autoreplace = (autoreplace != 0);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_DELEGATION),
		    sizeof (uint64_t), 1, &spa->spa_delegation);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_FAILUREMODE),
		    sizeof (uint64_t), 1, &spa->spa_failmode);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_AUTOEXPAND),
		    sizeof (uint64_t), 1, &spa->spa_autoexpand);
		(void) zap_lookup(spa->spa_meta_objset,
		    spa->spa_pool_props_object,
		    zpool_prop_to_name(ZPOOL_PROP_DEDUPDITTO),
		    sizeof (uint64_t), 1, &spa->spa_dedup_ditto);
	}

	/*
	 * If the 'autoreplace' property is set, then post a resource notifying
	 * the ZFS DE that it should not issue any faults for unopenable
	 * devices.  We also iterate over the vdevs, and post a sysevent for any
	 * unopenable vdevs so that the normal autoreplace handler can take
	 * over.
	 */
	if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) {
		spa_check_removed(spa->spa_root_vdev);
		/*
		 * For the import case, this is done in spa_import(), because
		 * at this point we're using the spare definitions from
		 * the MOS config, not necessarily from the userland config.
		 */
		if (state != SPA_LOAD_IMPORT) {
			spa_aux_check_removed(&spa->spa_spares);
			spa_aux_check_removed(&spa->spa_l2cache);
		}
	}

	/*
	 * Load the vdev state for all toplevel vdevs.
	 */
	vdev_load(rvd);

	/*
	 * Propagate the leaf DTLs we just loaded all the way up the tree.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	/*
	 * Check the state of the root vdev.  If it can't be opened, it
	 * indicates one or more toplevel vdevs are faulted.
	 */
	if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN) {
		error = ENXIO;
		goto out;
	}

	/*
	 * Load the DDTs (dedup tables).
	 */
	error = ddt_load(spa);
	if (error != 0) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_CORRUPT_DATA);
		error = EIO;
		goto out;
	}

	spa_update_dspace(spa);

	/*
	 * Read-verify the pool unless this is just a tryimport probe;
	 * failure here drives the rewind logic in spa_load_best().
	 */
	if (state != SPA_LOAD_TRYIMPORT) {
		error = spa_load_verify(spa);
		if (error) {
			vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
			    VDEV_AUX_CORRUPT_DATA);
			goto out;
		}
	}

	/*
	 * Load the intent log state and check log integrity.
	 */
	VERIFY(nvlist_lookup_nvlist(nvconfig, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	spa_load_log_state(spa, nvroot);
	nvlist_free(nvconfig);

	if (spa_check_logs(spa)) {
		vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
		    VDEV_AUX_BAD_LOG);
		error = ENXIO;
		ereport = FM_EREPORT_ZFS_LOG_REPLAY;
		goto out;
	}

	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
	    spa->spa_load_max_txg == UINT64_MAX)) {
		dmu_tx_t *tx;
		int need_update = B_FALSE;

		ASSERT(state != SPA_LOAD_TRYIMPORT);

		/*
		 * Claim log blocks that haven't been committed yet.
		 * This must all happen in a single txg.
		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
		 * invoked from zil_claim_log_block()'s i/o done callback.
		 * Price of rollback is that we abandon the log.
		 */
		spa->spa_claiming = B_TRUE;

		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
		    spa_first_txg(spa));
		(void) dmu_objset_find(spa_name(spa),
		    zil_claim, tx, DS_FIND_CHILDREN);
		dmu_tx_commit(tx);

		spa->spa_claiming = B_FALSE;

		spa->spa_log_state = SPA_LOG_GOOD;
		spa->spa_sync_on = B_TRUE;
		txg_sync_start(spa->spa_dsl_pool);

		/*
		 * Wait for all claims to sync.  We sync up to the highest
		 * claimed log block birth time so that claimed log blocks
		 * don't appear to be from the future.  spa_claim_max_txg
		 * will have been set for us by either zil_check_log_chain()
		 * (invoked from spa_check_logs()) or zil_claim() above.
		 */
		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);

		/*
		 * If the config cache is stale, or we have uninitialized
		 * metaslabs (see spa_vdev_add()), then update the config.
		 *
		 * If spa_load_verbatim is true, trust the current
		 * in-core spa_config and update the disk labels.
		 */
		if (config_cache_txg != spa->spa_config_txg ||
		    state == SPA_LOAD_IMPORT || spa->spa_load_verbatim ||
		    state == SPA_LOAD_RECOVER)
			need_update = B_TRUE;

		for (int c = 0; c < rvd->vdev_children; c++)
			if (rvd->vdev_child[c]->vdev_ms_array == 0)
				need_update = B_TRUE;

		/*
		 * Update the config cache asychronously in case we're the
		 * root pool, in which case the config cache isn't writable yet.
		 */
		if (need_update)
			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);

		/*
		 * Check all DTLs to see if anything needs resilvering.
		 */
		if (vdev_resilver_needed(rvd, NULL, NULL))
			spa_async_request(spa, SPA_ASYNC_RESILVER);

		/*
		 * Delete any inconsistent datasets.
		 */
		(void) dmu_objset_find(spa_name(spa),
		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);

		/*
		 * Clean up any stale temporary dataset userrefs.
		 */
		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
	}

	error = 0;
out:
	/* Common exit path for both success and failure. */
	spa->spa_minref = refcount_count(&spa->spa_refcount);
	if (error && error != EBADF)
		zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);

	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}
1969fa9e4066Sahrens 
/*
 * Unload the pool, back the rewind target (spa_load_max_txg) off by one
 * txg, reactivate, and re-attempt the load.  Called from spa_load_best()
 * while stepping backwards through candidate uberblocks.
 */
static int
spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
{
	spa_unload(spa);
	spa_deactivate(spa);

	/* Aim one txg older on this attempt. */
	spa->spa_load_max_txg--;

	spa_activate(spa, spa_mode_global);
	spa_async_suspend(spa);

	return (spa_load(spa, state, mosconfig));
}
1983468c413aSTim Haley 
/*
 * Attempt to load the pool at the best available txg.
 *
 * First try a normal load at the requested (or previously recorded)
 * maximum txg.  If that fails and the request allows it, rewind: walk
 * backwards through older uberblocks -- normally no further back than
 * TXG_DEFER_SIZE txgs, or all the way to TXG_INITIAL when 'extreme' is
 * set -- until a txg loads cleanly.  Rewinding discards the newer txgs,
 * including the intent log, so SPA_LOAD_RECOVER clears the log state.
 *
 * Returns the rewind error for SPA_LOAD_RECOVER requests, otherwise the
 * error from the original load attempt.
 */
static int
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
    uint64_t max_request, boolean_t extreme)
{
	nvlist_t *config = NULL;
	int load_error, rewind_error;
	uint64_t safe_rollback_txg;
	uint64_t min_txg;

	if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
		/* A recovery target was recorded by an earlier open. */
		spa->spa_load_max_txg = spa->spa_load_txg;
		spa->spa_log_state = SPA_LOG_CLEAR;
	} else {
		spa->spa_load_max_txg = max_request;
	}

	load_error = rewind_error = spa_load(spa, state, mosconfig);
	if (load_error == 0)
		return (0);

	/* Capture the failed-load config so it can be returned to the user. */
	if (spa->spa_root_vdev != NULL)
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
	spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;

	/* specific txg requested */
	if (spa->spa_load_max_txg != UINT64_MAX && !extreme) {
		nvlist_free(config);
		return (load_error);
	}

	/* Price of rolling back is discarding txgs, including log */
	if (state == SPA_LOAD_RECOVER)
		spa->spa_log_state = SPA_LOG_CLEAR;

	spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	safe_rollback_txg = spa->spa_uberblock.ub_txg - TXG_DEFER_SIZE;

	min_txg = extreme ? TXG_INITIAL : safe_rollback_txg;
	while (rewind_error && (spa->spa_uberblock.ub_txg >= min_txg)) {
		/* Past the deferred-free window the rewind is "extreme". */
		if (spa->spa_load_max_txg < safe_rollback_txg)
			spa->spa_extreme_rewind = B_TRUE;
		rewind_error = spa_load_retry(spa, state, mosconfig);
	}

	if (config)
		spa_rewind_data_to_nvlist(spa, config);

	spa->spa_extreme_rewind = B_FALSE;
	spa->spa_load_max_txg = UINT64_MAX;

	if (config && (rewind_error || state != SPA_LOAD_RECOVER))
		spa_config_set(spa, config);

	return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
}
2041468c413aSTim Haley 
2042fa9e4066Sahrens /*
2043fa9e4066Sahrens  * Pool Open/Import
2044fa9e4066Sahrens  *
2045fa9e4066Sahrens  * The import case is identical to an open except that the configuration is sent
2046fa9e4066Sahrens  * down from userland, instead of grabbed from the configuration cache.  For the
2047fa9e4066Sahrens  * case of an open, the pool configuration will exist in the
20483d7072f8Seschrock  * POOL_STATE_UNINITIALIZED state.
2049fa9e4066Sahrens  *
2050fa9e4066Sahrens  * The stats information (gen/count/ustats) is used to gather vdev statistics at
2051fa9e4066Sahrens  * the same time open the pool, without having to keep around the spa_t in some
2052fa9e4066Sahrens  * ambiguous state.
2053fa9e4066Sahrens  */
/*
 * Common implementation of pool open/import.  Looks up 'pool' in the
 * namespace, loads it (honoring any rewind policy in 'nvpolicy') if it
 * is still uninitialized, takes a reference on behalf of 'tag', and
 * optionally returns the pool config in '*config'.  On success, returns
 * 0 with *spapp set; on failure, *spapp is NULL and an errno is
 * returned (the config may still be returned for diagnostic purposes).
 */
static int
spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
    nvlist_t **config)
{
	spa_t *spa;
	boolean_t norewind;
	boolean_t extreme;
	zpool_rewind_policy_t policy;
	spa_load_state_t state = SPA_LOAD_OPEN;
	int error;
	int locked = B_FALSE;

	*spapp = NULL;

	/* Decode the caller's rewind policy (NULL means "no rewind"). */
	zpool_get_rewind_policy(nvpolicy, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;
	norewind = (policy.zrp_request == ZPOOL_NO_REWIND);
	extreme = ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0);

	/*
	 * As disgusting as this is, we need to support recursive calls to this
	 * function because dsl_dir_open() is called during spa_load(), and ends
	 * up calling spa_open() again.  The real fix is to figure out how to
	 * avoid dsl_dir_open() calling this in the first place.
	 */
	if (mutex_owner(&spa_namespace_lock) != curthread) {
		mutex_enter(&spa_namespace_lock);
		locked = B_TRUE;
	}

	if ((spa = spa_lookup(pool)) == NULL) {
		if (locked)
			mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {

		spa_activate(spa, spa_mode_global);

		/*
		 * If the last open failed and the caller is not asking
		 * for a rewind, fail fast with the cached error and
		 * config rather than re-attempting the load.
		 */
		if (spa->spa_last_open_failed && norewind) {
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config,
				    config, KM_SLEEP) == 0);
			spa_deactivate(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (spa->spa_last_open_failed);
		}

		if (state != SPA_LOAD_RECOVER)
			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
		    extreme);

		if (error == EBADF) {
			/*
			 * If vdev_validate() returns failure (indicated by
			 * EBADF), it indicates that one of the vdevs indicates
			 * that the pool has been exported or destroyed.  If
			 * this is the case, the config cache is out of sync and
			 * we should remove the pool from the namespace.
			 */
			spa_unload(spa);
			spa_deactivate(spa);
			spa_config_sync(spa, B_TRUE, B_TRUE);
			spa_remove(spa);
			if (locked)
				mutex_exit(&spa_namespace_lock);
			return (ENOENT);
		}

		if (error) {
			/*
			 * We can't open the pool, but we still have useful
			 * information: the state of each vdev after the
			 * attempted vdev_open().  Return this to the user.
			 */
			if (config != NULL && spa->spa_config)
				VERIFY(nvlist_dup(spa->spa_config, config,
				    KM_SLEEP) == 0);
			spa_unload(spa);
			spa_deactivate(spa);
			spa->spa_last_open_failed = error;
			if (locked)
				mutex_exit(&spa_namespace_lock);
			*spapp = NULL;
			return (error);
		}

	}

	spa_open_ref(spa, tag);


	if (config != NULL)
		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);

	/*
	 * Only clear the cached failure/rewind state when we took the
	 * namespace lock ourselves; a recursive open leaves it intact.
	 */
	if (locked) {
		spa->spa_last_open_failed = 0;
		spa->spa_last_ubsync_txg = 0;
		spa->spa_load_txg = 0;
		mutex_exit(&spa_namespace_lock);
	}

	*spapp = spa;

	return (0);
}
2165fa9e4066Sahrens 
/*
 * Open a pool, allowing the caller to supply a rewind policy nvlist
 * (e.g. for recovery imports).  See spa_open_common().
 */
int
spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy,
    nvlist_t **config)
{
	return (spa_open_common(name, spapp, tag, policy, config));
}
2172468c413aSTim Haley 
/*
 * Open a pool with the default (no-rewind) policy and no config output.
 */
int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL, NULL));
}
2178fa9e4066Sahrens 
2179ea8dc4b6Seschrock /*
2180ea8dc4b6Seschrock  * Lookup the given spa_t, incrementing the inject count in the process,
2181ea8dc4b6Seschrock  * preventing it from being exported or destroyed.
2182ea8dc4b6Seschrock  */
2183ea8dc4b6Seschrock spa_t *
2184ea8dc4b6Seschrock spa_inject_addref(char *name)
2185ea8dc4b6Seschrock {
2186ea8dc4b6Seschrock 	spa_t *spa;
2187ea8dc4b6Seschrock 
2188ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
2189ea8dc4b6Seschrock 	if ((spa = spa_lookup(name)) == NULL) {
2190ea8dc4b6Seschrock 		mutex_exit(&spa_namespace_lock);
2191ea8dc4b6Seschrock 		return (NULL);
2192ea8dc4b6Seschrock 	}
2193ea8dc4b6Seschrock 	spa->spa_inject_ref++;
2194ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
2195ea8dc4b6Seschrock 
2196ea8dc4b6Seschrock 	return (spa);
2197ea8dc4b6Seschrock }
2198ea8dc4b6Seschrock 
2199ea8dc4b6Seschrock void
2200ea8dc4b6Seschrock spa_inject_delref(spa_t *spa)
2201ea8dc4b6Seschrock {
2202ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
2203ea8dc4b6Seschrock 	spa->spa_inject_ref--;
2204ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
2205ea8dc4b6Seschrock }
2206ea8dc4b6Seschrock 
/*
 * Add spares device information to the nvlist.  Spares that have since
 * been activated in some pool are reported as CANT_OPEN/SPARED so that
 * userland displays them as in use.  Caller must hold SCL_CONFIG.
 */
static void
spa_add_spares(spa_t *spa, nvlist_t *config)
{
	nvlist_t **spares;
	uint_t i, nspares;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_stat_t *vs;
	uint_t vsc;
	uint64_t pool;

	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_spares.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
	if (nspares != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		/* Re-lookup to get pointers into the copy we just added. */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

		/*
		 * Go through and find any spares which have since been
		 * repurposed as an active spare.  If this is the case, update
		 * their status appropriately.
		 */
		for (i = 0; i < nspares; i++) {
			VERIFY(nvlist_lookup_uint64(spares[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);
			if (spa_spare_exists(guid, &pool, NULL) &&
			    pool != 0ULL) {
				VERIFY(nvlist_lookup_uint64_array(
				    spares[i], ZPOOL_CONFIG_STATS,
				    (uint64_t **)&vs, &vsc) == 0);
				vs->vs_state = VDEV_STATE_CANT_OPEN;
				vs->vs_aux = VDEV_AUX_SPARED;
			}
		}
	}
}
225599653d4eSeschrock 
/*
 * Add l2cache device information to the nvlist, including vdev stats.
 * Caller must hold SCL_CONFIG.
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		/* Re-lookup to get pointers into the copy we just added. */
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/* Find the in-core vdev matching this guid. */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_STATS, (uint64_t **)&vs, &vsc) == 0);
			vdev_get_stats(vd, vs);
		}
	}
}
2309fa94a07fSbrendan 
/*
 * Return the pool's configuration and statistics in '*config', and
 * optionally its alternate root in 'altroot' (even for pools that fail
 * to open).  Returns the error from the underlying open attempt.
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, NULL, config);

	/* spa may be non-NULL even when the open failed. */
	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}
2368fa9e4066Sahrens 
236999653d4eSeschrock /*
2370fa94a07fSbrendan  * Validate that the auxiliary device array is well formed.  We must have an
2371fa94a07fSbrendan  * array of nvlists, each which describes a valid leaf vdev.  If this is an
2372fa94a07fSbrendan  * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
2373fa94a07fSbrendan  * specified, as long as they are well-formed.
237499653d4eSeschrock  */
237599653d4eSeschrock static int
2376fa94a07fSbrendan spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
2377fa94a07fSbrendan     spa_aux_vdev_t *sav, const char *config, uint64_t version,
2378fa94a07fSbrendan     vdev_labeltype_t label)
237999653d4eSeschrock {
2380fa94a07fSbrendan 	nvlist_t **dev;
2381fa94a07fSbrendan 	uint_t i, ndev;
238299653d4eSeschrock 	vdev_t *vd;
238399653d4eSeschrock 	int error;
238499653d4eSeschrock 
2385e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
2386e14bb325SJeff Bonwick 
238799653d4eSeschrock 	/*
2388fa94a07fSbrendan 	 * It's acceptable to have no devs specified.
238999653d4eSeschrock 	 */
2390fa94a07fSbrendan 	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
239199653d4eSeschrock 		return (0);
239299653d4eSeschrock 
2393fa94a07fSbrendan 	if (ndev == 0)
239499653d4eSeschrock 		return (EINVAL);
239599653d4eSeschrock 
239699653d4eSeschrock 	/*
2397fa94a07fSbrendan 	 * Make sure the pool is formatted with a version that supports this
2398fa94a07fSbrendan 	 * device type.
239999653d4eSeschrock 	 */
2400fa94a07fSbrendan 	if (spa_version(spa) < version)
240199653d4eSeschrock 		return (ENOTSUP);
240299653d4eSeschrock 
240339c23413Seschrock 	/*
2404fa94a07fSbrendan 	 * Set the pending device list so we correctly handle device in-use
240539c23413Seschrock 	 * checking.
240639c23413Seschrock 	 */
2407fa94a07fSbrendan 	sav->sav_pending = dev;
2408fa94a07fSbrendan 	sav->sav_npending = ndev;
240939c23413Seschrock 
2410fa94a07fSbrendan 	for (i = 0; i < ndev; i++) {
2411fa94a07fSbrendan 		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
241299653d4eSeschrock 		    mode)) != 0)
241339c23413Seschrock 			goto out;
241499653d4eSeschrock 
241599653d4eSeschrock 		if (!vd->vdev_ops->vdev_op_leaf) {
241699653d4eSeschrock 			vdev_free(vd);
241739c23413Seschrock 			error = EINVAL;
241839c23413Seschrock 			goto out;
241999653d4eSeschrock 		}
242099653d4eSeschrock 
2421fa94a07fSbrendan 		/*
2422e14bb325SJeff Bonwick 		 * The L2ARC currently only supports disk devices in
2423e14bb325SJeff Bonwick 		 * kernel context.  For user-level testing, we allow it.
2424fa94a07fSbrendan 		 */
2425e14bb325SJeff Bonwick #ifdef _KERNEL
2426fa94a07fSbrendan 		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
2427fa94a07fSbrendan 		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
2428fa94a07fSbrendan 			error = ENOTBLK;
2429fa94a07fSbrendan 			goto out;
2430fa94a07fSbrendan 		}
2431e14bb325SJeff Bonwick #endif
243299653d4eSeschrock 		vd->vdev_top = vd;
243399653d4eSeschrock 
243439c23413Seschrock 		if ((error = vdev_open(vd)) == 0 &&
2435fa94a07fSbrendan 		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
2436fa94a07fSbrendan 			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
243739c23413Seschrock 			    vd->vdev_guid) == 0);
243839c23413Seschrock 		}
243999653d4eSeschrock 
244099653d4eSeschrock 		vdev_free(vd);
244139c23413Seschrock 
2442fa94a07fSbrendan 		if (error &&
2443fa94a07fSbrendan 		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
244439c23413Seschrock 			goto out;
244539c23413Seschrock 		else
244639c23413Seschrock 			error = 0;
244799653d4eSeschrock 	}
244899653d4eSeschrock 
244939c23413Seschrock out:
2450fa94a07fSbrendan 	sav->sav_pending = NULL;
2451fa94a07fSbrendan 	sav->sav_npending = 0;
245239c23413Seschrock 	return (error);
245399653d4eSeschrock }
245499653d4eSeschrock 
2455fa94a07fSbrendan static int
2456fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
2457fa94a07fSbrendan {
2458fa94a07fSbrendan 	int error;
2459fa94a07fSbrendan 
2460e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
2461e14bb325SJeff Bonwick 
2462fa94a07fSbrendan 	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2463fa94a07fSbrendan 	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
2464fa94a07fSbrendan 	    VDEV_LABEL_SPARE)) != 0) {
2465fa94a07fSbrendan 		return (error);
2466fa94a07fSbrendan 	}
2467fa94a07fSbrendan 
2468fa94a07fSbrendan 	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2469fa94a07fSbrendan 	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
2470fa94a07fSbrendan 	    VDEV_LABEL_L2CACHE));
2471fa94a07fSbrendan }
2472fa94a07fSbrendan 
/*
 * Install 'devs' as the aux-device (spare/l2cache) list named 'config'
 * in sav->sav_config.  If a list already exists, the new devices are
 * appended to it; otherwise a fresh config nvlist is allocated.
 */
static void
spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
    const char *config)
{
	int i;

	if (sav->sav_config != NULL) {
		nvlist_t **olddevs;
		uint_t oldndevs;
		nvlist_t **newdevs;

		/*
		 * Generate new dev list by concatentating with the
		 * current dev list.
		 */
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
		    &olddevs, &oldndevs) == 0);

		newdevs = kmem_alloc(sizeof (void *) *
		    (ndevs + oldndevs), KM_SLEEP);
		for (i = 0; i < oldndevs; i++)
			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
			    KM_SLEEP) == 0);
		for (i = 0; i < ndevs; i++)
			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
			    KM_SLEEP) == 0);

		/* Remove the old array before adding the merged one. */
		VERIFY(nvlist_remove(sav->sav_config, config,
		    DATA_TYPE_NVLIST_ARRAY) == 0);

		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
		    config, newdevs, ndevs + oldndevs) == 0);
		/* nvlist_add duplicated the entries; free our copies. */
		for (i = 0; i < oldndevs + ndevs; i++)
			nvlist_free(newdevs[i]);
		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
	} else {
		/*
		 * Generate a new dev list.
		 */
		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
		    devs, ndevs) == 0);
	}
}
2518fa94a07fSbrendan 
2519fa94a07fSbrendan /*
2520fa94a07fSbrendan  * Stop and drop level 2 ARC devices
2521fa94a07fSbrendan  */
2522fa94a07fSbrendan void
2523fa94a07fSbrendan spa_l2cache_drop(spa_t *spa)
2524fa94a07fSbrendan {
2525fa94a07fSbrendan 	vdev_t *vd;
2526fa94a07fSbrendan 	int i;
2527fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
2528fa94a07fSbrendan 
2529fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++) {
2530fa94a07fSbrendan 		uint64_t pool;
2531fa94a07fSbrendan 
2532fa94a07fSbrendan 		vd = sav->sav_vdevs[i];
2533fa94a07fSbrendan 		ASSERT(vd != NULL);
2534fa94a07fSbrendan 
25358ad4d6ddSJeff Bonwick 		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
25368ad4d6ddSJeff Bonwick 		    pool != 0ULL && l2arc_vdev_present(vd))
2537fa94a07fSbrendan 			l2arc_remove_vdev(vd);
2538fa94a07fSbrendan 		if (vd->vdev_isl2cache)
2539fa94a07fSbrendan 			spa_l2cache_remove(vd);
2540fa94a07fSbrendan 		vdev_clear_stats(vd);
2541fa94a07fSbrendan 		(void) vdev_close(vd);
2542fa94a07fSbrendan 	}
2543fa94a07fSbrendan }
2544fa94a07fSbrendan 
2545fa9e4066Sahrens /*
2546fa9e4066Sahrens  * Pool Creation
2547fa9e4066Sahrens  */
2548fa9e4066Sahrens int
2549990b4856Slling spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
25500a48a24eStimh     const char *history_str, nvlist_t *zplprops)
2551fa9e4066Sahrens {
2552fa9e4066Sahrens 	spa_t *spa;
2553990b4856Slling 	char *altroot = NULL;
25540373e76bSbonwick 	vdev_t *rvd;
2555fa9e4066Sahrens 	dsl_pool_t *dp;
2556fa9e4066Sahrens 	dmu_tx_t *tx;
2557573ca77eSGeorge Wilson 	int error = 0;
2558fa9e4066Sahrens 	uint64_t txg = TXG_INITIAL;
2559fa94a07fSbrendan 	nvlist_t **spares, **l2cache;
2560fa94a07fSbrendan 	uint_t nspares, nl2cache;
2561990b4856Slling 	uint64_t version;
2562fa9e4066Sahrens 
2563fa9e4066Sahrens 	/*
2564fa9e4066Sahrens 	 * If this pool already exists, return failure.
2565fa9e4066Sahrens 	 */
2566fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
2567fa9e4066Sahrens 	if (spa_lookup(pool) != NULL) {
2568fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
2569fa9e4066Sahrens 		return (EEXIST);
2570fa9e4066Sahrens 	}
2571fa9e4066Sahrens 
2572fa9e4066Sahrens 	/*
2573fa9e4066Sahrens 	 * Allocate a new spa_t structure.
2574fa9e4066Sahrens 	 */
2575990b4856Slling 	(void) nvlist_lookup_string(props,
2576990b4856Slling 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
2577468c413aSTim Haley 	spa = spa_add(pool, NULL, altroot);
25788ad4d6ddSJeff Bonwick 	spa_activate(spa, spa_mode_global);
2579fa9e4066Sahrens 
2580990b4856Slling 	if (props && (error = spa_prop_validate(spa, props))) {
2581990b4856Slling 		spa_deactivate(spa);
2582990b4856Slling 		spa_remove(spa);
2583c5904d13Seschrock 		mutex_exit(&spa_namespace_lock);
2584990b4856Slling 		return (error);
2585990b4856Slling 	}
2586990b4856Slling 
2587990b4856Slling 	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
2588990b4856Slling 	    &version) != 0)
2589990b4856Slling 		version = SPA_VERSION;
2590990b4856Slling 	ASSERT(version <= SPA_VERSION);
2591b24ab676SJeff Bonwick 
2592b24ab676SJeff Bonwick 	spa->spa_first_txg = txg;
2593b24ab676SJeff Bonwick 	spa->spa_uberblock.ub_txg = txg - 1;
2594990b4856Slling 	spa->spa_uberblock.ub_version = version;
2595fa9e4066Sahrens 	spa->spa_ubsync = spa->spa_uberblock;
2596fa9e4066Sahrens 
259754d692b7SGeorge Wilson 	/*
259854d692b7SGeorge Wilson 	 * Create "The Godfather" zio to hold all async IOs
259954d692b7SGeorge Wilson 	 */
260025f89ee2SJeff Bonwick 	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
260125f89ee2SJeff Bonwick 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
260254d692b7SGeorge Wilson 
26030373e76bSbonwick 	/*
26040373e76bSbonwick 	 * Create the root vdev.
26050373e76bSbonwick 	 */
2606e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
26070373e76bSbonwick 
260899653d4eSeschrock 	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);
26090373e76bSbonwick 
261099653d4eSeschrock 	ASSERT(error != 0 || rvd != NULL);
261199653d4eSeschrock 	ASSERT(error != 0 || spa->spa_root_vdev == rvd);
26120373e76bSbonwick 
2613b7b97454Sperrin 	if (error == 0 && !zfs_allocatable_devs(nvroot))
26140373e76bSbonwick 		error = EINVAL;
261599653d4eSeschrock 
261699653d4eSeschrock 	if (error == 0 &&
261799653d4eSeschrock 	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
2618fa94a07fSbrendan 	    (error = spa_validate_aux(spa, nvroot, txg,
261999653d4eSeschrock 	    VDEV_ALLOC_ADD)) == 0) {
2620573ca77eSGeorge Wilson 		for (int c = 0; c < rvd->vdev_children; c++) {
2621573ca77eSGeorge Wilson 			vdev_metaslab_set_size(rvd->vdev_child[c]);
2622573ca77eSGeorge Wilson 			vdev_expand(rvd->vdev_child[c], txg);
2623573ca77eSGeorge Wilson 		}
26240373e76bSbonwick 	}
26250373e76bSbonwick 
2626e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
2627fa9e4066Sahrens 
262899653d4eSeschrock 	if (error != 0) {
2629fa9e4066Sahrens 		spa_unload(spa);
2630fa9e4066Sahrens 		spa_deactivate(spa);
2631fa9e4066Sahrens 		spa_remove(spa);
2632fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
2633fa9e4066Sahrens 		return (error);
2634fa9e4066Sahrens 	}
2635fa9e4066Sahrens 
263699653d4eSeschrock 	/*
263799653d4eSeschrock 	 * Get the list of spares, if specified.
263899653d4eSeschrock 	 */
263999653d4eSeschrock 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
264099653d4eSeschrock 	    &spares, &nspares) == 0) {
2641fa94a07fSbrendan 		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
264299653d4eSeschrock 		    KM_SLEEP) == 0);
2643fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
264499653d4eSeschrock 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
2645e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
264699653d4eSeschrock 		spa_load_spares(spa);
2647e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
2648fa94a07fSbrendan 		spa->spa_spares.sav_sync = B_TRUE;
2649fa94a07fSbrendan 	}
2650fa94a07fSbrendan 
2651fa94a07fSbrendan 	/*
2652fa94a07fSbrendan 	 * Get the list of level 2 cache devices, if specified.
2653fa94a07fSbrendan 	 */
2654fa94a07fSbrendan 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
2655fa94a07fSbrendan 	    &l2cache, &nl2cache) == 0) {
2656fa94a07fSbrendan 		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
2657fa94a07fSbrendan 		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
2658fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
2659fa94a07fSbrendan 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
2660e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
2661fa94a07fSbrendan 		spa_load_l2cache(spa);
2662e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
2663fa94a07fSbrendan 		spa->spa_l2cache.sav_sync = B_TRUE;
266499653d4eSeschrock 	}
266599653d4eSeschrock 
26660a48a24eStimh 	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
2667fa9e4066Sahrens 	spa->spa_meta_objset = dp->dp_meta_objset;
2668fa9e4066Sahrens 
2669485bbbf5SGeorge Wilson 	/*
2670485bbbf5SGeorge Wilson 	 * Create DDTs (dedup tables).
2671485bbbf5SGeorge Wilson 	 */
2672485bbbf5SGeorge Wilson 	ddt_create(spa);
2673485bbbf5SGeorge Wilson 
2674485bbbf5SGeorge Wilson 	spa_update_dspace(spa);
2675485bbbf5SGeorge Wilson 
2676fa9e4066Sahrens 	tx = dmu_tx_create_assigned(dp, txg);
2677fa9e4066Sahrens 
2678fa9e4066Sahrens 	/*
2679fa9e4066Sahrens 	 * Create the pool config object.
2680fa9e4066Sahrens 	 */
2681fa9e4066Sahrens 	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
2682f7991ba4STim Haley 	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
2683fa9e4066Sahrens 	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);
2684fa9e4066Sahrens 
2685ea8dc4b6Seschrock 	if (zap_add(spa->spa_meta_objset,
2686fa9e4066Sahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
2687ea8dc4b6Seschrock 	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
2688ea8dc4b6Seschrock 		cmn_err(CE_PANIC, "failed to add pool config");
2689ea8dc4b6Seschrock 	}
2690fa9e4066Sahrens 
2691990b4856Slling 	/* Newly created pools with the right version are always deflated. */
2692990b4856Slling 	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
2693990b4856Slling 		spa->spa_deflate = TRUE;
2694990b4856Slling 		if (zap_add(spa->spa_meta_objset,
2695990b4856Slling 		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
2696990b4856Slling 		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
2697990b4856Slling 			cmn_err(CE_PANIC, "failed to add deflate");
2698990b4856Slling 		}
269999653d4eSeschrock 	}
270099653d4eSeschrock 
2701fa9e4066Sahrens 	/*
2702fa9e4066Sahrens 	 * Create the deferred-free bplist object.  Turn off compression
2703fa9e4066Sahrens 	 * because sync-to-convergence takes longer if the blocksize
2704fa9e4066Sahrens 	 * keeps changing.
2705fa9e4066Sahrens 	 */
2706b24ab676SJeff Bonwick 	spa->spa_deferred_bplist_obj = bplist_create(spa->spa_meta_objset,
2707fa9e4066Sahrens 	    1 << 14, tx);
2708b24ab676SJeff Bonwick 	dmu_object_set_compress(spa->spa_meta_objset,
2709b24ab676SJeff Bonwick 	    spa->spa_deferred_bplist_obj, ZIO_COMPRESS_OFF, tx);
2710fa9e4066Sahrens 
2711ea8dc4b6Seschrock 	if (zap_add(spa->spa_meta_objset,
2712fa9e4066Sahrens 	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPLIST,
2713b24ab676SJeff Bonwick 	    sizeof (uint64_t), 1, &spa->spa_deferred_bplist_obj, tx) != 0) {
2714ea8dc4b6Seschrock 		cmn_err(CE_PANIC, "failed to add bplist");
2715ea8dc4b6Seschrock 	}
2716fa9e4066Sahrens 
271706eeb2adSek 	/*
271806eeb2adSek 	 * Create the pool's history object.
271906eeb2adSek 	 */
2720990b4856Slling 	if (version >= SPA_VERSION_ZPOOL_HISTORY)
2721990b4856Slling 		spa_history_create_obj(spa, tx);
2722990b4856Slling 
2723990b4856Slling 	/*
2724990b4856Slling 	 * Set pool properties.
2725990b4856Slling 	 */
2726990b4856Slling 	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
2727990b4856Slling 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
27280a4e9518Sgw 	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
2729573ca77eSGeorge Wilson 	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
2730b24ab676SJeff Bonwick 
2731379c004dSEric Schrock 	if (props != NULL) {
2732379c004dSEric Schrock 		spa_configfile_set(spa, props, B_FALSE);
2733990b4856Slling 		spa_sync_props(spa, props, CRED(), tx);
2734379c004dSEric Schrock 	}
273506eeb2adSek 
2736fa9e4066Sahrens 	dmu_tx_commit(tx);
2737fa9e4066Sahrens 
2738fa9e4066Sahrens 	spa->spa_sync_on = B_TRUE;
2739fa9e4066Sahrens 	txg_sync_start(spa->spa_dsl_pool);
2740fa9e4066Sahrens 
2741fa9e4066Sahrens 	/*
2742fa9e4066Sahrens 	 * We explicitly wait for the first transaction to complete so that our
2743fa9e4066Sahrens 	 * bean counters are appropriately updated.
2744fa9e4066Sahrens 	 */
2745fa9e4066Sahrens 	txg_wait_synced(spa->spa_dsl_pool, txg);
2746fa9e4066Sahrens 
2747c5904d13Seschrock 	spa_config_sync(spa, B_FALSE, B_TRUE);
2748fa9e4066Sahrens 
2749990b4856Slling 	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
2750228975ccSek 		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
2751c8e1f6d2SMark J Musante 	spa_history_log_version(spa, LOG_POOL_CREATE);
2752228975ccSek 
2753088f3894Sahrens 	spa->spa_minref = refcount_count(&spa->spa_refcount);
2754088f3894Sahrens 
2755daaa36a7SGeorge Wilson 	mutex_exit(&spa_namespace_lock);
2756daaa36a7SGeorge Wilson 
2757fa9e4066Sahrens 	return (0);
2758fa9e4066Sahrens }
2759fa9e4066Sahrens 
2760e7cbe64fSgw #ifdef _KERNEL
2761e7cbe64fSgw /*
276221ecdf64SLin Ling  * Get the root pool information from the root disk, then import the root pool
276321ecdf64SLin Ling  * during the system boot up time.
2764e7cbe64fSgw  */
276521ecdf64SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
276621ecdf64SLin Ling 
276721ecdf64SLin Ling static nvlist_t *
276821ecdf64SLin Ling spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
2769e7cbe64fSgw {
277021ecdf64SLin Ling 	nvlist_t *config;
2771e7cbe64fSgw 	nvlist_t *nvtop, *nvroot;
2772e7cbe64fSgw 	uint64_t pgid;
2773e7cbe64fSgw 
	/*
	 * Read the on-disk label; return NULL quietly if it is unreadable.
	 * On success *guid receives this boot vdev's guid
	 * (ZPOOL_CONFIG_GUID) for the caller's vdev lookup.
	 */
277421ecdf64SLin Ling 	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
277521ecdf64SLin Ling 		return (NULL);
277621ecdf64SLin Ling 
2777e7cbe64fSgw 	/*
2778e7cbe64fSgw 	 * Add this top-level vdev to the child array.
2779e7cbe64fSgw 	 */
278021ecdf64SLin Ling 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
278121ecdf64SLin Ling 	    &nvtop) == 0);
278221ecdf64SLin Ling 	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
278321ecdf64SLin Ling 	    &pgid) == 0);
278421ecdf64SLin Ling 	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);
2785e7cbe64fSgw 
2786e7cbe64fSgw 	/*
2787e7cbe64fSgw 	 * Put this pool's top-level vdevs into a root vdev.
2788e7cbe64fSgw 	 */
2789e7cbe64fSgw 	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
279021ecdf64SLin Ling 	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
279121ecdf64SLin Ling 	    VDEV_TYPE_ROOT) == 0);
2792e7cbe64fSgw 	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
2793e7cbe64fSgw 	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
2794e7cbe64fSgw 	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
2795e7cbe64fSgw 	    &nvtop, 1) == 0);
2796e7cbe64fSgw 
2797e7cbe64fSgw 	/*
2798e7cbe64fSgw 	 * Replace the existing vdev_tree with the new root vdev in
2799e7cbe64fSgw 	 * this pool's configuration (remove the old, add the new).
2800e7cbe64fSgw 	 */
2801e7cbe64fSgw 	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
2802e7cbe64fSgw 	nvlist_free(nvroot);
	/* Ownership of 'config' passes to the caller (freed in
	 * spa_import_rootpool() below). */
280321ecdf64SLin Ling 	return (config);
2804e7cbe64fSgw }
2805e7cbe64fSgw 
2806e7cbe64fSgw /*
280721ecdf64SLin Ling  * Walk the vdev tree and see if we can find a device with "better"
280821ecdf64SLin Ling  * configuration. A configuration is "better" if the label on that
280921ecdf64SLin Ling  * device has a more recent txg.
2810051aabe6Staylor  */
281121ecdf64SLin Ling static void
281221ecdf64SLin Ling spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
2813051aabe6Staylor {
	/*
	 * Recurse over all children first; only leaf vdevs carry labels
	 * we can read.  *avd/*txg are in/out: they track the best
	 * (highest-txg) candidate seen so far across the whole walk.
	 */
2814573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++)
281521ecdf64SLin Ling 		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);
2816051aabe6Staylor 
281721ecdf64SLin Ling 	if (vd->vdev_ops->vdev_op_leaf) {
281821ecdf64SLin Ling 		nvlist_t *label;
281921ecdf64SLin Ling 		uint64_t label_txg;
2820051aabe6Staylor 
		/* Unreadable label: skip this leaf rather than fail. */
282121ecdf64SLin Ling 		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
282221ecdf64SLin Ling 		    &label) != 0)
282321ecdf64SLin Ling 			return;
2824051aabe6Staylor 
282521ecdf64SLin Ling 		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
282621ecdf64SLin Ling 		    &label_txg) == 0);
2827051aabe6Staylor 
282821ecdf64SLin Ling 		/*
282921ecdf64SLin Ling 		 * Do we have a better boot device?
283021ecdf64SLin Ling 		 */
283121ecdf64SLin Ling 		if (label_txg > *txg) {
283221ecdf64SLin Ling 			*txg = label_txg;
283321ecdf64SLin Ling 			*avd = vd;
2834051aabe6Staylor 		}
283521ecdf64SLin Ling 		nvlist_free(label);
2836051aabe6Staylor 	}
2837051aabe6Staylor }
2838051aabe6Staylor 
2839e7cbe64fSgw /*
2840e7cbe64fSgw  * Import a root pool.
2841e7cbe64fSgw  *
2842051aabe6Staylor  * For x86. devpath_list will consist of devid and/or physpath name of
2843051aabe6Staylor  * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
2844051aabe6Staylor  * The GRUB "findroot" command will return the vdev we should boot.
2845e7cbe64fSgw  *
2846e7cbe64fSgw  * For Sparc, devpath_list consists the physpath name of the booting device
2847e7cbe64fSgw  * no matter the rootpool is a single device pool or a mirrored pool.
2848e7cbe64fSgw  * e.g.
2849e7cbe64fSgw  *	"/pci@1f,0/ide@d/disk@0,0:a"
2850e7cbe64fSgw  */
2851e7cbe64fSgw int
2852051aabe6Staylor spa_import_rootpool(char *devpath, char *devid)
2853e7cbe64fSgw {
285421ecdf64SLin Ling 	spa_t *spa;
285521ecdf64SLin Ling 	vdev_t *rvd, *bvd, *avd = NULL;
285621ecdf64SLin Ling 	nvlist_t *config, *nvtop;
285721ecdf64SLin Ling 	uint64_t guid, txg;
2858e7cbe64fSgw 	char *pname;
2859e7cbe64fSgw 	int error;
2860e7cbe64fSgw 
2861e7cbe64fSgw 	/*
286221ecdf64SLin Ling 	 * Read the label from the boot device and generate a configuration.
2863e7cbe64fSgw 	 */
2864dedec472SJack Meng 	config = spa_generate_rootconf(devpath, devid, &guid);
	/*
	 * SPARC/OBP only: if the first read failed and the path looks like
	 * an iSCSI boot device, resolve the physical path and retry once.
	 */
2865dedec472SJack Meng #if defined(_OBP) && defined(_KERNEL)
2866dedec472SJack Meng 	if (config == NULL) {
2867dedec472SJack Meng 		if (strstr(devpath, "/iscsi/ssd") != NULL) {
2868dedec472SJack Meng 			/* iscsi boot */
2869dedec472SJack Meng 			get_iscsi_bootpath_phy(devpath);
2870dedec472SJack Meng 			config = spa_generate_rootconf(devpath, devid, &guid);
2871dedec472SJack Meng 		}
2872dedec472SJack Meng 	}
2873dedec472SJack Meng #endif
2874dedec472SJack Meng 	if (config == NULL) {
287521ecdf64SLin Ling 		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
287621ecdf64SLin Ling 		    devpath);
287721ecdf64SLin Ling 		return (EIO);
287821ecdf64SLin Ling 	}
2879e7cbe64fSgw 
288021ecdf64SLin Ling 	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
288121ecdf64SLin Ling 	    &pname) == 0);
288221ecdf64SLin Ling 	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);
2883e7cbe64fSgw 
28846809eb4eSEric Schrock 	mutex_enter(&spa_namespace_lock);
28856809eb4eSEric Schrock 	if ((spa = spa_lookup(pname)) != NULL) {
28866809eb4eSEric Schrock 		/*
28876809eb4eSEric Schrock 		 * Remove the existing root pool from the namespace so that we
28886809eb4eSEric Schrock 		 * can replace it with the correct config we just read in.
28896809eb4eSEric Schrock 		 */
28906809eb4eSEric Schrock 		spa_remove(spa);
28916809eb4eSEric Schrock 	}
28926809eb4eSEric Schrock 
2893468c413aSTim Haley 	spa = spa_add(pname, config, NULL);
28946809eb4eSEric Schrock 	spa->spa_is_root = B_TRUE;
2895bc758434SLin Ling 	spa->spa_load_verbatim = B_TRUE;
2896e7cbe64fSgw 
289721ecdf64SLin Ling 	/*
289821ecdf64SLin Ling 	 * Build up a vdev tree based on the boot device's label config.
289921ecdf64SLin Ling 	 */
290021ecdf64SLin Ling 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
290121ecdf64SLin Ling 	    &nvtop) == 0);
290221ecdf64SLin Ling 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
290321ecdf64SLin Ling 	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
290421ecdf64SLin Ling 	    VDEV_ALLOC_ROOTPOOL);
290521ecdf64SLin Ling 	spa_config_exit(spa, SCL_ALL, FTAG);
290621ecdf64SLin Ling 	if (error) {
290721ecdf64SLin Ling 		mutex_exit(&spa_namespace_lock);
290821ecdf64SLin Ling 		nvlist_free(config);
290921ecdf64SLin Ling 		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
291021ecdf64SLin Ling 		    pname);
291121ecdf64SLin Ling 		return (error);
291221ecdf64SLin Ling 	}
291321ecdf64SLin Ling 
291421ecdf64SLin Ling 	/*
291521ecdf64SLin Ling 	 * Get the boot vdev.
291621ecdf64SLin Ling 	 */
291721ecdf64SLin Ling 	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
291821ecdf64SLin Ling 		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
291921ecdf64SLin Ling 		    (u_longlong_t)guid);
292021ecdf64SLin Ling 		error = ENOENT;
292121ecdf64SLin Ling 		goto out;
292221ecdf64SLin Ling 	}
2923e7cbe64fSgw 
292421ecdf64SLin Ling 	/*
292521ecdf64SLin Ling 	 * Determine if there is a better boot device.
292621ecdf64SLin Ling 	 */
292721ecdf64SLin Ling 	avd = bvd;
292821ecdf64SLin Ling 	spa_alt_rootvdev(rvd, &avd, &txg);
292921ecdf64SLin Ling 	if (avd != bvd) {
293021ecdf64SLin Ling 		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
293121ecdf64SLin Ling 		    "try booting from '%s'", avd->vdev_path);
293221ecdf64SLin Ling 		error = EINVAL;
293321ecdf64SLin Ling 		goto out;
293421ecdf64SLin Ling 	}
2935e7cbe64fSgw 
293621ecdf64SLin Ling 	/*
293721ecdf64SLin Ling 	 * If the boot device is part of a spare vdev then ensure that
293821ecdf64SLin Ling 	 * we're booting off the active spare.
293921ecdf64SLin Ling 	 */
294021ecdf64SLin Ling 	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
294121ecdf64SLin Ling 	    !bvd->vdev_isspare) {
294221ecdf64SLin Ling 		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
294321ecdf64SLin Ling 		    "try booting from '%s'",
294421ecdf64SLin Ling 		    bvd->vdev_parent->vdev_child[1]->vdev_path);
294521ecdf64SLin Ling 		error = EINVAL;
294621ecdf64SLin Ling 		goto out;
294721ecdf64SLin Ling 	}
294821ecdf64SLin Ling 
294921ecdf64SLin Ling 	error = 0;
2950c8e1f6d2SMark J Musante 	spa_history_log_version(spa, LOG_POOL_IMPORT);
	/*
	 * Common exit: the parsed vdev tree and the label config were only
	 * needed for validation, so both are freed on success and failure.
	 */
295121ecdf64SLin Ling out:
295221ecdf64SLin Ling 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
295321ecdf64SLin Ling 	vdev_free(rvd);
295421ecdf64SLin Ling 	spa_config_exit(spa, SCL_ALL, FTAG);
295521ecdf64SLin Ling 	mutex_exit(&spa_namespace_lock);
295621ecdf64SLin Ling 
295721ecdf64SLin Ling 	nvlist_free(config);
2958e7cbe64fSgw 	return (error);
2959e7cbe64fSgw }
296021ecdf64SLin Ling 
2961e7cbe64fSgw #endif
2962e7cbe64fSgw 
2963e7cbe64fSgw /*
29646809eb4eSEric Schrock  * Take a pool and insert it into the namespace as if it had been loaded at
29656809eb4eSEric Schrock  * boot.
2966e7cbe64fSgw  */
2967e7cbe64fSgw int
29686809eb4eSEric Schrock spa_import_verbatim(const char *pool, nvlist_t *config, nvlist_t *props)
2969e7cbe64fSgw {
29706809eb4eSEric Schrock 	spa_t *spa;
2971468c413aSTim Haley 	zpool_rewind_policy_t policy;
29726809eb4eSEric Schrock 	char *altroot = NULL;
29736809eb4eSEric Schrock 
29746809eb4eSEric Schrock 	mutex_enter(&spa_namespace_lock);
29756809eb4eSEric Schrock 	if (spa_lookup(pool) != NULL) {
29766809eb4eSEric Schrock 		mutex_exit(&spa_namespace_lock);
29776809eb4eSEric Schrock 		return (EEXIST);
29786809eb4eSEric Schrock 	}
29796809eb4eSEric Schrock 
	/* Altroot is optional; lookup failure simply leaves it NULL. */
29806809eb4eSEric Schrock 	(void) nvlist_lookup_string(props,
29816809eb4eSEric Schrock 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
2982468c413aSTim Haley 	spa = spa_add(pool, config, altroot);
29836809eb4eSEric Schrock 
2984468c413aSTim Haley 	zpool_get_rewind_policy(config, &policy);
2985468c413aSTim Haley 	spa->spa_load_max_txg = policy.zrp_txg;
29864f0f5e5bSVictor Latushkin 
	/*
	 * The supplied config is recorded as-is -- note that no spa_load()
	 * or activation happens here; the pool is merely entered into the
	 * namespace and the config cache.
	 */
2987468c413aSTim Haley 	spa->spa_load_verbatim = B_TRUE;
29886809eb4eSEric Schrock 
29896809eb4eSEric Schrock 	if (props != NULL)
29906809eb4eSEric Schrock 		spa_configfile_set(spa, props, B_FALSE);
29916809eb4eSEric Schrock 
29926809eb4eSEric Schrock 	spa_config_sync(spa, B_FALSE, B_TRUE);
29936809eb4eSEric Schrock 
29946809eb4eSEric Schrock 	mutex_exit(&spa_namespace_lock);
2995c8e1f6d2SMark J Musante 	spa_history_log_version(spa, LOG_POOL_IMPORT);
29966809eb4eSEric Schrock 
29976809eb4eSEric Schrock 	return (0);
2998e7cbe64fSgw }
2999e7cbe64fSgw 
30006809eb4eSEric Schrock /*
30016809eb4eSEric Schrock  * Import a non-root pool into the system.
30026809eb4eSEric Schrock  */
3003c5904d13Seschrock int
30046809eb4eSEric Schrock spa_import(const char *pool, nvlist_t *config, nvlist_t *props)
3005c5904d13Seschrock {
30066809eb4eSEric Schrock 	spa_t *spa;
30076809eb4eSEric Schrock 	char *altroot = NULL;
3008468c413aSTim Haley 	spa_load_state_t state = SPA_LOAD_IMPORT;
3009468c413aSTim Haley 	zpool_rewind_policy_t policy;
30106809eb4eSEric Schrock 	int error;
30116809eb4eSEric Schrock 	nvlist_t *nvroot;
30126809eb4eSEric Schrock 	nvlist_t **spares, **l2cache;
30136809eb4eSEric Schrock 	uint_t nspares, nl2cache;
30146809eb4eSEric Schrock 
30156809eb4eSEric Schrock 	/*
30166809eb4eSEric Schrock 	 * If a pool with this name exists, return failure.
30176809eb4eSEric Schrock 	 */
30186809eb4eSEric Schrock 	mutex_enter(&spa_namespace_lock);
30196809eb4eSEric Schrock 	if ((spa = spa_lookup(pool)) != NULL) {
30206809eb4eSEric Schrock 		mutex_exit(&spa_namespace_lock);
30216809eb4eSEric Schrock 		return (EEXIST);
30226809eb4eSEric Schrock 	}
30236809eb4eSEric Schrock 
	/*
	 * A caller-supplied rewind request switches us into recovery mode,
	 * where spa_load_best() below may roll back to an earlier txg.
	 */
3024468c413aSTim Haley 	zpool_get_rewind_policy(config, &policy);
3025468c413aSTim Haley 	if (policy.zrp_request & ZPOOL_DO_REWIND)
3026468c413aSTim Haley 		state = SPA_LOAD_RECOVER;
3027468c413aSTim Haley 
30286809eb4eSEric Schrock 	/*
30296809eb4eSEric Schrock 	 * Create and initialize the spa structure.
30306809eb4eSEric Schrock 	 */
30316809eb4eSEric Schrock 	(void) nvlist_lookup_string(props,
30326809eb4eSEric Schrock 	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
3033468c413aSTim Haley 	spa = spa_add(pool, config, altroot);
30346809eb4eSEric Schrock 	spa_activate(spa, spa_mode_global);
30356809eb4eSEric Schrock 
303625f89ee2SJeff Bonwick 	/*
303725f89ee2SJeff Bonwick 	 * Don't start async tasks until we know everything is healthy.
303825f89ee2SJeff Bonwick 	 */
303925f89ee2SJeff Bonwick 	spa_async_suspend(spa);
304025f89ee2SJeff Bonwick 
30416809eb4eSEric Schrock 	/*
30426809eb4eSEric Schrock 	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
30436809eb4eSEric Schrock 	 * because the user-supplied config is actually the one to trust when
30446809eb4eSEric Schrock 	 * doing an import.
30456809eb4eSEric Schrock 	 */
3046468c413aSTim Haley 	if (state != SPA_LOAD_RECOVER)
3047468c413aSTim Haley 		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
3048468c413aSTim Haley 	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
3049468c413aSTim Haley 	    ((policy.zrp_request & ZPOOL_EXTREME_REWIND) != 0));
3050468c413aSTim Haley 
3051468c413aSTim Haley 	/*
3052468c413aSTim Haley 	 * Propagate anything learned about failing or best txgs
3053468c413aSTim Haley 	 * back to caller
3054468c413aSTim Haley 	 */
3055468c413aSTim Haley 	spa_rewind_data_to_nvlist(spa, config);
30566809eb4eSEric Schrock 
30576809eb4eSEric Schrock 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
30586809eb4eSEric Schrock 	/*
30596809eb4eSEric Schrock 	 * Toss any existing sparelist, as it doesn't have any validity
30606809eb4eSEric Schrock 	 * anymore, and conflicts with spa_has_spare().
30616809eb4eSEric Schrock 	 */
30626809eb4eSEric Schrock 	if (spa->spa_spares.sav_config) {
30636809eb4eSEric Schrock 		nvlist_free(spa->spa_spares.sav_config);
30646809eb4eSEric Schrock 		spa->spa_spares.sav_config = NULL;
30656809eb4eSEric Schrock 		spa_load_spares(spa);
30666809eb4eSEric Schrock 	}
30676809eb4eSEric Schrock 	if (spa->spa_l2cache.sav_config) {
30686809eb4eSEric Schrock 		nvlist_free(spa->spa_l2cache.sav_config);
30696809eb4eSEric Schrock 		spa->spa_l2cache.sav_config = NULL;
30706809eb4eSEric Schrock 		spa_load_l2cache(spa);
30716809eb4eSEric Schrock 	}
30726809eb4eSEric Schrock 
	/* Validate any user-supplied spare/l2cache devices, but only if
	 * the load itself succeeded. */
30736809eb4eSEric Schrock 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
30746809eb4eSEric Schrock 	    &nvroot) == 0);
30756809eb4eSEric Schrock 	if (error == 0)
30766809eb4eSEric Schrock 		error = spa_validate_aux(spa, nvroot, -1ULL,
30776809eb4eSEric Schrock 		    VDEV_ALLOC_SPARE);
30786809eb4eSEric Schrock 	if (error == 0)
30796809eb4eSEric Schrock 		error = spa_validate_aux(spa, nvroot, -1ULL,
30806809eb4eSEric Schrock 		    VDEV_ALLOC_L2CACHE);
30816809eb4eSEric Schrock 	spa_config_exit(spa, SCL_ALL, FTAG);
30826809eb4eSEric Schrock 
30836809eb4eSEric Schrock 	if (props != NULL)
30846809eb4eSEric Schrock 		spa_configfile_set(spa, props, B_FALSE);
30856809eb4eSEric Schrock 
30866809eb4eSEric Schrock 	if (error != 0 || (props && spa_writeable(spa) &&
30876809eb4eSEric Schrock 	    (error = spa_prop_set(spa, props)))) {
30886809eb4eSEric Schrock 		spa_unload(spa);
30896809eb4eSEric Schrock 		spa_deactivate(spa);
30906809eb4eSEric Schrock 		spa_remove(spa);
30916809eb4eSEric Schrock 		mutex_exit(&spa_namespace_lock);
30926809eb4eSEric Schrock 		return (error);
30936809eb4eSEric Schrock 	}
30946809eb4eSEric Schrock 
309525f89ee2SJeff Bonwick 	spa_async_resume(spa);
309625f89ee2SJeff Bonwick 
30976809eb4eSEric Schrock 	/*
30986809eb4eSEric Schrock 	 * Override any spares and level 2 cache devices as specified by
30996809eb4eSEric Schrock 	 * the user, as these may have correct device names/devids, etc.
31006809eb4eSEric Schrock 	 */
31016809eb4eSEric Schrock 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
31026809eb4eSEric Schrock 	    &spares, &nspares) == 0) {
31036809eb4eSEric Schrock 		if (spa->spa_spares.sav_config)
31046809eb4eSEric Schrock 			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
31056809eb4eSEric Schrock 			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
31066809eb4eSEric Schrock 		else
31076809eb4eSEric Schrock 			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
31086809eb4eSEric Schrock 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
31096809eb4eSEric Schrock 		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
31106809eb4eSEric Schrock 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
31116809eb4eSEric Schrock 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
31126809eb4eSEric Schrock 		spa_load_spares(spa);
31136809eb4eSEric Schrock 		spa_config_exit(spa, SCL_ALL, FTAG);
31146809eb4eSEric Schrock 		spa->spa_spares.sav_sync = B_TRUE;
31156809eb4eSEric Schrock 	}
31166809eb4eSEric Schrock 	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
31176809eb4eSEric Schrock 	    &l2cache, &nl2cache) == 0) {
31186809eb4eSEric Schrock 		if (spa->spa_l2cache.sav_config)
31196809eb4eSEric Schrock 			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
31206809eb4eSEric Schrock 			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
31216809eb4eSEric Schrock 		else
31226809eb4eSEric Schrock 			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
31236809eb4eSEric Schrock 			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
31246809eb4eSEric Schrock 		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
31256809eb4eSEric Schrock 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
31266809eb4eSEric Schrock 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
31276809eb4eSEric Schrock 		spa_load_l2cache(spa);
31286809eb4eSEric Schrock 		spa_config_exit(spa, SCL_ALL, FTAG);
31296809eb4eSEric Schrock 		spa->spa_l2cache.sav_sync = B_TRUE;
31306809eb4eSEric Schrock 	}
31316809eb4eSEric Schrock 
3132b693757aSEric Schrock 	/*
3133b693757aSEric Schrock 	 * Check for any removed devices.
3134b693757aSEric Schrock 	 */
3135b693757aSEric Schrock 	if (spa->spa_autoreplace) {
3136b693757aSEric Schrock 		spa_aux_check_removed(&spa->spa_spares);
3137b693757aSEric Schrock 		spa_aux_check_removed(&spa->spa_l2cache);
3138b693757aSEric Schrock 	}
3139b693757aSEric Schrock 
31406809eb4eSEric Schrock 	if (spa_writeable(spa)) {
31416809eb4eSEric Schrock 		/*
31426809eb4eSEric Schrock 		 * Update the config cache to include the newly-imported pool.
31436809eb4eSEric Schrock 		 */
3144bc758434SLin Ling 		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
31456809eb4eSEric Schrock 	}
31466809eb4eSEric Schrock 
3147573ca77eSGeorge Wilson 	/*
3148573ca77eSGeorge Wilson 	 * It's possible that the pool was expanded while it was exported.
3149573ca77eSGeorge Wilson 	 * We kick off an async task to handle this for us.
3150573ca77eSGeorge Wilson 	 */
3151573ca77eSGeorge Wilson 	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
3152573ca77eSGeorge Wilson 
31536809eb4eSEric Schrock 	mutex_exit(&spa_namespace_lock);
3154c8e1f6d2SMark J Musante 	spa_history_log_version(spa, LOG_POOL_IMPORT);
31556809eb4eSEric Schrock 
31566809eb4eSEric Schrock 	return (0);
3157c5904d13Seschrock }
3158c5904d13Seschrock 
3159fa9e4066Sahrens nvlist_t *
3160fa9e4066Sahrens spa_tryimport(nvlist_t *tryconfig)
3161fa9e4066Sahrens {
3162fa9e4066Sahrens 	nvlist_t *config = NULL;
3163fa9e4066Sahrens 	char *poolname;
3164fa9e4066Sahrens 	spa_t *spa;
3165fa9e4066Sahrens 	uint64_t state;
31667b7154beSLin Ling 	int error;
3167fa9e4066Sahrens 
	/*
	 * The supplied config must name the pool and carry its state;
	 * otherwise there is nothing to try.
	 */
3168fa9e4066Sahrens 	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
3169fa9e4066Sahrens 		return (NULL);
3170fa9e4066Sahrens 
3171fa9e4066Sahrens 	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
3172fa9e4066Sahrens 		return (NULL);
3173fa9e4066Sahrens 
3174fa9e4066Sahrens 	/*
31750373e76bSbonwick 	 * Create and initialize the spa structure.
3176fa9e4066Sahrens 	 */
31770373e76bSbonwick 	mutex_enter(&spa_namespace_lock);
	/*
	 * Activate read-only under a throwaway name (TRYIMPORT_NAME); the
	 * pool is unloaded and removed again at the bottom of this function.
	 */
3178468c413aSTim Haley 	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
31798ad4d6ddSJeff Bonwick 	spa_activate(spa, FREAD);
3180fa9e4066Sahrens 
3181fa9e4066Sahrens 	/*
31820373e76bSbonwick 	 * Pass off the heavy lifting to spa_load().
3183ecc2d604Sbonwick 	 * Pass TRUE for mosconfig because the user-supplied config
3184ecc2d604Sbonwick 	 * is actually the one to trust when doing an import.
3185fa9e4066Sahrens 	 */
3186468c413aSTim Haley 	error = spa_load(spa, SPA_LOAD_TRYIMPORT, B_TRUE);
3187fa9e4066Sahrens 
3188fa9e4066Sahrens 	/*
3189fa9e4066Sahrens 	 * If 'tryconfig' was at least parsable, return the current config.
3190fa9e4066Sahrens 	 */
3191fa9e4066Sahrens 	if (spa->spa_root_vdev != NULL) {
3192fa9e4066Sahrens 		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
3193fa9e4066Sahrens 		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
3194fa9e4066Sahrens 		    poolname) == 0);
3195fa9e4066Sahrens 		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
3196fa9e4066Sahrens 		    state) == 0);
319795173954Sek 		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
319895173954Sek 		    spa->spa_uberblock.ub_timestamp) == 0);
319999653d4eSeschrock 
3200e7cbe64fSgw 		/*
3201e7cbe64fSgw 		 * If the bootfs property exists on this pool then we
3202e7cbe64fSgw 		 * copy it out so that external consumers can tell which
3203e7cbe64fSgw 		 * pools are bootable.
3204e7cbe64fSgw 		 */
32057b7154beSLin Ling 		if ((!error || error == EEXIST) && spa->spa_bootfs) {
3206e7cbe64fSgw 			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3207e7cbe64fSgw 
3208e7cbe64fSgw 			/*
3209e7cbe64fSgw 			 * We have to play games with the name since the
3210e7cbe64fSgw 			 * pool was opened as TRYIMPORT_NAME.
3211e7cbe64fSgw 			 */
3212e14bb325SJeff Bonwick 			if (dsl_dsobj_to_dsname(spa_name(spa),
3213e7cbe64fSgw 			    spa->spa_bootfs, tmpname) == 0) {
3214e7cbe64fSgw 				char *cp;
3215e7cbe64fSgw 				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
3216e7cbe64fSgw 
				/* Swap TRYIMPORT_NAME for the real pool
				 * name in the dataset path. */
3217e7cbe64fSgw 				cp = strchr(tmpname, '/');
3218e7cbe64fSgw 				if (cp == NULL) {
3219e7cbe64fSgw 					(void) strlcpy(dsname, tmpname,
3220e7cbe64fSgw 					    MAXPATHLEN);
3221e7cbe64fSgw 				} else {
3222e7cbe64fSgw 					(void) snprintf(dsname, MAXPATHLEN,
3223e7cbe64fSgw 					    "%s/%s", poolname, ++cp);
3224e7cbe64fSgw 				}
3225e7cbe64fSgw 				VERIFY(nvlist_add_string(config,
3226e7cbe64fSgw 				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
3227e7cbe64fSgw 				kmem_free(dsname, MAXPATHLEN);
3228e7cbe64fSgw 			}
3229e7cbe64fSgw 			kmem_free(tmpname, MAXPATHLEN);
3230e7cbe64fSgw 		}
3231e7cbe64fSgw 
323299653d4eSeschrock 		/*
3233fa94a07fSbrendan 		 * Add the list of hot spares and level 2 cache devices.
323499653d4eSeschrock 		 */
32356809eb4eSEric Schrock 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
323699653d4eSeschrock 		spa_add_spares(spa, config);
3237fa94a07fSbrendan 		spa_add_l2cache(spa, config);
32386809eb4eSEric Schrock 		spa_config_exit(spa, SCL_CONFIG, FTAG);
3239fa9e4066Sahrens 	}
3240fa9e4066Sahrens 
3241fa9e4066Sahrens 	spa_unload(spa);
3242fa9e4066Sahrens 	spa_deactivate(spa);
3243fa9e4066Sahrens 	spa_remove(spa);
3244fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
3245fa9e4066Sahrens 
	/* NOTE(review): returned nvlist is presumably freed by the caller
	 * -- confirm against the ioctl path. */
3246fa9e4066Sahrens 	return (config);
3247fa9e4066Sahrens }
3248fa9e4066Sahrens 
3249fa9e4066Sahrens /*
3250fa9e4066Sahrens  * Pool export/destroy
3251fa9e4066Sahrens  *
3252fa9e4066Sahrens  * The act of destroying or exporting a pool is very simple.  We make sure there
3253fa9e4066Sahrens  * is no more pending I/O and any references to the pool are gone.  Then, we
3254fa9e4066Sahrens  * update the pool state and sync all the labels to disk, removing the
3255394ab0cbSGeorge Wilson  * configuration from the cache afterwards. If the 'hardforce' flag is set, then
3256394ab0cbSGeorge Wilson  * we don't sync the labels or remove the configuration cache.
3257fa9e4066Sahrens  */
3258fa9e4066Sahrens static int
325989a89ebfSlling spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
3260394ab0cbSGeorge Wilson     boolean_t force, boolean_t hardforce)
3261fa9e4066Sahrens {
3262fa9e4066Sahrens 	spa_t *spa;
3263fa9e4066Sahrens 
	/*
	 * new_state is POOL_STATE_DESTROYED, POOL_STATE_EXPORTED, or
	 * POOL_STATE_UNINITIALIZED (reset).  If oldconfig is non-NULL it
	 * receives a dup of the pool's final config.
	 */
326444cd46caSbillm 	if (oldconfig)
326544cd46caSbillm 		*oldconfig = NULL;
326644cd46caSbillm 
32678ad4d6ddSJeff Bonwick 	if (!(spa_mode_global & FWRITE))
3268fa9e4066Sahrens 		return (EROFS);
3269fa9e4066Sahrens 
3270fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
3271fa9e4066Sahrens 	if ((spa = spa_lookup(pool)) == NULL) {
3272fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
3273fa9e4066Sahrens 		return (ENOENT);
3274fa9e4066Sahrens 	}
3275fa9e4066Sahrens 
3276ea8dc4b6Seschrock 	/*
3277ea8dc4b6Seschrock 	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
3278ea8dc4b6Seschrock 	 * reacquire the namespace lock, and see if we can export.
3279ea8dc4b6Seschrock 	 */
3280ea8dc4b6Seschrock 	spa_open_ref(spa, FTAG);
3281ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
3282ea8dc4b6Seschrock 	spa_async_suspend(spa);
3283ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
3284ea8dc4b6Seschrock 	spa_close(spa, FTAG);
3285ea8dc4b6Seschrock 
3286fa9e4066Sahrens 	/*
3287fa9e4066Sahrens 	 * The pool will be in core if it's openable,
3288fa9e4066Sahrens 	 * in which case we can modify its state.
3289fa9e4066Sahrens 	 */
3290fa9e4066Sahrens 	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
3291fa9e4066Sahrens 		/*
3292fa9e4066Sahrens 		 * Objsets may be open only because they're dirty, so we
3293fa9e4066Sahrens 		 * have to force it to sync before checking spa_refcnt.
3294fa9e4066Sahrens 		 */
3295fa9e4066Sahrens 		txg_wait_synced(spa->spa_dsl_pool, 0);
3296fa9e4066Sahrens 
3297ea8dc4b6Seschrock 		/*
3298ea8dc4b6Seschrock 		 * A pool cannot be exported or destroyed if there are active
3299ea8dc4b6Seschrock 		 * references.  If we are resetting a pool, allow references by
3300ea8dc4b6Seschrock 		 * fault injection handlers.
3301ea8dc4b6Seschrock 		 */
3302ea8dc4b6Seschrock 		if (!spa_refcount_zero(spa) ||
3303ea8dc4b6Seschrock 		    (spa->spa_inject_ref != 0 &&
3304ea8dc4b6Seschrock 		    new_state != POOL_STATE_UNINITIALIZED)) {
3305ea8dc4b6Seschrock 			spa_async_resume(spa);
3306fa9e4066Sahrens 			mutex_exit(&spa_namespace_lock);
3307fa9e4066Sahrens 			return (EBUSY);
3308fa9e4066Sahrens 		}
3309fa9e4066Sahrens 
331089a89ebfSlling 		/*
331189a89ebfSlling 		 * A pool cannot be exported if it has an active shared spare.
331289a89ebfSlling 		 * This is to prevent other pools stealing the active spare
331389a89ebfSlling 		 * from an exported pool. At user's own will, such pool can
331489a89ebfSlling 		 * be forcedly exported.
331589a89ebfSlling 		 */
331689a89ebfSlling 		if (!force && new_state == POOL_STATE_EXPORTED &&
331789a89ebfSlling 		    spa_has_active_shared_spare(spa)) {
331889a89ebfSlling 			spa_async_resume(spa);
331989a89ebfSlling 			mutex_exit(&spa_namespace_lock);
332089a89ebfSlling 			return (EXDEV);
332189a89ebfSlling 		}
332289a89ebfSlling 
3323fa9e4066Sahrens 		/*
3324fa9e4066Sahrens 		 * We want this to be reflected on every label,
3325fa9e4066Sahrens 		 * so mark them all dirty.  spa_unload() will do the
3326fa9e4066Sahrens 		 * final sync that pushes these changes out.
3327fa9e4066Sahrens 		 */
3328394ab0cbSGeorge Wilson 		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
3329e14bb325SJeff Bonwick 			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
3330ea8dc4b6Seschrock 			spa->spa_state = new_state;
33310373e76bSbonwick 			spa->spa_final_txg = spa_last_synced_txg(spa) + 1;
3332ea8dc4b6Seschrock 			vdev_config_dirty(spa->spa_root_vdev);
3333e14bb325SJeff Bonwick 			spa_config_exit(spa, SCL_ALL, FTAG);
3334ea8dc4b6Seschrock 		}
3335fa9e4066Sahrens 	}
3336fa9e4066Sahrens 
33373d7072f8Seschrock 	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);
33383d7072f8Seschrock 
3339fa9e4066Sahrens 	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
3340fa9e4066Sahrens 		spa_unload(spa);
3341fa9e4066Sahrens 		spa_deactivate(spa);
3342fa9e4066Sahrens 	}
3343fa9e4066Sahrens 
334444cd46caSbillm 	if (oldconfig && spa->spa_config)
334544cd46caSbillm 		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);
334644cd46caSbillm 
	/* For a real export/destroy (not a reset), drop the pool from the
	 * config cache unless hardforce suppressed the final sync. */
3347ea8dc4b6Seschrock 	if (new_state != POOL_STATE_UNINITIALIZED) {
3348394ab0cbSGeorge Wilson 		if (!hardforce)
3349394ab0cbSGeorge Wilson 			spa_config_sync(spa, B_TRUE, B_TRUE);
3350ea8dc4b6Seschrock 		spa_remove(spa);
3351ea8dc4b6Seschrock 	}
3352fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
3353fa9e4066Sahrens 
3354fa9e4066Sahrens 	return (0);
3355fa9e4066Sahrens }
3356fa9e4066Sahrens 
/*
 * Destroy a storage pool.
 *
 * Thin wrapper around spa_export_common(): the pool named 'pool' is moved
 * to POOL_STATE_DESTROYED.  No copy of the final config is returned
 * (oldconfig == NULL) and neither the force nor the hardforce flag is set.
 *
 * Returns 0 on success or an errno value (e.g. EBUSY if the pool still has
 * active references) on failure.
 */
int
spa_destroy(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
	    B_FALSE, B_FALSE));
}
3366fa9e4066Sahrens 
/*
 * Export a storage pool.
 *
 * Thin wrapper around spa_export_common() with POOL_STATE_EXPORTED.
 * If 'oldconfig' is non-NULL it receives a duplicate of the pool's final
 * configuration.  'force' permits exporting even when the pool has an
 * active shared spare; 'hardforce' additionally skips dirtying the labels
 * and syncing the cache file on the way out.
 *
 * Returns 0 on success or an errno value (e.g. EBUSY, EXDEV) on failure.
 */
int
spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
    boolean_t hardforce)
{
	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
	    force, hardforce));
}
3377fa9e4066Sahrens 
/*
 * Similar to spa_export(), this unloads the spa_t without actually removing it
 * from the namespace in any way.
 *
 * Passing POOL_STATE_UNINITIALIZED tells spa_export_common() to tolerate
 * references held by fault injection handlers (spa_inject_ref), which is
 * the intended use of a reset.
 *
 * Returns 0 on success or an errno value on failure.
 */
int
spa_reset(char *pool)
{
	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
	    B_FALSE, B_FALSE));
}
3388ea8dc4b6Seschrock 
3389fa9e4066Sahrens /*
3390fa9e4066Sahrens  * ==========================================================================
3391fa9e4066Sahrens  * Device manipulation
3392fa9e4066Sahrens  * ==========================================================================
3393fa9e4066Sahrens  */
3394fa9e4066Sahrens 
/*
 * Add a device to a storage pool.
 *
 * 'nvroot' describes the new top-level vdev(s) and/or aux devices (spares,
 * l2cache) to add.  All work happens under the spa_vdev_enter()/
 * spa_vdev_exit() protocol: every return path, error or success, funnels
 * through spa_vdev_exit() so the state set up by spa_vdev_enter() is
 * always unwound.
 *
 * Returns 0 on success or an errno value (e.g. EINVAL for an empty or
 * unparsable request) on failure.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	/* Missing spare/l2cache arrays simply mean "none requested". */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Reject a request that adds nothing at all. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 * If no hole is found, 'id' ends up equal to
		 * rvd->vdev_children, i.e. the new vdev is appended.
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}
3494fa9e4066Sahrens 
/*
 * Attach a device to a mirror.  The arguments are the path to any device
 * in the mirror, and the nvroot for the new device.  If the path specifies
 * a device that is not mirrored, we automatically insert the mirror vdev.
 *
 * If 'replacing' is specified, the new device is intended to replace the
 * existing device; in this case the two devices are made into their own
 * mirror using the 'replacing' vdev, which is functionally identical to
 * the mirror vdev (it actually reuses all the same ops) but has a few
 * extra rules: you can't attach to it after it's been created, and upon
 * completion of resilvering, the first disk (the one being replaced)
 * is automatically detached.
 *
 * 'guid' identifies the existing leaf vdev; 'nvroot' must describe exactly
 * one new leaf device.  Returns 0 on success or an errno value (ENODEV,
 * ENOTSUP, EINVAL, EOVERFLOW, EDOM, ...) on failure; all paths exit via
 * spa_vdev_exit().
 */
int
spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
{
	uint64_t txg, open_txg;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
	vdev_ops_t *pvops;
	char *oldvdpath, *newvdpath;
	int newvd_isspare;
	int error;

	txg = spa_vdev_enter(spa);

	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (oldvd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	/* Attach is only meaningful against a leaf device. */
	if (!oldvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = oldvd->vdev_parent;

	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* The caller must supply exactly one new device. */
	if (newrootvd->vdev_children != 1)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	newvd = newrootvd->vdev_child[0];

	if (!newvd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));

	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
		return (spa_vdev_exit(spa, newrootvd, txg, error));

	/*
	 * Spares can't replace logs
	 */
	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

	if (!replacing) {
		/*
		 * For attach, the only allowable parent is a mirror or the root
		 * vdev.
		 */
		if (pvd->vdev_ops != &vdev_mirror_ops &&
		    pvd->vdev_ops != &vdev_root_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		pvops = &vdev_mirror_ops;
	} else {
		/*
		 * Active hot spares can only be replaced by inactive hot
		 * spares.
		 */
		if (pvd->vdev_ops == &vdev_spare_ops &&
		    pvd->vdev_child[1] == oldvd &&
		    !spa_has_spare(spa, newvd->vdev_guid))
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));

		/*
		 * If the source is a hot spare, and the parent isn't already a
		 * spare, then we want to create a new hot spare.  Otherwise, we
		 * want to create a replacing vdev.  The user is not allowed to
		 * attach to a spared vdev child unless the 'isspare' state is
		 * the same (spare replaces spare, non-spare replaces
		 * non-spare).
		 */
		if (pvd->vdev_ops == &vdev_replacing_ops)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops == &vdev_spare_ops &&
		    newvd->vdev_isspare != oldvd->vdev_isspare)
			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
		else if (pvd->vdev_ops != &vdev_spare_ops &&
		    newvd->vdev_isspare)
			pvops = &vdev_spare_ops;
		else
			pvops = &vdev_replacing_ops;
	}

	/*
	 * Make sure the new device is big enough.
	 */
	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));

	/*
	 * The new device cannot have a higher alignment requirement
	 * than the top-level vdev.
	 */
	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));

	/*
	 * If this is an in-place replacement, update oldvd's path and devid
	 * to make it distinguishable from newvd, and unopenable from now on.
	 */
	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
		spa_strfree(oldvd->vdev_path);
		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
		    KM_SLEEP);
		/* "+ 5" above covers "/", "old", and the terminating NUL */
		(void) sprintf(oldvd->vdev_path, "%s/%s",
		    newvd->vdev_path, "old");
		if (oldvd->vdev_devid != NULL) {
			spa_strfree(oldvd->vdev_devid);
			oldvd->vdev_devid = NULL;
		}
	}

	/*
	 * If the parent is not a mirror, or if we're replacing, insert the new
	 * mirror/replacing/spare vdev above oldvd.
	 */
	if (pvd->vdev_ops != pvops)
		pvd = vdev_add_parent(oldvd, pvops);

	ASSERT(pvd->vdev_top->vdev_parent == rvd);
	ASSERT(pvd->vdev_ops == pvops);
	ASSERT(oldvd->vdev_parent == pvd);

	/*
	 * Extract the new device from its root and add it to pvd.
	 */
	vdev_remove_child(newrootvd, newvd);
	newvd->vdev_id = pvd->vdev_children;
	/* The replacement inherits the creation txg of the device it joins. */
	newvd->vdev_crtxg = oldvd->vdev_crtxg;
	vdev_add_child(pvd, newvd);

	tvd = newvd->vdev_top;
	ASSERT(pvd->vdev_top == tvd);
	ASSERT(tvd->vdev_parent == rvd);

	vdev_config_dirty(tvd);

	/*
	 * Set newvd's DTL to [TXG_INITIAL, open_txg].  It will propagate
	 * upward when spa_vdev_exit() calls vdev_dtl_reassess().
	 */
	open_txg = txg + TXG_CONCURRENT_STATES - 1;

	vdev_dtl_dirty(newvd, DTL_MISSING,
	    TXG_INITIAL, open_txg - TXG_INITIAL + 1);

	if (newvd->vdev_isspare) {
		spa_spare_activate(newvd);
		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
	}

	/*
	 * Copy the paths now: spa_vdev_exit() below frees newrootvd (and the
	 * history log needs strings that outlive it).
	 */
	oldvdpath = spa_strdup(oldvd->vdev_path);
	newvdpath = spa_strdup(newvd->vdev_path);
	newvd_isspare = newvd->vdev_isspare;

	/*
	 * Mark newvd's DTL dirty in this txg.
	 */
	vdev_dirty(tvd, VDD_DTL, newvd, txg);

	(void) spa_vdev_exit(spa, newrootvd, open_txg, 0);

	spa_history_internal_log(LOG_POOL_VDEV_ATTACH, spa, NULL,
	    CRED(),  "%s vdev=%s %s vdev=%s",
	    replacing && newvd_isspare ? "spare in" :
	    replacing ? "replace" : "attach", newvdpath,
	    replacing ? "for" : "to", oldvdpath);

	spa_strfree(oldvdpath);
	spa_strfree(newvdpath);

	/*
	 * Kick off a resilver to update newvd.
	 */
	VERIFY3U(spa_scrub(spa, POOL_SCRUB_RESILVER), ==, 0);

	return (0);
}
3687fa9e4066Sahrens 
/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 *
 * 'guid' identifies the leaf vdev to detach; 'pguid', if nonzero, pins the
 * expected parent guid so a racing topology change aborts the detach (see
 * the long comment below).  Returns 0 on success or an errno value
 * (ENODEV, ENOTSUP, EBUSY) on failure; all paths exit via spa_vdev_exit().
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	size_t len;

	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	/* Only leaf devices can be detached. */
	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).  If the user decides to cancel
	 * the replace by detaching C, the expected behavior is to end up
	 * M(A,B).  But suppose that right after deciding to detach C,
	 * the replacement of B completes.  We would have M(A,C), and then
	 * ask to detach C, which would leave us with just A -- not what
	 * the user wanted.  To prevent this, we make sure that the
	 * parent/child relationship hasn't changed -- in this example,
	 * that C's parent is still the replacing vdev R.
	 */
	if (pvd->vdev_guid != pguid && pguid != 0)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * If replace_done is specified, only remove this device if it's
	 * the first child of a replacing vdev.  For the 'spare' vdev, either
	 * disk can be removed.
	 */
	if (replace_done) {
		if (pvd->vdev_ops == &vdev_replacing_ops) {
			if (vd->vdev_id != 0)
				return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		} else if (pvd->vdev_ops != &vdev_spare_ops) {
			return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
		}
	}

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If this device has the only valid copy of some data,
	 * we cannot safely detach it.
	 */
	if (vdev_dtl_required(vd))
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	ASSERT(pvd->vdev_children >= 2);

	/*
	 * If we are detaching the second disk from a replacing vdev, then
	 * check to see if we changed the original vdev's path to have "/old"
	 * at the end in spa_vdev_attach().  If so, undo that change now.
	 */
	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id == 1 &&
	    pvd->vdev_child[0]->vdev_path != NULL &&
	    pvd->vdev_child[1]->vdev_path != NULL) {
		ASSERT(pvd->vdev_child[1] == vd);
		cvd = pvd->vdev_child[0];
		len = strlen(vd->vdev_path);
		if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
		    strcmp(cvd->vdev_path + len, "/old") == 0) {
			spa_strfree(cvd->vdev_path);
			cvd->vdev_path = spa_strdup(vd->vdev_path);
		}
	}

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0 && pvd->vdev_child[1]->vdev_isspare)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	/* NOTE: error is deliberately ignored here, per the comment above. */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[0];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.
	 * We must do this before vdev_remove_parent(), because that can
	 * change the GUID if it creates a new toplevel GUID.  For a similar
	 * reason, we must remove the spare now, in the same txg as the detach;
	 * otherwise someone could attach a new sibling, change the GUID, and
	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1)
		vdev_remove_parent(cvd);

	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the 'autoexpand' property is set on the pool then automatically
	 * try to expand the size of the pool. For example if the device we
	 * just detached was smaller than the others, it may be possible to
	 * add metaslabs (i.e. grow the pool). We need to reopen the vdev
	 * first so that we can obtain the updated sizes of the leaf vdevs.
	 */
	if (spa->spa_autoexpand) {
		vdev_reopen(tvd);
		vdev_expand(tvd, txg);
	}

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	/* This overwrites the ignored vdev_label_init() result above. */
	error = spa_vdev_exit(spa, vd, txg, 0);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *myspa = spa;
		spa = NULL;
		mutex_enter(&spa_namespace_lock);
		while ((spa = spa_next(spa)) != NULL) {
			if (spa->spa_state != POOL_STATE_ACTIVE)
				continue;
			if (spa == myspa)
				continue;
			/*
			 * Hold a reference and drop the namespace lock while
			 * operating on the other pool, then re-take it to
			 * continue the iteration safely.
			 */
			spa_open_ref(spa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(spa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);
	}

	return (error);
}
390399653d4eSeschrock 
3904e14bb325SJeff Bonwick static nvlist_t *
3905e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
390699653d4eSeschrock {
3907e14bb325SJeff Bonwick 	for (int i = 0; i < count; i++) {
3908e14bb325SJeff Bonwick 		uint64_t guid;
390999653d4eSeschrock 
3910e14bb325SJeff Bonwick 		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
3911e14bb325SJeff Bonwick 		    &guid) == 0);
391299653d4eSeschrock 
3913e14bb325SJeff Bonwick 		if (guid == target_guid)
3914e14bb325SJeff Bonwick 			return (nvpp[i]);
391599653d4eSeschrock 	}
391699653d4eSeschrock 
3917e14bb325SJeff Bonwick 	return (NULL);
3918fa94a07fSbrendan }
3919fa94a07fSbrendan 
3920e14bb325SJeff Bonwick static void
3921e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
3922e14bb325SJeff Bonwick 	nvlist_t *dev_to_remove)
3923fa94a07fSbrendan {
3924e14bb325SJeff Bonwick 	nvlist_t **newdev = NULL;
3925fa94a07fSbrendan 
3926e14bb325SJeff Bonwick 	if (count > 1)
3927e14bb325SJeff Bonwick 		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);
3928fa94a07fSbrendan 
3929e14bb325SJeff Bonwick 	for (int i = 0, j = 0; i < count; i++) {
3930e14bb325SJeff Bonwick 		if (dev[i] == dev_to_remove)
3931e14bb325SJeff Bonwick 			continue;
3932e14bb325SJeff Bonwick 		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
3933fa94a07fSbrendan 	}
3934fa94a07fSbrendan 
3935e14bb325SJeff Bonwick 	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
3936e14bb325SJeff Bonwick 	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);
3937fa94a07fSbrendan 
3938e14bb325SJeff Bonwick 	for (int i = 0; i < count - 1; i++)
3939e14bb325SJeff Bonwick 		nvlist_free(newdev[i]);
3940fa94a07fSbrendan 
3941e14bb325SJeff Bonwick 	if (count > 1)
3942e14bb325SJeff Bonwick 		kmem_free(newdev, (count - 1) * sizeof (void *));
3943fa94a07fSbrendan }
3944fa94a07fSbrendan 
394588ecc943SGeorge Wilson /*
394688ecc943SGeorge Wilson  * Removing a device from the vdev namespace requires several steps
394788ecc943SGeorge Wilson  * and can take a significant amount of time.  As a result we use
394888ecc943SGeorge Wilson  * the spa_vdev_config_[enter/exit] functions which allow us to
394988ecc943SGeorge Wilson  * grab and release the spa_config_lock while still holding the namespace
395088ecc943SGeorge Wilson  * lock.  During each step the configuration is synced out.
395188ecc943SGeorge Wilson  */
395288ecc943SGeorge Wilson 
395388ecc943SGeorge Wilson /*
395488ecc943SGeorge Wilson  * Evacuate the device.
395588ecc943SGeorge Wilson  */
/*
 * Evacuate all data from "vd" (a top-level vdev) so it can be removed.
 * Caller must hold spa_namespace_lock but NOT the config lock, since the
 * evacuation does I/O.  Only slog vdevs are evacuable here; anything else
 * returns ENOTSUP (until bp rewrite exists).  On success the vdev is
 * marked "removing" and the change is synced out; returns 0 or an errno.
 */
395688ecc943SGeorge Wilson int
395788ecc943SGeorge Wilson spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
395888ecc943SGeorge Wilson {
3959a1521560SJeff Bonwick 	int error = 0;
396088ecc943SGeorge Wilson 	uint64_t txg;
396188ecc943SGeorge Wilson 
396288ecc943SGeorge Wilson 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
396388ecc943SGeorge Wilson 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
3964b24ab676SJeff Bonwick 	ASSERT(vd == vd->vdev_top);
396588ecc943SGeorge Wilson 
396688ecc943SGeorge Wilson 	/*
396788ecc943SGeorge Wilson 	 * Evacuate the device.  We don't hold the config lock as writer
396888ecc943SGeorge Wilson 	 * since we need to do I/O but we do keep the
396988ecc943SGeorge Wilson 	 * spa_namespace_lock held.  Once this completes the device
397088ecc943SGeorge Wilson 	 * should no longer have any blocks allocated on it.
397188ecc943SGeorge Wilson 	 */
397288ecc943SGeorge Wilson 	if (vd->vdev_islog) {
		/* Offline the intent log on every dataset in the pool. */
3973a1521560SJeff Bonwick 		error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
3974a1521560SJeff Bonwick 		    NULL, DS_FIND_CHILDREN);
3975a1521560SJeff Bonwick 	} else {
3976a1521560SJeff Bonwick 		error = ENOTSUP;	/* until we have bp rewrite */
397788ecc943SGeorge Wilson 	}
397888ecc943SGeorge Wilson 
	/* Wait for in-flight state to settle even on the error path. */
3979a1521560SJeff Bonwick 	txg_wait_synced(spa_get_dsl(spa), 0);
3980a1521560SJeff Bonwick 
3981a1521560SJeff Bonwick 	if (error)
3982a1521560SJeff Bonwick 		return (error);
3983a1521560SJeff Bonwick 
398488ecc943SGeorge Wilson 	/*
3985a1521560SJeff Bonwick 	 * The evacuation succeeded.  Remove any remaining MOS metadata
3986a1521560SJeff Bonwick 	 * associated with this vdev, and wait for these changes to sync.
398788ecc943SGeorge Wilson 	 */
398888ecc943SGeorge Wilson 	txg = spa_vdev_config_enter(spa);
398988ecc943SGeorge Wilson 	vd->vdev_removing = B_TRUE;
399088ecc943SGeorge Wilson 	vdev_dirty(vd, 0, NULL, txg);
399188ecc943SGeorge Wilson 	vdev_config_dirty(vd);
399288ecc943SGeorge Wilson 	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
399388ecc943SGeorge Wilson 
399488ecc943SGeorge Wilson 	return (0);
399588ecc943SGeorge Wilson }
399688ecc943SGeorge Wilson 
399788ecc943SGeorge Wilson /*
399888ecc943SGeorge Wilson  * Complete the removal by cleaning up the namespace.
399988ecc943SGeorge Wilson  */
/*
 * Finish removing an evacuated top-level vdev: wipe its labels, detach it
 * from the dirty lists, free it, and either compact the root's children
 * (if it was the last child) or leave a hole vdev in its slot so other
 * top-level vdev ids are preserved.  Caller holds spa_namespace_lock and
 * all of SCL_ALL as writer.
 */
400088ecc943SGeorge Wilson void
4001a1521560SJeff Bonwick spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd)
400288ecc943SGeorge Wilson {
400388ecc943SGeorge Wilson 	vdev_t *rvd = spa->spa_root_vdev;
400488ecc943SGeorge Wilson 	uint64_t id = vd->vdev_id;
400588ecc943SGeorge Wilson 	boolean_t last_vdev = (id == (rvd->vdev_children - 1));
400688ecc943SGeorge Wilson 
400788ecc943SGeorge Wilson 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
400888ecc943SGeorge Wilson 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
4009b24ab676SJeff Bonwick 	ASSERT(vd == vd->vdev_top);
401088ecc943SGeorge Wilson 
	/* Scribble over the on-disk labels; best-effort, failure ignored. */
401188ecc943SGeorge Wilson 	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);
4012b24ab676SJeff Bonwick 
	/* Unhook from the dirty lists before freeing the vdev. */
4013b24ab676SJeff Bonwick 	if (list_link_active(&vd->vdev_state_dirty_node))
4014b24ab676SJeff Bonwick 		vdev_state_clean(vd);
4015b24ab676SJeff Bonwick 	if (list_link_active(&vd->vdev_config_dirty_node))
4016b24ab676SJeff Bonwick 		vdev_config_clean(vd);
4017b24ab676SJeff Bonwick 
401888ecc943SGeorge Wilson 	vdev_free(vd);
401988ecc943SGeorge Wilson 
402088ecc943SGeorge Wilson 	if (last_vdev) {
402188ecc943SGeorge Wilson 		vdev_compact_children(rvd);
402288ecc943SGeorge Wilson 	} else {
		/* Keep the id space dense: plug the slot with a hole vdev. */
402388ecc943SGeorge Wilson 		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
402488ecc943SGeorge Wilson 		vdev_add_child(rvd, vd);
402588ecc943SGeorge Wilson 	}
402688ecc943SGeorge Wilson 	vdev_config_dirty(rvd);
402788ecc943SGeorge Wilson 
402888ecc943SGeorge Wilson 	/*
402988ecc943SGeorge Wilson 	 * Reassess the health of our root vdev.
403088ecc943SGeorge Wilson 	 */
403188ecc943SGeorge Wilson 	vdev_reopen(rvd);
403288ecc943SGeorge Wilson }
403388ecc943SGeorge Wilson 
4034fa94a07fSbrendan /*
4035fa94a07fSbrendan  * Remove a device from the pool.  Currently, this supports removing only hot
403688ecc943SGeorge Wilson  * spares, slogs, and level 2 ARC devices.
4037fa94a07fSbrendan  */
/*
 * Remove the device identified by "guid" from the pool.  Handles four
 * cases: hot spares (refused with EBUSY if in use unless "unspare"),
 * l2cache devices (always removable), slog top-level vdevs (evacuated
 * then excised from the namespace), and everything else (ENOTSUP, or
 * ENOENT if no such guid).  May be called with or without
 * spa_namespace_lock held; if the caller does not hold it we take the
 * vdev config lock ourselves and release it via spa_vdev_exit().
 */
4038fa94a07fSbrendan int
4039fa94a07fSbrendan spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
4040fa94a07fSbrendan {
4041fa94a07fSbrendan 	vdev_t *vd;
4042a1521560SJeff Bonwick 	metaslab_group_t *mg;
4043e14bb325SJeff Bonwick 	nvlist_t **spares, **l2cache, *nv;
40448ad4d6ddSJeff Bonwick 	uint64_t txg = 0;
404588ecc943SGeorge Wilson 	uint_t nspares, nl2cache;
4046fa94a07fSbrendan 	int error = 0;
40478ad4d6ddSJeff Bonwick 	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);
4048fa94a07fSbrendan 
40498ad4d6ddSJeff Bonwick 	if (!locked)
40508ad4d6ddSJeff Bonwick 		txg = spa_vdev_enter(spa);
4051fa94a07fSbrendan 
4052c5904d13Seschrock 	vd = spa_lookup_by_guid(spa, guid, B_FALSE);
4053fa94a07fSbrendan 
4054fa94a07fSbrendan 	if (spa->spa_spares.sav_vdevs != NULL &&
4055fa94a07fSbrendan 	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
4056e14bb325SJeff Bonwick 	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
4057e14bb325SJeff Bonwick 	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
4058e14bb325SJeff Bonwick 		/*
4059e14bb325SJeff Bonwick 		 * Only remove the hot spare if it's not currently in use
4060e14bb325SJeff Bonwick 		 * in this pool.
4061e14bb325SJeff Bonwick 		 */
4062e14bb325SJeff Bonwick 		if (vd == NULL || unspare) {
4063e14bb325SJeff Bonwick 			spa_vdev_remove_aux(spa->spa_spares.sav_config,
4064e14bb325SJeff Bonwick 			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
4065e14bb325SJeff Bonwick 			spa_load_spares(spa);
4066e14bb325SJeff Bonwick 			spa->spa_spares.sav_sync = B_TRUE;
4067e14bb325SJeff Bonwick 		} else {
4068e14bb325SJeff Bonwick 			error = EBUSY;
4069e14bb325SJeff Bonwick 		}
4070e14bb325SJeff Bonwick 	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
4071fa94a07fSbrendan 	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
4072e14bb325SJeff Bonwick 	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
4073e14bb325SJeff Bonwick 	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
4074e14bb325SJeff Bonwick 		/*
4075e14bb325SJeff Bonwick 		 * Cache devices can always be removed.
4076e14bb325SJeff Bonwick 		 */
4077e14bb325SJeff Bonwick 		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
4078e14bb325SJeff Bonwick 		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
4079fa94a07fSbrendan 		spa_load_l2cache(spa);
4080fa94a07fSbrendan 		spa->spa_l2cache.sav_sync = B_TRUE;
408188ecc943SGeorge Wilson 	} else if (vd != NULL && vd->vdev_islog) {
408288ecc943SGeorge Wilson 		ASSERT(!locked);
4083b24ab676SJeff Bonwick 		ASSERT(vd == vd->vdev_top);
408488ecc943SGeorge Wilson 
408588ecc943SGeorge Wilson 		/*
408688ecc943SGeorge Wilson 		 * XXX - Once we have bp-rewrite this should
408788ecc943SGeorge Wilson 		 * become the common case.
408888ecc943SGeorge Wilson 		 */
408988ecc943SGeorge Wilson 
4090a1521560SJeff Bonwick 		mg = vd->vdev_mg;
4091a1521560SJeff Bonwick 
409288ecc943SGeorge Wilson 		/*
4093a1521560SJeff Bonwick 		 * Stop allocating from this vdev.
409488ecc943SGeorge Wilson 		 */
4095a1521560SJeff Bonwick 		metaslab_group_passivate(mg);
409688ecc943SGeorge Wilson 
4097b24ab676SJeff Bonwick 		/*
4098b24ab676SJeff Bonwick 		 * Wait for the youngest allocations and frees to sync,
4099b24ab676SJeff Bonwick 		 * and then wait for the deferral of those frees to finish.
4100b24ab676SJeff Bonwick 		 */
4101b24ab676SJeff Bonwick 		spa_vdev_config_exit(spa, NULL,
4102b24ab676SJeff Bonwick 		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);
4103b24ab676SJeff Bonwick 
4104a1521560SJeff Bonwick 		/*
4105a1521560SJeff Bonwick 		 * Attempt to evacuate the vdev.
4106a1521560SJeff Bonwick 		 */
4107a1521560SJeff Bonwick 		error = spa_vdev_remove_evacuate(spa, vd);
4108a1521560SJeff Bonwick 
		/* Reacquire the config lock dropped above. */
410988ecc943SGeorge Wilson 		txg = spa_vdev_config_enter(spa);
411088ecc943SGeorge Wilson 
4111a1521560SJeff Bonwick 		/*
4112a1521560SJeff Bonwick 		 * If we couldn't evacuate the vdev, unwind.
4113a1521560SJeff Bonwick 		 */
4114a1521560SJeff Bonwick 		if (error) {
4115a1521560SJeff Bonwick 			metaslab_group_activate(mg);
4116a1521560SJeff Bonwick 			return (spa_vdev_exit(spa, NULL, txg, error));
4117a1521560SJeff Bonwick 		}
4118a1521560SJeff Bonwick 
4119a1521560SJeff Bonwick 		/*
4120a1521560SJeff Bonwick 		 * Clean up the vdev namespace.
4121a1521560SJeff Bonwick 		 */
4122a1521560SJeff Bonwick 		spa_vdev_remove_from_namespace(spa, vd);
412388ecc943SGeorge Wilson 
4124e14bb325SJeff Bonwick 	} else if (vd != NULL) {
4125e14bb325SJeff Bonwick 		/*
4126e14bb325SJeff Bonwick 		 * Normal vdevs cannot be removed (yet).
4127e14bb325SJeff Bonwick 		 */
4128e14bb325SJeff Bonwick 		error = ENOTSUP;
4129e14bb325SJeff Bonwick 	} else {
4130e14bb325SJeff Bonwick 		/*
4131e14bb325SJeff Bonwick 		 * There is no vdev of any kind with the specified guid.
4132e14bb325SJeff Bonwick 		 */
4133e14bb325SJeff Bonwick 		error = ENOENT;
4134fa94a07fSbrendan 	}
413599653d4eSeschrock 
41368ad4d6ddSJeff Bonwick 	if (!locked)
41378ad4d6ddSJeff Bonwick 		return (spa_vdev_exit(spa, NULL, txg, error));
41388ad4d6ddSJeff Bonwick 
41398ad4d6ddSJeff Bonwick 	return (error);
4140fa9e4066Sahrens }
4141fa9e4066Sahrens 
4142fa9e4066Sahrens /*
41433d7072f8Seschrock  * Find any device that's done replacing, or a vdev marked 'unspare' that's
41443d7072f8Seschrock  * currently spared, so we can detach it.
4145fa9e4066Sahrens  */
/*
 * Depth-first search of the vdev tree rooted at "vd" for a device whose
 * resilver has completed: either the original half of a finished
 * "replacing" vdev, or the original disk under a spare whose replacement
 * has the 'unspare' flag set.  Returns the vdev to detach, or NULL.
 */
4146ea8dc4b6Seschrock static vdev_t *
41473d7072f8Seschrock spa_vdev_resilver_done_hunt(vdev_t *vd)
4148fa9e4066Sahrens {
4149ea8dc4b6Seschrock 	vdev_t *newvd, *oldvd;
4150fa9e4066Sahrens 
	/* Recurse first so the deepest completed replacement wins. */
4151573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++) {
41523d7072f8Seschrock 		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
4153ea8dc4b6Seschrock 		if (oldvd != NULL)
4154ea8dc4b6Seschrock 			return (oldvd);
4155ea8dc4b6Seschrock 	}
4156fa9e4066Sahrens 
41573d7072f8Seschrock 	/*
41583d7072f8Seschrock 	 * Check for a completed replacement.
41593d7072f8Seschrock 	 */
4160fa9e4066Sahrens 	if (vd->vdev_ops == &vdev_replacing_ops && vd->vdev_children == 2) {
		/* child[0] is the original device, child[1] the replacement */
4161ea8dc4b6Seschrock 		oldvd = vd->vdev_child[0];
4162ea8dc4b6Seschrock 		newvd = vd->vdev_child[1];
4163ea8dc4b6Seschrock 
		/* Detach only if the new side is whole and old isn't needed. */
41648ad4d6ddSJeff Bonwick 		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
41658ad4d6ddSJeff Bonwick 		    !vdev_dtl_required(oldvd))
4166ea8dc4b6Seschrock 			return (oldvd);
4167fa9e4066Sahrens 	}
4168ea8dc4b6Seschrock 
41693d7072f8Seschrock 	/*
41703d7072f8Seschrock 	 * Check for a completed resilver with the 'unspare' flag set.
41713d7072f8Seschrock 	 */
41723d7072f8Seschrock 	if (vd->vdev_ops == &vdev_spare_ops && vd->vdev_children == 2) {
		/* NOTE: child order is reversed here — child[0] is the new dev */
41733d7072f8Seschrock 		newvd = vd->vdev_child[0];
41743d7072f8Seschrock 		oldvd = vd->vdev_child[1];
41753d7072f8Seschrock 
41763d7072f8Seschrock 		if (newvd->vdev_unspare &&
41778ad4d6ddSJeff Bonwick 		    vdev_dtl_empty(newvd, DTL_MISSING) &&
41788ad4d6ddSJeff Bonwick 		    !vdev_dtl_required(oldvd)) {
41793d7072f8Seschrock 			newvd->vdev_unspare = 0;
41803d7072f8Seschrock 			return (oldvd);
41813d7072f8Seschrock 		}
41823d7072f8Seschrock 	}
41833d7072f8Seschrock 
4184ea8dc4b6Seschrock 	return (NULL);
4185fa9e4066Sahrens }
4186fa9e4066Sahrens 
/*
 * Detach every device whose resilver is complete.  Because
 * spa_vdev_detach() needs the config lock dropped, we capture the guids
 * we need while holding SCL_ALL, drop it around each detach, then rescan
 * the tree from the top — the tree may have changed under us.
 */
4187ea8dc4b6Seschrock static void
41883d7072f8Seschrock spa_vdev_resilver_done(spa_t *spa)
4189fa9e4066Sahrens {
41908ad4d6ddSJeff Bonwick 	vdev_t *vd, *pvd, *ppvd;
41918ad4d6ddSJeff Bonwick 	uint64_t guid, sguid, pguid, ppguid;
4192ea8dc4b6Seschrock 
41938ad4d6ddSJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
4194ea8dc4b6Seschrock 
41953d7072f8Seschrock 	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		/* Record guids now; vd/pvd/ppvd may be freed once we unlock. */
41968ad4d6ddSJeff Bonwick 		pvd = vd->vdev_parent;
41978ad4d6ddSJeff Bonwick 		ppvd = pvd->vdev_parent;
4198ea8dc4b6Seschrock 		guid = vd->vdev_guid;
41998ad4d6ddSJeff Bonwick 		pguid = pvd->vdev_guid;
42008ad4d6ddSJeff Bonwick 		ppguid = ppvd->vdev_guid;
42018ad4d6ddSJeff Bonwick 		sguid = 0;
420299653d4eSeschrock 		/*
420399653d4eSeschrock 		 * If we have just finished replacing a hot spared device, then
420499653d4eSeschrock 		 * we need to detach the parent's first child (the original hot
420599653d4eSeschrock 		 * spare) as well.
420699653d4eSeschrock 		 */
42078ad4d6ddSJeff Bonwick 		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0) {
420899653d4eSeschrock 			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
42098ad4d6ddSJeff Bonwick 			ASSERT(ppvd->vdev_children == 2);
42108ad4d6ddSJeff Bonwick 			sguid = ppvd->vdev_child[1]->vdev_guid;
421199653d4eSeschrock 		}
		/* spa_vdev_detach() takes its own locks; drop ours around it. */
42128ad4d6ddSJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
42138ad4d6ddSJeff Bonwick 		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
4214ea8dc4b6Seschrock 			return;
42158ad4d6ddSJeff Bonwick 		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
421699653d4eSeschrock 			return;
42178ad4d6ddSJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
4218fa9e4066Sahrens 	}
4219fa9e4066Sahrens 
42208ad4d6ddSJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
4221fa9e4066Sahrens }
4222fa9e4066Sahrens 
4223c67d9675Seschrock /*
4224b3388e4fSEric Taylor  * Update the stored path or FRU for this vdev.
4225c67d9675Seschrock  */
/*
 * Shared implementation for spa_vdev_setpath()/spa_vdev_setfru(): replace
 * the stored path (ispath == B_TRUE) or FRU string of the leaf vdev with
 * the given guid.  Returns 0, ENOENT if the guid is unknown, or ENOTSUP
 * for a non-leaf vdev.
 */
4226c67d9675Seschrock int
42276809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
42286809eb4eSEric Schrock     boolean_t ispath)
4229c67d9675Seschrock {
4230c5904d13Seschrock 	vdev_t *vd;
4231c67d9675Seschrock 
4232b3388e4fSEric Taylor 	spa_vdev_state_enter(spa, SCL_ALL);
4233c67d9675Seschrock 
42346809eb4eSEric Schrock 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
4235b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOENT));
4236c67d9675Seschrock 
42370e34b6a7Sbonwick 	if (!vd->vdev_ops->vdev_op_leaf)
4238b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
42390e34b6a7Sbonwick 
42406809eb4eSEric Schrock 	if (ispath) {
		/*
		 * NOTE(review): the path branch frees unconditionally while
		 * the FRU branch NULL-checks — presumably leaf vdevs always
		 * have a non-NULL vdev_path but may lack a FRU; confirm.
		 */
42416809eb4eSEric Schrock 		spa_strfree(vd->vdev_path);
42426809eb4eSEric Schrock 		vd->vdev_path = spa_strdup(value);
42436809eb4eSEric Schrock 	} else {
42446809eb4eSEric Schrock 		if (vd->vdev_fru != NULL)
42456809eb4eSEric Schrock 			spa_strfree(vd->vdev_fru);
42466809eb4eSEric Schrock 		vd->vdev_fru = spa_strdup(value);
42476809eb4eSEric Schrock 	}
4248c67d9675Seschrock 
4249b3388e4fSEric Taylor 	return (spa_vdev_state_exit(spa, vd, 0));
4250c67d9675Seschrock }
4251c67d9675Seschrock 
/*
 * Update the stored device path for the vdev with the given guid.
 */
42526809eb4eSEric Schrock int
42536809eb4eSEric Schrock spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
42546809eb4eSEric Schrock {
42556809eb4eSEric Schrock 	return (spa_vdev_set_common(spa, guid, newpath, B_TRUE));
42566809eb4eSEric Schrock }
42576809eb4eSEric Schrock 
/*
 * Update the stored FRU (field-replaceable unit) for the vdev with the
 * given guid.
 */
42586809eb4eSEric Schrock int
42596809eb4eSEric Schrock spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
42606809eb4eSEric Schrock {
42616809eb4eSEric Schrock 	return (spa_vdev_set_common(spa, guid, newfru, B_FALSE));
42626809eb4eSEric Schrock }
42636809eb4eSEric Schrock 
4264fa9e4066Sahrens /*
4265fa9e4066Sahrens  * ==========================================================================
4266fa9e4066Sahrens  * SPA Scrubbing
4267fa9e4066Sahrens  * ==========================================================================
4268fa9e4066Sahrens  */
4269fa9e4066Sahrens 
/*
 * Start, or cancel, a scrub or resilver of the pool.  Returns 0 on
 * success (or when a requested resilver has nothing to do), ENOTSUP for
 * an unknown type, EBUSY when a scrub is requested while a resilver is
 * in progress, or the error from the DSL scrub entry points.
 */
4270ea8dc4b6Seschrock int
4271088f3894Sahrens spa_scrub(spa_t *spa, pool_scrub_type_t type)
4272fa9e4066Sahrens {
4273e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
4274bb8b5132Sek 
4275fa9e4066Sahrens 	if ((uint_t)type >= POOL_SCRUB_TYPES)
4276fa9e4066Sahrens 		return (ENOTSUP);
4277fa9e4066Sahrens 
4278fa9e4066Sahrens 	/*
4279088f3894Sahrens 	 * If a resilver was requested, but there is no DTL on a
4280088f3894Sahrens 	 * writeable leaf device, we have nothing to do.
4281fa9e4066Sahrens 	 */
4282088f3894Sahrens 	if (type == POOL_SCRUB_RESILVER &&
4283088f3894Sahrens 	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		/* Nothing to resilver; fire the "done" async task directly. */
4284088f3894Sahrens 		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
4285ea8dc4b6Seschrock 		return (0);
4286ea8dc4b6Seschrock 	}
4287fa9e4066Sahrens 
	/* Don't let an explicit scrub preempt an active resilver. */
4288088f3894Sahrens 	if (type == POOL_SCRUB_EVERYTHING &&
4289088f3894Sahrens 	    spa->spa_dsl_pool->dp_scrub_func != SCRUB_FUNC_NONE &&
4290088f3894Sahrens 	    spa->spa_dsl_pool->dp_scrub_isresilver)
4291088f3894Sahrens 		return (EBUSY);
4292fa9e4066Sahrens 
4293088f3894Sahrens 	if (type == POOL_SCRUB_EVERYTHING || type == POOL_SCRUB_RESILVER) {
4294088f3894Sahrens 		return (dsl_pool_scrub_clean(spa->spa_dsl_pool));
4295088f3894Sahrens 	} else if (type == POOL_SCRUB_NONE) {
4296088f3894Sahrens 		return (dsl_pool_scrub_cancel(spa->spa_dsl_pool));
4297ea8dc4b6Seschrock 	} else {
4298088f3894Sahrens 		return (EINVAL);
4299fa9e4066Sahrens 	}
4300fa9e4066Sahrens }
4301fa9e4066Sahrens 
4302ea8dc4b6Seschrock /*
4303ea8dc4b6Seschrock  * ==========================================================================
4304ea8dc4b6Seschrock  * SPA async task processing
4305ea8dc4b6Seschrock  * ==========================================================================
4306ea8dc4b6Seschrock  */
4307ea8dc4b6Seschrock 
/*
 * Recursively mark any vdev with vdev_remove_wanted set as REMOVED and
 * reset its error counters.  Called from the async thread with the vdev
 * state lock held (SCL_NONE spa_vdev_state_enter in the caller).
 */
4308ea8dc4b6Seschrock static void
43093d7072f8Seschrock spa_async_remove(spa_t *spa, vdev_t *vd)
4310fa9e4066Sahrens {
431149cf58c0SBrendan Gregg - Sun Microsystems 	if (vd->vdev_remove_wanted) {
431249cf58c0SBrendan Gregg - Sun Microsystems 		vd->vdev_remove_wanted = 0;
431349cf58c0SBrendan Gregg - Sun Microsystems 		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);
43141d713200SEric Schrock 
43151d713200SEric Schrock 		/*
43161d713200SEric Schrock 		 * We want to clear the stats, but we don't want to do a full
43171d713200SEric Schrock 		 * vdev_clear() as that will cause us to throw away
43181d713200SEric Schrock 		 * degraded/faulted state as well as attempt to reopen the
43191d713200SEric Schrock 		 * device, all of which is a waste.
43201d713200SEric Schrock 		 */
43211d713200SEric Schrock 		vd->vdev_stat.vs_read_errors = 0;
43221d713200SEric Schrock 		vd->vdev_stat.vs_write_errors = 0;
43231d713200SEric Schrock 		vd->vdev_stat.vs_checksum_errors = 0;
43241d713200SEric Schrock 
4325e14bb325SJeff Bonwick 		vdev_state_dirty(vd->vdev_top);
4326ea8dc4b6Seschrock 	}
432749cf58c0SBrendan Gregg - Sun Microsystems 
	/* Walk the whole subtree; removal may be wanted anywhere below. */
4328e14bb325SJeff Bonwick 	for (int c = 0; c < vd->vdev_children; c++)
432949cf58c0SBrendan Gregg - Sun Microsystems 		spa_async_remove(spa, vd->vdev_child[c]);
4330ea8dc4b6Seschrock }
4331fa9e4066Sahrens 
/*
 * Recursively reopen any vdev with vdev_probe_wanted set; the reopen
 * performs the actual probe via vdev_open().
 */
4332e14bb325SJeff Bonwick static void
4333e14bb325SJeff Bonwick spa_async_probe(spa_t *spa, vdev_t *vd)
4334e14bb325SJeff Bonwick {
4335e14bb325SJeff Bonwick 	if (vd->vdev_probe_wanted) {
4336e14bb325SJeff Bonwick 		vd->vdev_probe_wanted = 0;
4337e14bb325SJeff Bonwick 		vdev_reopen(vd);	/* vdev_open() does the actual probe */
4338e14bb325SJeff Bonwick 	}
4339e14bb325SJeff Bonwick 
4340e14bb325SJeff Bonwick 	for (int c = 0; c < vd->vdev_children; c++)
4341e14bb325SJeff Bonwick 		spa_async_probe(spa, vd->vdev_child[c]);
4342e14bb325SJeff Bonwick }
4343e14bb325SJeff Bonwick 
/*
 * If autoexpand is enabled, post a sysevent (ESC_DEV_DLE) for every leaf
 * vdev with a known physical path so userland can react to device size
 * changes.  Recurses through the whole tree rooted at "vd".
 */
4344573ca77eSGeorge Wilson static void
4345573ca77eSGeorge Wilson spa_async_autoexpand(spa_t *spa, vdev_t *vd)
4346573ca77eSGeorge Wilson {
4347573ca77eSGeorge Wilson 	sysevent_id_t eid;
4348573ca77eSGeorge Wilson 	nvlist_t *attr;
4349573ca77eSGeorge Wilson 	char *physpath;
4350573ca77eSGeorge Wilson 
4351573ca77eSGeorge Wilson 	if (!spa->spa_autoexpand)
4352573ca77eSGeorge Wilson 		return;
4353573ca77eSGeorge Wilson 
4354573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++) {
4355573ca77eSGeorge Wilson 		vdev_t *cvd = vd->vdev_child[c];
4356573ca77eSGeorge Wilson 		spa_async_autoexpand(spa, cvd);
4357573ca77eSGeorge Wilson 	}
4358573ca77eSGeorge Wilson 
	/* Only leaves with a physical path generate an event. */
4359573ca77eSGeorge Wilson 	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
4360573ca77eSGeorge Wilson 		return;
4361573ca77eSGeorge Wilson 
4362573ca77eSGeorge Wilson 	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
4363573ca77eSGeorge Wilson 	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);
4364573ca77eSGeorge Wilson 
4365573ca77eSGeorge Wilson 	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4366573ca77eSGeorge Wilson 	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);
4367573ca77eSGeorge Wilson 
4368573ca77eSGeorge Wilson 	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
4369573ca77eSGeorge Wilson 	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);
4370573ca77eSGeorge Wilson 
4371573ca77eSGeorge Wilson 	nvlist_free(attr);
4372573ca77eSGeorge Wilson 	kmem_free(physpath, MAXPATHLEN);
4373573ca77eSGeorge Wilson }
4374573ca77eSGeorge Wilson 
/*
 * Body of the per-pool async worker thread: snapshot and clear the
 * pending task mask, service each requested task in a fixed order
 * (config update, device removal, autoexpand, probe, resilver-done,
 * resilver), then announce completion and exit.
 */
4375ea8dc4b6Seschrock static void
4376ea8dc4b6Seschrock spa_async_thread(spa_t *spa)
4377ea8dc4b6Seschrock {
4378e14bb325SJeff Bonwick 	int tasks;
4379ea8dc4b6Seschrock 
4380ea8dc4b6Seschrock 	ASSERT(spa->spa_sync_on);
4381ea8dc4b6Seschrock 
	/* Atomically claim all currently-pending tasks. */
4382ea8dc4b6Seschrock 	mutex_enter(&spa->spa_async_lock);
4383ea8dc4b6Seschrock 	tasks = spa->spa_async_tasks;
4384ea8dc4b6Seschrock 	spa->spa_async_tasks = 0;
4385ea8dc4b6Seschrock 	mutex_exit(&spa->spa_async_lock);
4386ea8dc4b6Seschrock 
43870373e76bSbonwick 	/*
43880373e76bSbonwick 	 * See if the config needs to be updated.
43890373e76bSbonwick 	 */
43900373e76bSbonwick 	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
4391b24ab676SJeff Bonwick 		uint64_t old_space, new_space;
4392573ca77eSGeorge Wilson 
43930373e76bSbonwick 		mutex_enter(&spa_namespace_lock);
4394b24ab676SJeff Bonwick 		old_space = metaslab_class_get_space(spa_normal_class(spa));
43950373e76bSbonwick 		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
4396b24ab676SJeff Bonwick 		new_space = metaslab_class_get_space(spa_normal_class(spa));
43970373e76bSbonwick 		mutex_exit(&spa_namespace_lock);
4398573ca77eSGeorge Wilson 
4399573ca77eSGeorge Wilson 		/*
4400573ca77eSGeorge Wilson 		 * If the pool grew as a result of the config update,
4401573ca77eSGeorge Wilson 		 * then log an internal history event.
4402573ca77eSGeorge Wilson 		 */
4403b24ab676SJeff Bonwick 		if (new_space != old_space) {
4404c8e1f6d2SMark J Musante 			spa_history_internal_log(LOG_POOL_VDEV_ONLINE,
4405c8e1f6d2SMark J Musante 			    spa, NULL, CRED(),
4406c8e1f6d2SMark J Musante 			    "pool '%s' size: %llu(+%llu)",
4407b24ab676SJeff Bonwick 			    spa_name(spa), new_space, new_space - old_space);
4408573ca77eSGeorge Wilson 		}
44090373e76bSbonwick 	}
44100373e76bSbonwick 
4411ea8dc4b6Seschrock 	/*
44123d7072f8Seschrock 	 * See if any devices need to be marked REMOVED.
4413ea8dc4b6Seschrock 	 */
4414e14bb325SJeff Bonwick 	if (tasks & SPA_ASYNC_REMOVE) {
44158f18d1faSGeorge Wilson 		spa_vdev_state_enter(spa, SCL_NONE);
44163d7072f8Seschrock 		spa_async_remove(spa, spa->spa_root_vdev);
		/* Aux vdevs (l2cache, spares) aren't under the root; walk them too. */
4417e14bb325SJeff Bonwick 		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
441849cf58c0SBrendan Gregg - Sun Microsystems 			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
4419e14bb325SJeff Bonwick 		for (int i = 0; i < spa->spa_spares.sav_count; i++)
442049cf58c0SBrendan Gregg - Sun Microsystems 			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
4421e14bb325SJeff Bonwick 		(void) spa_vdev_state_exit(spa, NULL, 0);
4422e14bb325SJeff Bonwick 	}
4423e14bb325SJeff Bonwick 
4424573ca77eSGeorge Wilson 	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
4425573ca77eSGeorge Wilson 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
4426573ca77eSGeorge Wilson 		spa_async_autoexpand(spa, spa->spa_root_vdev);
4427573ca77eSGeorge Wilson 		spa_config_exit(spa, SCL_CONFIG, FTAG);
4428573ca77eSGeorge Wilson 	}
4429573ca77eSGeorge Wilson 
4430e14bb325SJeff Bonwick 	/*
4431e14bb325SJeff Bonwick 	 * See if any devices need to be probed.
4432e14bb325SJeff Bonwick 	 */
4433e14bb325SJeff Bonwick 	if (tasks & SPA_ASYNC_PROBE) {
44348f18d1faSGeorge Wilson 		spa_vdev_state_enter(spa, SCL_NONE);
4435e14bb325SJeff Bonwick 		spa_async_probe(spa, spa->spa_root_vdev);
4436e14bb325SJeff Bonwick 		(void) spa_vdev_state_exit(spa, NULL, 0);
44373d7072f8Seschrock 	}
4438ea8dc4b6Seschrock 
4439ea8dc4b6Seschrock 	/*
4440ea8dc4b6Seschrock 	 * If any devices are done replacing, detach them.
4441ea8dc4b6Seschrock 	 */
44423d7072f8Seschrock 	if (tasks & SPA_ASYNC_RESILVER_DONE)
44433d7072f8Seschrock 		spa_vdev_resilver_done(spa);
4444fa9e4066Sahrens 
4445ea8dc4b6Seschrock 	/*
4446ea8dc4b6Seschrock 	 * Kick off a resilver.
4447ea8dc4b6Seschrock 	 */
4448088f3894Sahrens 	if (tasks & SPA_ASYNC_RESILVER)
4449088f3894Sahrens 		VERIFY(spa_scrub(spa, POOL_SCRUB_RESILVER) == 0);
4450ea8dc4b6Seschrock 
4451ea8dc4b6Seschrock 	/*
4452ea8dc4b6Seschrock 	 * Let the world know that we're done.
4453ea8dc4b6Seschrock 	 */
4454ea8dc4b6Seschrock 	mutex_enter(&spa->spa_async_lock);
4455ea8dc4b6Seschrock 	spa->spa_async_thread = NULL;
4456ea8dc4b6Seschrock 	cv_broadcast(&spa->spa_async_cv);
4457ea8dc4b6Seschrock 	mutex_exit(&spa->spa_async_lock);
4458ea8dc4b6Seschrock 	thread_exit();
4459ea8dc4b6Seschrock }
4460ea8dc4b6Seschrock 
/*
 * Suspend async task processing: bump the suspend count and wait for any
 * running async thread to finish.  Suspensions nest; see
 * spa_async_resume().
 */
4461ea8dc4b6Seschrock void
4462ea8dc4b6Seschrock spa_async_suspend(spa_t *spa)
4463ea8dc4b6Seschrock {
4464ea8dc4b6Seschrock 	mutex_enter(&spa->spa_async_lock);
4465ea8dc4b6Seschrock 	spa->spa_async_suspended++;
4466ea8dc4b6Seschrock 	while (spa->spa_async_thread != NULL)
4467ea8dc4b6Seschrock 		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
4468ea8dc4b6Seschrock 	mutex_exit(&spa->spa_async_lock);
4469ea8dc4b6Seschrock }
4470ea8dc4b6Seschrock 
/*
 * Undo one spa_async_suspend().  Does not itself dispatch pending tasks;
 * the next spa_async_dispatch() will pick them up.
 */
4471ea8dc4b6Seschrock void
4472ea8dc4b6Seschrock spa_async_resume(spa_t *spa)
4473ea8dc4b6Seschrock {
4474ea8dc4b6Seschrock 	mutex_enter(&spa->spa_async_lock);
4475ea8dc4b6Seschrock 	ASSERT(spa->spa_async_suspended != 0);
4476ea8dc4b6Seschrock 	spa->spa_async_suspended--;
4477ea8dc4b6Seschrock 	mutex_exit(&spa->spa_async_lock);
4478ea8dc4b6Seschrock }
4479ea8dc4b6Seschrock 
/*
 * Spawn the async worker thread if there are pending tasks, processing
 * isn't suspended, no worker is already running, and the root filesystem
 * is writable (we can't make progress on a read-only root).
 */
4480ea8dc4b6Seschrock static void
4481ea8dc4b6Seschrock spa_async_dispatch(spa_t *spa)
4482ea8dc4b6Seschrock {
4483ea8dc4b6Seschrock 	mutex_enter(&spa->spa_async_lock);
4484ea8dc4b6Seschrock 	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
44850373e76bSbonwick 	    spa->spa_async_thread == NULL &&
44860373e76bSbonwick 	    rootdir != NULL && !vn_is_readonly(rootdir))
4487ea8dc4b6Seschrock 		spa->spa_async_thread = thread_create(NULL, 0,
4488ea8dc4b6Seschrock 		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
4489ea8dc4b6Seschrock 	mutex_exit(&spa->spa_async_lock);
4490ea8dc4b6Seschrock }
4491ea8dc4b6Seschrock 
/*
 * Queue an async task (SPA_ASYNC_* bit) for the worker thread.  The task
 * runs at the next spa_async_dispatch(), not immediately.
 */
4492ea8dc4b6Seschrock void
4493ea8dc4b6Seschrock spa_async_request(spa_t *spa, int task)
4494ea8dc4b6Seschrock {
4495ea8dc4b6Seschrock 	mutex_enter(&spa->spa_async_lock);
4496ea8dc4b6Seschrock 	spa->spa_async_tasks |= task;
4497ea8dc4b6Seschrock 	mutex_exit(&spa->spa_async_lock);
4498fa9e4066Sahrens }
4499fa9e4066Sahrens 
4500fa9e4066Sahrens /*
4501fa9e4066Sahrens  * ==========================================================================
4502fa9e4066Sahrens  * SPA syncing routines
4503fa9e4066Sahrens  * ==========================================================================
4504fa9e4066Sahrens  */
/*
 * Free every block on the deferred-free bplist (all must have been born
 * before "txg"), vacate the list, and pre-dirty its first block so the
 * subsequent sync converges quickly.
 */
4505fa9e4066Sahrens static void
4506b24ab676SJeff Bonwick spa_sync_deferred_bplist(spa_t *spa, bplist_t *bpl, dmu_tx_t *tx, uint64_t txg)
4507fa9e4066Sahrens {
4508fa9e4066Sahrens 	blkptr_t blk;
4509fa9e4066Sahrens 	uint64_t itor = 0;
4510fa9e4066Sahrens 	uint8_t c = 1;
4511fa9e4066Sahrens 
4512e14bb325SJeff Bonwick 	while (bplist_iterate(bpl, &itor, &blk) == 0) {
4513e14bb325SJeff Bonwick 		ASSERT(blk.blk_birth < txg);
4514b24ab676SJeff Bonwick 		zio_free(spa, txg, &blk);
4515e14bb325SJeff Bonwick 	}
4516fa9e4066Sahrens 
4517fa9e4066Sahrens 	bplist_vacate(bpl, tx);
4518fa9e4066Sahrens 
4519fa9e4066Sahrens 	/*
4520fa9e4066Sahrens 	 * Pre-dirty the first block so we sync to convergence faster.
4521fa9e4066Sahrens 	 * (Usually only the first block is needed.)
4522fa9e4066Sahrens 	 */
4523b24ab676SJeff Bonwick 	dmu_write(bpl->bpl_mos, spa->spa_deferred_bplist_obj, 0, 1, &c, tx);
4525b24ab676SJeff Bonwick 
/*
 * bplist/traversal callback: issue an async free for "bp" in the txg of
 * "tx", chained off the parent zio passed in "arg".
 */
4525b24ab676SJeff Bonwick static void
4526b24ab676SJeff Bonwick spa_sync_free(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
4527b24ab676SJeff Bonwick {
4528b24ab676SJeff Bonwick 	zio_t *zio = arg;
4529b24ab676SJeff Bonwick 
4530b24ab676SJeff Bonwick 	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
4531b24ab676SJeff Bonwick 	    zio->io_flags));
4532fa9e4066Sahrens }
4534fa9e4066Sahrens 
/*
 * Serialize "nv" with XDR encoding and write it into MOS object "obj",
 * recording the packed size in the object's bonus buffer.  Writes whole
 * SPA_CONFIG_BLOCKSIZE blocks to avoid a read-modify-write of the tail.
 */
4534fa9e4066Sahrens static void
453599653d4eSeschrock spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
4536fa9e4066Sahrens {
4537fa9e4066Sahrens 	char *packed = NULL;
4538f7991ba4STim Haley 	size_t bufsize;
4539fa9e4066Sahrens 	size_t nvsize = 0;
4540fa9e4066Sahrens 	dmu_buf_t *db;
4541fa9e4066Sahrens 
454299653d4eSeschrock 	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);
4543fa9e4066Sahrens 
4544f7991ba4STim Haley 	/*
4545f7991ba4STim Haley 	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
4546f7991ba4STim Haley 	 * information.  This avoids the dbuf_will_dirty() path and
4547f7991ba4STim Haley 	 * saves us a pre-read to get data we don't actually care about.
4548f7991ba4STim Haley 	 */
4549f7991ba4STim Haley 	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
4550f7991ba4STim Haley 	packed = kmem_alloc(bufsize, KM_SLEEP);
4551fa9e4066Sahrens 
455299653d4eSeschrock 	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
4553ea8dc4b6Seschrock 	    KM_SLEEP) == 0);
	/* Zero the round-up slack so on-disk contents are deterministic. */
4554f7991ba4STim Haley 	bzero(packed + nvsize, bufsize - nvsize);
4555fa9e4066Sahrens 
4556f7991ba4STim Haley 	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);
4557fa9e4066Sahrens 
4558f7991ba4STim Haley 	kmem_free(packed, bufsize);
4559fa9e4066Sahrens 
	/* Stash the (unpadded) packed size in the bonus buffer. */
456099653d4eSeschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
4561fa9e4066Sahrens 	dmu_buf_will_dirty(db, tx);
4562fa9e4066Sahrens 	*(uint64_t *)db->db_data = nvsize;
4563ea8dc4b6Seschrock 	dmu_buf_rele(db, FTAG);
4564fa9e4066Sahrens }
4566fa9e4066Sahrens 
/*
 * Sync the auxiliary vdev list "sav" (hot spares or l2cache) to the MOS:
 * allocate the packed-nvlist object on first use, regenerate the nvlist
 * describing each aux vdev, and write it out.  No-op unless sav_sync is
 * set; clears sav_sync when done.  "config"/"entry" name the nvlist key
 * and the pool-directory ZAP entry respectively.
 */
456699653d4eSeschrock static void
4567fa94a07fSbrendan spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
4568fa94a07fSbrendan     const char *config, const char *entry)
456999653d4eSeschrock {
457099653d4eSeschrock 	nvlist_t *nvroot;
4571fa94a07fSbrendan 	nvlist_t **list;
457299653d4eSeschrock 	int i;
457399653d4eSeschrock 
4574fa94a07fSbrendan 	if (!sav->sav_sync)
457599653d4eSeschrock 		return;
457699653d4eSeschrock 
457799653d4eSeschrock 	/*
4578fa94a07fSbrendan 	 * Update the MOS nvlist describing the list of available devices.
4579fa94a07fSbrendan 	 * spa_validate_aux() will have already made sure this nvlist is
45803d7072f8Seschrock 	 * valid and the vdevs are labeled appropriately.
458199653d4eSeschrock 	 */
4582fa94a07fSbrendan 	if (sav->sav_object == 0) {
4583fa94a07fSbrendan 		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
4584fa94a07fSbrendan 		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
4585fa94a07fSbrendan 		    sizeof (uint64_t), tx);
458699653d4eSeschrock 		VERIFY(zap_update(spa->spa_meta_objset,
4587fa94a07fSbrendan 		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
4588fa94a07fSbrendan 		    &sav->sav_object, tx) == 0);
458999653d4eSeschrock 	}
459099653d4eSeschrock 
459199653d4eSeschrock 	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
4592fa94a07fSbrendan 	if (sav->sav_count == 0) {
		/* Empty list: record an explicit zero-length array. */
4593fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
459499653d4eSeschrock 	} else {
4595fa94a07fSbrendan 		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
4596fa94a07fSbrendan 		for (i = 0; i < sav->sav_count; i++)
4597fa94a07fSbrendan 			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
4598fa94a07fSbrendan 			    B_FALSE, B_FALSE, B_TRUE);
4599fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
4600fa94a07fSbrendan 		    sav->sav_count) == 0);
4601fa94a07fSbrendan 		for (i = 0; i < sav->sav_count; i++)
4602fa94a07fSbrendan 			nvlist_free(list[i]);
4603fa94a07fSbrendan 		kmem_free(list, sav->sav_count * sizeof (void *));
460499653d4eSeschrock 	}
460599653d4eSeschrock 
4606fa94a07fSbrendan 	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
460706eeb2adSek 	nvlist_free(nvroot);
460899653d4eSeschrock 
4609fa94a07fSbrendan 	sav->sav_sync = B_FALSE;
461099653d4eSeschrock }
461299653d4eSeschrock 
461399653d4eSeschrock static void
461499653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
461599653d4eSeschrock {
461699653d4eSeschrock 	nvlist_t *config;
461799653d4eSeschrock 
4618e14bb325SJeff Bonwick 	if (list_is_empty(&spa->spa_config_dirty_list))
461999653d4eSeschrock 		return;
462099653d4eSeschrock 
4621e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4622e14bb325SJeff Bonwick 
4623e14bb325SJeff Bonwick 	config = spa_config_generate(spa, spa->spa_root_vdev,
4624e14bb325SJeff Bonwick 	    dmu_tx_get_txg(tx), B_FALSE);
4625e14bb325SJeff Bonwick 
4626e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_STATE, FTAG);
462799653d4eSeschrock 
462899653d4eSeschrock 	if (spa->spa_config_syncing)
462999653d4eSeschrock 		nvlist_free(spa->spa_config_syncing);
463099653d4eSeschrock 	spa->spa_config_syncing = config;
463199653d4eSeschrock 
463299653d4eSeschrock 	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
463399653d4eSeschrock }
463499653d4eSeschrock 
4635990b4856Slling /*
4636990b4856Slling  * Set zpool properties.
4637990b4856Slling  */
4638b1b8ab34Slling static void
4639ecd6cf80Smarks spa_sync_props(void *arg1, void *arg2, cred_t *cr, dmu_tx_t *tx)
4640b1b8ab34Slling {
4641b1b8ab34Slling 	spa_t *spa = arg1;
4642b1b8ab34Slling 	objset_t *mos = spa->spa_meta_objset;
4643990b4856Slling 	nvlist_t *nvp = arg2;
4644990b4856Slling 	nvpair_t *elem;
46453d7072f8Seschrock 	uint64_t intval;
4646c5904d13Seschrock 	char *strval;
4647990b4856Slling 	zpool_prop_t prop;
4648990b4856Slling 	const char *propname;
4649990b4856Slling 	zprop_type_t proptype;
4650b1b8ab34Slling 
4651e14bb325SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);
4652e14bb325SJeff Bonwick 
4653990b4856Slling 	elem = NULL;
4654990b4856Slling 	while ((elem = nvlist_next_nvpair(nvp, elem))) {
4655990b4856Slling 		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
4656990b4856Slling 		case ZPOOL_PROP_VERSION:
4657990b4856Slling 			/*
4658990b4856Slling 			 * Only set version for non-zpool-creation cases
4659990b4856Slling 			 * (set/import). spa_create() needs special care
4660990b4856Slling 			 * for version setting.
4661990b4856Slling 			 */
4662990b4856Slling 			if (tx->tx_txg != TXG_INITIAL) {
4663990b4856Slling 				VERIFY(nvpair_value_uint64(elem,
4664990b4856Slling 				    &intval) == 0);
4665990b4856Slling 				ASSERT(intval <= SPA_VERSION);
4666990b4856Slling 				ASSERT(intval >= spa_version(spa));
4667990b4856Slling 				spa->spa_uberblock.ub_version = intval;
4668990b4856Slling 				vdev_config_dirty(spa->spa_root_vdev);
4669990b4856Slling 			}
4670ecd6cf80Smarks 			break;
4671990b4856Slling 
4672990b4856Slling 		case ZPOOL_PROP_ALTROOT:
4673990b4856Slling 			/*
4674990b4856Slling 			 * 'altroot' is a non-persistent property. It should
4675990b4856Slling 			 * have been set temporarily at creation or import time.
4676990b4856Slling 			 */
4677990b4856Slling 			ASSERT(spa->spa_root != NULL);
4678b1b8ab34Slling 			break;
46793d7072f8Seschrock 
46802f8aaab3Seschrock 		case ZPOOL_PROP_CACHEFILE:
4681990b4856Slling 			/*
4682379c004dSEric Schrock 			 * 'cachefile' is also a non-persisitent property.
4683990b4856Slling 			 */
46843d7072f8Seschrock 			break;
4685990b4856Slling 		default:
4686990b4856Slling 			/*
4687990b4856Slling 			 * Set pool property values in the poolprops mos object.
4688990b4856Slling 			 */
4689990b4856Slling 			if (spa->spa_pool_props_object == 0) {
4690990b4856Slling 				VERIFY((spa->spa_pool_props_object =
4691990b4856Slling 				    zap_create(mos, DMU_OT_POOL_PROPS,
4692990b4856Slling 				    DMU_OT_NONE, 0, tx)) > 0);
4693990b4856Slling 
4694990b4856Slling 				VERIFY(zap_update(mos,
4695990b4856Slling 				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
4696990b4856Slling 				    8, 1, &spa->spa_pool_props_object, tx)
4697990b4856Slling 				    == 0);
4698990b4856Slling 			}
4699990b4856Slling 
4700990b4856Slling 			/* normalize the property name */
4701990b4856Slling 			propname = zpool_prop_to_name(prop);
4702990b4856Slling 			proptype = zpool_prop_get_type(prop);
4703990b4856Slling 
4704990b4856Slling 			if (nvpair_type(elem) == DATA_TYPE_STRING) {
4705990b4856Slling 				ASSERT(proptype == PROP_TYPE_STRING);
4706990b4856Slling 				VERIFY(nvpair_value_string(elem, &strval) == 0);
4707990b4856Slling 				VERIFY(zap_update(mos,
4708990b4856Slling 				    spa->spa_pool_props_object, propname,
4709990b4856Slling 				    1, strlen(strval) + 1, strval, tx) == 0);
4710990b4856Slling 
4711990b4856Slling 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
4712990b4856Slling 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
4713990b4856Slling 
4714990b4856Slling 				if (proptype == PROP_TYPE_INDEX) {
4715990b4856Slling 					const char *unused;
4716990b4856Slling 					VERIFY(zpool_prop_index_to_string(
4717990b4856Slling 					    prop, intval, &unused) == 0);
4718990b4856Slling 				}
4719990b4856Slling 				VERIFY(zap_update(mos,
4720990b4856Slling 				    spa->spa_pool_props_object, propname,
4721990b4856Slling 				    8, 1, &intval, tx) == 0);
4722990b4856Slling 			} else {
4723990b4856Slling 				ASSERT(0); /* not allowed */
4724990b4856Slling 			}
4725990b4856Slling 
47260a4e9518Sgw 			switch (prop) {
47270a4e9518Sgw 			case ZPOOL_PROP_DELEGATION:
4728990b4856Slling 				spa->spa_delegation = intval;
47290a4e9518Sgw 				break;
47300a4e9518Sgw 			case ZPOOL_PROP_BOOTFS:
4731990b4856Slling 				spa->spa_bootfs = intval;
47320a4e9518Sgw 				break;
47330a4e9518Sgw 			case ZPOOL_PROP_FAILUREMODE:
47340a4e9518Sgw 				spa->spa_failmode = intval;
47350a4e9518Sgw 				break;
4736573ca77eSGeorge Wilson 			case ZPOOL_PROP_AUTOEXPAND:
4737573ca77eSGeorge Wilson 				spa->spa_autoexpand = intval;
4738573ca77eSGeorge Wilson 				spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
4739573ca77eSGeorge Wilson 				break;
4740b24ab676SJeff Bonwick 			case ZPOOL_PROP_DEDUPDITTO:
4741b24ab676SJeff Bonwick 				spa->spa_dedup_ditto = intval;
4742b24ab676SJeff Bonwick 				break;
47430a4e9518Sgw 			default:
47440a4e9518Sgw 				break;
47450a4e9518Sgw 			}
4746990b4856Slling 		}
4747990b4856Slling 
4748990b4856Slling 		/* log internal history if this is not a zpool create */
4749990b4856Slling 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
4750990b4856Slling 		    tx->tx_txg != TXG_INITIAL) {
4751990b4856Slling 			spa_history_internal_log(LOG_POOL_PROPSET,
4752990b4856Slling 			    spa, tx, cr, "%s %lld %s",
4753e14bb325SJeff Bonwick 			    nvpair_name(elem), intval, spa_name(spa));
4754b1b8ab34Slling 		}
4755b1b8ab34Slling 	}
4756e14bb325SJeff Bonwick 
4757e14bb325SJeff Bonwick 	mutex_exit(&spa->spa_props_lock);
4758b1b8ab34Slling }
4759b1b8ab34Slling 
4760fa9e4066Sahrens /*
4761fa9e4066Sahrens  * Sync the specified transaction group.  New blocks may be dirtied as
4762fa9e4066Sahrens  * part of the process, so we iterate until it converges.
4763fa9e4066Sahrens  */
4764fa9e4066Sahrens void
4765fa9e4066Sahrens spa_sync(spa_t *spa, uint64_t txg)
4766fa9e4066Sahrens {
4767fa9e4066Sahrens 	dsl_pool_t *dp = spa->spa_dsl_pool;
4768fa9e4066Sahrens 	objset_t *mos = spa->spa_meta_objset;
4769b24ab676SJeff Bonwick 	bplist_t *defer_bpl = &spa->spa_deferred_bplist;
4770b24ab676SJeff Bonwick 	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
47710373e76bSbonwick 	vdev_t *rvd = spa->spa_root_vdev;
4772fa9e4066Sahrens 	vdev_t *vd;
4773fa9e4066Sahrens 	dmu_tx_t *tx;
4774e14bb325SJeff Bonwick 	int error;
4775fa9e4066Sahrens 
4776fa9e4066Sahrens 	/*
4777fa9e4066Sahrens 	 * Lock out configuration changes.
4778fa9e4066Sahrens 	 */
4779e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
4780fa9e4066Sahrens 
4781fa9e4066Sahrens 	spa->spa_syncing_txg = txg;
4782fa9e4066Sahrens 	spa->spa_sync_pass = 0;
4783fa9e4066Sahrens 
4784e14bb325SJeff Bonwick 	/*
4785e14bb325SJeff Bonwick 	 * If there are any pending vdev state changes, convert them
4786e14bb325SJeff Bonwick 	 * into config changes that go out with this transaction group.
4787e14bb325SJeff Bonwick 	 */
4788e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
47898ad4d6ddSJeff Bonwick 	while (list_head(&spa->spa_state_dirty_list) != NULL) {
47908ad4d6ddSJeff Bonwick 		/*
47918ad4d6ddSJeff Bonwick 		 * We need the write lock here because, for aux vdevs,
47928ad4d6ddSJeff Bonwick 		 * calling vdev_config_dirty() modifies sav_config.
47938ad4d6ddSJeff Bonwick 		 * This is ugly and will become unnecessary when we
47948ad4d6ddSJeff Bonwick 		 * eliminate the aux vdev wart by integrating all vdevs
47958ad4d6ddSJeff Bonwick 		 * into the root vdev tree.
47968ad4d6ddSJeff Bonwick 		 */
47978ad4d6ddSJeff Bonwick 		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
47988ad4d6ddSJeff Bonwick 		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
47998ad4d6ddSJeff Bonwick 		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
48008ad4d6ddSJeff Bonwick 			vdev_state_clean(vd);
48018ad4d6ddSJeff Bonwick 			vdev_config_dirty(vd);
48028ad4d6ddSJeff Bonwick 		}
48038ad4d6ddSJeff Bonwick 		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
48048ad4d6ddSJeff Bonwick 		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
4805e14bb325SJeff Bonwick 	}
4806e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_STATE, FTAG);
4807e14bb325SJeff Bonwick 
4808b24ab676SJeff Bonwick 	VERIFY(0 == bplist_open(defer_bpl, mos, spa->spa_deferred_bplist_obj));
4809fa9e4066Sahrens 
481099653d4eSeschrock 	tx = dmu_tx_create_assigned(dp, txg);
481199653d4eSeschrock 
481299653d4eSeschrock 	/*
4813e7437265Sahrens 	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
481499653d4eSeschrock 	 * set spa_deflate if we have no raid-z vdevs.
481599653d4eSeschrock 	 */
4816e7437265Sahrens 	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
4817e7437265Sahrens 	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
481899653d4eSeschrock 		int i;
481999653d4eSeschrock 
482099653d4eSeschrock 		for (i = 0; i < rvd->vdev_children; i++) {
482199653d4eSeschrock 			vd = rvd->vdev_child[i];
482299653d4eSeschrock 			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
482399653d4eSeschrock 				break;
482499653d4eSeschrock 		}
482599653d4eSeschrock 		if (i == rvd->vdev_children) {
482699653d4eSeschrock 			spa->spa_deflate = TRUE;
482799653d4eSeschrock 			VERIFY(0 == zap_add(spa->spa_meta_objset,
482899653d4eSeschrock 			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
482999653d4eSeschrock 			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
483099653d4eSeschrock 		}
483199653d4eSeschrock 	}
483299653d4eSeschrock 
4833088f3894Sahrens 	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
4834088f3894Sahrens 	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
4835088f3894Sahrens 		dsl_pool_create_origin(dp, tx);
4836088f3894Sahrens 
4837088f3894Sahrens 		/* Keeping the origin open increases spa_minref */
4838088f3894Sahrens 		spa->spa_minref += 3;
4839088f3894Sahrens 	}
4840088f3894Sahrens 
4841088f3894Sahrens 	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
4842088f3894Sahrens 	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
4843088f3894Sahrens 		dsl_pool_upgrade_clones(dp, tx);
4844088f3894Sahrens 	}
4845088f3894Sahrens 
4846fa9e4066Sahrens 	/*
4847fa9e4066Sahrens 	 * If anything has changed in this txg, push the deferred frees
4848fa9e4066Sahrens 	 * from the previous txg.  If not, leave them alone so that we
4849fa9e4066Sahrens 	 * don't generate work on an otherwise idle system.
4850fa9e4066Sahrens 	 */
4851fa9e4066Sahrens 	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
48521615a317Sek 	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
48531615a317Sek 	    !txg_list_empty(&dp->dp_sync_tasks, txg))
4854b24ab676SJeff Bonwick 		spa_sync_deferred_bplist(spa, defer_bpl, tx, txg);
4855fa9e4066Sahrens 
4856fa9e4066Sahrens 	/*
4857fa9e4066Sahrens 	 * Iterate to convergence.
4858fa9e4066Sahrens 	 */
4859fa9e4066Sahrens 	do {
4860b24ab676SJeff Bonwick 		int pass = ++spa->spa_sync_pass;
4861fa9e4066Sahrens 
4862fa9e4066Sahrens 		spa_sync_config_object(spa, tx);
4863fa94a07fSbrendan 		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
4864fa94a07fSbrendan 		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
4865fa94a07fSbrendan 		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
4866fa94a07fSbrendan 		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
4867ea8dc4b6Seschrock 		spa_errlog_sync(spa, txg);
4868fa9e4066Sahrens 		dsl_pool_sync(dp, txg);
4869fa9e4066Sahrens 
4870b24ab676SJeff Bonwick 		if (pass <= SYNC_PASS_DEFERRED_FREE) {
4871b24ab676SJeff Bonwick 			zio_t *zio = zio_root(spa, NULL, NULL, 0);
4872b24ab676SJeff Bonwick 			bplist_sync(free_bpl, spa_sync_free, zio, tx);
4873b24ab676SJeff Bonwick 			VERIFY(zio_wait(zio) == 0);
4874b24ab676SJeff Bonwick 		} else {
4875b24ab676SJeff Bonwick 			bplist_sync(free_bpl, bplist_enqueue_cb, defer_bpl, tx);
4876fa9e4066Sahrens 		}
4877fa9e4066Sahrens 
4878b24ab676SJeff Bonwick 		ddt_sync(spa, txg);
4879b24ab676SJeff Bonwick 
4880b24ab676SJeff Bonwick 		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
4881b24ab676SJeff Bonwick 			vdev_sync(vd, txg);
4882b24ab676SJeff Bonwick 
4883b24ab676SJeff Bonwick 	} while (dmu_objset_is_dirty(mos, txg));
4884fa9e4066Sahrens 
4885b24ab676SJeff Bonwick 	ASSERT(free_bpl->bpl_queue == NULL);
4886fa9e4066Sahrens 
4887b24ab676SJeff Bonwick 	bplist_close(defer_bpl);
4888fa9e4066Sahrens 
4889fa9e4066Sahrens 	/*
4890fa9e4066Sahrens 	 * Rewrite the vdev configuration (which includes the uberblock)
4891fa9e4066Sahrens 	 * to commit the transaction group.
48920373e76bSbonwick 	 *
489317f17c2dSbonwick 	 * If there are no dirty vdevs, we sync the uberblock to a few
489417f17c2dSbonwick 	 * random top-level vdevs that are known to be visible in the
4895e14bb325SJeff Bonwick 	 * config cache (see spa_vdev_add() for a complete description).
4896e14bb325SJeff Bonwick 	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
48970373e76bSbonwick 	 */
4898e14bb325SJeff Bonwick 	for (;;) {
4899e14bb325SJeff Bonwick 		/*
4900e14bb325SJeff Bonwick 		 * We hold SCL_STATE to prevent vdev open/close/etc.
4901e14bb325SJeff Bonwick 		 * while we're attempting to write the vdev labels.
4902e14bb325SJeff Bonwick 		 */
4903e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
4904e14bb325SJeff Bonwick 
4905e14bb325SJeff Bonwick 		if (list_is_empty(&spa->spa_config_dirty_list)) {
4906e14bb325SJeff Bonwick 			vdev_t *svd[SPA_DVAS_PER_BP];
4907e14bb325SJeff Bonwick 			int svdcount = 0;
4908e14bb325SJeff Bonwick 			int children = rvd->vdev_children;
4909e14bb325SJeff Bonwick 			int c0 = spa_get_random(children);
4910e14bb325SJeff Bonwick 
4911573ca77eSGeorge Wilson 			for (int c = 0; c < children; c++) {
4912e14bb325SJeff Bonwick 				vd = rvd->vdev_child[(c0 + c) % children];
4913e14bb325SJeff Bonwick 				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
4914e14bb325SJeff Bonwick 					continue;
4915e14bb325SJeff Bonwick 				svd[svdcount++] = vd;
4916e14bb325SJeff Bonwick 				if (svdcount == SPA_DVAS_PER_BP)
4917e14bb325SJeff Bonwick 					break;
4918e14bb325SJeff Bonwick 			}
49198956713aSEric Schrock 			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
49208956713aSEric Schrock 			if (error != 0)
49218956713aSEric Schrock 				error = vdev_config_sync(svd, svdcount, txg,
49228956713aSEric Schrock 				    B_TRUE);
4923e14bb325SJeff Bonwick 		} else {
4924e14bb325SJeff Bonwick 			error = vdev_config_sync(rvd->vdev_child,
49258956713aSEric Schrock 			    rvd->vdev_children, txg, B_FALSE);
49268956713aSEric Schrock 			if (error != 0)
49278956713aSEric Schrock 				error = vdev_config_sync(rvd->vdev_child,
49288956713aSEric Schrock 				    rvd->vdev_children, txg, B_TRUE);
49290373e76bSbonwick 		}
4930e14bb325SJeff Bonwick 
4931e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_STATE, FTAG);
4932e14bb325SJeff Bonwick 
4933e14bb325SJeff Bonwick 		if (error == 0)
4934e14bb325SJeff Bonwick 			break;
4935e14bb325SJeff Bonwick 		zio_suspend(spa, NULL);
4936e14bb325SJeff Bonwick 		zio_resume_wait(spa);
49370373e76bSbonwick 	}
493899653d4eSeschrock 	dmu_tx_commit(tx);
493999653d4eSeschrock 
49400373e76bSbonwick 	/*
49410373e76bSbonwick 	 * Clear the dirty config list.
4942fa9e4066Sahrens 	 */
4943e14bb325SJeff Bonwick 	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
49440373e76bSbonwick 		vdev_config_clean(vd);
49450373e76bSbonwick 
49460373e76bSbonwick 	/*
49470373e76bSbonwick 	 * Now that the new config has synced transactionally,
49480373e76bSbonwick 	 * let it become visible to the config cache.
49490373e76bSbonwick 	 */
49500373e76bSbonwick 	if (spa->spa_config_syncing != NULL) {
49510373e76bSbonwick 		spa_config_set(spa, spa->spa_config_syncing);
49520373e76bSbonwick 		spa->spa_config_txg = txg;
49530373e76bSbonwick 		spa->spa_config_syncing = NULL;
49540373e76bSbonwick 	}
4955fa9e4066Sahrens 
4956fa9e4066Sahrens 	spa->spa_ubsync = spa->spa_uberblock;
4957fa9e4066Sahrens 
4958b24ab676SJeff Bonwick 	dsl_pool_sync_done(dp, txg);
4959fa9e4066Sahrens 
4960fa9e4066Sahrens 	/*
4961fa9e4066Sahrens 	 * Update usable space statistics.
4962fa9e4066Sahrens 	 */
4963fa9e4066Sahrens 	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
4964fa9e4066Sahrens 		vdev_sync_done(vd, txg);
4965fa9e4066Sahrens 
4966485bbbf5SGeorge Wilson 	spa_update_dspace(spa);
4967485bbbf5SGeorge Wilson 
4968fa9e4066Sahrens 	/*
4969fa9e4066Sahrens 	 * It had better be the case that we didn't dirty anything
497099653d4eSeschrock 	 * since vdev_config_sync().
4971fa9e4066Sahrens 	 */
4972fa9e4066Sahrens 	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
4973fa9e4066Sahrens 	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
4974fa9e4066Sahrens 	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));
4975b24ab676SJeff Bonwick 	ASSERT(defer_bpl->bpl_queue == NULL);
4976b24ab676SJeff Bonwick 	ASSERT(free_bpl->bpl_queue == NULL);
4977b24ab676SJeff Bonwick 
4978b24ab676SJeff Bonwick 	spa->spa_sync_pass = 0;
4979fa9e4066Sahrens 
4980e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_CONFIG, FTAG);
4981ea8dc4b6Seschrock 
4982468c413aSTim Haley 	spa_handle_ignored_writes(spa);
4983468c413aSTim Haley 
4984ea8dc4b6Seschrock 	/*
4985ea8dc4b6Seschrock 	 * If any async tasks have been requested, kick them off.
4986ea8dc4b6Seschrock 	 */
4987ea8dc4b6Seschrock 	spa_async_dispatch(spa);
4988fa9e4066Sahrens }
4989fa9e4066Sahrens 
4990fa9e4066Sahrens /*
4991fa9e4066Sahrens  * Sync all pools.  We don't want to hold the namespace lock across these
4992fa9e4066Sahrens  * operations, so we take a reference on the spa_t and drop the lock during the
4993fa9e4066Sahrens  * sync.
4994fa9e4066Sahrens  */
4995fa9e4066Sahrens void
4996fa9e4066Sahrens spa_sync_allpools(void)
4997fa9e4066Sahrens {
4998fa9e4066Sahrens 	spa_t *spa = NULL;
4999fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
5000fa9e4066Sahrens 	while ((spa = spa_next(spa)) != NULL) {
5001e14bb325SJeff Bonwick 		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
5002fa9e4066Sahrens 			continue;
5003fa9e4066Sahrens 		spa_open_ref(spa, FTAG);
5004fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
5005fa9e4066Sahrens 		txg_wait_synced(spa_get_dsl(spa), 0);
5006fa9e4066Sahrens 		mutex_enter(&spa_namespace_lock);
5007fa9e4066Sahrens 		spa_close(spa, FTAG);
5008fa9e4066Sahrens 	}
5009fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
5010fa9e4066Sahrens }
5011fa9e4066Sahrens 
5012fa9e4066Sahrens /*
5013fa9e4066Sahrens  * ==========================================================================
5014fa9e4066Sahrens  * Miscellaneous routines
5015fa9e4066Sahrens  * ==========================================================================
5016fa9e4066Sahrens  */
5017fa9e4066Sahrens 
5018fa9e4066Sahrens /*
5019fa9e4066Sahrens  * Remove all pools in the system.
5020fa9e4066Sahrens  */
5021fa9e4066Sahrens void
5022fa9e4066Sahrens spa_evict_all(void)
5023fa9e4066Sahrens {
5024fa9e4066Sahrens 	spa_t *spa;
5025fa9e4066Sahrens 
5026fa9e4066Sahrens 	/*
5027fa9e4066Sahrens 	 * Remove all cached state.  All pools should be closed now,
5028fa9e4066Sahrens 	 * so every spa in the AVL tree should be unreferenced.
5029fa9e4066Sahrens 	 */
5030fa9e4066Sahrens 	mutex_enter(&spa_namespace_lock);
5031fa9e4066Sahrens 	while ((spa = spa_next(NULL)) != NULL) {
5032fa9e4066Sahrens 		/*
5033ea8dc4b6Seschrock 		 * Stop async tasks.  The async thread may need to detach
5034ea8dc4b6Seschrock 		 * a device that's been replaced, which requires grabbing
5035ea8dc4b6Seschrock 		 * spa_namespace_lock, so we must drop it here.
5036fa9e4066Sahrens 		 */
5037fa9e4066Sahrens 		spa_open_ref(spa, FTAG);
5038fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
5039ea8dc4b6Seschrock 		spa_async_suspend(spa);
5040fa9e4066Sahrens 		mutex_enter(&spa_namespace_lock);
5041fa9e4066Sahrens 		spa_close(spa, FTAG);
5042fa9e4066Sahrens 
5043fa9e4066Sahrens 		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
5044fa9e4066Sahrens 			spa_unload(spa);
5045fa9e4066Sahrens 			spa_deactivate(spa);
5046fa9e4066Sahrens 		}
5047fa9e4066Sahrens 		spa_remove(spa);
5048fa9e4066Sahrens 	}
5049fa9e4066Sahrens 	mutex_exit(&spa_namespace_lock);
5050fa9e4066Sahrens }
5051ea8dc4b6Seschrock 
5052ea8dc4b6Seschrock vdev_t *
50536809eb4eSEric Schrock spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
5054ea8dc4b6Seschrock {
5055c5904d13Seschrock 	vdev_t *vd;
5056c5904d13Seschrock 	int i;
5057c5904d13Seschrock 
5058c5904d13Seschrock 	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
5059c5904d13Seschrock 		return (vd);
5060c5904d13Seschrock 
50616809eb4eSEric Schrock 	if (aux) {
5062c5904d13Seschrock 		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
5063c5904d13Seschrock 			vd = spa->spa_l2cache.sav_vdevs[i];
50646809eb4eSEric Schrock 			if (vd->vdev_guid == guid)
50656809eb4eSEric Schrock 				return (vd);
50666809eb4eSEric Schrock 		}
50676809eb4eSEric Schrock 
50686809eb4eSEric Schrock 		for (i = 0; i < spa->spa_spares.sav_count; i++) {
50696809eb4eSEric Schrock 			vd = spa->spa_spares.sav_vdevs[i];
5070c5904d13Seschrock 			if (vd->vdev_guid == guid)
5071c5904d13Seschrock 				return (vd);
5072c5904d13Seschrock 		}
5073c5904d13Seschrock 	}
5074c5904d13Seschrock 
5075c5904d13Seschrock 	return (NULL);
5076ea8dc4b6Seschrock }
5077eaca9bbdSeschrock 
5078eaca9bbdSeschrock void
5079990b4856Slling spa_upgrade(spa_t *spa, uint64_t version)
5080eaca9bbdSeschrock {
5081e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
5082eaca9bbdSeschrock 
5083eaca9bbdSeschrock 	/*
5084eaca9bbdSeschrock 	 * This should only be called for a non-faulted pool, and since a
5085eaca9bbdSeschrock 	 * future version would result in an unopenable pool, this shouldn't be
5086eaca9bbdSeschrock 	 * possible.
5087eaca9bbdSeschrock 	 */
5088e7437265Sahrens 	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
5089990b4856Slling 	ASSERT(version >= spa->spa_uberblock.ub_version);
5090eaca9bbdSeschrock 
5091990b4856Slling 	spa->spa_uberblock.ub_version = version;
5092eaca9bbdSeschrock 	vdev_config_dirty(spa->spa_root_vdev);
5093eaca9bbdSeschrock 
5094e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
509599653d4eSeschrock 
509699653d4eSeschrock 	txg_wait_synced(spa_get_dsl(spa), 0);
509799653d4eSeschrock }
509899653d4eSeschrock 
509999653d4eSeschrock boolean_t
510099653d4eSeschrock spa_has_spare(spa_t *spa, uint64_t guid)
510199653d4eSeschrock {
510299653d4eSeschrock 	int i;
510339c23413Seschrock 	uint64_t spareguid;
5104fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_spares;
510599653d4eSeschrock 
5106fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
5107fa94a07fSbrendan 		if (sav->sav_vdevs[i]->vdev_guid == guid)
510899653d4eSeschrock 			return (B_TRUE);
510999653d4eSeschrock 
5110fa94a07fSbrendan 	for (i = 0; i < sav->sav_npending; i++) {
5111fa94a07fSbrendan 		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
5112fa94a07fSbrendan 		    &spareguid) == 0 && spareguid == guid)
511339c23413Seschrock 			return (B_TRUE);
511439c23413Seschrock 	}
511539c23413Seschrock 
511699653d4eSeschrock 	return (B_FALSE);
5117eaca9bbdSeschrock }
5118b1b8ab34Slling 
511989a89ebfSlling /*
512089a89ebfSlling  * Check if a pool has an active shared spare device.
512189a89ebfSlling  * Note: reference count of an active spare is 2, as a spare and as a replace
512289a89ebfSlling  */
512389a89ebfSlling static boolean_t
512489a89ebfSlling spa_has_active_shared_spare(spa_t *spa)
512589a89ebfSlling {
512689a89ebfSlling 	int i, refcnt;
512789a89ebfSlling 	uint64_t pool;
512889a89ebfSlling 	spa_aux_vdev_t *sav = &spa->spa_spares;
512989a89ebfSlling 
513089a89ebfSlling 	for (i = 0; i < sav->sav_count; i++) {
513189a89ebfSlling 		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
513289a89ebfSlling 		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
513389a89ebfSlling 		    refcnt > 2)
513489a89ebfSlling 			return (B_TRUE);
513589a89ebfSlling 	}
513689a89ebfSlling 
513789a89ebfSlling 	return (B_FALSE);
513889a89ebfSlling }
513989a89ebfSlling 
51403d7072f8Seschrock /*
51413d7072f8Seschrock  * Post a sysevent corresponding to the given event.  The 'name' must be one of
51423d7072f8Seschrock  * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
51433d7072f8Seschrock  * filled in from the spa and (optionally) the vdev.  This doesn't do anything
51443d7072f8Seschrock  * in the userland libzpool, as we don't want consumers to misinterpret ztest
51453d7072f8Seschrock  * or zdb as real changes.
51463d7072f8Seschrock  */
51473d7072f8Seschrock void
51483d7072f8Seschrock spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
51493d7072f8Seschrock {
51503d7072f8Seschrock #ifdef _KERNEL
51513d7072f8Seschrock 	sysevent_t		*ev;
51523d7072f8Seschrock 	sysevent_attr_list_t	*attr = NULL;
51533d7072f8Seschrock 	sysevent_value_t	value;
51543d7072f8Seschrock 	sysevent_id_t		eid;
51553d7072f8Seschrock 
51563d7072f8Seschrock 	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
51573d7072f8Seschrock 	    SE_SLEEP);
51583d7072f8Seschrock 
51593d7072f8Seschrock 	value.value_type = SE_DATA_TYPE_STRING;
51603d7072f8Seschrock 	value.value.sv_string = spa_name(spa);
51613d7072f8Seschrock 	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
51623d7072f8Seschrock 		goto done;
51633d7072f8Seschrock 
51643d7072f8Seschrock 	value.value_type = SE_DATA_TYPE_UINT64;
51653d7072f8Seschrock 	value.value.sv_uint64 = spa_guid(spa);
51663d7072f8Seschrock 	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
51673d7072f8Seschrock 		goto done;
51683d7072f8Seschrock 
51693d7072f8Seschrock 	if (vd) {
51703d7072f8Seschrock 		value.value_type = SE_DATA_TYPE_UINT64;
51713d7072f8Seschrock 		value.value.sv_uint64 = vd->vdev_guid;
51723d7072f8Seschrock 		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
51733d7072f8Seschrock 		    SE_SLEEP) != 0)
51743d7072f8Seschrock 			goto done;
51753d7072f8Seschrock 
51763d7072f8Seschrock 		if (vd->vdev_path) {
51773d7072f8Seschrock 			value.value_type = SE_DATA_TYPE_STRING;
51783d7072f8Seschrock 			value.value.sv_string = vd->vdev_path;
51793d7072f8Seschrock 			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
51803d7072f8Seschrock 			    &value, SE_SLEEP) != 0)
51813d7072f8Seschrock 				goto done;
51823d7072f8Seschrock 		}
51833d7072f8Seschrock 	}
51843d7072f8Seschrock 
5185b01c3b58Seschrock 	if (sysevent_attach_attributes(ev, attr) != 0)
5186b01c3b58Seschrock 		goto done;
5187b01c3b58Seschrock 	attr = NULL;
5188b01c3b58Seschrock 
51893d7072f8Seschrock 	(void) log_sysevent(ev, SE_SLEEP, &eid);
51903d7072f8Seschrock 
51913d7072f8Seschrock done:
51923d7072f8Seschrock 	if (attr)
51933d7072f8Seschrock 		sysevent_free_attr(attr);
51943d7072f8Seschrock 	sysevent_free(ev);
51953d7072f8Seschrock #endif
51963d7072f8Seschrock }
5197