xref: /illumos-gate/usr/src/uts/common/fs/zfs/spa.c (revision cb04b8739c50e3e6d12e89b790fa7b8d0d899865)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5ea8dc4b6Seschrock  * Common Development and Distribution License (the "License").
6ea8dc4b6Seschrock  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
2199653d4eSeschrock 
22fa9e4066Sahrens /*
2398d1cbfeSGeorge Wilson  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
26fa9e4066Sahrens /*
27fa9e4066Sahrens  * This file contains all the routines used when modifying on-disk SPA state.
28fa9e4066Sahrens  * This includes opening, importing, destroying, exporting a pool, and syncing a
29fa9e4066Sahrens  * pool.
30fa9e4066Sahrens  */
31fa9e4066Sahrens 
32fa9e4066Sahrens #include <sys/zfs_context.h>
33ea8dc4b6Seschrock #include <sys/fm/fs/zfs.h>
34fa9e4066Sahrens #include <sys/spa_impl.h>
35fa9e4066Sahrens #include <sys/zio.h>
36fa9e4066Sahrens #include <sys/zio_checksum.h>
37fa9e4066Sahrens #include <sys/dmu.h>
38fa9e4066Sahrens #include <sys/dmu_tx.h>
39fa9e4066Sahrens #include <sys/zap.h>
40fa9e4066Sahrens #include <sys/zil.h>
41b24ab676SJeff Bonwick #include <sys/ddt.h>
42fa9e4066Sahrens #include <sys/vdev_impl.h>
43fa9e4066Sahrens #include <sys/metaslab.h>
4488ecc943SGeorge Wilson #include <sys/metaslab_impl.h>
45fa9e4066Sahrens #include <sys/uberblock_impl.h>
46fa9e4066Sahrens #include <sys/txg.h>
47fa9e4066Sahrens #include <sys/avl.h>
48fa9e4066Sahrens #include <sys/dmu_traverse.h>
49b1b8ab34Slling #include <sys/dmu_objset.h>
50fa9e4066Sahrens #include <sys/unique.h>
51fa9e4066Sahrens #include <sys/dsl_pool.h>
52b1b8ab34Slling #include <sys/dsl_dataset.h>
53fa9e4066Sahrens #include <sys/dsl_dir.h>
54fa9e4066Sahrens #include <sys/dsl_prop.h>
55b1b8ab34Slling #include <sys/dsl_synctask.h>
56fa9e4066Sahrens #include <sys/fs/zfs.h>
57fa94a07fSbrendan #include <sys/arc.h>
58fa9e4066Sahrens #include <sys/callb.h>
5995173954Sek #include <sys/systeminfo.h>
60e7cbe64fSgw #include <sys/spa_boot.h>
61573ca77eSGeorge Wilson #include <sys/zfs_ioctl.h>
623f9d6ad7SLin Ling #include <sys/dsl_scan.h>
63fa9e4066Sahrens 
645679c89fSjv #ifdef	_KERNEL
65dedec472SJack Meng #include <sys/bootprops.h>
6635a5a358SJonathan Adams #include <sys/callb.h>
6735a5a358SJonathan Adams #include <sys/cpupart.h>
6835a5a358SJonathan Adams #include <sys/pool.h>
6935a5a358SJonathan Adams #include <sys/sysdc.h>
7035a5a358SJonathan Adams #include <sys/zone.h>
715679c89fSjv #endif	/* _KERNEL */
725679c89fSjv 
73990b4856Slling #include "zfs_prop.h"
74b7b97454Sperrin #include "zfs_comutil.h"
75990b4856Slling 
7635a5a358SJonathan Adams typedef enum zti_modes {
772e0c549eSJonathan Adams 	zti_mode_fixed,			/* value is # of threads (min 1) */
782e0c549eSJonathan Adams 	zti_mode_online_percent,	/* value is % of online CPUs */
7935a5a358SJonathan Adams 	zti_mode_batch,			/* cpu-intensive; value is ignored */
8080eb36f2SGeorge Wilson 	zti_mode_null,			/* don't create a taskq */
812e0c549eSJonathan Adams 	zti_nmodes
8235a5a358SJonathan Adams } zti_modes_t;
83416e0cd8Sek 
8480eb36f2SGeorge Wilson #define	ZTI_FIX(n)	{ zti_mode_fixed, (n) }
8580eb36f2SGeorge Wilson #define	ZTI_PCT(n)	{ zti_mode_online_percent, (n) }
8635a5a358SJonathan Adams #define	ZTI_BATCH	{ zti_mode_batch, 0 }
8780eb36f2SGeorge Wilson #define	ZTI_NULL	{ zti_mode_null, 0 }
882e0c549eSJonathan Adams 
8980eb36f2SGeorge Wilson #define	ZTI_ONE		ZTI_FIX(1)
902e0c549eSJonathan Adams 
912e0c549eSJonathan Adams typedef struct zio_taskq_info {
9280eb36f2SGeorge Wilson 	enum zti_modes zti_mode;
9380eb36f2SGeorge Wilson 	uint_t zti_value;
942e0c549eSJonathan Adams } zio_taskq_info_t;
952e0c549eSJonathan Adams 
962e0c549eSJonathan Adams static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
9735a5a358SJonathan Adams 	"issue", "issue_high", "intr", "intr_high"
982e0c549eSJonathan Adams };
992e0c549eSJonathan Adams 
10080eb36f2SGeorge Wilson /*
10180eb36f2SGeorge Wilson  * Define the taskq threads for the following I/O types:
10280eb36f2SGeorge Wilson  * 	NULL, READ, WRITE, FREE, CLAIM, and IOCTL
10380eb36f2SGeorge Wilson  */
10480eb36f2SGeorge Wilson const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
10580eb36f2SGeorge Wilson 	/* ISSUE	ISSUE_HIGH	INTR		INTR_HIGH */
10680eb36f2SGeorge Wilson 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
10735a5a358SJonathan Adams 	{ ZTI_FIX(8),	ZTI_NULL,	ZTI_BATCH,	ZTI_NULL },
10835a5a358SJonathan Adams 	{ ZTI_BATCH,	ZTI_FIX(5),	ZTI_FIX(8),	ZTI_FIX(5) },
109c7cd2421SGeorge Wilson 	{ ZTI_FIX(100),	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
11080eb36f2SGeorge Wilson 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
11180eb36f2SGeorge Wilson 	{ ZTI_ONE,	ZTI_NULL,	ZTI_ONE,	ZTI_NULL },
1122e0c549eSJonathan Adams };
1132e0c549eSJonathan Adams 
1143f9d6ad7SLin Ling static dsl_syncfunc_t spa_sync_props;
11589a89ebfSlling static boolean_t spa_has_active_shared_spare(spa_t *spa);
1161195e687SMark J Musante static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
1171195e687SMark J Musante     spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
1181195e687SMark J Musante     char **ereport);
119*cb04b873SMark J Musante static void spa_vdev_resilver_done(spa_t *spa);
120990b4856Slling 
12135a5a358SJonathan Adams uint_t		zio_taskq_batch_pct = 100;	/* 1 thread per cpu in pset */
12235a5a358SJonathan Adams id_t		zio_taskq_psrset_bind = PS_NONE;
12335a5a358SJonathan Adams boolean_t	zio_taskq_sysdc = B_TRUE;	/* use SDC scheduling class */
12435a5a358SJonathan Adams uint_t		zio_taskq_basedc = 80;		/* base duty cycle */
12535a5a358SJonathan Adams 
12635a5a358SJonathan Adams boolean_t	spa_create_process = B_TRUE;	/* no process ==> no sysdc */
12735a5a358SJonathan Adams 
12835a5a358SJonathan Adams /*
12935a5a358SJonathan Adams  * This (illegal) pool name is used when temporarily importing a spa_t in order
13035a5a358SJonathan Adams  * to get the vdev stats associated with the imported devices.
13135a5a358SJonathan Adams  */
13235a5a358SJonathan Adams #define	TRYIMPORT_NAME	"$import"
13335a5a358SJonathan Adams 
134990b4856Slling /*
135990b4856Slling  * ==========================================================================
136990b4856Slling  * SPA properties routines
137990b4856Slling  * ==========================================================================
138990b4856Slling  */
139990b4856Slling 
140990b4856Slling /*
141990b4856Slling  * Add a (source=src, propname=propval) list to an nvlist.
142990b4856Slling  */
1439d82f4f6Slling static void
144990b4856Slling spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
145990b4856Slling     uint64_t intval, zprop_source_t src)
146990b4856Slling {
147990b4856Slling 	const char *propname = zpool_prop_to_name(prop);
148990b4856Slling 	nvlist_t *propval;
149990b4856Slling 
1509d82f4f6Slling 	VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1519d82f4f6Slling 	VERIFY(nvlist_add_uint64(propval, ZPROP_SOURCE, src) == 0);
152990b4856Slling 
1539d82f4f6Slling 	if (strval != NULL)
1549d82f4f6Slling 		VERIFY(nvlist_add_string(propval, ZPROP_VALUE, strval) == 0);
1559d82f4f6Slling 	else
1569d82f4f6Slling 		VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, intval) == 0);
157990b4856Slling 
1589d82f4f6Slling 	VERIFY(nvlist_add_nvlist(nvl, propname, propval) == 0);
159990b4856Slling 	nvlist_free(propval);
160990b4856Slling }
161990b4856Slling 
162990b4856Slling /*
163990b4856Slling  * Get property values from the spa configuration.
164990b4856Slling  */
1659d82f4f6Slling static void
166990b4856Slling spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
167990b4856Slling {
168379c004dSEric Schrock 	uint64_t size;
169485bbbf5SGeorge Wilson 	uint64_t alloc;
170990b4856Slling 	uint64_t cap, version;
171990b4856Slling 	zprop_source_t src = ZPROP_SRC_NONE;
172c5904d13Seschrock 	spa_config_dirent_t *dp;
173990b4856Slling 
174e14bb325SJeff Bonwick 	ASSERT(MUTEX_HELD(&spa->spa_props_lock));
175e14bb325SJeff Bonwick 
176379c004dSEric Schrock 	if (spa->spa_root_vdev != NULL) {
177485bbbf5SGeorge Wilson 		alloc = metaslab_class_get_alloc(spa_normal_class(spa));
178b24ab676SJeff Bonwick 		size = metaslab_class_get_space(spa_normal_class(spa));
179379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
180379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_SIZE, NULL, size, src);
181485bbbf5SGeorge Wilson 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
182485bbbf5SGeorge Wilson 		spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
183485bbbf5SGeorge Wilson 		    size - alloc, src);
184379c004dSEric Schrock 
185485bbbf5SGeorge Wilson 		cap = (size == 0) ? 0 : (alloc * 100 / size);
186379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_CAPACITY, NULL, cap, src);
187379c004dSEric Schrock 
188b24ab676SJeff Bonwick 		spa_prop_add_list(*nvp, ZPOOL_PROP_DEDUPRATIO, NULL,
189b24ab676SJeff Bonwick 		    ddt_get_pool_dedup_ratio(spa), src);
190b24ab676SJeff Bonwick 
191379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
192379c004dSEric Schrock 		    spa->spa_root_vdev->vdev_state, src);
193379c004dSEric Schrock 
194379c004dSEric Schrock 		version = spa_version(spa);
195379c004dSEric Schrock 		if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
196379c004dSEric Schrock 			src = ZPROP_SRC_DEFAULT;
197379c004dSEric Schrock 		else
198379c004dSEric Schrock 			src = ZPROP_SRC_LOCAL;
199379c004dSEric Schrock 		spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
200379c004dSEric Schrock 	}
201990b4856Slling 
2029d82f4f6Slling 	spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
203990b4856Slling 
2049d82f4f6Slling 	if (spa->spa_root != NULL)
2059d82f4f6Slling 		spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
2069d82f4f6Slling 		    0, ZPROP_SRC_LOCAL);
207990b4856Slling 
208c5904d13Seschrock 	if ((dp = list_head(&spa->spa_config_list)) != NULL) {
209c5904d13Seschrock 		if (dp->scd_path == NULL) {
2109d82f4f6Slling 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
211c5904d13Seschrock 			    "none", 0, ZPROP_SRC_LOCAL);
212c5904d13Seschrock 		} else if (strcmp(dp->scd_path, spa_config_path) != 0) {
2139d82f4f6Slling 			spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
214c5904d13Seschrock 			    dp->scd_path, 0, ZPROP_SRC_LOCAL);
2152f8aaab3Seschrock 		}
2162f8aaab3Seschrock 	}
217990b4856Slling }
218990b4856Slling 
219990b4856Slling /*
220990b4856Slling  * Get zpool property values.
221990b4856Slling  */
222990b4856Slling int
223990b4856Slling spa_prop_get(spa_t *spa, nvlist_t **nvp)
224990b4856Slling {
225b24ab676SJeff Bonwick 	objset_t *mos = spa->spa_meta_objset;
226990b4856Slling 	zap_cursor_t zc;
227990b4856Slling 	zap_attribute_t za;
228990b4856Slling 	int err;
229990b4856Slling 
2309d82f4f6Slling 	VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
231990b4856Slling 
232e14bb325SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);
233e14bb325SJeff Bonwick 
234990b4856Slling 	/*
235990b4856Slling 	 * Get properties from the spa config.
236990b4856Slling 	 */
2379d82f4f6Slling 	spa_prop_get_config(spa, nvp);
238990b4856Slling 
239990b4856Slling 	/* If no pool property object, no more prop to get. */
240afee20e4SGeorge Wilson 	if (mos == NULL || spa->spa_pool_props_object == 0) {
241990b4856Slling 		mutex_exit(&spa->spa_props_lock);
242990b4856Slling 		return (0);
243990b4856Slling 	}
244990b4856Slling 
245990b4856Slling 	/*
246990b4856Slling 	 * Get properties from the MOS pool property object.
247990b4856Slling 	 */
248990b4856Slling 	for (zap_cursor_init(&zc, mos, spa->spa_pool_props_object);
249990b4856Slling 	    (err = zap_cursor_retrieve(&zc, &za)) == 0;
250990b4856Slling 	    zap_cursor_advance(&zc)) {
251990b4856Slling 		uint64_t intval = 0;
252990b4856Slling 		char *strval = NULL;
253990b4856Slling 		zprop_source_t src = ZPROP_SRC_DEFAULT;
254990b4856Slling 		zpool_prop_t prop;
255990b4856Slling 
256990b4856Slling 		if ((prop = zpool_name_to_prop(za.za_name)) == ZPROP_INVAL)
257990b4856Slling 			continue;
258990b4856Slling 
259990b4856Slling 		switch (za.za_integer_length) {
260990b4856Slling 		case 8:
261990b4856Slling 			/* integer property */
262990b4856Slling 			if (za.za_first_integer !=
263990b4856Slling 			    zpool_prop_default_numeric(prop))
264990b4856Slling 				src = ZPROP_SRC_LOCAL;
265990b4856Slling 
266990b4856Slling 			if (prop == ZPOOL_PROP_BOOTFS) {
267990b4856Slling 				dsl_pool_t *dp;
268990b4856Slling 				dsl_dataset_t *ds = NULL;
269990b4856Slling 
270990b4856Slling 				dp = spa_get_dsl(spa);
271990b4856Slling 				rw_enter(&dp->dp_config_rwlock, RW_READER);
272745cd3c5Smaybee 				if (err = dsl_dataset_hold_obj(dp,
273745cd3c5Smaybee 				    za.za_first_integer, FTAG, &ds)) {
274990b4856Slling 					rw_exit(&dp->dp_config_rwlock);
275990b4856Slling 					break;
276990b4856Slling 				}
277990b4856Slling 
278990b4856Slling 				strval = kmem_alloc(
279990b4856Slling 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
280990b4856Slling 				    KM_SLEEP);
281990b4856Slling 				dsl_dataset_name(ds, strval);
282745cd3c5Smaybee 				dsl_dataset_rele(ds, FTAG);
283990b4856Slling 				rw_exit(&dp->dp_config_rwlock);
284990b4856Slling 			} else {
285990b4856Slling 				strval = NULL;
286990b4856Slling 				intval = za.za_first_integer;
287990b4856Slling 			}
288990b4856Slling 
2899d82f4f6Slling 			spa_prop_add_list(*nvp, prop, strval, intval, src);
290990b4856Slling 
291990b4856Slling 			if (strval != NULL)
292990b4856Slling 				kmem_free(strval,
293990b4856Slling 				    MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
294990b4856Slling 
295990b4856Slling 			break;
296990b4856Slling 
297990b4856Slling 		case 1:
298990b4856Slling 			/* string property */
299990b4856Slling 			strval = kmem_alloc(za.za_num_integers, KM_SLEEP);
300990b4856Slling 			err = zap_lookup(mos, spa->spa_pool_props_object,
301990b4856Slling 			    za.za_name, 1, za.za_num_integers, strval);
302990b4856Slling 			if (err) {
303990b4856Slling 				kmem_free(strval, za.za_num_integers);
304990b4856Slling 				break;
305990b4856Slling 			}
3069d82f4f6Slling 			spa_prop_add_list(*nvp, prop, strval, 0, src);
307990b4856Slling 			kmem_free(strval, za.za_num_integers);
308990b4856Slling 			break;
309990b4856Slling 
310990b4856Slling 		default:
311990b4856Slling 			break;
312990b4856Slling 		}
313990b4856Slling 	}
314990b4856Slling 	zap_cursor_fini(&zc);
315990b4856Slling 	mutex_exit(&spa->spa_props_lock);
316990b4856Slling out:
317990b4856Slling 	if (err && err != ENOENT) {
318990b4856Slling 		nvlist_free(*nvp);
3199d82f4f6Slling 		*nvp = NULL;
320990b4856Slling 		return (err);
321990b4856Slling 	}
322990b4856Slling 
323990b4856Slling 	return (0);
324990b4856Slling }
325990b4856Slling 
326990b4856Slling /*
327990b4856Slling  * Validate the given pool properties nvlist and modify the list
328990b4856Slling  * for the property values to be set.
329990b4856Slling  */
330990b4856Slling static int
331990b4856Slling spa_prop_validate(spa_t *spa, nvlist_t *props)
332990b4856Slling {
333990b4856Slling 	nvpair_t *elem;
334990b4856Slling 	int error = 0, reset_bootfs = 0;
335990b4856Slling 	uint64_t objnum;
336990b4856Slling 
337990b4856Slling 	elem = NULL;
338990b4856Slling 	while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
339990b4856Slling 		zpool_prop_t prop;
340990b4856Slling 		char *propname, *strval;
341990b4856Slling 		uint64_t intval;
342990b4856Slling 		objset_t *os;
3432f8aaab3Seschrock 		char *slash;
344990b4856Slling 
345990b4856Slling 		propname = nvpair_name(elem);
346990b4856Slling 
347990b4856Slling 		if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
348990b4856Slling 			return (EINVAL);
349990b4856Slling 
350990b4856Slling 		switch (prop) {
351990b4856Slling 		case ZPOOL_PROP_VERSION:
352990b4856Slling 			error = nvpair_value_uint64(elem, &intval);
353990b4856Slling 			if (!error &&
354990b4856Slling 			    (intval < spa_version(spa) || intval > SPA_VERSION))
355990b4856Slling 				error = EINVAL;
356990b4856Slling 			break;
357990b4856Slling 
358990b4856Slling 		case ZPOOL_PROP_DELEGATION:
359990b4856Slling 		case ZPOOL_PROP_AUTOREPLACE:
360d5b5bb25SRich Morris 		case ZPOOL_PROP_LISTSNAPS:
361573ca77eSGeorge Wilson 		case ZPOOL_PROP_AUTOEXPAND:
362990b4856Slling 			error = nvpair_value_uint64(elem, &intval);
363990b4856Slling 			if (!error && intval > 1)
364990b4856Slling 				error = EINVAL;
365990b4856Slling 			break;
366990b4856Slling 
367990b4856Slling 		case ZPOOL_PROP_BOOTFS:
36825f89ee2SJeff Bonwick 			/*
36925f89ee2SJeff Bonwick 			 * If the pool version is less than SPA_VERSION_BOOTFS,
37025f89ee2SJeff Bonwick 			 * or the pool is still being created (version == 0),
37125f89ee2SJeff Bonwick 			 * the bootfs property cannot be set.
37225f89ee2SJeff Bonwick 			 */
373990b4856Slling 			if (spa_version(spa) < SPA_VERSION_BOOTFS) {
374990b4856Slling 				error = ENOTSUP;
375990b4856Slling 				break;
376990b4856Slling 			}
377990b4856Slling 
378990b4856Slling 			/*
37915e6edf1Sgw 			 * Make sure the vdev config is bootable
380990b4856Slling 			 */
38115e6edf1Sgw 			if (!vdev_is_bootable(spa->spa_root_vdev)) {
382990b4856Slling 				error = ENOTSUP;
383990b4856Slling 				break;
384990b4856Slling 			}
385990b4856Slling 
386990b4856Slling 			reset_bootfs = 1;
387990b4856Slling 
388990b4856Slling 			error = nvpair_value_string(elem, &strval);
389990b4856Slling 
390990b4856Slling 			if (!error) {
39115e6edf1Sgw 				uint64_t compress;
39215e6edf1Sgw 
393990b4856Slling 				if (strval == NULL || strval[0] == '\0') {
394990b4856Slling 					objnum = zpool_prop_default_numeric(
395990b4856Slling 					    ZPOOL_PROP_BOOTFS);
396990b4856Slling 					break;
397990b4856Slling 				}
398990b4856Slling 
399503ad85cSMatthew Ahrens 				if (error = dmu_objset_hold(strval, FTAG, &os))
400990b4856Slling 					break;
40115e6edf1Sgw 
402503ad85cSMatthew Ahrens 				/* Must be ZPL and not gzip compressed. */
403503ad85cSMatthew Ahrens 
404503ad85cSMatthew Ahrens 				if (dmu_objset_type(os) != DMU_OST_ZFS) {
405503ad85cSMatthew Ahrens 					error = ENOTSUP;
406503ad85cSMatthew Ahrens 				} else if ((error = dsl_prop_get_integer(strval,
40715e6edf1Sgw 				    zfs_prop_to_name(ZFS_PROP_COMPRESSION),
40815e6edf1Sgw 				    &compress, NULL)) == 0 &&
40915e6edf1Sgw 				    !BOOTFS_COMPRESS_VALID(compress)) {
41015e6edf1Sgw 					error = ENOTSUP;
41115e6edf1Sgw 				} else {
41215e6edf1Sgw 					objnum = dmu_objset_id(os);
41315e6edf1Sgw 				}
414503ad85cSMatthew Ahrens 				dmu_objset_rele(os, FTAG);
415990b4856Slling 			}
416990b4856Slling 			break;
417e14bb325SJeff Bonwick 
4180a4e9518Sgw 		case ZPOOL_PROP_FAILUREMODE:
4190a4e9518Sgw 			error = nvpair_value_uint64(elem, &intval);
4200a4e9518Sgw 			if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
4210a4e9518Sgw 			    intval > ZIO_FAILURE_MODE_PANIC))
4220a4e9518Sgw 				error = EINVAL;
4230a4e9518Sgw 
4240a4e9518Sgw 			/*
4250a4e9518Sgw 			 * This is a special case which only occurs when
4260a4e9518Sgw 			 * the pool has completely failed. This allows
4270a4e9518Sgw 			 * the user to change the in-core failmode property
4280a4e9518Sgw 			 * without syncing it out to disk (I/Os might
4290a4e9518Sgw 			 * currently be blocked). We do this by returning
4300a4e9518Sgw 			 * EIO to the caller (spa_prop_set) to trick it
4310a4e9518Sgw 			 * into thinking we encountered a property validation
4320a4e9518Sgw 			 * error.
4330a4e9518Sgw 			 */
434e14bb325SJeff Bonwick 			if (!error && spa_suspended(spa)) {
4350a4e9518Sgw 				spa->spa_failmode = intval;
4360a4e9518Sgw 				error = EIO;
4370a4e9518Sgw 			}
4380a4e9518Sgw 			break;
4392f8aaab3Seschrock 
4402f8aaab3Seschrock 		case ZPOOL_PROP_CACHEFILE:
4412f8aaab3Seschrock 			if ((error = nvpair_value_string(elem, &strval)) != 0)
4422f8aaab3Seschrock 				break;
4432f8aaab3Seschrock 
4442f8aaab3Seschrock 			if (strval[0] == '\0')
4452f8aaab3Seschrock 				break;
4462f8aaab3Seschrock 
4472f8aaab3Seschrock 			if (strcmp(strval, "none") == 0)
4482f8aaab3Seschrock 				break;
4492f8aaab3Seschrock 
4502f8aaab3Seschrock 			if (strval[0] != '/') {
4512f8aaab3Seschrock 				error = EINVAL;
4522f8aaab3Seschrock 				break;
4532f8aaab3Seschrock 			}
4542f8aaab3Seschrock 
4552f8aaab3Seschrock 			slash = strrchr(strval, '/');
4562f8aaab3Seschrock 			ASSERT(slash != NULL);
4572f8aaab3Seschrock 
4582f8aaab3Seschrock 			if (slash[1] == '\0' || strcmp(slash, "/.") == 0 ||
4592f8aaab3Seschrock 			    strcmp(slash, "/..") == 0)
4602f8aaab3Seschrock 				error = EINVAL;
4612f8aaab3Seschrock 			break;
462b24ab676SJeff Bonwick 
463b24ab676SJeff Bonwick 		case ZPOOL_PROP_DEDUPDITTO:
464b24ab676SJeff Bonwick 			if (spa_version(spa) < SPA_VERSION_DEDUP)
465b24ab676SJeff Bonwick 				error = ENOTSUP;
466b24ab676SJeff Bonwick 			else
467b24ab676SJeff Bonwick 				error = nvpair_value_uint64(elem, &intval);
468b24ab676SJeff Bonwick 			if (error == 0 &&
469b24ab676SJeff Bonwick 			    intval != 0 && intval < ZIO_DEDUPDITTO_MIN)
470b24ab676SJeff Bonwick 				error = EINVAL;
471b24ab676SJeff Bonwick 			break;
472990b4856Slling 		}
473990b4856Slling 
474990b4856Slling 		if (error)
475990b4856Slling 			break;
476990b4856Slling 	}
477990b4856Slling 
478990b4856Slling 	if (!error && reset_bootfs) {
479990b4856Slling 		error = nvlist_remove(props,
480990b4856Slling 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), DATA_TYPE_STRING);
481990b4856Slling 
482990b4856Slling 		if (!error) {
483990b4856Slling 			error = nvlist_add_uint64(props,
484990b4856Slling 			    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), objnum);
485990b4856Slling 		}
486990b4856Slling 	}
487990b4856Slling 
488990b4856Slling 	return (error);
489990b4856Slling }
490990b4856Slling 
491379c004dSEric Schrock void
492379c004dSEric Schrock spa_configfile_set(spa_t *spa, nvlist_t *nvp, boolean_t need_sync)
493379c004dSEric Schrock {
494379c004dSEric Schrock 	char *cachefile;
495379c004dSEric Schrock 	spa_config_dirent_t *dp;
496379c004dSEric Schrock 
497379c004dSEric Schrock 	if (nvlist_lookup_string(nvp, zpool_prop_to_name(ZPOOL_PROP_CACHEFILE),
498379c004dSEric Schrock 	    &cachefile) != 0)
499379c004dSEric Schrock 		return;
500379c004dSEric Schrock 
501379c004dSEric Schrock 	dp = kmem_alloc(sizeof (spa_config_dirent_t),
502379c004dSEric Schrock 	    KM_SLEEP);
503379c004dSEric Schrock 
504379c004dSEric Schrock 	if (cachefile[0] == '\0')
505379c004dSEric Schrock 		dp->scd_path = spa_strdup(spa_config_path);
506379c004dSEric Schrock 	else if (strcmp(cachefile, "none") == 0)
507379c004dSEric Schrock 		dp->scd_path = NULL;
508379c004dSEric Schrock 	else
509379c004dSEric Schrock 		dp->scd_path = spa_strdup(cachefile);
510379c004dSEric Schrock 
511379c004dSEric Schrock 	list_insert_head(&spa->spa_config_list, dp);
512379c004dSEric Schrock 	if (need_sync)
513379c004dSEric Schrock 		spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
514379c004dSEric Schrock }
515379c004dSEric Schrock 
516990b4856Slling int
517990b4856Slling spa_prop_set(spa_t *spa, nvlist_t *nvp)
518990b4856Slling {
519990b4856Slling 	int error;
520379c004dSEric Schrock 	nvpair_t *elem;
521379c004dSEric Schrock 	boolean_t need_sync = B_FALSE;
522379c004dSEric Schrock 	zpool_prop_t prop;
523990b4856Slling 
524990b4856Slling 	if ((error = spa_prop_validate(spa, nvp)) != 0)
525990b4856Slling 		return (error);
526990b4856Slling 
527379c004dSEric Schrock 	elem = NULL;
528379c004dSEric Schrock 	while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
529379c004dSEric Schrock 		if ((prop = zpool_name_to_prop(
530379c004dSEric Schrock 		    nvpair_name(elem))) == ZPROP_INVAL)
531379c004dSEric Schrock 			return (EINVAL);
532379c004dSEric Schrock 
533379c004dSEric Schrock 		if (prop == ZPOOL_PROP_CACHEFILE || prop == ZPOOL_PROP_ALTROOT)
534379c004dSEric Schrock 			continue;
535379c004dSEric Schrock 
536379c004dSEric Schrock 		need_sync = B_TRUE;
537379c004dSEric Schrock 		break;
538379c004dSEric Schrock 	}
539379c004dSEric Schrock 
540379c004dSEric Schrock 	if (need_sync)
541379c004dSEric Schrock 		return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
542379c004dSEric Schrock 		    spa, nvp, 3));
543379c004dSEric Schrock 	else
544379c004dSEric Schrock 		return (0);
545990b4856Slling }
546990b4856Slling 
547990b4856Slling /*
548990b4856Slling  * If the bootfs property value is dsobj, clear it.
549990b4856Slling  */
550990b4856Slling void
551990b4856Slling spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
552990b4856Slling {
553990b4856Slling 	if (spa->spa_bootfs == dsobj && spa->spa_pool_props_object != 0) {
554990b4856Slling 		VERIFY(zap_remove(spa->spa_meta_objset,
555990b4856Slling 		    spa->spa_pool_props_object,
556990b4856Slling 		    zpool_prop_to_name(ZPOOL_PROP_BOOTFS), tx) == 0);
557990b4856Slling 		spa->spa_bootfs = 0;
558990b4856Slling 	}
559990b4856Slling }
560990b4856Slling 
561fa9e4066Sahrens /*
562fa9e4066Sahrens  * ==========================================================================
563fa9e4066Sahrens  * SPA state manipulation (open/create/destroy/import/export)
564fa9e4066Sahrens  * ==========================================================================
565fa9e4066Sahrens  */
566fa9e4066Sahrens 
567ea8dc4b6Seschrock static int
568ea8dc4b6Seschrock spa_error_entry_compare(const void *a, const void *b)
569ea8dc4b6Seschrock {
570ea8dc4b6Seschrock 	spa_error_entry_t *sa = (spa_error_entry_t *)a;
571ea8dc4b6Seschrock 	spa_error_entry_t *sb = (spa_error_entry_t *)b;
572ea8dc4b6Seschrock 	int ret;
573ea8dc4b6Seschrock 
574ea8dc4b6Seschrock 	ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
575ea8dc4b6Seschrock 	    sizeof (zbookmark_t));
576ea8dc4b6Seschrock 
577ea8dc4b6Seschrock 	if (ret < 0)
578ea8dc4b6Seschrock 		return (-1);
579ea8dc4b6Seschrock 	else if (ret > 0)
580ea8dc4b6Seschrock 		return (1);
581ea8dc4b6Seschrock 	else
582ea8dc4b6Seschrock 		return (0);
583ea8dc4b6Seschrock }
584ea8dc4b6Seschrock 
585ea8dc4b6Seschrock /*
586ea8dc4b6Seschrock  * Utility function which retrieves copies of the current logs and
587ea8dc4b6Seschrock  * re-initializes them in the process.
588ea8dc4b6Seschrock  */
589ea8dc4b6Seschrock void
590ea8dc4b6Seschrock spa_get_errlists(spa_t *spa, avl_tree_t *last, avl_tree_t *scrub)
591ea8dc4b6Seschrock {
592ea8dc4b6Seschrock 	ASSERT(MUTEX_HELD(&spa->spa_errlist_lock));
593ea8dc4b6Seschrock 
594ea8dc4b6Seschrock 	bcopy(&spa->spa_errlist_last, last, sizeof (avl_tree_t));
595ea8dc4b6Seschrock 	bcopy(&spa->spa_errlist_scrub, scrub, sizeof (avl_tree_t));
596ea8dc4b6Seschrock 
597ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_scrub,
598ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
599ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
600ea8dc4b6Seschrock 	avl_create(&spa->spa_errlist_last,
601ea8dc4b6Seschrock 	    spa_error_entry_compare, sizeof (spa_error_entry_t),
602ea8dc4b6Seschrock 	    offsetof(spa_error_entry_t, se_avl));
603ea8dc4b6Seschrock }
604ea8dc4b6Seschrock 
60535a5a358SJonathan Adams static taskq_t *
60635a5a358SJonathan Adams spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
60735a5a358SJonathan Adams     uint_t value)
608fa9e4066Sahrens {
60935a5a358SJonathan Adams 	uint_t flags = TASKQ_PREPOPULATE;
61035a5a358SJonathan Adams 	boolean_t batch = B_FALSE;
611fa9e4066Sahrens 
61235a5a358SJonathan Adams 	switch (mode) {
61335a5a358SJonathan Adams 	case zti_mode_null:
61435a5a358SJonathan Adams 		return (NULL);		/* no taskq needed */
615fa9e4066Sahrens 
61635a5a358SJonathan Adams 	case zti_mode_fixed:
61735a5a358SJonathan Adams 		ASSERT3U(value, >=, 1);
61835a5a358SJonathan Adams 		value = MAX(value, 1);
61935a5a358SJonathan Adams 		break;
620fa9e4066Sahrens 
62135a5a358SJonathan Adams 	case zti_mode_batch:
62235a5a358SJonathan Adams 		batch = B_TRUE;
62335a5a358SJonathan Adams 		flags |= TASKQ_THREADS_CPU_PCT;
62435a5a358SJonathan Adams 		value = zio_taskq_batch_pct;
62535a5a358SJonathan Adams 		break;
62635a5a358SJonathan Adams 
62735a5a358SJonathan Adams 	case zti_mode_online_percent:
62835a5a358SJonathan Adams 		flags |= TASKQ_THREADS_CPU_PCT;
62935a5a358SJonathan Adams 		break;
63035a5a358SJonathan Adams 
63135a5a358SJonathan Adams 	default:
63235a5a358SJonathan Adams 		panic("unrecognized mode for %s taskq (%u:%u) in "
63335a5a358SJonathan Adams 		    "spa_activate()",
63435a5a358SJonathan Adams 		    name, mode, value);
63535a5a358SJonathan Adams 		break;
63635a5a358SJonathan Adams 	}
63735a5a358SJonathan Adams 
63835a5a358SJonathan Adams 	if (zio_taskq_sysdc && spa->spa_proc != &p0) {
63935a5a358SJonathan Adams 		if (batch)
64035a5a358SJonathan Adams 			flags |= TASKQ_DC_BATCH;
64135a5a358SJonathan Adams 
64235a5a358SJonathan Adams 		return (taskq_create_sysdc(name, value, 50, INT_MAX,
64335a5a358SJonathan Adams 		    spa->spa_proc, zio_taskq_basedc, flags));
64435a5a358SJonathan Adams 	}
64535a5a358SJonathan Adams 	return (taskq_create_proc(name, value, maxclsyspri, 50, INT_MAX,
64635a5a358SJonathan Adams 	    spa->spa_proc, flags));
64735a5a358SJonathan Adams }
64835a5a358SJonathan Adams 
64935a5a358SJonathan Adams static void
65035a5a358SJonathan Adams spa_create_zio_taskqs(spa_t *spa)
65135a5a358SJonathan Adams {
652e14bb325SJeff Bonwick 	for (int t = 0; t < ZIO_TYPES; t++) {
653e14bb325SJeff Bonwick 		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
65480eb36f2SGeorge Wilson 			const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
65580eb36f2SGeorge Wilson 			enum zti_modes mode = ztip->zti_mode;
65680eb36f2SGeorge Wilson 			uint_t value = ztip->zti_value;
6572e0c549eSJonathan Adams 			char name[32];
6582e0c549eSJonathan Adams 
6592e0c549eSJonathan Adams 			(void) snprintf(name, sizeof (name),
66080eb36f2SGeorge Wilson 			    "%s_%s", zio_type_name[t], zio_taskq_types[q]);
6612e0c549eSJonathan Adams 
66235a5a358SJonathan Adams 			spa->spa_zio_taskq[t][q] =
66335a5a358SJonathan Adams 			    spa_taskq_create(spa, name, mode, value);
66435a5a358SJonathan Adams 		}
66535a5a358SJonathan Adams 	}
66635a5a358SJonathan Adams }
66735a5a358SJonathan Adams 
66835a5a358SJonathan Adams #ifdef _KERNEL
/*
 * Body of the dedicated "zpool-<name>" process created by spa_activate()
 * via newproc().  It labels the process, optionally binds it to a
 * processor set and enters the sysdc scheduling class, creates the pool's
 * zio taskqs, and then parks until spa_deactivate() asks it to exit.
 */
static void
spa_thread(void *arg)
{
	callb_cpr_t cprinfo;

	spa_t *spa = arg;
	user_t *pu = PTOU(curproc);

	/* Make this thread CPR (suspend/resume) safe while it waits. */
	CALLB_CPR_INIT(&cprinfo, &spa->spa_proc_lock, callb_generic_cpr,
	    spa->spa_name);

	ASSERT(curproc != &p0);
	/* Label the process "zpool-<pool>" so it is identifiable in ps(1). */
	(void) snprintf(pu->u_psargs, sizeof (pu->u_psargs),
	    "zpool-%s", spa->spa_name);
	(void) strlcpy(pu->u_comm, pu->u_psargs, sizeof (pu->u_comm));

	/* bind this thread to the requested psrset */
	if (zio_taskq_psrset_bind != PS_NONE) {
		/*
		 * Lock ordering required by cpupart_bind_thread():
		 * pool_lock -> cpu_lock -> pidlock -> p_lock.
		 */
		pool_lock();
		mutex_enter(&cpu_lock);
		mutex_enter(&pidlock);
		mutex_enter(&curproc->p_lock);

		if (cpupart_bind_thread(curthread, zio_taskq_psrset_bind,
		    0, NULL, NULL) == 0)  {
			curthread->t_bind_pset = zio_taskq_psrset_bind;
		} else {
			/* Binding failure is non-fatal; just warn. */
			cmn_err(CE_WARN,
			    "Couldn't bind process for zfs pool \"%s\" to "
			    "pset %d\n", spa->spa_name, zio_taskq_psrset_bind);
		}

		mutex_exit(&curproc->p_lock);
		mutex_exit(&pidlock);
		mutex_exit(&cpu_lock);
		pool_unlock();
	}

	/* Optionally run under the sysdc (system duty-cycle) class. */
	if (zio_taskq_sysdc) {
		sysdc_thread_enter(curthread, 100, 0);
	}

	/* Record our identity so spa_deactivate() can find and join us. */
	spa->spa_proc = curproc;
	spa->spa_did = curthread->t_did;

	/* Taskqs created here are owned by this process. */
	spa_create_zio_taskqs(spa);

	mutex_enter(&spa->spa_proc_lock);
	ASSERT(spa->spa_proc_state == SPA_PROC_CREATED);

	/* Tell the waiting spa_activate() that we are up and running. */
	spa->spa_proc_state = SPA_PROC_ACTIVE;
	cv_broadcast(&spa->spa_proc_cv);

	/* Park here until spa_deactivate() moves the state to DEACTIVATE. */
	CALLB_CPR_SAFE_BEGIN(&cprinfo);
	while (spa->spa_proc_state == SPA_PROC_ACTIVE)
		cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
	CALLB_CPR_SAFE_END(&cprinfo, &spa->spa_proc_lock);

	/* Acknowledge the request and hand spa_proc back to p0. */
	ASSERT(spa->spa_proc_state == SPA_PROC_DEACTIVATE);
	spa->spa_proc_state = SPA_PROC_GONE;
	spa->spa_proc = &p0;
	cv_broadcast(&spa->spa_proc_cv);
	CALLB_CPR_EXIT(&cprinfo);	/* drops spa_proc_lock */

	mutex_enter(&curproc->p_lock);
	lwp_exit();
}
73635a5a358SJonathan Adams #endif
73735a5a358SJonathan Adams 
73835a5a358SJonathan Adams /*
73935a5a358SJonathan Adams  * Activate an uninitialized pool.
74035a5a358SJonathan Adams  */
static void
spa_activate(spa_t *spa, int mode)
{
	ASSERT(spa->spa_state == POOL_STATE_UNINITIALIZED);

	spa->spa_state = POOL_STATE_ACTIVE;
	spa->spa_mode = mode;

	/* Allocation classes: one for normal data, one for log blocks. */
	spa->spa_normal_class = metaslab_class_create(spa, zfs_metaslab_ops);
	spa->spa_log_class = metaslab_class_create(spa, zfs_metaslab_ops);

	/* Try to create a covering process */
	mutex_enter(&spa->spa_proc_lock);
	ASSERT(spa->spa_proc_state == SPA_PROC_NONE);
	ASSERT(spa->spa_proc == &p0);
	spa->spa_did = 0;

	/* Only create a process if we're going to be around a while. */
	if (spa_create_process && strcmp(spa->spa_name, TRYIMPORT_NAME) != 0) {
		if (newproc(spa_thread, (caddr_t)spa, syscid, maxclsyspri,
		    NULL, 0) == 0) {
			spa->spa_proc_state = SPA_PROC_CREATED;
			/*
			 * Wait for spa_thread() to acknowledge creation by
			 * advancing the state to SPA_PROC_ACTIVE; it also
			 * fills in spa_proc and spa_did before doing so.
			 */
			while (spa->spa_proc_state == SPA_PROC_CREATED) {
				cv_wait(&spa->spa_proc_cv,
				    &spa->spa_proc_lock);
			}
			ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
			ASSERT(spa->spa_proc != &p0);
			ASSERT(spa->spa_did != 0);
		} else {
#ifdef _KERNEL
			/* Fall back to taskq creation in our own context. */
			cmn_err(CE_WARN,
			    "Couldn't create process for zfs pool \"%s\"\n",
			    spa->spa_name);
#endif
		}
	}
	mutex_exit(&spa->spa_proc_lock);

	/* If we didn't create a process, we need to create our taskqs. */
	if (spa->spa_proc == &p0) {
		spa_create_zio_taskqs(spa);
	}

	/* Lists of vdevs with dirty config or state, and the per-txg list. */
	list_create(&spa->spa_config_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_config_dirty_node));
	list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
	    offsetof(vdev_t, vdev_state_dirty_node));

	txg_list_create(&spa->spa_vdev_txg_list,
	    offsetof(struct vdev, vdev_txg_node));

	/* Error-entry AVL trees; drained/destroyed in spa_deactivate(). */
	avl_create(&spa->spa_errlist_scrub,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
	avl_create(&spa->spa_errlist_last,
	    spa_error_entry_compare, sizeof (spa_error_entry_t),
	    offsetof(spa_error_entry_t, se_avl));
}
800fa9e4066Sahrens 
801fa9e4066Sahrens /*
802fa9e4066Sahrens  * Opposite of spa_activate().
803fa9e4066Sahrens  */
static void
spa_deactivate(spa_t *spa)
{
	/* The caller must already have torn down syncing and the vdev tree. */
	ASSERT(spa->spa_sync_on == B_FALSE);
	ASSERT(spa->spa_dsl_pool == NULL);
	ASSERT(spa->spa_root_vdev == NULL);
	ASSERT(spa->spa_async_zio_root == NULL);
	ASSERT(spa->spa_state != POOL_STATE_UNINITIALIZED);

	txg_list_destroy(&spa->spa_vdev_txg_list);

	list_destroy(&spa->spa_config_dirty_list);
	list_destroy(&spa->spa_state_dirty_list);

	/* Tear down the zio taskqs created by spa_create_zio_taskqs(). */
	for (int t = 0; t < ZIO_TYPES; t++) {
		for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
			if (spa->spa_zio_taskq[t][q] != NULL)
				taskq_destroy(spa->spa_zio_taskq[t][q]);
			spa->spa_zio_taskq[t][q] = NULL;
		}
	}

	metaslab_class_destroy(spa->spa_normal_class);
	spa->spa_normal_class = NULL;

	metaslab_class_destroy(spa->spa_log_class);
	spa->spa_log_class = NULL;

	/*
	 * If this was part of an import or the open otherwise failed, we may
	 * still have errors left in the queues.  Empty them just in case.
	 */
	spa_errlog_drain(spa);

	avl_destroy(&spa->spa_errlist_scrub);
	avl_destroy(&spa->spa_errlist_last);

	spa->spa_state = POOL_STATE_UNINITIALIZED;

	/*
	 * If spa_thread() is covering this pool, ask it to exit and wait
	 * for it to acknowledge by moving the state to SPA_PROC_GONE.
	 */
	mutex_enter(&spa->spa_proc_lock);
	if (spa->spa_proc_state != SPA_PROC_NONE) {
		ASSERT(spa->spa_proc_state == SPA_PROC_ACTIVE);
		spa->spa_proc_state = SPA_PROC_DEACTIVATE;
		cv_broadcast(&spa->spa_proc_cv);
		while (spa->spa_proc_state == SPA_PROC_DEACTIVATE) {
			ASSERT(spa->spa_proc != &p0);
			cv_wait(&spa->spa_proc_cv, &spa->spa_proc_lock);
		}
		ASSERT(spa->spa_proc_state == SPA_PROC_GONE);
		spa->spa_proc_state = SPA_PROC_NONE;
	}
	ASSERT(spa->spa_proc == &p0);
	mutex_exit(&spa->spa_proc_lock);

	/*
	 * We want to make sure spa_thread() has actually exited the ZFS
	 * module, so that the module can't be unloaded out from underneath
	 * it.
	 */
	if (spa->spa_did != 0) {
		thread_join(spa->spa_did);
		spa->spa_did = 0;
	}
}
868fa9e4066Sahrens 
869fa9e4066Sahrens /*
870fa9e4066Sahrens  * Verify a pool configuration, and construct the vdev tree appropriately.  This
871fa9e4066Sahrens  * will create all the necessary vdevs in the appropriate layout, with each vdev
872fa9e4066Sahrens  * in the CLOSED state.  This will prep the pool before open/creation/import.
873fa9e4066Sahrens  * All vdev validation is done by the vdev_alloc() routine.
874fa9e4066Sahrens  */
87599653d4eSeschrock static int
87699653d4eSeschrock spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
87799653d4eSeschrock     uint_t id, int atype)
878fa9e4066Sahrens {
879fa9e4066Sahrens 	nvlist_t **child;
880573ca77eSGeorge Wilson 	uint_t children;
88199653d4eSeschrock 	int error;
882fa9e4066Sahrens 
88399653d4eSeschrock 	if ((error = vdev_alloc(spa, vdp, nv, parent, id, atype)) != 0)
88499653d4eSeschrock 		return (error);
885fa9e4066Sahrens 
88699653d4eSeschrock 	if ((*vdp)->vdev_ops->vdev_op_leaf)
88799653d4eSeschrock 		return (0);
888fa9e4066Sahrens 
889e14bb325SJeff Bonwick 	error = nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
890e14bb325SJeff Bonwick 	    &child, &children);
891e14bb325SJeff Bonwick 
892e14bb325SJeff Bonwick 	if (error == ENOENT)
893e14bb325SJeff Bonwick 		return (0);
894e14bb325SJeff Bonwick 
895e14bb325SJeff Bonwick 	if (error) {
89699653d4eSeschrock 		vdev_free(*vdp);
89799653d4eSeschrock 		*vdp = NULL;
89899653d4eSeschrock 		return (EINVAL);
899fa9e4066Sahrens 	}
900fa9e4066Sahrens 
901573ca77eSGeorge Wilson 	for (int c = 0; c < children; c++) {
90299653d4eSeschrock 		vdev_t *vd;
90399653d4eSeschrock 		if ((error = spa_config_parse(spa, &vd, child[c], *vdp, c,
90499653d4eSeschrock 		    atype)) != 0) {
90599653d4eSeschrock 			vdev_free(*vdp);
90699653d4eSeschrock 			*vdp = NULL;
90799653d4eSeschrock 			return (error);
908fa9e4066Sahrens 		}
909fa9e4066Sahrens 	}
910fa9e4066Sahrens 
91199653d4eSeschrock 	ASSERT(*vdp != NULL);
91299653d4eSeschrock 
91399653d4eSeschrock 	return (0);
914fa9e4066Sahrens }
915fa9e4066Sahrens 
916fa9e4066Sahrens /*
917fa9e4066Sahrens  * Opposite of spa_load().
918fa9e4066Sahrens  */
static void
spa_unload(spa_t *spa)
{
	int i;

	ASSERT(MUTEX_HELD(&spa_namespace_lock));

	/*
	 * Stop async tasks.
	 */
	spa_async_suspend(spa);

	/*
	 * Stop syncing.
	 */
	if (spa->spa_sync_on) {
		txg_sync_stop(spa->spa_dsl_pool);
		spa->spa_sync_on = B_FALSE;
	}

	/*
	 * Wait for any outstanding async I/O to complete.
	 */
	if (spa->spa_async_zio_root != NULL) {
		(void) zio_wait(spa->spa_async_zio_root);
		spa->spa_async_zio_root = NULL;
	}

	bpobj_close(&spa->spa_deferred_bpobj);

	/*
	 * Close the dsl pool.
	 */
	if (spa->spa_dsl_pool) {
		dsl_pool_close(spa->spa_dsl_pool);
		spa->spa_dsl_pool = NULL;
		spa->spa_meta_objset = NULL;
	}

	/* Unload the dedup tables. */
	ddt_unload(spa);

	/* Take all config locks as writer for the vdev teardown below. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * Drop and purge level 2 cache
	 */
	spa_l2cache_drop(spa);

	/*
	 * Close all vdevs.
	 */
	if (spa->spa_root_vdev)
		vdev_free(spa->spa_root_vdev);
	ASSERT(spa->spa_root_vdev == NULL);

	/* Free the hot-spare aux vdevs and their stashed config. */
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		vdev_free(spa->spa_spares.sav_vdevs[i]);
	if (spa->spa_spares.sav_vdevs) {
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));
		spa->spa_spares.sav_vdevs = NULL;
	}
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
	}
	spa->spa_spares.sav_count = 0;

	/* Likewise for the l2cache aux vdevs. */
	for (i = 0; i < spa->spa_l2cache.sav_count; i++)
		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
	if (spa->spa_l2cache.sav_vdevs) {
		kmem_free(spa->spa_l2cache.sav_vdevs,
		    spa->spa_l2cache.sav_count * sizeof (void *));
		spa->spa_l2cache.sav_vdevs = NULL;
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
	}
	spa->spa_l2cache.sav_count = 0;

	spa->spa_async_suspended = 0;

	spa_config_exit(spa, SCL_ALL, FTAG);
}
1004fa9e4066Sahrens 
100599653d4eSeschrock /*
100699653d4eSeschrock  * Load (or re-load) the current list of vdevs describing the active spares for
100799653d4eSeschrock  * this pool.  When this is called, we have some form of basic information in
1008fa94a07fSbrendan  * 'spa_spares.sav_config'.  We parse this into vdevs, try to open them, and
1009fa94a07fSbrendan  * then re-generate a more complete list including status information.
101099653d4eSeschrock  */
static void
spa_load_spares(spa_t *spa)
{
	nvlist_t **spares;
	uint_t nspares;
	int i;
	vdev_t *vd, *tvd;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * First, close and free any existing spare vdevs.
	 */
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		vd = spa->spa_spares.sav_vdevs[i];

		/* Undo the call to spa_activate() below */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL && tvd->vdev_isspare)
			spa_spare_remove(tvd);
		vdev_close(vd);
		vdev_free(vd);
	}

	if (spa->spa_spares.sav_vdevs)
		kmem_free(spa->spa_spares.sav_vdevs,
		    spa->spa_spares.sav_count * sizeof (void *));

	/* An absent spares config means this pool has no spares. */
	if (spa->spa_spares.sav_config == NULL)
		nspares = 0;
	else
		VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);

	spa->spa_spares.sav_count = (int)nspares;
	spa->spa_spares.sav_vdevs = NULL;

	if (nspares == 0)
		return;

	/*
	 * Construct the array of vdevs, opening them to get status in the
	 * process.   For each spare, there is potentially two different vdev_t
	 * structures associated with it: one in the list of spares (used only
	 * for basic validation purposes) and one in the active vdev
	 * configuration (if it's spared in).  During this phase we open and
	 * validate each vdev on the spare list.  If the vdev also exists in the
	 * active configuration, then we also mark this vdev as an active spare.
	 */
	spa->spa_spares.sav_vdevs = kmem_alloc(nspares * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++) {
		VERIFY(spa_config_parse(spa, &vd, spares[i], NULL, 0,
		    VDEV_ALLOC_SPARE) == 0);
		ASSERT(vd != NULL);

		spa->spa_spares.sav_vdevs[i] = vd;

		/* Is this spare also part of the active configuration? */
		if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
		    B_FALSE)) != NULL) {
			if (!tvd->vdev_isspare)
				spa_spare_add(tvd);

			/*
			 * We only mark the spare active if we were successfully
			 * able to load the vdev.  Otherwise, importing a pool
			 * with a bad active spare would result in strange
			 * behavior, because multiple pool would think the spare
			 * is actively in use.
			 *
			 * There is a vulnerability here to an equally bizarre
			 * circumstance, where a dead active spare is later
			 * brought back to life (onlined or otherwise).  Given
			 * the rarity of this scenario, and the extra complexity
			 * it adds, we ignore the possibility.
			 */
			if (!vdev_is_dead(tvd))
				spa_spare_activate(tvd);
		}

		/* Each spare is its own top-level vdev on the aux list. */
		vd->vdev_top = vd;
		vd->vdev_aux = &spa->spa_spares;

		if (vdev_open(vd) != 0)
			continue;

		if (vdev_validate_aux(vd) == 0)
			spa_spare_add(vd);
	}

	/*
	 * Recompute the stashed list of spares, with status information
	 * this time.
	 */
	VERIFY(nvlist_remove(spa->spa_spares.sav_config, ZPOOL_CONFIG_SPARES,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	spares = kmem_alloc(spa->spa_spares.sav_count * sizeof (void *),
	    KM_SLEEP);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		spares[i] = vdev_config_generate(spa,
		    spa->spa_spares.sav_vdevs[i], B_TRUE, VDEV_CONFIG_SPARE);
	VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, spares, spa->spa_spares.sav_count) == 0);
	for (i = 0; i < spa->spa_spares.sav_count; i++)
		nvlist_free(spares[i]);
	kmem_free(spares, spa->spa_spares.sav_count * sizeof (void *));
}
1119fa94a07fSbrendan 
1120fa94a07fSbrendan /*
1121fa94a07fSbrendan  * Load (or re-load) the current list of vdevs describing the active l2cache for
1122fa94a07fSbrendan  * this pool.  When this is called, we have some form of basic information in
1123fa94a07fSbrendan  * 'spa_l2cache.sav_config'.  We parse this into vdevs, try to open them, and
1124fa94a07fSbrendan  * then re-generate a more complete list including status information.
1125fa94a07fSbrendan  * Devices which are already active have their details maintained, and are
1126fa94a07fSbrendan  * not re-opened.
1127fa94a07fSbrendan  */
static void
spa_load_l2cache(spa_t *spa)
{
	nvlist_t **l2cache;
	uint_t nl2cache;
	int i, j, oldnvdevs;
	uint64_t guid;
	vdev_t *vd, **oldvdevs, **newvdevs;
	spa_aux_vdev_t *sav = &spa->spa_l2cache;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	if (sav->sav_config != NULL) {
		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
		newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
	} else {
		nl2cache = 0;
	}

	/* Detach the current list; it is rebuilt from the new nvlist. */
	oldvdevs = sav->sav_vdevs;
	oldnvdevs = sav->sav_count;
	sav->sav_vdevs = NULL;
	sav->sav_count = 0;

	/*
	 * Process new nvlist of vdevs.
	 */
	for (i = 0; i < nl2cache; i++) {
		VERIFY(nvlist_lookup_uint64(l2cache[i], ZPOOL_CONFIG_GUID,
		    &guid) == 0);

		newvdevs[i] = NULL;
		/* Match by guid against the previous list, if possible. */
		for (j = 0; j < oldnvdevs; j++) {
			vd = oldvdevs[j];
			if (vd != NULL && guid == vd->vdev_guid) {
				/*
				 * Retain previous vdev for add/remove ops.
				 */
				newvdevs[i] = vd;
				oldvdevs[j] = NULL;
				break;
			}
		}

		if (newvdevs[i] == NULL) {
			/*
			 * Create new vdev
			 */
			VERIFY(spa_config_parse(spa, &vd, l2cache[i], NULL, 0,
			    VDEV_ALLOC_L2CACHE) == 0);
			ASSERT(vd != NULL);
			newvdevs[i] = vd;

			/*
			 * Commit this vdev as an l2cache device,
			 * even if it fails to open.
			 */
			spa_l2cache_add(vd);

			/* Cache devices are standalone top-level aux vdevs. */
			vd->vdev_top = vd;
			vd->vdev_aux = sav;

			spa_l2cache_activate(vd);

			if (vdev_open(vd) != 0)
				continue;

			(void) vdev_validate_aux(vd);

			/* Hand the device to the L2ARC if it opened OK. */
			if (!vdev_is_dead(vd))
				l2arc_add_vdev(spa, vd);
		}
	}

	/*
	 * Purge vdevs that were dropped
	 */
	for (i = 0; i < oldnvdevs; i++) {
		uint64_t pool;

		vd = oldvdevs[i];
		if (vd != NULL) {
			if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
			    pool != 0ULL && l2arc_vdev_present(vd))
				l2arc_remove_vdev(vd);
			(void) vdev_close(vd);
			spa_l2cache_remove(vd);
		}
	}

	if (oldvdevs)
		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));

	/* With no config, sav_count is 0 and the cleanup at 'out' is a no-op. */
	if (sav->sav_config == NULL)
		goto out;

	sav->sav_vdevs = newvdevs;
	sav->sav_count = (int)nl2cache;

	/*
	 * Recompute the stashed list of l2cache devices, with status
	 * information this time.
	 */
	VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
	    DATA_TYPE_NVLIST_ARRAY) == 0);

	l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
	for (i = 0; i < sav->sav_count; i++)
		l2cache[i] = vdev_config_generate(spa,
		    sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
	VERIFY(nvlist_add_nvlist_array(sav->sav_config,
	    ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
out:
	for (i = 0; i < sav->sav_count; i++)
		nvlist_free(l2cache[i]);
	if (sav->sav_count)
		kmem_free(l2cache, sav->sav_count * sizeof (void *));
}
124799653d4eSeschrock 
124899653d4eSeschrock static int
124999653d4eSeschrock load_nvlist(spa_t *spa, uint64_t obj, nvlist_t **value)
125099653d4eSeschrock {
125199653d4eSeschrock 	dmu_buf_t *db;
125299653d4eSeschrock 	char *packed = NULL;
125399653d4eSeschrock 	size_t nvsize = 0;
125499653d4eSeschrock 	int error;
125599653d4eSeschrock 	*value = NULL;
125699653d4eSeschrock 
125799653d4eSeschrock 	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
125899653d4eSeschrock 	nvsize = *(uint64_t *)db->db_data;
125999653d4eSeschrock 	dmu_buf_rele(db, FTAG);
126099653d4eSeschrock 
126199653d4eSeschrock 	packed = kmem_alloc(nvsize, KM_SLEEP);
12627bfdf011SNeil Perrin 	error = dmu_read(spa->spa_meta_objset, obj, 0, nvsize, packed,
12637bfdf011SNeil Perrin 	    DMU_READ_PREFETCH);
126499653d4eSeschrock 	if (error == 0)
126599653d4eSeschrock 		error = nvlist_unpack(packed, nvsize, value, 0);
126699653d4eSeschrock 	kmem_free(packed, nvsize);
126799653d4eSeschrock 
126899653d4eSeschrock 	return (error);
126999653d4eSeschrock }
127099653d4eSeschrock 
12713d7072f8Seschrock /*
12723d7072f8Seschrock  * Checks to see if the given vdev could not be opened, in which case we post a
12733d7072f8Seschrock  * sysevent to notify the autoreplace code that the device has been removed.
12743d7072f8Seschrock  */
12753d7072f8Seschrock static void
12763d7072f8Seschrock spa_check_removed(vdev_t *vd)
12773d7072f8Seschrock {
1278573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++)
12793d7072f8Seschrock 		spa_check_removed(vd->vdev_child[c]);
12803d7072f8Seschrock 
12813d7072f8Seschrock 	if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd)) {
12823d7072f8Seschrock 		zfs_post_autoreplace(vd->vdev_spa, vd);
12833d7072f8Seschrock 		spa_event_notify(vd->vdev_spa, vd, ESC_ZFS_VDEV_CHECK);
12843d7072f8Seschrock 	}
12853d7072f8Seschrock }
12863d7072f8Seschrock 
1287e6ca193dSGeorge Wilson /*
12884b964adaSGeorge Wilson  * Validate the current config against the MOS config
1289e6ca193dSGeorge Wilson  */
12904b964adaSGeorge Wilson static boolean_t
12914b964adaSGeorge Wilson spa_config_valid(spa_t *spa, nvlist_t *config)
1292e6ca193dSGeorge Wilson {
12934b964adaSGeorge Wilson 	vdev_t *mrvd, *rvd = spa->spa_root_vdev;
12944b964adaSGeorge Wilson 	nvlist_t *nv;
12954b964adaSGeorge Wilson 
12964b964adaSGeorge Wilson 	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nv) == 0);
12974b964adaSGeorge Wilson 
12984b964adaSGeorge Wilson 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
12994b964adaSGeorge Wilson 	VERIFY(spa_config_parse(spa, &mrvd, nv, NULL, 0, VDEV_ALLOC_LOAD) == 0);
13004b964adaSGeorge Wilson 
13014b964adaSGeorge Wilson 	ASSERT3U(rvd->vdev_children, ==, mrvd->vdev_children);
1302e6ca193dSGeorge Wilson 
130388ecc943SGeorge Wilson 	/*
13044b964adaSGeorge Wilson 	 * If we're doing a normal import, then build up any additional
13054b964adaSGeorge Wilson 	 * diagnostic information about missing devices in this config.
13064b964adaSGeorge Wilson 	 * We'll pass this up to the user for further processing.
130788ecc943SGeorge Wilson 	 */
13084b964adaSGeorge Wilson 	if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG)) {
13094b964adaSGeorge Wilson 		nvlist_t **child, *nv;
13104b964adaSGeorge Wilson 		uint64_t idx = 0;
13114b964adaSGeorge Wilson 
13124b964adaSGeorge Wilson 		child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **),
13134b964adaSGeorge Wilson 		    KM_SLEEP);
13144b964adaSGeorge Wilson 		VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
1315e6ca193dSGeorge Wilson 
13164b964adaSGeorge Wilson 		for (int c = 0; c < rvd->vdev_children; c++) {
13174b964adaSGeorge Wilson 			vdev_t *tvd = rvd->vdev_child[c];
13184b964adaSGeorge Wilson 			vdev_t *mtvd  = mrvd->vdev_child[c];
13194b964adaSGeorge Wilson 
13204b964adaSGeorge Wilson 			if (tvd->vdev_ops == &vdev_missing_ops &&
13214b964adaSGeorge Wilson 			    mtvd->vdev_ops != &vdev_missing_ops &&
13224b964adaSGeorge Wilson 			    mtvd->vdev_islog)
13234b964adaSGeorge Wilson 				child[idx++] = vdev_config_generate(spa, mtvd,
13244b964adaSGeorge Wilson 				    B_FALSE, 0);
13254b964adaSGeorge Wilson 		}
13264b964adaSGeorge Wilson 
13274b964adaSGeorge Wilson 		if (idx) {
13284b964adaSGeorge Wilson 			VERIFY(nvlist_add_nvlist_array(nv,
13294b964adaSGeorge Wilson 			    ZPOOL_CONFIG_CHILDREN, child, idx) == 0);
13304b964adaSGeorge Wilson 			VERIFY(nvlist_add_nvlist(spa->spa_load_info,
13314b964adaSGeorge Wilson 			    ZPOOL_CONFIG_MISSING_DEVICES, nv) == 0);
13324b964adaSGeorge Wilson 
13334b964adaSGeorge Wilson 			for (int i = 0; i < idx; i++)
13344b964adaSGeorge Wilson 				nvlist_free(child[i]);
13354b964adaSGeorge Wilson 		}
13364b964adaSGeorge Wilson 		nvlist_free(nv);
13374b964adaSGeorge Wilson 		kmem_free(child, rvd->vdev_children * sizeof (char **));
13384b964adaSGeorge Wilson 	}
13394b964adaSGeorge Wilson 
13404b964adaSGeorge Wilson 	/*
13414b964adaSGeorge Wilson 	 * Compare the root vdev tree with the information we have
13424b964adaSGeorge Wilson 	 * from the MOS config (mrvd). Check each top-level vdev
13434b964adaSGeorge Wilson 	 * with the corresponding MOS config top-level (mtvd).
13444b964adaSGeorge Wilson 	 */
134588ecc943SGeorge Wilson 	for (int c = 0; c < rvd->vdev_children; c++) {
13464b964adaSGeorge Wilson 		vdev_t *tvd = rvd->vdev_child[c];
13474b964adaSGeorge Wilson 		vdev_t *mtvd  = mrvd->vdev_child[c];
13484b964adaSGeorge Wilson 
13494b964adaSGeorge Wilson 		/*
13504b964adaSGeorge Wilson 		 * Resolve any "missing" vdevs in the current configuration.
13514b964adaSGeorge Wilson 		 * If we find that the MOS config has more accurate information
13524b964adaSGeorge Wilson 		 * about the top-level vdev then use that vdev instead.
13534b964adaSGeorge Wilson 		 */
13544b964adaSGeorge Wilson 		if (tvd->vdev_ops == &vdev_missing_ops &&
13554b964adaSGeorge Wilson 		    mtvd->vdev_ops != &vdev_missing_ops) {
13564b964adaSGeorge Wilson 
13574b964adaSGeorge Wilson 			if (!(spa->spa_import_flags & ZFS_IMPORT_MISSING_LOG))
13584b964adaSGeorge Wilson 				continue;
13594b964adaSGeorge Wilson 
13604b964adaSGeorge Wilson 			/*
13614b964adaSGeorge Wilson 			 * Device specific actions.
13624b964adaSGeorge Wilson 			 */
13634b964adaSGeorge Wilson 			if (mtvd->vdev_islog) {
13644b964adaSGeorge Wilson 				spa_set_log_state(spa, SPA_LOG_CLEAR);
13654b964adaSGeorge Wilson 			} else {
13664b964adaSGeorge Wilson 				/*
13674b964adaSGeorge Wilson 				 * XXX - once we have 'readonly' pool
13684b964adaSGeorge Wilson 				 * support we should be able to handle
13694b964adaSGeorge Wilson 				 * missing data devices by transitioning
13704b964adaSGeorge Wilson 				 * the pool to readonly.
13714b964adaSGeorge Wilson 				 */
13724b964adaSGeorge Wilson 				continue;
13734b964adaSGeorge Wilson 			}
13744b964adaSGeorge Wilson 
13754b964adaSGeorge Wilson 			/*
13764b964adaSGeorge Wilson 			 * Swap the missing vdev with the data we were
13774b964adaSGeorge Wilson 			 * able to obtain from the MOS config.
13784b964adaSGeorge Wilson 			 */
13794b964adaSGeorge Wilson 			vdev_remove_child(rvd, tvd);
13804b964adaSGeorge Wilson 			vdev_remove_child(mrvd, mtvd);
13814b964adaSGeorge Wilson 
13824b964adaSGeorge Wilson 			vdev_add_child(rvd, mtvd);
13834b964adaSGeorge Wilson 			vdev_add_child(mrvd, tvd);
13844b964adaSGeorge Wilson 
13854b964adaSGeorge Wilson 			spa_config_exit(spa, SCL_ALL, FTAG);
13864b964adaSGeorge Wilson 			vdev_load(mtvd);
13874b964adaSGeorge Wilson 			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
13884b964adaSGeorge Wilson 
13894b964adaSGeorge Wilson 			vdev_reopen(rvd);
13904b964adaSGeorge Wilson 		} else if (mtvd->vdev_islog) {
13914b964adaSGeorge Wilson 			/*
13924b964adaSGeorge Wilson 			 * Load the slog device's state from the MOS config
13934b964adaSGeorge Wilson 			 * since it's possible that the label does not
13944b964adaSGeorge Wilson 			 * contain the most up-to-date information.
13954b964adaSGeorge Wilson 			 */
13964b964adaSGeorge Wilson 			vdev_load_log_state(tvd, mtvd);
13974b964adaSGeorge Wilson 			vdev_reopen(tvd);
13984b964adaSGeorge Wilson 		}
1399e6ca193dSGeorge Wilson 	}
14004b964adaSGeorge Wilson 	vdev_free(mrvd);
140188ecc943SGeorge Wilson 	spa_config_exit(spa, SCL_ALL, FTAG);
14024b964adaSGeorge Wilson 
14034b964adaSGeorge Wilson 	/*
14044b964adaSGeorge Wilson 	 * Ensure we were able to validate the config.
14054b964adaSGeorge Wilson 	 */
14064b964adaSGeorge Wilson 	return (rvd->vdev_guid_sum == spa->spa_uberblock.ub_guid_sum);
1407e6ca193dSGeorge Wilson }
1408e6ca193dSGeorge Wilson 
1409b87f3af3Sperrin /*
1410b87f3af3Sperrin  * Check for missing log devices
1411b87f3af3Sperrin  */
14124b964adaSGeorge Wilson static int
1413b87f3af3Sperrin spa_check_logs(spa_t *spa)
1414b87f3af3Sperrin {
1415b87f3af3Sperrin 	switch (spa->spa_log_state) {
1416b87f3af3Sperrin 	case SPA_LOG_MISSING:
1417b87f3af3Sperrin 		/* need to recheck in case slog has been restored */
1418b87f3af3Sperrin 	case SPA_LOG_UNKNOWN:
1419b87f3af3Sperrin 		if (dmu_objset_find(spa->spa_name, zil_check_log_chain, NULL,
1420b87f3af3Sperrin 		    DS_FIND_CHILDREN)) {
14211195e687SMark J Musante 			spa_set_log_state(spa, SPA_LOG_MISSING);
1422b87f3af3Sperrin 			return (1);
1423b87f3af3Sperrin 		}
1424b87f3af3Sperrin 		break;
1425b87f3af3Sperrin 	}
1426b87f3af3Sperrin 	return (0);
1427b87f3af3Sperrin }
1428b87f3af3Sperrin 
14291195e687SMark J Musante static boolean_t
14301195e687SMark J Musante spa_passivate_log(spa_t *spa)
14311195e687SMark J Musante {
14321195e687SMark J Musante 	vdev_t *rvd = spa->spa_root_vdev;
14331195e687SMark J Musante 	boolean_t slog_found = B_FALSE;
14341195e687SMark J Musante 
14351195e687SMark J Musante 	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
14361195e687SMark J Musante 
14371195e687SMark J Musante 	if (!spa_has_slogs(spa))
14381195e687SMark J Musante 		return (B_FALSE);
14391195e687SMark J Musante 
14401195e687SMark J Musante 	for (int c = 0; c < rvd->vdev_children; c++) {
14411195e687SMark J Musante 		vdev_t *tvd = rvd->vdev_child[c];
14421195e687SMark J Musante 		metaslab_group_t *mg = tvd->vdev_mg;
14431195e687SMark J Musante 
14441195e687SMark J Musante 		if (tvd->vdev_islog) {
14451195e687SMark J Musante 			metaslab_group_passivate(mg);
14461195e687SMark J Musante 			slog_found = B_TRUE;
14471195e687SMark J Musante 		}
14481195e687SMark J Musante 	}
14491195e687SMark J Musante 
14501195e687SMark J Musante 	return (slog_found);
14511195e687SMark J Musante }
14521195e687SMark J Musante 
14531195e687SMark J Musante static void
14541195e687SMark J Musante spa_activate_log(spa_t *spa)
14551195e687SMark J Musante {
14561195e687SMark J Musante 	vdev_t *rvd = spa->spa_root_vdev;
14571195e687SMark J Musante 
14581195e687SMark J Musante 	ASSERT(spa_config_held(spa, SCL_ALLOC, RW_WRITER));
14591195e687SMark J Musante 
14601195e687SMark J Musante 	for (int c = 0; c < rvd->vdev_children; c++) {
14611195e687SMark J Musante 		vdev_t *tvd = rvd->vdev_child[c];
14621195e687SMark J Musante 		metaslab_group_t *mg = tvd->vdev_mg;
14631195e687SMark J Musante 
14641195e687SMark J Musante 		if (tvd->vdev_islog)
14651195e687SMark J Musante 			metaslab_group_activate(mg);
14661195e687SMark J Musante 	}
14671195e687SMark J Musante }
14681195e687SMark J Musante 
14691195e687SMark J Musante int
14701195e687SMark J Musante spa_offline_log(spa_t *spa)
14711195e687SMark J Musante {
14721195e687SMark J Musante 	int error = 0;
14731195e687SMark J Musante 
14741195e687SMark J Musante 	if ((error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
14751195e687SMark J Musante 	    NULL, DS_FIND_CHILDREN)) == 0) {
14761195e687SMark J Musante 
14771195e687SMark J Musante 		/*
14781195e687SMark J Musante 		 * We successfully offlined the log device, sync out the
14791195e687SMark J Musante 		 * current txg so that the "stubby" block can be removed
14801195e687SMark J Musante 		 * by zil_sync().
14811195e687SMark J Musante 		 */
14821195e687SMark J Musante 		txg_wait_synced(spa->spa_dsl_pool, 0);
14831195e687SMark J Musante 	}
14841195e687SMark J Musante 	return (error);
14851195e687SMark J Musante }
14861195e687SMark J Musante 
1487b693757aSEric Schrock static void
1488b693757aSEric Schrock spa_aux_check_removed(spa_aux_vdev_t *sav)
1489b693757aSEric Schrock {
1490b24ab676SJeff Bonwick 	for (int i = 0; i < sav->sav_count; i++)
1491b693757aSEric Schrock 		spa_check_removed(sav->sav_vdevs[i]);
1492b693757aSEric Schrock }
1493b693757aSEric Schrock 
1494b24ab676SJeff Bonwick void
1495b24ab676SJeff Bonwick spa_claim_notify(zio_t *zio)
1496b24ab676SJeff Bonwick {
1497b24ab676SJeff Bonwick 	spa_t *spa = zio->io_spa;
1498b24ab676SJeff Bonwick 
1499b24ab676SJeff Bonwick 	if (zio->io_error)
1500b24ab676SJeff Bonwick 		return;
1501b24ab676SJeff Bonwick 
1502b24ab676SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);	/* any mutex will do */
1503b24ab676SJeff Bonwick 	if (spa->spa_claim_max_txg < zio->io_bp->blk_birth)
1504b24ab676SJeff Bonwick 		spa->spa_claim_max_txg = zio->io_bp->blk_birth;
1505b24ab676SJeff Bonwick 	mutex_exit(&spa->spa_props_lock);
1506b24ab676SJeff Bonwick }
1507b24ab676SJeff Bonwick 
/*
 * Error tallies accumulated while verifying a pool during load; updated
 * atomically from spa_load_verify_done() as verification I/Os complete.
 */
typedef struct spa_load_error {
	uint64_t	sle_meta_count;	/* metadata read errors seen */
	uint64_t	sle_data_count;	/* data read errors seen */
} spa_load_error_t;
1512468c413aSTim Haley 
1513468c413aSTim Haley static void
1514468c413aSTim Haley spa_load_verify_done(zio_t *zio)
1515468c413aSTim Haley {
1516468c413aSTim Haley 	blkptr_t *bp = zio->io_bp;
1517468c413aSTim Haley 	spa_load_error_t *sle = zio->io_private;
1518468c413aSTim Haley 	dmu_object_type_t type = BP_GET_TYPE(bp);
1519468c413aSTim Haley 	int error = zio->io_error;
1520468c413aSTim Haley 
1521468c413aSTim Haley 	if (error) {
1522468c413aSTim Haley 		if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
1523468c413aSTim Haley 		    type != DMU_OT_INTENT_LOG)
1524c8ee1847SVictor Latushkin 			atomic_add_64(&sle->sle_meta_count, 1);
1525468c413aSTim Haley 		else
1526468c413aSTim Haley 			atomic_add_64(&sle->sle_data_count, 1);
1527468c413aSTim Haley 	}
1528468c413aSTim Haley 	zio_data_buf_free(zio->io_data, zio->io_size);
1529468c413aSTim Haley }
1530468c413aSTim Haley 
1531468c413aSTim Haley /*ARGSUSED*/
1532468c413aSTim Haley static int
1533b24ab676SJeff Bonwick spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
15343f9d6ad7SLin Ling     arc_buf_t *pbuf, const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
1535468c413aSTim Haley {
1536468c413aSTim Haley 	if (bp != NULL) {
1537468c413aSTim Haley 		zio_t *rio = arg;
1538468c413aSTim Haley 		size_t size = BP_GET_PSIZE(bp);
1539468c413aSTim Haley 		void *data = zio_data_buf_alloc(size);
1540468c413aSTim Haley 
1541468c413aSTim Haley 		zio_nowait(zio_read(rio, spa, bp, data, size,
1542468c413aSTim Haley 		    spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
1543468c413aSTim Haley 		    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
1544468c413aSTim Haley 		    ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
1545468c413aSTim Haley 	}
1546468c413aSTim Haley 	return (0);
1547468c413aSTim Haley }
1548468c413aSTim Haley 
/*
 * Verify the pool by traversing it from spa_verify_min_txg and reading
 * every block, then comparing the accumulated error counts against the
 * limits in the pool's rewind policy.  Returns 0 if verification passed
 * (or was skipped because rewind is forbidden), EIO if the error limits
 * were exceeded, or the traversal error itself.
 */
static int
spa_load_verify(spa_t *spa)
{
	zio_t *rio;
	spa_load_error_t sle = { 0 };
	zpool_rewind_policy_t policy;
	boolean_t verify_ok = B_FALSE;
	int error;

	zpool_get_rewind_policy(spa->spa_config, &policy);

	/* Nothing to verify when the caller forbids rewinding. */
	if (policy.zrp_request & ZPOOL_NEVER_REWIND)
		return (0);

	/* Root zio to parent all of the asynchronous verification reads. */
	rio = zio_root(spa, NULL, &sle,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);

	error = traverse_pool(spa, spa->spa_verify_min_txg,
	    TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);

	/* Wait for all reads issued by spa_load_verify_cb() to complete. */
	(void) zio_wait(rio);

	spa->spa_load_meta_errors = sle.sle_meta_count;
	spa->spa_load_data_errors = sle.sle_data_count;

	/*
	 * The load is acceptable if the traversal succeeded and the error
	 * counts are within the policy's limits; record the txg, its
	 * timestamp, and the rewind cost in spa_load_info for the caller.
	 * Otherwise remember this txg as the upper bound for any rewind.
	 */
	if (!error && sle.sle_meta_count <= policy.zrp_maxmeta &&
	    sle.sle_data_count <= policy.zrp_maxdata) {
		int64_t loss = 0;

		verify_ok = B_TRUE;
		spa->spa_load_txg = spa->spa_uberblock.ub_txg;
		spa->spa_load_txg_ts = spa->spa_uberblock.ub_timestamp;

		/* Time that would be lost by rewinding to this txg. */
		loss = spa->spa_last_ubsync_txg_ts - spa->spa_load_txg_ts;
		VERIFY(nvlist_add_uint64(spa->spa_load_info,
		    ZPOOL_CONFIG_LOAD_TIME, spa->spa_load_txg_ts) == 0);
		VERIFY(nvlist_add_int64(spa->spa_load_info,
		    ZPOOL_CONFIG_REWIND_TIME, loss) == 0);
		VERIFY(nvlist_add_uint64(spa->spa_load_info,
		    ZPOOL_CONFIG_LOAD_DATA_ERRORS, sle.sle_data_count) == 0);
	} else {
		spa->spa_load_max_txg = spa->spa_uberblock.ub_txg;
	}

	if (error) {
		/* Collapse all traversal failures other than ENXIO to EIO. */
		if (error != ENXIO && error != EIO)
			error = EIO;
		return (error);
	}

	return (verify_ok ? 0 : EIO);
}
1601468c413aSTim Haley 
/*
 * Find a value in the pool props object.  If the property is not
 * present, the lookup error is ignored and *val is left unmodified,
 * so callers should pre-initialize *val with their default.
 */
static void
spa_prop_find(spa_t *spa, zpool_prop_t prop, uint64_t *val)
{
	(void) zap_lookup(spa->spa_meta_objset, spa->spa_pool_props_object,
	    zpool_prop_to_name(prop), sizeof (uint64_t), 1, val);
}
16111195e687SMark J Musante 
/*
 * Find a value in the pool directory object.  Returns the zap_lookup()
 * error unchanged (e.g. ENOENT when the named entry does not exist).
 */
static int
spa_dir_prop(spa_t *spa, const char *name, uint64_t *val)
{
	return (zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
	    name, sizeof (uint64_t), 1, val));
}
16211195e687SMark J Musante 
/*
 * Mark the given vdev (sub)tree as unopenable with the specified aux
 * reason, then hand the supplied error code back so callers can write
 * "return (spa_vdev_err(rvd, aux, err))" on their failure paths.
 */
static int
spa_vdev_err(vdev_t *vdev, vdev_aux_t aux, int err)
{
	vdev_set_state(vdev, B_TRUE, VDEV_STATE_CANT_OPEN, aux);
	return (err);
}
16281195e687SMark J Musante 
/*
 * Fix up config after a partly-completed split.  This is done with the
 * ZPOOL_CONFIG_SPLIT nvlist.  Both the splitting pool and the split-off
 * pool have that entry in their config, but only the splitting one contains
 * a list of all the guids of the vdevs that are being split off.
 *
 * This function determines what to do with that list: either rejoin
 * all the disks to the pool, or complete the splitting process.  To attempt
 * the rejoin, each disk that is offlined is marked online again, and
 * we do a reopen() call.  If the vdev label for every disk that was
 * marked online indicates it was successfully split off (VDEV_AUX_SPLIT_POOL)
 * then we call vdev_split() on each disk, and complete the split.
 *
 * Otherwise we leave the config alone, with all the vdevs in place in
 * the original pool.
 */
static void
spa_try_repair(spa_t *spa, nvlist_t *config)
{
	uint_t extracted;
	uint64_t *glist;
	uint_t i, gcount;
	nvlist_t *nvl;
	vdev_t **vd;
	boolean_t attempt_reopen;

	/* No split was in progress if the config has no SPLIT entry. */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT, &nvl) != 0)
		return;

	/* check that the config is complete */
	if (nvlist_lookup_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
	    &glist, &gcount) != 0)
		return;

	vd = kmem_zalloc(gcount * sizeof (vdev_t *), KM_SLEEP);

	/* attempt to online all the vdevs & validate */
	attempt_reopen = B_TRUE;
	for (i = 0; i < gcount; i++) {
		if (glist[i] == 0)	/* vdev is hole */
			continue;

		vd[i] = spa_lookup_by_guid(spa, glist[i], B_FALSE);
		if (vd[i] == NULL) {
			/*
			 * Don't bother attempting to reopen the disks;
			 * just do the split.
			 */
			attempt_reopen = B_FALSE;
		} else {
			/* attempt to re-online it */
			vd[i]->vdev_offline = B_FALSE;
		}
	}

	if (attempt_reopen) {
		vdev_reopen(spa->spa_root_vdev);

		/* check each device to see what state it's in */
		for (extracted = 0, i = 0; i < gcount; i++) {
			/*
			 * Stop counting at the first vdev that did not come
			 * back marked VDEV_AUX_SPLIT_POOL; holes and vdevs
			 * not found above (vd[i] == NULL) count as extracted.
			 */
			if (vd[i] != NULL &&
			    vd[i]->vdev_stat.vs_aux != VDEV_AUX_SPLIT_POOL)
				break;
			++extracted;
		}
	}

	/*
	 * If every disk has been moved to the new pool, or if we never
	 * even attempted to look at them, then we split them off for
	 * good.
	 */
	if (!attempt_reopen || gcount == extracted) {
		for (i = 0; i < gcount; i++)
			if (vd[i] != NULL)
				vdev_split(vd[i]);
		vdev_reopen(spa->spa_root_vdev);
	}

	kmem_free(vd, gcount * sizeof (vdev_t *));
}
17101195e687SMark J Musante 
/*
 * Load an existing pool from its spa_config.  This wrapper extracts the
 * pool guid, version, and txg from the config, records the load
 * timestamp, delegates the real work to spa_load_impl(), and posts an
 * FMA ereport if the load fails.  Returns 0 on success or an errno.
 */
static int
spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
    boolean_t mosconfig)
{
	nvlist_t *config = spa->spa_config;
	char *ereport = FM_EREPORT_ZFS_POOL;
	int error;
	uint64_t pool_guid;
	nvlist_t *nvl;

	/* A config without a pool guid is unusable. */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid))
		return (EINVAL);

	/*
	 * Versioning wasn't explicitly added to the label until later, so if
	 * it's not present treat it as the initial version.
	 */
	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
	    &spa->spa_ubsync.ub_version) != 0)
		spa->spa_ubsync.ub_version = SPA_VERSION_INITIAL;

	(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    &spa->spa_config_txg);

	/* An import must not collide with an already-known pool guid. */
	if ((state == SPA_LOAD_IMPORT || state == SPA_LOAD_TRYIMPORT) &&
	    spa_guid_exists(pool_guid, 0)) {
		error = EEXIST;
	} else {
		spa->spa_load_guid = pool_guid;

		/*
		 * Preserve any in-progress split information so that
		 * spa_load_impl() can attempt to repair it later.
		 */
		if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
		    &nvl) == 0) {
			VERIFY(nvlist_dup(nvl, &spa->spa_config_splitting,
			    KM_SLEEP) == 0);
		}

		gethrestime(&spa->spa_loaded_ts);
		error = spa_load_impl(spa, pool_guid, config, state, type,
		    mosconfig, &ereport);
	}

	spa->spa_minref = refcount_count(&spa->spa_refcount);
	if (error) {
		/* EEXIST keeps its timestamp: the pool is already loaded. */
		if (error != EEXIST) {
			spa->spa_loaded_ts.tv_sec = 0;
			spa->spa_loaded_ts.tv_nsec = 0;
		}
		/*
		 * EBADF is returned by spa_load_impl() on a hostid
		 * mismatch (pool in use elsewhere); no ereport for that.
		 */
		if (error != EBADF) {
			zfs_ereport_post(ereport, spa, NULL, NULL, 0, 0);
		}
	}
	spa->spa_load_state = error ? SPA_LOAD_ERROR : SPA_LOAD_NONE;
	spa->spa_ena = 0;

	return (error);
}
17671195e687SMark J Musante 
1768fa9e4066Sahrens /*
1769fa9e4066Sahrens  * Load an existing storage pool, using the pool's builtin spa_config as a
1770ea8dc4b6Seschrock  * source of configuration information.
1771fa9e4066Sahrens  */
1772fa9e4066Sahrens static int
17731195e687SMark J Musante spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
17741195e687SMark J Musante     spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,
17751195e687SMark J Musante     char **ereport)
1776fa9e4066Sahrens {
1777fa9e4066Sahrens 	int error = 0;
1778871a9500SMark J Musante 	nvlist_t *nvroot = NULL;
1779fa9e4066Sahrens 	vdev_t *rvd;
1780fa9e4066Sahrens 	uberblock_t *ub = &spa->spa_uberblock;
17814b964adaSGeorge Wilson 	uint64_t children, config_cache_txg = spa->spa_config_txg;
17828ad4d6ddSJeff Bonwick 	int orig_mode = spa->spa_mode;
17831195e687SMark J Musante 	int parse;
1784cde58dbcSMatthew Ahrens 	uint64_t obj;
1785fa9e4066Sahrens 
17868ad4d6ddSJeff Bonwick 	/*
17878ad4d6ddSJeff Bonwick 	 * If this is an untrusted config, access the pool in read-only mode.
17888ad4d6ddSJeff Bonwick 	 * This prevents things like resilvering recently removed devices.
17898ad4d6ddSJeff Bonwick 	 */
17908ad4d6ddSJeff Bonwick 	if (!mosconfig)
17918ad4d6ddSJeff Bonwick 		spa->spa_mode = FREAD;
17928ad4d6ddSJeff Bonwick 
1793e14bb325SJeff Bonwick 	ASSERT(MUTEX_HELD(&spa_namespace_lock));
1794e14bb325SJeff Bonwick 
1795ea8dc4b6Seschrock 	spa->spa_load_state = state;
17960373e76bSbonwick 
17971195e687SMark J Musante 	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot))
17981195e687SMark J Musante 		return (EINVAL);
1799fa9e4066Sahrens 
18001195e687SMark J Musante 	parse = (type == SPA_IMPORT_EXISTING ?
18011195e687SMark J Musante 	    VDEV_ALLOC_LOAD : VDEV_ALLOC_SPLIT);
1802b5989ec7Seschrock 
180354d692b7SGeorge Wilson 	/*
180454d692b7SGeorge Wilson 	 * Create "The Godfather" zio to hold all async IOs
180554d692b7SGeorge Wilson 	 */
180625f89ee2SJeff Bonwick 	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
180725f89ee2SJeff Bonwick 	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);
180854d692b7SGeorge Wilson 
1809fa9e4066Sahrens 	/*
181099653d4eSeschrock 	 * Parse the configuration into a vdev tree.  We explicitly set the
181199653d4eSeschrock 	 * value that will be returned by spa_version() since parsing the
181299653d4eSeschrock 	 * configuration requires knowing the version number.
1813fa9e4066Sahrens 	 */
1814e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
18151195e687SMark J Musante 	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, parse);
1816e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
1817fa9e4066Sahrens 
181899653d4eSeschrock 	if (error != 0)
18191195e687SMark J Musante 		return (error);
1820fa9e4066Sahrens 
18210e34b6a7Sbonwick 	ASSERT(spa->spa_root_vdev == rvd);
18221195e687SMark J Musante 
18231195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE) {
18241195e687SMark J Musante 		ASSERT(spa_guid(spa) == pool_guid);
18251195e687SMark J Musante 	}
1826fa9e4066Sahrens 
1827fa9e4066Sahrens 	/*
1828fa9e4066Sahrens 	 * Try to open all vdevs, loading each label in the process.
1829fa9e4066Sahrens 	 */
1830e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
18310bf246f5Smc 	error = vdev_open(rvd);
1832e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
18330bf246f5Smc 	if (error != 0)
18341195e687SMark J Musante 		return (error);
1835fa9e4066Sahrens 
1836560e6e96Seschrock 	/*
183777e3a39cSMark J Musante 	 * We need to validate the vdev labels against the configuration that
183877e3a39cSMark J Musante 	 * we have in hand, which is dependent on the setting of mosconfig. If
183977e3a39cSMark J Musante 	 * mosconfig is true then we're validating the vdev labels based on
18401195e687SMark J Musante 	 * that config.  Otherwise, we're validating against the cached config
184177e3a39cSMark J Musante 	 * (zpool.cache) that was read when we loaded the zfs module, and then
184277e3a39cSMark J Musante 	 * later we will recursively call spa_load() and validate against
184377e3a39cSMark J Musante 	 * the vdev config.
18441195e687SMark J Musante 	 *
18451195e687SMark J Musante 	 * If we're assembling a new pool that's been split off from an
18461195e687SMark J Musante 	 * existing pool, the labels haven't yet been updated so we skip
18471195e687SMark J Musante 	 * validation for now.
1848560e6e96Seschrock 	 */
18491195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE) {
18501195e687SMark J Musante 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
18511195e687SMark J Musante 		error = vdev_validate(rvd);
18521195e687SMark J Musante 		spa_config_exit(spa, SCL_ALL, FTAG);
1853560e6e96Seschrock 
18541195e687SMark J Musante 		if (error != 0)
18551195e687SMark J Musante 			return (error);
18561195e687SMark J Musante 
18571195e687SMark J Musante 		if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
18581195e687SMark J Musante 			return (ENXIO);
1859560e6e96Seschrock 	}
1860560e6e96Seschrock 
1861fa9e4066Sahrens 	/*
1862fa9e4066Sahrens 	 * Find the best uberblock.
1863fa9e4066Sahrens 	 */
1864e14bb325SJeff Bonwick 	vdev_uberblock_load(NULL, rvd, ub);
1865fa9e4066Sahrens 
1866fa9e4066Sahrens 	/*
1867fa9e4066Sahrens 	 * If we weren't able to find a single valid uberblock, return failure.
1868fa9e4066Sahrens 	 */
18691195e687SMark J Musante 	if (ub->ub_txg == 0)
18701195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
1871ea8dc4b6Seschrock 
1872ea8dc4b6Seschrock 	/*
1873ea8dc4b6Seschrock 	 * If the pool is newer than the code, we can't open it.
1874ea8dc4b6Seschrock 	 */
18751195e687SMark J Musante 	if (ub->ub_version > SPA_VERSION)
18761195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
1877fa9e4066Sahrens 
1878fa9e4066Sahrens 	/*
1879fa9e4066Sahrens 	 * If the vdev guid sum doesn't match the uberblock, we have an
18804b964adaSGeorge Wilson 	 * incomplete configuration.  We first check to see if the pool
18814b964adaSGeorge Wilson 	 * is aware of the complete config (i.e ZPOOL_CONFIG_VDEV_CHILDREN).
18824b964adaSGeorge Wilson 	 * If it is, defer the vdev_guid_sum check till later so we
18834b964adaSGeorge Wilson 	 * can handle missing vdevs.
1884fa9e4066Sahrens 	 */
18854b964adaSGeorge Wilson 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_VDEV_CHILDREN,
18864b964adaSGeorge Wilson 	    &children) != 0 && mosconfig && type != SPA_IMPORT_ASSEMBLE &&
18871195e687SMark J Musante 	    rvd->vdev_guid_sum != ub->ub_guid_sum)
18881195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM, ENXIO));
18891195e687SMark J Musante 
18901195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE && spa->spa_config_splitting) {
18911195e687SMark J Musante 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
18921195e687SMark J Musante 		spa_try_repair(spa, config);
18931195e687SMark J Musante 		spa_config_exit(spa, SCL_ALL, FTAG);
18941195e687SMark J Musante 		nvlist_free(spa->spa_config_splitting);
18951195e687SMark J Musante 		spa->spa_config_splitting = NULL;
1896fa9e4066Sahrens 	}
1897fa9e4066Sahrens 
1898fa9e4066Sahrens 	/*
1899fa9e4066Sahrens 	 * Initialize internal SPA structures.
1900fa9e4066Sahrens 	 */
1901fa9e4066Sahrens 	spa->spa_state = POOL_STATE_ACTIVE;
1902fa9e4066Sahrens 	spa->spa_ubsync = spa->spa_uberblock;
1903468c413aSTim Haley 	spa->spa_verify_min_txg = spa->spa_extreme_rewind ?
1904c8ee1847SVictor Latushkin 	    TXG_INITIAL - 1 : spa_last_synced_txg(spa) - TXG_DEFER_SIZE - 1;
1905468c413aSTim Haley 	spa->spa_first_txg = spa->spa_last_ubsync_txg ?
1906468c413aSTim Haley 	    spa->spa_last_ubsync_txg : spa_last_synced_txg(spa) + 1;
1907b24ab676SJeff Bonwick 	spa->spa_claim_max_txg = spa->spa_first_txg;
19083f9d6ad7SLin Ling 	spa->spa_prev_software_version = ub->ub_software_version;
1909b24ab676SJeff Bonwick 
1910ea8dc4b6Seschrock 	error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
19111195e687SMark J Musante 	if (error)
19121195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1913fa9e4066Sahrens 	spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
1914fa9e4066Sahrens 
19151195e687SMark J Musante 	if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
19161195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1917fa9e4066Sahrens 
1918fa9e4066Sahrens 	if (!mosconfig) {
191995173954Sek 		uint64_t hostid;
1920871a9500SMark J Musante 		nvlist_t *policy = NULL, *nvconfig;
1921871a9500SMark J Musante 
1922871a9500SMark J Musante 		if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
1923871a9500SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1924fa9e4066Sahrens 
192588ecc943SGeorge Wilson 		if (!spa_is_root(spa) && nvlist_lookup_uint64(nvconfig,
192677650510SLin Ling 		    ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
192795173954Sek 			char *hostname;
192895173954Sek 			unsigned long myhostid = 0;
192995173954Sek 
193088ecc943SGeorge Wilson 			VERIFY(nvlist_lookup_string(nvconfig,
193195173954Sek 			    ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
193295173954Sek 
19335679c89fSjv #ifdef	_KERNEL
19345679c89fSjv 			myhostid = zone_get_hostid(NULL);
19355679c89fSjv #else	/* _KERNEL */
19365679c89fSjv 			/*
19375679c89fSjv 			 * We're emulating the system's hostid in userland, so
19385679c89fSjv 			 * we can't use zone_get_hostid().
19395679c89fSjv 			 */
194095173954Sek 			(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
19415679c89fSjv #endif	/* _KERNEL */
194217194a52Slling 			if (hostid != 0 && myhostid != 0 &&
19435679c89fSjv 			    hostid != myhostid) {
1944871a9500SMark J Musante 				nvlist_free(nvconfig);
194595173954Sek 				cmn_err(CE_WARN, "pool '%s' could not be "
194695173954Sek 				    "loaded as it was last accessed by "
194777650510SLin Ling 				    "another system (host: %s hostid: 0x%lx). "
194895173954Sek 				    "See: http://www.sun.com/msg/ZFS-8000-EY",
1949e14bb325SJeff Bonwick 				    spa_name(spa), hostname,
195095173954Sek 				    (unsigned long)hostid);
19511195e687SMark J Musante 				return (EBADF);
195295173954Sek 			}
195395173954Sek 		}
1954c8ee1847SVictor Latushkin 		if (nvlist_lookup_nvlist(spa->spa_config,
1955c8ee1847SVictor Latushkin 		    ZPOOL_REWIND_POLICY, &policy) == 0)
1956c8ee1847SVictor Latushkin 			VERIFY(nvlist_add_nvlist(nvconfig,
1957c8ee1847SVictor Latushkin 			    ZPOOL_REWIND_POLICY, policy) == 0);
195895173954Sek 
195988ecc943SGeorge Wilson 		spa_config_set(spa, nvconfig);
1960fa9e4066Sahrens 		spa_unload(spa);
1961fa9e4066Sahrens 		spa_deactivate(spa);
19628ad4d6ddSJeff Bonwick 		spa_activate(spa, orig_mode);
1963fa9e4066Sahrens 
19641195e687SMark J Musante 		return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
1965fa9e4066Sahrens 	}
1966fa9e4066Sahrens 
1967cde58dbcSMatthew Ahrens 	if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0)
1968cde58dbcSMatthew Ahrens 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1969cde58dbcSMatthew Ahrens 	error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj);
1970cde58dbcSMatthew Ahrens 	if (error != 0)
19711195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1972fa9e4066Sahrens 
197399653d4eSeschrock 	/*
197499653d4eSeschrock 	 * Load the bit that tells us to use the new accounting function
197599653d4eSeschrock 	 * (raid-z deflation).  If we have an older pool, this will not
197699653d4eSeschrock 	 * be present.
197799653d4eSeschrock 	 */
19781195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_DEFLATE, &spa->spa_deflate);
19791195e687SMark J Musante 	if (error != 0 && error != ENOENT)
19801195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
198199653d4eSeschrock 
19823f9d6ad7SLin Ling 	error = spa_dir_prop(spa, DMU_POOL_CREATION_VERSION,
19833f9d6ad7SLin Ling 	    &spa->spa_creation_version);
19843f9d6ad7SLin Ling 	if (error != 0 && error != ENOENT)
19853f9d6ad7SLin Ling 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
19863f9d6ad7SLin Ling 
1987fa9e4066Sahrens 	/*
1988ea8dc4b6Seschrock 	 * Load the persistent error log.  If we have an older pool, this will
1989ea8dc4b6Seschrock 	 * not be present.
1990fa9e4066Sahrens 	 */
19911195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_ERRLOG_LAST, &spa->spa_errlog_last);
19921195e687SMark J Musante 	if (error != 0 && error != ENOENT)
19931195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1994ea8dc4b6Seschrock 
19951195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_ERRLOG_SCRUB,
19961195e687SMark J Musante 	    &spa->spa_errlog_scrub);
19971195e687SMark J Musante 	if (error != 0 && error != ENOENT)
19981195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
1999ea8dc4b6Seschrock 
200006eeb2adSek 	/*
200106eeb2adSek 	 * Load the history object.  If we have an older pool, this
200206eeb2adSek 	 * will not be present.
200306eeb2adSek 	 */
20041195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_HISTORY, &spa->spa_history);
20051195e687SMark J Musante 	if (error != 0 && error != ENOENT)
20061195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
20071195e687SMark J Musante 
20081195e687SMark J Musante 	/*
20091195e687SMark J Musante 	 * If we're assembling the pool from the split-off vdevs of
20101195e687SMark J Musante 	 * an existing pool, we don't want to attach the spares & cache
20111195e687SMark J Musante 	 * devices.
20121195e687SMark J Musante 	 */
201306eeb2adSek 
201499653d4eSeschrock 	/*
201599653d4eSeschrock 	 * Load any hot spares for this pool.
201699653d4eSeschrock 	 */
20171195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_SPARES, &spa->spa_spares.sav_object);
20181195e687SMark J Musante 	if (error != 0 && error != ENOENT)
20191195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
20201195e687SMark J Musante 	if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
2021e7437265Sahrens 		ASSERT(spa_version(spa) >= SPA_VERSION_SPARES);
2022fa94a07fSbrendan 		if (load_nvlist(spa, spa->spa_spares.sav_object,
20231195e687SMark J Musante 		    &spa->spa_spares.sav_config) != 0)
20241195e687SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
202599653d4eSeschrock 
2026e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
202799653d4eSeschrock 		spa_load_spares(spa);
2028e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
20291195e687SMark J Musante 	} else if (error == 0) {
20301195e687SMark J Musante 		spa->spa_spares.sav_sync = B_TRUE;
203199653d4eSeschrock 	}
203299653d4eSeschrock 
2033fa94a07fSbrendan 	/*
2034fa94a07fSbrendan 	 * Load any level 2 ARC devices for this pool.
2035fa94a07fSbrendan 	 */
20361195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_L2CACHE,
2037fa94a07fSbrendan 	    &spa->spa_l2cache.sav_object);
20381195e687SMark J Musante 	if (error != 0 && error != ENOENT)
20391195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
20401195e687SMark J Musante 	if (error == 0 && type != SPA_IMPORT_ASSEMBLE) {
2041fa94a07fSbrendan 		ASSERT(spa_version(spa) >= SPA_VERSION_L2CACHE);
2042fa94a07fSbrendan 		if (load_nvlist(spa, spa->spa_l2cache.sav_object,
20431195e687SMark J Musante 		    &spa->spa_l2cache.sav_config) != 0)
20441195e687SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2045fa94a07fSbrendan 
2046e14bb325SJeff Bonwick 		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
2047fa94a07fSbrendan 		spa_load_l2cache(spa);
2048e14bb325SJeff Bonwick 		spa_config_exit(spa, SCL_ALL, FTAG);
20491195e687SMark J Musante 	} else if (error == 0) {
20501195e687SMark J Musante 		spa->spa_l2cache.sav_sync = B_TRUE;
2051fa94a07fSbrendan 	}
2052fa94a07fSbrendan 
2053990b4856Slling 	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
2054ecd6cf80Smarks 
20551195e687SMark J Musante 	error = spa_dir_prop(spa, DMU_POOL_PROPS, &spa->spa_pool_props_object);
20561195e687SMark J Musante 	if (error && error != ENOENT)
20571195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2058b1b8ab34Slling 
2059b1b8ab34Slling 	if (error == 0) {
20601195e687SMark J Musante 		uint64_t autoreplace;
20611195e687SMark J Musante 
20621195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_BOOTFS, &spa->spa_bootfs);
20631195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_AUTOREPLACE, &autoreplace);
20641195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
20651195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
20661195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
20671195e687SMark J Musante 		spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
20681195e687SMark J Musante 		    &spa->spa_dedup_ditto);
20691195e687SMark J Musante 
2070b693757aSEric Schrock 		spa->spa_autoreplace = (autoreplace != 0);
2071b1b8ab34Slling 	}
2072b1b8ab34Slling 
20733d7072f8Seschrock 	/*
20743d7072f8Seschrock 	 * If the 'autoreplace' property is set, then post a resource notifying
20753d7072f8Seschrock 	 * the ZFS DE that it should not issue any faults for unopenable
20763d7072f8Seschrock 	 * devices.  We also iterate over the vdevs, and post a sysevent for any
20773d7072f8Seschrock 	 * unopenable vdevs so that the normal autoreplace handler can take
20783d7072f8Seschrock 	 * over.
20793d7072f8Seschrock 	 */
2080b693757aSEric Schrock 	if (spa->spa_autoreplace && state != SPA_LOAD_TRYIMPORT) {
20813d7072f8Seschrock 		spa_check_removed(spa->spa_root_vdev);
2082b693757aSEric Schrock 		/*
2083b693757aSEric Schrock 		 * For the import case, this is done in spa_import(), because
2084b693757aSEric Schrock 		 * at this point we're using the spare definitions from
2085b693757aSEric Schrock 		 * the MOS config, not necessarily from the userland config.
2086b693757aSEric Schrock 		 */
2087b693757aSEric Schrock 		if (state != SPA_LOAD_IMPORT) {
2088b693757aSEric Schrock 			spa_aux_check_removed(&spa->spa_spares);
2089b693757aSEric Schrock 			spa_aux_check_removed(&spa->spa_l2cache);
2090b693757aSEric Schrock 		}
2091b693757aSEric Schrock 	}
20923d7072f8Seschrock 
2093ea8dc4b6Seschrock 	/*
2094560e6e96Seschrock 	 * Load the vdev state for all toplevel vdevs.
2095ea8dc4b6Seschrock 	 */
2096560e6e96Seschrock 	vdev_load(rvd);
20970373e76bSbonwick 
2098fa9e4066Sahrens 	/*
2099fa9e4066Sahrens 	 * Propagate the leaf DTLs we just loaded all the way up the tree.
2100fa9e4066Sahrens 	 */
2101e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
2102fa9e4066Sahrens 	vdev_dtl_reassess(rvd, 0, 0, B_FALSE);
2103e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_ALL, FTAG);
2104fa9e4066Sahrens 
2105b24ab676SJeff Bonwick 	/*
2106b24ab676SJeff Bonwick 	 * Load the DDTs (dedup tables).
2107b24ab676SJeff Bonwick 	 */
2108b24ab676SJeff Bonwick 	error = ddt_load(spa);
21091195e687SMark J Musante 	if (error != 0)
21101195e687SMark J Musante 		return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2111b24ab676SJeff Bonwick 
2112485bbbf5SGeorge Wilson 	spa_update_dspace(spa);
2113485bbbf5SGeorge Wilson 
2114b24ab676SJeff Bonwick 	/*
21154b964adaSGeorge Wilson 	 * Validate the config, using the MOS config to fill in any
21164b964adaSGeorge Wilson 	 * information which might be missing.  If we fail to validate
21174b964adaSGeorge Wilson 	 * the config then declare the pool unfit for use. If we're
21184b964adaSGeorge Wilson 	 * assembling a pool from a split, the log is not transferred
21194b964adaSGeorge Wilson 	 * over.
2120b24ab676SJeff Bonwick 	 */
21211195e687SMark J Musante 	if (type != SPA_IMPORT_ASSEMBLE) {
2122871a9500SMark J Musante 		nvlist_t *nvconfig;
2123871a9500SMark J Musante 
2124871a9500SMark J Musante 		if (load_nvlist(spa, spa->spa_config_object, &nvconfig) != 0)
2125871a9500SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
2126871a9500SMark J Musante 
21274b964adaSGeorge Wilson 		if (!spa_config_valid(spa, nvconfig)) {
21284b964adaSGeorge Wilson 			nvlist_free(nvconfig);
21294b964adaSGeorge Wilson 			return (spa_vdev_err(rvd, VDEV_AUX_BAD_GUID_SUM,
21304b964adaSGeorge Wilson 			    ENXIO));
21314b964adaSGeorge Wilson 		}
21321195e687SMark J Musante 		nvlist_free(nvconfig);
21331195e687SMark J Musante 
21344b964adaSGeorge Wilson 		/*
		 * Now that we've validated the config, check the state of the
21364b964adaSGeorge Wilson 		 * root vdev.  If it can't be opened, it indicates one or
21374b964adaSGeorge Wilson 		 * more toplevel vdevs are faulted.
21384b964adaSGeorge Wilson 		 */
21394b964adaSGeorge Wilson 		if (rvd->vdev_state <= VDEV_STATE_CANT_OPEN)
21404b964adaSGeorge Wilson 			return (ENXIO);
21414b964adaSGeorge Wilson 
21421195e687SMark J Musante 		if (spa_check_logs(spa)) {
21431195e687SMark J Musante 			*ereport = FM_EREPORT_ZFS_LOG_REPLAY;
21441195e687SMark J Musante 			return (spa_vdev_err(rvd, VDEV_AUX_BAD_LOG, ENXIO));
21451195e687SMark J Musante 		}
2146b24ab676SJeff Bonwick 	}
2147b24ab676SJeff Bonwick 
21484b964adaSGeorge Wilson 	/*
21494b964adaSGeorge Wilson 	 * We've successfully opened the pool, verify that we're ready
21504b964adaSGeorge Wilson 	 * to start pushing transactions.
21514b964adaSGeorge Wilson 	 */
21524b964adaSGeorge Wilson 	if (state != SPA_LOAD_TRYIMPORT) {
21534b964adaSGeorge Wilson 		if (error = spa_load_verify(spa))
21544b964adaSGeorge Wilson 			return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
21554b964adaSGeorge Wilson 			    error));
21564b964adaSGeorge Wilson 	}
21574b964adaSGeorge Wilson 
2158468c413aSTim Haley 	if (spa_writeable(spa) && (state == SPA_LOAD_RECOVER ||
2159468c413aSTim Haley 	    spa->spa_load_max_txg == UINT64_MAX)) {
21605dabedeeSbonwick 		dmu_tx_t *tx;
21610373e76bSbonwick 		int need_update = B_FALSE;
21628ad4d6ddSJeff Bonwick 
21638ad4d6ddSJeff Bonwick 		ASSERT(state != SPA_LOAD_TRYIMPORT);
21645dabedeeSbonwick 
21650373e76bSbonwick 		/*
21660373e76bSbonwick 		 * Claim log blocks that haven't been committed yet.
21670373e76bSbonwick 		 * This must all happen in a single txg.
2168b24ab676SJeff Bonwick 		 * Note: spa_claim_max_txg is updated by spa_claim_notify(),
2169b24ab676SJeff Bonwick 		 * invoked from zil_claim_log_block()'s i/o done callback.
2170468c413aSTim Haley 		 * Price of rollback is that we abandon the log.
21710373e76bSbonwick 		 */
2172b24ab676SJeff Bonwick 		spa->spa_claiming = B_TRUE;
2173b24ab676SJeff Bonwick 
21745dabedeeSbonwick 		tx = dmu_tx_create_assigned(spa_get_dsl(spa),
2175fa9e4066Sahrens 		    spa_first_txg(spa));
2176e14bb325SJeff Bonwick 		(void) dmu_objset_find(spa_name(spa),
21770b69c2f0Sahrens 		    zil_claim, tx, DS_FIND_CHILDREN);
2178fa9e4066Sahrens 		dmu_tx_commit(tx);
2179fa9e4066Sahrens 
2180b24ab676SJeff Bonwick 		spa->spa_claiming = B_FALSE;
2181b24ab676SJeff Bonwick 
21821195e687SMark J Musante 		spa_set_log_state(spa, SPA_LOG_GOOD);
2183fa9e4066Sahrens 		spa->spa_sync_on = B_TRUE;
2184fa9e4066Sahrens 		txg_sync_start(spa->spa_dsl_pool);
2185fa9e4066Sahrens 
2186fa9e4066Sahrens 		/*
2187b24ab676SJeff Bonwick 		 * Wait for all claims to sync.  We sync up to the highest
2188b24ab676SJeff Bonwick 		 * claimed log block birth time so that claimed log blocks
2189b24ab676SJeff Bonwick 		 * don't appear to be from the future.  spa_claim_max_txg
2190b24ab676SJeff Bonwick 		 * will have been set for us by either zil_check_log_chain()
2191b24ab676SJeff Bonwick 		 * (invoked from spa_check_logs()) or zil_claim() above.
2192fa9e4066Sahrens 		 */
2193b24ab676SJeff Bonwick 		txg_wait_synced(spa->spa_dsl_pool, spa->spa_claim_max_txg);
21940e34b6a7Sbonwick 
21950e34b6a7Sbonwick 		/*
21960373e76bSbonwick 		 * If the config cache is stale, or we have uninitialized
21970373e76bSbonwick 		 * metaslabs (see spa_vdev_add()), then update the config.
2198bc758434SLin Ling 		 *
21994b964adaSGeorge Wilson 		 * If this is a verbatim import, trust the current
2200bc758434SLin Ling 		 * in-core spa_config and update the disk labels.
22010e34b6a7Sbonwick 		 */
22020373e76bSbonwick 		if (config_cache_txg != spa->spa_config_txg ||
22034b964adaSGeorge Wilson 		    state == SPA_LOAD_IMPORT ||
22044b964adaSGeorge Wilson 		    state == SPA_LOAD_RECOVER ||
22054b964adaSGeorge Wilson 		    (spa->spa_import_flags & ZFS_IMPORT_VERBATIM))
22060373e76bSbonwick 			need_update = B_TRUE;
22070373e76bSbonwick 
22088ad4d6ddSJeff Bonwick 		for (int c = 0; c < rvd->vdev_children; c++)
22090373e76bSbonwick 			if (rvd->vdev_child[c]->vdev_ms_array == 0)
22100373e76bSbonwick 				need_update = B_TRUE;
22110e34b6a7Sbonwick 
22120e34b6a7Sbonwick 		/*
		 * Update the config cache asynchronously in case we're the
22140373e76bSbonwick 		 * root pool, in which case the config cache isn't writable yet.
22150e34b6a7Sbonwick 		 */
22160373e76bSbonwick 		if (need_update)
22170373e76bSbonwick 			spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
22188ad4d6ddSJeff Bonwick 
22198ad4d6ddSJeff Bonwick 		/*
22208ad4d6ddSJeff Bonwick 		 * Check all DTLs to see if anything needs resilvering.
22218ad4d6ddSJeff Bonwick 		 */
22223f9d6ad7SLin Ling 		if (!dsl_scan_resilvering(spa->spa_dsl_pool) &&
22233f9d6ad7SLin Ling 		    vdev_resilver_needed(rvd, NULL, NULL))
22248ad4d6ddSJeff Bonwick 			spa_async_request(spa, SPA_ASYNC_RESILVER);
2225503ad85cSMatthew Ahrens 
2226503ad85cSMatthew Ahrens 		/*
2227503ad85cSMatthew Ahrens 		 * Delete any inconsistent datasets.
2228503ad85cSMatthew Ahrens 		 */
2229503ad85cSMatthew Ahrens 		(void) dmu_objset_find(spa_name(spa),
2230503ad85cSMatthew Ahrens 		    dsl_destroy_inconsistent, NULL, DS_FIND_CHILDREN);
2231ca45db41SChris Kirby 
2232ca45db41SChris Kirby 		/*
2233ca45db41SChris Kirby 		 * Clean up any stale temporary dataset userrefs.
2234ca45db41SChris Kirby 		 */
2235ca45db41SChris Kirby 		dsl_pool_clean_tmp_userrefs(spa->spa_dsl_pool);
2236fa9e4066Sahrens 	}
2237fa9e4066Sahrens 
22381195e687SMark J Musante 	return (0);
2239fa9e4066Sahrens }
2240fa9e4066Sahrens 
/*
 * Unload and reactivate the pool, then retry spa_load() with
 * spa_load_max_txg lowered by one so the next attempt selects an
 * earlier uberblock.  Called repeatedly by spa_load_best() to walk
 * backwards through txgs during pool recovery (rewind).
 */
static int
spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
{
	spa_unload(spa);
	spa_deactivate(spa);

	/* Reject the most recently tried uberblock on the next load. */
	spa->spa_load_max_txg--;

	spa_activate(spa, spa_mode_global);
	/* Keep async tasks quiesced while we are still probing txgs. */
	spa_async_suspend(spa);

	return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}
2254468c413aSTim Haley 
2255468c413aSTim Haley static int
2256468c413aSTim Haley spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
2257c8ee1847SVictor Latushkin     uint64_t max_request, int rewind_flags)
2258468c413aSTim Haley {
2259468c413aSTim Haley 	nvlist_t *config = NULL;
2260468c413aSTim Haley 	int load_error, rewind_error;
2261c8ee1847SVictor Latushkin 	uint64_t safe_rewind_txg;
2262468c413aSTim Haley 	uint64_t min_txg;
2263468c413aSTim Haley 
2264a33cae98STim Haley 	if (spa->spa_load_txg && state == SPA_LOAD_RECOVER) {
2265468c413aSTim Haley 		spa->spa_load_max_txg = spa->spa_load_txg;
22661195e687SMark J Musante 		spa_set_log_state(spa, SPA_LOG_CLEAR);
2267a33cae98STim Haley 	} else {
2268468c413aSTim Haley 		spa->spa_load_max_txg = max_request;
2269a33cae98STim Haley 	}
2270468c413aSTim Haley 
22711195e687SMark J Musante 	load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
22721195e687SMark J Musante 	    mosconfig);
2273468c413aSTim Haley 	if (load_error == 0)
2274468c413aSTim Haley 		return (0);
2275468c413aSTim Haley 
2276468c413aSTim Haley 	if (spa->spa_root_vdev != NULL)
2277468c413aSTim Haley 		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
2278468c413aSTim Haley 
2279468c413aSTim Haley 	spa->spa_last_ubsync_txg = spa->spa_uberblock.ub_txg;
2280468c413aSTim Haley 	spa->spa_last_ubsync_txg_ts = spa->spa_uberblock.ub_timestamp;
2281468c413aSTim Haley 
2282c8ee1847SVictor Latushkin 	if (rewind_flags & ZPOOL_NEVER_REWIND) {
2283468c413aSTim Haley 		nvlist_free(config);
2284468c413aSTim Haley 		return (load_error);
2285468c413aSTim Haley 	}
2286468c413aSTim Haley 
2287468c413aSTim Haley 	/* Price of rolling back is discarding txgs, including log */
2288468c413aSTim Haley 	if (state == SPA_LOAD_RECOVER)
22891195e687SMark J Musante 		spa_set_log_state(spa, SPA_LOG_CLEAR);
2290468c413aSTim Haley 
2291c8ee1847SVictor Latushkin 	spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
2292c8ee1847SVictor Latushkin 	safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
2293c8ee1847SVictor Latushkin 	min_txg = (rewind_flags & ZPOOL_EXTREME_REWIND) ?
2294c8ee1847SVictor Latushkin 	    TXG_INITIAL : safe_rewind_txg;
2295468c413aSTim Haley 
2296c8ee1847SVictor Latushkin 	/*
2297c8ee1847SVictor Latushkin 	 * Continue as long as we're finding errors, we're still within
2298c8ee1847SVictor Latushkin 	 * the acceptable rewind range, and we're still finding uberblocks
2299c8ee1847SVictor Latushkin 	 */
2300c8ee1847SVictor Latushkin 	while (rewind_error && spa->spa_uberblock.ub_txg >= min_txg &&
2301c8ee1847SVictor Latushkin 	    spa->spa_uberblock.ub_txg <= spa->spa_load_max_txg) {
2302c8ee1847SVictor Latushkin 		if (spa->spa_load_max_txg < safe_rewind_txg)
2303468c413aSTim Haley 			spa->spa_extreme_rewind = B_TRUE;
2304468c413aSTim Haley 		rewind_error = spa_load_retry(spa, state, mosconfig);
2305468c413aSTim Haley 	}
2306468c413aSTim Haley 
2307468c413aSTim Haley 	spa->spa_extreme_rewind = B_FALSE;
2308468c413aSTim Haley 	spa->spa_load_max_txg = UINT64_MAX;
2309468c413aSTim Haley 
2310468c413aSTim Haley 	if (config && (rewind_error || state != SPA_LOAD_RECOVER))
2311468c413aSTim Haley 		spa_config_set(spa, config);
2312468c413aSTim Haley 
2313468c413aSTim Haley 	return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
2314468c413aSTim Haley }
2315468c413aSTim Haley 
2316fa9e4066Sahrens /*
2317fa9e4066Sahrens  * Pool Open/Import
2318fa9e4066Sahrens  *
2319fa9e4066Sahrens  * The import case is identical to an open except that the configuration is sent
2320fa9e4066Sahrens  * down from userland, instead of grabbed from the configuration cache.  For the
2321fa9e4066Sahrens  * case of an open, the pool configuration will exist in the
23223d7072f8Seschrock  * POOL_STATE_UNINITIALIZED state.
2323fa9e4066Sahrens  *
 * The stats information (gen/count/ustats) is used to gather vdev statistics at
 * the same time we open the pool, without having to keep around the spa_t in
 * some ambiguous state.
2327fa9e4066Sahrens  */
2328fa9e4066Sahrens static int
2329468c413aSTim Haley spa_open_common(const char *pool, spa_t **spapp, void *tag, nvlist_t *nvpolicy,
2330468c413aSTim Haley     nvlist_t **config)
2331fa9e4066Sahrens {
2332fa9e4066Sahrens 	spa_t *spa;
23334b964adaSGeorge Wilson 	spa_load_state_t state = SPA_LOAD_OPEN;
2334fa9e4066Sahrens 	int error;
2335fa9e4066Sahrens 	int locked = B_FALSE;
2336fa9e4066Sahrens 
2337fa9e4066Sahrens 	*spapp = NULL;
2338fa9e4066Sahrens 
2339fa9e4066Sahrens 	/*
2340fa9e4066Sahrens 	 * As disgusting as this is, we need to support recursive calls to this
2341fa9e4066Sahrens 	 * function because dsl_dir_open() is called during spa_load(), and ends
2342fa9e4066Sahrens 	 * up calling spa_open() again.  The real fix is to figure out how to
2343fa9e4066Sahrens 	 * avoid dsl_dir_open() calling this in the first place.
2344fa9e4066Sahrens 	 */
2345fa9e4066Sahrens 	if (mutex_owner(&spa_namespace_lock) != curthread) {
2346fa9e4066Sahrens 		mutex_enter(&spa_namespace_lock);
2347fa9e4066Sahrens 		locked = B_TRUE;
2348fa9e4066Sahrens 	}
2349fa9e4066Sahrens 
2350fa9e4066Sahrens 	if ((spa = spa_lookup(pool)) == NULL) {
2351fa9e4066Sahrens 		if (locked)
2352fa9e4066Sahrens 			mutex_exit(&spa_namespace_lock);
2353fa9e4066Sahrens 		return (ENOENT);
2354fa9e4066Sahrens 	}
2355468c413aSTim Haley 
2356fa9e4066Sahrens 	if (spa->spa_state == POOL_STATE_UNINITIALIZED) {
23574b44c88cSTim Haley 		zpool_rewind_policy_t policy;
23584b44c88cSTim Haley 
23594b44c88cSTim Haley 		zpool_get_rewind_policy(nvpolicy ? nvpolicy : spa->spa_config,
23604b44c88cSTim Haley 		    &policy);
23614b44c88cSTim Haley 		if (policy.zrp_request & ZPOOL_DO_REWIND)
23624b44c88cSTim Haley 			state = SPA_LOAD_RECOVER;
2363fa9e4066Sahrens 
23648ad4d6ddSJeff Bonwick 		spa_activate(spa, spa_mode_global);
2365fa9e4066Sahrens 
2366468c413aSTim Haley 		if (state != SPA_LOAD_RECOVER)
2367468c413aSTim Haley 			spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;
2368468c413aSTim Haley 
2369468c413aSTim Haley 		error = spa_load_best(spa, state, B_FALSE, policy.zrp_txg,
2370c8ee1847SVictor Latushkin 		    policy.zrp_request);
2371fa9e4066Sahrens 
2372fa9e4066Sahrens 		if (error == EBADF) {
2373fa9e4066Sahrens 			/*
2374560e6e96Seschrock 			 * If vdev_validate() returns failure (indicated by
2375560e6e96Seschrock 			 * EBADF), it indicates that one of the vdevs indicates
2376560e6e96Seschrock 			 * that the pool has been exported or destroyed.  If
2377560e6e96Seschrock 			 * this is the case, the config cache is out of sync and
2378560e6e96Seschrock 			 * we should remove the pool from the namespace.
2379fa9e4066Sahrens 			 */
2380fa9e4066Sahrens 			spa_unload(spa);
2381fa9e4066Sahrens 			spa_deactivate(spa);
2382c5904d13Seschrock 			spa_config_sync(spa, B_TRUE, B_TRUE);
2383fa9e4066Sahrens 			spa_remove(spa);
2384fa9e4066Sahrens 			if (locked)
2385fa9e4066Sahrens 				mutex_exit(&spa_namespace_lock);
2386fa9e4066Sahrens 			return (ENOENT);
2387ea8dc4b6Seschrock 		}
2388ea8dc4b6Seschrock 
2389ea8dc4b6Seschrock 		if (error) {
2390fa9e4066Sahrens 			/*
2391fa9e4066Sahrens 			 * We can't open the pool, but we still have useful
2392fa9e4066Sahrens 			 * information: the state of each vdev after the
2393fa9e4066Sahrens 			 * attempted vdev_open().  Return this to the user.
2394fa9e4066Sahrens 			 */
23954b964adaSGeorge Wilson 			if (config != NULL && spa->spa_config) {
2396468c413aSTim Haley 				VERIFY(nvlist_dup(spa->spa_config, config,
2397468c413aSTim Haley 				    KM_SLEEP) == 0);
23984b964adaSGeorge Wilson 				VERIFY(nvlist_add_nvlist(*config,
23994b964adaSGeorge Wilson 				    ZPOOL_CONFIG_LOAD_INFO,
24004b964adaSGeorge Wilson 				    spa->spa_load_info) == 0);
24014b964adaSGeorge Wilson 			}
2402fa9e4066Sahrens 			spa_unload(spa);
2403fa9e4066Sahrens 			spa_deactivate(spa);
2404468c413aSTim Haley 			spa->spa_last_open_failed = error;
2405fa9e4066Sahrens 			if (locked)
2406fa9e4066Sahrens 				mutex_exit(&spa_namespace_lock);
2407fa9e4066Sahrens 			*spapp = NULL;
2408fa9e4066Sahrens 			return (error);
2409fa9e4066Sahrens 		}
2410fa9e4066Sahrens 	}
2411fa9e4066Sahrens 
2412fa9e4066Sahrens 	spa_open_ref(spa, tag);
24133d7072f8Seschrock 
2414468c413aSTim Haley 	if (config != NULL)
2415468c413aSTim Haley 		*config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
2416468c413aSTim Haley 
24174b964adaSGeorge Wilson 	/*
24184b964adaSGeorge Wilson 	 * If we've recovered the pool, pass back any information we
24194b964adaSGeorge Wilson 	 * gathered while doing the load.
24204b964adaSGeorge Wilson 	 */
24214b964adaSGeorge Wilson 	if (state == SPA_LOAD_RECOVER) {
24224b964adaSGeorge Wilson 		VERIFY(nvlist_add_nvlist(*config, ZPOOL_CONFIG_LOAD_INFO,
24234b964adaSGeorge Wilson 		    spa->spa_load_info) == 0);
24244b964adaSGeorge Wilson 	}
24254b964adaSGeorge Wilson 
2426a33cae98STim Haley 	if (locked) {
2427a33cae98STim Haley 		spa->spa_last_open_failed = 0;
2428a33cae98STim Haley 		spa->spa_last_ubsync_txg = 0;
2429a33cae98STim Haley 		spa->spa_load_txg = 0;
2430fa9e4066Sahrens 		mutex_exit(&spa_namespace_lock);
2431a33cae98STim Haley 	}
2432fa9e4066Sahrens 
2433fa9e4066Sahrens 	*spapp = spa;
2434fa9e4066Sahrens 
2435fa9e4066Sahrens 	return (0);
2436fa9e4066Sahrens }
2437fa9e4066Sahrens 
/*
 * Open the named pool with an explicit rewind policy nvlist and return
 * its configuration in *config; thin wrapper around spa_open_common().
 */
int
spa_open_rewind(const char *name, spa_t **spapp, void *tag, nvlist_t *policy,
    nvlist_t **config)
{
	return (spa_open_common(name, spapp, tag, policy, config));
}
2444468c413aSTim Haley 
/*
 * Standard pool open: no rewind policy, and no config returned to the
 * caller.  Wrapper around spa_open_common().
 */
int
spa_open(const char *name, spa_t **spapp, void *tag)
{
	return (spa_open_common(name, spapp, tag, NULL, NULL));
}
2450fa9e4066Sahrens 
2451ea8dc4b6Seschrock /*
2452ea8dc4b6Seschrock  * Lookup the given spa_t, incrementing the inject count in the process,
2453ea8dc4b6Seschrock  * preventing it from being exported or destroyed.
2454ea8dc4b6Seschrock  */
2455ea8dc4b6Seschrock spa_t *
2456ea8dc4b6Seschrock spa_inject_addref(char *name)
2457ea8dc4b6Seschrock {
2458ea8dc4b6Seschrock 	spa_t *spa;
2459ea8dc4b6Seschrock 
2460ea8dc4b6Seschrock 	mutex_enter(&spa_namespace_lock);
2461ea8dc4b6Seschrock 	if ((spa = spa_lookup(name)) == NULL) {
2462ea8dc4b6Seschrock 		mutex_exit(&spa_namespace_lock);
2463ea8dc4b6Seschrock 		return (NULL);
2464ea8dc4b6Seschrock 	}
2465ea8dc4b6Seschrock 	spa->spa_inject_ref++;
2466ea8dc4b6Seschrock 	mutex_exit(&spa_namespace_lock);
2467ea8dc4b6Seschrock 
2468ea8dc4b6Seschrock 	return (spa);
2469ea8dc4b6Seschrock }
2470ea8dc4b6Seschrock 
/*
 * Release a fault-injection reference obtained via spa_inject_addref().
 */
void
spa_inject_delref(spa_t *spa)
{
	mutex_enter(&spa_namespace_lock);
	spa->spa_inject_ref--;
	mutex_exit(&spa_namespace_lock);
}
2478ea8dc4b6Seschrock 
2479fa94a07fSbrendan /*
2480fa94a07fSbrendan  * Add spares device information to the nvlist.
2481fa94a07fSbrendan  */
248299653d4eSeschrock static void
248399653d4eSeschrock spa_add_spares(spa_t *spa, nvlist_t *config)
248499653d4eSeschrock {
248599653d4eSeschrock 	nvlist_t **spares;
248699653d4eSeschrock 	uint_t i, nspares;
248799653d4eSeschrock 	nvlist_t *nvroot;
248899653d4eSeschrock 	uint64_t guid;
248999653d4eSeschrock 	vdev_stat_t *vs;
249099653d4eSeschrock 	uint_t vsc;
249139c23413Seschrock 	uint64_t pool;
249299653d4eSeschrock 
24936809eb4eSEric Schrock 	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
24946809eb4eSEric Schrock 
2495fa94a07fSbrendan 	if (spa->spa_spares.sav_count == 0)
249699653d4eSeschrock 		return;
249799653d4eSeschrock 
249899653d4eSeschrock 	VERIFY(nvlist_lookup_nvlist(config,
249999653d4eSeschrock 	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
2500fa94a07fSbrendan 	VERIFY(nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
250199653d4eSeschrock 	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
250299653d4eSeschrock 	if (nspares != 0) {
250399653d4eSeschrock 		VERIFY(nvlist_add_nvlist_array(nvroot,
250499653d4eSeschrock 		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
250599653d4eSeschrock 		VERIFY(nvlist_lookup_nvlist_array(nvroot,
250699653d4eSeschrock 		    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0);
250799653d4eSeschrock 
250899653d4eSeschrock 		/*
250999653d4eSeschrock 		 * Go through and find any spares which have since been
251099653d4eSeschrock 		 * repurposed as an active spare.  If this is the case, update
251199653d4eSeschrock 		 * their status appropriately.
251299653d4eSeschrock 		 */
251399653d4eSeschrock 		for (i = 0; i < nspares; i++) {
251499653d4eSeschrock 			VERIFY(nvlist_lookup_uint64(spares[i],
251599653d4eSeschrock 			    ZPOOL_CONFIG_GUID, &guid) == 0);
251689a89ebfSlling 			if (spa_spare_exists(guid, &pool, NULL) &&
251789a89ebfSlling 			    pool != 0ULL) {
251899653d4eSeschrock 				VERIFY(nvlist_lookup_uint64_array(
25193f9d6ad7SLin Ling 				    spares[i], ZPOOL_CONFIG_VDEV_STATS,
252099653d4eSeschrock 				    (uint64_t **)&vs, &vsc) == 0);
252199653d4eSeschrock 				vs->vs_state = VDEV_STATE_CANT_OPEN;
252299653d4eSeschrock 				vs->vs_aux = VDEV_AUX_SPARED;
252399653d4eSeschrock 			}
252499653d4eSeschrock 		}
252599653d4eSeschrock 	}
252699653d4eSeschrock }
252799653d4eSeschrock 
/*
 * Add l2cache device information to the nvlist, including vdev stats.
 *
 * The caller's 'config' must already contain the vdev tree
 * (ZPOOL_CONFIG_VDEV_TREE).  The l2cache array from the spa's cached aux
 * config is copied into that tree, and the vdev_stat_t embedded in each
 * copied entry is refreshed from the corresponding live vdev.
 */
static void
spa_add_l2cache(spa_t *spa, nvlist_t *config)
{
	nvlist_t **l2cache;
	uint_t i, j, nl2cache;
	nvlist_t *nvroot;
	uint64_t guid;
	vdev_t *vd;
	vdev_stat_t *vs;
	uint_t vsc;

	/* SCL_CONFIG keeps the aux vdev list stable while we walk it. */
	ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));

	if (spa->spa_l2cache.sav_count == 0)
		return;

	VERIFY(nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
	VERIFY(nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
	if (nl2cache != 0) {
		/*
		 * Add a copy of the cached l2cache array to nvroot, then
		 * re-look it up so that l2cache[] points at the copies we
		 * are about to modify, not at the spa's cached originals.
		 */
		VERIFY(nvlist_add_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		VERIFY(nvlist_lookup_nvlist_array(nvroot,
		    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);

		/*
		 * Update level 2 cache device stats.
		 */

		for (i = 0; i < nl2cache; i++) {
			VERIFY(nvlist_lookup_uint64(l2cache[i],
			    ZPOOL_CONFIG_GUID, &guid) == 0);

			/* Match this entry to the live vdev by guid. */
			vd = NULL;
			for (j = 0; j < spa->spa_l2cache.sav_count; j++) {
				if (guid ==
				    spa->spa_l2cache.sav_vdevs[j]->vdev_guid) {
					vd = spa->spa_l2cache.sav_vdevs[j];
					break;
				}
			}
			ASSERT(vd != NULL);

			/* Refresh the stats array embedded in the nvlist. */
			VERIFY(nvlist_lookup_uint64_array(l2cache[i],
			    ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
			    == 0);
			vdev_get_stats(vd, vs);
		}
	}
}
2582fa94a07fSbrendan 
/*
 * Get the current configuration for the named pool, with error counts,
 * load timestamps, spare and l2cache information folded in.  On failure
 * *config may be NULL.  If 'altroot' is non-NULL it receives the pool's
 * alternate root (looked up even for pools that failed to open).
 * Returns the error from spa_open_common().
 */
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
{
	int error;
	spa_t *spa;

	*config = NULL;
	error = spa_open_common(name, &spa, FTAG, NULL, config);

	if (spa != NULL) {
		/*
		 * This still leaves a window of inconsistency where the spares
		 * or l2cache devices could change and the config would be
		 * self-inconsistent.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

		if (*config != NULL) {
			/* Record when this pool configuration was loaded. */
			uint64_t loadtimes[2];

			loadtimes[0] = spa->spa_loaded_ts.tv_sec;
			loadtimes[1] = spa->spa_loaded_ts.tv_nsec;
			VERIFY(nvlist_add_uint64_array(*config,
			    ZPOOL_CONFIG_LOADED_TIME, loadtimes, 2) == 0);

			VERIFY(nvlist_add_uint64(*config,
			    ZPOOL_CONFIG_ERRCOUNT,
			    spa_get_errlog_size(spa)) == 0);

			/* Report the failmode for suspended pools. */
			if (spa_suspended(spa))
				VERIFY(nvlist_add_uint64(*config,
				    ZPOOL_CONFIG_SUSPENDED,
				    spa->spa_failmode) == 0);

			spa_add_spares(spa, *config);
			spa_add_l2cache(spa, *config);
		}
	}

	/*
	 * We want to get the alternate root even for faulted pools, so we cheat
	 * and call spa_lookup() directly.
	 */
	if (altroot) {
		if (spa == NULL) {
			mutex_enter(&spa_namespace_lock);
			spa = spa_lookup(name);
			if (spa)
				spa_altroot(spa, altroot, buflen);
			else
				altroot[0] = '\0';
			/* Borrowed reference only -- do not close below. */
			spa = NULL;
			mutex_exit(&spa_namespace_lock);
		} else {
			spa_altroot(spa, altroot, buflen);
		}
	}

	if (spa != NULL) {
		spa_config_exit(spa, SCL_CONFIG, FTAG);
		spa_close(spa, FTAG);
	}

	return (error);
}
2648fa9e4066Sahrens 
/*
 * Validate that the auxiliary device array is well formed.  We must have an
 * array of nvlists, each which describes a valid leaf vdev.  If this is an
 * import (mode is VDEV_ALLOC_SPARE), then we allow corrupted spares to be
 * specified, as long as they are well-formed.
 *
 * 'config' names the nvlist array to validate (ZPOOL_CONFIG_SPARES or
 * ZPOOL_CONFIG_L2CACHE); 'version' is the minimum SPA version that
 * supports that device class; 'label' selects the label type written by
 * vdev_label_init().  Returns 0 or an errno value.
 */
static int
spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
    spa_aux_vdev_t *sav, const char *config, uint64_t version,
    vdev_labeltype_t label)
{
	nvlist_t **dev;
	uint_t i, ndev;
	vdev_t *vd;
	int error;

	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);

	/*
	 * It's acceptable to have no devs specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, config, &dev, &ndev) != 0)
		return (0);

	/* An array that is present but empty is malformed. */
	if (ndev == 0)
		return (EINVAL);

	/*
	 * Make sure the pool is formatted with a version that supports this
	 * device type.
	 */
	if (spa_version(spa) < version)
		return (ENOTSUP);

	/*
	 * Set the pending device list so we correctly handle device in-use
	 * checking.
	 */
	sav->sav_pending = dev;
	sav->sav_npending = ndev;

	for (i = 0; i < ndev; i++) {
		if ((error = spa_config_parse(spa, &vd, dev[i], NULL, 0,
		    mode)) != 0)
			goto out;

		/* Aux devices must be leaves, not mirrors/raidz/etc. */
		if (!vd->vdev_ops->vdev_op_leaf) {
			vdev_free(vd);
			error = EINVAL;
			goto out;
		}

		/*
		 * The L2ARC currently only supports disk devices in
		 * kernel context.  For user-level testing, we allow it.
		 */
#ifdef _KERNEL
		if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
		    strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
			error = ENOTBLK;
			goto out;
		}
#endif
		vd->vdev_top = vd;

		/*
		 * Open and label the device; on success record the assigned
		 * guid back into the caller's nvlist entry.
		 */
		if ((error = vdev_open(vd)) == 0 &&
		    (error = vdev_label_init(vd, crtxg, label)) == 0) {
			VERIFY(nvlist_add_uint64(dev[i], ZPOOL_CONFIG_GUID,
			    vd->vdev_guid) == 0);
		}

		vdev_free(vd);

		/*
		 * On import (SPARE/L2CACHE alloc modes) tolerate devices
		 * that failed to open or label; otherwise fail hard.
		 */
		if (error &&
		    (mode != VDEV_ALLOC_SPARE && mode != VDEV_ALLOC_L2CACHE))
			goto out;
		else
			error = 0;
	}

out:
	/* Clear the pending list regardless of outcome. */
	sav->sav_pending = NULL;
	sav->sav_npending = 0;
	return (error);
}
273499653d4eSeschrock 
2735fa94a07fSbrendan static int
2736fa94a07fSbrendan spa_validate_aux(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode)
2737fa94a07fSbrendan {
2738fa94a07fSbrendan 	int error;
2739fa94a07fSbrendan 
2740e14bb325SJeff Bonwick 	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
2741e14bb325SJeff Bonwick 
2742fa94a07fSbrendan 	if ((error = spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2743fa94a07fSbrendan 	    &spa->spa_spares, ZPOOL_CONFIG_SPARES, SPA_VERSION_SPARES,
2744fa94a07fSbrendan 	    VDEV_LABEL_SPARE)) != 0) {
2745fa94a07fSbrendan 		return (error);
2746fa94a07fSbrendan 	}
2747fa94a07fSbrendan 
2748fa94a07fSbrendan 	return (spa_validate_aux_devs(spa, nvroot, crtxg, mode,
2749fa94a07fSbrendan 	    &spa->spa_l2cache, ZPOOL_CONFIG_L2CACHE, SPA_VERSION_L2CACHE,
2750fa94a07fSbrendan 	    VDEV_LABEL_L2CACHE));
2751fa94a07fSbrendan }
2752fa94a07fSbrendan 
2753fa94a07fSbrendan static void
2754fa94a07fSbrendan spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
2755fa94a07fSbrendan     const char *config)
2756fa94a07fSbrendan {
2757fa94a07fSbrendan 	int i;
2758fa94a07fSbrendan 
2759fa94a07fSbrendan 	if (sav->sav_config != NULL) {
2760fa94a07fSbrendan 		nvlist_t **olddevs;
2761fa94a07fSbrendan 		uint_t oldndevs;
2762fa94a07fSbrendan 		nvlist_t **newdevs;
2763fa94a07fSbrendan 
2764fa94a07fSbrendan 		/*
2765fa94a07fSbrendan 		 * Generate new dev list by concatentating with the
2766fa94a07fSbrendan 		 * current dev list.
2767fa94a07fSbrendan 		 */
2768fa94a07fSbrendan 		VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
2769fa94a07fSbrendan 		    &olddevs, &oldndevs) == 0);
2770fa94a07fSbrendan 
2771fa94a07fSbrendan 		newdevs = kmem_alloc(sizeof (void *) *
2772fa94a07fSbrendan 		    (ndevs + oldndevs), KM_SLEEP);
2773fa94a07fSbrendan 		for (i = 0; i < oldndevs; i++)
2774fa94a07fSbrendan 			VERIFY(nvlist_dup(olddevs[i], &newdevs[i],
2775fa94a07fSbrendan 			    KM_SLEEP) == 0);
2776fa94a07fSbrendan 		for (i = 0; i < ndevs; i++)
2777fa94a07fSbrendan 			VERIFY(nvlist_dup(devs[i], &newdevs[i + oldndevs],
2778fa94a07fSbrendan 			    KM_SLEEP) == 0);
2779fa94a07fSbrendan 
2780fa94a07fSbrendan 		VERIFY(nvlist_remove(sav->sav_config, config,
2781fa94a07fSbrendan 		    DATA_TYPE_NVLIST_ARRAY) == 0);
2782fa94a07fSbrendan 
2783fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(sav->sav_config,
2784fa94a07fSbrendan 		    config, newdevs, ndevs + oldndevs) == 0);
2785fa94a07fSbrendan 		for (i = 0; i < oldndevs + ndevs; i++)
2786fa94a07fSbrendan 			nvlist_free(newdevs[i]);
2787fa94a07fSbrendan 		kmem_free(newdevs, (oldndevs + ndevs) * sizeof (void *));
2788fa94a07fSbrendan 	} else {
2789fa94a07fSbrendan 		/*
2790fa94a07fSbrendan 		 * Generate a new dev list.
2791fa94a07fSbrendan 		 */
2792fa94a07fSbrendan 		VERIFY(nvlist_alloc(&sav->sav_config, NV_UNIQUE_NAME,
2793fa94a07fSbrendan 		    KM_SLEEP) == 0);
2794fa94a07fSbrendan 		VERIFY(nvlist_add_nvlist_array(sav->sav_config, config,
2795fa94a07fSbrendan 		    devs, ndevs) == 0);
2796fa94a07fSbrendan 	}
2797fa94a07fSbrendan }
2798fa94a07fSbrendan 
2799fa94a07fSbrendan /*
2800fa94a07fSbrendan  * Stop and drop level 2 ARC devices
2801fa94a07fSbrendan  */
2802fa94a07fSbrendan void
2803fa94a07fSbrendan spa_l2cache_drop(spa_t *spa)
2804fa94a07fSbrendan {
2805fa94a07fSbrendan 	vdev_t *vd;
2806fa94a07fSbrendan 	int i;
2807fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_l2cache;
2808fa94a07fSbrendan 
2809fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++) {
2810fa94a07fSbrendan 		uint64_t pool;
2811fa94a07fSbrendan 
2812fa94a07fSbrendan 		vd = sav->sav_vdevs[i];
2813fa94a07fSbrendan 		ASSERT(vd != NULL);
2814fa94a07fSbrendan 
28158ad4d6ddSJeff Bonwick 		if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
28168ad4d6ddSJeff Bonwick 		    pool != 0ULL && l2arc_vdev_present(vd))
2817fa94a07fSbrendan 			l2arc_remove_vdev(vd);
2818fa94a07fSbrendan 		if (vd->vdev_isl2cache)
2819fa94a07fSbrendan 			spa_l2cache_remove(vd);
2820fa94a07fSbrendan 		vdev_clear_stats(vd);
2821fa94a07fSbrendan 		(void) vdev_close(vd);
2822fa94a07fSbrendan 	}
2823fa94a07fSbrendan }
2824fa94a07fSbrendan 
/*
 * Pool Creation
 */

/*
 * Create a new storage pool named 'pool' from the vdev tree in 'nvroot',
 * with optional pool properties 'props' and dataset properties 'zplprops'
 * for the root dataset.  This parses and labels the vdev tree, creates
 * the meta-objset and its standard objects (config, version, deferred-free
 * bpobj, history), applies initial properties, starts the sync thread, and
 * waits for the first txg to sync.  On success the spa is left active in
 * the namespace; on failure it is fully torn down.  Returns 0 or errno.
 */
int
spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
    const char *history_str, nvlist_t *zplprops)
{
	spa_t *spa;
	char *altroot = NULL;
	vdev_t *rvd;
	dsl_pool_t *dp;
	dmu_tx_t *tx;
	int error = 0;
	uint64_t txg = TXG_INITIAL;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;
	uint64_t version, obj;

	/*
	 * If this pool already exists, return failure.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Allocate a new spa_t structure.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, NULL, altroot);
	spa_activate(spa, spa_mode_global);

	if (props && (error = spa_prop_validate(spa, props))) {
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/* Default to the current SPA version if none was requested. */
	if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
	    &version) != 0)
		version = SPA_VERSION;
	ASSERT(version <= SPA_VERSION);

	/* Seed the uberblock; ub_txg is one behind the first sync txg. */
	spa->spa_first_txg = txg;
	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;

	/*
	 * Create "The Godfather" zio to hold all async IOs
	 */
	spa->spa_async_zio_root = zio_root(spa, NULL, NULL,
	    ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE | ZIO_FLAG_GODFATHER);

	/*
	 * Create the root vdev.
	 */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	error = spa_config_parse(spa, &rvd, nvroot, NULL, 0, VDEV_ALLOC_ADD);

	ASSERT(error != 0 || rvd != NULL);
	ASSERT(error != 0 || spa->spa_root_vdev == rvd);

	/* A pool with no allocatable (data) devices is useless. */
	if (error == 0 && !zfs_allocatable_devs(nvroot))
		error = EINVAL;

	/*
	 * Label the vdevs, validate any aux devices, and size the
	 * metaslabs for each top-level vdev.
	 */
	if (error == 0 &&
	    (error = vdev_create(rvd, txg, B_FALSE)) == 0 &&
	    (error = spa_validate_aux(spa, nvroot, txg,
	    VDEV_ALLOC_ADD)) == 0) {
		for (int c = 0; c < rvd->vdev_children; c++) {
			vdev_metaslab_set_size(rvd->vdev_child[c]);
			vdev_expand(rvd->vdev_child[c], txg);
		}
	}

	spa_config_exit(spa, SCL_ALL, FTAG);

	if (error != 0) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	/*
	 * Get the list of spares, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_spares.sav_config, NV_UNIQUE_NAME,
		    KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	/*
	 * Get the list of level 2 cache devices, if specified.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/* Create the DSL pool; this also creates the MOS. */
	spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
	spa->spa_meta_objset = dp->dp_meta_objset;

	/*
	 * Create DDTs (dedup tables).
	 */
	ddt_create(spa);

	spa_update_dspace(spa);

	/* All of the following object creation happens in the first txg. */
	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * Create the pool config object.
	 */
	spa->spa_config_object = dmu_object_alloc(spa->spa_meta_objset,
	    DMU_OT_PACKED_NVLIST, SPA_CONFIG_BLOCKSIZE,
	    DMU_OT_PACKED_NVLIST_SIZE, sizeof (uint64_t), tx);

	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CONFIG,
	    sizeof (uint64_t), 1, &spa->spa_config_object, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool config");
	}

	/* Record the SPA version the pool was created with. */
	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
	    sizeof (uint64_t), 1, &version, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add pool version");
	}

	/* Newly created pools with the right version are always deflated. */
	if (version >= SPA_VERSION_RAIDZ_DEFLATE) {
		spa->spa_deflate = TRUE;
		if (zap_add(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
		    sizeof (uint64_t), 1, &spa->spa_deflate, tx) != 0) {
			cmn_err(CE_PANIC, "failed to add deflate");
		}
	}

	/*
	 * Create the deferred-free bpobj.  Turn off compression
	 * because sync-to-convergence takes longer if the blocksize
	 * keeps changing.
	 */
	obj = bpobj_alloc(spa->spa_meta_objset, 1 << 14, tx);
	dmu_object_set_compress(spa->spa_meta_objset, obj,
	    ZIO_COMPRESS_OFF, tx);
	if (zap_add(spa->spa_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_SYNC_BPOBJ,
	    sizeof (uint64_t), 1, &obj, tx) != 0) {
		cmn_err(CE_PANIC, "failed to add bpobj");
	}
	VERIFY3U(0, ==, bpobj_open(&spa->spa_deferred_bpobj,
	    spa->spa_meta_objset, obj));

	/*
	 * Create the pool's history object.
	 */
	if (version >= SPA_VERSION_ZPOOL_HISTORY)
		spa_history_create_obj(spa, tx);

	/*
	 * Set pool properties.
	 */
	spa->spa_bootfs = zpool_prop_default_numeric(ZPOOL_PROP_BOOTFS);
	spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
	spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
	spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);

	if (props != NULL) {
		spa_configfile_set(spa, props, B_FALSE);
		spa_sync_props(spa, props, tx);
	}

	dmu_tx_commit(tx);

	spa->spa_sync_on = B_TRUE;
	txg_sync_start(spa->spa_dsl_pool);

	/*
	 * We explicitly wait for the first transaction to complete so that our
	 * bean counters are appropriately updated.
	 */
	txg_wait_synced(spa->spa_dsl_pool, txg);

	/* Write out the cachefile entry for the new pool. */
	spa_config_sync(spa, B_FALSE, B_TRUE);

	if (version >= SPA_VERSION_ZPOOL_HISTORY && history_str != NULL)
		(void) spa_history_log(spa, history_str, LOG_CMD_POOL_CREATE);
	spa_history_log_version(spa, LOG_POOL_CREATE);

	/* References beyond this count indicate active consumers. */
	spa->spa_minref = refcount_count(&spa->spa_refcount);

	mutex_exit(&spa_namespace_lock);

	return (0);
}
3045fa9e4066Sahrens 
3046e7cbe64fSgw #ifdef _KERNEL
3047e7cbe64fSgw /*
304821ecdf64SLin Ling  * Get the root pool information from the root disk, then import the root pool
304921ecdf64SLin Ling  * during the system boot up time.
3050e7cbe64fSgw  */
305121ecdf64SLin Ling extern int vdev_disk_read_rootlabel(char *, char *, nvlist_t **);
305221ecdf64SLin Ling 
/*
 * Read the pool label from the given boot device and massage it into a
 * full pool configuration: the label's top-level vdev is wrapped in a
 * newly built root vdev.  On success returns the config (caller frees)
 * and sets *guid to the device's vdev guid; returns NULL if the label
 * cannot be read.
 */
static nvlist_t *
spa_generate_rootconf(char *devpath, char *devid, uint64_t *guid)
{
	nvlist_t *config;
	nvlist_t *nvtop, *nvroot;
	uint64_t pgid;

	if (vdev_disk_read_rootlabel(devpath, devid, &config) != 0)
		return (NULL);

	/*
	 * Add this top-level vdev to the child array.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    &pgid) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, guid) == 0);

	/*
	 * Put this pool's top-level vdevs into a root vdev.
	 */
	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
	    VDEV_TYPE_ROOT) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_ID, 0ULL) == 0);
	VERIFY(nvlist_add_uint64(nvroot, ZPOOL_CONFIG_GUID, pgid) == 0);
	VERIFY(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
	    &nvtop, 1) == 0);

	/*
	 * Replace the existing vdev_tree with the new root vdev in
	 * this pool's configuration (remove the old, add the new).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
	nvlist_free(nvroot);
	return (config);
}
3091e7cbe64fSgw 
3092e7cbe64fSgw /*
309321ecdf64SLin Ling  * Walk the vdev tree and see if we can find a device with "better"
309421ecdf64SLin Ling  * configuration. A configuration is "better" if the label on that
309521ecdf64SLin Ling  * device has a more recent txg.
3096051aabe6Staylor  */
309721ecdf64SLin Ling static void
309821ecdf64SLin Ling spa_alt_rootvdev(vdev_t *vd, vdev_t **avd, uint64_t *txg)
3099051aabe6Staylor {
3100573ca77eSGeorge Wilson 	for (int c = 0; c < vd->vdev_children; c++)
310121ecdf64SLin Ling 		spa_alt_rootvdev(vd->vdev_child[c], avd, txg);
3102051aabe6Staylor 
310321ecdf64SLin Ling 	if (vd->vdev_ops->vdev_op_leaf) {
310421ecdf64SLin Ling 		nvlist_t *label;
310521ecdf64SLin Ling 		uint64_t label_txg;
3106051aabe6Staylor 
310721ecdf64SLin Ling 		if (vdev_disk_read_rootlabel(vd->vdev_physpath, vd->vdev_devid,
310821ecdf64SLin Ling 		    &label) != 0)
310921ecdf64SLin Ling 			return;
3110051aabe6Staylor 
311121ecdf64SLin Ling 		VERIFY(nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_TXG,
311221ecdf64SLin Ling 		    &label_txg) == 0);
3113051aabe6Staylor 
311421ecdf64SLin Ling 		/*
311521ecdf64SLin Ling 		 * Do we have a better boot device?
311621ecdf64SLin Ling 		 */
311721ecdf64SLin Ling 		if (label_txg > *txg) {
311821ecdf64SLin Ling 			*txg = label_txg;
311921ecdf64SLin Ling 			*avd = vd;
3120051aabe6Staylor 		}
312121ecdf64SLin Ling 		nvlist_free(label);
3122051aabe6Staylor 	}
3123051aabe6Staylor }
3124051aabe6Staylor 
/*
 * Import a root pool.
 *
 * For x86. devpath_list will consist of devid and/or physpath name of
 * the vdev (e.g. "id1,sd@SSEAGATE..." or "/pci@1f,0/ide@d/disk@0,0:a").
 * The GRUB "findroot" command will return the vdev we should boot.
 *
 * For Sparc, devpath_list consists the physpath name of the booting device
 * no matter the rootpool is a single device pool or a mirrored pool.
 * e.g.
 *	"/pci@1f,0/ide@d/disk@0,0:a"
 *
 * Returns 0 on success, or an errno value if the label cannot be read,
 * parsed, or names a boot device we should not be booting from.
 */
int
spa_import_rootpool(char *devpath, char *devid)
{
	spa_t *spa;
	vdev_t *rvd, *bvd, *avd = NULL;
	nvlist_t *config, *nvtop;
	uint64_t guid, txg;
	char *pname;
	int error;

	/*
	 * Read the label from the boot device and generate a configuration.
	 */
	config = spa_generate_rootconf(devpath, devid, &guid);
#if defined(_OBP) && defined(_KERNEL)
	if (config == NULL) {
		if (strstr(devpath, "/iscsi/ssd") != NULL) {
			/* iscsi boot */
			get_iscsi_bootpath_phy(devpath);
			config = spa_generate_rootconf(devpath, devid, &guid);
		}
	}
#endif
	if (config == NULL) {
		cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
		    devpath);
		return (EIO);
	}

	VERIFY(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
	    &pname) == 0);
	VERIFY(nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_TXG, &txg) == 0);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pname)) != NULL) {
		/*
		 * Remove the existing root pool from the namespace so that we
		 * can replace it with the correct config we just read in.
		 */
		spa_remove(spa);
	}

	spa = spa_add(pname, config, NULL);
	spa->spa_is_root = B_TRUE;
	/* Insert the pool as-is; the label config is authoritative. */
	spa->spa_import_flags = ZFS_IMPORT_VERBATIM;

	/*
	 * Build up a vdev tree based on the boot device's label config.
	 */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvtop) == 0);
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	error = spa_config_parse(spa, &rvd, nvtop, NULL, 0,
	    VDEV_ALLOC_ROOTPOOL);
	spa_config_exit(spa, SCL_ALL, FTAG);
	if (error) {
		mutex_exit(&spa_namespace_lock);
		nvlist_free(config);
		cmn_err(CE_NOTE, "Can not parse the config for pool '%s'",
		    pname);
		return (error);
	}

	/*
	 * Get the boot vdev.
	 */
	if ((bvd = vdev_lookup_by_guid(rvd, guid)) == NULL) {
		cmn_err(CE_NOTE, "Can not find the boot vdev for guid %llu",
		    (u_longlong_t)guid);
		error = ENOENT;
		goto out;
	}

	/*
	 * Determine if there is a better boot device.
	 */
	avd = bvd;
	spa_alt_rootvdev(rvd, &avd, &txg);
	if (avd != bvd) {
		cmn_err(CE_NOTE, "The boot device is 'degraded'. Please "
		    "try booting from '%s'", avd->vdev_path);
		error = EINVAL;
		goto out;
	}

	/*
	 * If the boot device is part of a spare vdev then ensure that
	 * we're booting off the active spare.
	 */
	if (bvd->vdev_parent->vdev_ops == &vdev_spare_ops &&
	    !bvd->vdev_isspare) {
		cmn_err(CE_NOTE, "The boot device is currently spared. Please "
		    "try booting from '%s'",
		    bvd->vdev_parent->
		    vdev_child[bvd->vdev_parent->vdev_children - 1]->vdev_path);
		error = EINVAL;
		goto out;
	}

	error = 0;
	spa_history_log_version(spa, LOG_POOL_IMPORT);
out:
	/* The parsed tree was only needed for validation; free it. */
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	vdev_free(rvd);
	spa_config_exit(spa, SCL_ALL, FTAG);
	mutex_exit(&spa_namespace_lock);

	nvlist_free(config);
	return (error);
}
324721ecdf64SLin Ling 
3248e7cbe64fSgw #endif
3249e7cbe64fSgw 
/*
 * Import a non-root pool into the system.
 *
 * 'pool' is the name to import the pool under, 'config' is the caller-
 * supplied pool configuration (also updated in place with load info for
 * the caller), 'props' is an optional nvlist of pool properties to apply,
 * and 'flags' is a mask of ZFS_IMPORT_* flags.
 *
 * Returns 0 on success, or an errno (EEXIST if the name is already in
 * use, or whatever spa_load_best()/spa_prop_set() reported).
 */
int
spa_import(const char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
{
	spa_t *spa;
	char *altroot = NULL;
	spa_load_state_t state = SPA_LOAD_IMPORT;
	zpool_rewind_policy_t policy;
	int error;
	nvlist_t *nvroot;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	/*
	 * If a pool with this name exists, return failure.
	 * spa_namespace_lock is held from here until we either fail
	 * or finish wiring the pool into the namespace.
	 */
	mutex_enter(&spa_namespace_lock);
	if (spa_lookup(pool) != NULL) {
		mutex_exit(&spa_namespace_lock);
		return (EEXIST);
	}

	/*
	 * Create and initialize the spa structure.  The altroot property,
	 * if supplied, must be plumbed in at spa_add() time.
	 */
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
	spa = spa_add(pool, config, altroot);
	spa->spa_import_flags = flags;

	/*
	 * Verbatim import - Take a pool and insert it into the namespace
	 * as if it had been loaded at boot.  No spa_load() is performed;
	 * we only record the config and sync the cache file.
	 */
	if (spa->spa_import_flags & ZFS_IMPORT_VERBATIM) {
		if (props != NULL)
			spa_configfile_set(spa, props, B_FALSE);

		spa_config_sync(spa, B_FALSE, B_TRUE);

		mutex_exit(&spa_namespace_lock);
		spa_history_log_version(spa, LOG_POOL_IMPORT);

		return (0);
	}

	spa_activate(spa, spa_mode_global);

	/*
	 * Don't start async tasks until we know everything is healthy.
	 */
	spa_async_suspend(spa);

	/* Honor any rewind request encoded in the caller's config. */
	zpool_get_rewind_policy(config, &policy);
	if (policy.zrp_request & ZPOOL_DO_REWIND)
		state = SPA_LOAD_RECOVER;

	/*
	 * Pass off the heavy lifting to spa_load().  Pass TRUE for mosconfig
	 * because the user-supplied config is actually the one to trust when
	 * doing an import.
	 */
	if (state != SPA_LOAD_RECOVER)
		spa->spa_last_ubsync_txg = spa->spa_load_txg = 0;

	error = spa_load_best(spa, state, B_TRUE, policy.zrp_txg,
	    policy.zrp_request);

	/*
	 * Propagate anything learned while loading the pool and pass it
	 * back to caller (i.e. rewind info, missing devices, etc).
	 */
	VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
	    spa->spa_load_info) == 0);

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	/*
	 * Toss any existing sparelist, as it doesn't have any validity
	 * anymore, and conflicts with spa_has_spare().
	 */
	if (spa->spa_spares.sav_config) {
		nvlist_free(spa->spa_spares.sav_config);
		spa->spa_spares.sav_config = NULL;
		spa_load_spares(spa);
	}
	if (spa->spa_l2cache.sav_config) {
		nvlist_free(spa->spa_l2cache.sav_config);
		spa->spa_l2cache.sav_config = NULL;
		spa_load_l2cache(spa);
	}

	/* Validate aux devices from the caller's vdev tree, if load was ok. */
	VERIFY(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
	    &nvroot) == 0);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_SPARE);
	if (error == 0)
		error = spa_validate_aux(spa, nvroot, -1ULL,
		    VDEV_ALLOC_L2CACHE);
	spa_config_exit(spa, SCL_ALL, FTAG);

	if (props != NULL)
		spa_configfile_set(spa, props, B_FALSE);

	/*
	 * If the load or the property application failed, tear the
	 * half-imported pool back down and remove it from the namespace.
	 */
	if (error != 0 || (props && spa_writeable(spa) &&
	    (error = spa_prop_set(spa, props)))) {
		spa_unload(spa);
		spa_deactivate(spa);
		spa_remove(spa);
		mutex_exit(&spa_namespace_lock);
		return (error);
	}

	spa_async_resume(spa);

	/*
	 * Override any spares and level 2 cache devices as specified by
	 * the user, as these may have correct device names/devids, etc.
	 */
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
	    &spares, &nspares) == 0) {
		if (spa->spa_spares.sav_config)
			VERIFY(nvlist_remove(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_spares.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_spares.sav_config,
		    ZPOOL_CONFIG_SPARES, spares, nspares) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_spares(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_spares.sav_sync = B_TRUE;
	}
	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
	    &l2cache, &nl2cache) == 0) {
		if (spa->spa_l2cache.sav_config)
			VERIFY(nvlist_remove(spa->spa_l2cache.sav_config,
			    ZPOOL_CONFIG_L2CACHE, DATA_TYPE_NVLIST_ARRAY) == 0);
		else
			VERIFY(nvlist_alloc(&spa->spa_l2cache.sav_config,
			    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_nvlist_array(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache) == 0);
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
		spa_load_l2cache(spa);
		spa_config_exit(spa, SCL_ALL, FTAG);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * Check for any removed devices.
	 */
	if (spa->spa_autoreplace) {
		spa_aux_check_removed(&spa->spa_spares);
		spa_aux_check_removed(&spa->spa_l2cache);
	}

	if (spa_writeable(spa)) {
		/*
		 * Update the config cache to include the newly-imported pool.
		 */
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	}

	/*
	 * It's possible that the pool was expanded while it was exported.
	 * We kick off an async task to handle this for us.
	 */
	spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);

	mutex_exit(&spa_namespace_lock);
	spa_history_log_version(spa, LOG_POOL_IMPORT);

	return (0);
}
3428c5904d13Seschrock 
/*
 * Probe an importable pool described by 'tryconfig' without actually
 * importing it.  The pool is loaded read-only under the reserved name
 * TRYIMPORT_NAME, its current config is generated, and the pool is then
 * unloaded and removed again before we return.
 *
 * Returns a freshly generated config nvlist (caller frees) if 'tryconfig'
 * was at least parsable, or NULL if the required name/state pairs are
 * missing or the config could not be parsed into a vdev tree.
 */
nvlist_t *
spa_tryimport(nvlist_t *tryconfig)
{
	nvlist_t *config = NULL;
	char *poolname;
	spa_t *spa;
	uint64_t state;
	int error;

	/* Both the pool name and pool state must be present to proceed. */
	if (nvlist_lookup_string(tryconfig, ZPOOL_CONFIG_POOL_NAME, &poolname))
		return (NULL);

	if (nvlist_lookup_uint64(tryconfig, ZPOOL_CONFIG_POOL_STATE, &state))
		return (NULL);

	/*
	 * Create and initialize the spa structure.  FREAD: the pool is
	 * activated read-only for the duration of the probe.
	 */
	mutex_enter(&spa_namespace_lock);
	spa = spa_add(TRYIMPORT_NAME, tryconfig, NULL);
	spa_activate(spa, FREAD);

	/*
	 * Pass off the heavy lifting to spa_load().
	 * Pass TRUE for mosconfig because the user-supplied config
	 * is actually the one to trust when doing an import.
	 */
	error = spa_load(spa, SPA_LOAD_TRYIMPORT, SPA_IMPORT_EXISTING, B_TRUE);

	/*
	 * If 'tryconfig' was at least parsable, return the current config.
	 */
	if (spa->spa_root_vdev != NULL) {
		config = spa_config_generate(spa, NULL, -1ULL, B_TRUE);
		VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
		    poolname) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
		    state) == 0);
		VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
		    spa->spa_uberblock.ub_timestamp) == 0);

		/*
		 * If the bootfs property exists on this pool then we
		 * copy it out so that external consumers can tell which
		 * pools are bootable.
		 */
		if ((!error || error == EEXIST) && spa->spa_bootfs) {
			char *tmpname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

			/*
			 * We have to play games with the name since the
			 * pool was opened as TRYIMPORT_NAME.
			 */
			if (dsl_dsobj_to_dsname(spa_name(spa),
			    spa->spa_bootfs, tmpname) == 0) {
				char *cp;
				char *dsname = kmem_alloc(MAXPATHLEN, KM_SLEEP);

				/*
				 * Replace the TRYIMPORT_NAME prefix (the part
				 * before the first '/') with the real pool
				 * name from the caller's config.
				 */
				cp = strchr(tmpname, '/');
				if (cp == NULL) {
					(void) strlcpy(dsname, tmpname,
					    MAXPATHLEN);
				} else {
					(void) snprintf(dsname, MAXPATHLEN,
					    "%s/%s", poolname, ++cp);
				}
				VERIFY(nvlist_add_string(config,
				    ZPOOL_CONFIG_BOOTFS, dsname) == 0);
				kmem_free(dsname, MAXPATHLEN);
			}
			kmem_free(tmpname, MAXPATHLEN);
		}

		/*
		 * Add the list of hot spares and level 2 cache devices.
		 */
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_add_spares(spa, config);
		spa_add_l2cache(spa, config);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/* The probe pool is always torn down before returning. */
	spa_unload(spa);
	spa_deactivate(spa);
	spa_remove(spa);
	mutex_exit(&spa_namespace_lock);

	return (config);
}
3518fa9e4066Sahrens 
/*
 * Pool export/destroy
 *
 * The act of destroying or exporting a pool is very simple.  We make sure there
 * is no more pending I/O and any references to the pool are gone.  Then, we
 * update the pool state and sync all the labels to disk, removing the
 * configuration from the cache afterwards. If the 'hardforce' flag is set, then
 * we don't sync the labels or remove the configuration cache.
 *
 * 'new_state' selects the flavor: POOL_STATE_DESTROYED (spa_destroy),
 * POOL_STATE_EXPORTED (spa_export), or POOL_STATE_UNINITIALIZED
 * (spa_reset, which unloads without removing from the namespace).
 * If 'oldconfig' is non-NULL, a duplicate of the pool's final config is
 * returned through it (caller frees).  Returns 0 or an errno (EROFS,
 * ENOENT, EBUSY, EXDEV).
 */
static int
spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
    boolean_t force, boolean_t hardforce)
{
	spa_t *spa;

	if (oldconfig)
		*oldconfig = NULL;

	/* Export/destroy requires the module to be in writeable mode. */
	if (!(spa_mode_global & FWRITE))
		return (EROFS);

	mutex_enter(&spa_namespace_lock);
	if ((spa = spa_lookup(pool)) == NULL) {
		mutex_exit(&spa_namespace_lock);
		return (ENOENT);
	}

	/*
	 * Put a hold on the pool, drop the namespace lock, stop async tasks,
	 * reacquire the namespace lock, and see if we can export.
	 * The hold keeps the spa alive while the namespace lock is dropped.
	 */
	spa_open_ref(spa, FTAG);
	mutex_exit(&spa_namespace_lock);
	spa_async_suspend(spa);
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);

	/*
	 * The pool will be in core if it's openable,
	 * in which case we can modify its state.
	 */
	if (spa->spa_state != POOL_STATE_UNINITIALIZED && spa->spa_sync_on) {
		/*
		 * Objsets may be open only because they're dirty, so we
		 * have to force it to sync before checking spa_refcnt.
		 */
		txg_wait_synced(spa->spa_dsl_pool, 0);

		/*
		 * A pool cannot be exported or destroyed if there are active
		 * references.  If we are resetting a pool, allow references by
		 * fault injection handlers.
		 */
		if (!spa_refcount_zero(spa) ||
		    (spa->spa_inject_ref != 0 &&
		    new_state != POOL_STATE_UNINITIALIZED)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EBUSY);
		}

		/*
		 * A pool cannot be exported if it has an active shared spare.
		 * This is to prevent other pools stealing the active spare
		 * from an exported pool. At user's own will, such pool can
		 * be forcedly exported.
		 */
		if (!force && new_state == POOL_STATE_EXPORTED &&
		    spa_has_active_shared_spare(spa)) {
			spa_async_resume(spa);
			mutex_exit(&spa_namespace_lock);
			return (EXDEV);
		}

		/*
		 * We want this to be reflected on every label,
		 * so mark them all dirty.  spa_unload() will do the
		 * final sync that pushes these changes out.
		 */
		if (new_state != POOL_STATE_UNINITIALIZED && !hardforce) {
			spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
			spa->spa_state = new_state;
			spa->spa_final_txg = spa_last_synced_txg(spa) +
			    TXG_DEFER_SIZE + 1;
			vdev_config_dirty(spa->spa_root_vdev);
			spa_config_exit(spa, SCL_ALL, FTAG);
		}
	}

	spa_event_notify(spa, NULL, ESC_ZFS_POOL_DESTROY);

	if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
		spa_unload(spa);
		spa_deactivate(spa);
	}

	/* Hand the (pre-removal) config back to the caller if requested. */
	if (oldconfig && spa->spa_config)
		VERIFY(nvlist_dup(spa->spa_config, oldconfig, 0) == 0);

	/* For reset (UNINITIALIZED) the spa stays in the namespace. */
	if (new_state != POOL_STATE_UNINITIALIZED) {
		if (!hardforce)
			spa_config_sync(spa, B_TRUE, B_TRUE);
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);

	return (0);
}
3627fa9e4066Sahrens 
3628fa9e4066Sahrens /*
3629fa9e4066Sahrens  * Destroy a storage pool.
3630fa9e4066Sahrens  */
3631fa9e4066Sahrens int
3632fa9e4066Sahrens spa_destroy(char *pool)
3633fa9e4066Sahrens {
3634394ab0cbSGeorge Wilson 	return (spa_export_common(pool, POOL_STATE_DESTROYED, NULL,
3635394ab0cbSGeorge Wilson 	    B_FALSE, B_FALSE));
3636fa9e4066Sahrens }
3637fa9e4066Sahrens 
3638fa9e4066Sahrens /*
3639fa9e4066Sahrens  * Export a storage pool.
3640fa9e4066Sahrens  */
3641fa9e4066Sahrens int
3642394ab0cbSGeorge Wilson spa_export(char *pool, nvlist_t **oldconfig, boolean_t force,
3643394ab0cbSGeorge Wilson     boolean_t hardforce)
3644fa9e4066Sahrens {
3645394ab0cbSGeorge Wilson 	return (spa_export_common(pool, POOL_STATE_EXPORTED, oldconfig,
3646394ab0cbSGeorge Wilson 	    force, hardforce));
3647fa9e4066Sahrens }
3648fa9e4066Sahrens 
3649ea8dc4b6Seschrock /*
3650ea8dc4b6Seschrock  * Similar to spa_export(), this unloads the spa_t without actually removing it
3651ea8dc4b6Seschrock  * from the namespace in any way.
3652ea8dc4b6Seschrock  */
3653ea8dc4b6Seschrock int
3654ea8dc4b6Seschrock spa_reset(char *pool)
3655ea8dc4b6Seschrock {
365689a89ebfSlling 	return (spa_export_common(pool, POOL_STATE_UNINITIALIZED, NULL,
3657394ab0cbSGeorge Wilson 	    B_FALSE, B_FALSE));
3658ea8dc4b6Seschrock }
3659ea8dc4b6Seschrock 
3660fa9e4066Sahrens /*
3661fa9e4066Sahrens  * ==========================================================================
3662fa9e4066Sahrens  * Device manipulation
3663fa9e4066Sahrens  * ==========================================================================
3664fa9e4066Sahrens  */
3665fa9e4066Sahrens 
/*
 * Add a device to a storage pool.
 *
 * 'nvroot' describes the new top-level vdevs and/or aux (spare, l2cache)
 * devices to add.  Runs under the spa_vdev_enter()/spa_vdev_exit()
 * transaction protocol; on any failure the parsed vdev tree is handed
 * back to spa_vdev_exit() for cleanup.  Returns 0 or an errno.
 */
int
spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
{
	uint64_t txg, id;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *tvd;
	nvlist_t **spares, **l2cache;
	uint_t nspares, nl2cache;

	txg = spa_vdev_enter(spa);

	/* Parse the caller's nvlist into an (unattached) vdev tree. */
	if ((error = spa_config_parse(spa, &vd, nvroot, NULL, 0,
	    VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	spa->spa_pending_vdev = vd;	/* spa_vdev_exit() will clear this */

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares) != 0)
		nspares = 0;

	if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE, &l2cache,
	    &nl2cache) != 0)
		nl2cache = 0;

	/* Reject a request that adds nothing at all. */
	if (vd->vdev_children == 0 && nspares == 0 && nl2cache == 0)
		return (spa_vdev_exit(spa, vd, txg, EINVAL));

	if (vd->vdev_children != 0 &&
	    (error = vdev_create(vd, txg, B_FALSE)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * We must validate the spares and l2cache devices after checking the
	 * children.  Otherwise, vdev_inuse() will blindly overwrite the spare.
	 */
	if ((error = spa_validate_aux(spa, nvroot, txg, VDEV_ALLOC_ADD)) != 0)
		return (spa_vdev_exit(spa, vd, txg, error));

	/*
	 * Transfer each new top-level vdev from vd to rvd.
	 */
	for (int c = 0; c < vd->vdev_children; c++) {

		/*
		 * Set the vdev id to the first hole, if one exists.
		 * Holes left by prior device removals are reused rather
		 * than growing the root vdev's child array.
		 */
		for (id = 0; id < rvd->vdev_children; id++) {
			if (rvd->vdev_child[id]->vdev_ishole) {
				vdev_free(rvd->vdev_child[id]);
				break;
			}
		}
		tvd = vd->vdev_child[c];
		vdev_remove_child(vd, tvd);
		tvd->vdev_id = id;
		vdev_add_child(rvd, tvd);
		vdev_config_dirty(tvd);
	}

	if (nspares != 0) {
		spa_set_aux_vdevs(&spa->spa_spares, spares, nspares,
		    ZPOOL_CONFIG_SPARES);
		spa_load_spares(spa);
		spa->spa_spares.sav_sync = B_TRUE;
	}

	if (nl2cache != 0) {
		spa_set_aux_vdevs(&spa->spa_l2cache, l2cache, nl2cache,
		    ZPOOL_CONFIG_L2CACHE);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	}

	/*
	 * We have to be careful when adding new vdevs to an existing pool.
	 * If other threads start allocating from these vdevs before we
	 * sync the config cache, and we lose power, then upon reboot we may
	 * fail to open the pool because there are DVAs that the config cache
	 * can't translate.  Therefore, we first add the vdevs without
	 * initializing metaslabs; sync the config cache (via spa_vdev_exit());
	 * and then let spa_config_update() initialize the new metaslabs.
	 *
	 * spa_load() checks for added-but-not-initialized vdevs, so that
	 * if we lose power at any point in this sequence, the remaining
	 * steps will be completed the next time we load the pool.
	 */
	(void) spa_vdev_exit(spa, vd, txg, 0);

	mutex_enter(&spa_namespace_lock);
	spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
	mutex_exit(&spa_namespace_lock);

	return (0);
}
3765fa9e4066Sahrens 
3766fa9e4066Sahrens /*
3767fa9e4066Sahrens  * Attach a device to a mirror.  The arguments are the path to any device
3768fa9e4066Sahrens  * in the mirror, and the nvroot for the new device.  If the path specifies
3769fa9e4066Sahrens  * a device that is not mirrored, we automatically insert the mirror vdev.
3770fa9e4066Sahrens  *
3771fa9e4066Sahrens  * If 'replacing' is specified, the new device is intended to replace the
3772fa9e4066Sahrens  * existing device; in this case the two devices are made into their own
37733d7072f8Seschrock  * mirror using the 'replacing' vdev, which is functionally identical to
3774fa9e4066Sahrens  * the mirror vdev (it actually reuses all the same ops) but has a few
3775fa9e4066Sahrens  * extra rules: you can't attach to it after it's been created, and upon
3776fa9e4066Sahrens  * completion of resilvering, the first disk (the one being replaced)
3777fa9e4066Sahrens  * is automatically detached.
3778fa9e4066Sahrens  */
3779fa9e4066Sahrens int
3780ea8dc4b6Seschrock spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
3781fa9e4066Sahrens {
37823f9d6ad7SLin Ling 	uint64_t txg, dtl_max_txg;
3783fa9e4066Sahrens 	vdev_t *rvd = spa->spa_root_vdev;
3784fa9e4066Sahrens 	vdev_t *oldvd, *newvd, *newrootvd, *pvd, *tvd;
378599653d4eSeschrock 	vdev_ops_t *pvops;
37869b3f6b42SEric Kustarz 	char *oldvdpath, *newvdpath;
37879b3f6b42SEric Kustarz 	int newvd_isspare;
37889b3f6b42SEric Kustarz 	int error;
3789fa9e4066Sahrens 
3790fa9e4066Sahrens 	txg = spa_vdev_enter(spa);
3791fa9e4066Sahrens 
3792c5904d13Seschrock 	oldvd = spa_lookup_by_guid(spa, guid, B_FALSE);
3793fa9e4066Sahrens 
3794fa9e4066Sahrens 	if (oldvd == NULL)
3795fa9e4066Sahrens 		return (spa_vdev_exit(spa, NULL, txg, ENODEV));
3796fa9e4066Sahrens 
37970e34b6a7Sbonwick 	if (!oldvd->vdev_ops->vdev_op_leaf)
37980e34b6a7Sbonwick 		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));
37990e34b6a7Sbonwick 
3800fa9e4066Sahrens 	pvd = oldvd->vdev_parent;
3801fa9e4066Sahrens 
380299653d4eSeschrock 	if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
38033d7072f8Seschrock 	    VDEV_ALLOC_ADD)) != 0)
38043d7072f8Seschrock 		return (spa_vdev_exit(spa, NULL, txg, EINVAL));
38053d7072f8Seschrock 
38063d7072f8Seschrock 	if (newrootvd->vdev_children != 1)
3807fa9e4066Sahrens 		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
3808fa9e4066Sahrens 
3809fa9e4066Sahrens 	newvd = newrootvd->vdev_child[0];
3810fa9e4066Sahrens 
3811fa9e4066Sahrens 	if (!newvd->vdev_ops->vdev_op_leaf)
3812fa9e4066Sahrens 		return (spa_vdev_exit(spa, newrootvd, txg, EINVAL));
3813fa9e4066Sahrens 
381499653d4eSeschrock 	if ((error = vdev_create(newrootvd, txg, replacing)) != 0)
3815fa9e4066Sahrens 		return (spa_vdev_exit(spa, newrootvd, txg, error));
3816fa9e4066Sahrens 
38178654d025Sperrin 	/*
38188654d025Sperrin 	 * Spares can't replace logs
38198654d025Sperrin 	 */
3820ee0eb9f2SEric Schrock 	if (oldvd->vdev_top->vdev_islog && newvd->vdev_isspare)
38218654d025Sperrin 		return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
38228654d025Sperrin 
382399653d4eSeschrock 	if (!replacing) {
382499653d4eSeschrock 		/*
382599653d4eSeschrock 		 * For attach, the only allowable parent is a mirror or the root
382699653d4eSeschrock 		 * vdev.
382799653d4eSeschrock 		 */
382899653d4eSeschrock 		if (pvd->vdev_ops != &vdev_mirror_ops &&
382999653d4eSeschrock 		    pvd->vdev_ops != &vdev_root_ops)
383099653d4eSeschrock 			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
383199653d4eSeschrock 
383299653d4eSeschrock 		pvops = &vdev_mirror_ops;
383399653d4eSeschrock 	} else {
383499653d4eSeschrock 		/*
383599653d4eSeschrock 		 * Active hot spares can only be replaced by inactive hot
383699653d4eSeschrock 		 * spares.
383799653d4eSeschrock 		 */
383899653d4eSeschrock 		if (pvd->vdev_ops == &vdev_spare_ops &&
3839*cb04b873SMark J Musante 		    oldvd->vdev_isspare &&
384099653d4eSeschrock 		    !spa_has_spare(spa, newvd->vdev_guid))
384199653d4eSeschrock 			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
384299653d4eSeschrock 
384399653d4eSeschrock 		/*
384499653d4eSeschrock 		 * If the source is a hot spare, and the parent isn't already a
384599653d4eSeschrock 		 * spare, then we want to create a new hot spare.  Otherwise, we
384639c23413Seschrock 		 * want to create a replacing vdev.  The user is not allowed to
384739c23413Seschrock 		 * attach to a spared vdev child unless the 'isspare' state is
384839c23413Seschrock 		 * the same (spare replaces spare, non-spare replaces
384939c23413Seschrock 		 * non-spare).
385099653d4eSeschrock 		 */
3851*cb04b873SMark J Musante 		if (pvd->vdev_ops == &vdev_replacing_ops &&
3852*cb04b873SMark J Musante 		    spa_version(spa) < SPA_VERSION_MULTI_REPLACE) {
385399653d4eSeschrock 			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
3854*cb04b873SMark J Musante 		} else if (pvd->vdev_ops == &vdev_spare_ops &&
3855*cb04b873SMark J Musante 		    newvd->vdev_isspare != oldvd->vdev_isspare) {
385639c23413Seschrock 			return (spa_vdev_exit(spa, newrootvd, txg, ENOTSUP));
3857*cb04b873SMark J Musante 		}
3858*cb04b873SMark J Musante 
3859*cb04b873SMark J Musante 		if (newvd->vdev_isspare)
386099653d4eSeschrock 			pvops = &vdev_spare_ops;
386199653d4eSeschrock 		else
386299653d4eSeschrock 			pvops = &vdev_replacing_ops;
386399653d4eSeschrock 	}
386499653d4eSeschrock 
38652a79c5feSlling 	/*
3866573ca77eSGeorge Wilson 	 * Make sure the new device is big enough.
38672a79c5feSlling 	 */
3868573ca77eSGeorge Wilson 	if (newvd->vdev_asize < vdev_get_min_asize(oldvd))
3869fa9e4066Sahrens 		return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));
3870fa9e4066Sahrens 
3871ecc2d604Sbonwick 	/*
3872ecc2d604Sbonwick 	 * The new device cannot have a higher alignment requirement
3873ecc2d604Sbonwick 	 * than the top-level vdev.
3874ecc2d604Sbonwick 	 */
3875ecc2d604Sbonwick 	if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
3876fa9e4066Sahrens 		return (spa_vdev_exit(spa, newrootvd, txg, EDOM));
3877fa9e4066Sahrens 
3878fa9e4066Sahrens 	/*
3879fa9e4066Sahrens 	 * If this is an in-place replacement, update oldvd's path and devid
3880fa9e4066Sahrens 	 * to make it distinguishable from newvd, and unopenable from now on.
3881fa9e4066Sahrens 	 */
3882fa9e4066Sahrens 	if (strcmp(oldvd->vdev_path, newvd->vdev_path) == 0) {
3883fa9e4066Sahrens 		spa_strfree(oldvd->vdev_path);
3884fa9e4066Sahrens 		oldvd->vdev_path = kmem_alloc(strlen(newvd->vdev_path) + 5,
3885fa9e4066Sahrens 		    KM_SLEEP);
3886fa9e4066Sahrens 		(void) sprintf(oldvd->vdev_path, "%s/%s",
3887fa9e4066Sahrens 		    newvd->vdev_path, "old");
3888fa9e4066Sahrens 		if (oldvd->vdev_devid != NULL) {
3889fa9e4066Sahrens 			spa_strfree(oldvd->vdev_devid);
3890fa9e4066Sahrens 			oldvd->vdev_devid = NULL;
3891fa9e4066Sahrens 		}
3892fa9e4066Sahrens 	}
3893fa9e4066Sahrens 
3894*cb04b873SMark J Musante 	/* mark the device being resilvered */
3895*cb04b873SMark J Musante 	newvd->vdev_resilvering = B_TRUE;
3896*cb04b873SMark J Musante 
3897fa9e4066Sahrens 	/*
389899653d4eSeschrock 	 * If the parent is not a mirror, or if we're replacing, insert the new
389999653d4eSeschrock 	 * mirror/replacing/spare vdev above oldvd.
3900fa9e4066Sahrens 	 */
3901fa9e4066Sahrens 	if (pvd->vdev_ops != pvops)
3902fa9e4066Sahrens 		pvd = vdev_add_parent(oldvd, pvops);
3903fa9e4066Sahrens 
3904fa9e4066Sahrens 	ASSERT(pvd->vdev_top->vdev_parent == rvd);
3905fa9e4066Sahrens 	ASSERT(pvd->vdev_ops == pvops);
3906fa9e4066Sahrens 	ASSERT(oldvd->vdev_parent == pvd);
3907fa9e4066Sahrens 
3908fa9e4066Sahrens 	/*
3909fa9e4066Sahrens 	 * Extract the new device from its root and add it to pvd.
3910fa9e4066Sahrens 	 */
3911fa9e4066Sahrens 	vdev_remove_child(newrootvd, newvd);
3912fa9e4066Sahrens 	newvd->vdev_id = pvd->vdev_children;
391388ecc943SGeorge Wilson 	newvd->vdev_crtxg = oldvd->vdev_crtxg;
3914fa9e4066Sahrens 	vdev_add_child(pvd, newvd);
3915fa9e4066Sahrens 
3916fa9e4066Sahrens 	tvd = newvd->vdev_top;
3917fa9e4066Sahrens 	ASSERT(pvd->vdev_top == tvd);
3918fa9e4066Sahrens 	ASSERT(tvd->vdev_parent == rvd);
3919fa9e4066Sahrens 
3920fa9e4066Sahrens 	vdev_config_dirty(tvd);
3921fa9e4066Sahrens 
3922fa9e4066Sahrens 	/*
39233f9d6ad7SLin Ling 	 * Set newvd's DTL to [TXG_INITIAL, dtl_max_txg) so that we account
39243f9d6ad7SLin Ling 	 * for any dmu_sync-ed blocks.  It will propagate upward when
39253f9d6ad7SLin Ling 	 * spa_vdev_exit() calls vdev_dtl_reassess().
3926fa9e4066Sahrens 	 */
39273f9d6ad7SLin Ling 	dtl_max_txg = txg + TXG_CONCURRENT_STATES;
3928fa9e4066Sahrens 
39293f9d6ad7SLin Ling 	vdev_dtl_dirty(newvd, DTL_MISSING, TXG_INITIAL,
39303f9d6ad7SLin Ling 	    dtl_max_txg - TXG_INITIAL);
3931fa9e4066Sahrens 
39326809eb4eSEric Schrock 	if (newvd->vdev_isspare) {
393339c23413Seschrock 		spa_spare_activate(newvd);
39346809eb4eSEric Schrock 		spa_event_notify(spa, newvd, ESC_ZFS_VDEV_SPARE);
39356809eb4eSEric Schrock 	}
39366809eb4eSEric Schrock 
3937e14bb325SJeff Bonwick 	oldvdpath = spa_strdup(oldvd->vdev_path);
3938e14bb325SJeff Bonwick 	newvdpath = spa_strdup(newvd->vdev_path);
39399b3f6b42SEric Kustarz 	newvd_isspare = newvd->vdev_isspare;
3940ea8dc4b6Seschrock 
3941fa9e4066Sahrens 	/*
3942fa9e4066Sahrens 	 * Mark newvd's DTL dirty in this txg.
3943fa9e4066Sahrens 	 */
3944ecc2d604Sbonwick 	vdev_dirty(tvd, VDD_DTL, newvd, txg);
3945fa9e4066Sahrens 
39463f9d6ad7SLin Ling 	/*
39473f9d6ad7SLin Ling 	 * Restart the resilver
39483f9d6ad7SLin Ling 	 */
39493f9d6ad7SLin Ling 	dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
39503f9d6ad7SLin Ling 
39513f9d6ad7SLin Ling 	/*
39523f9d6ad7SLin Ling 	 * Commit the config
39533f9d6ad7SLin Ling 	 */
39543f9d6ad7SLin Ling 	(void) spa_vdev_exit(spa, newrootvd, dtl_max_txg, 0);
3955fa9e4066Sahrens 
39563f9d6ad7SLin Ling 	spa_history_log_internal(LOG_POOL_VDEV_ATTACH, spa, NULL,
39573f9d6ad7SLin Ling 	    "%s vdev=%s %s vdev=%s",
3958c8e1f6d2SMark J Musante 	    replacing && newvd_isspare ? "spare in" :
3959c8e1f6d2SMark J Musante 	    replacing ? "replace" : "attach", newvdpath,
3960c8e1f6d2SMark J Musante 	    replacing ? "for" : "to", oldvdpath);
39619b3f6b42SEric Kustarz 
39629b3f6b42SEric Kustarz 	spa_strfree(oldvdpath);
39639b3f6b42SEric Kustarz 	spa_strfree(newvdpath);
39649b3f6b42SEric Kustarz 
3965943e9869SLori Alt 	if (spa->spa_bootfs)
3966943e9869SLori Alt 		spa_event_notify(spa, newvd, ESC_ZFS_BOOTFS_VDEV_ATTACH);
3967943e9869SLori Alt 
3968fa9e4066Sahrens 	return (0);
3969fa9e4066Sahrens }
3970fa9e4066Sahrens 
/*
 * Detach a device from a mirror or replacing vdev.
 * If 'replace_done' is specified, only detach if the parent
 * is a replacing vdev.
 *
 * 'guid' identifies the leaf vdev to detach; 'pguid' (if nonzero) pins the
 * expected parent so a concurrently-completed replace can't change what we
 * detach (see the long comment below).  Returns 0 on success or an errno;
 * all error paths funnel through spa_vdev_exit() to drop the config lock
 * taken by spa_vdev_enter().
 */
int
spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
{
	uint64_t txg;
	int error;
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd, *pvd, *cvd, *tvd;
	boolean_t unspare = B_FALSE;
	uint64_t unspare_guid;
	char *vdpath;

	txg = spa_vdev_enter(spa);

	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (vd == NULL)
		return (spa_vdev_exit(spa, NULL, txg, ENODEV));

	/* Only leaf vdevs (actual devices) can be detached. */
	if (!vd->vdev_ops->vdev_op_leaf)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	pvd = vd->vdev_parent;

	/*
	 * If the parent/child relationship is not as expected, don't do it.
	 * Consider M(A,R(B,C)) -- that is, a mirror of A with a replacing
	 * vdev that's replacing B with C.  The user's intent in replacing
	 * is to go from M(A,B) to M(A,C).  If the user decides to cancel
	 * the replace by detaching C, the expected behavior is to end up
	 * M(A,B).  But suppose that right after deciding to detach C,
	 * the replacement of B completes.  We would have M(A,C), and then
	 * ask to detach C, which would leave us with just A -- not what
	 * the user wanted.  To prevent this, we make sure that the
	 * parent/child relationship hasn't changed -- in this example,
	 * that C's parent is still the replacing vdev R.
	 */
	if (pvd->vdev_guid != pguid && pguid != 0)
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	/*
	 * Only 'replacing' or 'spare' vdevs can be replaced.
	 */
	if (replace_done && pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	ASSERT(pvd->vdev_ops != &vdev_spare_ops ||
	    spa_version(spa) >= SPA_VERSION_SPARES);

	/*
	 * Only mirror, replacing, and spare vdevs support detach.
	 */
	if (pvd->vdev_ops != &vdev_replacing_ops &&
	    pvd->vdev_ops != &vdev_mirror_ops &&
	    pvd->vdev_ops != &vdev_spare_ops)
		return (spa_vdev_exit(spa, NULL, txg, ENOTSUP));

	/*
	 * If this device has the only valid copy of some data,
	 * we cannot safely detach it.
	 */
	if (vdev_dtl_required(vd))
		return (spa_vdev_exit(spa, NULL, txg, EBUSY));

	ASSERT(pvd->vdev_children >= 2);

	/*
	 * If we are detaching the second disk from a replacing vdev, then
	 * check to see if we changed the original vdev's path to have "/old"
	 * at the end in spa_vdev_attach().  If so, undo that change now.
	 */
	if (pvd->vdev_ops == &vdev_replacing_ops && vd->vdev_id > 0 &&
	    vd->vdev_path != NULL) {
		size_t len = strlen(vd->vdev_path);

		for (int c = 0; c < pvd->vdev_children; c++) {
			cvd = pvd->vdev_child[c];

			if (cvd == vd || cvd->vdev_path == NULL)
				continue;

			/* sibling named "<our path>/old" gets our path back */
			if (strncmp(cvd->vdev_path, vd->vdev_path, len) == 0 &&
			    strcmp(cvd->vdev_path + len, "/old") == 0) {
				spa_strfree(cvd->vdev_path);
				cvd->vdev_path = spa_strdup(vd->vdev_path);
				break;
			}
		}
	}

	/*
	 * If we are detaching the original disk from a spare, then it implies
	 * that the spare should become a real disk, and be removed from the
	 * active spare list for the pool.
	 */
	if (pvd->vdev_ops == &vdev_spare_ops &&
	    vd->vdev_id == 0 &&
	    pvd->vdev_child[pvd->vdev_children - 1]->vdev_isspare)
		unspare = B_TRUE;

	/*
	 * Erase the disk labels so the disk can be used for other things.
	 * This must be done after all other error cases are handled,
	 * but before we disembowel vd (so we can still do I/O to it).
	 * But if we can't do it, don't treat the error as fatal --
	 * it may be that the unwritability of the disk is the reason
	 * it's being detached!
	 */
	error = vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/*
	 * Remove vd from its parent and compact the parent's children.
	 */
	vdev_remove_child(pvd, vd);
	vdev_compact_children(pvd);

	/*
	 * Remember one of the remaining children so we can get tvd below.
	 */
	cvd = pvd->vdev_child[pvd->vdev_children - 1];

	/*
	 * If we need to remove the remaining child from the list of hot spares,
	 * do it now, marking the vdev as no longer a spare in the process.
	 * We must do this before vdev_remove_parent(), because that can
	 * change the GUID if it creates a new toplevel GUID.  For a similar
	 * reason, we must remove the spare now, in the same txg as the detach;
	 * otherwise someone could attach a new sibling, change the GUID, and
	 * the subsequent attempt to spa_vdev_remove(unspare_guid) would fail.
	 */
	if (unspare) {
		ASSERT(cvd->vdev_isspare);
		spa_spare_remove(cvd);
		unspare_guid = cvd->vdev_guid;
		(void) spa_vdev_remove(spa, unspare_guid, B_TRUE);
		cvd->vdev_unspare = B_TRUE;
	}

	/*
	 * If the parent mirror/replacing vdev only has one child,
	 * the parent is no longer needed.  Remove it from the tree.
	 */
	if (pvd->vdev_children == 1) {
		if (pvd->vdev_ops == &vdev_spare_ops)
			cvd->vdev_unspare = B_FALSE;
		vdev_remove_parent(cvd);
		cvd->vdev_resilvering = B_FALSE;
	}


	/*
	 * We don't set tvd until now because the parent we just removed
	 * may have been the previous top-level vdev.
	 */
	tvd = cvd->vdev_top;
	ASSERT(tvd->vdev_parent == rvd);

	/*
	 * Reevaluate the parent vdev state.
	 */
	vdev_propagate_state(cvd);

	/*
	 * If the 'autoexpand' property is set on the pool then automatically
	 * try to expand the size of the pool. For example if the device we
	 * just detached was smaller than the others, it may be possible to
	 * add metaslabs (i.e. grow the pool). We need to reopen the vdev
	 * first so that we can obtain the updated sizes of the leaf vdevs.
	 */
	if (spa->spa_autoexpand) {
		vdev_reopen(tvd);
		vdev_expand(tvd, txg);
	}

	vdev_config_dirty(tvd);

	/*
	 * Mark vd's DTL as dirty in this txg.  vdev_dtl_sync() will see that
	 * vd->vdev_detached is set and free vd's DTL object in syncing context.
	 * But first make sure we're not on any *other* txg's DTL list, to
	 * prevent vd from being accessed after it's freed.
	 */
	/* NOTE(review): assumes vd->vdev_path != NULL here -- confirm */
	vdpath = spa_strdup(vd->vdev_path);
	for (int t = 0; t < TXG_SIZE; t++)
		(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
	vd->vdev_detached = B_TRUE;
	vdev_dirty(tvd, VDD_DTL, vd, txg);

	spa_event_notify(spa, vd, ESC_ZFS_VDEV_REMOVE);

	/* hang on to the spa before we release the lock */
	spa_open_ref(spa, FTAG);

	error = spa_vdev_exit(spa, vd, txg, 0);

	spa_history_log_internal(LOG_POOL_VDEV_DETACH, spa, NULL,
	    "vdev=%s", vdpath);
	spa_strfree(vdpath);

	/*
	 * If this was the removal of the original device in a hot spare vdev,
	 * then we want to go through and remove the device from the hot spare
	 * list of every other pool.
	 */
	if (unspare) {
		spa_t *altspa = NULL;

		mutex_enter(&spa_namespace_lock);
		while ((altspa = spa_next(altspa)) != NULL) {
			if (altspa->spa_state != POOL_STATE_ACTIVE ||
			    altspa == spa)
				continue;

			/*
			 * Drop the namespace lock around the removal so we
			 * don't hold it across another pool's vdev operation;
			 * the extra reference keeps altspa alive meanwhile.
			 */
			spa_open_ref(altspa, FTAG);
			mutex_exit(&spa_namespace_lock);
			(void) spa_vdev_remove(altspa, unspare_guid, B_TRUE);
			mutex_enter(&spa_namespace_lock);
			spa_close(altspa, FTAG);
		}
		mutex_exit(&spa_namespace_lock);

		/* search the rest of the vdevs for spares to remove */
		spa_vdev_resilver_done(spa);
	}

	/* all done with the spa; OK to release */
	mutex_enter(&spa_namespace_lock);
	spa_close(spa, FTAG);
	mutex_exit(&spa_namespace_lock);

	return (error);
}
420899653d4eSeschrock 
/*
 * Split a set of devices from their mirrors, and create a new pool from them.
 *
 * 'config' supplies the vdev tree for the new pool (one leaf per top-level
 * mirror of 'spa'); 'props' optionally carries properties (including altroot)
 * for the new pool; if 'exp' is set the new pool is exported rather than left
 * active.  Returns 0 or an errno.  On failure after the new spa has been
 * created, the 'out' path tears it down and re-onlines the offlined disks.
 */
int
spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
    nvlist_t *props, boolean_t exp)
{
	int error = 0;
	uint64_t txg, *glist;
	spa_t *newspa;
	uint_t c, children, lastlog;
	nvlist_t **child, *nvl, *tmp;
	dmu_tx_t *tx;
	char *altroot = NULL;
	vdev_t *rvd, **vml = NULL;			/* vdev modify list */
	boolean_t activate_slog;

	if (!spa_writeable(spa))
		return (EROFS);

	txg = spa_vdev_enter(spa);

	/* clear the log and flush everything up to now */
	activate_slog = spa_passivate_log(spa);
	(void) spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);
	error = spa_offline_log(spa);
	txg = spa_vdev_config_enter(spa);

	if (activate_slog)
		spa_activate_log(spa);

	if (error != 0)
		return (spa_vdev_exit(spa, NULL, txg, error));

	/* check new spa name before going any further */
	if (spa_lookup(newname) != NULL)
		return (spa_vdev_exit(spa, NULL, txg, EEXIST));

	/*
	 * scan through all the children to ensure they're all mirrors
	 */
	if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvl) != 0 ||
	    nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN, &child,
	    &children) != 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* first, check to ensure we've got the right child count */
	rvd = spa->spa_root_vdev;
	lastlog = 0;
	for (c = 0; c < rvd->vdev_children; c++) {
		vdev_t *vd = rvd->vdev_child[c];

		/* don't count the holes & logs as children */
		if (vd->vdev_islog || vd->vdev_ishole) {
			/* remember where the trailing run of logs/holes began */
			if (lastlog == 0)
				lastlog = c;
			continue;
		}

		lastlog = 0;
	}
	/* the caller must supply exactly one leaf per non-log top-level vdev */
	if (children != (lastlog != 0 ? lastlog : rvd->vdev_children))
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	/* next, ensure no spare or cache devices are part of the split */
	if (nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_SPARES, &tmp) == 0 ||
	    nvlist_lookup_nvlist(nvl, ZPOOL_CONFIG_L2CACHE, &tmp) == 0)
		return (spa_vdev_exit(spa, NULL, txg, EINVAL));

	vml = kmem_zalloc(children * sizeof (vdev_t *), KM_SLEEP);
	glist = kmem_zalloc(children * sizeof (uint64_t), KM_SLEEP);

	/* then, loop over each vdev and validate it */
	for (c = 0; c < children; c++) {
		uint64_t is_hole = 0;

		(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_HOLE,
		    &is_hole);

		if (is_hole != 0) {
			/* a hole is only acceptable where the pool has one */
			if (spa->spa_root_vdev->vdev_child[c]->vdev_ishole ||
			    spa->spa_root_vdev->vdev_child[c]->vdev_islog) {
				continue;
			} else {
				error = EINVAL;
				break;
			}
		}

		/* which disk is going to be split? */
		if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_GUID,
		    &glist[c]) != 0) {
			error = EINVAL;
			break;
		}

		/* look it up in the spa */
		vml[c] = spa_lookup_by_guid(spa, glist[c], B_FALSE);
		if (vml[c] == NULL) {
			error = ENODEV;
			break;
		}

		/* make sure there's nothing stopping the split */
		if (vml[c]->vdev_parent->vdev_ops != &vdev_mirror_ops ||
		    vml[c]->vdev_islog ||
		    vml[c]->vdev_ishole ||
		    vml[c]->vdev_isspare ||
		    vml[c]->vdev_isl2cache ||
		    !vdev_writeable(vml[c]) ||
		    vml[c]->vdev_children != 0 ||
		    vml[c]->vdev_state != VDEV_STATE_HEALTHY ||
		    c != spa->spa_root_vdev->vdev_child[c]->vdev_id) {
			error = EINVAL;
			break;
		}

		/* the disk to be split must not hold the only copy of data */
		if (vdev_dtl_required(vml[c])) {
			error = EBUSY;
			break;
		}

		/* we need certain info from the top level */
		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_ARRAY,
		    vml[c]->vdev_top->vdev_ms_array) == 0);
		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_METASLAB_SHIFT,
		    vml[c]->vdev_top->vdev_ms_shift) == 0);
		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASIZE,
		    vml[c]->vdev_top->vdev_asize) == 0);
		VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
		    vml[c]->vdev_top->vdev_ashift) == 0);
	}

	if (error != 0) {
		kmem_free(vml, children * sizeof (vdev_t *));
		kmem_free(glist, children * sizeof (uint64_t));
		return (spa_vdev_exit(spa, NULL, txg, error));
	}

	/* stop writers from using the disks */
	for (c = 0; c < children; c++) {
		if (vml[c] != NULL)
			vml[c]->vdev_offline = B_TRUE;
	}
	vdev_reopen(spa->spa_root_vdev);

	/*
	 * Temporarily record the splitting vdevs in the spa config.  This
	 * will disappear once the config is regenerated.
	 */
	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_uint64_array(nvl, ZPOOL_CONFIG_SPLIT_LIST,
	    glist, children) == 0);
	kmem_free(glist, children * sizeof (uint64_t));

	/* spa_config is protected by spa_props_lock while we mutate it */
	mutex_enter(&spa->spa_props_lock);
	VERIFY(nvlist_add_nvlist(spa->spa_config, ZPOOL_CONFIG_SPLIT,
	    nvl) == 0);
	mutex_exit(&spa->spa_props_lock);
	spa->spa_config_splitting = nvl;
	vdev_config_dirty(spa->spa_root_vdev);

	/* configure and create the new pool */
	VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, newname) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
	    exp ? POOL_STATE_EXPORTED : POOL_STATE_ACTIVE) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
	    spa_version(spa)) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
	    spa->spa_config_txg) == 0);
	VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
	    spa_generate_guid(NULL)) == 0);
	(void) nvlist_lookup_string(props,
	    zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);

	/* add the new pool to the namespace */
	newspa = spa_add(newname, config, altroot);
	newspa->spa_config_txg = spa->spa_config_txg;
	spa_set_log_state(newspa, SPA_LOG_CLEAR);

	/* release the spa config lock, retaining the namespace lock */
	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);

	if (zio_injection_enabled)
		zio_handle_panic_injection(spa, FTAG, 1);

	spa_activate(newspa, spa_mode_global);
	spa_async_suspend(newspa);

	/* create the new pool from the disks of the original pool */
	error = spa_load(newspa, SPA_LOAD_IMPORT, SPA_IMPORT_ASSEMBLE, B_TRUE);
	if (error)
		goto out;

	/* if that worked, generate a real config for the new pool */
	if (newspa->spa_root_vdev != NULL) {
		VERIFY(nvlist_alloc(&newspa->spa_config_splitting,
		    NV_UNIQUE_NAME, KM_SLEEP) == 0);
		VERIFY(nvlist_add_uint64(newspa->spa_config_splitting,
		    ZPOOL_CONFIG_SPLIT_GUID, spa_guid(spa)) == 0);
		spa_config_set(newspa, spa_config_generate(newspa, NULL, -1ULL,
		    B_TRUE));
	}

	/* set the props */
	if (props != NULL) {
		spa_configfile_set(newspa, props, B_FALSE);
		error = spa_prop_set(newspa, props);
		if (error)
			goto out;
	}

	/* flush everything */
	txg = spa_vdev_config_enter(newspa);
	vdev_config_dirty(newspa->spa_root_vdev);
	(void) spa_vdev_config_exit(newspa, NULL, txg, 0, FTAG);

	if (zio_injection_enabled)
		zio_handle_panic_injection(spa, FTAG, 2);

	spa_async_resume(newspa);

	/* finally, update the original pool's config */
	txg = spa_vdev_config_enter(spa);
	tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir);
	error = dmu_tx_assign(tx, TXG_WAIT);
	/*
	 * If the tx can't be assigned we abort it but still perform the
	 * split below; 'error' then suppresses the per-vdev history logging
	 * and the final dmu_tx_commit().
	 */
	if (error != 0)
		dmu_tx_abort(tx);
	for (c = 0; c < children; c++) {
		if (vml[c] != NULL) {
			vdev_split(vml[c]);
			if (error == 0)
				spa_history_log_internal(LOG_POOL_VDEV_DETACH,
				    spa, tx, "vdev=%s",
				    vml[c]->vdev_path);
			vdev_free(vml[c]);
		}
	}
	vdev_config_dirty(spa->spa_root_vdev);
	spa->spa_config_splitting = NULL;
	nvlist_free(nvl);
	if (error == 0)
		dmu_tx_commit(tx);
	(void) spa_vdev_exit(spa, NULL, txg, 0);

	if (zio_injection_enabled)
		zio_handle_panic_injection(spa, FTAG, 3);

	/* split is complete; log a history record */
	spa_history_log_internal(LOG_POOL_SPLIT, newspa, NULL,
	    "split new pool %s from pool %s", newname, spa_name(spa));

	kmem_free(vml, children * sizeof (vdev_t *));

	/* if we're not going to mount the filesystems in userland, export */
	if (exp)
		error = spa_export_common(newname, POOL_STATE_EXPORTED, NULL,
		    B_FALSE, B_FALSE);

	return (error);

out:
	/* failure after newspa was created: unwind it completely */
	spa_unload(newspa);
	spa_deactivate(newspa);
	spa_remove(newspa);

	txg = spa_vdev_config_enter(spa);

	/* re-online all offlined disks */
	for (c = 0; c < children; c++) {
		if (vml[c] != NULL)
			vml[c]->vdev_offline = B_FALSE;
	}
	vdev_reopen(spa->spa_root_vdev);

	nvlist_free(spa->spa_config_splitting);
	spa->spa_config_splitting = NULL;
	(void) spa_vdev_exit(spa, NULL, txg, error);

	kmem_free(vml, children * sizeof (vdev_t *));
	return (error);
}
44911195e687SMark J Musante 
4492e14bb325SJeff Bonwick static nvlist_t *
4493e14bb325SJeff Bonwick spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
449499653d4eSeschrock {
4495e14bb325SJeff Bonwick 	for (int i = 0; i < count; i++) {
4496e14bb325SJeff Bonwick 		uint64_t guid;
449799653d4eSeschrock 
4498e14bb325SJeff Bonwick 		VERIFY(nvlist_lookup_uint64(nvpp[i], ZPOOL_CONFIG_GUID,
4499e14bb325SJeff Bonwick 		    &guid) == 0);
450099653d4eSeschrock 
4501e14bb325SJeff Bonwick 		if (guid == target_guid)
4502e14bb325SJeff Bonwick 			return (nvpp[i]);
450399653d4eSeschrock 	}
450499653d4eSeschrock 
4505e14bb325SJeff Bonwick 	return (NULL);
4506fa94a07fSbrendan }
4507fa94a07fSbrendan 
4508e14bb325SJeff Bonwick static void
4509e14bb325SJeff Bonwick spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
4510e14bb325SJeff Bonwick 	nvlist_t *dev_to_remove)
4511fa94a07fSbrendan {
4512e14bb325SJeff Bonwick 	nvlist_t **newdev = NULL;
4513fa94a07fSbrendan 
4514e14bb325SJeff Bonwick 	if (count > 1)
4515e14bb325SJeff Bonwick 		newdev = kmem_alloc((count - 1) * sizeof (void *), KM_SLEEP);
4516fa94a07fSbrendan 
4517e14bb325SJeff Bonwick 	for (int i = 0, j = 0; i < count; i++) {
4518e14bb325SJeff Bonwick 		if (dev[i] == dev_to_remove)
4519e14bb325SJeff Bonwick 			continue;
4520e14bb325SJeff Bonwick 		VERIFY(nvlist_dup(dev[i], &newdev[j++], KM_SLEEP) == 0);
4521fa94a07fSbrendan 	}
4522fa94a07fSbrendan 
4523e14bb325SJeff Bonwick 	VERIFY(nvlist_remove(config, name, DATA_TYPE_NVLIST_ARRAY) == 0);
4524e14bb325SJeff Bonwick 	VERIFY(nvlist_add_nvlist_array(config, name, newdev, count - 1) == 0);
4525fa94a07fSbrendan 
4526e14bb325SJeff Bonwick 	for (int i = 0; i < count - 1; i++)
4527e14bb325SJeff Bonwick 		nvlist_free(newdev[i]);
4528fa94a07fSbrendan 
4529e14bb325SJeff Bonwick 	if (count > 1)
4530e14bb325SJeff Bonwick 		kmem_free(newdev, (count - 1) * sizeof (void *));
4531fa94a07fSbrendan }
4532fa94a07fSbrendan 
/*
 * Evacuate the device: move every allocated block off of it.  Currently
 * only log devices can be evacuated; everything else returns ENOTSUP
 * until bp-rewrite exists.  On success, marks the vdev as removing and
 * syncs that state out.  Returns 0 or an errno.
 */
static int
spa_vdev_remove_evacuate(spa_t *spa, vdev_t *vd)
{
	uint64_t txg;
	int error = 0;

	/* Caller holds the namespace lock but NOT the config lock. */
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
	ASSERT(vd == vd->vdev_top);

	/*
	 * Evacuate the device.  We don't hold the config lock as writer
	 * since we need to do I/O but we do keep the
	 * spa_namespace_lock held.  Once this completes the device
	 * should no longer have any blocks allocated on it.
	 */
	if (vd->vdev_islog) {
		/* An already-empty log device needs no data movement. */
		if (vd->vdev_stat.vs_alloc != 0)
			error = spa_offline_log(spa);
	} else {
		/* Non-log vdevs cannot be evacuated yet (no bp-rewrite). */
		error = ENOTSUP;
	}

	if (error)
		return (error);

	/*
	 * The evacuation succeeded.  Remove any remaining MOS metadata
	 * associated with this vdev, and wait for these changes to sync.
	 */
	ASSERT3U(vd->vdev_stat.vs_alloc, ==, 0);
	txg = spa_vdev_config_enter(spa);
	vd->vdev_removing = B_TRUE;
	vdev_dirty(vd, 0, NULL, txg);
	vdev_config_dirty(vd);
	spa_vdev_config_exit(spa, NULL, txg, 0, FTAG);

	return (0);
}
457588ecc943SGeorge Wilson 
/*
 * Complete the removal by cleaning up the namespace.  The evacuated
 * top-level vdev is freed; if it was not the last child of the root, a
 * hole vdev is planted in its slot so that the ids of the remaining
 * top-level vdevs are preserved.
 */
static void
spa_vdev_remove_from_namespace(spa_t *spa, vdev_t *vd)
{
	vdev_t *rvd = spa->spa_root_vdev;
	uint64_t id = vd->vdev_id;
	boolean_t last_vdev = (id == (rvd->vdev_children - 1));

	/* Caller holds both the namespace lock and all config locks. */
	ASSERT(MUTEX_HELD(&spa_namespace_lock));
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
	ASSERT(vd == vd->vdev_top);

	/*
	 * Only remove any devices which are empty.
	 */
	if (vd->vdev_stat.vs_alloc != 0)
		return;

	(void) vdev_label_init(vd, 0, VDEV_LABEL_REMOVE);

	/* Detach the vdev from the dirty lists before freeing it. */
	if (list_link_active(&vd->vdev_state_dirty_node))
		vdev_state_clean(vd);
	if (list_link_active(&vd->vdev_config_dirty_node))
		vdev_config_clean(vd);

	vdev_free(vd);

	/*
	 * If this was the highest-numbered child we can simply shrink
	 * the child array; otherwise leave a hole in its slot so the
	 * remaining top-level vdev ids stay stable.
	 */
	if (last_vdev) {
		vdev_compact_children(rvd);
	} else {
		vd = vdev_alloc_common(spa, id, 0, &vdev_hole_ops);
		vdev_add_child(rvd, vd);
	}
	vdev_config_dirty(rvd);

	/*
	 * Reassess the health of our root vdev.
	 */
	vdev_reopen(rvd);
}
461888ecc943SGeorge Wilson 
/*
 * Remove a device from the pool -
 *
 * Removing a device from the vdev namespace requires several steps
 * and can take a significant amount of time.  As a result we use
 * the spa_vdev_config_[enter/exit] functions which allow us to
 * grab and release the spa_config_lock while still holding the namespace
 * lock.  During each step the configuration is synced out.
 */

/*
 * Remove a device from the pool.  Currently, this supports removing only hot
 * spares, slogs, and level 2 ARC devices.
 *
 * 'guid' names the device; 'unspare' permits removing a hot spare that is
 * currently in use in this pool.  Returns 0, EBUSY (in-use spare),
 * ENOTSUP (normal vdev, or failed log evacuation), or ENOENT (no such
 * device).  May be called with or without the namespace lock held.
 */
int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
	vdev_t *vd;
	metaslab_group_t *mg;
	nvlist_t **spares, **l2cache, *nv;
	uint64_t txg = 0;
	uint_t nspares, nl2cache;
	int error = 0;
	boolean_t locked = MUTEX_HELD(&spa_namespace_lock);

	if (!locked)
		txg = spa_vdev_enter(spa);

	/* vd is NULL for an aux device not currently attached as a vdev */
	vd = spa_lookup_by_guid(spa, guid, B_FALSE);

	if (spa->spa_spares.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_spares.sav_config,
	    ZPOOL_CONFIG_SPARES, &spares, &nspares) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(spares, nspares, guid)) != NULL) {
		/*
		 * Only remove the hot spare if it's not currently in use
		 * in this pool.
		 */
		if (vd == NULL || unspare) {
			spa_vdev_remove_aux(spa->spa_spares.sav_config,
			    ZPOOL_CONFIG_SPARES, spares, nspares, nv);
			spa_load_spares(spa);
			spa->spa_spares.sav_sync = B_TRUE;
		} else {
			error = EBUSY;
		}
	} else if (spa->spa_l2cache.sav_vdevs != NULL &&
	    nvlist_lookup_nvlist_array(spa->spa_l2cache.sav_config,
	    ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0 &&
	    (nv = spa_nvlist_lookup_by_guid(l2cache, nl2cache, guid)) != NULL) {
		/*
		 * Cache devices can always be removed.
		 */
		spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
		    ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
		spa_load_l2cache(spa);
		spa->spa_l2cache.sav_sync = B_TRUE;
	} else if (vd != NULL && vd->vdev_islog) {
		ASSERT(!locked);
		ASSERT(vd == vd->vdev_top);

		/*
		 * XXX - Once we have bp-rewrite this should
		 * become the common case.
		 */

		mg = vd->vdev_mg;

		/*
		 * Stop allocating from this vdev.
		 */
		metaslab_group_passivate(mg);

		/*
		 * Wait for the youngest allocations and frees to sync,
		 * and then wait for the deferral of those frees to finish.
		 */
		spa_vdev_config_exit(spa, NULL,
		    txg + TXG_CONCURRENT_STATES + TXG_DEFER_SIZE, 0, FTAG);

		/*
		 * Attempt to evacuate the vdev.
		 */
		error = spa_vdev_remove_evacuate(spa, vd);

		txg = spa_vdev_config_enter(spa);

		/*
		 * If we couldn't evacuate the vdev, unwind.
		 */
		if (error) {
			/* Re-enable allocations before bailing out. */
			metaslab_group_activate(mg);
			return (spa_vdev_exit(spa, NULL, txg, error));
		}

		/*
		 * Clean up the vdev namespace.
		 */
		spa_vdev_remove_from_namespace(spa, vd);

	} else if (vd != NULL) {
		/*
		 * Normal vdevs cannot be removed (yet).
		 */
		error = ENOTSUP;
	} else {
		/*
		 * There is no vdev of any kind with the specified guid.
		 */
		error = ENOENT;
	}

	/* Only drop the locks if we took them ourselves on entry. */
	if (!locked)
		return (spa_vdev_exit(spa, NULL, txg, error));

	return (error);
}
4736fa9e4066Sahrens 
/*
 * Find any device that's done replacing, or a vdev marked 'unspare' that's
 * currently spared, so we can detach it.  Walks the tree depth-first and
 * returns the vdev that should be detached, or NULL if nothing is ready.
 */
static vdev_t *
spa_vdev_resilver_done_hunt(vdev_t *vd)
{
	vdev_t *newvd, *oldvd;

	/* Recurse first: prefer the deepest candidate in the tree. */
	for (int c = 0; c < vd->vdev_children; c++) {
		oldvd = spa_vdev_resilver_done_hunt(vd->vdev_child[c]);
		if (oldvd != NULL)
			return (oldvd);
	}

	/*
	 * Check for a completed replacement.  We always consider the first
	 * vdev in the list to be the oldest vdev, and the last one to be
	 * the newest (see spa_vdev_attach() for how that works).  In
	 * the case where the newest vdev is faulted, we will not automatically
	 * remove it after a resilver completes.  This is OK as it will require
	 * user intervention to determine which disk the admin wishes to keep.
	 */
	if (vd->vdev_ops == &vdev_replacing_ops) {
		ASSERT(vd->vdev_children > 1);

		newvd = vd->vdev_child[vd->vdev_children - 1];
		oldvd = vd->vdev_child[0];

		/* Detach the old disk only once the new one is whole. */
		if (vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);
	}

	/*
	 * Check for a completed resilver with the 'unspare' flag set.
	 */
	if (vd->vdev_ops == &vdev_spare_ops) {
		vdev_t *first = vd->vdev_child[0];
		vdev_t *last = vd->vdev_child[vd->vdev_children - 1];

		/* Whichever side carries 'unspare' is the one to detach. */
		if (last->vdev_unspare) {
			oldvd = first;
			newvd = last;
		} else if (first->vdev_unspare) {
			oldvd = last;
			newvd = first;
		} else {
			oldvd = NULL;
		}

		if (oldvd != NULL &&
		    vdev_dtl_empty(newvd, DTL_MISSING) &&
		    vdev_dtl_empty(newvd, DTL_OUTAGE) &&
		    !vdev_dtl_required(oldvd))
			return (oldvd);

		/*
		 * If there are more than two spares attached to a disk,
		 * and those spares are not required, then we want to
		 * attempt to free them up now so that they can be used
		 * by other pools.  Once we're back down to a single
		 * disk+spare, we stop removing them.
		 */
		if (vd->vdev_children > 2) {
			newvd = vd->vdev_child[1];

			if (newvd->vdev_isspare && last->vdev_isspare &&
			    vdev_dtl_empty(last, DTL_MISSING) &&
			    vdev_dtl_empty(last, DTL_OUTAGE) &&
			    !vdev_dtl_required(newvd))
				return (newvd);
		}
	}

	return (NULL);
}
4815fa9e4066Sahrens 
/*
 * Detach every vdev that spa_vdev_resilver_done_hunt() says is finished
 * replacing or can be unspared.  The config lock is dropped around each
 * spa_vdev_detach() call (which takes it itself), so all the guids are
 * captured before the lock is released.
 */
static void
spa_vdev_resilver_done(spa_t *spa)
{
	vdev_t *vd, *pvd, *ppvd;
	uint64_t guid, sguid, pguid, ppguid;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/* Re-hunt after each detach until nothing more is ready. */
	while ((vd = spa_vdev_resilver_done_hunt(spa->spa_root_vdev)) != NULL) {
		pvd = vd->vdev_parent;
		ppvd = pvd->vdev_parent;
		/* Snapshot guids now; vdevs may vanish once we unlock. */
		guid = vd->vdev_guid;
		pguid = pvd->vdev_guid;
		ppguid = ppvd->vdev_guid;
		sguid = 0;
		/*
		 * If we have just finished replacing a hot spared device, then
		 * we need to detach the parent's first child (the original hot
		 * spare) as well.
		 */
		if (ppvd->vdev_ops == &vdev_spare_ops && pvd->vdev_id == 0 &&
		    ppvd->vdev_children == 2) {
			ASSERT(pvd->vdev_ops == &vdev_replacing_ops);
			sguid = ppvd->vdev_child[1]->vdev_guid;
		}
		spa_config_exit(spa, SCL_ALL, FTAG);
		if (spa_vdev_detach(spa, guid, pguid, B_TRUE) != 0)
			return;
		if (sguid && spa_vdev_detach(spa, sguid, ppguid, B_TRUE) != 0)
			return;
		spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}
4851fa9e4066Sahrens 
4852c67d9675Seschrock /*
4853b3388e4fSEric Taylor  * Update the stored path or FRU for this vdev.
4854c67d9675Seschrock  */
4855c67d9675Seschrock int
48566809eb4eSEric Schrock spa_vdev_set_common(spa_t *spa, uint64_t guid, const char *value,
48576809eb4eSEric Schrock     boolean_t ispath)
4858c67d9675Seschrock {
4859c5904d13Seschrock 	vdev_t *vd;
4860208044b8SGeorge Wilson 	boolean_t sync = B_FALSE;
4861c67d9675Seschrock 
4862b3388e4fSEric Taylor 	spa_vdev_state_enter(spa, SCL_ALL);
4863c67d9675Seschrock 
48646809eb4eSEric Schrock 	if ((vd = spa_lookup_by_guid(spa, guid, B_TRUE)) == NULL)
4865b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOENT));
4866c67d9675Seschrock 
48670e34b6a7Sbonwick 	if (!vd->vdev_ops->vdev_op_leaf)
4868b3388e4fSEric Taylor 		return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
48690e34b6a7Sbonwick 
48706809eb4eSEric Schrock 	if (ispath) {
4871208044b8SGeorge Wilson 		if (strcmp(value, vd->vdev_path) != 0) {
4872208044b8SGeorge Wilson 			spa_strfree(vd->vdev_path);
4873208044b8SGeorge Wilson 			vd->vdev_path = spa_strdup(value);
4874208044b8SGeorge Wilson 			sync = B_TRUE;
4875208044b8SGeorge Wilson 		}
48766809eb4eSEric Schrock 	} else {
4877208044b8SGeorge Wilson 		if (vd->vdev_fru == NULL) {
4878208044b8SGeorge Wilson 			vd->vdev_fru = spa_strdup(value);
4879208044b8SGeorge Wilson 			sync = B_TRUE;
4880208044b8SGeorge Wilson 		} else if (strcmp(value, vd->vdev_fru) != 0) {
48816809eb4eSEric Schrock 			spa_strfree(vd->vdev_fru);
4882208044b8SGeorge Wilson 			vd->vdev_fru = spa_strdup(value);
4883208044b8SGeorge Wilson 			sync = B_TRUE;
4884208044b8SGeorge Wilson 		}
48856809eb4eSEric Schrock 	}
4886c67d9675Seschrock 
4887208044b8SGeorge Wilson 	return (spa_vdev_state_exit(spa, sync ? vd : NULL, 0));
4888c67d9675Seschrock }
4889c67d9675Seschrock 
/*
 * Update the stored device path for the vdev with the given guid.
 */
int
spa_vdev_setpath(spa_t *spa, uint64_t guid, const char *newpath)
{
	return (spa_vdev_set_common(spa, guid, newpath, B_TRUE));
}
48956809eb4eSEric Schrock 
/*
 * Update the stored FRU (field-replaceable unit) identifier for the
 * vdev with the given guid.
 */
int
spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
{
	return (spa_vdev_set_common(spa, guid, newfru, B_FALSE));
}
49016809eb4eSEric Schrock 
4902fa9e4066Sahrens /*
4903fa9e4066Sahrens  * ==========================================================================
49043f9d6ad7SLin Ling  * SPA Scanning
4905fa9e4066Sahrens  * ==========================================================================
4906fa9e4066Sahrens  */
4907fa9e4066Sahrens 
/*
 * Cancel an in-progress scan (scrub).  A resilver may not be cancelled;
 * returns EBUSY in that case, otherwise the result of dsl_scan_cancel().
 */
int
spa_scan_stop(spa_t *spa)
{
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
	if (dsl_scan_resilvering(spa->spa_dsl_pool))
		return (EBUSY);
	return (dsl_scan_cancel(spa->spa_dsl_pool));
}
4916bb8b5132Sek 
/*
 * Start a scan (scrub or resilver) of the pool using the given scan
 * function.  Returns ENOTSUP for an invalid function, 0 when a
 * requested resilver has nothing to do, otherwise the result of
 * dsl_scan().
 */
int
spa_scan(spa_t *spa, pool_scan_func_t func)
{
	ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);

	if (func >= POOL_SCAN_FUNCS || func == POOL_SCAN_NONE)
		return (ENOTSUP);

	/*
	 * If a resilver was requested, but there is no DTL on a
	 * writeable leaf device, we have nothing to do.
	 */
	if (func == POOL_SCAN_RESILVER &&
	    !vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL)) {
		/* Still run the completion path to tidy up any spares. */
		spa_async_request(spa, SPA_ASYNC_RESILVER_DONE);
		return (0);
	}

	return (dsl_scan(spa->spa_dsl_pool, func));
}
4937fa9e4066Sahrens 
4938ea8dc4b6Seschrock /*
4939ea8dc4b6Seschrock  * ==========================================================================
4940ea8dc4b6Seschrock  * SPA async task processing
4941ea8dc4b6Seschrock  * ==========================================================================
4942ea8dc4b6Seschrock  */
4943ea8dc4b6Seschrock 
/*
 * Recursively walk the vdev tree rooted at 'vd' and transition any vdev
 * with vdev_remove_wanted set to the REMOVED state, resetting its error
 * counters.  Called from the async thread with vdev state held.
 */
static void
spa_async_remove(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_remove_wanted) {
		vd->vdev_remove_wanted = B_FALSE;
		vd->vdev_delayed_close = B_FALSE;
		vdev_set_state(vd, B_FALSE, VDEV_STATE_REMOVED, VDEV_AUX_NONE);

		/*
		 * We want to clear the stats, but we don't want to do a full
		 * vdev_clear() as that will cause us to throw away
		 * degraded/faulted state as well as attempt to reopen the
		 * device, all of which is a waste.
		 */
		vd->vdev_stat.vs_read_errors = 0;
		vd->vdev_stat.vs_write_errors = 0;
		vd->vdev_stat.vs_checksum_errors = 0;

		/* Queue the new state for the next config sync. */
		vdev_state_dirty(vd->vdev_top);
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_remove(spa, vd->vdev_child[c]);
}
4968fa9e4066Sahrens 
/*
 * Recursively walk the vdev tree rooted at 'vd' and reopen any vdev
 * with vdev_probe_wanted set, which performs the deferred device probe.
 */
static void
spa_async_probe(spa_t *spa, vdev_t *vd)
{
	if (vd->vdev_probe_wanted) {
		vd->vdev_probe_wanted = B_FALSE;
		vdev_reopen(vd);	/* vdev_open() does the actual probe */
	}

	for (int c = 0; c < vd->vdev_children; c++)
		spa_async_probe(spa, vd->vdev_child[c]);
}
4980e14bb325SJeff Bonwick 
/*
 * Recursively post a sysevent (ESC_DEV_DLE) for every leaf vdev with a
 * known physical path, prompting userland to grow the device.  No-op
 * unless the pool's autoexpand property is on.
 */
static void
spa_async_autoexpand(spa_t *spa, vdev_t *vd)
{
	sysevent_id_t eid;
	nvlist_t *attr;
	char *physpath;

	if (!spa->spa_autoexpand)
		return;

	for (int c = 0; c < vd->vdev_children; c++) {
		vdev_t *cvd = vd->vdev_child[c];
		spa_async_autoexpand(spa, cvd);
	}

	/* Only leaves with a physical path can generate a device event. */
	if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
		return;

	physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	(void) snprintf(physpath, MAXPATHLEN, "/devices%s", vd->vdev_physpath);

	VERIFY(nvlist_alloc(&attr, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	VERIFY(nvlist_add_string(attr, DEV_PHYS_PATH, physpath) == 0);

	(void) ddi_log_sysevent(zfs_dip, SUNW_VENDOR, EC_DEV_STATUS,
	    ESC_DEV_DLE, attr, &eid, DDI_SLEEP);

	nvlist_free(attr);
	kmem_free(physpath, MAXPATHLEN);
}
5011573ca77eSGeorge Wilson 
/*
 * Body of the per-pool async worker thread.  Atomically claims the
 * currently pending task bits and services each one in turn, then
 * clears spa_async_thread and exits.  spa_async_dispatch() spawns a
 * new thread if more tasks arrive later.
 */
static void
spa_async_thread(spa_t *spa)
{
	int tasks;

	ASSERT(spa->spa_sync_on);

	/* Claim all pending tasks; new requests spawn a fresh thread. */
	mutex_enter(&spa->spa_async_lock);
	tasks = spa->spa_async_tasks;
	spa->spa_async_tasks = 0;
	mutex_exit(&spa->spa_async_lock);

	/*
	 * See if the config needs to be updated.
	 */
	if (tasks & SPA_ASYNC_CONFIG_UPDATE) {
		uint64_t old_space, new_space;

		mutex_enter(&spa_namespace_lock);
		old_space = metaslab_class_get_space(spa_normal_class(spa));
		spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
		new_space = metaslab_class_get_space(spa_normal_class(spa));
		mutex_exit(&spa_namespace_lock);

		/*
		 * If the pool grew as a result of the config update,
		 * then log an internal history event.
		 */
		if (new_space != old_space) {
			spa_history_log_internal(LOG_POOL_VDEV_ONLINE,
			    spa, NULL,
			    "pool '%s' size: %llu(+%llu)",
			    spa_name(spa), new_space, new_space - old_space);
		}
	}

	/*
	 * See if any devices need to be marked REMOVED.
	 */
	if (tasks & SPA_ASYNC_REMOVE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_remove(spa, spa->spa_root_vdev);
		/* Aux vdevs (l2cache, spares) are not under the root. */
		for (int i = 0; i < spa->spa_l2cache.sav_count; i++)
			spa_async_remove(spa, spa->spa_l2cache.sav_vdevs[i]);
		for (int i = 0; i < spa->spa_spares.sav_count; i++)
			spa_async_remove(spa, spa->spa_spares.sav_vdevs[i]);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/* Notify userland of expandable devices, unless I/O is suspended. */
	if ((tasks & SPA_ASYNC_AUTOEXPAND) && !spa_suspended(spa)) {
		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
		spa_async_autoexpand(spa, spa->spa_root_vdev);
		spa_config_exit(spa, SCL_CONFIG, FTAG);
	}

	/*
	 * See if any devices need to be probed.
	 */
	if (tasks & SPA_ASYNC_PROBE) {
		spa_vdev_state_enter(spa, SCL_NONE);
		spa_async_probe(spa, spa->spa_root_vdev);
		(void) spa_vdev_state_exit(spa, NULL, 0);
	}

	/*
	 * If any devices are done replacing, detach them.
	 */
	if (tasks & SPA_ASYNC_RESILVER_DONE)
		spa_vdev_resilver_done(spa);

	/*
	 * Kick off a resilver.
	 */
	if (tasks & SPA_ASYNC_RESILVER)
		dsl_resilver_restart(spa->spa_dsl_pool, 0);

	/*
	 * Let the world know that we're done.
	 */
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_thread = NULL;
	cv_broadcast(&spa->spa_async_cv);
	mutex_exit(&spa->spa_async_lock);
	thread_exit();
}
5097ea8dc4b6Seschrock 
/*
 * Suspend async task processing and wait for any running async thread
 * to finish.  Calls may nest; each must be paired with
 * spa_async_resume().
 */
void
spa_async_suspend(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_suspended++;
	while (spa->spa_async_thread != NULL)
		cv_wait(&spa->spa_async_cv, &spa->spa_async_lock);
	mutex_exit(&spa->spa_async_lock);
}
5107ea8dc4b6Seschrock 
/*
 * Undo one spa_async_suspend().  Dispatch becomes possible again only
 * when the nested suspend count drops back to zero.  Note this does not
 * itself kick off pending tasks; they wait for the next
 * spa_async_dispatch() call (e.g. from spa_sync()).
 */
void
spa_async_resume(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	ASSERT(spa->spa_async_suspended != 0);
	spa->spa_async_suspended--;
	mutex_exit(&spa->spa_async_lock);
}
5116ea8dc4b6Seschrock 
/*
 * Spawn the async worker thread if there is anything for it to do.
 * Dispatch requires: pending tasks, no active suspension, no async
 * thread already running, and a writable root vnode (skips dispatch
 * while the root filesystem is absent or read-only).
 */
static void
spa_async_dispatch(spa_t *spa)
{
	mutex_enter(&spa->spa_async_lock);
	if (spa->spa_async_tasks && !spa->spa_async_suspended &&
	    spa->spa_async_thread == NULL &&
	    rootdir != NULL && !vn_is_readonly(rootdir))
		spa->spa_async_thread = thread_create(NULL, 0,
		    spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
	mutex_exit(&spa->spa_async_lock);
}
5128ea8dc4b6Seschrock 
/*
 * Queue an async task request.  'task' is a bitmask of SPA_ASYNC_*
 * flags OR'd into spa_async_tasks; the work is only recorded here and
 * is picked up the next time spa_async_dispatch() runs.
 */
void
spa_async_request(spa_t *spa, int task)
{
	zfs_dbgmsg("spa=%s async request task=%u", spa->spa_name, task);
	mutex_enter(&spa->spa_async_lock);
	spa->spa_async_tasks |= task;
	mutex_exit(&spa->spa_async_lock);
}
5137fa9e4066Sahrens 
5138fa9e4066Sahrens /*
5139fa9e4066Sahrens  * ==========================================================================
5140fa9e4066Sahrens  * SPA syncing routines
5141fa9e4066Sahrens  * ==========================================================================
5142fa9e4066Sahrens  */
5143fa9e4066Sahrens 
5144cde58dbcSMatthew Ahrens static int
5145cde58dbcSMatthew Ahrens bpobj_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
5146cde58dbcSMatthew Ahrens {
5147cde58dbcSMatthew Ahrens 	bpobj_t *bpo = arg;
5148cde58dbcSMatthew Ahrens 	bpobj_enqueue(bpo, bp, tx);
5149cde58dbcSMatthew Ahrens 	return (0);
5150b24ab676SJeff Bonwick }
5151b24ab676SJeff Bonwick 
5152cde58dbcSMatthew Ahrens static int
5153cde58dbcSMatthew Ahrens spa_free_sync_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
5154b24ab676SJeff Bonwick {
5155b24ab676SJeff Bonwick 	zio_t *zio = arg;
5156b24ab676SJeff Bonwick 
5157b24ab676SJeff Bonwick 	zio_nowait(zio_free_sync(zio, zio->io_spa, dmu_tx_get_txg(tx), bp,
5158b24ab676SJeff Bonwick 	    zio->io_flags));
5159cde58dbcSMatthew Ahrens 	return (0);
5160fa9e4066Sahrens }
5161fa9e4066Sahrens 
/*
 * Pack 'nv' (XDR encoding) and write it into MOS object 'obj', then
 * record the packed size in the object's bonus buffer so readers know
 * how many bytes to unpack.
 */
static void
spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
{
	char *packed = NULL;
	size_t bufsize;
	size_t nvsize = 0;
	dmu_buf_t *db;

	VERIFY(nvlist_size(nv, &nvsize, NV_ENCODE_XDR) == 0);

	/*
	 * Write full (SPA_CONFIG_BLOCKSIZE) blocks of configuration
	 * information.  This avoids the dbuf_will_dirty() path and
	 * saves us a pre-read to get data we don't actually care about.
	 */
	bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
	packed = kmem_alloc(bufsize, KM_SLEEP);

	VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
	    KM_SLEEP) == 0);
	/* Zero the round-up slack so stale heap contents never hit disk. */
	bzero(packed + nvsize, bufsize - nvsize);

	dmu_write(spa->spa_meta_objset, obj, 0, bufsize, packed, tx);

	kmem_free(packed, bufsize);

	/* Stash the packed size (not bufsize) in the bonus buffer. */
	VERIFY(0 == dmu_bonus_hold(spa->spa_meta_objset, obj, FTAG, &db));
	dmu_buf_will_dirty(db, tx);
	*(uint64_t *)db->db_data = nvsize;
	dmu_buf_rele(db, FTAG);
}
5193fa9e4066Sahrens 
/*
 * Sync an auxiliary vdev list (spares or l2cache) to the MOS.
 * 'config' is the nvlist key (e.g. ZPOOL_CONFIG_SPARES) and 'entry'
 * the pool-directory name (e.g. DMU_POOL_SPARES) under which the
 * packed nvlist is stored.  No-op unless sav->sav_sync is set; the
 * flag is cleared once the list has been written.
 */
static void
spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
    const char *config, const char *entry)
{
	nvlist_t *nvroot;
	nvlist_t **list;
	int i;

	if (!sav->sav_sync)
		return;

	/*
	 * Update the MOS nvlist describing the list of available devices.
	 * spa_validate_aux() will have already made sure this nvlist is
	 * valid and the vdevs are labeled appropriately.
	 */
	if (sav->sav_object == 0) {
		/* First sync: create the packed-nvlist object and link it. */
		sav->sav_object = dmu_object_alloc(spa->spa_meta_objset,
		    DMU_OT_PACKED_NVLIST, 1 << 14, DMU_OT_PACKED_NVLIST_SIZE,
		    sizeof (uint64_t), tx);
		VERIFY(zap_update(spa->spa_meta_objset,
		    DMU_POOL_DIRECTORY_OBJECT, entry, sizeof (uint64_t), 1,
		    &sav->sav_object, tx) == 0);
	}

	VERIFY(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, KM_SLEEP) == 0);
	if (sav->sav_count == 0) {
		/* Empty list: record a zero-length array under 'config'. */
		VERIFY(nvlist_add_nvlist_array(nvroot, config, NULL, 0) == 0);
	} else {
		list = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
		for (i = 0; i < sav->sav_count; i++)
			/*
			 * NOTE(review): VDEV_CONFIG_L2CACHE is passed even
			 * when syncing the spares list — looks benign for
			 * label generation, but confirm against
			 * vdev_config_generate().
			 */
			list[i] = vdev_config_generate(spa, sav->sav_vdevs[i],
			    B_FALSE, VDEV_CONFIG_L2CACHE);
		VERIFY(nvlist_add_nvlist_array(nvroot, config, list,
		    sav->sav_count) == 0);
		for (i = 0; i < sav->sav_count; i++)
			nvlist_free(list[i]);
		kmem_free(list, sav->sav_count * sizeof (void *));
	}

	spa_sync_nvlist(spa, sav->sav_object, nvroot, tx);
	nvlist_free(nvroot);

	sav->sav_sync = B_FALSE;
}
523999653d4eSeschrock 
524099653d4eSeschrock static void
524199653d4eSeschrock spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
524299653d4eSeschrock {
524399653d4eSeschrock 	nvlist_t *config;
524499653d4eSeschrock 
5245e14bb325SJeff Bonwick 	if (list_is_empty(&spa->spa_config_dirty_list))
524699653d4eSeschrock 		return;
524799653d4eSeschrock 
5248e14bb325SJeff Bonwick 	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
5249e14bb325SJeff Bonwick 
5250e14bb325SJeff Bonwick 	config = spa_config_generate(spa, spa->spa_root_vdev,
5251e14bb325SJeff Bonwick 	    dmu_tx_get_txg(tx), B_FALSE);
5252e14bb325SJeff Bonwick 
5253e14bb325SJeff Bonwick 	spa_config_exit(spa, SCL_STATE, FTAG);
525499653d4eSeschrock 
525599653d4eSeschrock 	if (spa->spa_config_syncing)
525699653d4eSeschrock 		nvlist_free(spa->spa_config_syncing);
525799653d4eSeschrock 	spa->spa_config_syncing = config;
525899653d4eSeschrock 
525999653d4eSeschrock 	spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
526099653d4eSeschrock }
526199653d4eSeschrock 
5262990b4856Slling /*
5263990b4856Slling  * Set zpool properties.
5264990b4856Slling  */
5265b1b8ab34Slling static void
52663f9d6ad7SLin Ling spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
5267b1b8ab34Slling {
5268b1b8ab34Slling 	spa_t *spa = arg1;
5269b1b8ab34Slling 	objset_t *mos = spa->spa_meta_objset;
5270990b4856Slling 	nvlist_t *nvp = arg2;
5271990b4856Slling 	nvpair_t *elem;
52723d7072f8Seschrock 	uint64_t intval;
5273c5904d13Seschrock 	char *strval;
5274990b4856Slling 	zpool_prop_t prop;
5275990b4856Slling 	const char *propname;
5276990b4856Slling 	zprop_type_t proptype;
5277b1b8ab34Slling 
5278e14bb325SJeff Bonwick 	mutex_enter(&spa->spa_props_lock);
5279e14bb325SJeff Bonwick 
5280990b4856Slling 	elem = NULL;
5281990b4856Slling 	while ((elem = nvlist_next_nvpair(nvp, elem))) {
5282990b4856Slling 		switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
5283990b4856Slling 		case ZPOOL_PROP_VERSION:
5284990b4856Slling 			/*
5285990b4856Slling 			 * Only set version for non-zpool-creation cases
5286990b4856Slling 			 * (set/import). spa_create() needs special care
5287990b4856Slling 			 * for version setting.
5288990b4856Slling 			 */
5289990b4856Slling 			if (tx->tx_txg != TXG_INITIAL) {
5290990b4856Slling 				VERIFY(nvpair_value_uint64(elem,
5291990b4856Slling 				    &intval) == 0);
5292990b4856Slling 				ASSERT(intval <= SPA_VERSION);
5293990b4856Slling 				ASSERT(intval >= spa_version(spa));
5294990b4856Slling 				spa->spa_uberblock.ub_version = intval;
5295990b4856Slling 				vdev_config_dirty(spa->spa_root_vdev);
5296990b4856Slling 			}
5297ecd6cf80Smarks 			break;
5298990b4856Slling 
5299990b4856Slling 		case ZPOOL_PROP_ALTROOT:
5300990b4856Slling 			/*
5301990b4856Slling 			 * 'altroot' is a non-persistent property. It should
5302990b4856Slling 			 * have been set temporarily at creation or import time.
5303990b4856Slling 			 */
5304990b4856Slling 			ASSERT(spa->spa_root != NULL);
5305b1b8ab34Slling 			break;
53063d7072f8Seschrock 
53072f8aaab3Seschrock 		case ZPOOL_PROP_CACHEFILE:
5308990b4856Slling 			/*
5309379c004dSEric Schrock 			 * 'cachefile' is also a non-persisitent property.
5310990b4856Slling 			 */
53113d7072f8Seschrock 			break;
5312990b4856Slling 		default:
5313990b4856Slling 			/*
5314990b4856Slling 			 * Set pool property values in the poolprops mos object.
5315990b4856Slling 			 */
5316990b4856Slling 			if (spa->spa_pool_props_object == 0) {
5317990b4856Slling 				VERIFY((spa->spa_pool_props_object =
5318990b4856Slling 				    zap_create(mos, DMU_OT_POOL_PROPS,
5319990b4856Slling 				    DMU_OT_NONE, 0, tx)) > 0);
5320990b4856Slling 
5321990b4856Slling 				VERIFY(zap_update(mos,
5322990b4856Slling 				    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
5323990b4856Slling 				    8, 1, &spa->spa_pool_props_object, tx)
5324990b4856Slling 				    == 0);
5325990b4856Slling 			}
5326990b4856Slling 
5327990b4856Slling 			/* normalize the property name */
5328990b4856Slling 			propname = zpool_prop_to_name(prop);
5329990b4856Slling 			proptype = zpool_prop_get_type(prop);
5330990b4856Slling 
5331990b4856Slling 			if (nvpair_type(elem) == DATA_TYPE_STRING) {
5332990b4856Slling 				ASSERT(proptype == PROP_TYPE_STRING);
5333990b4856Slling 				VERIFY(nvpair_value_string(elem, &strval) == 0);
5334990b4856Slling 				VERIFY(zap_update(mos,
5335990b4856Slling 				    spa->spa_pool_props_object, propname,
5336990b4856Slling 				    1, strlen(strval) + 1, strval, tx) == 0);
5337990b4856Slling 
5338990b4856Slling 			} else if (nvpair_type(elem) == DATA_TYPE_UINT64) {
5339990b4856Slling 				VERIFY(nvpair_value_uint64(elem, &intval) == 0);
5340990b4856Slling 
5341990b4856Slling 				if (proptype == PROP_TYPE_INDEX) {
5342990b4856Slling 					const char *unused;
5343990b4856Slling 					VERIFY(zpool_prop_index_to_string(
5344990b4856Slling 					    prop, intval, &unused) == 0);
5345990b4856Slling 				}
5346990b4856Slling 				VERIFY(zap_update(mos,
5347990b4856Slling 				    spa->spa_pool_props_object, propname,
5348990b4856Slling 				    8, 1, &intval, tx) == 0);
5349990b4856Slling 			} else {
5350990b4856Slling 				ASSERT(0); /* not allowed */
5351990b4856Slling 			}
5352990b4856Slling 
53530a4e9518Sgw 			switch (prop) {
53540a4e9518Sgw 			case ZPOOL_PROP_DELEGATION:
5355990b4856Slling 				spa->spa_delegation = intval;
53560a4e9518Sgw 				break;
53570a4e9518Sgw 			case ZPOOL_PROP_BOOTFS:
5358990b4856Slling 				spa->spa_bootfs = intval;
53590a4e9518Sgw 				break;
53600a4e9518Sgw 			case ZPOOL_PROP_FAILUREMODE:
53610a4e9518Sgw 				spa->spa_failmode = intval;
53620a4e9518Sgw 				break;
5363573ca77eSGeorge Wilson 			case ZPOOL_PROP_AUTOEXPAND:
5364573ca77eSGeorge Wilson 				spa->spa_autoexpand = intval;
5365b98131cfSEric Taylor 				if (tx->tx_txg != TXG_INITIAL)
5366b98131cfSEric Taylor 					spa_async_request(spa,
5367b98131cfSEric Taylor 					    SPA_ASYNC_AUTOEXPAND);
5368573ca77eSGeorge Wilson 				break;
5369b24ab676SJeff Bonwick 			case ZPOOL_PROP_DEDUPDITTO:
5370b24ab676SJeff Bonwick 				spa->spa_dedup_ditto = intval;
5371b24ab676SJeff Bonwick 				break;
53720a4e9518Sgw 			default:
53730a4e9518Sgw 				break;
53740a4e9518Sgw 			}
5375990b4856Slling 		}
5376990b4856Slling 
5377990b4856Slling 		/* log internal history if this is not a zpool create */
5378990b4856Slling 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY &&
5379990b4856Slling 		    tx->tx_txg != TXG_INITIAL) {
53803f9d6ad7SLin Ling 			spa_history_log_internal(LOG_POOL_PROPSET,
53813f9d6ad7SLin Ling 			    spa, tx, "%s %lld %s",
5382e14bb325SJeff Bonwick 			    nvpair_name(elem), intval, spa_name(spa));
5383b1b8ab34Slling 		}
5384b1b8ab34Slling 	}
5385e14bb325SJeff Bonwick 
5386e14bb325SJeff Bonwick 	mutex_exit(&spa->spa_props_lock);
5387b1b8ab34Slling }
5388b1b8ab34Slling 
/*
 * Perform one-time upgrade on-disk changes.  spa_version() does not
 * reflect the new version this txg, so there must be no changes this
 * txg to anything that the upgrade code depends on after it executes.
 * Therefore this must be called after dsl_pool_sync() does the sync
 * tasks.
 */
static void
spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;

	/* Only run on the first convergence pass of spa_sync(). */
	ASSERT(spa->spa_sync_pass == 1);

	/*
	 * Each upgrade fires exactly once: when the last-synced version
	 * (ub_version of spa_ubsync) is below the feature's version but
	 * the in-core uberblock has crossed it this txg.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_ORIGIN &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_ORIGIN) {
		dsl_pool_create_origin(dp, tx);

		/* Keeping the origin open increases spa_minref */
		spa->spa_minref += 3;
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_NEXT_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_NEXT_CLONES) {
		dsl_pool_upgrade_clones(dp, tx);
	}

	if (spa->spa_ubsync.ub_version < SPA_VERSION_DIR_CLONES &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_DIR_CLONES) {
		dsl_pool_upgrade_dir_clones(dp, tx);

		/* Keeping the freedir open increases spa_minref */
		spa->spa_minref += 3;
	}
}
5424cde58dbcSMatthew Ahrens 
/*
 * Sync the specified transaction group.  New blocks may be dirtied as
 * part of the process, so we iterate until it converges.
 */
void
spa_sync(spa_t *spa, uint64_t txg)
{
	dsl_pool_t *dp = spa->spa_dsl_pool;
	objset_t *mos = spa->spa_meta_objset;
	bpobj_t *defer_bpo = &spa->spa_deferred_bpobj;
	bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
	vdev_t *rvd = spa->spa_root_vdev;
	vdev_t *vd;
	dmu_tx_t *tx;
	int error;

	/*
	 * Lock out configuration changes.
	 */
	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);

	spa->spa_syncing_txg = txg;
	/* Incremented once per convergence pass in the loop below. */
	spa->spa_sync_pass = 0;

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
	 */
	spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
	while (list_head(&spa->spa_state_dirty_list) != NULL) {
		/*
		 * We need the write lock here because, for aux vdevs,
		 * calling vdev_config_dirty() modifies sav_config.
		 * This is ugly and will become unnecessary when we
		 * eliminate the aux vdev wart by integrating all vdevs
		 * into the root vdev tree.
		 */
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_WRITER);
		while ((vd = list_head(&spa->spa_state_dirty_list)) != NULL) {
			vdev_state_clean(vd);
			vdev_config_dirty(vd);
		}
		spa_config_exit(spa, SCL_CONFIG | SCL_STATE, FTAG);
		spa_config_enter(spa, SCL_CONFIG | SCL_STATE, FTAG, RW_READER);
	}
	spa_config_exit(spa, SCL_STATE, FTAG);

	/* One tx covers all of this txg's sync-context writes. */
	tx = dmu_tx_create_assigned(dp, txg);

	/*
	 * If we are upgrading to SPA_VERSION_RAIDZ_DEFLATE this txg,
	 * set spa_deflate if we have no raid-z vdevs.
	 */
	if (spa->spa_ubsync.ub_version < SPA_VERSION_RAIDZ_DEFLATE &&
	    spa->spa_uberblock.ub_version >= SPA_VERSION_RAIDZ_DEFLATE) {
		int i;

		for (i = 0; i < rvd->vdev_children; i++) {
			vd = rvd->vdev_child[i];
			if (vd->vdev_deflate_ratio != SPA_MINBLOCKSIZE)
				break;
		}
		if (i == rvd->vdev_children) {
			spa->spa_deflate = TRUE;
			VERIFY(0 == zap_add(spa->spa_meta_objset,
			    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_DEFLATE,
			    sizeof (uint64_t), 1, &spa->spa_deflate, tx));
		}
	}

	/*
	 * If anything has changed in this txg, or if someone is waiting
	 * for this txg to sync (eg, spa_vdev_remove()), push the
	 * deferred frees from the previous txg.  If not, leave them
	 * alone so that we don't generate work on an otherwise idle
	 * system.
	 */
	if (!txg_list_empty(&dp->dp_dirty_datasets, txg) ||
	    !txg_list_empty(&dp->dp_dirty_dirs, txg) ||
	    !txg_list_empty(&dp->dp_sync_tasks, txg) ||
	    ((dsl_scan_active(dp->dp_scan) ||
	    txg_sync_waiting(dp)) && !spa_shutting_down(spa))) {
		zio_t *zio = zio_root(spa, NULL, NULL, 0);
		VERIFY3U(bpobj_iterate(defer_bpo,
		    spa_free_sync_cb, zio, tx), ==, 0);
		VERIFY3U(zio_wait(zio), ==, 0);
	}

	/*
	 * Iterate to convergence.
	 */
	do {
		int pass = ++spa->spa_sync_pass;

		spa_sync_config_object(spa, tx);
		spa_sync_aux_dev(spa, &spa->spa_spares, tx,
		    ZPOOL_CONFIG_SPARES, DMU_POOL_SPARES);
		spa_sync_aux_dev(spa, &spa->spa_l2cache, tx,
		    ZPOOL_CONFIG_L2CACHE, DMU_POOL_L2CACHE);
		spa_errlog_sync(spa, txg);
		dsl_pool_sync(dp, txg);

		/*
		 * Early passes free this txg's blocks immediately via
		 * spa_free_sync_cb(); later passes enqueue them on the
		 * deferred bpobj to be freed in a future txg instead.
		 */
		if (pass <= SYNC_PASS_DEFERRED_FREE) {
			zio_t *zio = zio_root(spa, NULL, NULL, 0);
			bplist_iterate(free_bpl, spa_free_sync_cb,
			    zio, tx);
			VERIFY(zio_wait(zio) == 0);
		} else {
			bplist_iterate(free_bpl, bpobj_enqueue_cb,
			    defer_bpo, tx);
		}

		ddt_sync(spa, txg);
		dsl_scan_sync(dp, tx);

		/* Sync all vdevs dirtied during this txg. */
		while (vd = txg_list_remove(&spa->spa_vdev_txg_list, txg))
			vdev_sync(vd, txg);

		if (pass == 1)
			spa_sync_upgrades(spa, tx);

	} while (dmu_objset_is_dirty(mos, txg));

	/*
	 * Rewrite the vdev configuration (which includes the uberblock)
	 * to commit the transaction group.
	 *
	 * If there are no dirty vdevs, we sync the uberblock to a few
	 * random top-level vdevs that are known to be visible in the
	 * config cache (see spa_vdev_add() for a complete description).
	 * If there *are* dirty vdevs, sync the uberblock to all vdevs.
	 */
	for (;;) {
		/*
		 * We hold SCL_STATE to prevent vdev open/close/etc.
		 * while we're attempting to write the vdev labels.
		 */
		spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);

		if (list_is_empty(&spa->spa_config_dirty_list)) {
			vdev_t *svd[SPA_DVAS_PER_BP];
			int svdcount = 0;
			int children = rvd->vdev_children;
			int c0 = spa_get_random(children);

			/*
			 * Pick up to SPA_DVAS_PER_BP healthy, non-log
			 * top-level vdevs starting from a random child.
			 */
			for (int c = 0; c < children; c++) {
				vd = rvd->vdev_child[(c0 + c) % children];
				if (vd->vdev_ms_array == 0 || vd->vdev_islog)
					continue;
				svd[svdcount++] = vd;
				if (svdcount == SPA_DVAS_PER_BP)
					break;
			}
			/*
			 * NOTE(review): the B_TRUE retry appears to be a
			 * "try harder" fallback after a failed label
			 * write — confirm against vdev_config_sync().
			 */
			error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(svd, svdcount, txg,
				    B_TRUE);
		} else {
			error = vdev_config_sync(rvd->vdev_child,
			    rvd->vdev_children, txg, B_FALSE);
			if (error != 0)
				error = vdev_config_sync(rvd->vdev_child,
				    rvd->vdev_children, txg, B_TRUE);
		}

		spa_config_exit(spa, SCL_STATE, FTAG);

		if (error == 0)
			break;
		/*
		 * Label writes failed even with the fallback: suspend
		 * pool I/O, wait until it is resumed, then retry.
		 */
		zio_suspend(spa, NULL);
		zio_resume_wait(spa);
	}
	dmu_tx_commit(tx);

	/*
	 * Clear the dirty config list.
	 */
	while ((vd = list_head(&spa->spa_config_dirty_list)) != NULL)
		vdev_config_clean(vd);

	/*
	 * Now that the new config has synced transactionally,
	 * let it become visible to the config cache.
	 */
	if (spa->spa_config_syncing != NULL) {
		spa_config_set(spa, spa->spa_config_syncing);
		spa->spa_config_txg = txg;
		spa->spa_config_syncing = NULL;
	}

	/* Remember what we just synced as the last-synced uberblock. */
	spa->spa_ubsync = spa->spa_uberblock;

	dsl_pool_sync_done(dp, txg);

	/*
	 * Update usable space statistics.
	 */
	while (vd = txg_list_remove(&spa->spa_vdev_txg_list, TXG_CLEAN(txg)))
		vdev_sync_done(vd, txg);

	spa_update_dspace(spa);

	/*
	 * It had better be the case that we didn't dirty anything
	 * since vdev_config_sync().
	 */
	ASSERT(txg_list_empty(&dp->dp_dirty_datasets, txg));
	ASSERT(txg_list_empty(&dp->dp_dirty_dirs, txg));
	ASSERT(txg_list_empty(&spa->spa_vdev_txg_list, txg));

	spa->spa_sync_pass = 0;

	spa_config_exit(spa, SCL_CONFIG, FTAG);

	spa_handle_ignored_writes(spa);

	/*
	 * If any async tasks have been requested, kick them off.
	 */
	spa_async_dispatch(spa);
}
5647fa9e4066Sahrens 
/*
 * Sync all pools.  We don't want to hold the namespace lock across these
 * operations, so we take a reference on the spa_t and drop the lock during the
 * sync.
 */
void
spa_sync_allpools(void)
{
	spa_t *spa = NULL;
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(spa)) != NULL) {
		/* Skip inactive pools and pools with suspended I/O. */
		if (spa_state(spa) != POOL_STATE_ACTIVE || spa_suspended(spa))
			continue;
		/* Hold a ref so the spa survives while the lock is dropped. */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		txg_wait_synced(spa_get_dsl(spa), 0);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);
	}
	mutex_exit(&spa_namespace_lock);
}
5669fa9e4066Sahrens 
5670fa9e4066Sahrens /*
5671fa9e4066Sahrens  * ==========================================================================
5672fa9e4066Sahrens  * Miscellaneous routines
5673fa9e4066Sahrens  * ==========================================================================
5674fa9e4066Sahrens  */
5675fa9e4066Sahrens 
/*
 * Remove all pools in the system.
 */
void
spa_evict_all(void)
{
	spa_t *spa;

	/*
	 * Remove all cached state.  All pools should be closed now,
	 * so every spa in the AVL tree should be unreferenced.
	 */
	mutex_enter(&spa_namespace_lock);
	while ((spa = spa_next(NULL)) != NULL) {
		/*
		 * Stop async tasks.  The async thread may need to detach
		 * a device that's been replaced, which requires grabbing
		 * spa_namespace_lock, so we must drop it here.
		 */
		spa_open_ref(spa, FTAG);
		mutex_exit(&spa_namespace_lock);
		spa_async_suspend(spa);
		mutex_enter(&spa_namespace_lock);
		spa_close(spa, FTAG);

		/* Tear down any loaded state before removing the spa. */
		if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
			spa_unload(spa);
			spa_deactivate(spa);
		}
		spa_remove(spa);
	}
	mutex_exit(&spa_namespace_lock);
}
5709ea8dc4b6Seschrock 
5710ea8dc4b6Seschrock vdev_t *
57116809eb4eSEric Schrock spa_lookup_by_guid(spa_t *spa, uint64_t guid, boolean_t aux)
5712ea8dc4b6Seschrock {
5713c5904d13Seschrock 	vdev_t *vd;
5714c5904d13Seschrock 	int i;
5715c5904d13Seschrock 
5716c5904d13Seschrock 	if ((vd = vdev_lookup_by_guid(spa->spa_root_vdev, guid)) != NULL)
5717c5904d13Seschrock 		return (vd);
5718c5904d13Seschrock 
57196809eb4eSEric Schrock 	if (aux) {
5720c5904d13Seschrock 		for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
5721c5904d13Seschrock 			vd = spa->spa_l2cache.sav_vdevs[i];
57226809eb4eSEric Schrock 			if (vd->vdev_guid == guid)
57236809eb4eSEric Schrock 				return (vd);
57246809eb4eSEric Schrock 		}
57256809eb4eSEric Schrock 
57266809eb4eSEric Schrock 		for (i = 0; i < spa->spa_spares.sav_count; i++) {
57276809eb4eSEric Schrock 			vd = spa->spa_spares.sav_vdevs[i];
5728c5904d13Seschrock 			if (vd->vdev_guid == guid)
5729c5904d13Seschrock 				return (vd);
5730c5904d13Seschrock 		}
5731c5904d13Seschrock 	}
5732c5904d13Seschrock 
5733c5904d13Seschrock 	return (NULL);
5734ea8dc4b6Seschrock }
5735eaca9bbdSeschrock 
/*
 * Upgrade the pool's on-disk SPA version to 'version': bump the
 * in-core uberblock version under all config locks, dirty the vdev
 * config so new labels are written, then wait for the change to sync
 * out to disk.
 */
void
spa_upgrade(spa_t *spa, uint64_t version)
{
	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	/*
	 * This should only be called for a non-faulted pool, and since a
	 * future version would result in an unopenable pool, this shouldn't be
	 * possible.
	 */
	ASSERT(spa->spa_uberblock.ub_version <= SPA_VERSION);
	ASSERT(version >= spa->spa_uberblock.ub_version);

	spa->spa_uberblock.ub_version = version;
	vdev_config_dirty(spa->spa_root_vdev);

	spa_config_exit(spa, SCL_ALL, FTAG);

	/* Make sure the new version actually hits disk before returning. */
	txg_wait_synced(spa_get_dsl(spa), 0);
}
575699653d4eSeschrock 
575799653d4eSeschrock boolean_t
575899653d4eSeschrock spa_has_spare(spa_t *spa, uint64_t guid)
575999653d4eSeschrock {
576099653d4eSeschrock 	int i;
576139c23413Seschrock 	uint64_t spareguid;
5762fa94a07fSbrendan 	spa_aux_vdev_t *sav = &spa->spa_spares;
576399653d4eSeschrock 
5764fa94a07fSbrendan 	for (i = 0; i < sav->sav_count; i++)
5765fa94a07fSbrendan 		if (sav->sav_vdevs[i]->vdev_guid == guid)
576699653d4eSeschrock 			return (B_TRUE);
576799653d4eSeschrock 
5768fa94a07fSbrendan 	for (i = 0; i < sav->sav_npending; i++) {
5769fa94a07fSbrendan 		if (nvlist_lookup_uint64(sav->sav_pending[i], ZPOOL_CONFIG_GUID,
5770fa94a07fSbrendan 		    &spareguid) == 0 && spareguid == guid)
577139c23413Seschrock 			return (B_TRUE);
577239c23413Seschrock 	}
577339c23413Seschrock 
577499653d4eSeschrock 	return (B_FALSE);
5775eaca9bbdSeschrock }
5776b1b8ab34Slling 
577789a89ebfSlling /*
577889a89ebfSlling  * Check if a pool has an active shared spare device.
577989a89ebfSlling  * Note: reference count of an active spare is 2, as a spare and as a replace
578089a89ebfSlling  */
578189a89ebfSlling static boolean_t
578289a89ebfSlling spa_has_active_shared_spare(spa_t *spa)
578389a89ebfSlling {
578489a89ebfSlling 	int i, refcnt;
578589a89ebfSlling 	uint64_t pool;
578689a89ebfSlling 	spa_aux_vdev_t *sav = &spa->spa_spares;
578789a89ebfSlling 
578889a89ebfSlling 	for (i = 0; i < sav->sav_count; i++) {
578989a89ebfSlling 		if (spa_spare_exists(sav->sav_vdevs[i]->vdev_guid, &pool,
579089a89ebfSlling 		    &refcnt) && pool != 0ULL && pool == spa_guid(spa) &&
579189a89ebfSlling 		    refcnt > 2)
579289a89ebfSlling 			return (B_TRUE);
579389a89ebfSlling 	}
579489a89ebfSlling 
579589a89ebfSlling 	return (B_FALSE);
579689a89ebfSlling }
579789a89ebfSlling 
/*
 * Post a sysevent corresponding to the given event.  The 'name' must be one of
 * the event definitions in sys/sysevent/eventdefs.h.  The payload will be
 * filled in from the spa and (optionally) the vdev.  This doesn't do anything
 * in the userland libzpool, as we don't want consumers to misinterpret ztest
 * or zdb as real changes.
 */
void
spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
{
#ifdef _KERNEL
	sysevent_t		*ev;
	sysevent_attr_list_t	*attr = NULL;
	sysevent_value_t	value;
	sysevent_id_t		eid;

	/*
	 * NOTE(review): 'ev' is used unchecked below; this relies on
	 * sysevent_alloc() with SE_SLEEP not returning NULL for these
	 * fixed class/publisher strings — confirm against sysevent(9F).
	 */
	ev = sysevent_alloc(EC_ZFS, (char *)name, SUNW_KERN_PUB "zfs",
	    SE_SLEEP);

	/* Attribute: pool name (string). */
	value.value_type = SE_DATA_TYPE_STRING;
	value.value.sv_string = spa_name(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_NAME, &value, SE_SLEEP) != 0)
		goto done;

	/* Attribute: pool guid (uint64). */
	value.value_type = SE_DATA_TYPE_UINT64;
	value.value.sv_uint64 = spa_guid(spa);
	if (sysevent_add_attr(&attr, ZFS_EV_POOL_GUID, &value, SE_SLEEP) != 0)
		goto done;

	/* Optional vdev attributes, only when a specific vdev is involved. */
	if (vd) {
		value.value_type = SE_DATA_TYPE_UINT64;
		value.value.sv_uint64 = vd->vdev_guid;
		if (sysevent_add_attr(&attr, ZFS_EV_VDEV_GUID, &value,
		    SE_SLEEP) != 0)
			goto done;

		/* vdev_path may be absent (e.g. for non-disk vdevs). */
		if (vd->vdev_path) {
			value.value_type = SE_DATA_TYPE_STRING;
			value.value.sv_string = vd->vdev_path;
			if (sysevent_add_attr(&attr, ZFS_EV_VDEV_PATH,
			    &value, SE_SLEEP) != 0)
				goto done;
		}
	}

	/*
	 * On success, sysevent_attach_attributes() takes ownership of the
	 * attribute list; clear 'attr' so the cleanup path below does not
	 * free it a second time.
	 */
	if (sysevent_attach_attributes(ev, attr) != 0)
		goto done;
	attr = NULL;

	/* Best effort: the result of posting the event is ignored. */
	(void) log_sysevent(ev, SE_SLEEP, &eid);

done:
	if (attr)
		sysevent_free_attr(attr);
	sysevent_free(ev);
#endif
}
5855